Usenet 1994 October

home *** CD-ROM | disk | FTP | other *** search

/ Usenet 1994 October / usenetsourcesnewsgroupsinfomagicoctober1994disk2.iso / unix / volume23 / trn / part05 / mt-process.c < prev

Wrap

C/C++ Source or Header | 1991-08-22 | 34KB | 1,350 lines

/* $Header: mt-process.c,v 4.3.3.3 91/01/18 19:13:20 davison Trn $
**
** $Log: mt-process.c,v $
** Revision 4.3.3.3 91/01/18 19:13:20 davison
** Removed the code that tried to exclude certain message ids. Added -s option
**
** Revision 4.3.3.2 90/08/20 16:40:31 davison
** Added check of caught_interrupt flag into main loops.
**
** Revision 4.3.3.1 90/07/28 18:04:45 davison
** Initial Trn Release
**
*/

#include "EXTERN.h"
#include "common.h"
#include "mthreads.h"
#ifdef SERVER
#include "server.h"
#endif

#include <time.h>
#ifndef TZSET
# include <sys/timeb.h>
#endif

/* Scratch line buffer: holds the header line being parsed and is also used
** to build file names and server commands.
*/
char buff[1024];
/* The References: header of the article being processed (with its leading
** space), consumed by insert_article().
*/
char references[1024];
/* Parsed subject of the current article, filled in by get_subject_str(). */
char subject_str[80];
/* Set by get_subject_str() when a leading "Re:" was stripped. */
bool found_Re;
/* Parsed author of the current article, filled in by get_author_str(). */
char author_str[20];

extern int log_verbosity, slow_down;

/* The domain expire() will visit next.  It is global so that free_domain()
** can advance it when that domain is the one being freed.
*/
DOMAIN *next_domain;

void insert_article(), expire(), trim_roots(), order_roots(), trim_authors();
void make_root(), use_root(), merge_roots(), set_root(), unlink_root();
void link_child(), unlink_child();
void free_article(), free_domain(), free_subject(), free_root(), free_author();
void get_subject_str(), get_author_str();
/* These two are called before their definitions appear below. */
int valid_message_id(), subject_equal();
ARTICLE *get_article();
SUBJECT *new_subject();
AUTHOR *new_author();

#ifdef TZSET
extern time_t tnow;
#else
extern struct timeb ftnow;
#endif

#ifndef SERVER
/* The article file currently open in the spool directory. */
static FILE *fp_article;
#endif

/* Given the upper/lower bounds of the articles in the current group, add all
** the ones that we don't know about and remove all the ones that have expired.
** The current directory must be the newsgroup's spool directory.
*/ void process_articles( first_article, last_article ) ART_NUM first_article, last_article; { register char *cp, *str; register ARTICLE *article; register ART_NUM i; time_t date; int len; #ifdef SERVER bool orig_extra = extra_expire; #endif extern int errno; extern int sys_nerr; extern char *sys_errlist[]; if( first_article > (i = total.last+1) ) { i = first_article; } added_count = last_article - i + 1; expired_count = 0; for( ; i <= last_article; i++ ) { if( caught_interrupt ) { return; } #ifdef SERVER if( slow_down ) { sleep( slow_down ); } sprintf( buff, "HEAD %ld", (long)i ); put_server( buff ); if( get_server( buff, sizeof buff ) < 0 || *buff == CHAR_FATAL ) { last_article = i - 1; extra_expire = FALSE; break; } if( *buff != CHAR_OK ) { added_count--; continue; } #else /* Open article in current directory. */ sprintf( buff, "%ld", (long)i ); /* Set errno for purely paranoid reasons */ errno = 0; if( (fp_article = fopen( buff, "r" )) == Nullfp ) { /* Missing files are ok -- they've just been expired or canceled */ if( errno != 0 && errno != ENOENT ) { if( errno < 0 || errno > sys_nerr ) { log_error( "Can't open `%s': Error %d.\n", buff, errno ); } else { log_error( "Can't open `%s': %s.\n", buff, sys_errlist[errno] ); } } added_count--; continue; } #endif article = Nullart; *references = '\0'; *author_str = '\0'; *subject_str = '\0'; found_Re = 0; date = 0; #ifdef SERVER while( get_server( cp = buff, sizeof buff ) == 0 ) { process_line: if( *cp == '.' 
) { break; } #else while( (cp = fgets( buff, sizeof buff, fp_article )) != Nullch ) { process_line: if( *cp == '\n' ) { /* check for end of header */ break; /* break out when found */ } #endif if( (unsigned char)*cp <= ' ' ) { /* skip continuation lines */ continue; /* (except references -- see below) */ } if( (str = index( cp, ':' )) == Nullch ) { break; /* end of header if no colon found */ } if( (len = str - cp) > 10 ) { continue; /* skip keywords > 10 chars */ } #ifndef SERVER cp[strlen(cp)-1] = '\0'; /* remove newline */ #endif while( cp < str ) { /* lower-case the keyword */ if( (unsigned char)*cp <= ' ' ) { /* stop at any whitespace */ break; } if( isupper(*cp) ) { *cp = tolower(*cp); } cp++; } *cp = '\0'; cp = buff; if( len == 4 && strEQ( cp, "date" ) ) { #ifdef TZSET date = getdate( str + 1, tnow, timezone ); #else date = getdate( str + 1, ftnow.time, (long) ftnow.timezone ); #endif } else if( len == 4 && strEQ( cp, "from" ) ) { get_author_str( str + 1 ); } else if( len == 7 && strEQ( cp, "subject" ) ) { get_subject_str( str + 1 ); } else if( len == 10 && strEQ( cp, "message-id" ) ) { if( !article ) { article = get_article( str + 1 ); } else { if( log_verbosity ) { log_error( "Found multiple Message-IDs! [%ld].\n", (long)i ); } } } else if( len == 10 && strEQ( cp, "references" ) ) { /* include preceding space in saved reference */ len = strlen( str + 1 ); bcopy( str + 1, references, len + 1 ); str = references + len; /* check for continuation lines */ #ifdef SERVER while( get_server( cp = buff, sizeof buff ) == 0 ) { #else while( (cp = fgets( buff, sizeof buff, fp_article )) != Nullch ) { #endif if( *cp != ' ' && *cp != '\t' ) { goto process_line; } while( *++cp == ' ' || *cp == '\t' ) { ; } *--cp = ' '; /* If the references are too long, shift them over to ** always save the most recent ones. 
*/ if( (len += strlen( cp )) > 1023 ) { strcpy( buff, buff + len - 1023 ); str -= len - 1023; len = 1023; } strcpy( str, cp ); }/* while */ break; }/* if */ }/* while */ if( article ) { insert_article( article, date, i ); } else { if( log_verbosity ) { log_error( "Message-ID line missing! [%ld].\n", (long)i ); } } #ifndef SERVER fclose( fp_article ); #endif } if( extra_expire || first_article > total.first ) { expire( first_article ); } if( caught_interrupt ) { return; } trim_roots(); order_roots(); trim_authors(); total.first = first_article; total.last = last_article; #ifdef SERVER extra_expire = orig_extra; #endif } /* Search all articles for numbers less than new_first. Traverse the list ** using the domain links so we don't have to deal with the tree structure. ** If extra_expire is true, stat() all valid articles to make sure they are ** really there and expire them if they're not. */ void expire( new_first ) ART_NUM new_first; { register DOMAIN *domain; register ARTICLE *article, *next_art, *hold; for( domain = &unk_domain; domain; domain = next_domain ) { next_domain = domain->link; for( article = domain->ids; article; article = next_art ) { if( caught_interrupt ) { return; } next_art = article->id_link; if( !article->subject || (article->flags & NEW_ARTICLE) ) { continue; } if( extra_expire && article->num >= new_first ) { #ifdef SERVER sprintf( buff, "STAT %ld", (long)article->num ); put_server( buff ); if( get_server( buff, sizeof buff ) == 0 && *buff == CHAR_OK ) { continue; } #else sprintf( buff, "%ld", (long)article->num ); if( !stat( buff, &filestat ) || errno != ENOENT ) { continue; } #endif } if( extra_expire || article->num < new_first ) { article->subject->count--; article->subject = 0; article->author->count--; article->author = 0; /* Free expired article if it has no children. Then check ** if the parent(s) are also fake and can be freed. We'll ** free any empty roots later. 
*/ while( !article->children ) { hold = article->parent; unlink_child( article ); free_article( article ); if( hold && !hold->subject ) { if( (article = hold) == next_art ) { next_art = next_art->id_link; } } else { break; } } expired_count++; }/* if */ }/* for */ }/* for */ next_domain = Null(DOMAIN*); } /* Trim the article chains down so that we don't have more than one faked ** article between the root any real ones. */ void trim_roots() { register ROOT *root, *last_root; register ARTICLE *article, *next; register SUBJECT *subject, *last_subj; register int found; #ifndef lint last_root = (ROOT *)&root_root; #else last_root = Null(ROOT*); #endif for( root = root_root; root; root = last_root->link ) { for( article = root->articles; article; article = article->siblings ) { /* If an article has no subject, it is a "fake" reference node. ** If all of its immediate children are also fakes, delete it ** and graduate the children to the root. If everyone is fake, ** the chain dies. */ while( !article->subject ) { found = 0; for( next = article->children; next; next = next->siblings ) { if( next->subject ) { found = 1; break; } } if( !found ) { /* Remove this faked article and move all its children ** up to the root. */ next = article->children; unlink_child( article ); free_article( article ); for( article = next; article; article = next ) { next = article->siblings; article->parent = Nullart; link_child( article ); } article = root->articles; /* start this root over */ } else { break; /* else, on to next article */ } } } /* Free all unused subject strings. Begin by trying to find a ** subject for the root's pointer. */ for( subject = root->subjects; subject && !subject->count; subject = root->subjects ) { root->subjects = subject->link; free_subject( subject ); root->subject_cnt--; } /* Then free up any unsed intermediate subjects. 
*/ if( (last_subj = subject) != Null(SUBJECT*) ) { while( (subject = subject->link) != Null(SUBJECT*) ) { if( !subject->count ) { last_subj->link = subject->link; free_subject( subject ); root->subject_cnt--; subject = last_subj; } else { last_subj = subject; } } } /* Now, free all roots without articles. Flag unexpeced errors. */ if( !root->articles ) { if( root->subjects ) { log_error( "** Empty root still had subjects remaining! **\n" ); } last_root->link = root->link; free_root( root ); } else { last_root = root; } } } /* Descend the author list, find any author names that aren't used ** anymore and free them. */ void trim_authors() { register AUTHOR *author, *last_author; #ifndef lint last_author = (AUTHOR *)&author_root; #else last_author = Null(AUTHOR*); #endif for( author = author_root; author; author = last_author->link ) { if( !author->count ) { last_author->link = author->link; free_author( author ); } else { last_author = author; } } } /* Reorder the roots to place the oldest ones first (age determined by ** date of oldest article). */ void order_roots() { register ROOT *root, *next, *search; /* If we don't have at least two roots, we're done! */ if( !(root = root_root) || !(next = root->link) ) { return; /* RETURN */ } /* Break the old list off after the first root, and then start ** inserting the roots into the list by date. */ root->link = Null(ROOT*); while( (root = next) != Null(ROOT*) ) { next = next->link; if( (search = root_root)->articles->date >= root->articles->date ) { root->link = root_root; root_root = root; } else { while( search->link && search->link->articles->date < root->articles->date ) { search = search->link; } root->link = search->link; search->link = root; } } } #define EQ(x,y) ((isupper(x) ? tolower(x) : (x)) == (y)) /* Parse the subject into 72 characters or less. Remove any "Re[:^]"s from ** the front (noting that it's there), and any "(was: old)" stuff from ** the end. 
Then, compact multiple whitespace characters into one space, ** trimming leading/trailing whitespace. If it's still too long, unmercifully ** cut it off. We don't bother with subject continuation lines either. */ void get_subject_str( str ) register char *str; { register char *cp; register int len; while( *str && (unsigned char)*str <= ' ' ) { str++; } if( !*str ) { bcopy( "<None>", subject_str, 7 ); return; /* RETURN */ } cp = str; while( EQ( cp[0], 'r' ) && EQ( cp[1], 'e' ) ) { /* check for Re: */ cp += 2; if( *cp == '^' ) { /* allow Re^2: */ while( *++cp <= '9' && *cp >= '0' ) { ; } } if( *cp != ':' ) { break; } while( *++cp == ' ' ) { ; } found_Re = 1; str = cp; } /* Remove "(was Re: oldsubject)", because we already know the old subjects. ** Also match "(Re: oldsubject)". Allow possible spaces after the ('s. */ for( cp = str; (cp = index( cp+1, '(' )) != Nullch; ) { while( *++cp == ' ' ) { ; } if( EQ( cp[0], 'w' ) && EQ( cp[1], 'a' ) && EQ( cp[2], 's' ) && (cp[3] == ':' || cp[3] == ' ') ) { *--cp = '\0'; break; } if( EQ( cp[0], 'r' ) && EQ( cp[1], 'e' ) && ((cp[2]==':' && cp[3]==' ') || (cp[2]=='^' && cp[4]==':')) ) { *--cp = '\0'; break; } } /* Copy subject to a temporary string, compacting multiple spaces/tabs */ for( len = 0, cp = subject_str; len < 72 && *str; len++ ) { if( (unsigned char)*str <= ' ' ) { while( *++str && (unsigned char)*str <= ' ' ) { ; } *cp++ = ' '; } else { *cp++ = *str++; } } if( cp[-1] == ' ' ) { cp--; } *cp = '\0'; } /* Try to fit the author name in 16 bytes. Use the comment portion in ** parenthesis if present. Cut off non-commented names at the '@' or '%'. ** Then, put as many characters as we can into the 16 bytes, packing multiple ** whitespace characters into a single space. ** We should really implement a nice name shortening algorithm, or simply ** grab the name packing code from nn. 
*/ void get_author_str( str ) char *str; { register char *cp, *cp2; if( (cp = index( str, '(' )) != Nullch ) { str = cp+1; if( (cp = rindex( str, ')' )) != Nullch ) { *cp = '\0'; } } else { if( (cp = index( str, '@' )) != Nullch ) { *cp = '\0'; } if( (cp = index( str, '%' )) != Nullch ) { *cp = '\0'; } } for( cp = str, cp2 = author_str; *cp && cp2-author_str < 16; ) { /* Pack white space and turn ctrl-chars into spaces. */ if( *cp <= ' ' ) { while( *++cp && *cp <= ' ' ) { ; } if( cp2 != author_str ) { *cp2++ = ' '; } } else { *cp2++ = *cp++; } } *cp2 = '\0'; } /* Take a message-id and see if we already know about it. If so, return it. ** If not, create it. We separate the id into its id@domain parts, and ** link all the unique ids to one copy of the domain portion. This saves ** a bit of space. */ ARTICLE * get_article( msg_id ) char *msg_id; { register DOMAIN *domain; register ARTICLE *article; register char *cp, *after_at; /* Take message id, break it up into <id@domain>, and try to match it. */ while( *msg_id == ' ' ) { msg_id++; } cp = msg_id + strlen( msg_id ) - 1; if( msg_id >= cp ) { if( log_verbosity ) { log_error( "Message-ID is empty!\n" ); } return Nullart; } if( *msg_id++ != '<' ) { if( log_verbosity ) { log_error( "Message-ID doesn't start with '<'.\n" ); } msg_id--; } if( *cp != '>' ) { if( log_verbosity ) { log_error( "Message-ID doesn't end with '>'.\n" ); } cp++; } *cp = '\0'; if( msg_id == cp ) { if( log_verbosity ) { log_error( "Message-ID is null!\n" ); } return Nullart; } if( (after_at = index( msg_id, '@' )) == Nullch ) { domain = &unk_domain; } else { *after_at++ = '\0'; for( cp = after_at; *cp; cp++ ) { if( isupper(*cp) ) { *cp = tolower(*cp); /* lower-case domain portion */ } } *cp = '\0'; /* Try to find domain name in database. 
*/ for( domain = unk_domain.link; domain; domain = domain->link ) { if( strEQ( domain->name, after_at ) ) { break; } } if( !domain ) { /* if domain doesn't exist, create it */ register int len = cp - after_at + 1; domain = (DOMAIN *)safemalloc( sizeof (DOMAIN) ); total.domain++; domain->name = safemalloc( len ); total.string2 += len; bcopy( after_at, domain->name, len ); domain->ids = Nullart; domain->link = unk_domain.link; unk_domain.link = domain; } } /* Try to find id in this domain. */ for( article = domain->ids; article; article = article->id_link ) { if( strEQ( article->id, msg_id ) ) { break; } } if( !article ) { /* If it doesn't exist, create an article */ register int len = strlen( msg_id ) + 1; article = (ARTICLE *)safemalloc( sizeof (ARTICLE) ); bzero( article, sizeof (ARTICLE) ); total.article++; article->num = 0; article->id = safemalloc( len ); total.string2 += len; bcopy( msg_id, article->id, len ); article->domain = domain; article->id_link = domain->ids; domain->ids = article; } return article; } /* Take all the data we've accumulated about the article and shove it into ** the article tree at the best place we can possibly imagine. */ void insert_article( article, date, num ) ARTICLE *article; time_t date; ART_NUM num; { register ARTICLE *node, *last; register char *cp, *end; int len; if( article->subject ) { if( log_verbosity ) { log_error( "We've already seen article #%ld (%s@%s)\n", (long)num, article->id, article->domain->name ); } return; /* RETURN */ } article->date = date; article->num = num; article->flags = NEW_ARTICLE; if( !*references && found_Re ) { if( log_verbosity > 1 ) { log_error( "Missing reference line! [%ld]\n", (long)num ); } } /* If the article has a non-zero root, it is already in a thread somewhere. ** Unlink it to try to put it in the best possible spot. */ if( article->root ) { /* Check for a real or shared-fake parent. Articles that have never ** existed have a num of 0. 
Expired articles that remain as references ** have a valid num. (Valid date too, but no subject.) */ for( node = article->parent; node && !node->num && node->child_cnt == 1; node = node->parent ) { ; } unlink_child( article ); if( node ) { /* do we have decent parents? */ /* Yes: assume that our references are ok, and just reorder us ** with our siblings by date. */ link_child( article ); use_root( article, article->root ); /* Freshen the date in any faked parent articles. */ for( node = article->parent; node && !node->num && date < node->date; node = node->parent ) { node->date = date; unlink_child( node ); link_child( node ); } return; /* RETURN */ } /* We'll assume that this article has as good or better references ** than the child that faked us initially. Free the fake reference- ** chain and process our references as usual. */ for( node = article->parent; node; node = node->parent ) { unlink_child( node ); free_article( node ); } article->parent = Nullart; /* neaten up */ article->siblings = Nullart; } check_references: if( !*references ) { /* If no references but "Re:" in subject, */ if( found_Re ) { /* search for a reference in any cited text */ #ifndef SERVER for( len = 4; len && fgets( buff, sizeof buff, fp_article ); len-- ) { if( (cp = index( buff, '<' )) && (end = index( cp, ' ' )) ) { if( end[-1] == ',' ) { end--; } *end = '\0'; if( (end = index( cp, '>' )) == Nullch ) { end = cp + strlen( cp ) - 1; } if( valid_message_id( cp, end ) ) { strcpy( references+1, cp ); *references = ' '; if( log_verbosity > 2 ) { log_error( "Found cited-text reference: '%s' [%ld]\n", references+1, (long)num ); } break; } } } #endif } else { article->flags |= ROOT_ARTICLE; } } /* If we have references, process them from the right end one at a time ** until we either run into somebody, or we run out of references. 
*/ if( *references ) { last = article; node = Nullart; end = references + strlen( references ) - 1; while( (cp = rindex( references, ' ' )) != Nullch ) { *cp++ = '\0'; while( end >= cp && ((unsigned char)*end <= ' ' || *end == ',') ) { end--; } end[1] = '\0'; /* Quit parsing references if this one is garbage. */ if( !valid_message_id( cp, end ) ) { if( log_verbosity ) { log_error( "Bad ref '%s' [%ld]\n", cp, (long)num ); } break; } /* Dump all domains that end in '.', such as "..." & "1@DEL." */ if( end[-1] == '.' ) { break; } node = get_article( cp ); /* Check for duplicates on the reference line. Brand-new data has ** no date. Data we just allocated earlier on this line has a ** date but no root. Special-case the article itself, since it ** MIGHT have a root. */ if( (node->date && !node->root) || node == article ) { if( log_verbosity ) { log_error( "Reference line contains duplicates [%ld]\n", (long)num ); } if( (node = last) == article ) { node = Nullart; } continue; } last->parent = node; link_child( last ); if( node->root ) { break; } node->date = date; last = node; end = cp-2; } if( !node ) { *references = '\0'; goto check_references; } /* Check if we ran into anybody that was already linked. If so, we ** just use their root. */ if( node->root ) { /* See if this article spans the gap between what we thought ** were two different roots. */ if( article->root && article->root != node->root ) { merge_roots( node->root, article->root ); /* Set the roots of any children we brought with us. */ set_root( article, node->root ); } use_root( article, node->root ); } else { /* We didn't find anybody we knew, so either create a new root or ** use the article's root if it was previously faked. */ if( !article->root ) { make_root( node ); use_root( article, node->root ); } else { use_root( article, article->root ); node->root = article->root; link_child( node ); } } /* Set the roots of the faked articles we created as references. 
*/ for( node = article->parent; node && !node->root; node = node->parent ) { node->root = article->root; } /* Make sure we didn't circularly link to a child article(!), by ** ensuring that we run into the root before we run into ourself. */ while( node && node->parent != article ) { node = node->parent; } if( node ) { /* Ugh. Someone's tweaked reference line with an incorrect ** article order arrived first, and one of our children is ** really one of our ancestors. Cut off the bogus child branch ** right where we are and link it to the root. */ if( log_verbosity ) { log_error("Found ancestral child -- fixing.\n"); } unlink_child( node ); node->parent = Nullart; link_child( node ); } } else { /* The article has no references. Either turn it into a new root, or ** re-attach fleshed-out (previously faked) article to its old root. */ if( !article->root ) { make_root( article ); } else { use_root( article, article->root ); link_child( article ); } } } /* Check if the string we've found looks like a valid message-id reference. */ int valid_message_id( start, end ) register char *start, *end; { char *mid; if( *end != '>' ) { /* Compensate for spacecadets who include the header in their ** subsitution of all '>'s into another citation character. */ if( *end == '<' || *end == '-' || *end == '!' || *end == '%' || *end == ')' || *end == '|' || *end == ':' || *end == '}' || *end == '*' || *end == '+' || *end == '#' || *end == ']' || *end == '@' ) { if( log_verbosity ) { log_error( "Reference ended in '%c'.\n", *end ); } *end = '>'; } } /* Id must be "<...@...>" */ if( *start != '<' || *end != '>' || (mid = index( start, '@' )) == Nullch || mid == start+1 || mid+1 == end ) { return 0; /* RETURN */ } return 1; } /* Remove an article from its parent/siblings. Leave parent pointer intact. 
*/ void unlink_child( child ) register ARTICLE *child; { register ARTICLE *last; if( !(last = child->parent) ) { child->root->thread_cnt--; if( (last = child->root->articles) == child ) { child->root->articles = child->siblings; } else { goto sibling_search; } } else { last->child_cnt--; if( last->children == child ) { last->children = child->siblings; } else { last = last->children; sibling_search: while( last->siblings != child ) { last = last->siblings; } last->siblings = child->siblings; } } } /* Link an article to its parent article. If its parent pointer is zero, ** link it to its root. Sorts siblings by date. */ void link_child( child ) register ARTICLE *child; { register ARTICLE *node; register ROOT *root; if( !(node = child->parent) ) { root = child->root; root->thread_cnt++; node = root->articles; if( !node || child->date < node->date ) { child->siblings = node; root->articles = child; } else { goto sibling_search; } } else { node->child_cnt++; node = node->children; if( !node || child->date < node->date ) { child->siblings = node; child->parent->children = child; } else { sibling_search: for( ; node->siblings; node = node->siblings ) { if( node->siblings->date > child->date ) { break; } } child->siblings = node->siblings; node->siblings = child; } } } /* Create a new root for the specified article. If the current subject_str ** matches any pre-existing root's subjects, we'll instead add it on as a ** parallel thread. */ void make_root( article ) ARTICLE *article; { register ROOT *new, *node; register SUBJECT *subject; #ifndef NO_SUBJECT_MATCHING /* First, check the other root's subjects for a match. */ for( node = root_root; node; node = node->link ) { for( subject = node->subjects; subject; subject = subject->link ) { if( subject_equal( subject->str, subject_str ) ) { use_root( article, node ); /* use it instead */ link_child( article ); return; /* RETURN */ } } } #endif /* Create a new root. 
*/ new = (ROOT *)safemalloc( sizeof (ROOT) ); total.root++; new->articles = article; new->root_num = article->num; new->thread_cnt = 1; if( article->num ) { article->author = new_author(); new->subject_cnt = 1; new->subjects = article->subject = new_subject(); } else { new->subject_cnt = 0; new->subjects = Null(SUBJECT*); } article->root = new; new->link = root_root; root_root = new; } /* Add this article's subject onto the indicated root's list. Point the ** article at the root. */ void use_root( article, root ) ARTICLE *article; ROOT *root; { register SUBJECT *subject; register ROOT *root2; SUBJECT *hold, *child_subj = Null(SUBJECT*); ARTICLE *node; article->root = root; /* If it's a fake, there's no subject to add. */ if( !article->num ) { return; /* RETURN */ } /* If we haven't picked a unique message number to represent this root, ** use the first non-zero number we encounter. Which one doesn't matter. */ if( !root->root_num ) { root->root_num = article->num; } article->author = new_author(); /* Check if the new subject matches any of the other subjects in this root. ** If so, we just update the count. If not, check all the other roots for ** a match. If found, the new subject is common between the two roots, so ** we merge the two roots together. */ root2 = root; #ifndef NO_SUBJECT_MATCHING do { #endif for( subject = root2->subjects; subject; subject = subject->link ) { if( subject_equal( subject->str, subject_str ) ) { article->subject = subject; subject->count++; #ifndef NO_SUBJECT_MATCHING if( root2 != root ) { merge_roots( root, root2 ); } #endif return; /* RETURN */ } } #ifndef NO_SUBJECT_MATCHING if( (root2 = root2->link) == Null(ROOT*) ) { root2 = root_root; } } while( root2 != root ); #endif article->subject = hold = new_subject(); root->subject_cnt++; /* Find subject of any pre-existing children. We want to insert the new ** subject before a child's to keep the subject numbering intuitive ** in the newsreader. 
*/ for( node = article->children; node; node = node->children ) { if( node->subject ) { child_subj = node->subject; break; } } if( !(subject = root->subjects) || subject == child_subj ) { hold->link = root->subjects; root->subjects = hold; } else { while( subject->link && subject->link != child_subj ) { subject = subject->link; } hold->link = subject->link; subject->link = hold; } } /* Check subjects in a case-insignificant, punctuation ignoring manner. */ int subject_equal( str1, str2 ) register char *str1, *str2; { register char ch1, ch2; while( (ch1 = *str1++) ) { if( ch1 == ' ' || ispunct( ch1 ) ) { while( *str1 && (*str1 == ' ' || ispunct( *str1 )) ) { str1++; } ch1 = ' '; } else if( isupper( ch1 ) ) { ch1 = tolower( ch1 ); } if( !(ch2 = *str2++) ) { return 0; } if( ch2 == ' ' || ispunct( ch2 ) ) { while( *str2 && (*str2 == ' ' || ispunct( *str2 )) ) { str2++; } ch2 = ' '; } else if( isupper( ch2 ) ) { ch2 = tolower( ch2 ); } if( ch1 != ch2 ) { return 0; } } if( *str2 ) { return 0; } return 1; } /* Create a new subject structure. */ SUBJECT * new_subject() { register int len = strlen( subject_str ) + 1; register SUBJECT *subject; subject = (SUBJECT *)safemalloc( sizeof (SUBJECT) ); total.subject++; subject->count = 1; subject->link = Null(SUBJECT*); subject->str = safemalloc( len ); total.string1 += len; bcopy( subject_str, subject->str, len ); return subject; } /* Create a new author structure. 
*/ AUTHOR * new_author() { register len = strlen( author_str ) + 1; register AUTHOR *author, *last_author; last_author = Null(AUTHOR*); for( author = author_root; author; author = author->link ) { #ifndef DONT_COMPARE_AUTHORS /* might like to define this to save time */ if( strEQ( author->name, author_str ) ) { author->count++; return author; /* RETURN */ } #endif last_author = author; } author = (AUTHOR *)safemalloc( sizeof (AUTHOR) ); total.author++; author->count = 1; author->link = Null(AUTHOR*); author->name = safemalloc( len ); total.string1 += len; bcopy( author_str, author->name, len ); if( last_author ) { last_author->link = author; } else { author_root = author; } return author; } /* Insert all of root2 into root1, setting the proper root values and ** updating subject counts. */ void merge_roots( root1, root2 ) ROOT *root1, *root2; { register ARTICLE *node, *next; register SUBJECT *subject; /* Remember whoever's root num is lower. This could screw up a ** newsreader's kill-thread code if someone already saw the roots as ** being separate, but it must be done. The newsreader code will have ** to handle this as best as it can. */ if( root1->root_num > root2->root_num ) { root1->root_num = root2->root_num; } for( node = root2->articles; node; node = next ) { /* For each article attached to root2, detach them, set the ** branch's root pointers to root1, and then attach it to root1. */ next = node->siblings; unlink_child( node ); node->siblings = Nullart; set_root( node, root1 ); /* sets children too */ /* Link_child() depends on node->parent being null and node->root ** being set. */ link_child( node ); } root1->subject_cnt += root2->subject_cnt; if( !(subject = root1->subjects) ) { root1->subjects = root2->subjects; } else { while( subject->link ) { subject = subject->link; } subject->link = root2->subjects; } unlink_root( root2 ); free_root( root2 ); } /* When merging roots, we need to reset all the root pointers. 
*/ void set_root( node, root ) ARTICLE *node; ROOT *root; { do { node->root = root; if( node->children ) { set_root( node->children, root ); } } while( node = node->siblings ); } /* Unlink a root from its neighbors. */ void unlink_root( root ) register ROOT *root; { register ROOT *node; if( (node = root_root) == root ) { root_root = root->link; } else { while( node->link != root ) { node = node->link; } node->link = root->link; } } /* Free an article and its message-id string. All other resources must ** already be free, and it must not be attached to any threads. */ void free_article( this ) ARTICLE *this; { register ARTICLE *art; if( (art = this->domain->ids) == this ) { if( !(this->domain->ids = this->id_link) ) { free_domain( this->domain ); } } else { while( this != art->id_link ) { art = art->id_link; } art->id_link = this->id_link; } total.string2 -= strlen( this->id ) + 1; free( this->id ); free( this ); total.article--; } /* Free the domain only when its last unique id has been freed. */ void free_domain( this ) DOMAIN *this; { register DOMAIN *domain; if( this == (domain = &unk_domain) ) { return; } if( this == next_domain ) { /* help expire routine skip freed domains */ next_domain = next_domain->link; } while( this != domain->link ) { domain = domain->link; } domain->link = this->link; total.string2 -= strlen( this->name ) + 1; free( this->name ); free( this ); total.domain--; } /* Free the subject structure and its string. */ void free_subject( this ) SUBJECT *this; { total.string1 -= strlen( this->str ) + 1; free( this->str ); free( this ); total.subject--; } /* Free a root. It must already be unlinked. */ void free_root( this ) ROOT *this; { free( this ); total.root--; } /* Free the author structure when it's not needed any more. */ void free_author( this ) AUTHOR *this; { total.string1 -= strlen( this->name ) + 1; free( this->name ); free( this ); total.author--; }