home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Usenet 1994 October
/
usenetsourcesnewsgroupsinfomagicoctober1994disk2.iso
/
unix
/
volume27
/
mthreads
/
part03
/
mt-process.c
Wrap
C/C++ Source or Header
|
1993-11-20
|
44KB
|
1,829 lines
/* $Id: mt-process.c,v 3.0 1993/10/01 00:14:03 davison Trn $
*/
/* The authors make no claims as to the fitness or correctness of this software
* for any use whatsoever, and it is provided as is. Any use of this software
* is at the user's own risk.
*/
#include "EXTERN.h"
#include "common.h"
#include "thread.h"
#include "mthreads.h"
#include "ndir.h"
#include "nntpclient.h"
/* Text of the current article's References header.  process_articles()
** stores it here (joining any continuation lines) and insert_article()
** parses it, overwriting parts of it with NULs as it goes.
*/
char references[1024];
/* Cleaned-up subject of the current article, written by get_subject_str()
** (at most 72 characters plus the terminating NUL).
*/
char subject_str[80];
/* Set by get_subject_str() when it strips a leading "Re:" from the subject;
** cleared by process_articles() before each article's header is read.
*/
bool found_Re;
/* Compressed author name of the current article, written by
** get_author_str().
*/
char author_str[20];
/* Article-number bounds that process_articles() sets before calling
** expire(), which reads them.
*/
ART_NUM absfirst;
ART_NUM lastart;
/* Storage allocated and released inside expire(); it is described there as
** a bitmap of existing articles.
*/
char *ctlarea;
extern int log_verbosity, slow_down;
/* Number of the article currently being processed.  process_articles() sets
** it just before calling insert_article(); the log messages print it.
*/
long num;
/* Next domain to visit while expire() walks the domain list; reset to a
** null pointer when expire() finishes.
*/
DOMAIN *next_domain;
/* Forward declarations (old-style, without parameter lists). */
void insert_article(), expire(), trim_roots(), order_roots(), trim_authors();
void make_root(), use_root(), merge_roots(), set_root(), unlink_root();
void link_child(), unlink_child();
void free_article(), free_domain(), free_subject(), free_root(), free_author();
void get_subject_str(), get_author_str();
/* The _(( )) wrapper is defined elsewhere -- presumably it drops the
** parameter list for compilers without prototypes; confirm in the headers.
*/
int valid_message_id _((char *, char *));
int subject_equal _((char *, char *));
ARTICLE *get_article();
SUBJECT *new_subject();
AUTHOR *new_author();
#ifndef USE_NNTP
static FILE *fp_article;
#endif
/* Given the upper/lower bounds of the articles in the current group, add all
** the ones that we don't know about and remove all the ones that have expired.
** The current directory must be the newsgroup's spool directory.
**
** first_article and last_article bound the article numbers in the group.
** Scanning starts just past total.last (or at first_article if that is
** higher) and is limited to the newest 1000 articles.  For every article the
** header is read (NNTP "HEAD", or the spool file named by the article
** number), the Date, From, Xref, Subject, Message-ID and References lines are
** picked out, and the article is handed to insert_article().  Afterwards
** expire() is run if needed, the root and author lists are tidied, and
** total.first/total.last are updated.
**
** Defects handled here:
**  - sys_errlist[] is never indexed with errno == sys_nerr.
**  - Only a genuine trailing newline is stripped from a header line.
**  - The References text is bounded to the size of references[].
**  - Folded References lines are appended one after another, and when the
**    total would overflow, the oldest bytes of references[] are discarded so
**    the most recent references are kept.
*/
void
process_articles(first_article, last_article)
ART_NUM first_article, last_article;
{
    register char *cp, *str;
    register ARTICLE *article;
    register ART_NUM i;
    time_t date;
    bool has_xrefs;
    int len;
    int addlen, excess;
    int maxrefs = sizeof references - 1;	/* longest string references[] holds */
#ifdef USE_NNTP
    bool orig_extra = extra_expire;
#else
    extern int sys_nerr;
    extern char *sys_errlist[];
#endif
    int start = total.last + 1;

    if (first_article > start) {
	start = first_article;
    }
    added_count = last_article - start + 1;
    if (added_count < 0) {
	added_count = 0;
    } else if (added_count > 1000) {
	/* Don't overwork ourselves the first time */
	added_count = 1000;
	start = last_article - 1000 + 1;
    }
    expired_count = 0;

    for (i = start; i <= last_article; i++) {
#ifdef USE_NNTP
	if (slow_down) {
	    usleep(slow_down);
	}
	sprintf(ser_line, "HEAD %ld", (long)i);
	nntp_command(ser_line);
	if (nntp_check(FALSE) == NNTP_CLASS_FATAL) {
	    /* Give up on the rest of the group and skip the extra expire. */
	    last_article = i - 1;
	    extra_expire = FALSE;
	    break;
	}
	if (*ser_line != NNTP_CLASS_OK) {
	    added_count--;
	    continue;
	}
#else
	/* Open article in current directory. */
	sprintf(buf, "%ld", (long)i);
	/* Set errno for purely paranoid reasons */
	errno = 0;
	if ((fp_article = fopen(buf, "r")) == Nullfp) {
	    /* Missing files are ok -- they've just been expired or canceled */
	    if (errno != 0 && errno != ENOENT) {
		/* Valid sys_errlist[] indices are 0 .. sys_nerr-1. */
		if (errno < 0 || errno >= sys_nerr) {
		    log_error("Can't open `%s': Error %d.\n", buf, errno);
		} else {
		    log_error("Can't open `%s': %s.\n", buf,
			sys_errlist[errno]);
		}
	    }
	    added_count--;
	    continue;
	}
#endif
	article = Nullart;
	*references = '\0';
	*author_str = '\0';
	*subject_str = '\0';
	found_Re = 0;
	date = 0;
	has_xrefs = FALSE;

#ifdef USE_NNTP
	while (nntp_gets(cp = buf, sizeof buf) == 0) {
	  process_line:
	    if (*cp == '.') {
		if (cp[1]) {
		    log_error("Header line starts with '.'! [%ld].\n",
			(long)i);
		    continue;
		}
		break;
	    }
#else
	while ((cp = fgets(buf, sizeof buf, fp_article)) != Nullch) {
	  process_line:
	    if (*cp == '\n') {		/* check for end of header */
		break;			/* break out when found */
	    }
#endif
	    if ((unsigned char)*cp <= ' ') {	/* skip continuation lines */
		continue;		/* (except references -- see below) */
	    }
	    if ((str = index(cp, ':')) == Nullch) {
#ifdef USE_NNTP
		if (log_verbosity) {
		    log_error("Header line missing colon! [%ld].\n", (long)i);
		}
		continue;		/* skip bogus header line */
#else
		break;			/* end of header if no colon found */
#endif
	    }
	    if ((len = str - cp) > 10) {
		continue;		/* skip keywords > 10 chars */
	    }
#ifndef USE_NNTP
	    /* Remove the newline, but only if there really is one: a line
	    ** that filled buf has none and must not lose its last byte.
	    */
	    addlen = strlen(cp);
	    if (addlen > 0 && cp[addlen - 1] == '\n') {
		cp[addlen - 1] = '\0';
	    }
#endif
	    while (cp < str) {		/* lower-case the keyword */
		if ((unsigned char)*cp <= ' ') { /* stop at any whitespace */
		    break;
		}
		if (isupper((unsigned char)*cp)) {
		    *cp = tolower((unsigned char)*cp);
		}
		cp++;
	    }
	    *cp = '\0';
	    cp = buf;
	    if (len == 4 && strEQ(cp, "date")) {
		date = parsedate(str + 1);
	    } else
	    if (len == 4 && strEQ(cp, "from")) {
		get_author_str(str + 1);
	    } else
	    if (len == 4 && strEQ(cp, "xref")) {
		has_xrefs = TRUE;
	    } else
	    if (len == 7 && strEQ(cp, "subject")) {
		get_subject_str(str + 1);
	    } else
	    if (len == 10 && strEQ(cp, "message-id")) {
		if (!article) {
		    article = get_article(str + 1);
		} else {
		    if (log_verbosity) {
			log_error("Found multiple Message-IDs! [%ld].\n",
			    (long)i);
		    }
		}
	    } else
	    if (len == 10 && strEQ(cp, "references")) {
		/* include preceding space in saved reference */
		str++;
		len = strlen(str);
		if (len > maxrefs) {
		    /* Too long for references[]: keep the newest tail. */
		    str += len - maxrefs;
		    len = maxrefs;
		}
		bcopy(str, references, len + 1);
		/* check for continuation lines */
#ifdef USE_NNTP
		while (nntp_gets(cp = buf, sizeof buf) == 0) {
#else
		while ((cp = fgets(buf, sizeof buf, fp_article)) != Nullch) {
#endif
		    if (*cp != ' ' && *cp != '\t') {
			/* Not a continuation: treat it as the next header. */
			goto process_line;
		    }
		    /* Collapse the leading whitespace into a single space. */
		    while (*++cp == ' ' || *cp == '\t') {
			;
		    }
		    *--cp = ' ';
		    addlen = strlen(cp);
		    if (addlen > maxrefs) {
			/* The continuation alone overflows: keep its tail. */
			cp += addlen - maxrefs;
			addlen = maxrefs;
		    }
		    /* If the references are too long, shift them over to
		    ** always save the most recent ones.  excess never exceeds
		    ** len because addlen <= maxrefs.  The byte loop copies
		    ** toward lower addresses, so the overlap is harmless.
		    */
		    excess = len + addlen - maxrefs;
		    if (excess > 0) {
			for (str = references; (*str = str[excess]) != '\0'; str++) {
			    ;
			}
			len -= excess;
		    }
		    strcpy(references + len, cp);
		    len += addlen;
		}/* while */
		break;
	    }/* if */
	}/* while */

	if (article) {
	    num = i;
	    insert_article(article, date);
	    if (has_xrefs) {
		article->flags |= HAS_XREFS;
	    }
	} else {
	    if (log_verbosity) {
		log_error("Message-ID line missing! [%ld].\n", (long)i);
	    }
	}
#ifndef USE_NNTP
	fclose(fp_article);
#endif
    }

    if (extra_expire || first_article > total.first) {
	absfirst = first_article;
	lastart = last_article;
	expire(first_article <= last_article ? extra_expire : FALSE);
    }
    trim_roots();
    order_roots();
    trim_authors();

    total.first = first_article;
    total.last = last_article;
#ifdef USE_NNTP
    /* Undo the temporary change made when the server reported a fatal error. */
    extra_expire = orig_extra;
#endif
}
/* Search all articles for numbers less than absfirst.  Traverse the list
** using the domain links so we don't have to deal with the tree structure.
** If extra is true, list all articles in the group (the current directory,
** or the server's LISTGROUP/XHDR output under USE_NNTP) to set up a bitmap
** with the existing articles marked, and also drop everything that isn't
** there.
**
** Reads the globals absfirst and lastart, which the caller sets beforehand.
** Every article dropped this way loses its subject and author (their use
** counts are decremented) and bumps expired_count; dropped articles with no
** children are freed together with any subject-less ancestors that become
** childless as a result.
*/
void
expire(extra)
bool_int extra;
{
register DOMAIN *domain;
register ARTICLE *article, *next_art, *hold;
register ART_NUM art;
#ifdef USE_NNTP
/* Remembers across calls which form of LISTGROUP (if any) the server takes. */
static int listgroup_type = CHECK_LISTGROUP;
extern char line[]; /* line contains the group name */
#else
register DIR *dirp;
#endif
if (extra) {
MEM_SIZE ctlsize;
/* Allocate a bitmap large enough for absfirst thru lastart. */
/* NOTE(review): OFFSET() and BITSPERBYTE are defined elsewhere; the +20 looks
** like slack bytes -- confirm against their definitions. */
ctlsize = (MEM_SIZE)(OFFSET(lastart)/BITSPERBYTE+20);
ctlarea = safemalloc(ctlsize);
bzero(ctlarea, ctlsize);
/* List all articles and use ctl_set() to keep track of what's there. */
#ifdef USE_NNTP
try_again:
switch (listgroup_type) {
case GOOD_LISTGROUP:
nntp_command("LISTGROUP");
(void)nntp_check(FALSE);
break;
case BAD_LISTGROUP:
sprintf(ser_line, "LISTGROUP %s", line);
nntp_command(ser_line);
(void)nntp_check(FALSE);
break;
case CHECK_LISTGROUP:
/* Check if LISTGROUP is available. */
nntp_command("LISTGROUP");
if (nntp_check(FALSE) == NNTP_CLASS_OK) {
listgroup_type = GOOD_LISTGROUP;
} else if (atoi(ser_line) == NNTP_SYNTAX_VAL) {
/* A command syntax error (not an unrecognized command) is
** the LISTGROUP that takes a newsgroup name. */
listgroup_type = BAD_LISTGROUP;
goto try_again;
} else {
listgroup_type = NO_LISTGROUP;
goto try_again;
}
break;
default:
/* No usable LISTGROUP: ask for one header line per article in range. */
sprintf(ser_line,"XHDR lines %ld-%ld",(long)absfirst,(long)lastart);
nntp_command(ser_line);
(void)nntp_check(FALSE);
}
if (*ser_line == NNTP_CLASS_OK) {
/* Each response line starts with an article number; "." ends the list. */
while (1) {
if (nntp_gets(buf, sizeof buf) < 0) {
/* Incomplete listing: don't trust the bitmap. */
extra = 0;
break;
}
if (*buf == '.') {
break;
}
art = atol(buf);
if (art >= absfirst && art <= lastart) {
ctl_set(art);
}
}
} else {
extra = 0;
}
#else
if ((dirp = opendir(".")) != 0) {
register struct direct *dp;
while ((dp = readdir(dirp)) != Null(struct direct *)) {
register char *p;
/* Only names made up entirely of digits are article files. */
for (p = dp->d_name; *p; p++) {
if (!isdigit(*p)) {
goto nope;
}
}
art = atol(dp->d_name);
if (art >= absfirst && art <= lastart) {
ctl_set(art);
}
nope: ;
}
closedir(dirp);
} else {
/* Can't list the directory: fall back to expiring by number only. */
extra = 0;
}
#endif
} else {
ctlarea = Nullch;
}
/* Walk every article of every domain.  The next pointers are fetched before
** the body runs because the body may free the current article.  next_domain
** is a global -- presumably so code called from here can adjust it if a
** domain goes away; confirm in free_article()/free_domain().
*/
for (domain = &unk_domain; domain; domain = next_domain) {
next_domain = domain->link;
for (article = domain->ids; article; article = next_art) {
next_art = article->id_link;
/* Articles without a subject are skipped: nothing left to expire. */
if (!article->subject) {
continue;
}
if (article->num < absfirst
|| (extra && !ctl_check(article->num))) {
article->subject->count--;
article->subject = 0;
article->flags &= ~HAS_XREFS;
article->author->count--;
article->author = 0;
/* Free expired article if it has no children. Then check
** if the parent(s) are also fake and can be freed. We'll
** free any empty roots later.
*/
while (!article->children) {
hold = article->parent;
unlink_child(article);
free_article(article);
if (hold && !hold->subject) {
/* If the parent about to be examined is also the next
** article in this domain's id list, step past it first
** so the outer loop never touches freed memory.
*/
if ((article = hold) == next_art) {
next_art = next_art->id_link;
}
} else {
break;
}
}
expired_count++;
}/* if */
}/* for */
}/* for */
next_domain = Null(DOMAIN*);
/* safefree() is handed the address of the pointer; presumably it tolerates
** the null ctlarea set when extra was false -- confirm in its definition. */
safefree(&ctlarea);
}
/* Trim the article chains down so that we don't have more than one faked
** article between the root and any real ones.  Along the way, free every
** subject whose use count has dropped to zero and every root that is left
** without articles.
**
** The scan of a root's top-level articles also copes with the root becoming
** empty in mid-scan (a childless fake that was the root's only article), a
** case that would otherwise dereference a null article pointer.
*/
void
trim_roots()
{
    register ROOT *root, *last_root;
    register ARTICLE *article, *next;
    register SUBJECT *subject, *last_subj;
    register int found;

    /* Treat the list head itself as the "previous" root so that assigning
    ** last_root->link also works for the first element.
    ** NOTE(review): this presumably relies on `link' being the first member
    ** of ROOT -- confirm against the structure declaration.
    */
#ifndef lint
    last_root = (ROOT *)&root_root;
#else
    last_root = Null(ROOT*);
#endif
    for (root = root_root; root; root = last_root->link) {
	for (article = root->articles; article; article = article->siblings) {
	    /* If an article has no subject, it is a "fake" reference node.
	    ** If all of its immediate children are also fakes, delete it
	    ** and graduate the children to the root.  If everyone is fake,
	    ** the chain dies.
	    */
	    while (article && !article->subject) {
		found = 0;
		for (next = article->children; next; next = next->siblings) {
		    if (next->subject) {
			found = 1;
			break;
		    }
		}
		if (found) {
		    break;		/* keep it; on to next article */
		}
		/* Remove this faked article and move all its children
		** up to the root.
		*/
		next = article->children;
		unlink_child(article);
		free_article(article);
		for (article = next; article; article = next) {
		    next = article->siblings;
		    article->parent = Nullart;
		    link_child(article);
		}
		article = root->articles;	/* start this root over */
	    }
	    if (!article) {
		break;			/* the root has no articles left */
	    }
	}
	/* Free all unused subject strings.  Begin by trying to find a
	** subject for the root's pointer.
	*/
	for (subject = root->subjects; subject && !subject->count; subject = root->subjects) {
	    root->subjects = subject->link;
	    free_subject(subject);
	    root->subject_cnt--;
	}
	/* Then free up any unused intermediate subjects.
	*/
	if ((last_subj = subject) != Null(SUBJECT*)) {
	    while ((subject = subject->link) != Null(SUBJECT*)) {
		if (!subject->count) {
		    last_subj->link = subject->link;
		    free_subject(subject);
		    root->subject_cnt--;
		    subject = last_subj;
		} else {
		    last_subj = subject;
		}
	    }
	}
	/* Now, free all roots without articles.  Flag unexpected errors.
	*/
	if (!root->articles) {
	    if (root->subjects) {
		log_error("** Empty root still had subjects remaining! **\n");
	    }
	    last_root->link = root->link;
	    free_root(root);
	} else {
	    last_root = root;
	}
    }
}
/* Walk the author list and release every entry whose use count has
** dropped to zero, unlinking it from the list first.
*/
void
trim_authors()
{
    register AUTHOR *cur, *prev;

    /* The list head stands in for the predecessor of the first entry. */
#ifndef lint
    prev = (AUTHOR *)&author_root;
#else
    prev = Null(AUTHOR*);
#endif
    cur = author_root;
    while (cur) {
	if (cur->count) {
	    prev = cur;			/* still in use: keep it */
	} else {
	    prev->link = cur->link;	/* unused: unlink and free */
	    free_author(cur);
	}
	cur = prev->link;
    }
}
/* Sort the root list so the oldest roots come first, where a root's age is
** the date of the first article on its article list.  This is an insertion
** sort: the list is cut after its first element and every remaining root is
** re-inserted at its place by date.
*/
void
order_roots()
{
    register ROOT *cur, *pending, *prev, *after;
    register time_t key;

    /* Nothing to do with fewer than two roots. */
    if (!root_root || !root_root->link) {
	return;				/* RETURN */
    }
    pending = root_root->link;
    root_root->link = Null(ROOT*);

    while (pending != Null(ROOT*)) {
	cur = pending;
	pending = pending->link;
	key = cur->articles->date;

	/* Not newer than the current head: becomes the new head. */
	if (root_root->articles->date >= key) {
	    cur->link = root_root;
	    root_root = cur;
	    continue;
	}
	/* Otherwise skip past every root that is strictly older. */
	prev = root_root;
	for (after = prev->link;
	     after != NULL && after->articles->date < key;
	     after = prev->link)
	{
	    prev = after;
	}
	cur->link = after;
	prev->link = cur;
    }
}
/* Case-insensitive test of character x against the lower-case letter y.
** The casts keep the <ctype.h> calls defined for characters with the high
** bit set.
*/
#define EQ(x,y) ((isupper((unsigned char)(x)) ? tolower((unsigned char)(x)) : (x)) == (y))
/* Parse the subject into 72 characters or less and store it in subject_str.
** Remove any "Re[:^]"s from the front (setting found_Re when one is seen),
** and any "(was: old)" or "(Re: old)" stuff from the end.  Then compact
** runs of whitespace into one space and trim a trailing space.  If it's
** still too long, unmercifully cut it off.  We don't bother with subject
** continuation lines either.
**
** str is the header text after "Subject:"; it is modified in place.  A
** subject that is empty or all whitespace yields "<None>"; a subject that
** consists only of "Re:" prefixes yields an empty string.
*/
void
get_subject_str(str)
register char *str;
{
    register char *cp;
    register char *paren;
    register int len;

    while (*str && (unsigned char)*str <= ' ') {
	str++;
    }
    if (!*str) {
	bcopy("<None>", subject_str, 7);
	return;				/* RETURN */
    }
    cp = str;
    while (EQ(cp[0], 'r') && EQ(cp[1], 'e')) {	/* check for Re: */
	cp += 2;
	if (*cp == '^') {			/* allow Re^2: */
	    while (*++cp <= '9' && *cp >= '0') {
		;
	    }
	}
	if (*cp != ':') {
	    break;
	}
	while (*++cp == ' ') {
	    ;
	}
	found_Re = 1;
	str = cp;
    }
    /* Remove "(was: oldsubject)", because we already know the old subjects.
    ** Also match "(Re: oldsubject)".  Allow possible spaces after the ('s.
    ** A '(' in the very first position is never considered, so the subject
    ** cannot be emptied by this step.  The search is skipped for an empty
    ** string and always resumes at a character known to lie inside the
    ** string, so it never runs past the terminating NUL.
    */
    if (*str) {
	for (paren = index(str + 1, '('); paren != Nullch; paren = index(cp, '(')) {
	    cp = paren + 1;
	    while (*cp == ' ') {
		cp++;
	    }
	    if ((EQ(cp[0], 'w') && EQ(cp[1], 'a') && EQ(cp[2], 's')
	      && (cp[3] == ':' || cp[3] == ' '))
	     || (EQ(cp[0], 'r') && EQ(cp[1], 'e')
	      && ((cp[2] == ':' && cp[3] == ' ')
	       || (cp[2] == '^' && cp[3] && cp[4] == ':')))) {
		/* Cut at the parenthesis itself so no "(" is left behind. */
		*paren = '\0';
		break;
	    }
	}
    }
    /* Copy subject to subject_str, compacting multiple spaces/tabs */
    for (len = 0, cp = subject_str; len < 72 && *str; len++) {
	if ((unsigned char)*str <= ' ') {
	    while (*++str && (unsigned char)*str <= ' ') {
		;
	    }
	    *cp++ = ' ';
	} else {
	    *cp++ = *str++;
	}
    }
    /* Drop a trailing space, but never look in front of the buffer. */
    if (cp > subject_str && cp[-1] == ' ') {
	cp--;
    }
    *cp = '\0';
}
#ifndef OLD_AUTHOR_CODE
/* Name-munging routines written by Ross Ridge. Public Domain.
** Enhanced by Wayne Davison.
*/
/* If necessary, compress a net user's full name by playing games with
** initials and the middle name(s).  If we start with "Ross Douglas Ridge"
** we try "Ross D Ridge", "Ross Ridge", "R D Ridge" and finally "R Ridge"
** before simply truncating the thing.  We also turn "R. Douglas Ridge"
** into "Douglas Ridge" and "Ross Ridge D.D.S." into "Ross Ridge" as a
** first step of the compaction, if needed.
**
** name is modified in place; max is the longest acceptable result.  The
** return value points into the caller's buffer (not necessarily at its
** start).  Names that already fit after trimming are returned unchanged.
**
** When a leading initial is dropped in favour of the middle name, the
** recorded first-name length is updated to match, so the pieces are pasted
** back together at the right place.  The paste step copies byte by byte
** toward lower addresses because source and destination share the buffer.
*/
static char *
compress_name(name, max)
char *name;
int max;
{
    register char *s, *last, *mid, *d, *t;
    register int len, namelen, midlen;
    int notlast;

    /* First remove white space from both ends. */
    while (isspace((unsigned char)*name)) {
	name++;
    }
    if ((len = strlen(name)) == 0) {
	return name;
    }
    s = name + len - 1;
    while (isspace((unsigned char)*s)) {
	s--;
    }
    s[1] = '\0';
    if (s - name + 1 <= max) {
	return name;
    }

    /* Look for characters that likely mean the end of the name
    ** and the start of some hopefully uninteresting additional info.
    ** Splitting at a comma is somewhat questionable, but since
    ** "Ross Ridge, The Great HTMU" comes up much more often than
    ** "Ridge, Ross" and since "R HTMU" is worse than "Ridge" we do
    ** it anyways.
    */
    for (d = name + 1; *d; d++) {
	if (*d == ',' || *d == ';' || *d == '(' || *d == '@'
	 || (*d == '-' && (d[1] == '-' || d[1] == ' '))) {
	    *d-- = '\0';
	    s = d;
	    break;
	}
    }

    /* Find the last name */
    do {
	notlast = 0;
	while (isspace((unsigned char)*s)) {
	    s--;
	}
	s[1] = '\0';
	len = s - name + 1;
	if (len <= max) {
	    return name;
	}
	/* If the last name is an abbreviation it's not the one we want. */
	if (*s == '.') {
	    notlast = 1;
	}
	while (!isspace((unsigned char)*s)) {
	    if (s == name) {		/* only one name */
		name[max] = '\0';
		return name;
	    }
	    if (isdigit((unsigned char)*s)) {	/* probably a phone number */
		notlast = 1;			/* so chuck it */
	    }
	    s--;
	}
    } while (notlast);

    last = s-- + 1;

    /* Look for a middle name */
    while (isspace((unsigned char)*s)) {	/* get rid of any extra space */
	len--;
	s--;
    }

    mid = name;
    while (!isspace((unsigned char)*mid)) {
	mid++;
    }
    namelen = mid - name + 1;		/* first name plus one separator */
    if (mid == s+1) {			/* no middle name */
	mid = 0;
	midlen = 0;
    } else {
	*mid++ = '\0';
	while (isspace((unsigned char)*mid)) {
	    len--;
	    mid++;
	}
	midlen = s - mid + 2;		/* middle name(s) plus one separator */
	/* If first name is an initial and middle isn't and it all fits
	** without the first initial, drop it. */
	if (len > max && mid != s && mid[1] != '.'
	 && (!name[1] || (name[1] == '.' && !name[2]))
	 && len - namelen <= max) {
	    len -= namelen;
	    name = mid;
	    namelen = midlen;		/* the middle name is the first name now */
	    mid = 0;
	}
    }
    s[1] = '\0';
    if (mid && len > max) {
	/* Turn middle names into initials */
	len -= s - mid + 2;
	d = s = mid;
	while (*s) {
	    if (isalpha((unsigned char)*s)) {
		if (d != mid) {
		    *d++ = ' ';
		}
		*d++ = *s++;
	    }
	    while (*s && !isspace((unsigned char)*s)) {
		s++;
	    }
	    while (isspace((unsigned char)*s)) {
		s++;
	    }
	}
	if (d != mid) {
	    *d = '\0';
	    midlen = d - mid + 1;
	    len += midlen;
	} else {
	    mid = 0;
	}
    }
    if (len > max) {
	/* If the first name fits without the middle initials, drop them */
	if (mid && len - midlen <= max) {
	    len -= midlen;
	    mid = 0;
	} else {
	    /* Turn the first name into an initial */
	    len -= namelen - 2;
	    name[1] = '\0';
	    namelen = 2;
	    if (len > max) {
		/* Dump the middle initials (if present) */
		if (mid) {
		    len -= midlen;
		    mid = 0;
		}
		if (len > max) {
		    /* Finally just truncate the last name */
		    last[max - 2] = '\0';
		}
	    }
	}
    }

    /* Paste the names back together.  Each destination is at or before its
    ** source, so a plain forward byte copy is safe even when they overlap.
    */
    d = name + namelen;
    if (mid) {
	d[-1] = ' ';
	for (s = mid, t = d; (*t++ = *s++) != '\0'; ) {
	    ;
	}
	d += midlen;
    }
    d[-1] = ' ';
    for (s = last; (*d++ = *s++) != '\0'; ) {
	;
    }
    return name;
}
/* Shorten an email address to at most max characters while keeping as much
** of the local part as possible.  Surrounding white space and an enclosing
** "<...>" are removed first.  Separators are honoured in the order @ ! %
** (@ % ! may be better...).  The text is modified in place and the result
** points into the caller's buffer.
*/
static char *
compress_address(name, max)
char *name;
int max;
{
    char *p, *at_sign, *last_bang, *percent, *result;
    int total;

    /* Strip white space from both ends. */
    while (isspace(*name)) {
	name++;
    }
    total = strlen(name);
    if (total == 0) {
	return name;
    }
    p = name + total - 1;
    while (isspace(*p)) {
	p--;
    }
    p[1] = '\0';

    /* Peel off angle brackets. */
    if (*name == '<') {
	name++;
	if (*p == '>') {
	    *p-- = '\0';
	}
    }
    total = p - name + 1;
    if (total <= max) {
	return name;
    }

    /* Locate the first '@', the last '!' before it, and the first '%'
    ** that follows that '!' (all searched from the second character on).
    */
    at_sign = last_bang = percent = NULL;
    for (p = name + 1; *p; p++) {
	/* If there's whitespace in the middle then it's probably not
	** really an email address. */
	if (isspace(*p)) {
	    name[max] = '\0';
	    return name;
	}
	if (at_sign != NULL) {
	    continue;			/* separators after the '@' don't count */
	}
	if (*p == '@') {
	    at_sign = p;
	} else if (*p == '!') {
	    last_bang = p;
	    percent = NULL;
	} else if (*p == '%' && percent == NULL) {
	    percent = p;
	}
    }
    if (at_sign == NULL) {
	at_sign = name + total;
    }

    /* Choose where the result starts.  Only a bang path can move it. */
    result = name;
    if (last_bang != NULL) {
	if (percent != NULL && at_sign - last_bang - 1 >= max) {
	    result = last_bang + 1;
	} else if (at_sign - name >= max) {
	    result = at_sign - max;
	}
    }
    if (total - (result - name) > max) {
	result[max] = '\0';
    }
    return result;
}
/* Pull the human-readable name out of a From line.  Handles the
** "address (Full Name)" and "Full Name <address>" forms, and strips one
** pair of double quotes around the name.  The text is modified in place.
** Returns a pointer into it, or NULL when no name part is found.
*/
static char *
extract_name(name)
char *name;
{
    char *open_paren, *close_paren, *open_angle, *quote;
    int use_angle;

    while (isspace(*name)) {
	name++;
    }
    open_paren = index(name, '(');
    close_paren = rindex(name, ')');
    open_angle = index(name, '<');

    if (open_paren == NULL && open_angle == NULL) {
	return NULL;
    }

    /* The angle-bracket form wins unless a complete parenthesised comment
    ** encloses the '<'.
    */
    use_angle = open_angle != NULL
	&& (open_paren == NULL || close_paren == NULL
	 || open_paren > open_angle || close_paren < open_angle);

    if (use_angle) {
	if (open_angle == name) {
	    return NULL;		/* nothing in front of the address */
	}
	*open_angle = '\0';
    } else {
	*open_paren = '\0';
	name = open_paren + 1;
	while (isspace(*name)) {
	    name++;
	}
	if (name == close_paren) {
	    return NULL;		/* empty comment */
	}
	if (close_paren != NULL) {
	    *close_paren = '\0';
	}
    }

    if (*name == '"') {
	name++;
	while (isspace(*name)) {
	    name++;
	}
	quote = rindex(name, '"');
	if (quote != NULL) {
	    *quote = '\0';
	}
    }
    return name;
}
/* Store a short form of the author (16 characters is the target) in
** author_str.  The full name is preferred when the From text carries one;
** otherwise the address itself is compressed.  addr is modified in place.
*/
void
get_author_str(addr)
char *addr;
{
    char *full_name = extract_name(addr);

    /* TODO: Do we need to eliminate ctrl chars here? */
    strcpy(author_str, full_name != NULL ? compress_name(full_name, 16)
					 : compress_address(addr, 16));
}
#else /* Here's the old, simple method in case someone wants it. */
/* The old, simple method: store up to 16 characters of the author in
** author_str.  The comment in parentheses is used if there is one;
** otherwise the address is cut at the first '@' and at the first '%'.
** Runs of white space and control characters become a single space, and
** leading ones are dropped.  str is modified in place.
*/
void
get_author_str(str)
char *str;
{
    register char *src, *dst;

    src = index(str, '(');
    if (src != Nullch) {
	str = src + 1;
	src = rindex(str, ')');
	if (src != Nullch) {
	    *src = '\0';
	}
    } else {
	src = index(str, '@');
	if (src != Nullch) {
	    *src = '\0';
	}
	src = index(str, '%');
	if (src != Nullch) {
	    *src = '\0';
	}
    }

    src = str;
    dst = author_str;
    while (*src && dst - author_str < 16) {
	if (*src > ' ') {
	    *dst++ = *src++;
	    continue;
	}
	/* Pack white space and turn ctrl-chars into spaces. */
	do {
	    src++;
	} while (*src && *src <= ' ');
	if (dst != author_str) {
	    *dst++ = ' ';
	}
    }
    *dst = '\0';
}
#endif
/* Take a message-id and see if we already know about it. If so, return it.
** If not, create it. We separate the id into its id@domain parts, and
** link all the unique ids to one copy of the domain portion. This saves
** a bit of space.
*/
ARTICLE *
get_article(msg_id)
char *msg_id;
{
register DOMAIN *domain;
register ARTICLE *article;
register char *cp, *after_at;
/* Take message id, break it up into <id@domain>, and try to match it.
*/
while (*msg_id == ' ') {
msg_id++;
}
cp = msg_id + strlen(msg_id) - 1;
if (msg_id >= cp) {
if (log_verbosity) {
log_error("Message-ID is empty! [%ld]\n", num);
}
return Nullart;
}
if (*msg_id++ != '<') {
if (log_verbosity) {
log_error("Message-ID doesn't start with '<' [%ld]\n", num);
}
msg_id--;
}
if (*cp != '>') {
if (log_verbosity) {
log_error("Message-ID doesn't end with '>' [%ld]\n", num);
}
cp++;
}
*cp = '\0';
if (msg_id == cp) {
if (log_verbosity) {
log_error("Message-ID is null! [%ld]\n", num);
}
return Nullart;
}
if ((after_at = index(msg_id, '@')) == Nullch) {
domain = &unk_domain;
} else {
*after_at++ = '\0';
for (cp = after_at; *cp; cp++) {
if (isupper(*cp)) {
*cp = tolower(*cp); /* lower-case domain portion */
}
}
*cp = '\0';
/* Try to find domain name in database. */
for (domain = unk_domain.link; domain; domain = domain->link) {
if (strEQ(domain->name, after_at)) {
break;
}
}
if (!domain) { /* if domain doesn't exist, create it */
register int len = cp - after_at + 1;
domain = (DOMAIN *)safemalloc(sizeof (DOMAIN));
total.domain++;
domain->name = safemalloc(len);
total.string2 += len;
bcopy(after_at, domain->name, len);
domain->ids = Nullart;
domain->link = unk_domain.link;
unk_domain.link = domain;
}
}
/* Try to find id in this domain. */
for (article = domain->ids; article; article = article->id_link) {
if (strEQ(article->id, msg_id)) {
break;
}
}
if (!article) { /* If it doesn't exist, create an article */
register int len = strlen(msg_id) + 1;
article = (ARTICLE *)safemalloc(sizeof (ARTICLE));
bzero(article, sizeof (ARTICLE));
total.article++;
article->num = 0;
article->id = safemalloc(len);
total.string2 += len;
bcopy(msg_id, article->id, len);
article->domain = domain;
article->id_link = domain->ids;
domain->ids = article;
}
return article;
}
/* Take all the data we've accumulated about the article and shove it into
** the article tree at the best place we can possibly imagine.
**
** article is the entry get_article() returned for the Message-ID and date
** is the parsed Date header.  The rest of the input arrives through
** globals: num (article number), references (References text, which is
** consumed destructively here), found_Re and -- through the helpers called
** from here -- subject_str and author_str.
**
** An article that already has a subject is treated as a duplicate and left
** alone.  An article that already has a root was created earlier as a
** "fake" reference node; it is either re-sorted in place or taken out of
** its chain and re-threaded from its own references.
*/
void
insert_article(article, date)
ARTICLE *article;
time_t date;
{
register ARTICLE *node, *last;
register char *cp, *end;
#ifndef USE_NNTP
int len;
#endif
if (article->subject) {
if (log_verbosity) {
log_error("We've already seen article #%ld (%s@%s)\n",
num, article->id, article->domain->name);
}
return; /* RETURN */
}
article->date = date;
article->num = num;
article->flags = 0;
if (!*references && found_Re) {
if (log_verbosity > 1) {
log_error("Missing reference line! [%ld]\n", num);
}
}
/* If the article has a non-zero root, it is already in a thread somewhere.
** Unlink it to try to put it in the best possible spot.
*/
if (article->root) {
/* Check for a real or shared-fake parent. Articles that have never
** existed have a num of 0. Expired articles that remain as references
** have a valid num. (Valid date too, but no subject.)
*/
for (node = article->parent;
node && !node->num && node->child_cnt == 1;
node = node->parent)
{
;
}
unlink_child(article);
if (node) { /* do we have decent parents? */
/* Yes: assume that our references are ok, and just reorder us
** with our siblings by date.
*/
link_child(article);
use_root(article, article->root);
/* Freshen the date in any faked parent articles. */
for (node = article->parent;
node && !node->num && date < node->date;
node = node->parent)
{
node->date = date;
/* Re-link so the parent is re-sorted among its own siblings. */
unlink_child(node);
link_child(node);
}
return; /* RETURN */
}
/* We'll assume that this article has as good or better references
** than the child that faked us initially. Free the fake reference-
** chain and process our references as usual.
*/
for (node = article->parent; node; node = last) {
unlink_child(node);
last = node->parent;
free_article(node);
}
article->parent = Nullart; /* neaten up */
article->siblings = Nullart;
}
check_references:
if (!*references) { /* If no references but "Re:" in subject, */
if (found_Re) { /* search for a reference in any cited text */
#ifndef USE_NNTP
/* Look at no more than the next four lines of the article file. */
for (len = 4; len && fgets(buf, sizeof buf, fp_article); len--) {
if ((cp = index(buf, '<')) && (end = index(cp, ' '))) {
if (end[-1] == ',') {
end--;
}
*end = '\0';
if ((end = index(cp, '>')) == Nullch) {
end = cp + strlen(cp) - 1;
}
if (valid_message_id(cp, end)) {
/* NOTE(review): this copy is bounded only by the size of buf; confirm that
** buf is not larger than references[]. */
strcpy(references+1, cp);
*references = ' ';
if (log_verbosity > 2) {
log_error("Found cited-text reference: '%s' [%ld]\n",
references+1, num);
}
break;
}
}
}
#endif
} else {
article->flags |= ROOT_ARTICLE;
}
}
/* If we have references, process them from the right end one at a time
** until we either run into somebody, or we run out of references.
*/
if (*references) {
last = article;
node = Nullart;
end = references + strlen(references) - 1;
while ((cp = rindex(references, '<')) != Nullch) {
/* Trim trailing white space and commas from the current reference. */
while (end >= cp && ((unsigned char)*end <= ' ' || *end == ',')) {
end--;
}
end[1] = '\0';
/* Quit parsing references if this one is garbage. */
if (!valid_message_id(cp, end)) {
if (log_verbosity) {
log_error("Bad ref '%s' [%ld]\n", cp, num);
}
break;
}
/* Dump all domains that end in '.', such as "..." & "1@DEL." */
if (end[-1] == '.') {
break;
}
node = get_article(cp);
/* Chop this reference off so the next rindex() finds the one before it. */
*cp = '\0';
/* Check for duplicates on the reference line. Brand-new data has
** no date. Data we just allocated earlier on this line has a
** date but no root. Special-case the article itself, since it
** MIGHT have a root.
*/
if ((node->date && !node->root) || node == article) {
if (log_verbosity) {
log_error("Reference line contains duplicates [%ld]\n",
num);
}
if ((node = last) == article) {
node = Nullart;
}
/* NOTE(review): `end' is not moved back to cp-1 on this path, so the next
** pass starts trimming from the old position -- confirm this is intended. */
continue;
}
last->parent = node;
link_child(last);
if (node->root) {
break;
}
node->date = date;
last = node;
end = cp-1;
}
/* No usable reference was found: retry as if there were none at all. */
if (!node) {
*references = '\0';
goto check_references;
}
/* Check if we ran into anybody that was already linked. If so, we
** just use their root.
*/
if (node->root) {
/* See if this article spans the gap between what we thought
** were two different roots.
*/
if (article->root && article->root != node->root) {
merge_roots(node->root, article->root);
/* Set the roots of any children we brought with us. */
set_root(article, node->root);
}
use_root(article, node->root);
} else {
/* We didn't find anybody we knew, so either create a new root or
** use the article's root if it was previously faked.
*/
if (!article->root) {
make_root(node);
use_root(article, node->root);
} else {
node->root = article->root;
link_child(node);
use_root(article, article->root);
}
}
/* Set the roots of the faked articles we created as references. */
for (node = article->parent; node && !node->root; node = node->parent) {
node->root = article->root;
}
/* Make sure we didn't circularly link to a child article(!), by
** ensuring that we run into the root before we run into ourself.
*/
while (node && node->parent != article) {
node = node->parent;
}
if (node) {
/* Ugh. Someone's tweaked reference line with an incorrect
** article-order arrived first, and one of our children is
** really one of our ancestors. Cut off the bogus child branch
** right where we are and link it to the root.
*/
if (log_verbosity) {
log_error("Found ancestral child -- fixing.\n");
}
unlink_child(node);
node->parent = Nullart;
link_child(node);
}
} else {
/* The article has no references. Either turn it into a new root, or
** re-attach fleshed-out (previously faked) article to its old root.
*/
if (!article->root) {
make_root(article);
} else {
link_child(article);
use_root(article, article->root);
}
}
}
/* Check if the string we've found looks like a valid message-id reference.
**
** start points at the first character and end at the last character of the
** candidate.  Returns 1 when the text has the form "<x@y>" with non-empty
** x and y, else 0.  Two common manglings are repaired in place first: a
** final '>' that was replaced by some other citation character is put
** back, and a doubled ">>" loses its second '>'.
**
** The '@' has to lie between start and end; one that only appears after
** the closing '>' does not make the id valid.
*/
int
valid_message_id(start, end)
register char *start, *end;
{
    char *mid;

    if (start == end) {
	return 0;
    }

    if (*end != '>') {
	/* Compensate for space cadets who include the header in their
	** substitution of all '>'s into another citation character.
	*/
	if (*end == '<' || *end == '-' || *end == '!' || *end == '%'
	 || *end == ')' || *end == '|' || *end == ':' || *end == '}'
	 || *end == '*' || *end == '+' || *end == '#' || *end == ']'
	 || *end == '@' || *end == '$') {
	    if (log_verbosity) {
		log_error("Reference ended in '%c' [%ld]\n", *end, num);
	    }
	    *end = '>';
	}
    } else if (end[-1] == '>') {
	if (log_verbosity) {
	    log_error("Reference ended in '>>' [%ld]\n", num);
	}
	*(end--) = '\0';
    }
    /* Id must be "<...@...>".  The last test rejects an empty domain as
    ** well as an '@' located at or beyond the closing '>'.
    */
    if (*start != '<' || *end != '>' || (mid = index(start, '@')) == Nullch
     || mid == start+1 || mid+1 >= end) {
	return 0;			/* RETURN */
    }
    return 1;
}
/* Remove an article from its parent/siblings. Leave parent pointer intact.
*/
void
unlink_child(child)
register ARTICLE *child;
{
register ARTICLE *last;
if (!(last = child->parent)) {
child->root->thread_cnt--;
if ((last = child->root->articles) == child) {
child->root->articles = child->siblings;
} else {
goto sibling_search;
}
} else {
last->child_cnt--;
if (last->children == child) {
last->children = child->siblings;
} else {
last = last->children;
sibling_search:
while (last->siblings != child) {
last = last->siblings;
}
last->siblings = child->siblings;
}
}
}
/* Insert an article into its parent's list of children, or into its
** root's list of articles when the parent pointer is zero.  The list is
** kept sorted by date; among equal dates the new article goes last.  The
** matching counter (child_cnt or thread_cnt) is incremented.
*/
void
link_child(child)
register ARTICLE *child;
{
    register ARTICLE *node;
    register ARTICLE **headp;

    if (child->parent) {
	child->parent->child_cnt++;
	headp = &child->parent->children;
    } else {
	child->root->thread_cnt++;
	headp = &child->root->articles;
    }

    node = *headp;
    if (!node || child->date < node->date) {
	/* Empty list, or older than the first entry: new head. */
	child->siblings = node;
	*headp = child;
	return;
    }
    /* Stop in front of the first sibling that is strictly newer. */
    while (node->siblings && node->siblings->date <= child->date) {
	node = node->siblings;
    }
    child->siblings = node->siblings;
    node->siblings = child;
}
/* Create a new root for the specified article. If the current subject_str
** matches any pre-existing root's subjects, we'll instead add it on as a
** parallel thread.
*/
void
make_root(article)
register ARTICLE *article;
{
register ROOT *new, *node;
register SUBJECT *subject;
#ifndef NO_SUBJECT_MATCHING
/* First, check the other root's subjects for a match. */
for (node = root_root; node; node = node->link) {
for (subject = node->subjects; subject; subject = subject->link) {
if (subject_equal(subject->str, subject_str)) {
use_root(article, node); /* use it instead */
link_child(article);
return; /* RETURN */
}
}
}
#endif
/* Create a new root. */
new = (ROOT *)safemalloc(sizeof (ROOT));
total.root++;
new->articles = article;
new->root_num = article->num;
new->thread_cnt = 1;
if (article->num) {
article->author = new_author();
new->subject_cnt = 1;
new->subjects = article->subject = new_subject();
} else {
new->subject_cnt = 0;
new->subjects = Null(SUBJECT*);
}
article->root = new;
new->link = root_root;
root_root = new;
}
/* Add this article's subject onto the indicated root's list. Point the
** article at the root.
**
** A fake article (num of 0) only gets its root pointer set.  A real article
** also gets an author (from new_author()) and a subject: an existing subject
** that matches subject_str has its count bumped and is shared; otherwise a
** new subject is created and inserted into the root's subject list.
*/
void
use_root(article, root)
ARTICLE *article;
ROOT *root;
{
register SUBJECT *subject;
register ROOT *root2;
SUBJECT *hold, *child_subj = Null(SUBJECT*), *sib_subj = Null(SUBJECT*);
ARTICLE *node;
article->root = root;
/* If it's a fake, there's no subject to add. */
if (!article->num) {
return; /* RETURN */
}
/* If we haven't picked a unique message number to represent this root,
** use the first non-zero number we encounter. Which one doesn't matter.
*/
if (!root->root_num) {
root->root_num = article->num;
}
article->author = new_author();
/* Check if the new subject matches any of the other subjects in this root.
** If so, we just update the count. If not, check all the other roots for
** a match. If found, the new subject is common between the two roots, so
** we merge the two roots together.
*/
root2 = root;
/* With NO_SUBJECT_MATCHING defined the do/while disappears and only this
** root's own subjects are searched.  Otherwise the search begins with this
** root and goes once around the whole root list, wrapping from the end back
** to root_root.
*/
#ifndef NO_SUBJECT_MATCHING
do {
#endif
for (subject = root2->subjects; subject; subject = subject->link) {
if (subject_equal(subject->str, subject_str)) {
article->subject = subject;
subject->count++;
#ifndef NO_SUBJECT_MATCHING
if (root2 != root) {
merge_roots(root, root2);
}
#endif
return; /* RETURN */
}
}
#ifndef NO_SUBJECT_MATCHING
if ((root2 = root2->link) == Null(ROOT*)) {
root2 = root_root;
}
} while (root2 != root);
#endif
/* No match anywhere: this subject is new to the root. */
article->subject = hold = new_subject();
root->subject_cnt++;
/* Find the subject of any pre-existing children or siblings. We want
** to insert the new subject before one of these to keep the numbering
** intuitive in the newsreader. Never insert prior to our parent's
** subject, however.
*/
/* Only the chain of first children is followed here, not every descendant. */
for (node = article->children; node; node = node->children) {
if (node->subject) {
child_subj = node->subject;
break;
}
}
for (node = article->siblings; node; node = node->siblings) {
if (node->subject) {
sib_subj = node->subject;
break;
}
}
if (article->parent) {
if (article->parent->subject == child_subj) {
child_subj = Null(SUBJECT*);
}
if (article->parent->subject == sib_subj) {
sib_subj = Null(SUBJECT*);
}
}
/* Insert at the head if the list is empty or its first subject is one of the
** two targets; otherwise insert in front of the first target found, or at
** the end of the list when neither target is on it.
*/
if (!(subject = root->subjects)
|| subject == child_subj || subject == sib_subj) {
hold->link = root->subjects;
root->subjects = hold;
} else {
while (subject->link
&& subject->link != child_subj && subject->link != sib_subj) {
subject = subject->link;
}
hold->link = subject->link;
subject->link = hold;
}
}
/* Fetch the next comparison unit from *strp and advance *strp past it.
** A run of spaces and/or punctuation collapses into a single ' ', an
** upper-case letter is folded to lower case, and anything else is returned
** unchanged.  Returns 0 (without advancing) at the end of the string.
** Bytes are converted through unsigned char before being handed to the
** <ctype.h> routines, which are undefined for negative arguments.
*/
static int
fold_subject_char(strp)
register char **strp;
{
    register char *s = *strp;
    register int ch = (unsigned char)*s;

    if (ch == '\0') {
        return 0;
    }
    s++;
    if (ch == ' ' || ispunct(ch)) {
        /* Swallow the remainder of the space/punctuation run. */
        while (*s && (*s == ' ' || ispunct((unsigned char)*s))) {
            s++;
        }
        ch = ' ';
    } else if (isupper(ch)) {
        /* Only fold real upper-case letters; some old tolower()
        ** implementations misbehave on anything else.
        */
        ch = tolower(ch);
    }
    *strp = s;
    return ch;
}

/* Check subjects in a case-insignificant, punctuation-ignoring manner.
** Each run of spaces/punctuation is treated as one blank, so "Re: foo" and
** "re foo" compare equal, but a run is never dropped entirely: "foo." and
** "foo" are different.  Returns 1 when the subjects match, 0 otherwise.
*/
int
subject_equal(str1, str2)
char *str1, *str2;
{
    register int ch1, ch2;

    for (;;) {
        ch1 = fold_subject_char(&str1);
        ch2 = fold_subject_char(&str2);
        if (ch1 != ch2) {
            return 0;		/* mismatch, or one string ended early */
        }
        if (!ch1) {
            return 1;		/* both strings ended together */
        }
    }
}
/* Allocate a SUBJECT record holding a private copy of the current
** subject_str.  The record starts with a count of one and a null link;
** the subject and string totals are updated to match.
*/
SUBJECT *
new_subject()
{
    register SUBJECT *subj;
    register int size = strlen(subject_str) + 1;	/* includes the NUL */

    subj = (SUBJECT *)safemalloc(sizeof (SUBJECT));
    total.subject++;

    subj->str = safemalloc(size);
    total.string1 += size;
    bcopy(subject_str, subj->str, size);

    subj->link = Null(SUBJECT*);
    subj->count = 1;
    return subj;
}
/* Create a new author structure. */
AUTHOR *
new_author()
{
register len = strlen(author_str) + 1;
register AUTHOR *author, *last_author;
last_author = Null(AUTHOR*);
for (author = author_root; author; author = author->link) {
#ifndef DONT_COMPARE_AUTHORS /* might like to define this to save time */
if (strEQ(author->name, author_str)) {
author->count++;
return author; /* RETURN */
}
#endif
last_author = author;
}
author = (AUTHOR *)safemalloc(sizeof (AUTHOR));
total.author++;
author->count = 1;
author->link = Null(AUTHOR*);
author->name = safemalloc(len);
total.string1 += len;
bcopy(author_str, author->name, len);
if (last_author) {
last_author->link = author;
} else {
author_root = author;
}
return author;
}
/* Fold root2 into root1: every top-level article of root2 is re-homed
** under root1, root2's subject list is appended to root1's, and root2 is
** then unlinked from the root list and freed.  root2 must not be used
** after this call.
*/
void
merge_roots(root1, root2)
ROOT *root1, *root2;
{
    register ARTICLE *art, *following;
    register SUBJECT **tail;

    /* Keep the smaller of the two root numbers.  A newsreader that has
    ** already seen the roots as separate threads may have its kill-thread
    ** code confused by this, but there is no alternative; the newsreader
    ** has to cope as best it can.
    */
    if (root2->root_num < root1->root_num) {
        root1->root_num = root2->root_num;
    }

    /* Move each of root2's top-level articles in turn: detach it, retarget
    ** its whole branch at root1, then attach it to root1.
    */
    art = root2->articles;
    while (art) {
        following = art->siblings;	/* remember before we cut the link */
        unlink_child(art);
        art->siblings = Nullart;
        set_root(art, root1);		/* covers the children as well */
        /* link_child() relies on a null parent and a valid root pointer. */
        link_child(art);
        art = following;
    }

    /* Append root2's subjects after the last of root1's. */
    root1->subject_cnt += root2->subject_cnt;
    for (tail = &root1->subjects; *tail; tail = &(*tail)->link) {
        ;
    }
    *tail = root2->subjects;

    unlink_root(root2);
    free_root(root2);
}
/* Retarget a branch at a new root (used when roots are merged).  The
** given node, all of its following siblings, and everything beneath any
** of them have their root pointer set to `root'.
*/
void
set_root(node, root)
ARTICLE *node;
ROOT *root;
{
    register ARTICLE *art;

    for (art = node; art; art = art->siblings) {
        art->root = root;
        if (art->children) {
            set_root(art->children, root);	/* descend one level */
        }
    }
}
/* Remove a root from the root_root list without freeing it.  The root
** must be on the list.
*/
void
unlink_root(root)
register ROOT *root;
{
    register ROOT **linkp;

    /* Find whichever pointer (list head or a predecessor's link) refers
    ** to this root, then route it around the root.
    */
    linkp = &root_root;
    while (*linkp != root) {
        linkp = &(*linkp)->link;
    }
    *linkp = root->link;
}
/* Release an article together with its message-id string.  The article is
** first removed from its domain's id list; if that leaves the list empty
** the domain is handed to free_domain().  Every other resource must have
** been released already and the article must not be part of any thread.
*/
void
free_article(this)
ARTICLE *this;
{
    register ARTICLE *prev;

    if (this->domain->ids != this) {
        /* Somewhere past the head: locate our predecessor and skip us. */
        for (prev = this->domain->ids; prev->id_link != this; ) {
            prev = prev->id_link;
        }
        prev->id_link = this->id_link;
    } else {
        /* We head the list; dropping us may leave the domain unused. */
        this->domain->ids = this->id_link;
        if (!this->domain->ids) {
            free_domain(this->domain);
        }
    }

    total.string2 -= strlen(this->id) + 1;
    free(this->id);
    free(this);
    total.article--;
}
/* Free the domain only when its last unique id has been freed. */
void
free_domain(this)
DOMAIN *this;
{
register DOMAIN *domain;
if (this == (domain = &unk_domain)) {
return;
}
if (this == next_domain) { /* help expire routine skip freed domains */
next_domain = next_domain->link;
}
while (this != domain->link) {
domain = domain->link;
}
domain->link = this->link;
total.string2 -= strlen(this->name) + 1;
free(this->name);
free(this);
total.domain--;
}
/* Release a subject record and the string it owns, and bring the subject
** and string totals back into line.
*/
void
free_subject(this)
SUBJECT *this;
{
    register int size = strlen(this->str) + 1;	/* includes the NUL */

    free(this->str);
    free(this);
    total.string1 -= size;
    total.subject--;
}
/* Release a root record and decrement the root total.  The caller must
** already have unlinked it from the root list.
*/
void
free_root(this)
ROOT *this;
{
    total.root--;
    free(this);
}
/* Release an author record that is no longer needed, along with its name
** string, and bring the author and string totals back into line.
*/
void
free_author(this)
AUTHOR *this;
{
    register int size = strlen(this->name) + 1;	/* includes the NUL */

    free(this->name);
    free(this);
    total.string1 -= size;
    total.author--;
}
#if defined(USE_NNTP) && !defined(HAS_USLEEP)
/* Replacement usleep() for NNTP builds on systems without one.
** usec is the delay in microseconds; zero or negative requests return at
** once.  With USELECT defined the delay is implemented with select();
** otherwise only whole seconds can be slept via sleep(), so requests
** shorter than one second return immediately.  Always returns 0.
*/
int
usleep(usec)
long usec;
{
    /* Guard both variants: a negative count handed to sleep() would turn
    ** into an enormous unsigned delay.
    */
    if (usec <= 0) {
        return 0;
    }
# ifndef USELECT
    usec /= 1000000;			/* whole seconds only */
    if (usec) {
        sleep((unsigned)usec);
    }
# else
    {
        struct timeval t;

        t.tv_usec = usec % 1000000;
        t.tv_sec = usec / 1000000;
        /* No descriptors are watched; select() is used purely as a timer. */
        (void) select(1, 0, 0, 0, &t);
    }
# endif
    return 0;
}
#endif