4.4BSD/usr/src/contrib/news/trn3/rt-mt.c

/* $Id: rt-mt.c,v 3.0 1992/12/14 00:14:13 davison Trn $
*/

#include "EXTERN.h"
#include "common.h"
#include "intrp.h"
#include "trn.h"
#include "cache.h"
#include "bits.h"
#include "ng.h"
#include "ngdata.h"
#include "rcln.h"
#include "util.h"
#include "hash.h"
#include "nntp.h"
#include "rthread.h"
#include "rt-process.h"

#ifdef USE_MT
#include "INTERN.h"
#include "rt-mt.h"

/* Objects defined outside this file: the message-id hash table that
** read_ids() stores into, and the flag mt_data() clears while it rebuilds
** the cache after a bad thread file.
*/
extern HASHTABLE *msgid_hash;
extern bool try_mt;

#ifndef USE_XTHREAD
/* The local database/thread file currently being read (local-file build). */
static FILE *fp;
#endif
/* TRUE when the database's WORD (resp. LONG) byte order matches ours; set by
** mt_init() and consulted by wp_bmap() and lp_bmap().
*/
static bool word_same, long_same;
/* Byte map of this machine and byte map read from the database. */
static BMAP my_bmap, mt_bmap;

/* Raw buffers filled by read_item(). */
static char *strings = Nullch;
static WORD *author_cnts = 0;
static WORD *ids = 0;

/* Index-to-pointer tables used while unpacking a thread file. */
static ARTICLE **article_array = 0;
static SUBJECT **subject_array = 0;
static char **author_array = 0;

/* Holding areas for the packed records as they are read. */
static TOTAL total;
static PACKED_ROOT p_root;
static PACKED_ARTICLE p_article;

/* Initialize our thread code by determining the byte-order of the thread
** files and our own current byte-order.  If they differ, set flags to let
** the read code know what we'll need to translate.
**
** Returns TRUE when the database's byte map was read and its version equals
** DB_VERSION; FALSE otherwise.  Once the file is open (or the XTHREAD reply
** has started) every outcome, including a version mismatch, falls through
** to the cleanup at the bottom so that the file gets closed (or the rest of
** the server's reply gets consumed).
*/
bool
mt_init()
{
    int i;
#ifdef USE_XTHREAD
    long size;
#endif
    bool success = TRUE;			/* I'm an optimist */

    word_same = long_same = TRUE;
#ifdef USE_XTHREAD
    sprintf(ser_line, "XTHREAD DBINIT");
    nntp_command(ser_line);
    size = nntp_readcheck();
    if (size < 0)
	return FALSE;
    size = nntp_read((char*)&mt_bmap, (long)sizeof (BMAP));
    /* A read that is one byte short of a full BMAP is still accepted. */
    if (size >= sizeof (BMAP) - 1) {
#else /* !USE_XTHREAD */
    /* A read that is one byte short of a full BMAP is still accepted. */
    if ((fp = fopen(filexp(DBINIT), FOPEN_RB)) != Nullfp
     && fread((char*)&mt_bmap, 1, sizeof (BMAP), fp) >= sizeof (BMAP) - 1) {
#endif
	if (mt_bmap.version != DB_VERSION) {
	    printf("\nMthreads database is the wrong version -- ignoring it.\n")
		FLUSH;
	    /* Don't return here: the cleanup below must still run. */
	    success = FALSE;
	} else {
	    /* Compare our byte map with the database's, byte by byte. */
	    mybytemap(&my_bmap);
	    for (i = 0; i < sizeof (LONG); i++) {
		if (i < sizeof (WORD)) {
		    if (my_bmap.w[i] != mt_bmap.w[i]) {
			word_same = FALSE;
		    }
		}
		if (my_bmap.l[i] != mt_bmap.l[i]) {
		    long_same = FALSE;
		}
	    }
	}
    } else
	success = FALSE;
#ifdef USE_XTHREAD
    while (nntp_read(ser_line, (long)sizeof ser_line))
	;		/* trash any extraneous bytes */
#else
    if (fp != Nullfp)
	fclose(fp);
#endif
    return success;
}

/* Load the current group's thread file into the article cache.
**
** Returns FALSE only when the thread file turned out to be bogus, in which
** case the cache has been thrown away and rebuilt with try_mt switched off.
** Every other outcome (no thread file, a short header, an empty database or
** a clean load) returns TRUE.
*/
bool
mt_data()
{
    bool success = TRUE;
#ifdef USE_XTHREAD		/* use remote thread file? */
    long size;

    sprintf(ser_line, "XTHREAD THREAD");
    nntp_command(ser_line);
    size = nntp_readcheck();
    if (size < 0)
	return TRUE;

    printf("\nGetting thread file.");
    fflush(stdout);
    if (nntp_read((char*)&total, (long)sizeof (TOTAL)) < sizeof (TOTAL))
	goto done;

#else /* !USE_XTHREAD */
    fp = fopen(mt_name(ngname), FOPEN_RB);
    if (fp == Nullfp)
	return TRUE;
    printf("\nReading thread file.");
    fflush(stdout);

    if (fread((char*)&total, 1, sizeof (TOTAL), fp) < sizeof (TOTAL))
	goto done;

#endif /* !USE_XTHREAD */

    /* Put the header's LONGs and WORDs into our byte order. */
    lp_bmap(&total.first, 4);
    wp_bmap(&total.root, 5);

    if (!total.root) {
	/* No roots at all: just finish up the (empty) data. */
	tweak_data();
    }
    else {
	if (total.last > lastart)
	    grow_cache(total.last);

	if (!read_authors()
	 || !read_subjects()
	 || !read_roots()
	 || !read_articles()
	 || !read_ids())
	{
	    /* One of the sections was bad.  safefree() copes with items
	    ** that are already gone, and the readers released anything
	    ** they had only partially built, so free whatever is left
	    ** and rebuild the cache without the thread file.
	    */
	    close_cache();
	    safefree(&strings);
	    safefree((char**)&article_array);
	    safefree((char**)&subject_array);
	    safefree((char**)&author_array);
	    safefree((char**)&ids);
	    try_mt = FALSE;
	    build_cache();
	    try_mt = TRUE;
	    success = FALSE;
	}
	else {
	    tweak_data();
	    first_cached = absfirst;
	    if (total.last < absfirst)
		last_cached = absfirst-1;
	    else
		last_cached = total.last;
	    cached_all_in_range = TRUE;
	}
    }

done:
#ifdef USE_XTHREAD
    while (nntp_read(ser_line, (long)sizeof ser_line))
	;		/* trash any extraneous bytes */
#else
    fclose(fp);
#endif
    return success;
}

#ifndef USE_XTHREAD
/* Turn a newsgroup name into the name of its thread data file.  The result
** is built in buf: the thread directory, a '/', and the group name with
** every '.' substituted by '/' (the name is left alone when
** LONG_THREAD_NAMES is defined), followed by "/.thread" when the thread
** directory is the spool itself or ".th" otherwise.  Returns buf.
*/
static char *
mt_name(group)
char *group;
{
#ifdef LONG_THREAD_NAMES
    sprintf(buf, "%s/%s", threaddir, group);
#else
    register char *dot;

    strcpy(buf, threaddir);
    dot = buf + strlen(threaddir);
    *dot++ = '/';
    strcpy(dot, group);
    /* Only the group part is scanned, so dots in threaddir survive. */
    for (dot = index(dot, '.'); dot; dot = index(dot, '.'))
	*dot = '/';
#endif
    strcat(buf, threaddir == spool ? "/.thread" : ".th");
    return buf;
}
#endif

/* Start of the subject strings inside the first string table, and the end
** of whichever string table is currently being unpacked.
*/
static char *subject_strings, *string_end;

/* The author information is an array of use-counts, followed by all the
** null-terminated strings crammed together.  The subject strings are read
** in at the same time, since they are appended to the end of the author
** strings.
**
** On success author_array[] points at each author string inside strings,
** subject_strings points just past the last author, and 1 is returned.
** Returns 0 on a short read or malformed table; whatever remains allocated
** in strings and author_array is left for the caller to release.
*/
static int
read_authors()
{
    register int count;
    register char *string_ptr, **author_ptr;

    /* The sizes come straight from the file.  A negative author count
    ** would turn into an enormous allocation request, and an empty string
    ** table has no last byte for the terminator check below to examine.
    */
    if (total.author < 0 || total.string1 <= 0)
	return 0;

    if (!read_item((char**)&author_cnts, (MEM_SIZE)total.author*sizeof (WORD)))
	return 0;
    safefree((char**)&author_cnts);   /* we don't need these */

    if (!read_item(&strings, (MEM_SIZE)total.string1))
	return 0;

    string_ptr = strings;
    string_end = string_ptr + total.string1;
    /* The table must end in a NUL or the strlen() calls could run off it. */
    if (string_end[-1] != '\0') {
	/* (first string table is invalid) */
	return 0;
    }

    /* We'll use this array to point each article at its proper author
    ** (the packed values were saved as indexes).
    */
    author_array = (char**)safemalloc(total.author * sizeof (char*));
    author_ptr = author_array;

    for (count = total.author; count; count--) {
	if (string_ptr >= string_end)
	    break;
	*author_ptr++ = string_ptr;
	string_ptr += strlen(string_ptr) + 1;
    }
    subject_strings = string_ptr;

    if (count) {
	/* (ran out of strings before running out of authors) */
	return 0;
    }
    return 1;
}

/* The subject values consist of the crammed-together null-terminated strings
** (already read in above) and the use-count array.  They were saved in the
** order that the roots require while being unpacked.
*/
static int
read_subjects()
{
    register int count;
    register char *string_ptr;
    register SUBJECT **subj_ptr;
    WORD *subject_cnts;

    if (!read_item((char**)&subject_cnts,
		   (MEM_SIZE)total.subject * sizeof (WORD))) {
	/* (Error already logged.) */
	return 0;
    }
    free((char*)subject_cnts);		/* we don't need these */

    /* Use this array when unpacking the article's subject offset. */
    subject_array = (SUBJECT**)safemalloc(total.subject * sizeof (SUBJECT*));
    subj_ptr = subject_array;

    string_ptr = subject_strings;	/* string_end is already set */

    for (count = total.subject; count; count--) {
	int len;
	ARTICLE arty;
	if (string_ptr >= string_end)
	    break;
	len = strlen(string_ptr);
	arty.subj = 0;
	set_subj_line(&arty, string_ptr, len);
	if (len == 72)
	    arty.subj->flags |= SF_SUBJTRUNCED;
	string_ptr += len + 1;
	*subj_ptr++ = arty.subj;
    }
    if (count || string_ptr != string_end) {
	/*error("subject data is invalid.\n");*/
	return 0;
    }
    return 1;
}

/* Read in the packed root structures to set each subject's thread article
** offset.  This gets turned into a real pointer later.
*/
static int
read_roots()
{
    register int count, i;
    register SUBJECT **subj_ptr;
    int ret;

    subj_ptr = subject_array;

    for (count = total.root; count--; ) {
#ifdef USE_XTHREAD
	ret = nntp_read((char*)&p_root, (long)sizeof (PACKED_ROOT));
#else
	ret = fread((char*)&p_root, 1, sizeof (PACKED_ROOT), fp);
#endif
	if (ret != sizeof (PACKED_ROOT)) {
	    /*error("failed root read -- %d bytes instead of %d.\n",
		ret, sizeof (PACKED_ROOT));*/
	    return 0;
	}
	wp_bmap(&p_root.articles, 3);	/* converts subject_cnt too */
	if (p_root.articles < 0 || p_root.articles >= total.article) {
	    /*error("root has invalid values.\n");*/
	    return 0;
	}
	i = p_root.subject_cnt;
	if (i <= 0 || (subj_ptr - subject_array) + i > total.subject) {
	    /*error("root has invalid values.\n");*/
	    return 0;
	}
	subj_ptr[i-1]->thread_link = subj_ptr[0];
	while (i--) {
	    union { ARTICLE *ap; int num; } uni;
	    if (i)
		subj_ptr[0]->thread_link = subj_ptr[1];
	    subj_ptr[0]->flags &= ~SF_THREAD;
	    uni.num = p_root.articles;
	    (*subj_ptr++)->thread = uni.ap;
	}
    }
    return 1;
}

/* Set by the_subject(), the_author() and the_article() when a packed value
** is out of range; read_articles() clears it first and checks it afterwards.
*/
static bool invalid_data;

/* Translate an article's packed subject value.  -1 stands for "no subject"
** and yields Nullsubj; any other value must index the subject array that
** was just unpacked, or else invalid_data is raised and Nullsubj returned.
*/
static SUBJECT *
the_subject(num)
WORD num;
{
    if (num >= 0 && num < total.subject)
	return subject_array[num];
    if (num != -1) {
	/* (invalid subject in thread file) */
	invalid_data = TRUE;
    }
    return Nullsubj;
}

/* Translate an article's packed author value.  -1 yields Nullch; a valid
** index yields a savestr() copy of that author's string; anything else
** raises invalid_data and yields Nullch.
*/
static char *
the_author(num)
WORD num;
{
    if (num >= 0 && num < total.author)
	return savestr(author_array[num]);
    if (num != -1) {
	/* (invalid author in thread file) */
	invalid_data = TRUE;
    }
    return Nullch;
}

/* Parent/sibling information is stored as an offset relative to the
** article's own slot (num) in the article array, zero meaning "none".
** Child values are always found in the very next array element when
** child_cnt is non-zero.  The result is not a real pointer: it is the
** target's index plus one, smuggled through a union, so that zero can keep
** meaning Nullart.  An out-of-range target raises invalid_data.
*/
static ARTICLE *
the_article(relative_offset, num)
WORD relative_offset;
int num;
{
    union { ARTICLE *ap; int num; } disguise;
    int target;

    if (relative_offset == 0)
	return Nullart;
    target = num + relative_offset;
    if (target >= 0 && target < total.article) {
	disguise.num = target + 1;
	return disguise.ap;	/* slip them an offset in disguise */
    }
    /* (invalid article offset in thread file) */
    invalid_data = TRUE;
    return Nullart;
}

/* Read the articles into their trees.  Point everything everywhere. */
static int
read_articles()
{
    register int count;
    register ARTICLE *article, **art_ptr;
    int ret;

    /* Build an array to interpret interlinkages of articles. */
    article_array = (ARTICLE**)safemalloc(total.article * sizeof (ARTICLE*));
    art_ptr = article_array;

    invalid_data = FALSE;
    for (count = 0; count < total.article; count++) {
#ifdef USE_XTHREAD
	ret = nntp_read((char*)&p_article, (long)sizeof (PACKED_ARTICLE));
#else
	ret = fread((char*)&p_article, 1, sizeof (PACKED_ARTICLE), fp);
#endif
	if (ret != sizeof (PACKED_ARTICLE)) {
	    /*error("failed article read -- %d bytes instead of %d.\n",
		ret, sizeof (PACKED_ARTICLE));*/
	    return 0;
	}
	lp_bmap(&p_article.num, 2);
	wp_bmap(&p_article.subject, 8);

	article = *art_ptr++ = allocate_article(p_article.num);
	article->date = p_article.date;
#ifndef DBM_XREFS
	if (olden_days < 2 && !(p_article.flags & HAS_XREFS))
	    article->xrefs = nullstr;
#endif
	article->from = the_author(p_article.author);
	article->parent = the_article(p_article.parent, count);
	article->child1 = the_article(p_article.child_cnt ? 1 : 0, count);
	article->sibling = the_article(p_article.sibling, count);
	article->subj = the_subject(p_article.subject);
	if (invalid_data) {
	    /* (Error already logged.) */
	    return 0;
	}
	/* This is ok because parent articles precede their children */
	if (article->parent) {
	    union { ARTICLE *ap; int num; } uni;
	    uni.ap = article->parent;
	    article->parent = article_array[uni.num-1];
	}
	if (article->subj) {
	    if (!(article->flags & AF_MISSING)) {
		article->flags |= AF_FROMTRUNCED | AF_THREADED
		    | ((p_article.flags & ROOT_ARTICLE)? 0 : AF_HAS_RE);
	    }
	    /* Give this subject to any faked parent articles */
	    while (article->parent && !article->parent->subj) {
		article->parent->subj = article->subj;
		article = article->parent;
	    }
	} else
	    article->flags |= AF_FAKE;
    }

    /* We're done with most of the pointer arrays at this point. */
    safefree((char**)&subject_array);
    safefree((char**)&author_array);
    safefree(&strings);

    return 1;
}

/* Read the message-id strings and attach them to each article.  The data
** format consists of the mushed-together null-terminated strings (a domain
** name followed by all its unique-id prefixes) and then the article offsets
** to which they belong.  The first domain name was omitted, as it is a null
** domain for those truly weird message-id's without '@'s.
**
** The offsets array (ids) holds total.article + total.domain + 1 WORDs:
** for each domain, the indexes of its articles followed by a -1.  Each
** message-id is rebuilt as "<prefix@domain>" (or "<prefix>" for the null
** domain) and, when msgid_hash exists, entered into it.  Returns 1 on
** success and 0 on any short read or inconsistency; on failure ids and
** strings are left for the caller to release.
*/
static int
read_ids()
{
    register ARTICLE *article;
    register char *string_ptr;
    register int i, count, len, len2;

    /* The sizes come straight from the file.  An empty string table has no
    ** last byte for the terminator check below to examine, and a negative
    ** domain count makes no sense as a loop bound or array size.
    */
    if (total.string2 <= 0 || total.domain < 0)
	return 0;

    if (!read_item(&strings, (MEM_SIZE)total.string2)
     || !read_item((char**)&ids,
		(MEM_SIZE)(total.article+total.domain+1) * sizeof (WORD))) {
	return 0;
    }
    wp_bmap(ids, total.article + total.domain + 1);

    string_ptr = strings;
    string_end = string_ptr + total.string2;

    /* The table must end in a NUL or the strlen() calls could run off it. */
    if (string_end[-1] != '\0') {
	/* (second string table is invalid) */
	return 0;
    }

    for (i = 0, count = total.domain + 1; count--; i++) {
	if (i) {
	    if (string_ptr >= string_end) {
		/* (error unpacking domain strings) */
		return 0;
	    }
	    /* NOTE(review): the domain is copied into buf unbounded --
	    ** confirm buf is large enough for any domain in the file.
	    */
	    sprintf(buf, "@%s", string_ptr);
	    len = strlen(string_ptr) + 1;	/* the domain plus its '@' */
	    string_ptr += len;
	} else {
	    /* The first (null) domain has no string in the table. */
	    *buf = '\0';
	    len = 0;
	}
	if (ids[i] != -1) {
	    if (ids[i] < 0 || ids[i] >= total.article) {
		/* (error in id array) */
		return 0;
	    }
	    article = article_array[ids[i]];
	    for (;;) {
		if (string_ptr >= string_end) {
		    /* (error unpacking domain strings) */
		    return 0;
		}
		len2 = strlen(string_ptr);
		/* prefix + "@domain" + the two angle brackets + NUL */
		article->msgid = safemalloc(len2 + len + 2 + 1);
		sprintf(article->msgid, "<%s%s>", string_ptr, buf);
		string_ptr += len2 + 1;
		if (msgid_hash)
		{
		    HASHDATUM data;
		    if ((article->flags & AF_TMPMEM) == AF_TMPMEM) {
			data.dat_ptr = (char*)article;
			data.dat_len = 0;
		    } else {
			data.dat_ptr = Nullch;
			data.dat_len = article_num(article);
		    }
		    hashstore(msgid_hash, article->msgid, len2+len+2, data);
		}
		if (++i >= total.article + total.domain + !count) {
		    /* (overran id array unpacking domains) */
		    return 0;
		}
		if (ids[i] != -1) {
		    if (ids[i] < 0 || ids[i] >= total.article)
			return 0;
		    article = article_array[ids[i]];
		} else
		    break;	/* a -1 ends this domain's list */
	    }
	}
    }
    safefree((char**)&ids);
    safefree(&strings);

    return 1;
}

/* The last step: replace every disguised offset with a real pointer, put
** the articles into the cache, and mark missing articles as read.  The
** article array is released when we are done.
*/
static void
tweak_data()
{
    register int remaining;
    register ARTICLE *art, **slot;
    register SUBJECT *subj;
    register ART_NUM artnum;
    union { ARTICLE *ap; int num; } uni;

    /* Each subject's thread field still holds an article-array index. */
    subj = first_subject;
    while (subj) {
	uni.ap = subj->thread;
	subj->thread = article_array[uni.num];
	subj->thread->subj->flags |= SF_THREAD;
	subj = subj->next;
    }

    /* child1 and sibling hold index+1, with zero meaning "none". */
    slot = article_array;
    remaining = total.article;
    while (remaining--) {
	art = *slot++;
	if (art->child1) {
	    uni.ap = art->child1;
	    art->child1 = article_array[uni.num-1];
	}
	if (art->sibling) {
	    uni.ap = art->sibling;
	    art->sibling = article_array[uni.num-1];
	}
	if (!(art->flags & AF_MISSING))
	    cache_article(art);
    }

    /* Mark any missing articles as read */
    artnum = absfirst;
    art = article_ptr(artnum);
    while (artnum <= total.last) {
	if ((art->flags & (AF_CACHED|AF_MISSING)) == AF_CACHED)
	    check_poster(art);
	else
	    onemissing(art);
	artnum++;
	art++;
    }
    safefree((char**)&article_array);
}

/* Read the next len bytes of the thread data into a freshly malloc'ed
** buffer stored through dest.  On a complete read a progress dot is
** printed and 1 is returned; otherwise the buffer is freed, *dest is set
** to Nullch and 0 is returned.
*/
static int
read_item(dest, len)
char **dest;
MEM_SIZE len;
{
    long got;
    char *chunk;

    chunk = safemalloc(len);
    *dest = chunk;
#ifdef USE_XTHREAD
    got = nntp_read(chunk, (long)len);
#else
    got = fread(chunk, 1, (int)len, fp);
#endif
    if (got == len) {
	putchar('.');
	fflush(stdout);
	return 1;
    }
    free(chunk);
    *dest = Nullch;
    return 0;
}

/* Release the buffer that *pp points to, unless it is already gone, and
** reset *pp to Nullch so that a later call is harmless.
*/
static void
safefree(pp)
char **pp;
{
    char *victim = *pp;

    if (!victim)
	return;
    free(victim);
    *pp = Nullch;
}

/* Determine this machine's byte map for WORDs and LONGs.  A byte map is an
** array of BYTEs (sizeof (WORD) or sizeof (LONG) of them) with the 0th BYTE
** being the byte number of the high-order byte in my <type>, and so forth.
**
** The map is discovered by starting with the value 1 and repeatedly
** shifting it left: each time the set bit shows up in a new byte of the
** union, that byte's index is recorded, filling the map from its last
** entry (low-order byte) back to its first.  If at some step no byte is
** non-zero, both maps are set to the identity mapping instead.
*/
static void
mybytemap(map)
BMAP *map;
{
    union {
	BYTE b[sizeof (LONG)];
	WORD w;
	LONG l;
    } u;
    register BYTE *mp;
    register int i, j;

    /* WORD map: mp starts one past the end and is pre-decremented. */
    mp = &map->w[sizeof (WORD)];
    u.w = 1;
    for (i = sizeof (WORD); i > 0; i--) {
	/* Find the byte that currently holds the set bit. */
	for (j = 0; j < sizeof (WORD); j++) {
	    if (u.b[j] != 0)
		break;
	}
	if (j == sizeof (WORD))
	    goto bad_news;
	*--mp = j;
	/* Shift until the bit leaves byte j or the value becomes zero. */
	while (u.b[j] != 0 && u.w)
	    u.w <<= 1;
    }

    /* LONG map: same procedure over sizeof (LONG) bytes. */
    mp = &map->l[sizeof (LONG)];
    u.l = 1;
    for (i = sizeof (LONG); i > 0; i--) {
	for (j = 0; j < sizeof (LONG); j++) {
	    if (u.b[j] != 0)
		break;
	}
	if (j == sizeof (LONG)) {
	  bad_news:		/* also the target of the WORD loop's goto */
	    /* trouble -- set both to *something* consistent */
	    for (j = 0; j < sizeof (WORD); j++)
		map->w[j] = j;
	    for (j = 0; j < sizeof (LONG); j++)
		map->l[j] = j;
	    return;
	}
	*--mp = j;
	while (u.b[j] != 0 && u.l)
	    u.l <<= 1;
    }
}

/* Convert len WORDs in buf, in place, from the database's byte order to
** ours.  Nothing is touched when the two orders are the same.
*/
static void
wp_bmap(buf, len)
WORD *buf;
int len;
{
    union {
	BYTE b[sizeof (WORD)];
	WORD w;
    } src, dst;
    register int k;

    if (word_same)
	return;

    for ( ; len--; buf++) {
	src.w = *buf;
	/* Byte k of significance moves from their position to ours. */
	for (k = 0; k < sizeof (WORD); k++)
	    dst.b[my_bmap.w[k]] = src.b[mt_bmap.w[k]];
	*buf = dst.w;
    }
}

/* Convert len LONGs in buf, in place, from the database's byte order to
** ours.  Nothing is touched when the two orders are the same.
*/
static void
lp_bmap(buf, len)
LONG *buf;
int len;
{
    union {
	BYTE b[sizeof (LONG)];
	LONG l;
    } src, dst;
    register int k;

    if (long_same)
	return;

    for ( ; len--; buf++) {
	src.l = *buf;
	/* Byte k of significance moves from their position to ours. */
	for (k = 0; k < sizeof (LONG); k++)
	    dst.b[my_bmap.l[k]] = src.b[mt_bmap.l[k]];
	*buf = dst.l;
    }
}

#endif /* USE_MT */