4.3BSD-UWisc/src/usr.bin/refer/what4.c

Compare this file to the similar file:
Show the results in this format:

#ifndef lint
/* SCCS version-identification string for this file; left out when `lint` is defined. */
static char *sccsid = "@(#)what4.c	4.1 (Berkeley) 5/6/83";
#endif

#include "what..c"
/*
 * Sizing parameters for the word table.
 * At most ZIPF*NW distinct words are stored, the text pool holds
 * NW*ZIPF*WLEN bytes, and the hash table has HASHF times as many
 * slots as there are storable words.
 */
#define NW 5
#define ZIPF 10
#define HASHF 3
#define WLEN 10
#define SAME 0		/* strcmp() result for equal strings */
/* parenthesized so the product stays intact inside a larger expression */
#define TSIZE (HASHF*ZIPF*NW)
#define NF 10		/* most input files freqwd will open */

/* One hash-table slot: a word and the number of times it was seen. */
struct wst {
	char *tx;	/* word text (points into tbuf), or 0 if the slot is empty */
	int ct;		/* occurrence count */
};
int HSIZE;			/* probe modulus; freqwd sets it to a prime <= TSIZE */
static struct wst word[TSIZE];	/* the hash table itself */
/* text pool for the stored words; tp is the next free byte */
static char tbuf[NW*ZIPF*WLEN], *tp = tbuf;

freqwd ( fn, wd, nin )
char *fn[], *wd[];
{
	FILE *fi[NF];
	int nw 0, i, any, nf, j, wexch(), wcomp();
	char tw[20];
	for(HSIZE=TSIZE; !prime(HSIZE); HSIZE--);
	for(nf=0; fn[nf] && nf<NF; nf++)
		fi[nf] = fn[nf][0] ? fopen(fn[nf], "r") : NULL;
	do {
		any=0;
		for(i=0; i<nf; i++)
		{
			if (fi[i]==NULL) continue;
			if (gw(fi[i], tw)==0)
			{
				fclose(fi[i]);
				fi[i]==NULL;
				continue;
			}
			any=1;
			if (common(tw)) continue;
			if (strlen(tw)<3) continue;
			j = lookup (tw);
			if (j<0 && nw < ZIPF*NW)
			{
				j = -j;
				strcpy (tp, tw);
				word[j].tx = tp;
				while (*tp++);
				_assert (tp < tbuf+NW*ZIPF*WLEN);
				word[j].ct = 1;
				nw++;
			}
			else if (j>0)
				word[j].ct++;
		}
	} 
	while (any>0);
	shell ( TSIZE, wcomp, wexch );
	for(nw=0; word[nw].ct >0 && nw<TSIZE; nw++)
		if (nw>=nin*2 && word[nw].ct != word[0].ct)
			break;
	for(i=0; i<nw; i++)
		wd[i] = word[i].tx;
	return(nw);
}

/*
 * lookup: find a word in the hash table by linear probing.
 *
 * Returns the (positive) slot index if wt is already stored, otherwise the
 * negated index of the empty slot where it belongs.  Slot 0 is never used,
 * because 0 cannot carry a sign; it is stepped over while probing, so the
 * result is never 0.
 *
 * HSIZE must already be set (freqwd does this) and must exceed 1, and the
 * table must contain at least one empty slot other than slot 0, or the
 * probe will not terminate.
 */
int
lookup (wt)
char *wt;
{
	int h;

	h = hash(wt) % HSIZE;
	for (;;)
	{
		if (h != 0)
		{
			if (!word[h].tx)
				break;		/* free slot: word is absent */
			if (strcmp(wt, word[h].tx) == SAME)
				return (h);
		}
		h = (h+1)%HSIZE;
	}
	return ( -h );
}

/*
 * hash: fold a string into a non-negative integer.
 *
 * Each byte is shifted left by its position modulo 5 and XORed into the
 * result.  Bytes are masked to 0..255 first, so the shift never operates
 * on a negative value and the result cannot be negative.
 */
int
hash (s)
char *s;
{
	int k = 0, c, i = 0;

	while ( (c = *s++ & 0377) != 0 )
		k ^= (c << (i++%5) );
	return (k);
}

/* Most characters gw stores for one word; freqwd in this file passes a 20-byte buffer. */
#define GWMAXWORD 19

/*
 * gw: read the next word from f into t, folded to lower case.
 *
 * Leading characters that cannot start a word are skipped.  A word is a
 * run of characters accepted by alphanum(); the character that ends the
 * word is consumed.  At most GWMAXWORD characters are stored; the rest of
 * an over-long word is read and discarded.  t is always NUL-terminated
 * when 1 is returned.
 *
 * Returns 1 if a word was read, 0 if f is NULL or end of file was reached
 * with no word pending.  A word cut short by end of file is still returned.
 */
int
gw (f, t)
char *t;
FILE *f;
{
	int start = 1, oldc = ' ', c, n = 0;

	if (f==NULL) return (0);
	while ( (c=getc(f)) != EOF)
	{
		if (isupper(c)) c= tolower(c);
		if (start)
		{
			/* still before the word; oldc deliberately stays ' ' here */
			if (!alphanum(c, oldc))
				continue;
			start=0;
		}
		if (!alphanum(c, oldc))
		{
			*t=0;
			return(1);
		}
		if (n < GWMAXWORD)
		{
			*t++ = c;
			n++;
		}
		oldc=c;
	}
	if (!start)
	{
		/* end of file in mid-word: hand back what was collected */
		*t=0;
		return(1);
	}
	return(0);
}

/*
 * alphanum: decide whether c may be part of a word.
 *
 * Letters and digits always qualify.  An apostrophe or a hyphen qualifies
 * only when the preceding character, oldc, is a letter.
 * Returns 1 if c qualifies, 0 otherwise.
 */
int
alphanum( c, oldc )
int c, oldc;
{
	if (isalnum(c))
		return(1);
	return (isalpha(oldc) && (c == '\'' || c == '-'));
}

/*
 * wcomp: ordering predicate handed to shell() by freqwd.
 * Returns 1 when table entry n1 has a count at least as large as
 * entry n2, and 0 otherwise.
 */
int
wcomp (n1, n2)
int n1, n2;
{
	return (word[n1].ct >= word[n2].ct);
}

/*
 * wexch: exchange routine handed to shell() by freqwd.
 * Swaps table entries n1 and n2 (both the text pointer and the count).
 * Always returns 0; the value carries no meaning.
 */
int
wexch (n1, n2)
int n1, n2;
{
	struct wst tt;

	tt = word[n1];
	word[n1] = word[n2];
	word[n2] = tt;
	return (0);
}

/*
 * prime: return 1 if n is a prime number, 0 otherwise.
 * Values below 2 are not prime.  Uses trial division by odd numbers.
 */
int
prime(n)
int n;
{
	/* only executed once- slow is ok */
	int i;

	if (n < 2) return(0);
	if (n%2==0) return(n==2);
	/* i <= n/i is i*i <= n without the risk of overflowing i*i */
	for(i=3; i<=n/i; i+= 2)
		if (n%i ==0 ) return(0);
	return(1);
}

/*
 * trimnl: remove one trailing newline from s, in place.
 * A string with no trailing newline, including the empty string, is left
 * unchanged.  Always returns 0; the value carries no meaning.
 */
int
trimnl(s)
char *s;
{
	char *end = s;

	while (*end)
		end++;
	/* end > s guards the empty string, where there is no last character */
	if (end > s && end[-1] == '\n')
		end[-1] = 0;
	return (0);
}

/*
 * this is the test for what4.c as a standalone prog ...
 * Disabled with "# if 0"; change the condition to build it.  It treats
 * every argument as a file name, runs freqwd over the files and prints
 * the words it selects, one per line, followed by a total.
 */
# if 0
int
main (argc, argv)
int argc;
char *argv[];
{
	/* room for NF names plus the terminator; freqwd returns at most ZIPF*NW words */
	char *ff[NF+1], *wd[ZIPF*NW], **ffp = ff;
	int n, i, nfile = 0;

	while (--argc > 0 && nfile < NF)
	{
		*ffp++ = *++argv;
		nfile++;
	}
	*ffp=0;
	/* NOTE(review): the third argument (nin) was missing here; the file count is a guess - confirm */
	n=freqwd(ff,wd,nfile);
	for(i=0; i<n; i++)
		printf("%s\n",wd[i]);
	printf("total of %d items\n",n);
	return (0);
}
# endif