#include "code.h"

/*
 * spell: read words from standard input, one per line, and report
 * those that cannot be derived from the compiled dictionary by
 * stripping known prefixes and suffixes.
 *
 * The part-of-speech/affix bit names used below (NOUN, ADJ, STOP,
 * MONO, IN, ...) are expected to come from code.h.
 */

#ifndef _POSIX_SOURCE
#include <stdio.h>
#include <ctype.h>
#include <libc.h>
#else
#include <fcntl.h>
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#endif

/*
 * isvowel and the ctype calls in this file always see an unsigned
 * char value: a plain (possibly signed) char holding a byte >= 0x80
 * would otherwise index voweltab out of bounds or be undefined for
 * the <ctype.h> functions.
 */
#define	isvowel(c)	voweltab[(uchar)(c)]
#define	Tolower(c)	(isupper((uchar)(c))? (c)-'A'+'a': (c))
#define	pair(a,b)	(((a)<<8) | (b))
#define	DLEV		2	/* derivation levels consumed per stripped suffix */
#define	DSIZ		40	/* derivation levels that are recorded */

typedef	long		Bits;
typedef	unsigned char	uchar;
#define	Set(h, f)	((long)(h) & (f))

Bits	nop(char*, char*, char*, int, int);
Bits	strip(char*, char*, char*, int, int);
Bits	ize(char*, char*, char*, int, int);
Bits	i_to_y(char*, char*, char*, int, int);
Bits	ily(char*, char*, char*, int, int);
Bits	subst(char*, char*, char*, int, int);
Bits	CCe(char*, char*, char*, int, int);
Bits	tion(char*, char*, char*, int, int);
Bits	an(char*, char*, char*, int, int);
Bits	s(char*, char*, char*, int, int);
Bits	es(char*, char*, char*, int, int);
Bits	bility(char*, char*, char*, int, int);
Bits	y_to_e(char*, char*, char*, int, int);
Bits	VCe(char*, char*, char*, int, int);

Bits	trypref(char*, char*, int, int);
Bits	tryword(char*, char*, int, int);
Bits	trysuff(char*, int, int);
Bits	dict(char*, char*);
void	typeprint(Bits);
void	pcomma(char*);
void	runout(char*);

void	ise(void);
int	ordinal(void);
char*	skipv(char*);
int	inun(char*, Bits);
char*	ztos(char*);
void	readdict(char*);
char	*strdupl(char *);

/* One prefix: its spelling and a flag word (only IN is tested here). */
typedef	struct	Ptab	Ptab;
struct	Ptab
{
	char*	s;
	int	flag;
};

/*
 * One suffix rule.  suf is the suffix spelled backwards, so it can be
 * matched from the end of the word.  p1 is called with the word end
 * moved back n1 characters, d1/a1 as its message strings and flag as
 * the required dictionary bits; if it fails and p2 is set, p2 is tried
 * with n2, d2, a2.  affixable is tested against the caller's flag in
 * trysuff before the rule is applied.
 */
typedef	struct	Suftab	Suftab;
struct	Suftab
{
	char	*suf;
	Bits	(*p1)(char*, char*, char*, int, int);
	int	n1;
	char	*d1;
	char	*a1;
	int	flag;
	int	affixable;
	Bits	(*p2)(char*, char*, char*, int, int);
	int	n2;
	char	*d2;
	char	*a2;
};

/* The tables below are indexed by the last letter of the word. */
Suftab	staba[] = {
	{"aibohp",subst,1,"-e+ia","",NOUN, NOUN},
	0
};

Suftab	stabc[] = {
	{"cai",strip,1,"","+c",N_AFFIX, ADJ|NOUN},
	{"citsi",strip,2,"","+ic",N_AFFIX, ADJ | N_AFFIX | NOUN},
	{"citi",ize,1,"-e+ic","",N_AFFIX, ADJ },
	{"cihparg",i_to_y,1,"-y+ic","",NOUN, ADJ|NOUN },
	{"cipocs",ize,1,"-e+ic","",NOUN, ADJ },
	{"cirtem",i_to_y,1,"-y+ic","",NOUN, ADJ },
	{"cigol",i_to_y,1,"-y+ic","",NOUN, ADJ },
	{"cimono",i_to_y,1,"-y+ic","",NOUN, ADJ },
	{"cibohp",subst,1,"-e+ic","",NOUN, ADJ },
	0
};

Suftab	stabd[] = {
	{"de",strip,1,"","+d",ED,ADJ |COMP,i_to_y,2,"-y+ied","+ed"},
	{"dooh",ily,4,"-y+ihood","+hood",NOUN | ADV, NOUN},
	0
};

Suftab	stabe[] = {
	/*
	 * V_affix for comment ->commence->commentment??
	 */
	{"ecn",subst,1,"-t+ce","",ADJ,N_AFFIX|_Y|NOUN|VERB|ACTOR|V_AFFIX},
	{"elbaif",i_to_y,4,"-y+iable","",V_IRREG,ADJ},
	{"elba",CCe,4,"-e+able","+able",V_AFFIX,ADJ},
	{"evi",subst,0,"-ion+ive","",N_AFFIX | V_AFFIX,NOUN | N_AFFIX| ADJ},
	{"ezi",CCe,3,"-e+ize","+ize",N_AFFIX|ADJ ,V_AFFIX | VERB |ION | COMP},
	{"ekil",strip,4,"","+like",N_AFFIX ,ADJ},
	0
};

Suftab	stabg[] = {
	{"gniee",strip,3,"","+ing",V_IRREG ,ADJ|NOUN},
	{"gnikam",strip,6,"","+making",NOUN,NOUN},
	{"gnipeek",strip,7,"","+keeping",NOUN,NOUN},
	{"gni",CCe,3,"-e+ing","+ing",V_IRREG ,ADJ|ED|NOUN},
	0
};

Suftab	stabl[] = {
	{"ladio",strip,2,"","+al",NOUN |ADJ,ADJ},
	{"laci",strip,2,"","+al",NOUN |ADJ,ADJ |NOUN|N_AFFIX},
	{"latnem",strip,2,"","+al",N_AFFIX,ADJ},
	{"lanoi",strip,2,"","+al",N_AFFIX,ADJ|NOUN},
	{"luf",ily,3,"-y+iful","+ful",N_AFFIX,ADJ | NOUN},
	0
};

Suftab	stabm[] = {
	/* congregational + ism */
	/* NOTE(review): "ism" lacks the leading '+' every other a1 has -- confirm */
	{"msi",CCe,3,"-e+ism","ism",N_AFFIX|ADJ,NOUN},
	{"margo",subst,-1,"-ph+m","",NOUN,NOUN},
	0
};

Suftab	stabn[] = {
	{"noitacifi",i_to_y,6,"-y+ication","",ION,NOUN | N_AFFIX},
	{"noitazi",ize,4,"-e+ation","",ION,NOUN| N_AFFIX},
	{"noit",tion,3,"-e+ion","+ion",ION,NOUN| N_AFFIX | V_AFFIX |VERB|ACTOR},
	{"naino",an,3,"","+ian",NOUN|PROP_COLLECT,NOUN| N_AFFIX},
	{"namow",strip,5,"","+woman",MAN,PROP_COLLECT|N_AFFIX},
	{"nam",strip,3,"","+man",MAN,PROP_COLLECT | N_AFFIX | VERB},
	{"na",an,1,"","+n",NOUN|PROP_COLLECT,NOUN | N_AFFIX},
	{"nemow",strip,5,"","+women",MAN,PROP_COLLECT},
	/* NOTE(review): message is "+man" for the suffix "men" -- confirm */
	{"nem",strip,3,"","+man",MAN,PROP_COLLECT},
	0
};

Suftab	stabp[] = {
	{"pihs",strip,4,"","+ship",NOUN|PROP_COLLECT,NOUN| N_AFFIX},
	0
};

Suftab	stabr[] = {
	{"rehparg",subst,1,"-y+er","",ACTOR,NOUN,strip,2,"","+er"},
	{"reyhparg",nop,0,"","",0,NOUN},
	{"rekam",strip,5,"","+maker",NOUN,NOUN},
	{"repeek",strip,6,"","+keeper",NOUN,NOUN},
	{"re",strip,1,"","+r",ACTOR,NOUN | N_AFFIX|VERB|ADJ,	i_to_y,2,"-y+ier","+er"},
	{"rota",tion,2,"-e+or","",ION,NOUN| N_AFFIX|_Y},
	{"rotc",tion,2,"","+or",ION,NOUN| N_AFFIX},
	{"rotp",tion,2,"","+or",ION,NOUN| N_AFFIX},
	0
};

Suftab	stabs[] = {
	{"ssen",ily,4,"-y+iness","+ness",ADJ|ADV,NOUN| N_AFFIX},
	{"ssel",ily,4,"-y+iless","+less",NOUN | PROP_COLLECT,ADJ },
	{"se",s,1,"","+s",NOUN | V_IRREG,DONT_TOUCH ,	es,2,"-y+ies","+es"},
	{"s'",s,2,"","+'s",PROP_COLLECT | NOUN,DONT_TOUCH },
	{"s",s,1,"","+s",NOUN | V_IRREG,DONT_TOUCH },
	0
};

Suftab	stabt[] = {
	{"tnem",strip,4,"","+ment",V_AFFIX,NOUN | N_AFFIX | ADJ|VERB},
	{"tse",strip,2,"","+st",EST,DONT_TOUCH,	i_to_y,3,"-y+iest","+est" },
	{"tsigol",i_to_y,2,"-y+ist","",N_AFFIX,NOUN | N_AFFIX},
	{"tsi",CCe,3,"-e+ist","+ist",N_AFFIX|ADJ,NOUN | N_AFFIX|COMP},
	0
};

Suftab	staby[] = {
	{"ytilb",nop,0,"","",0,NOUN},
	{"ycn",subst,1,"-t+cy","",ADJ | N_AFFIX,NOUN | N_AFFIX},
	{"ytilib",bility,5,"-le+ility","",ADJ | V_AFFIX,NOUN | N_AFFIX},
	{"ytisuo",nop,0,"","",NOUN},
	{"yti",CCe,3,"-e+ity","+ity",ADJ ,NOUN | N_AFFIX },
	{"ylb",y_to_e,1,"-e+y","",ADJ,ADV},
	{"ylc",nop,0,"","",0},
	{"yl",ily,2,"-y+ily","+ly",ADJ,ADV|COMP},
	{"yrtem",subst,0,"-er+ry","",NOUN,NOUN | N_AFFIX},
	{"y",CCe,1,"-e+y","+y",_Y,ADJ|COMP},
	0
};

Suftab	stabz[] = {
	0
};

Suftab*	suftab[] = {
	staba,
	stabz,
	stabc,
	stabd,
	stabe,
	stabz,
	stabg,
	stabz,
	stabz,
	stabz,
	stabz,
	stabl,
	stabm,
	stabn,
	stabz,
	stabp,
	stabz,
	stabr,
	stabs,
	stabt,
	stabz,
	stabz,
	stabz,
	stabz,
	staby,
	stabz,
};

/* Prefix tables, indexed by the first letter of the word. */
Ptab	ptaba[] = {
	"anti", 0,
	"auto", 0,
	0
};

Ptab	ptabb[] = {
	"bio", 0,
	0
};

Ptab	ptabc[] = {
	"counter", 0,
	0
};

Ptab	ptabd[] = {
	"dis", 0,
	0
};

Ptab	ptabe[] = {
	"electro", 0,
	0
};

Ptab	ptabf[] = {
	"femto", 0,
	0
};

Ptab	ptabg[] = {
	"geo", 0,
	"giga", 0,
	0
};

Ptab	ptabh[] = {
	"hyper", 0,
	0
};

Ptab	ptabi[] = {
	"immuno", 0,
	"im", IN,
	"intra", 0,
	"inter", 0,
	"in", IN,
	"ir", IN,
	"iso", 0,
	0
};

Ptab	ptabj[] = {
	0
};

Ptab	ptabk[] = {
	"kilo", 0,
	0
};

Ptab	ptabl[] = {
	0
};

Ptab	ptabm[] = {
	"magneto", 0,
	"mega", 0,
	"meta", 0,
	"micro", 0,
	"mid", 0,
	"milli", 0,
	"mini", 0,
	"mis", 0,
	"mono", 0,
	"multi", 0,
	0
};

Ptab	ptabn[] = {
	"nano", 0,
	"neuro", 0,
	"non", 0,
	0
};

Ptab	ptabo[] = {
	"out", 0,
	"over", 0,
	0
};

Ptab	ptabp[] = {
	"para", 0,
	"photo", 0,
	"pico", 0,
	"poly", 0,
	"pre", 0,
	"pseudo", 0,
	"psycho", 0,
	0
};

Ptab	ptabq[] = {
	"quasi", 0,
	0
};

Ptab	ptabr[] = {
	"radio", 0,
	"re", 0,
	0
};

Ptab	ptabs[] = {
	"semi", 0,
	"stereo", 0,
	"sub", 0,
	"super", 0,
	0
};

Ptab	ptabt[] = {
	"tele", 0,
	"thermo", 0,
	0
};

Ptab	ptabu[] = {
	"ultra", 0,
	"under", 0,	/*must precede un*/
	"un", IN,
	0
};

Ptab	ptabv[] = {
	0
};

Ptab	ptabw[] = {
	0
};

Ptab	ptabx[] = {
	0
};

Ptab	ptaby[] = {
	0
};

Ptab	ptabz[] = {
	0
};

Ptab*	preftab[] = {
	ptaba,
	ptabb,
	ptabc,
	ptabd,
	ptabe,
	ptabf,
	ptabg,
	ptabh,
	ptabi,
	ptabj,
	ptabk,
	ptabl,
	ptabm,
	ptabn,
	ptabo,
	ptabp,
	ptabq,
	ptabr,
	ptabs,
	ptabt,
	ptabu,
	ptabv,
	ptabw,
	ptabx,
	ptaby,
	ptabz,
};

/*
 * One step of a derivation: the message to print for it and whether
 * it is a suffix step or a prefix step (type counts PREF once per
 * stacked prefix, see trypref).
 */
typedef struct {
	char *mesg;
	enum { NONE, SUFF, PREF} type;
} Deriv;

int	cflag;
int	fflag;
int	vflag;
int	xflag;
char	word[500];	/* working copy of the input word; edited in place */
char	original[500];	/* input line exactly as read */
Deriv	deriv[DSIZ+3];
Deriv	emptyderiv;
char	affix[DSIZ*10];	/* 10 is longest affix message */
int	prefcount;
int	suffcount;
char	space[300000];	/* must be as large as "words"+"space" in pcode run */
Bits	encode[2048];	/* must be as long as "codes" in pcode run */
int	nencode;
char	voweltab[256];	/* indexed by unsigned char, see isvowel */
char*	spacep[128*128+1];	/* pointer to words starting with 'xx' */

char*	codefile = "/usr/lib/spell/amspell";
char*	brfile = "/usr/lib/spell/brspell";

/*
 * Parse options, load the dictionary, then check one word per input
 * line until EOF.  Without -c, unrecognized words are echoed; with
 * -v, the derivation of recognized affixed words is printed too.
 * With -c, one status character is written per word.
 */
int
main(int argc, char *argv[])
{
	char *ep, *cp;
	char *dp;
	int j, i, c;
	int low = 0;
	Bits h;

	for(i=0; (c = "aeiouyAEIOUY"[i]) != 0; i++)
		voweltab[c] = 1;
	while(argc > 1) {
		if(argv[1][0] != '-')
			break;
		for(i=1; (c = argv[1][i]) != 0; i++)
		switch(c) {
		default:
			fprintf(stderr, "usage: spell [-bcvx] [-f file]\n");
			exit(1);

		case 'b':
			ise();
			if(!fflag)
				codefile = brfile;
			continue;

		case 'C':
			vflag++;
			/* fall through: -C also does everything -c does */
		case 'c':
			setbuf(stdout,0);
			cflag++;
			continue;

		case 'v':
			vflag++;
			continue;

		case 'x':
			xflag++;
			continue;

		case 'f':
			if(argc <= 2) {
				fprintf(stderr, "spell: -f requires another argument\n");
				exit(1);
			}
			argv++;
			argc--;
			fflag++;
			codefile = argv[1];
			goto brk;
		}
	brk:
		argv++;
		argc--;
	}
	readdict(codefile);
	if(argc > 1) {
		fprintf(stderr, "usage: spell [-bcvx] [-f file]\n");
		exit(1);
	}
	for(;;) {
	loop:
		affix[0] = 0;
		/* read one line into original; overlong lines are passed to runout */
		for(ep=original; ; ep++) {
			if(ep >= original + sizeof(original) - 1) {
				*ep = 0;
				runout(original);
				goto loop;
			}
			j = getchar();
			if(j == EOF)
				exit(0);
			if(j != '\n')
				*ep = j;
			else {
				*ep = 0;
				break;
			}
		}
		/* copy to word, counting lower-case letters */
		low = 0;
		for(ep=word,dp=original; (j = *dp) != 0; ep++,dp++) {
			if(islower((uchar)j))
				low++;
			if(ep >= word+sizeof(word)-1)
				break;
			*ep = j;
		}
		*ep = 0;
		/* single alphanumerics and well-formed ordinals are accepted outright */
		h = ~STOP;
		if((word[1] == 0 && isalnum((uchar)word[0])) ||
		   (isdigit((uchar)word[0]) && ordinal()))
			goto check;
		h = 0;
		/* all upper case: try as is, then with all but the first letter lowered */
		if(!low && !(h = trypref(ep,".",0,ALL|STOP|DONT_TOUCH)))
			for(cp=original+1,dp=word+1; dp<ep; dp++,cp++)
				*dp = Tolower(*cp);
		if(!h)
		for(;;) {	/* at most twice */
			if((h = trypref(ep,".",0,ALL|STOP|DONT_TOUCH)) != 0)
				break;
			if((h = trysuff(ep,0,ALL|STOP|DONT_TOUCH)) != 0)
				break;
			if(!isupper((uchar)word[0]))
				break;
			/* restore the word (the routines edit it) and retry uncapitalized */
			cp = original;
			dp = word;
			while((*dp = *cp++) != 0) {
				if(!low)
					*dp = Tolower(*dp);
				dp++;
			}
			word[0] = Tolower(word[0]);
		}
	check:
		if(cflag) {
			if(!h || Set(h,STOP))
				putchar('-');
			else if(!vflag)
				putchar('+');
			else
				putchar('0' + (suffcount>0) +
					(prefcount>4? 8: 2*prefcount));
		} else if(!h || Set(h,STOP))
			printf("%s\n", original);
		else if(affix[0] != 0 && affix[0] != '.')
			printf("%s\t%s\n", affix, original);
	}
}

/*	strip exactly one suffix and do
 *	indicated routine(s), which may recursively
 *	strip suffixes
 *
 *	ep points just past the end of the current word; returns the
 *	dictionary bits of the successful derivation, or 0.
 */
Bits
trysuff(char* ep, int lev, int flag)
{
	Suftab *t;
	char *cp, *sp;
	Bits h = 0;
	int initchar;

	flag &= ~MONO;
	lev += DLEV;
	if(lev<DSIZ)
		deriv[lev] = deriv[lev-1] = emptyderiv;
	if(ep <= word)		/* empty word: ep[-1] would lie outside word[] */
		return h;
	initchar = (uchar)ep[-1];
	if(!islower(initchar))
		return h;
	for(t=suftab[initchar-'a']; (sp=t->suf) != 0; t++) {
		/* match the reversed suffix against the word end, staying inside word[] */
		cp = ep;
		while(*sp)
			if(cp <= word || *--cp != *sp++)
				goto next;
		/* the stem that remains must contain a vowel */
		for(sp=ep-t->n1; --sp >= word && !isvowel(*sp);)
			;
		if(sp < word)
			continue;
		if(!(t->affixable & flag))
			return 0;
		h = (*t->p1)(ep-t->n1, t->d1, t->a1, lev+1, t->flag|STOP);
		if(!h && t->p2!=0) {
			if(lev<DSIZ)
				deriv[lev] = deriv[lev+1] = emptyderiv;
			h = (*t->p2)(ep-t->n2, t->d2, t->a2, lev, t->flag|STOP);
		}
		break;
	next:;
	}
	return h;
}

/* Suffix routine that always fails; used by table entries that block a match. */
Bits
nop(char* ep, char* d, char* a, int lev, int flag)
{
#pragma	ref ep
#pragma	ref d
#pragma	ref a
#pragma	ref lev
#pragma	ref flag
	return 0;
}

/*
 * Like strip, but first rejects stems whose junction with the suffix
 * forms one of the listed vowel pairs, or three identical characters
 * in a row.
 */
Bits
cstrip(char* ep, char* d, char* a, int lev, int flag)
{
	int temp = ep[0];

	if(isvowel(temp) && isvowel(ep[-1])) {
		switch(pair(ep[-1],ep[0])) {
		case pair('a', 'a'):
		case pair('a', 'e'):
		case pair('a', 'i'):
		case pair('e', 'a'):
		case pair('e', 'e'):
		case pair('e', 'i'):
		case pair('i', 'i'):
		case pair('o', 'a'):
			return 0;
		}
	} else
	if(temp==ep[-1]&&temp==ep[-2])
		return 0;
	return strip(ep,d,a,lev,flag);
}

/*
 * Look up the stem ending at ep, then the stem with a doubled final
 * consonant undone (looked up with MONO set), and finally try
 * stripping a further suffix.
 */
Bits
strip(char* ep, char* d, char* a, int lev, int flag)
{
#pragma	ref d
	Bits h = trypref(ep, a, lev, flag);

	if(Set(h,MONO) && isvowel(*ep) && isvowel(ep[-2]))
		h = 0;
	if(h)
		return h;
	if(isvowel(*ep) && !isvowel(ep[-1]) && ep[-1]==ep[-2]) {
		h = trypref(ep-1,a,lev,flag|MONO);
		if(h)
			return h;
	}
	return trysuff(ep,lev,flag);
}

/* Plural/possessive -s: refuse stems whose endings call for -es or -ies. */
Bits
s(char* ep, char* d, char* a, int lev, int flag)
{
	if(lev > DLEV+1)
		return 0;
	if(*ep=='s') {
		switch(ep[-1]) {
		case 'y':
			if(isvowel(ep[-2])||isupper((uchar)*word))
				break;	/*says Kennedys*/
			/* fall through */
		case 'x':
		case 'z':
		case 's':
			return 0;
		case 'h':
			switch(ep[-2]) {
			case 'c':
			case 's':
				return 0;
			}
		}
	}
	return strip(ep,d,a,lev,flag);
}

/* -an/-ian: only accepted when the word is capitalized. */
Bits
an(char* ep, char* d, char* a, int lev, int flag)
{
#pragma	ref d
	if(!isupper((uchar)*word))	/*must be proper name*/
		return 0;
	return trypref(ep,a,lev,flag);
}

/* Temporarily replace the character before ep with 'e' and strip. */
Bits
ize(char* ep, char* d, char* a, int lev, int flag)
{
#pragma	ref a
	int temp = ep[-1];
	Bits h;

	ep[-1] = 'e';
	h = strip(ep,"",d,lev,flag);
	ep[-1] = temp;
	return h;
}

/*
 * Temporarily put an 'e' at ep and strip after it; refused when the
 * stem ends in a, e or i.  The overwritten character is restored.
 */
Bits
y_to_e(char* ep, char* d, char* a, int lev, int flag)
{
#pragma	ref a
	Bits h;
	int  temp;

	switch(ep[-1]) {
	case 'a':
	case 'e':
	case 'i':
		return 0;
	}
	temp = *ep;
	*ep++ = 'e';
	h = strip(ep,"",d,lev,flag);
	*--ep = temp;
	return h;
}

/* -ly, -ness, -less, ...: handles stems whose final y became i. */
Bits
ily(char* ep, char* d, char* a, int lev, int flag)
{
	int temp = ep[0];
	char *cp = ep;

	if(temp==ep[-1]&&temp==ep[-2])		/* sillly */
		return 0;
	if(*--cp=='y' && !isvowel(*--cp))	/* happyly */
		while(cp>word)
			if(isvowel(*--cp))	/* shyness */
				return 0;
	if(ep[-1]=='i')
		return i_to_y(ep,d,a,lev,flag);
	return cstrip(ep,d,a,lev,flag);
}

/* -bility: write an 'l' at ep and continue as y_to_e ("-le+ility"). */
Bits
bility(char* ep, char* d, char* a, int lev, int flag)
{
	*ep++ = 'l';
	return y_to_e(ep,d,a,lev,flag);
}

/*
 * If the stem ends in consonant+i, temporarily turn the i into y
 * (using d as the message) and strip.  Capitalized words are refused.
 */
Bits
i_to_y(char* ep, char* d, char* a, int lev, int flag)
{
	Bits h;
	int temp;

	if(isupper((uchar)*word))
		return 0;
	if((temp=ep[-1])=='i' && !isvowel(ep[-2])) {
		ep[-1] = 'y';
		a = d;
	}
	h = cstrip(ep,"",a,lev,flag);
	ep[-1] = temp;
	return h;
}

/* -es: only after i (from y), ch, sh, s, z or x. */
Bits
es(char* ep, char* d, char* a, int lev, int flag)
{
	if(lev>DLEV)
		return 0;
	switch(ep[-1]) {
	default:
		return 0;
	case 'i':
		return i_to_y(ep,d,a,lev,flag);
	case 'h':
		switch(ep[-2]) {
		default:
			return 0;
		case 'c':
		case 's':
			break;
		}
		/* fall through */
	case 's':
	case 'z':
	case 'x':
		return strip(ep,d,a,lev,flag);
	}
}

/*
 * Apply a "-old+new" substitution: write the text between '-' and '+'
 * of d in front of ep, strip, then write the text after '+' back over
 * the same place.
 */
Bits
subst(char* ep, char* d, char* a, int lev, int flag)
{
#pragma	ref a
	char *u,*t;
	Bits h;

	if(skipv(skipv(ep-1)) < word)
		return 0;
	for(t=d; *t!='+'; t++)
		continue;
	for(u=ep; *--t!='-';)
		*--u = *t;
	h = strip(ep,"",d,lev,flag);
	while(*++t != '+')
		continue;
	while(*++t)
		*u++ = *t;
	return h;
}

/* -tion/-tor: after a vowel (two back) try restoring an 'e', else look up as is. */
Bits
tion(char* ep, char* d, char* a, int lev, int flag)
{
	switch(ep[-2]) {
	default:
		return trypref(ep,a,lev,flag);
	case 'a':
	case 'e':
	case 'i':
	case 'o':
	case 'u':
		return y_to_e(ep,d,a,lev,flag);
	}
}

/*
 * possible consonant-consonant-e ending
 */
Bits
CCe(char* ep, char* d, char* a, int lev, int flag)
{
	Bits h;

	switch(ep[-1]) {
	case 'l':
		if(isvowel(ep[-2]))
			break;
		switch(ep[-2]) {
		case 'l':
		case 'r':
		case 'w':
			break;
		default:
			return y_to_e(ep,d,a,lev,flag);
		}
		break;
	case 'c':
	case 'g':
		if(*ep == 'a')	/* prevent -able for -eable */
			return 0;
		/* fall through */
	case 's':
	case 'v':
	case 'z':
		if(ep[-2]==ep[-1])
			break;
		if(isvowel(ep[-2]))
			break;
		/* fall through */
	case 'u':
		if((h = y_to_e(ep,d,a,lev,flag)) != 0)
			return h;
		if(!(ep[-2]=='n' && ep[-1]=='g'))
			return 0;
	}
	return VCe(ep,d,a,lev,flag);
}

/*
 * possible consonant-vowel-consonant-e ending
 */
Bits
VCe(char* ep, char* d, char* a, int lev, int flag)
{
	int c;
	Bits h;

	c = ep[-1];
	if(c=='e')
		return 0;
	if(!isvowel(c) && isvowel(ep[-2])) {
		c = *ep;
		*ep++ = 'e';
		h = trypref(ep,d,lev,flag);
		if(!h)
			h = trysuff(ep,lev,flag);
		if(h)
			return h;	/* on success the 'e' is left in word */
		ep--;
		*ep = c;
	}
	return cstrip(ep,d,a,lev,flag);
}

/*
 * Find a prefix of the text at *wp that leaves at least one vowel
 * before ep.  On success *wp is advanced past the prefix and its
 * table entry is returned; otherwise 0.
 */
Ptab*
lookuppref(char** wp, char* ep)
{
	Ptab *sp;
	char *bp,*cp;
	int initchar = Tolower((uchar)**wp);

	if(!isalpha(initchar))
		return 0;
	for(sp=preftab[initchar-'a'];sp->s;sp++) {
		bp = *wp;
		for(cp= sp->s;*cp; )
			if(*bp++!=*cp++)
				goto next;
		for(cp=bp;cp<ep;cp++)
			if(isvowel(*cp)) {
				*wp = bp;
				return sp;
			}
	next:;
	}
	return 0;
}

/*	while word is not in dictionary try stripping
 *	prefixes. Fail if no more prefixes.
 *
 *	a is the message recorded for this derivation level ("." for
 *	the bare word).  Returns the dictionary bits or 0.
 */
Bits
trypref(char* ep, char* a, int lev, int flag)
{
	Ptab *tp;
	char *bp, *cp;
	char *pp;
	Bits h;
	char prefbuf[20];	/* "+prefix+prefix..." message, truncated to fit */

	if(lev<DSIZ) {
		deriv[lev].mesg = a;
		deriv[lev].type = *a=='.'? NONE: SUFF;
	}
	if((h = tryword(word,ep,lev,flag)) != 0) {
		if(Set(h, flag&~MONO) && (flag&MONO) <= Set(h, MONO))
			return h;
		h = 0;
	}
	bp = word;
	pp = prefbuf;
	*pp = 0;
	if(lev<DSIZ) {
		deriv[lev+1].mesg = pp;
		deriv[lev+1].type = 0;
	}
	while((tp=lookuppref(&bp,ep)) != 0) {
		/*
		 * Append "+prefix", never writing past the buffer and
		 * always leaving it NUL-terminated: stacked prefixes
		 * in the input can exceed 20 bytes.
		 */
		if(pp < prefbuf+sizeof(prefbuf)-1)
			*pp++ = '+';
		cp = tp->s;
		while(pp < prefbuf+sizeof(prefbuf)-1 && (*pp = *cp++))
			pp++;
		*pp = 0;
		if(lev<DSIZ)
			deriv[lev+1].type += PREF;
		h = tryword(bp,ep,lev+1,flag);
		if(Set(h,NOPREF) ||
		   ((tp->flag&IN) && inun(bp-2,h)==0)) {
			h = 0;
			break;
		}
		if(Set(h,flag&~MONO) && (flag&MONO) <= Set(h, MONO))
			break;
		h = 0;
	}
	/* drop the pointers into prefbuf before it goes out of scope */
	if(lev<DSIZ)
		deriv[lev+1] = deriv[lev+2] = emptyderiv;
	return h;
}

/*
 * Look up the text bp..ep in the dictionary.  With -v, a hit also
 * appends the recorded derivation messages to affix and recomputes
 * prefcount and suffcount.
 */
Bits
tryword(char* bp, char* ep, int lev, int flag)
{
	int  j;
	Bits h = 0;
	char duple[3];

	if(ep-bp <= 1)
		return h;
	if(flag&MONO) {
		/* record the doubled letter as its own suffix step */
		if(lev<DSIZ) {
			deriv[++lev].mesg = duple;
			deriv[lev].type = SUFF;
		}
		duple[0] = '+';
		duple[1] = *ep;
		duple[2] = 0;
	}
	h = dict(bp, ep);
	if(vflag==0 || h==0)
		return h;
	/*
	 * when derivations are wanted, collect them
	 * for printing
	 */
	j = lev;
	prefcount = suffcount = 0;
	do {
		if(j<DSIZ && deriv[j].type) {
			/* bounded: rejected hits for one word can accumulate here */
			strncat(affix, deriv[j].mesg,
				sizeof(affix)-strlen(affix)-1);
			if(deriv[j].type == SUFF)
				suffcount++;
			else if(deriv[j].type != NONE)
				prefcount = deriv[j].type/PREF;
		}
	} while(--j > 0);
	return h;
}

/*
 * bp points at an "un", "in", "im" or "ir" prefix and h holds the
 * dictionary bits of the stem.  Returns nonzero when this prefix form
 * is acceptable: "un" requires IN to be clear in h, the i- forms
 * require IN set and the prefix's second letter to fit the stem's
 * first letter.
 */
int
inun(char* bp, Bits h)
{
	if(*bp == 'u')
		return Set(h, IN) == 0;
	/* *bp == 'i' */
	if(Set(h, IN) == 0)
		return 0;
	switch(bp[2]) {
	case 'r':
		return bp[1] == 'r';
	case 'm':
	case 'p':
		return bp[1] == 'm';
	}
	return bp[1] == 'n';
}

/* Step back over at most one vowel, then over any run of consonants. */
char*
skipv(char *s)
{
	if(s >= word && isvowel(*s))
		s--;
	while(s >= word && !isvowel(*s))
		s--;
	return s;
}

/*
 * crummy way to Britishise
 */
void
ise(void)
{
	Suftab *p;
	int i;

	for(i=0; i<26; i++)
		for(p = suftab[i]; p->suf; p++) {
			p->suf = ztos(p->suf);
			p->d1 = ztos(p->d1);
			p->a1 = ztos(p->a1);
		}
}

/*
 * Return as unchanged if it contains no 'z'; otherwise return a newly
 * allocated copy with every 'z' replaced by 's'.
 */
char*
ztos(char *as)
{
	char *s, *ds;

	for(s=as; *s; s++)
		if(*s == 'z')
			goto copy;
	return as;

copy:
	ds = strdupl(as);
	for(s=ds; *s; s++)
		if(*s == 'z')
			*s = 's';
	return ds;
}

/*
 * Binary-search the in-memory dictionary for the text bp..ep.
 * Returns the entry's encode[] bits, NOUN for text of length <= 1,
 * or 0 when not found.  With -x, each probe is traced on stderr.
 */
Bits
dict(char* bp, char* ep)
{
	char *cp, *cp1, *w, *wp, *we;
	int n, f;

	w = bp;
	we = ep;
	n = ep-bp;
	if(n <= 1)
		return NOUN;

	/* the first two letters select the slice of space[] to search */
	f = w[0] & 0x7f;
	f *= 128;
	f += w[1] & 0x7f;
	bp = spacep[f];
	ep = spacep[f+1];

loop:
	if(bp >= ep) {
		if(xflag)
			fprintf(stderr,"=%.*s\n",n, w);
		return 0;
	}
	/*
	 * find the beginning of some word in the middle
	 */
	cp = bp + (ep-bp)/2;

	while(cp > bp && !(*cp & 0x80))
		cp--;
	while(cp > bp && (cp[-1] & 0x80))
		cp--;

	wp = w + 2;	/* skip two letters */
	cp1 = cp + 2;	/* skip affix code */
	for(;;) {
		if(wp >= we) {
			if(*cp1&0x80)
				goto found;
			else
				f = 1;
			break;
		}
		if(*cp1&0x80) {
			f = -1;
			break;
		}
		f = *cp1++ - *wp++;
		if(f != 0)
			break;
	}

	if(f < 0) {
		/* entry sorts before the word: continue after this entry */
		while(!(*cp1&0x80))
			cp1++;
		bp = cp1;
		goto loop;
	}
	ep = cp;
	goto loop;

found:
	f = ((cp[0] & 0x7) << 8) |
		(cp[1] & 0xff);
	if(xflag) {
		fprintf(stderr,"=%.*s ",n,w);
		typeprint(encode[f]);
	}
	return encode[f];
}

/* Print the names of the bits set in h on stderr, comma separated. */
void
typeprint(Bits h)
{

	pcomma("");
	if(h & NOUN)
		pcomma("n");
	if(h & PROP_COLLECT)
		pcomma("pc");
	if(h & VERB) {
		if((h & VERB) == VERB)
			pcomma("v");
		else
		if((h & VERB) == V_IRREG)
			pcomma("vi");
		else
		if(h & ED)
			pcomma("ed");
	}
	if(h & ADJ)
		pcomma("a");
	if(h & COMP) {
		if((h & COMP) == ACTOR)
			pcomma("er");
		else
			pcomma("comp");
	}
	if(h & DONT_TOUCH)
		pcomma("d");
	if(h & N_AFFIX)
		pcomma("na");
	if(h & ADV)
		pcomma("adv");
	if(h & ION)
		pcomma("ion");
	if(h & V_AFFIX)
		pcomma("va");
	if(h & MAN)
		pcomma("man");
	if(h & NOPREF)
		pcomma("nopref");
	if(h & MONO)
		pcomma("ms");
	if(h & IN)
		pcomma("in");
	if(h & _Y)
		pcomma("y");
	if(h & STOP)
		pcomma("s");
	fprintf(stderr, "\n");
}

/*
 * Print s on stderr, preceded by a comma unless it is the first item
 * since the last call with an empty string (which resets the state).
 */
void
pcomma(char *s)
{
	static int flag;

	if(*s == 0) {
		flag = 0;
		return;
	}
	if(!flag) {
		fprintf(stderr, "%s", s);
		flag = 1;
	} else
		fprintf(stderr, ",%s", s);
}

/*
 * Return nonzero if word is digits followed by exactly the ordinal
 * suffix that fits them: "th" after a tens digit of 1, otherwise
 * "st", "nd", "rd" after a final 1, 2, 3 and "th" after anything
 * else.  A suffix whose first two letters are upper case is accepted
 * too.
 */
int
ordinal(void)
{
	char *cp = word;
	char *suf;
	static char sp[4];	/* sp[3] is never written, so it stays 0 */

	while(isdigit((uchar)*cp))
		cp++;
	if(cp == word)		/* no digits at all */
		return 0;
	strncpy(sp,cp,3);
	if(isupper((uchar)cp[0]) && isupper((uchar)cp[1])) {
		sp[0] = Tolower(cp[0]);
		sp[1] = Tolower(cp[1]);
	}
	/* look at the tens digit only when there is one */
	if(cp-word >= 2 && cp[-2]=='1')
		suf = "th";
	else
	switch(cp[-1]) {
	case '1':
		suf = "st";
		break;
	case '2':
		suf = "nd";
		break;
	case '3':
		suf = "rd";
		break;
	default:
		suf = "th";
		break;
	}
	return 0 == strncmp(sp, suf, 3);
}

/*
 * layout of file entry: first byte has bit 0x80 turned on.
 * next 4 bits count number of characters common between
 * this entry and previous one.  last three bits concatenated
 * with second byte are the affixing code, so arranged that
 * the 0x80 bit is zero in all bytes but the first.  3rd and
 * following bytes are the remainder of the dictionary word.
 *
 * layout in memory: common prefixes are expanded, and the
 * first two letters of each word are deleted and found
 * instead by lookup in table spacep, which points to the
 * first word for each two-letter prefix.
 *
 * The file starts with a 2-byte big-endian count of affix codes
 * followed by that many 4-byte big-endian code values.  Any failure
 * prints a message and exits.
 */
void
readdict(char *file)
{
	char *s, *is, *lasts, *ls;
	int c, i, sp, p;
	int f;
	long l;
	unsigned long v;

	f = open(file, 0);
	if(f == -1) {
		fprintf(stderr, "spell: cannot open %s\n", file);
		exit(1);
	}
	if(read(f, space, 2) != 2)
		goto bad;
	nencode = ((space[0]&0xff)<<8) | (space[1]&0xff);
	if(nencode > (int)(sizeof(encode)/sizeof(*encode)))
		goto noroom;
	/* codes occupy 4 bytes each in the file, whatever sizeof(Bits) is */
	l = 4L*nencode;
	if(read(f, space, l) != l)
		goto bad;
	s = space;
	for(i=0; i<nencode; i++) {
		v = (unsigned long)(s[0] & 0xff) << 24;
		v |= (unsigned long)(s[1] & 0xff) << 16;
		v |= (unsigned long)(s[2] & 0xff) << 8;
		v |= (unsigned long)(s[3] & 0xff);
		encode[i] = (Bits)v;
		s += 4;
	}
	l = read(f, space, sizeof(space));
	if(l <= 0)		/* read error, or no entries to parse */
		goto bad;
	if(l >= (long)sizeof(space))
		goto noroom;
	/* move the packed text to the top of space[]; expand from the bottom */
	is = space + (sizeof(space) - l);
	memmove(is, space, l);

	s = space;
	c = *is++ & 0xff;
	sp = -1;
	i = 0;
	lasts = 0;	/* to prevent diagnostics */

loop:
	if(s > is)
		goto noroom;
	if(c < 0) {
		close(f);
		while(sp < 128*128)
			spacep[++sp] = s;
		*s++ = 0x80;		/* fence */
		return;
	}
	p = (c>>3) & 0xf;	/* characters shared with the previous entry */
	*s++ = c;
	*s++ = *is++ & 0xff;
	if(p <= 0)
		i = (*is++ & 0xff)*128;
	if(p <= 1) {
		if(!(*is & 0x80))
			i = i/128*128 + (*is++ & 0xff);
		if(i <= sp) {
			fprintf(stderr, "spell: the dict isn't "
				"sorted or memmove didn't work\n");
			goto bad;
		}
		while(sp < i)
			spacep[++sp] = s-2;
	}
	ls = lasts;
	lasts = s;
	for(p-=2; p>0; p--)
		*s++ = *ls++;
	for(;;) {
		if(is >= space+sizeof(space)) {
			c = -1;
			break;
		}
		c = *is++ & 0xff;
		if(c & 0x80)
			break;
		*s++ = c;
	}
	*s = 0;
	goto loop;

bad:
	fprintf(stderr, "spell: trouble reading %s\n", file);
	exit(1);
noroom:
	fprintf(stderr,"spell: not enough space for dictionary\n");
	exit(1);
}

/* same as strdup; portability hack.  Exits if memory cannot be allocated. */
char *
strdupl(char *s)
{
	char *t = malloc(strlen(s)+1);

	if(t == 0) {
		fprintf(stderr, "spell: out of memory\n");
		exit(1);
	}
	strcpy(t, s);
	return t;
}

/*
 * Handle an overlong input line whose first part is in s: without -c,
 * echo s and the rest of the line; with -c, write a single '-'.
 * Either way the rest of the line is consumed; exits at EOF.
 */
void
runout(char *s)
{
	int c;

	if(!cflag)
		printf("%s", s);
	else {
		putchar('-');
		fflush(stdout);
	}
	do {
		if((c=getchar()) == EOF)
			exit(0);
		if(!cflag)
			putchar(c);
	} while(c != '\n');
}