/* * egrep -- print lines containing (or not containing) a regular expression * * status returns: * 0 - ok, and some matches * 1 - ok, but no matches * 2 - some error */ %token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST %left OR %left CHAR DOT CCL NCCL '(' %left CAT %left STAR PLUS QUEST %{ #define MAXLIN 350 #define MAXPOS 4000 #define NCHARS 128 #define NSTATES 128 #define FINAL -1 char gotofn[NSTATES][NCHARS]; int state[NSTATES]; char out[NSTATES]; int line 1; int name[MAXLIN]; int left[MAXLIN]; int right[MAXLIN]; int parent[MAXLIN]; int foll[MAXLIN]; int positions[MAXPOS]; char chars[MAXLIN]; int nxtpos; int nxtchar 0; int tmpstat[MAXLIN]; int initstat[MAXLIN]; int xstate; int count; int icount; int numstat; char *input; char buf[1024]; long lnum; int aflag; int bflag; int nflag; int cflag; int vflag; int fflag; int nfile; int blkno; long tln; int nsucc; int f; int fname; int xargc; char **xargv; %} %% s: t ={ unary(FINAL, $1); line--; } ; t: b r ={ $$ = node(CAT, $1, $2); } | OR b r OR ={ $$ = node(CAT, $2, $3); } | OR b r ={ $$ = node(CAT, $2, $3); } | b r OR ={ $$ = node(CAT, $1, $2); } ; b: ={ $$ = enter(DOT); $$ = unary(STAR, $$); } ; r: CHAR ={ $$ = enter($1); } | DOT ={ $$ = enter(DOT); } | CCL ={ $$ = cclenter(CCL); } | NCCL ={ $$ = cclenter(NCCL); } ; r: r OR r ={ $$ = node(OR, $1, $3); } | r r %prec CAT ={ $$ = node(CAT, $1, $2); } | r STAR ={ $$ = unary(STAR, $1); } | r PLUS ={ $$ = unary(PLUS, $1); } | r QUEST ={ $$ = unary(QUEST, $1); } | '(' r ')' ={ $$ = $2; } | error ; %% yyerror(s) { printf2("egrep: %s\n", s); } yylex() { extern int yylval; int cclcnt, x; register c; switch(c = nextch()) { case '$': case '^': c = '\n'; goto defchar; case '|': return (OR); case '*': return (STAR); case '+': return (PLUS); case '?': return (QUEST); case '(': return (c); case ')': return (c); case '.': return (DOT); case '\0': return (0); case '\n': return (OR); case '[': x = CCL; cclcnt = 0; count = nxtchar++; if ((c = nextch()) == '^') { x = NCCL; c = nextch(); } do { if (c == '\0') synerror(); if (nxtchar > MAXLIN) overflo(); chars[nxtchar++] = c; cclcnt++; } while ((c = nextch()) != ']'); chars[count] = cclcnt; return (x); case '\\': if ((c = nextch()) == '\0') synerror(); defchar: default: yylval = c; return (CHAR); } } nextch() { register char c; register cread; if ((c = *input++) == '\0') { if (buf[512] == 0) { if ((cread = read(f, buf, 512)) <= 0) { if (fname) close(f); return (0); } else { input = buf; buf[cread] = 0; if ((c = *input++) == '\0') synerror(); return (c); } } else return (0); } else return (c); } synerror() { printf2("egrep: syntax error\n"); } enter(x) int x; { if(line >= MAXLIN) overflo(); name[line] = x; left[line] = 0; right[line] = 0; return(line++); } cclenter(x) int x; { register linno; linno = enter(x); right[linno] = count; return (linno); } node(x, l, r) { if(line >= MAXLIN) overflo(); name[line] = x; left[line] = l; right[line] = r; parent[l] = line; parent[r] = line; return(line++); } unary(x, d) { if(line >= MAXLIN) overflo(); name[line] = x; left[line] = d; right[line] = 0; parent[d] = line; return(line++); } overflo() { extern fout; flush(); fout = 2; printf("regular expression too long\n"); printf("try fgrep if pattern only keywords\n"); flush(); exit(2); } cfoll(v) { register i; if (left[v] == 0) { count = 0; for (i=1; i<=line; i++) tmpstat[i] = 0; follow(v); add(foll, v); } else if (right[v] == 0) cfoll(left[v]); else { cfoll(left[v]); cfoll(right[v]); } } cgotofn() { register c, i, k; int n, s; char symbol[NCHARS]; int j, nc, pc, pos; int curpos, num; int number, newpos; count = 0; for (n=3; n<=line; n++) tmpstat[n] = 0; if (cstate(line-1)==0) { tmpstat[line] = 1; count++; out[0] = 1; } for (n=3; n<=line; n++) initstat[n] = tmpstat[n]; count--; /*leave out position 1 */ icount = count; tmpstat[1] = 0; add(state, 0); n = 0; for (s=0; s<=n; s++) { if (out[s] == 1) continue; for (i=0; i<NCHARS; i++) symbol[i] = 0; num = positions[state[s]]; count = icount; for (i=3; i<=line; i++) tmpstat[i] = initstat[i]; pos = state[s] + 1; for (i=0; i<num; i++) { curpos = positions[pos]; if ((c = name[curpos]) >= 0) { if (c < NCHARS) symbol[c] = 1; else if (c == DOT) { for (k=0; k<NCHARS; k++) symbol[k] = 1; symbol['\n'] = 0; /* leave out newline */ } else if (c == CCL) { nc = chars[right[curpos]]; pc = right[curpos] + 1; for (k=0; k<nc; k++) symbol[chars[pc++]] = 1; } else if (c == NCCL) { nc = chars[right[curpos]]; for (j = 0; j < NCHARS; j++) { pc = right[curpos] + 1; for (k = 0; k < nc; k++) if (j==chars[pc++]) goto cont; symbol[j] = 1; cont:; } symbol['\n'] = 0; } else printf("something's funny\n"); } pos++; } for (c=0; c<NCHARS; c++) { if (symbol[c] == 1) { /* nextstate(s,c) */ count = icount; for (i=3; i <= line; i++) tmpstat[i] = initstat[i]; pos = state[s] + 1; for (i=0; i<num; i++) { curpos = positions[pos]; if ((k = name[curpos]) >= 0) if ( (k == c) | (k == DOT) | (k == CCL && member(c, right[curpos], 1)) | (k == NCCL && member(c, right[curpos], 0)) ) { number = positions[foll[curpos]]; newpos = foll[curpos] + 1; for (k=0; k<number; k++) { if (tmpstat[positions[newpos]] != 1) { tmpstat[positions[newpos]] = 1; count++; } newpos++; } } pos++; } /* end nextstate */ if (notin(n)) { if (n >= NSTATES) overflo(); add(state, ++n); if (tmpstat[line] == 1) out[n] = 1; gotofn[s][c] = n; } else { gotofn[s][c] = xstate; } } } } numstat = n+1; } cstate(v) { register b; if (left[v] == 0) { if (tmpstat[v] != 1) { tmpstat[v] = 1; count++; } return(1); } else if (right[v] == 0) { if (cstate(left[v]) == 0) return (0); else if (name[v] == PLUS) return (1); else return (0); } else if (name[v] == CAT) { if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0); else return (1); } else { /* name[v] == OR */ b = cstate(right[v]); if (cstate(left[v]) == 0 || b == 0) return (0); else return (1); } } member(symb, set, torf) { register i, num, pos; num = chars[set]; pos = set + 1; for (i=0; i<num; i++) if (symb == chars[pos++]) return (torf); return (!torf); } notin(n) { register i, j, pos; for (i=0; i<=n; i++) { if (positions[state[i]] == count) { pos = state[i] + 1; for (j=0; j < count; j++) if (tmpstat[positions[pos++]] != 1) goto nxt; xstate = i; return (0); } nxt: ; } return (1); } add(array, n) int *array; { register i; if (nxtpos + count > MAXPOS) overflo(); array[n] = nxtpos; positions[nxtpos++] = count; for (i=3; i <= line; i++) { if (tmpstat[i] == 1) { positions[nxtpos++] = i; } } } follow(v) int v; { int p; if (v == line) return; p = parent[v]; switch(name[p]) { case STAR: case PLUS: cstate(v); follow(p); return; case OR: case QUEST: follow(p); return; case CAT: if (v == left[p]) { if (cstate(right[p]) == 0) { follow(p); return; } } else follow(p); return; case FINAL: if (tmpstat[line] != 1) { tmpstat[line] = 1; count++; } return; } } yyinit(argc, argv) char **argv; { register cread; xargc = argc; xargv = argv; while (--xargc > 0 && (++xargv)[0][0]=='-') switch (xargv[0][1]) { case 'a': aflag++; continue; case 'v': vflag++; continue; case 'b': bflag++; continue; case 'c': cflag++; continue; case 'n': nflag++; continue; case 'f': fflag++; continue; default: printf2("Unknown flag\n"); continue; } if (xargc<=0) exit(2); if (fflag) { input = buf; buf[512] = 0; fname = *xargv; xargc--; xargv++; if (fname) { if ((f = open(fname,0)) < 0) printf2("Can't open %s\n", fname); } else f = 0; if ((cread = read(f, buf, 512)) <= 0) { close(f); printf2("File %s empty\n", fname); } else buf[cread] = 0; } else { buf[512] = 1; input = *xargv; xargc--; xargv++; } } yyaccpt() { cfoll(line-1); cgotofn(); if (aflag) { printf("no. of states = %d\n", numstat); printf("size of tree = %d\n", line); printf("positions = %d\n\n", nxtpos); } nfile = xargc; if (xargc<=0) execute(0); else while (--xargc >= 0) { execute(*xargv); xargv++; } flush(); exit(nsucc == 0); } execute(file) char *file; { register char *p; register cstat; register ccount; char *nlp; int istat; if (file) { if ((f = open(file, 0)) < 0) { printf2("Can't open %s\n", file); } } else f = 0; ccount = 0; lnum = 1; tln = 0; blkno = 0; p = buf; nlp = p; if ((ccount = read(f,p,512))<=0) goto done; istat = cstat = gotofn[0]['\n']; if (out[cstat]) goto found; for (;;) { if (*p <= 0) { p++; count--; goto brk2; } cstat = gotofn[cstat][*p]; if (out[cstat]) { found: for(;;) { if (*p++ == '\n') { if (vflag == 0) { succeed: nsucc = 1; if (cflag) tln++; else { if (nfile > 1) printf("%s:", file); if (bflag) printf("%d:", blkno); if (nflag) printf("%ld:", lnum); if (p > nlp) write(1, nlp, p-nlp); else { write(1, nlp, &buf[1024] - nlp); write(1, buf, p-&buf[0]); } } } lnum++; nlp = p; if ((out[(cstat=istat)]) == 0) goto brk2; } cfound: if (--ccount <= 0) { if (p <= &buf[512]) { if ((ccount = read(f, p, 512)) <= 0) goto done; } else if (p == &buf[1024]) { p = buf; if ((ccount = read(f, p, 512)) <= 0) goto done; } else { if ((ccount = read(f, p, &buf[1024]-p)) <= 0) goto done; } blkno++; } } } if (*p++ == '\n') { if (vflag) goto succeed; else { lnum++; nlp = p; if (out[(cstat=istat)]) goto cfound;; } } brk2: if (--ccount <= 0) { if (p <= &buf[512]) { if ((ccount = read(f, p, 512)) <= 0) break; } else if (p == &buf[1024]) { p = buf; if ((ccount = read(f, p, 512)) <= 0) break; } else { if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break; } blkno++; } } done: close(f); if (cflag) { if (nfile > 1) printf("%s:", file); printf("%ld\n", tln); } } printf2(s, a) { extern fout; flush(); fout = 2; printf(s, a); flush(); exit(2); }