# define CHAR 257 # define DOT 258 # define CCL 259 # define NCCL 260 # define OR 261 # define CAT 262 # define STAR 263 # define PLUS 264 # define QUEST 265 # line 16 "egrep.y" static char *sccsid = "@(#)egrep.y 4.2 (Berkeley) 11/8/82"; #include <stdio.h> #define MAXLIN 350 #define MAXPOS 4000 #define NCHARS 128 #define NSTATES 128 #define FINAL -1 char gotofn[NSTATES][NCHARS]; int state[NSTATES]; char out[NSTATES]; int line = 1; int name[MAXLIN]; int left[MAXLIN]; int right[MAXLIN]; int parent[MAXLIN]; int foll[MAXLIN]; int positions[MAXPOS]; char chars[MAXLIN]; int nxtpos; int nxtchar = 0; int tmpstat[MAXLIN]; int initstat[MAXLIN]; int xstate; int count; int icount; char *input; FILE *exprfile; long lnum; int bflag; int cflag; int fflag; int lflag; int nflag; int hflag = 1; int sflag; int vflag; int nfile; int blkno; long tln; int nsucc; int f; char *fname; #define yyclearin yychar = -1 #define yyerrok yyerrflag = 0 extern int yychar; extern short yyerrflag; #ifndef YYMAXDEPTH #define YYMAXDEPTH 150 #endif #ifndef YYSTYPE #define YYSTYPE int #endif YYSTYPE yylval, yyval; # define YYERRCODE 256 # line 107 "egrep.y" yyerror(s) { fprintf(stderr, "egrep: %s\n", s); exit(2); } yylex() { extern int yylval; int cclcnt, x; register char c, d; switch(c = nextch()) { case '$': case '^': c = '\n'; goto defchar; case '|': return (OR); case '*': return (STAR); case '+': return (PLUS); case '?': return (QUEST); case '(': return (c); case ')': return (c); case '.': return (DOT); case '\0': return (0); case '\n': return (OR); case '[': x = CCL; cclcnt = 0; count = nxtchar++; if ((c = nextch()) == '^') { x = NCCL; c = nextch(); } do { if (c == '\0') synerror(); if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) { if ((d = nextch()) != 0) { c = chars[nxtchar-1]; while (c < d) { if (nxtchar >= MAXLIN) overflo(); chars[nxtchar++] = ++c; cclcnt++; } continue; } } if (nxtchar >= MAXLIN) overflo(); chars[nxtchar++] = c; cclcnt++; } while ((c = nextch()) != ']'); chars[count] = cclcnt; return (x); case '\\': if ((c = nextch()) == '\0') synerror(); defchar: default: yylval = c; return (CHAR); } } nextch() { register char c; if (fflag) { if ((c = getc(exprfile)) == EOF) { fclose(exprfile); return(0); } } else c = *input++; return(c); } synerror() { fprintf(stderr, "egrep: syntax error\n"); exit(2); } enter(x) int x; { if(line >= MAXLIN) overflo(); name[line] = x; left[line] = 0; right[line] = 0; return(line++); } cclenter(x) int x; { register linno; linno = enter(x); right[linno] = count; return (linno); } node(x, l, r) { if(line >= MAXLIN) overflo(); name[line] = x; left[line] = l; right[line] = r; parent[l] = line; parent[r] = line; return(line++); } unary(x, d) { if(line >= MAXLIN) overflo(); name[line] = x; left[line] = d; right[line] = 0; parent[d] = line; return(line++); } overflo() { fprintf(stderr, "egrep: regular expression too long\n"); exit(2); } cfoll(v) { register i; if (left[v] == 0) { count = 0; for (i=1; i<=line; i++) tmpstat[i] = 0; follow(v); add(foll, v); } else if (right[v] == 0) cfoll(left[v]); else { cfoll(left[v]); cfoll(right[v]); } } cgotofn() { register c, i, k; int n, s; char symbol[NCHARS]; int j, nc, pc, pos; int curpos, num; int number, newpos; count = 0; for (n=3; n<=line; n++) tmpstat[n] = 0; if (cstate(line-1)==0) { tmpstat[line] = 1; count++; out[0] = 1; } for (n=3; n<=line; n++) initstat[n] = tmpstat[n]; count--; /*leave out position 1 */ icount = count; tmpstat[1] = 0; add(state, 0); n = 0; for (s=0; s<=n; s++) { if (out[s] == 1) continue; for (i=0; i<NCHARS; i++) symbol[i] = 0; num = positions[state[s]]; count = icount; for (i=3; i<=line; i++) tmpstat[i] = initstat[i]; pos = state[s] + 1; for (i=0; i<num; i++) { curpos = positions[pos]; if ((c = name[curpos]) >= 0) { if (c < NCHARS) symbol[c] = 1; else if (c == DOT) { for (k=0; k<NCHARS; k++) if (k!='\n') symbol[k] = 1; } else if (c == CCL) { nc = chars[right[curpos]]; pc = right[curpos] + 1; for (k=0; k<nc; k++) symbol[chars[pc++]] = 1; } else if (c == NCCL) { nc = chars[right[curpos]]; for (j = 0; j < NCHARS; j++) { pc = right[curpos] + 1; for (k = 0; k < nc; k++) if (j==chars[pc++]) goto cont; if (j!='\n') symbol[j] = 1; cont:; } } else printf("something's funny\n"); } pos++; } for (c=0; c<NCHARS; c++) { if (symbol[c] == 1) { /* nextstate(s,c) */ count = icount; for (i=3; i <= line; i++) tmpstat[i] = initstat[i]; pos = state[s] + 1; for (i=0; i<num; i++) { curpos = positions[pos]; if ((k = name[curpos]) >= 0) if ( (k == c) | (k == DOT) | (k == CCL && member(c, right[curpos], 1)) | (k == NCCL && member(c, right[curpos], 0)) ) { number = positions[foll[curpos]]; newpos = foll[curpos] + 1; for (k=0; k<number; k++) { if (tmpstat[positions[newpos]] != 1) { tmpstat[positions[newpos]] = 1; count++; } newpos++; } } pos++; } /* end nextstate */ if (notin(n)) { if (n >= NSTATES) overflo(); add(state, ++n); if (tmpstat[line] == 1) out[n] = 1; gotofn[s][c] = n; } else { gotofn[s][c] = xstate; } } } } } cstate(v) { register b; if (left[v] == 0) { if (tmpstat[v] != 1) { tmpstat[v] = 1; count++; } return(1); } else if (right[v] == 0) { if (cstate(left[v]) == 0) return (0); else if (name[v] == PLUS) return (1); else return (0); } else if (name[v] == CAT) { if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0); else return (1); } else { /* name[v] == OR */ b = cstate(right[v]); if (cstate(left[v]) == 0 || b == 0) return (0); else return (1); } } member(symb, set, torf) { register i, num, pos; num = chars[set]; pos = set + 1; for (i=0; i<num; i++) if (symb == chars[pos++]) return (torf); return (!torf); } notin(n) { register i, j, pos; for (i=0; i<=n; i++) { if (positions[state[i]] == count) { pos = state[i] + 1; for (j=0; j < count; j++) if (tmpstat[positions[pos++]] != 1) goto nxt; xstate = i; return (0); } nxt: ; } return (1); } add(array, n) int *array; { register i; if (nxtpos + count > MAXPOS) overflo(); array[n] = nxtpos; positions[nxtpos++] = count; for (i=3; i <= line; i++) { if (tmpstat[i] == 1) { positions[nxtpos++] = i; } } } follow(v) int v; { int p; if (v == line) return; p = parent[v]; switch(name[p]) { case STAR: case PLUS: cstate(v); follow(p); return; case OR: case QUEST: follow(p); return; case CAT: if (v == left[p]) { if (cstate(right[p]) == 0) { follow(p); return; } } else follow(p); return; case FINAL: if (tmpstat[line] != 1) { tmpstat[line] = 1; count++; } return; } } main(argc, argv) char **argv; { while (--argc > 0 && (++argv)[0][0]=='-') switch (argv[0][1]) { case 's': sflag++; continue; case 'h': hflag = 0; continue; case 'b': bflag++; continue; case 'c': cflag++; continue; case 'e': argc--; argv++; goto out; case 'f': fflag++; continue; case 'l': lflag++; continue; case 'n': nflag++; continue; case 'v': vflag++; continue; default: fprintf(stderr, "egrep: unknown flag\n"); continue; } out: if (argc<=0) exit(2); if (fflag) { fname = *argv; exprfile = fopen(fname, "r"); if (exprfile == (FILE *)NULL) { fprintf(stderr, "egrep: can't open %s\n", fname); exit(2); } } else input = *argv; argc--; argv++; yyparse(); cfoll(line-1); cgotofn(); nfile = argc; if (argc<=0) { if (lflag) exit(1); execute(0); } else while (--argc >= 0) { execute(*argv); argv++; } exit(nsucc == 0); } execute(file) char *file; { register char *p; register cstat; register ccount; char buf[1024]; char *nlp; int istat; if (file) { if ((f = open(file, 0)) < 0) { fprintf(stderr, "egrep: can't open %s\n", file); exit(2); } } else f = 0; ccount = 0; lnum = 1; tln = 0; blkno = 0; p = buf; nlp = p; if ((ccount = read(f,p,512))<=0) goto done; istat = cstat = gotofn[0]['\n']; if (out[cstat]) goto found; for (;;) { cstat = gotofn[cstat][*p&0377]; /* all input chars made positive */ if (out[cstat]) { found: for(;;) { if (*p++ == '\n') { if (vflag == 0) { succeed: nsucc = 1; if (cflag) tln++; else if (sflag) ; /* ugh */ else if (lflag) { printf("%s\n", file); close(f); return; } else { if (nfile > 1 && hflag) printf("%s:", file); if (bflag) printf("%d:", blkno); if (nflag) printf("%ld:", lnum); if (p <= nlp) { while (nlp < &buf[1024]) putchar(*nlp++); nlp = buf; } while (nlp < p) putchar(*nlp++); } } lnum++; nlp = p; if ((out[(cstat=istat)]) == 0) goto brk2; } cfound: if (--ccount <= 0) { if (p <= &buf[512]) { if ((ccount = read(f, p, 512)) <= 0) goto done; } else if (p == &buf[1024]) { p = buf; if ((ccount = read(f, p, 512)) <= 0) goto done; } else { if ((ccount = read(f, p, &buf[1024]-p)) <= 0) goto done; } blkno++; } } } if (*p++ == '\n') { if (vflag) goto succeed; else { lnum++; nlp = p; if (out[(cstat=istat)]) goto cfound; } } brk2: if (--ccount <= 0) { if (p <= &buf[512]) { if ((ccount = read(f, p, 512)) <= 0) break; } else if (p == &buf[1024]) { p = buf; if ((ccount = read(f, p, 512)) <= 0) break; } else { if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break; } blkno++; } } done: close(f); if (cflag) { if (nfile > 1) printf("%s:", file); printf("%ld\n", tln); } } short yyexca[] ={ -1, 1, 0, -1, -2, 0, }; # define YYNPROD 18 # define YYLAST 261 short yyact[]={ 10, 22, 4, 14, 11, 2, 1, 5, 0, 0, 10, 15, 16, 17, 18, 0, 19, 20, 3, 0, 10, 0, 0, 12, 0, 20, 0, 20, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 6, 7, 8, 9, 21, 0, 15, 16, 17, 11, 6, 7, 8, 9, 23, 0, 15, 16, 17, 11, 6, 7, 8, 9, 13, 0, 15, 16, 17, 11, 6, 7, 8, 9, 0, 0, 15, 16, 17, 11, 6, 7, 8, 9 }; short yypact[]={ -259,-1000,-1000, 0,-1000, -20,-1000,-1000,-1000,-1000, 0,-1000, 0, 0,-252,-1000,-1000,-1000, -40, -30, -10, 0,-1000, 0 }; short yypgo[]={ 0, 6, 5, 18, 3 }; short yyr1[]={ 0, 1, 2, 2, 2, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 }; short yyr2[]={ 0, 1, 2, 4, 3, 3, 0, 1, 1, 1, 1, 3, 2, 2, 2, 2, 3, 1 }; short yychk[]={ -1000, -1, -2, -3, 261, -4, 257, 258, 259, 260, 40, 256, -3, 261, -4, 263, 264, 265, -4, -4, -4, 261, 41, 261 }; short yydef[]={ 6, -2, 1, 0, 6, 2, 7, 8, 9, 10, 0, 17, 0, 5, 12, 13, 14, 15, 0, 4, 11, 0, 16, 3 }; # # define YYFLAG -1000 # define YYERROR goto yyerrlab # define YYACCEPT return(0) # define YYABORT return(1) /* parser for yacc output */ #ifdef YYDEBUG int yydebug = 0; /* 1 for debugging */ #endif YYSTYPE yyv[YYMAXDEPTH]; /* where the values are stored */ int yychar = -1; /* current input token number */ int yynerrs = 0; /* number of errors */ short yyerrflag = 0; /* error recovery flag */ yyparse() { short yys[YYMAXDEPTH]; short yyj, yym; register YYSTYPE *yypvt; register short yystate, *yyps, yyn; register YYSTYPE *yypv; register short *yyxi; yystate = 0; yychar = -1; yynerrs = 0; yyerrflag = 0; yyps= &yys[-1]; yypv= &yyv[-1]; yystack: /* put a state and value onto the stack */ #ifdef YYDEBUG if( yydebug ) printf( "state %d, char 0%o\n", yystate, yychar ); #endif if( ++yyps> &yys[YYMAXDEPTH] ) { yyerror( "yacc stack overflow" ); return(1); } *yyps = yystate; ++yypv; *yypv = yyval; yynewstate: yyn = yypact[yystate]; if( yyn<= YYFLAG ) goto yydefault; /* simple state */ if( yychar<0 ) if( (yychar=yylex())<0 ) yychar=0; if( (yyn += yychar)<0 || yyn >= YYLAST ) goto yydefault; if( yychk[ yyn=yyact[ yyn ] ] == yychar ){ /* valid shift */ yychar = -1; yyval = yylval; yystate = yyn; if( yyerrflag > 0 ) --yyerrflag; goto yystack; } yydefault: /* default state action */ if( (yyn=yydef[yystate]) == -2 ) { if( yychar<0 ) if( (yychar=yylex())<0 ) yychar = 0; /* look through exception table */ for( yyxi=yyexca; (*yyxi!= (-1)) || (yyxi[1]!=yystate) ; yyxi += 2 ) ; /* VOID */ while( *(yyxi+=2) >= 0 ){ if( *yyxi == yychar ) break; } if( (yyn = yyxi[1]) < 0 ) return(0); /* accept */ } if( yyn == 0 ){ /* error */ /* error ... attempt to resume parsing */ switch( yyerrflag ){ case 0: /* brand new error */ yyerror( "syntax error" ); yyerrlab: ++yynerrs; case 1: case 2: /* incompletely recovered error ... try again */ yyerrflag = 3; /* find a state where "error" is a legal shift action */ while ( yyps >= yys ) { yyn = yypact[*yyps] + YYERRCODE; if( yyn>= 0 && yyn < YYLAST && yychk[yyact[yyn]] == YYERRCODE ){ yystate = yyact[yyn]; /* simulate a shift of "error" */ goto yystack; } yyn = yypact[*yyps]; /* the current yyps has no shift onn "error", pop stack */ #ifdef YYDEBUG if( yydebug ) printf( "error recovery pops state %d, uncovers %d\n", *yyps, yyps[-1] ); #endif --yyps; --yypv; } /* there is no state on the stack with an error shift ... abort */ yyabort: return(1); case 3: /* no shift yet; clobber input char */ #ifdef YYDEBUG if( yydebug ) printf( "error recovery discards char %d\n", yychar ); #endif if( yychar == 0 ) goto yyabort; /* don't discard EOF, quit */ yychar = -1; goto yynewstate; /* try again in the same state */ } } /* reduction by production yyn */ #ifdef YYDEBUG if( yydebug ) printf("reduce %d\n",yyn); #endif yyps -= yyr2[yyn]; yypvt = yypv; yypv -= yyr2[yyn]; yyval = yypv[1]; yym=yyn; /* consult goto table to find next state */ yyn = yyr1[yyn]; yyj = yypgo[yyn] + *yyps + 1; if( yyj>=YYLAST || yychk[ yystate = yyact[yyj] ] != -yyn ) yystate = yyact[yypgo[yyn]]; switch(yym){ case 1: # line 65 "egrep.y" { unary(FINAL, yypvt[-0]); line--; } break; case 2: # line 70 "egrep.y" { yyval = node(CAT, yypvt[-1], yypvt[-0]); } break; case 3: # line 72 "egrep.y" { yyval = node(CAT, yypvt[-2], yypvt[-1]); } break; case 4: # line 74 "egrep.y" { yyval = node(CAT, yypvt[-1], yypvt[-0]); } break; case 5: # line 76 "egrep.y" { yyval = node(CAT, yypvt[-2], yypvt[-1]); } break; case 6: # line 79 "egrep.y" { yyval = enter(DOT); yyval = unary(STAR, yyval); } break; case 7: # line 83 "egrep.y" { yyval = enter(yypvt[-0]); } break; case 8: # line 85 "egrep.y" { yyval = enter(DOT); } break; case 9: # line 87 "egrep.y" { yyval = cclenter(CCL); } break; case 10: # line 89 "egrep.y" { yyval = cclenter(NCCL); } break; case 11: # line 93 "egrep.y" { yyval = node(OR, yypvt[-2], yypvt[-0]); } break; case 12: # line 95 "egrep.y" { yyval = node(CAT, yypvt[-1], yypvt[-0]); } break; case 13: # line 97 "egrep.y" { yyval = unary(STAR, yypvt[-1]); } break; case 14: # line 99 "egrep.y" { yyval = unary(PLUS, yypvt[-1]); } break; case 15: # line 101 "egrep.y" { yyval = unary(QUEST, yypvt[-1]); } break; case 16: # line 103 "egrep.y" { yyval = yypvt[-1]; } break; } goto yystack; /* stack new state and value */ }