%Start A str sc reg comment

%{
/*
 * Lexical analyser for awk (lex specification).
 *
 * Start states:
 *	A	 ordinary program text
 *	str	 inside a "..." string constant
 *	reg	 inside a /.../ regular expression (entered via startreg())
 *	comment	 inside a trailing # comment
 *	sc	 a '}' has just been seen; a ';' was returned for it and the
 *		 '}' itself is delivered on the next call (see the switch at
 *		 the top of the rules section)
 *
 * String and regular-expression text is collected in cbuf[0..clen-1].
 */
#include	"y.tab.h"
#include	"awk.h"

#undef	input	/* defeat lex */
#undef	unput

extern int	yylval;
extern int	infunc;

int	lineno	= 1;	/* bumped by every rule that consumes a newline */
int	bracecnt = 0;	/* nesting depth of { }, [ ] and ( ); a closer that */
int	brackcnt = 0;	/* drives its count negative is reported as "extra" */
int	parencnt = 0;
#ifdef	DEBUG
#	define	RET(x)	{if (dbg) printf("lex %s\n", tokname(x)); return(x); }
#else
#	define	RET(x)	return(x)
#endif

#define	CBUFLEN	400

/*
 * CFULL(n) is true when n more characters will not fit in cbuf.  Two slots
 * are always held back because the rule for the closing quote of a string
 * appends a blank and a NUL after the last collected character.
 *
 * CPUT(c) appends one character, or reports the overflow and drops back to
 * state A.  Every rule that adds to cbuf goes through CPUT (or tests CFULL
 * itself), so escape sequences can no longer run off the end of the buffer.
 * CADD appends the single character just matched.
 */
#define	CFULL(n)	(clen + (n) > CBUFLEN-2)
#define	CTOOLONG	{ cbuf[clen] = 0; yyerror("string/reg expr %.10s... too long", cbuf); BEGIN A; }
#define	CPUT(c)		if (CFULL(1)) CTOOLONG else cbuf[clen++] = (c)
#define	CADD		CPUT(yytext[0])

char	cbuf[CBUFLEN], *s;
int	clen, cflag;
%}

A	[a-zA-Z_]
B	[a-zA-Z0-9_]
D	[0-9]
O	[0-7]
WS	[ \t]

%%
	/*
	 * Runs at the top of every yylex() call.  yybgin-yysvec-1 is the
	 * current start state: the initial state 0 is replaced by A, and
	 * state sc (set by the '}' rules below, which return ';' first)
	 * delivers the pending '}' token.
	 */
	switch (yybgin-yysvec-1) {	/* witchcraft */
	case 0:
		BEGIN A;
		break;
	case sc:
		BEGIN A;
		RET('}');
	}

<A>^\n		{ lineno++; }
<A>^{WS}*#.*\n	{ lineno++; }	/* strip comment lines */
<A>{WS}		{ ; }
<A>"\\"\n	{ lineno++; }
<A>BEGIN	{ RET(XBEGIN); }
<A>END		{ RET(XEND); }
<A>func(tion)?	{ if (infunc) yyerror("illegal nested function"); RET(FUNC); }
<A>return	{ if (!infunc) yyerror("return not in function"); RET(RETURN); }
<A>"&&"		{ RET(AND); }
<A>"||"		{ RET(BOR); }
<A>"!"		{ RET(NOT); }
<A>"!="		{ yylval = NE; RET(NE); }
<A>"~"		{ yylval = MATCH; RET(MATCHOP); }
<A>"!~"		{ yylval = NOTMATCH; RET(MATCHOP); }
<A>"<"		{ yylval = LT; RET(LT); }
<A>"<="		{ yylval = LE; RET(LE); }
<A>"=="		{ yylval = EQ; RET(EQ); }
<A>">="		{ yylval = GE; RET(GE); }
<A>">"		{ yylval = GT; RET(GT); }
<A>">>"		{ yylval = APPEND; RET(APPEND); }
<A>"++"		{ yylval = INCR; RET(INCR); }
<A>"--"		{ yylval = DECR; RET(DECR); }
<A>"+="		{ yylval = ADDEQ; RET(ASGNOP); }
<A>"-="		{ yylval = SUBEQ; RET(ASGNOP); }
<A>"*="		{ yylval = MULTEQ; RET(ASGNOP); }
<A>"/="		{ yylval = DIVEQ; RET(ASGNOP); }
<A>"%="		{ yylval = MODEQ; RET(ASGNOP); }
<A>"^="		{ yylval = POWEQ; RET(ASGNOP); }
<A>"**="	{ yylval = POWEQ; RET(ASGNOP); }
<A>"="		{ yylval = ASSIGN; RET(ASGNOP); }
<A>"**"		{ RET(POWER); }
<A>"^"		{ RET(POWER); }

<A>"$0"		{ yylval = (int) lookup("$0", symtab); RET(FIELD); }
<A>"$"{D}+	{ yylval = (int) fieldadr(atoi(yytext+1)); RET(FIELD); }
<A>"$"{WS}*	{ RET(INDIRECT); }
<A>NF		{ yylval = (int)setsymtab(yytext, "", 0.0, NUM, symtab); RET(VARNF); }

<A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)?	{
		  yylval = (int)setsymtab(yytext, "", atof(yytext), CON|NUM, symtab);
		  RET(NUMBER); }

<A>"}"{WS}*\n	{ if (--bracecnt < 0) yyerror("extra }"); BEGIN sc; lineno++; RET(';'); }
<A>"}"		{ if (--bracecnt < 0) yyerror("extra }"); BEGIN sc; RET(';'); }
<A>;\n		{ lineno++; RET(';'); }
<A>\n		{ lineno++; RET(NL); }
<A>while	{ RET(WHILE); }
<A>for		{ RET(FOR); }
<A>if		{ RET(IF); }
<A>else		{ RET(ELSE); }
<A>next		{ RET(NEXT); }
<A>exit		{ RET(EXIT); }
<A>break	{ RET(BREAK); }
<A>continue	{ RET(CONTINUE); }
<A>print	{ yylval = PRINT; RET(PRINT); }
<A>printf	{ yylval = PRINTF; RET(PRINTF); }
<A>sprintf	{ yylval = SPRINTF; RET(SPRINTF); }
<A>split	{ yylval = SPLIT; RET(SPLIT); }
<A>substr	{ RET(SUBSTR); }
<A>sub		{ yylval = SUB; RET(SUB); }
<A>gsub		{ yylval = GSUB; RET(GSUB); }
<A>index	{ RET(INDEX); }
<A>in		{ RET(IN); }
<A>getline	{ RET(GETLINE); }
<A>close	{ RET(CLOSE); }
<A>delete	{ RET(DELETE); }
<A>length	{ yylval = FLENGTH; RET(BLTIN); }
<A>log		{ yylval = FLOG; RET(BLTIN); }
<A>int		{ yylval = FINT; RET(BLTIN); }
<A>exp		{ yylval = FEXP; RET(BLTIN); }
<A>sqrt		{ yylval = FSQRT; RET(BLTIN); }
<A>sin		{ yylval = FSIN; RET(BLTIN); }
<A>cos		{ yylval = FCOS; RET(BLTIN); }
<A>atan2	{ yylval = FATAN; RET(BLTIN); }
<A>system	{ yylval = FSYSTEM; RET(BLTIN); }
<A>rand		{ yylval = FRAND; RET(BLTIN); }
<A>srand	{ yylval = FSRAND; RET(BLTIN); }

<A>{A}{B}*	{ int n, c;
		  /* peek one character: a following '(' makes this a call */
		  c = input(); unput(c);	/* look for '(' */
		  if (c != '(' && infunc && (n=isarg(yytext)) >= 0) {
			yylval = n;
			RET(ARG);
		  } else {
			yylval = (int)setsymtab(yytext,"",0.0,STR|NUM,symtab);
			if (c == '(')
				RET(CALL);
			else
				RET(VAR);
		  }
		}
<A>\"		{ BEGIN str; clen = 0; }

<A>#		{ BEGIN comment; }

<A>"]"		{ if (--brackcnt < 0) yyerror("extra ]"); RET(']'); }
<A>")"		{ if (--parencnt < 0) yyerror("extra )"); RET(')'); }

<A>.		{ if (yytext[0] == '{') bracecnt++;
		  else if (yytext[0] == '[') brackcnt++;
		  else if (yytext[0] == '(') parencnt++;
		  RET(yylval = yytext[0]); /* everything else */
		}

<comment>\n	{ BEGIN A; lineno++; RET(NL); }
<comment>.	;

<reg>\\.	{ /* keep the backslash; both characters must fit */
		  if (CFULL(2))
			CTOOLONG
		  else {
			cbuf[clen++] = '\\';
			cbuf[clen++] = yytext[1];
		  }
		}
<reg>\n		{ cbuf[clen] = 0;	/* terminate so %.10s shows only this token */
		  yyerror("newline in regular expression %.10s...", cbuf);
		  lineno++; BEGIN A; }
<reg>"/"	{ BEGIN A;
		  cbuf[clen] = 0;
		  yylval = (int) tostring(cbuf);
		  unput('/');	/* the closing '/' is rescanned in state A and returned as a token */
		  RET(REGEXPR); }
<reg>.		{ CADD; }

<str>\"		{ BEGIN A;
		  cbuf[clen] = 0; s = tostring(cbuf);
		  /* the symbol-table name is the text plus a trailing blank;
		     presumably so a string constant cannot collide with an
		     identifier of the same spelling -- confirm in setsymtab */
		  cbuf[clen] = ' '; cbuf[++clen] = 0;
		  yylval = (int)setsymtab(cbuf, s, 0.0, CON|STR, symtab);
		  RET(STRING); }
<str>\n		{ cbuf[clen] = 0;	/* terminate so %.10s shows only this token */
		  yyerror("newline in string %.10s...", cbuf);
		  lineno++; BEGIN A; }
<str>"\\\""	{ CPUT('"'); }
<str>"\\"n	{ CPUT('\n'); }
<str>"\\"t	{ CPUT('\t'); }
<str>"\\"f	{ CPUT('\f'); }
<str>"\\"r	{ CPUT('\r'); }
<str>"\\"b	{ CPUT('\b'); }
<str>"\\\\"	{ CPUT('\\'); }
<str>"\\"({O}{O}{O}|{O}{O}|{O})	{ /* one to three octal digits; \8 and \9 fall
		     through to the generic escape rule below */
		  int n = 0;
		  sscanf(yytext+1, "%o", &n);
		  CPUT(n); }
<str>"\\".	{ CPUT(yytext[1]); }
<str>.		{ CADD; }

%%

/*
 * startreg: put the scanner into the regular-expression state with an empty
 * collection buffer.  Presumably called by the parser once it has decided
 * that a '/' opens a regular expression -- the caller is not in this file.
 */
startreg()
{
	BEGIN reg;
	clen = 0;
}

/* input() and unput() are transcriptions of the standard lex
   macros for input and output with additions for error message
   printing.  God help us all if someone changes how lex works.
*/

char	ebuf[300];	/* circular record of the most recently read characters */
char	*ep = ebuf;	/* next free slot in ebuf */

/*
 * input: return the next input character, or 0 at end of input.
 * Characters are taken, in order of preference, from the lex pushback
 * stack, from the in-memory program text lexprog (when yyin is NULL),
 * or from the stream yyin.  Each character returned is also recorded
 * in ebuf.
 */
input()
{
	register c;
	extern char *lexprog;

	if (yysptr > yysbuf)
		c = U(*--yysptr);
	else if (yyin == NULL)
		c = *lexprog ? *lexprog++ : 0;	/* never step past the terminating NUL */
	else
		c = getc(yyin);
	if (c == '\n')
		yylineno++;
	else if (c == EOF)
		c = 0;
	if (ep >= ebuf + sizeof ebuf)	/* wrap the circular record */
		ep = ebuf;
	return *ep++ = c;
}

/*
 * unput: push character c back onto the lex pushback stack and step the
 * ebuf record back by one so the character is not recorded twice when it
 * is read again.
 */
unput(c)
{
	yytchar = c;
	if (yytchar == '\n')
		yylineno--;
	*yysptr++ = yytchar;
	if (--ep < ebuf)
		ep = ebuf + sizeof(ebuf) - 1;
}