V8/usr/src/cmd/awk/awk.lx.l

%Start A str sc reg comment

%{
/*
 * Lexical analyzer for awk.
 *
 * Start conditions (declared by %Start above):
 *	A	ordinary program text
 *	str	inside a "..." string constant
 *	reg	inside a regular expression (entered through startreg())
 *	comment	inside a # comment that follows other text on a line
 *	sc	a '}' was just matched and ';' returned for it; the '}'
 *		token itself is handed out on the next call
 *
 * Note that A is both a start condition (<A>) and a named pattern ({A}).
 */
#include	"y.tab.h"
#include	"awk.h"

#undef	input	/* defeat lex */
#undef	unput

extern int	yylval;		/* token value; pointers are stored in it through (int) casts */
extern int	infunc;		/* nonzero while inside a function definition */

int	lineno	= 1;		/* current source line, bumped by the newline rules */
int	bracecnt = 0;		/* open { not yet closed */
int	brackcnt  = 0;		/* open [ not yet closed */
int	parencnt = 0;		/* open ( not yet closed */

/*
 * RET(x): return token x, tracing it first when DEBUG is defined.
 * The DEBUG form is wrapped in do { } while (0) so that it is a single
 * statement and "if (...) RET(a); else RET(b);" compiles either way.
 * The DEBUG form evaluates x twice.
 */
#ifdef	DEBUG
#	define	RET(x)	do { if (dbg) printf("lex %s\n", tokname(x)); return(x); } while (0)
#else
#	define	RET(x)	return(x)
#endif

#define	CBUFLEN	400

/*
 * CADD: append yytext[0] to cbuf.  Once the buffer is nearly full
 * (room is kept for the two bytes the closing-quote rule adds), report
 * the error and drop back to start condition A.
 */
#define	CADD	do { \
		cbuf[clen++] = yytext[0]; \
		if (clen >= CBUFLEN-1) { \
			yyerror("string/reg expr %.10s... too long", cbuf); \
			BEGIN A; \
		} \
	} while (0)

char	cbuf[CBUFLEN], *s;	/* text of the string/regexp being collected; s is scratch */
int	clen, cflag;		/* clen: bytes used in cbuf; cflag is not referenced in this file's rules */
%}

A	[a-zA-Z_]
B	[a-zA-Z0-9_]
D	[0-9]
WS	[ \t]

%%
	/*
	 * Code placed before the first rule: lex copies it to the top of
	 * the generated scanner, so it is executed on each call.
	 * NOTE(review): yybgin-yysvec-1 appears to recover the number of the
	 * current start condition from lex's internal tables (the inverse
	 * of BEGIN) -- confirm against the generated lex.yy.c.
	 */
	switch (yybgin-yysvec-1) {	/* witchcraft */
	case 0:		/* no start condition selected yet: begin in A */
		BEGIN A;
		break;
	case sc:	/* the previous call matched '}' and returned ';' for it; deliver the '}' now */
		BEGIN A;
		RET('}');
	}

<A>^\n		{ lineno++; }	/* empty line: counted, no token returned */
<A>^{WS}*#.*\n	{ lineno++; }	/* strip comment lines */
<A>{WS}		{ ; }	/* blanks and tabs are ignored */
<A>"\\"\n	{ lineno++; }	/* backslash-newline: counted, no token returned */
<A>BEGIN	{ RET(XBEGIN); }
<A>END		{ RET(XEND); }
<A>func(tion)?	{ if (infunc) yyerror("illegal nested function"); RET(FUNC); }	/* "func" or "function"; token is returned even after the error */
<A>return	{ if (!infunc) yyerror("return not in function"); RET(RETURN); }
<A>"&&"		{ RET(AND); }
<A>"||"		{ RET(BOR); }
<A>"!"		{ RET(NOT); }
<A>"!="		{ yylval = NE; RET(NE); }	/* comparison operators also leave their token code in yylval */
<A>"~"		{ yylval = MATCH; RET(MATCHOP); }	/* both match operators return MATCHOP; yylval tells them apart */
<A>"!~"		{ yylval = NOTMATCH; RET(MATCHOP); }
<A>"<"		{ yylval = LT; RET(LT); }
<A>"<="		{ yylval = LE; RET(LE); }
<A>"=="		{ yylval = EQ; RET(EQ); }
<A>">="		{ yylval = GE; RET(GE); }
<A>">"		{ yylval = GT; RET(GT); }
<A>">>"		{ yylval = APPEND; RET(APPEND); }
<A>"++"		{ yylval = INCR; RET(INCR); }
<A>"--"		{ yylval = DECR; RET(DECR); }
<A>"+="		{ yylval = ADDEQ; RET(ASGNOP); }	/* every assignment operator returns ASGNOP; yylval selects the operation */
<A>"-="		{ yylval = SUBEQ; RET(ASGNOP); }
<A>"*="		{ yylval = MULTEQ; RET(ASGNOP); }
<A>"/="		{ yylval = DIVEQ; RET(ASGNOP); }
<A>"%="		{ yylval = MODEQ; RET(ASGNOP); }
<A>"^="		{ yylval = POWEQ; RET(ASGNOP); }
<A>"**="	{ yylval = POWEQ; RET(ASGNOP); }	/* same as "^=" */
<A>"="		{ yylval = ASSIGN; RET(ASGNOP); }
<A>"**"		{ RET(POWER); }	/* "**" and "^" are the same token */
<A>"^"		{ RET(POWER); }

<A>"$0"		{ yylval = (int) lookup("$0", symtab); RET(FIELD); }	/* listed before "$"{D}+ so that it wins the tie on the text $0 */
<A>"$"{D}+	{ yylval = (int) fieldadr(atoi(yytext+1)); RET(FIELD); }	/* $ followed by a literal number */
<A>"$"{WS}*	{ RET(INDIRECT); }	/* any other $: only the operator (and trailing blanks) is consumed here */
<A>NF		{ yylval = (int)setsymtab(yytext, "", 0.0, NUM, symtab); RET(VARNF); }	/* NF has its own token type */

<A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)?	{
		  /* digits with optional fraction, or .digits, then an optional
		     exponent; entered in symtab under its own spelling as a
		     numeric constant */
		  yylval = (int)setsymtab(yytext, "", atof(yytext), CON|NUM, symtab);
		  RET(NUMBER); }
<A>"}"{WS}*\n	{ if (--bracecnt < 0) yyerror("extra }"); BEGIN sc; lineno++; RET(';'); }	/* '}' yields ';' now and, via state sc, '}' on the next call; the newline is swallowed */
<A>"}"		{ if (--bracecnt < 0) yyerror("extra }"); BEGIN sc; RET(';'); }	/* same, without a following newline */
<A>;\n		{ lineno++; RET(';'); }	/* ';' at end of line: no separate NL token */
<A>\n		{ lineno++; RET(NL); }
<A>while	{ RET(WHILE); }
<A>for		{ RET(FOR); }
<A>if		{ RET(IF); }
<A>else		{ RET(ELSE); }
<A>next		{ RET(NEXT); }
<A>exit		{ RET(EXIT); }
<A>break	{ RET(BREAK); }
<A>continue	{ RET(CONTINUE); }
<A>print	{ yylval = PRINT; RET(PRINT); }
<A>printf	{ yylval = PRINTF; RET(PRINTF); }
<A>sprintf	{ yylval = SPRINTF; RET(SPRINTF); }
<A>split	{ yylval = SPLIT; RET(SPLIT); }
<A>substr	{ RET(SUBSTR); }
<A>sub		{ yylval = SUB; RET(SUB); }
<A>gsub		{ yylval = GSUB; RET(GSUB); }
<A>index	{ RET(INDEX); }
<A>in		{ RET(IN); }
<A>getline	{ RET(GETLINE); }
<A>close	{ RET(CLOSE); }
<A>delete	{ RET(DELETE); }
<A>length	{ yylval = FLENGTH; RET(BLTIN); }	/* the functions from here on share the BLTIN token; yylval says which one */
<A>log		{ yylval = FLOG; RET(BLTIN); }
<A>int		{ yylval = FINT; RET(BLTIN); }
<A>exp		{ yylval = FEXP; RET(BLTIN); }
<A>sqrt		{ yylval = FSQRT; RET(BLTIN); }
<A>sin		{ yylval = FSIN; RET(BLTIN); }
<A>cos		{ yylval = FCOS; RET(BLTIN); }
<A>atan2	{ yylval = FATAN; RET(BLTIN); }
<A>system	{ yylval = FSYSTEM; RET(BLTIN); }
<A>rand		{ yylval = FRAND; RET(BLTIN); }
<A>srand	{ yylval = FSRAND; RET(BLTIN); }
<A>{A}{B}*	{ int n, c;
		  /*
		   * Identifier that is not one of the keywords above.
		   * Peek at the next character without consuming it:
		   *   - name immediately followed by '(' -> CALL
		   *   - otherwise, inside a function, a name for which
		   *     isarg() returns a non-negative value -> ARG, with
		   *     that value in yylval
		   *   - anything else -> VAR
		   * CALL and VAR carry the symbol table entry in yylval.
		   */
		  c = input(); unput(c);	/* look for '(' */
		  if (c != '(' && infunc && (n=isarg(yytext)) >= 0) {
			yylval = n;
			RET(ARG);
		  } else {
			yylval = (int)setsymtab(yytext,"",0.0,STR|NUM,symtab);
			/* Braces are required here: when DEBUG is defined
			   RET() expands to a { } block, and an unbraced
			   "RET(CALL); else" would not compile. */
			if (c == '(') {
				RET(CALL);
			} else {
				RET(VAR);
			}
		  }
		}
<A>\"		{ BEGIN str; clen = 0; }	/* opening quote: start collecting a string in cbuf */
<A>#		{ BEGIN comment; }	/* comment following other text on the line */

<A>"]"		{ if (--brackcnt < 0) yyerror("extra ]"); RET(']'); }	/* the token is returned even after the error */
<A>")"		{ if (--parencnt < 0) yyerror("extra )"); RET(')'); }

<A>.		{ if (yytext[0] == '{') bracecnt++;
		  else if (yytext[0] == '[') brackcnt++;
		  else if (yytext[0] == '(') parencnt++;
		  /* openers are counted so that the closing rules can report
		     an unmatched '}', ']' or ')'; the character itself is both
		     the token and its value */
		  RET(yylval = yytext[0]); /* everything else */ }

<comment>\n	{ BEGIN A; lineno++; RET(NL); }	/* end of comment: back to normal text; the newline still yields NL */
<comment>.	;	/* discard the comment text */

<reg>\\.	{ /* keep a backslash escape verbatim (both characters) */
		  cbuf[clen++] = '\\'; cbuf[clen++] = yytext[1];
		  /* same limit and message as CADD: without this check a
		     long run of escapes would write past the end of cbuf */
		  if (clen >= CBUFLEN-1) {
			yyerror("string/reg expr %.10s... too long", cbuf);
			BEGIN A;
		  } }
<reg>\n		{ /* terminate cbuf so the message shows only this expression,
		     not leftovers from an earlier token */
		  cbuf[clen] = 0;
		  yyerror("newline in regular expression %.10s...", cbuf); lineno++; BEGIN A; }
<reg>"/"	{ /* closing slash: hand the collected text to the parser */
		  BEGIN A;
		  cbuf[clen] = 0;
		  yylval = (int) tostring(cbuf);
		  unput('/');	/* push the '/' back so it is scanned again in state A */
		  RET(REGEXPR); }
<reg>.		{ CADD; }

<str>\"		{ /* closing quote: the string value is a copy of cbuf; the
		     symbol table name is the same text plus one trailing
		     blank (presumably to keep it distinct from identifiers
		     and numeric constants spelled the same -- confirm) */
		  BEGIN A;
		  cbuf[clen] = 0; s = tostring(cbuf);
		  cbuf[clen] = ' '; cbuf[++clen] = 0;
		  yylval = (int)setsymtab(cbuf, s, 0.0, CON|STR, symtab);
		  RET(STRING); }
<str>\n		{ /* terminate cbuf so the message shows only this string,
		     not leftovers from an earlier token */
		  cbuf[clen] = 0;
		  yyerror("newline in string %.10s...", cbuf); lineno++; BEGIN A; }
<str>"\\"([0-7][0-7][0-7]|[0-7][0-7]|[0-7]) { int n = 0;
		  /* \d, \dd or \ddd with octal digits only; 8 and 9 are
		     not accepted, so "%o" always converts the whole match */
		  sscanf(yytext+1, "%o", &n);
		  cbuf[clen++] = n;
		  /* same limit and message as CADD */
		  if (clen >= CBUFLEN-1) {
			yyerror("string/reg expr %.10s... too long", cbuf);
			BEGIN A;
		  } }
<str>"\\".	{ int c;
		  /* all remaining two-character escapes */
		  switch (yytext[1]) {
		  case 'n':	c = '\n'; break;
		  case 't':	c = '\t'; break;
		  case 'f':	c = '\f'; break;
		  case 'r':	c = '\r'; break;
		  case 'b':	c = '\b'; break;
		  default:	c = yytext[1]; break;	/* \" \\ and unknown escapes: the character itself */
		  }
		  cbuf[clen++] = c;
		  /* same limit and message as CADD: without this check a
		     long run of escapes would write past the end of cbuf */
		  if (clen >= CBUFLEN-1) {
			yyerror("string/reg expr %.10s... too long", cbuf);
			BEGIN A;
		  } }
<str>.		{ CADD; }

%%

/*
 * startreg: empty the collection buffer and put the scanner into the
 * reg start condition, so that the following input is gathered by the
 * <reg> rules.  Not called from the rules in this file; presumably the
 * parser calls it when it expects a regular expression -- confirm.
 */
startreg()
{
	clen = 0;
	BEGIN reg;
}

/* input() and unput() are transcriptions of the standard lex
   macros for input and output with additions for error message
   printing.  God help us all if someone changes how lex works.
*/

char	ebuf[300];	/* circular record of the characters read, kept for error messages */
char	*ep = ebuf;	/* next free slot in ebuf */

/*
 * input: return the next character of the awk program, or 0 at end.
 * Characters come, in order of preference, from lex's pushback stack,
 * from the string lexprog when there is no input file (yyin == NULL),
 * or from yyin.  Every character returned is also recorded in ebuf.
 */
input()
{
	register c;
	extern char *lexprog;

	if (yysptr > yysbuf)
		c = U(*--yysptr);
	else if (yyin == NULL) {
		/* stay on the terminating NUL: a further call after end of
		   program must not read beyond the end of the string */
		if ((c = *lexprog) != 0)
			lexprog++;
	} else
		c = getc(yyin);
	if (c == '\n')
		yylineno++;
	else if (c == EOF)
		c = 0;
	if (ep >= ebuf + sizeof ebuf)	/* wrap around */
		ep = ebuf;
	return *ep++ = c;
}

/*
 * unput: push character c back so that the next input() returns it.
 * Undoes input()'s bookkeeping: the line count is decremented for a
 * newline and the ebuf position steps back one slot, wrapping to the
 * last slot when it is at the start of the buffer.
 */
unput(c)
{
	if ((yytchar = c) == '\n')
		yylineno--;
	*yysptr++ = yytchar;
	if (ep == ebuf)
		ep = ebuf + sizeof(ebuf) - 1;
	else
		ep--;
}