Minix2.0/src/commands/awk/y.c

/*
 * a small awk clone
 *
 * (C) 1989 Saeko Hirabauashi & Kouichi Hirabayashi
 *
 * Absolutely no warranty. Use this software with your own risk.
 *
 * Permission to use, copy, modify and distribute this software for any
 * purpose and without fee is hereby granted, provided that the above
 * copyright and disclaimer notice.
 *
 * This program was written to fit into 64K+64K memory of the Minix 1.2.
 */


#include <stdio.h>
#include "awk.h"

/* Compiles the text of a /regex/ token; expr10() stores the result in a
 * PAT cell. */
extern char *mkpat();

/* State shared with other source files of the program. */
extern char *cmd;	/* printed as the prefix of syntax error messages */
extern char text[];	/* text of the current token */
extern char funnam[];	/* name of the user function being parsed */
extern int sym;	/* current token; 0 ends the parse loop */
extern int sym1;	/* extra value of the current token (ARG position,
			 * value passed on for MATHFUN/STRFUN/SUBST) */
extern int regexflg;	/* raised by expr10() while lex() reads a regexp */
extern int funflg;	/* nonzero while dousrfun() parses a function body */
extern int printflg;	/* raised by doprint() while parsing its arguments */
extern int getlineflg;	/* raised by expr10() while parsing getline's var */

/* Symbol tables handed to getvar(): hashtab for variables and arrays,
 * funtab for functions. */
extern SYMBOL *hashtab[], *funtab[];

/* field[0] is used by doprint() as the default thing to print. */
extern CELL *field[];

/* Helpers defined elsewhere and the parser routines of this file. */
char *emalloc(), *strsave();
NODE *node0(), *node1(), *node2(), *node3(), *node4();
NODE *stat(), *pastat();
NODE *expr(), *expr1(), *expr2(), *expr3(), *expr4();
NODE *expr5(), *expr6(), *expr7(), *expr8(), *expr9(), *expr10();
NODE *doprint(), *dofuncn(), *doif(), *dowhile(), *dofor(), *body();
NODE *doassign(), *dodo(), *doarray(), *doreturn(), *doelement();
CELL *mkcell(), *getvar();
CELL *execute(), *lookup();

int forflg;	/* parsing for(expr in array), inhibit 'expr in array' */
int prmflg;	/* nonzero while dofuncn() parses call arguments */
/* Parsed program: BEGIN action, main pattern-action list, END action.
 * parse() fills them in and then executes them in that order. */
NODE *begin, *loop, *End;

/*
 * Append the node list `tail' to the end of the n_next chain that starts
 * at `head' and return the head of the combined list.  `head' may be NULL,
 * in which case `tail' itself is the result.
 */
static NODE *
joinlist(head, tail) NODE *head, *tail;
{
  NODE *p;

  if (head == NULL)
	return tail;
  for (p = head; p->n_next; p = p->n_next)
	;
  p->n_next = tail;
  return head;
}

/*
 * Parse the whole program and run it.
 *
 * BEGIN actions are collected in `begin', END actions in `End', and all
 * other pattern-action rules in `loop'; functions are handed to
 * dousrfun().  Every list is extended at its tail, so that
 *   - several BEGIN (or END) blocks are all kept, in source order;
 *   - main rules separated by BEGIN/END/function definitions are kept;
 *   - an action-only rule that consists of several statements (stat()
 *     returns them as an n_next chain) does not lose its trailing
 *     statements when the next rule is linked in.
 *
 * After parsing, `begin' is executed once, `loop' once for every record
 * delivered by Getrec(), and `End' once at the end.  Records are read
 * whenever there is a main rule or an END action.
 */
parse()
{
  NODE *r, *stat();
  CELL *u;

  lex();
  skipeol();
  while (sym) {
	switch (sym) {
	case BEGIN:
		lex();
		r = stat();
		begin = joinlist(begin, r);
		break;
	case END:
		lex();
		r = stat();
		End = joinlist(End, r);
		break;
	case FUNC:
		lex();
		dousrfun();
		break;
	default:
		/* a run of pattern-action rules */
		do {
			r = pastat();
			loop = joinlist(loop, r);
			skipeol();
		} while (sym && sym != BEGIN && sym != END && sym != FUNC);
		break;
	}
	skipeol();
  }
  if (begin) {
	u = execute(begin);
	c_free(u);
  }
  if (End || loop)
	while (Getrec(NULL)) {
		if (loop) {
			u = execute(loop);
			c_free(u);
		}
	}
  if (End) {
	u = execute(End);
	c_free(u);
  }
}

#define MAXARG		100
static char *argnam[MAXARG];	/* parameter names of the function being parsed */
static int narg;		/* number of valid entries in argnam[] */

/*
 * Parse a user function definition; the current token is the function
 * name.  The parameter names are saved in argnam[] while the body is
 * parsed (isarg() looks them up there) and released afterwards.  The
 * function cell obtained from getvar() receives the parameter count in
 * c_fval and the parsed body in c_sval.
 *
 * A definition with more than MAXARG parameters is rejected with a
 * syntax error instead of writing past the end of argnam[].
 */
static
dousrfun()
{
  CELL *u;

  strcpy(funnam, text);
  u = getvar(text, funtab, FUN);
  lex();
  if (sym != '(')
	synerr("'(' expected");
  for (lex(); sym != ')'; narg++) {
	if (sym != IDENT)
		synerr("argument expected");
	if (narg >= MAXARG)
		synerr("too many arguments");
	argnam[narg] = strsave(text);
	lex();
	if (sym == ',')
		lex();
  }
  u->c_fval = (double) narg;
  lex();
  skipeol();
  funflg++;	/* lets synerr() name the function in its message */
  u->c_sval = (char *) stat();
  funflg--;
  while (narg > 0)
	sfree(argnam[--narg]);
  skipeol();
}

/*
 * Return the position of `s' among the parameter names of the function
 * currently being parsed, or -1 if it is not one of them (or if no
 * parameters are recorded).  The search runs from the last name to the
 * first.
 */
isarg(s) char *s;
{
  int pos;

  if (narg <= 0)
	return -1;
  pos = narg;
  while (--pos >= 0) {
	if (strcmp(s, argnam[pos]) == 0)
		return pos;
  }
  return -1;
}

/*
interactive()
{
  NODE *p, *q;
  CELL *u;

  for (lex(); sym; lex()) {
	p = stat();
	if (p->n_type != PRINT && !iscntl(p->n_type)) {
		q = (NODE *) emalloc(sizeof(NODE) + sizeof(NODE *) * 4);
		q->n_type = PRINT;
		q->n_arg[0] = q->n_arg[1] = q->n_arg[3] = NULL;
		q->n_arg[2] = p;
		q->n_next = NULL;
		p = q;
	}
	u = execute(p);
	printf("[%g(%s)]\n", u->c_fval, u->c_sval);
	c_free(u);
  }
  closeall();
  exit(0);
}
*/

/*
 * Return `t' if it is one of the node types IF, DO, WHILE, FOR, JUMP or
 * GETLINE, and 0 otherwise.
 */
static
iscntl(t)
{
  if (t == IF || t == DO || t == WHILE || t == FOR || t == JUMP
	|| t == GETLINE)
	return t;
  return 0;
}

/*
 * Parse one pattern-action rule.
 *
 *   { action }                 -> the action itself
 *   expr [{ action }]          -> P1STAT(expr, action)
 *   expr , expr [{ action }]   -> P2STAT(expr, expr, action)
 *
 * When the action is missing (end of input or EOL follows the pattern)
 * a PRINT0 node is used in its place.
 */
static NODE *
pastat()
{
  NODE *pat, *pat2, *act;

  if (sym == '{')	/* action only */
	return stat();

  pat = expr();
  pat2 = NULL;
  if (sym == ',') {
	lex();
	pat2 = expr();
  }
  act = (sym && sym != EOL) ? stat() : node0(PRINT0);
  return pat2 ? node3(P2STAT, pat, pat2, act) : node2(P1STAT, pat, act);
}

/*
 * Parse one statement and return its node.  Leading EOL tokens are
 * skipped.  A `{ ... }' block is returned as the n_next chain of its
 * statements (a NULPROC node for an empty block).  Anything that is not
 * introduced by a keyword or '{' is parsed as an expression; a bare VALUE
 * node is rejected with "statement expected".
 *
 * Inside a block each new statement is linked behind the TAIL of what
 * has been collected so far.  A nested block comes back as a chain of
 * several nodes, so linking behind its head would drop every statement
 * of the nested block except the first.
 *
 * `expr | getline' is handled in expr9().
 */
static NODE *
stat()
{
  NODE *p, *q, *r;
  CELL *u;

  while (sym == EOL)
	lex();
  switch(sym) {
  case PRINT:
	p = doprint(0);
	break;
  case PRINTF:
	p = doprint(FORMAT);
	break;
  case IF:
	p = doif();
	break;
  case WHILE:
	p = dowhile();
	break;
  case DO:
	p = dodo();
	break;
  case FOR:
	p = dofor();
	break;
  case RETURN:
	p = doreturn();
	break;
  case EXIT:
	/* JUMP(EXIT [, status]) */
	p = node2(JUMP, (NODE *)sym, (NODE *)NULL);
	lex();
	if (sym == IDENT || sym == NUMBER || sym == ARG)
		p->n_arg[1] = expr();
	break;
  case BREAK: case CONTIN: case NEXT:
	p = node1(JUMP, (NODE *)sym);
	lex();
	break;
  case DELETE:
	lex();
	u = getvar(text, hashtab, ARR);
	if (Getc() != '[')
		synerr("'[' expected");
	p = doarray(u);	/* builds an ARRAY node ... */
	p->n_type = DELETE;	/* ... which is retyped here */
	lex();	/* ']' */
	break;
  case '{':
	lex();
	skipeol();
	if (sym == '}')
		p = node0(NULPROC);
	else
		p = stat();
	q = p;
	skipeol();
	while (sym != '}') {
		while (q->n_next)	/* find the tail of the list */
			q = q->n_next;
		r = stat();
		q->n_next = r;
		q = r;
		skipeol();
	}
	lex();
	break;
  default:
	p = expr();
	break;
  }
  if (p->n_type == VALUE)
	synerr("statement expected");
  return p;
}

/* Advance the lexer past any run of EOL tokens. */
static
skipeol()
{
  for (; sym == EOL; lex())
	;
}

/*
 * Parse a print-like construct; `fmt' holds flag bits that are stored
 * together with the redirection operator (callers pass 0, FORMAT or
 * FORMAT|STROUT).
 *
 * The resulting PRINT node is laid out as
 *   n_arg[0]   redirection token (0 if none) or'ed with fmt
 *   n_arg[1]   redirection target expression, or NULL
 *   n_arg[2..] the expressions to print, terminated by NULL
 * When no expression is given, a VALUE node for field[0] is printed.
 * One optional '(' before and ')' after the expression list are
 * swallowed.
 */
static NODE *
doprint(fmt)
{
  NODE *head, *last, *item, *dest, *pr;
  int redir, slot;
  int count = 0;

  printflg++;
  lex();
  if (sym == '(')
	lex();
  head = NULL;
  if (!(sym == '}' || sym == ')' || sym == EOL
	|| sym == R_OUT || sym == R_APD || sym == R_POUT)) {
	/* collect the comma separated list, linked through n_next */
	head = last = expr();
	count = 1;
	while (sym == ',') {
		lex();
		skipeol();
		item = expr();
		count++;
		last->n_next = item;
		last = item;
	}
  }
  if (sym == ')')
	lex();
  redir = 0;	/* stdout */
  dest = NULL;
  if (sym == R_OUT || sym == R_APD || sym == R_POUT) {
	redir = sym;
	lex();
	dest = expr();	/* 94-04-02 */
  }
  printflg = 0;
  pr = (NODE *) emalloc(sizeof(*pr) + sizeof(pr) * (count + 3));
  pr->n_type = PRINT;
  pr->n_next = NULL;
  pr->n_arg[0] = (NODE *) (redir | fmt);
  pr->n_arg[1] = dest;
  if (count == 0)
	head = node1(VALUE, (NODE *)field[0]);
  /* move the list into the argument vector, unlinking as we go */
  slot = 2;
  while (head != NULL) {
	item = head->n_next;
	head->n_next = NULL;
	pr->n_arg[slot++] = head;
	head = item;
  }
  pr->n_arg[slot] = NULL;
  return pr;
}

/*
 * Parse `if (expr) statement [else statement]'; the current token is
 * IF.  Returns IF(condition, then-part, else-part), where the else-part
 * is NULL when there is no `else'.
 */
static NODE *
doif()
{
  NODE *cond, *yes, *no;

  lex();
  if (sym != '(')
	synerr("'(' expected");
  lex();
  cond = expr();
  if (sym != ')')
	synerr("')' expected");
  lex();
  skipeol();
  yes = stat();
  skipeol();
  no = NULL;
  if (sym == ELSE) {
	lex();
	skipeol();
	no = stat();
  }
  return node3(IF, cond, yes, no);
}

/*
 * Parse `while (expr) statement'; the current token is WHILE.
 * Returns WHILE(condition, body).
 *
 * The condition is parsed as an expression, like the condition of `if'.
 * Parsing it with stat() made a condition that is a plain value, such
 * as `while (1)' or `while (x)', fail with "statement expected", because
 * stat() refuses a bare VALUE node.  EOL tokens before the condition
 * are still skipped, as stat() did.
 */
static NODE *
dowhile()
{
  NODE *p, *q;

  lex();
  if (sym != '(')
	synerr("'(' expected");
  lex();
  skipeol();
  p = expr();
  if (sym != ')')
	synerr("')' expected");
  q = body();	/* body() reads on from the ')' itself */
  return node2(WHILE, p, q);
}

/*
 * Parse a `for' statement; the current token is FOR.
 *
 *   for (var in array) statement     -> FORIN(var, array, statement)
 *   for (init; cond; step) statement -> FOR(init, cond, step, statement)
 *
 * Each of init, cond and step may be empty and is then NULL.  The
 * separating ';' arrives as an EOL token.  While the first expression
 * is parsed forflg is raised so that expr4() leaves the `in' alone.
 * The array of the `in' form is an ARG node when it is a function
 * parameter and a VALUE node otherwise.
 */
static NODE *
dofor()
{
  NODE *init, *cond, *step, *arr, *act;
  CELL *c;

  lex();
  if (sym != '(')
	synerr("'(' expected");
  lex();
  init = NULL;
  if (sym != EOL) {
	forflg++;	/* inhibit parsing 'expr IN array' */
	init = expr();
	forflg = 0;
  }

  if (sym == IN) {
	lex();
	if (sym == ARG) {
		c = mkcell(POS, NULL, (double)sym1);
		arr = node1(ARG, c);
	}
	else {
		c = getvar(text, hashtab, ARR);
		arr = node1(VALUE, c);
	}
	lex();
	if (sym != ')')
		synerr("')' expected");
	lex();
	skipeol();
	act = stat();
	return node3(FORIN, init, arr, act);
  }

  if (sym != EOL)
	synerr("'in' or ';' expected");
  lex();
  cond = (sym != EOL) ? expr() : NULL;
  if (sym != EOL)
	synerr("';' expected");
  lex();
  step = (sym != ')') ? expr() : NULL;
  if (sym != ')')
	synerr("')' expected");
  act = body();
  return node4(FOR, init, cond, step, act);
}

/*
 * Parse the body of a loop.  Characters are read directly with Getc(),
 * skipping newlines, blanks and tabs.  If the first other character is
 * ';' the body is empty and a NULPROC node is returned; otherwise the
 * character is pushed back and a statement is parsed.
 */
static NODE *
body()
{
  NODE *r;

  do {
	sym = Getc();
  } while (sym == '\n' || sym == ' ' || sym == '\t');

  if (sym != ';') {
	Ungetc(sym);
	lex();
	return stat();
  }
  r = node0(NULPROC);
  lex();
  return r;
}

/*
 * Parse `do statement while (expr)'; the current token is DO.
 * Returns DO(body, condition).
 *
 * The condition is parsed as an expression, like the condition of `if'.
 * Parsing it with stat() made a condition that is a plain value, such
 * as `while (1)' or `while (x)', fail with "statement expected", because
 * stat() refuses a bare VALUE node.  EOL tokens before the condition
 * are still skipped, as stat() did.
 */
static NODE *
dodo()
{
  NODE *p, *q;

  lex();
  skipeol();
  p = stat();
  skipeol();
  if (sym != WHILE)
	synerr("'while' expected");
  lex();
  if (sym != '(')
	synerr("'(' expected");
  lex();
  skipeol();
  q = expr();
  if (sym != ')')
	synerr("')' expected");
  lex();
  return node2(DO, p, q);
}

/*
 * Parse `return [expr {, expr}]'; the current token is RETURN.
 * The result is a JUMP node with n_arg[0] == RETURN, followed by the
 * returned expressions and a terminating NULL.
 *
 * A bare `return' may be ended by EOL or directly by the '}' that
 * closes the enclosing block (`{ return }'); in the latter case the
 * '}' is left for the caller.  Previously the '}' was handed to expr()
 * and produced a syntax error.
 */
static NODE *
doreturn()
{
  NODE *p, *q, *r;
  int i, n = 0;

  if (lex() != EOL && sym != '}') {
	p = q = expr(); n++;
	while (sym == ',') {
		lex(); skipeol();
		r = expr(); n++;
		q->n_next = r;
		q = r;
	}
  }
  else
	p = (NODE *)NULL;

  r = (NODE *) emalloc(sizeof(*r) + sizeof (r) * (n + 1));
  r->n_type = JUMP;
  r->n_next = NULL;
  r->n_arg[0] = (NODE *) RETURN;
  /* move the list into the argument vector, unlinking as we go */
  for (i = 1; p != NULL; i++) {
	r->n_arg[i] = p;
	q = p->n_next;
	p->n_next = NULL;
	p = q;
  }
  r->n_arg[i] = NULL;
  return r;
}

/*
 * Parse a full expression: a conditional expression, optionally used
 * as the left side of an assignment.
 */
static NODE *
expr()
{
  NODE *lhs = expr1();

  return isassign(sym) ? doassign(sym, lhs) : lhs;
}

/* Return 1 if `sym' is one of the assignment operator tokens, else 0. */
static isassign(sym)
{
  static int ops[] = {
	ASSIGN, ADDEQ, SUBEQ, MULTEQ, DIVEQ, MODEQ, POWEQ
  };
  int i;

  for (i = 0; i < (int) (sizeof(ops) / sizeof(ops[0])); i++)
	if (sym == ops[i])
		return 1;
  return 0;
}

/*
 * Parse the right side of an assignment whose operator token `op' is
 * the current token and whose left side is `p'.  The right side is
 * parsed first, so chained assignments group from right to left.
 * Returns ASSIGN(op, left, right).
 */
static NODE *
doassign(op, p) NODE *p;
{
  NODE *rhs;

  lex();	/* skip the operator */
  rhs = expr();
  if (isassign(sym))
	rhs = doassign(sym, rhs);
  return node3(ASSIGN, (NODE *)op, p, rhs);
}

/*
 * Parse a conditional expression `e ? a : b'.  It is represented by an
 * IF node, IF(e, a, b); without a '?' the operand is returned
 * unchanged.
 */
static NODE *
expr1()
{
  NODE *cond, *yes, *no;

  cond = expr2();
  if (sym != '?')
	return cond;	/* 930213 */

  lex();
  yes = expr();
  if (sym != ':')
	synerr("':' expected");
  lex();
  no = expr();
  return node3(IF, cond, yes, no);
}

/*
 * Parse a chain of OR operators, grouping from the left into
 * COND(OR, left, right) nodes.  EOL tokens after the operator are
 * skipped.
 */
static NODE *
expr2()
{
  NODE *lhs, *rhs;

  lhs = expr3();
  for (;;) {
	if (sym != OR)
		return lhs;
	lex();
	skipeol();
	rhs = expr3();
	lhs = node3(COND, (NODE *)OR, lhs, rhs);
  }
}

/*
 * Parse a chain of AND operators, grouping from the left into
 * COND(AND, left, right) nodes.  EOL tokens after the operator are
 * skipped.
 */
static NODE *
expr3()
{
  NODE *lhs, *rhs;

  lhs = expr4();
  for (;;) {
	if (sym != AND)
		return lhs;
	lex();
	skipeol();
	rhs = expr4();
	lhs = node3(COND, (NODE *)AND, lhs, rhs);
  }
}

/*
 * Parse `expr in array' and the comparison / match operators.
 *
 * `in' is only recognised while forflg is clear (dofor() raises it for
 * the first expression of a `for'); the result is IN(expr, array cell).
 * Comparisons group from the left into COND(operator, left, right).
 */
static NODE *
expr4()
{
  NODE *lhs, *rhs;
  CELL *arr;
  int rel;

  lhs = expr5();
  if (sym == IN && !forflg) {
	lex();
	arr = getvar(text, hashtab, ARR);
	lex();
	return node2(IN, lhs, arr);
  }
  for (;;) {
	rel = sym;
	if (!(rel == EQ || rel == NE || rel == LT || rel == LE
		|| rel == GT || rel == GE || rel == MATCH
		|| rel == NOMATCH))
		break;
	lex();
	rhs = expr5();
	lhs = node3(COND, (NODE *)rel, lhs, rhs);
  }
  return lhs;
}

/*
 * Parse string concatenation: as long as the current token can start
 * another operand (see iscat()), the next operand is parsed and joined
 * to the left side with a CAT node.
 */
static NODE *
expr5()
{
  NODE *lhs, *rhs;

  for (lhs = expr6(); iscat(sym); lhs = node2(CAT, lhs, rhs))
	rhs = expr6();
  return lhs;
}

/*
 * Return `sym' if it is one of the tokens listed in ctab[] (tokens after
 * which expr5() continues a concatenation), and 0 otherwise.
 */
static iscat(sym)
{
  static int ctab[] = {
	ADD, SUB, MULT, DIV, MOD, INC, DEC, STRING, NUMBER, IDENT, '(',
	MATHFUN, STRFUN, SPRINTF, '$', SUBST, ARG, CALL, 0
	};
  register int *tp;

  for (tp = ctab; *tp != 0 && *tp != sym; tp++)
	;
  return *tp;	/* the matching entry, or the terminating 0 */
}

/*
 * Parse additive expressions.  A leading '+' or '-' is consumed as a
 * sign; a leading '-' wraps the first operand in ARITH(UMINUS, operand).
 * Binary '+' and '-' group from the left into ARITH(op, left, right).
 */
static NODE *
expr6()
{
  register int op = sym;
  NODE *lhs, *rhs;

  if (op == ADD || op == SUB)
	lex();	/* unary sign */
  lhs = expr7();
  if (op == SUB)
	lhs = node2(ARITH, (NODE *)UMINUS, lhs);

  while (sym == ADD || sym == SUB) {
	op = sym;
	lex();
	rhs = expr7();
	switch (op) {
	case ADD:
	case SUB:
		lhs = node3(ARITH, (NODE *)op, lhs, rhs);
		break;
	default:	/* not reached: the loop condition admits only ADD/SUB */
		synerr("'+' or '-' expected");
		break;
	}
  }
  return lhs;
}

/*
 * Parse multiplicative expressions: '*', '/' and '%' group from the
 * left into ARITH(op, left, right) nodes.
 *
 * synerr() passes its message to fprintf() as the format string, so
 * the '%' in the defensive message below is written as "%%".  A bare
 * '%' there would be read as the start of a conversion specification.
 */
static NODE *
expr7()
{
  register int op;
  NODE *p, *q;

  p = expr8();
  while (sym == MULT || sym == DIV || sym == MOD) {
	op = sym;
	lex();
	q = expr8();
	switch (op) {
	case MULT:	p = node3(ARITH, (NODE *)MULT, p, q); break;
	case DIV:	p = node3(ARITH, (NODE *)DIV, p, q); break;
	case MOD:	p = node3(ARITH, (NODE *)MOD, p, q); break;
	default:	/* not reached: guarded by the loop condition */
		synerr("'*', '/' or '%%' expected");
		break;
	}
  }
  return p;
}

/*
 * Parse logical negation and exponentiation.
 *
 *   ! operand          -> COND(NOT, operand)
 *   base ^ exponent    -> ARITH(POWER, base, exponent)
 *
 * The exponent is parsed by a recursive call, so a chain such as
 * a ^ b ^ c is accepted and groups from the right, a ^ (b ^ c).
 * Previously only one '^' was consumed and the second one was left
 * behind as an unexpected token.
 */
static NODE *
expr8()
{
  NODE *p;

  if (sym == NOT) {
	lex();
	return node2(COND, (NODE *)NOT, expr9());
  }
  p = expr9();
  if (sym == POWER) {
	lex();
	p = node3(ARITH, (NODE *)POWER, p, expr8());
  }
  return p;
}

/*
 * Parse increment / decrement and `expr | getline'.
 *
 *   ++x, --x   -> ARITH(INCDEC, x, +1 or -1, PRE); x must be introduced
 *                 by an IDENT or ARG token
 *   x++, x--   -> ARITH(INCDEC, x, +1 or -1, POST); only applied when
 *                 the operand started with an IDENT or ARG token,
 *                 otherwise the operator token is left unconsumed
 *   e | getline [var]
 *              -> GETLINE(var or NULL, e, R_PIN)
 */
static NODE *
expr9()
{
  NODE *p, *q;
  int op, sym0;

  op = isincdec(sym);
  if (op != 0) {	/* prefix form */
	lex();
	if (sym != IDENT && sym != ARG)
		synerr("illegal '++/--' operator");
	p = expr10();
	return node4(ARITH, (NODE *)INCDEC, p, (NODE *)op, (NODE *)PRE);
  }

  sym0 = sym;	/* remember what the operand started with */
  p = expr10();
  op = isincdec(sym);
  if (op != 0 && (sym0 == IDENT || sym0 == ARG)) {
	p = node4(ARITH, (NODE *)INCDEC, p, (NODE *)op, (NODE *)POST);
	lex();
  }
  if (sym != BINOR)
	return p;

  /* | getline */
  lex();
  if (sym != GETLINE)
	synerr("'GETLINE' expected");
  lex();
  q = (sym == IDENT || sym == STRING || sym == ARG) ? expr() : NULL;
  return node3(GETLINE, q, p, (NODE *)R_PIN);
}

/* Return 1 for the INC token, -1 for the DEC token and 0 otherwise. */
static isincdec(sym)
{
  if (sym == INC)
	return 1;
  if (sym == DEC)
	return -1;
  return 0;
}

static NODE *
expr10()
{
  NODE *p, *q;
  CELL *u, *v;
  int op;
  int c;
int gsave, psave;
  double atof();

/*
printf("expr10(%d)(%s)\n", sym, text);
*/
  switch (sym) {
  case STRING:
	u = mkcell(STR, text, 0.0);
	goto g1;
  case NUMBER:
	u = mkcell(NUM, NULL, atof(text));
g1:
	p = node1(VALUE, u);
	lex();
	break;
  case IDENT: case ARG:
	if ((c = Getc()) == '[') {	/* array */
		/* 940403 */
		if (sym == ARG) {
			u = (CELL *)emalloc(sizeof(CELL));
			u = mkcell(POS, NULL, (double)sym1);
			p = doarray(u);
		}
		else {
			u = getvar(text, hashtab, ARR);
			p = doarray(u);
		}
	}
	else {
		Ungetc(c);
		if (sym == ARG) {
			u = mkcell(POS, NULL, (double)sym1);
			p = node1(ARG, u);
		}
		else {	/* symple variable */
			u = getvar(text, hashtab, VAR|STR|NUM);
			p = node1(VALUE, u);
		}
	}
	lex();
	break;
  case '(':
	/* print >(x ? y : z) needs this */
gsave = getlineflg; psave = printflg;
getlineflg = printflg = 0;
	lex();
	p = expr();
	if (sym == ',')	/* (expr, expr, .. ) */
		p = doelement(p);
	if (sym != ')')
		synerr("')' expected");
getlineflg = gsave; printflg = psave;
	lex();
	break;
  case CALL:
	p = dofuncn(sym, getvar(text, funtab, UDF));
	break;
  case MATHFUN: case STRFUN: case SUBST:
	p = dofuncn(sym, (CELL *)sym1);
	break;
  case SPRINTF:
	p = doprint(FORMAT|STROUT);
	break;
  case '$':
	lex();
	switch (sym) {
	case NUMBER:
		u = mkcell(NUM, NULL, atof(text));
		p = node1(VALUE, u);
		p = node1(FIELD, p);
		lex();
		break;
	case IDENT: case ARG: case '(':
		p = node1(FIELD, expr10());
		break;
	default:
		synerr("number or identifier expected after '$'", (char *)0);
	}
	break;
  case DIV:
	regexflg++;
	lex();
	regexflg = 0;
	u = mkcell(PAT, NULL, 0.0);
	u->c_sval = (char *) mkpat(text);
	p = node1(VALUE, u);
	lex();
	break;
  case GETLINE:
	getlineflg++;
	lex();
	if (sym == IDENT || sym == STRING || sym == ARG)
		q = expr10();	/* read into var */
	else
		q = NULL;
	getlineflg = 0;
	if (sym == R_IN) {
		op = R_IN;
		lex();
		p = expr10();
	}
	else
		op = (int) (p = NULL);
	p = node3(GETLINE, q, p, (NODE *)op);
	break;
  default:
	synerr(
	"identifier, number, string, argument, regexpr, call or '(' expected");
	break;
  }
  return p;
}

/*
 * Parse the argument list of a function call; the current token is the
 * function token.  `fun' becomes the node type and `op' is stored in
 * n_arg[0] (callers pass a function cell or a builtin code cast to
 * CELL *).  The node is laid out as
 *   n_arg[0]    op
 *   n_arg[1]    number of arguments
 *   n_arg[2..]  the arguments, terminated by NULL
 * A call without '(' yields a node with zero arguments.
 *
 * For SPLIT the second argument is entered as an array in hashtab
 * before it is parsed, unless it is a function parameter.
 *
 * The arguments are collected in a fixed-size array; a call with more
 * arguments than it can hold is rejected with a syntax error instead
 * of writing past its end.
 */
static NODE *
dofuncn(fun, op) CELL *op;
{
  NODE *p;
  int i, j;
  int n = 0;
  NODE *a[100];

  if (lex() == '(') {
	prmflg++;
	for (lex(); sym && (sym != ')'); n++) {
		if (n >= (int) (sizeof(a) / sizeof(a[0])))
			synerr("too many arguments");
		if ((int)op == SPLIT && n == 1) {
			if (sym != ARG) {	/*isarg(text) == -1*/
				/* make an array if not exist */
				prmflg = 0;
				getvar(text, hashtab, ARR);
				prmflg++;
			}
		}
		a[n] = expr();
		if (sym == ',')
			lex();
		else if (sym != ')')
			synerr("',' or ')' expected");
	}
	prmflg = 0;

	if (sym == ')')
		lex();
	else
		synerr("')' expected");
  }
  p = (NODE *) emalloc(sizeof(*p) + sizeof(p) * (n + 2));
  p->n_type = fun;
  p->n_next = NULL;
  p->n_arg[0] = (NODE *) op;
  p->n_arg[1] = (NODE *) n;
  for (i = 0, j = 2; i < n; )
	p->n_arg[j++] = a[i++];
  p->n_arg[j] = NULL;
  return p;
}

/*
 * Parse the subscript list of an array reference; the '[' has already
 * been read by the caller.  `u' is the cell that stands for the array.
 * The resulting ARRAY node is laid out as
 *   n_arg[0]    u
 *   n_arg[1]    number of subscripts
 *   n_arg[2..]  the subscript expressions
 * The closing ']' is checked but left as the current token for the
 * caller to skip.
 *
 * The subscripts are collected in a fixed-size array; a reference with
 * more subscripts than it can hold is rejected with a syntax error
 * instead of writing past its end.
 */
static NODE *
doarray(u) CELL *u;
{
  NODE *p;
  int i, j;
  int n;
  NODE *a[20];

  for (lex(), n = 0; sym &&  sym != ']'; n++) {
	if (n >= (int) (sizeof(a) / sizeof(a[0])))
		synerr("too many subscripts");
	a[n] = expr();
	if (sym == ',')
		lex();
  }
  if (sym != ']')
	synerr("']' expected");
  /* left ']' for expr10() */
  p = (NODE *) emalloc(sizeof(*p) + sizeof(p) * (n + 1));
  p->n_type = ARRAY;
  p->n_next = NULL;
  p->n_arg[0] = (NODE *)u;
  p->n_arg[1] = (NODE *) n;
  for (i = 0, j = 2; i < n; )
	p->n_arg[j++] = a[i++];
  return p;
}

/*
 * Parse the rest of a parenthesised list `(expr, expr, ...)'.  `q' is
 * the first expression, already parsed by the caller, and the current
 * token is the ',' that follows it.  The resulting ELEMENT node is laid
 * out as
 *   n_arg[0]    NULL
 *   n_arg[1]    number of expressions
 *   n_arg[2..]  the expressions
 * The closing ')' is left as the current token for the caller.
 *
 * The expressions are collected in a fixed-size array; a list with more
 * entries than it can hold is rejected with a syntax error instead of
 * writing past its end.
 */
static NODE *
doelement(q) NODE *q;
{
  NODE *p;
  int i, j;
  int n;
  NODE *a[20];

  a[0] = q;
  for (lex(), n = 1; sym &&  sym != ')'; n++) {
	if (n >= (int) (sizeof(a) / sizeof(a[0])))
		synerr("too many elements");
	a[n] = expr();
	if (sym == ',')
		lex();
	else if (sym != ')')
		synerr("',' or ')' expected");
  }
  /* left ')' for expr10() */
  p = (NODE *) emalloc(sizeof(*p) + sizeof(p) * (n + 1));
  p->n_type = ELEMENT;
  p->n_next = NULL;
  p->n_arg[0] = NULL;
  p->n_arg[1] = (NODE *) n;
  for (i = 0, j = 2; i < n; )
	p->n_arg[j++] = a[i++];
  return p;
}

/*
 * Report a syntax error on stderr and terminate with exit status 1.
 *
 * The report consists of a header with the command name and line number
 * (plus the function name while a function is being parsed), a piece of
 * the source text leading up to the error position followed by " <--",
 * and finally the message.
 *
 * s is used as an fprintf() format string with t as its only argument,
 * so a literal '%' in a message has to be written as "%%".  Most callers
 * in this file pass s alone.
 */
synerr(s, t) char *s, *t;
{
  extern int lineno;
  extern char line[], *linep;
  int c, i;
  char *u, *v;

  fprintf(stderr, "%s: Syntax error at line %d", cmd, lineno);
  if (funflg)
	fprintf(stderr, " in function %s", funnam);
  fprintf(stderr, ":\n");
  /* line[] is walked as a circular buffer of BUFSIZ characters: a
   * pointer that moves below line continues at line + BUFSIZ - 1.
   * v is the character just before linep. */
  if ((v = linep - 1) < line)
	v = line + BUFSIZ - 1;
  /* Search backwards from v for the second newline; u ends up where
   * the excerpt starts. */
  for (i = 0, u = v - 1; ; --u) {
	if (u < line) {
		/* presumably a NUL in the last slot means the buffer has
		 * not wrapped yet, so the text starts at line[0] */
		if (line[BUFSIZ - 1] == '\0')
			break;
		u = line + BUFSIZ - 1;
	}
	if (*u == '\n' && ++i == 2)
		break;
  }
  /* NOTE(review): when the loop above stops at the start of the buffer,
   * u is line - 1 and the first fputc() below reads line[-1] -- confirm
   * against the lexer's buffer layout. */
  if (u != v) {
	/* print from u up to and including v, wrapping at the end */
	while (u != v) {
		fputc(*u, stderr);
		if ((++u - line) == BUFSIZ)
			u = line;
	}
	if (*u != '\n')
		fputc(*u, stderr);
	fprintf(stderr, " <--\n\n");
/*
	fprintf(stderr, " <-- ");
	while ((c = Getc()) != EOF && c != '\n')
		fputc(c, stderr);
	fprintf(stderr, "\n");
	if (c == EOF);
		fprintf(stderr, "\n");
*/
  }
  fprintf(stderr, s, t);
  fprintf(stderr, "\n");
#ifdef DOS
  closeall();
#endif
  exit(1);
}