1BSD/pi/yyrecover.c

Compare this file to the similar file:
Show the results in this format:

#
/*
 * pi - Pascal interpreter code translator
 *
 * Charles Haley, Bill Joy UCB
 * Version 1.0 August 1977
 *
 *
 * pxp - Pascal execution profiler
 *
 * Bill Joy UCB
 * Version 1.0 August 1977
 */

#include "whoami"
#include "0.h"
#include "yy.h"

/*
 * Very simplified version of Graham-Rhodes error recovery
 * method for LALR parsers.  Backward move is embodied in
 * default reductions of the yacc parser until an error condition
 * is reached.  Forward move is over a small number of input tokens
 * and cannot "condense".  The basic corrections are:
 *
 *	1) Delete the input token.
 *
 *	2) Replace the current input with a legal input.
 *
 *	3) Insert a legal token.
 *
 * All corrections are weighted, considered only if they allow
 * at least two shifts, and the cost of a correction increases if
 * it allows shifting over only a part of the lookahead.
 *
 * Another error situation is that which occurs when an identifier "fails"
 * a reduction because it is not the required "class".
 * In this case, we also consider replacing this identifier, which has
 * already been shifted over, with an identifier of the correct class.
 *
 * Another correction performed here is unique symbol insertion.
 * If the current state admits only one input, and no other alternative
 * correction presents itself, then that symbol will be inserted.
 * There is a danger in this of looping, and it is handled
 * by counting true shifts over input (see below).
 *
 *
 * A final class of corrections, considered only when the error
 * occurred immediately after a shift over a terminal, involves
 * the three basic corrections above, but with the point of error
 * considered to be before this terminal was shifted over, effectively
 * "unreading" this terminal.  This is a feeble attempt at elimination
 * of the left-right bias and because "if" has a low weight and some
 * statements are quite simple i.e.
 *
 *	cse ch of ...
 *
 * we can get a small number of errors.  The major deficiency of
 * this is that we back up only one token, and that the forward
 * move is over a small number of tokens, often not enough to really
 * tell what the input should be, e.g. in
 *
 *	a[i] > a[i - 1] ...
 *
 * In such cases a bad identifier (misspelled keyword) or omitted
 * keyword will be change or inserted as "if" as it has the lowest cost.
 * This is not terribly bad, as "if"s are most common.
 * This also allows the correction of other errors.
 *
 * This recovery depends on the default reductions which delay
 * noticing the error until the parse reaches a state where the
 * relevant "alternatives" are visible.  Note that it does not
 * consider tokens which will cause reductions before being
 * shifted over.  This requires the grammar to be written in a
 * certain way for the recovery to work correctly.
 * In some sense, also, the recovery suffers because we have
 * LALR(1) tables rather than LR(1) tables, e.g. in
 *
 *	if rec.field < rec2,field2 then
 */

/*
 * Definitions of possible corrective actions
 */
#define	CPANIC		0
#define	CDELETE		1
#define	CREPLACE	2
#define	CINSERT		3
#define	CUNIQUE		4
#define	CCHIDENT	5

/*
 * Multiplicative cost factors for corrective actions.
 *
 * When an error occurs we take YCSIZ - 1 look-ahead tokens.
 * If a correction being considered will shift over only part of
 * that look-ahead, it is not completely discarded, but rather
 * "weighted", its cost being multiplied by a weighting factor.
 * For a correction to be considered its weighted cost must be less
 * than CLIMIT.
 *
 * Non-weighted costs are considered:
 *
 *	LOW	<= 3
 *	MEDIUM	4,5
 *	HIGH	>= 6
 *
 * CURRENT WEIGHTING STRATEGY: Aug 20, 1977
 *
 * For all kinds of corrections we demand shifts over two symbols.
 * Corrections have high weight even after two symbol
 * shifts because the costs for deleting and inserting symbols are actually
 * quite low; we do not want to change weighty symbols 
 * on inconclusive evidence.
 *
 * The weights are the same after the third look ahead.
 * This prevents later, unrelated errors from causing "funny"
 * biases of the weights toward one type of correction.
 *
 * Current look ahead is 5 symbols.
 */

/*** CLIMIT is defined in yy.h for yycosts ***/
#define	CPRLIMIT	50
#define	CCHIDCOST	4

char	insmult[8]	INFINITY, INFINITY, INFINITY, 15, 8, 6, 3, 1;
char	repmult[7]	INFINITY, INFINITY, INFINITY, 8, 6, 3, 1;
char	delmult[6]	INFINITY, INFINITY, INFINITY, 6, 3, 1;

#define	NOCHAR	-1

#define	Eprintf	if (errtrace) printf
#define	Tprintf	if (testtrace) printf

/*
 * Action arrays of the parser needed here
 */
int	yyact[], yypact[], *yypv;

/*
 * Yytips is the tip of the stack when using
 * the function loccor to check for local
 * syntactic correctness. As we don't want
 * to copy the whole parser stack, but want
 * to simulate parser moves, we "split"
 * the parser stack and keep the tip here.
 */
#define	YYTIPSIZ 16
int	yytips[YYTIPSIZ], yytipct;
int	yytipv[YYTIPSIZ];

/*
 * The array YC saves the lookahead tokens for the
 * forward moves.
 * Yccnt is the number of tokens in the YC array.
 */
#define	YCSIZ	6

int	yCcnt;
struct	yytok YC0[YCSIZ + 1];
struct	yytok *YC;

/*
 * YCps gives the top of stack at
 * the point of error.
 */

char	yyunique	1;

STATIC	unsigned yyTshifts;

/*
 * Cact is the corrective action we have decided on
 * so far, ccost its cost, and cchar the associated token.
 * Cflag tells if the correction is over the previous input token.
 */
int	cact, ccost, cchar, cflag;

/*
 * ACtok holds the token under
 * consideration when examining
 * the lookaheads in a state.
 */
struct	yytok ACtok;

#define acchar	ACtok.Yychar
#define aclval	ACtok.Yylval

/*
 * Make a correction to the current stack which has
 * top of stack pointer Ps.
 */
yyrecover(Ps0, idfail)
	int *Ps0, idfail;
{
	register int c, i;
	int yyrwant, yyrhave;

#ifdef PI
	Recovery = 1;
#endif

	YC = &YC0[1];
#ifdef DEBUG
	if (errtrace) {
		setpfx('p');
		yerror("Point of error");
		printf("States %d %d ...", Ps0[0], Ps0[-1]);
		if (idfail)
			printf(" [Idfail]");
		putchar('\n');
		printf("Input %s%s", tokname(&Y));
	}

#endif
	/*
	 * We first save the current input token
	 * and its associated semantic information.
	 */
	if (yychar < 0)
		yychar = yylex();
	copy(&YC[0], &Y, sizeof Y);

	/*
	 * Set the default action and cost
	 */
	cact = CPANIC, ccost = CLIMIT, cflag = 0;

	/*
	 * Peek ahead
	 */
	for (yCcnt = 1; yCcnt < YCSIZ; yCcnt++) {
		yychar = yylex();
		copy(&YC[yCcnt], &Y, sizeof YC[0]);
#ifdef DEBUG
		Eprintf(" | %s%s", tokname(&YC[yCcnt]));
#endif
	}
#ifdef DEBUG
	Eprintf("\n");
#endif

	/*
	 * If we are here because a reduction failed, try
	 * correcting that.
	 */
	if (idfail) {
		/*
		 * Save the particulars about
		 * the kind of identifier we want/have.
		 */
		yyrwant = yyidwant;
		yyrhave = yyidhave;
#ifdef DEBUG
		Tprintf("  Try Replace %s identifier with %s identifier cost=%d\n",
		    classes[yyidhave], classes[yyidwant], CCHIDCOST);
#endif

		/*
		 * Save the semantics of the ID on the
		 * stack, and null them out to free
		 * up the reduction in question.
		 */
		i = yypv[0];
		yypv[0] = nullsem(YID);
		c = correct(NOCHAR, 0, CCHIDCOST, &repmult[2], Ps0, yypv);
		yypv[0] = i;
#ifdef DEBUG
		if (c < CPRLIMIT || fulltrace)
			Eprintf("Cost %2d Replace %s identifier with %s identifier\n", c, classes[yyrhave], classes[yyrwant]);
#endif
		if (c < ccost)
			cact = CCHIDENT, ccost = c, cchar = YID;
	}

	/*
	 * First try correcting the state we are in
	 */
	trystate(Ps0, yypv, 0, &insmult[1], &delmult[1], &repmult[1]);

	/*
	 * Now, if we just shifted over a terminal, try
	 * correcting it.
	 */
	if (OY.Yychar != -1 && OY.Yylval != nullsem(OY.Yychar)) {
		YC--;
		copy(&YC[0], &OY, sizeof YC[0]);
		trystate(Ps0 - 1, yypv - 1, 1, insmult, delmult, repmult);
		if (cflag == 0)
			YC++;
		else {
			yypv--;
#ifdef PXP
			yypw--;
#endif
			Ps0--;
			yCcnt++;
		}
	}

	/*
	 * Restoring the first look ahead into
	 * the scanner token allows the error message
	 * routine to print the error message with the text
	 * of the correct line.
	 */
	copy(&Y, &YC[0], sizeof Y);

	/*
	 * Unique symbol insertion.
	 *
	 * If there was no reasonable correction found,
	 * but only one input to the parser is acceptable
	 * we report that, and try it.
	 *
	 * Special precautions here to prevent looping.
	 * The number of true inputs shifted over at the point
	 * of the last unique insertion is recorded in the
	 * variable yyTshifts.  If this is not less than
	 * the current number in yytshifts, we do not insert.
	 * Thus, after one unique insertion, no more unique
	 * insertions will be made until an input is shifted
	 * over.  This guarantees termination.
	 */
	if (cact == CPANIC && !idfail) {
		register int *ap;

		ap = &yyact[yypact[*Ps0 + 1]];
		if (*ap == -ERROR)
			ap =+ 2;
		if (ap[0] <= 0 && ap[2] > 0) {
			cchar = -ap[0];
			if (cchar == YEOF)
				yyexeof();
			if (cchar != ERROR && yyTshifts < yytshifts) {
				cact = CUNIQUE;
#ifdef DEBUG
				Eprintf("Unique symbol %s%s\n", charname(cchar));
#endif
				/*
				 * Note that the inserted symbol
				 * will not be counted as a true input
				 * (i.e. the "yytshifts--" below)
				 * so that a true shift will be needed
				 * to make yytshifts > yyTshifts.
				 */
				yyTshifts = yytshifts;
			}
		}
	}

	/*
	 * Set up to perform the correction.
	 * Build a token appropriate for replacement
	 * or insertion in the yytok structure ACchar
	 * having the attributes of the input at the
	 * point of error.
	 */
	copy(&ACtok, &YC[0], sizeof ACtok);
	acchar = cchar;
	aclval = nullsem(acchar);
	if (aclval != NIL)
		recovered();
	switch (cact) {
		/*
		 * Panic, just restore the
		 * lookahead and return.
		 */
		case CPANIC:
			setpfx('E');
			if (idfail) {
				copy(&Y, &OY, sizeof Y);
				if (yyrhave == NIL) {
#ifdef PI
					if (yybaduse(yypv[0], yyeline, ISUNDEF) == NIL)
#endif
						yerror("Undefined identifier");
				} else {
					yerror("Improper %s identifier", classes[yyrhave]);
#ifdef PI
					yybaduse(yypv[0], yyeline, NIL);
#endif
				}
				/*
				 * Suppress message from panic routine
				 */
				yyshifts = 1;
			}
			i = 0;
			/* Note that on one path we dont touch yyshifts ! */
			break;
		/*
		 * Delete the input.
		 * Mark this as a shift over true input.
		 * Restore the lookahead starting at
		 * the second token.
		 */
		case CDELETE:
			if (ccost != 0)
				yerror("Deleted %s%s", tokname(&YC[0]));
			yytshifts++;
			i = 1;
			yyshifts = 0;
			break;
		/*
		 * Replace the input with a new token.
		 */
		case CREPLACE:
			if (acchar == YEOF)
				yyexeof();
			if (acchar == YEND)
				aclval = NIL;
			yerror("Replaced %s%s with a %s%s",
			    tokname(&YC[0]), tokname(&ACtok));
			copy(&YC[0], &ACtok, sizeof YC[0]);
			i = 0;
			yyshifts = 0;
			break;
		/*
		 * Insert a token.
		 * Don't count this token as a true input shift.
		 * For inserted "end"s pas.y is responsible
		 * for the error message later so suppress it.
		 * Restore all the lookahead.
		 */
		case CINSERT:
			if (acchar == YEOF)
				yyexeof();
			if (acchar != YEND)
				yerror("Inserted %s%s", tokname(&ACtok));
			yytshifts--;
			i = 0;
			yyshifts = 0;
			break;
		/*
		 * Make a unique symbol correction.
		 * Like an insertion but a different message.
		 */
		case CUNIQUE:
			setpfx('E');
			yerror("Expected %s%s", tokname(&ACtok));
			yytshifts--;
			i = 0;
			if (ccost == 0 || yyunique)
				yyshifts = 0;
			else
				yyshifts = -1;
			break;
		/*
		 * Change an identifier's type
		 * to make it work.
		 */
		case CCHIDENT:
			copy(&Y, &OY, sizeof Y);
#ifdef PI
			i = 1 << yyrwant;
#endif
			if (yyrhave == NIL) {
				yerror("Undefined %s", classes[yyrwant]);
#ifdef PI
				i =| ISUNDEF;
#endif
			} else
				yerror("Replaced %s id with a %s id", classes[yyrhave], classes[yyrwant]);
#ifdef PI
			yybaduse(yypv[0], yyeline, i);
#endif
			yypv[0] = nullsem(YID);
			i = 0;
			yyshifts = 0;
			break;
	}

	/*
	 * Restore the desired portion of the lookahead,
	 * and possibly the inserted or unique inserted token.
	 */
	for (yCcnt--; yCcnt >= i; yCcnt--)
		unyylex(&YC[yCcnt]);
	if (cact == CINSERT || cact == CUNIQUE)
		unyylex(&ACtok);

	/*
	 * Put the scanner back in sync.
	 */
	yychar = yylex();

	/*
	 * We succeeded if we didn't "panic".
	 */
	Recovery = 0;
	Ps = Ps0;
	return (cact != CPANIC);
}

yyexeof()
{

	yerror("End-of-file expected - QUIT");
	pexit(ERRS);
}

yyunexeof()
{

	yerror("Unexpected end-of-file - QUIT");
	pexit(ERRS);
}

/*
 * Try corrections with the state at Ps0.
 * Flag is 0 if this is the top of stack state,
 * 1 if it is the state below.
 */
trystate(Ps0, Pv0, flag, insmult, delmult, repmult)
	int *Ps0, *Pv0, flag;
	char *insmult, *delmult, *repmult;
{
	/*
	 * C is a working cost, ap a pointer into the action
	 * table for looking at feasible alternatives.
	 */
	register int c, *ap;
	int i, *actions;

#ifdef DEBUG
	Eprintf("Trying state %d\n", *Ps0);
#endif
	/*
	 * Try deletion.
	 * Correct returns a cost.
	 */
#ifdef DEBUG
	Tprintf("  Try Delete %s%s cost=%d\n", tokname(&YC[0]), delcost(YC[0].Yychar));
#endif
	c = delcost(YC[0].Yychar);
#ifndef DEBUG
	if (c < ccost) {
#endif
		c = correct(NOCHAR, 1, c, delmult, Ps0, Pv0);
#ifdef DEBUG
		if (c < CPRLIMIT || fulltrace)
			Eprintf("Cost %2d Delete %s%s\n", c, tokname(&YC[0]));
#endif
		if (c < ccost)
			cact = CDELETE, ccost = c, cflag = flag;
#ifndef DEBUG
	}
#endif

	/*
	 * Look at the inputs to this state
	 * which will cause parse action shift.
	 */
	aclval = NIL;
	ap = &yyact[yypact[*Ps0 + 1]];

	/*
	 * Skip action on error to
	 * detect true unique inputs.
	 * Error action is always first.
	 */
	if (*ap == -ERROR) 
		ap=+ 2;

	/*
	 * Loop through the test actions
	 * for this state.
	 */
	for (actions = ap; *ap <= 0; ap =+ 2) {
		/*
		 * Extract the token of this action
		 */
		acchar = -*ap;

		/*
		 * Try insertion
		 */
#ifdef DEBUG
		Tprintf("  Try Insert %s%s cost=%d\n", charname(acchar), inscost(acchar));
#endif
		c = inscost(acchar, YC[0].Yychar);
#ifndef DEBUG
		if (c < ccost) {
#endif
			if (c == 0) {
				c = correct(acchar, 0, 1, insmult + 1, Ps0, Pv0);
#ifdef DEBUG
				Eprintf("Cost %2d Freebie %s%s\n", c, charname(acchar));
#endif
				if (c < ccost)
					cact = CUNIQUE, ccost = 0, cchar = acchar, cflag = flag;
			} else {
				c = correct(acchar, 0, c, insmult, Ps0, Pv0);
#ifdef DEBUG
				if (c < CPRLIMIT || fulltrace)
					Eprintf("Cost %2d Insert %s%s\n", c, charname(acchar));
#endif
				if (c < ccost)
					cact = CINSERT, ccost = c, cchar = acchar, cflag = flag;
			}
#ifndef DEBUG
		}
#endif

		/*
		 * Try replacement
		 */
#ifdef DEBUG
		Tprintf("  Try Replace %s%s with %s%s cost=%d\n",
		    tokname(&YC[0]), charname(acchar), repcost(YC[0].Yychar, acchar));
#endif
		c = repcost(YC[0].Yychar, acchar);
#ifndef DEBUG
		if (c < ccost) {
#endif
			c = correct(acchar, 1, repcost(YC[0].Yychar, acchar), repmult, Ps0, Pv0);
#ifdef DEBUG
			if (c < CPRLIMIT || fulltrace)
				Eprintf("Cost %2d Replace %s%s with %s%s\n", c, tokname(&YC[0]), tokname(&ACtok));
#endif
			if (c < ccost)
				cact = CREPLACE, ccost = c, cchar = acchar, cflag = flag;
#ifndef DEBUG
		}
#endif
	}
}

int	*yCpv;
char	yyredfail;
/*
 * Compute the cost of a correction
 * C is the base cost for it.
 * Fchar is the first input character from
 * the current state, NOCHAR if none.
 * The rest of the inputs come from the array
 * YC, starting at origin and continuing to the
 * last character there, YC[yCcnt - 1].Yychar.
 *
 * The cost returned is INFINITE if this correction
 * allows no shifts, otherwise is weighted based
 * on the number of shifts this allows against the
 * maximum number possible with the available lookahead.
 */
correct(fchar, origin, c, multvec, Ps0, Pv0)
	int fchar, origin, c;
	char *multvec;
	int *Ps0, *Pv0;
{
	/*
	 * The ntok structure is used to build a
	 * scanner structure for tokens inserted
	 * from the argument fchar.
	 */
	struct yytok ntok;
	register char *mv;

	/*
	 * Ps is the top of the parse stack after the most
	 * recent local correctness check.  Loccor returns
	 * NIL when we cannot shift.
	 */
	register int *ps;

	yyredfail = 0;
	/*
	 * Initialize the tip parse and semantic stacks.
	 */
	ps = Ps0;
	yytips[0] = *ps;
	ps--;
	yytipv[0] = Pv0[0];
	yCpv = Pv0 - 1;
	yytipct = 1;

	/*
	 * Shift while possible.
	 * Adjust cost as necessary.
	 */
	mv = multvec;
	do {
		if (fchar != NOCHAR) {
			copy(&ntok, &YC[0], sizeof ntok);
			ntok.Yychar = fchar, ntok.Yylval = nullsem(fchar);
			fchar = NOCHAR;
			ps = loccor(ps, &ntok);
		} else
			ps = loccor(ps, &YC[origin++]);
		if (ps == NIL) {
			if (yyredfail && mv > multvec)
				mv--;
			c =* *mv;
			break;
		}
		mv++;
	} while (*mv != 1);
	return (c);
}

extern	int yygo[], yypgo[], yyr1[], yyr2[];
/*
 * Local syntactic correctness check.
 * The arguments to this routine are a
 * top of stack pointer, ps, and an input
 * token tok.  Also, implicitly, the contents
 * of the yytips array which contains the tip
 * of the stack, and into which the new top
 * state on the stack will be placed if we shift.
 *
 * If we succeed, we return a new top of stack
 * pointer, else we return NIL.
 */
loccor(ps, ntok)
	int *ps;
	struct yytok *ntok;
{
	register int *p, n;
	register int nchar;
	int i;

	if (ps == NIL)
		return (NIL);
	nchar = ntok->Yychar;
	yyeline = ntok->Yyeline;
#ifdef DEBUG
	Tprintf("    Stack ");
	for (i = yytipct - 1; i >= 0; i--)
		Tprintf("%d ", yytips[i]);
	Tprintf("| %d, Input %s%s\n", *ps, charname(nchar));
#endif
	/*
	 * As in the yacc parser yyparse,
	 * p traces through the action list
	 * and "n" is the information associated
	 * with the action.
	 */
newstate:
	p = &yyact[ yypact[yytips[yytipct - 1]+1] ];

actn:
	/*
	 * Search the parse actions table
	 * for something useful to do.
	 * While n is non-positive, it is the
	 * arithmetic inverse of the token to be tested.
	 * This allows a fast check.
	 */
	while ((n = *p++) <= 0)
		if ((n =+ nchar) != 0)
			p++;
	switch (n >> 12) {
		/*
		 * SHIFT
		 */
		case 2:
			n =& 07777;
			yyredfail = 0;
			if (nchar == YID)
				yyredfail++;
			if (yytipct == YYTIPSIZ) {
tipover:
#ifdef DEBUG
				Tprintf("\tTIP OVFLO\n");
#endif
				return (NIL);
			}
			yytips[yytipct] = n;
			yytipv[yytipct] = ntok->Yylval;
			yytipct++;
#ifdef DEBUG
			Tprintf("\tShift to state %d\n", n);
#endif
			return (ps);
		/*
		 * REDUCE
		 */
		case 3:
			n =& 07777;
			if (yyEactr(n, yytipv[yytipct - 1]) == 0) {
#ifdef DEBUG
				Tprintf("\tYyEactr objects: have %s id, want %s id\n", classes[yyidhave], classes[yyidwant]);
#endif
				return (NIL);
			}
			yyredfail = 0;
			i = yyr2[n];
#ifdef DEBUG
			Tprintf("\tReduce, length %d,", i);
#endif
			if (i > yytipct) {
				i =- yytipct;
				yytipct = 0;
				ps =- i;
				yCpv =- i;
			} else
				yytipct =- i;
			if (yytipct >= YYTIPSIZ)
				goto tipover;
			/*
			 * Use goto table to find next state
			 */
			p = &yygo[yypgo[yyr1[n]]];
			i = yytipct ? yytips[yytipct - 1] : *ps;
			while (*p != i && *p >= 0)
				p =+ 2;
#ifdef DEBUG
			Tprintf(" new state %d\n", p[1]);
#endif
			yytips[yytipct] = p[1];
			yytipct++;
			goto newstate;
		/*
		 * ACCEPT
		 */
		case 4:
#ifdef DEBUG
			Tprintf("\tAccept\n");
#endif
			return (ps);
		/*
		 * ERROR
		 */
		case 1:
#ifdef DEBUG
			Tprintf("\tError\n");
#endif
			return (0);
	}
	panic("loccor");
}