OpenSolaris_b135/lib/libpp/common/ppfsm.c

/***********************************************************************
*                                                                      *
*               This software is part of the ast package               *
*          Copyright (c) 1986-2009 AT&T Intellectual Property          *
*                      and is licensed under the                       *
*                  Common Public License, Version 1.0                  *
*                    by AT&T Intellectual Property                     *
*                                                                      *
*                A copy of the License is available at                 *
*            http://www.opensource.org/licenses/cpl1.0.txt             *
*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
*                                                                      *
*              Information and Software Systems Research               *
*                            AT&T Research                             *
*                           Florham Park NJ                            *
*                                                                      *
*                 Glenn Fowler <gsf@research.att.com>                  *
*                                                                      *
***********************************************************************/
#pragma prototyped
/*
 * Glenn Fowler
 * AT&T Research
 *
 * preprocessor and proto lexical analyzer fsm
 * define PROTOMAIN for standalone proto
 */

#include "pplib.h"
#include "ppfsm.h"

/*
 * lexical FSM encoding
 * derived from a standalone ansi cpp by Dennis Ritchie
 * modified for libpp by Glenn Fowler
 *
 *   fsm[] is initialized from fsminit[].  The encoding is blown out into
 *   fsm[] for time efficiency.  When in state state, and one of the
 *   characters in ch arrives, enter nextstate.  States >= TERMINAL are
 *   either final, or at least require special action.  In fsminit[] there
 *   is a line for each <state,charset,nextstate>.  Early entries are
 *   overwritten by later ones.  C_XXX is the universal set and should
 *   always be first.  Some of the fsminit[] entries are templates for
 *   groups of states.  The OP entries trigger the state copies.  States
 *   above TERMINAL are represented in fsm[] as negative values.  S_TOK and
 *   S_TOKB encode the resulting token type in the upper bits.  These actions
 *   differ in that S_TOKB has a lookahead char.
 *
 *   fsm[] has three start states:
 *
 *	PROTO	proto (ANSI -> K&R,C++,ANSI)
 *	QUICK	standalone ppcpp()
 *	TOKEN	tokenizing pplex()
 *
 *   If the next state remains the same then the fsm[] transition value is 0.
 *   MAX+1 is a power of 2 so that fsm[state][EOF==MAX+1] actually accesses
 *   fsm[state+1][0] which is ~S_EOB for all states.  This preserves the
 *   power of 2 fsm[] row size for efficient array indexing.  Thanks to
 *   D. G. Korn for the last two observations.  The pseudo non-terminal state
 *   fsm[TERMINAL][state+1] is used to differentiate EOB from EOF.
 *
 *   The bit layout is:
 *
 *	TERM	arg	SPLICE	next
 *	15	14-8	7	6-0
 */

/*
 * NOTE: these must be `control' characters for all native codesets
 *       currently ok for {ascii,ebcdic1,ebcdic2,ebcdic3}
 */

#define C_DEC		001
#define C_EOF		002
#define C_HEX		003
#define C_LET		021
#define C_OCT		022
#define C_XXX		023

#define OP		(-1)
#define END		0
#define COPY		1

#define copy(t,f)	(memcpy(&fsm[t][1],&fsm[f][1],(MAX+1)*sizeof(short)),fsm[TERMINAL][(t)+1]=fsm[TERMINAL][(f)+1])

struct fsminit				/* fsm initialization row	*/
{
	int		state;		/* if in this state		*/
	unsigned char	ch[4];		/* and see one of these		*/
	int		nextstate;	/* enter this state if <TERMINAL*/
};

static struct fsminit	fsminit[] =
{
	/* proto start state */
	{	PROTO,	{ C_XXX },		S_CHR,			},
	{	PROTO,	{ C_EOF },		S_EOF,			},
	{	PROTO,	{ C_DEC },		BAD1,			},
	{	PROTO,	{ '.' },		DOT,			},
	{	PROTO,	{ C_LET },		NID,			},
	{	PROTO,	{ 'L' },		LIT,			},
	{	PROTO,	{ 'd', 'e', 'f', 'i' },	RES1,			},
	{	PROTO,	{ 'r', 's', 't', 'v' },	RES1,			},
	{	PROTO,	{ 'w', 'N' },		RES1,			},
	{	PROTO,	{ '"', '\'' },		S_LITBEG,		},
	{	PROTO,	{ '/' },		COM1,			},
	{	PROTO,	{ '\n' },		S_NL,			},
	{	PROTO,	{ ' ','\t','\f','\v' },	WS1,			},

/* proto {do,else,extern,for,if,inline,return,static,typedef,va_start,void,while,NoN} */
	{	RES1,	{ C_XXX },		S_MACRO,		},
	{	RES1,	{ C_LET, C_DEC },	NID,			},
	{	RES1,	{ 'a' },		RES1a,			},
	{	RES1,	{ 'e' },		RES1e,			},
	{	RES1,	{ 'f' },		RES1f,			},
	{	RES1,	{ 'h' },		RES1h,			},
	{	RES1,	{ 'l' },		RES1l,			},
	{	RES1,	{ 'n' },		RES1n,			},
	{	RES1,	{ 'o' },		RES1o,			},
	{	RES1,	{ 't' },		RES1t,			},
	{	RES1,	{ 'x' },		RES1x,			},
	{	RES1,	{ 'y' },		RES1y,			},

	/* proto reserved {va_start} */
	{	RES1a,	{ C_XXX },		S_RESERVED,		},
	{	RES1a,	{ C_LET, C_DEC },	NID,			},
	{	RES1a,	{ '_','s','t','a' },	RES1a,			},
	{	RES1a,	{ 'r' },		RES1a,			},

	/* proto reserved {return} */
	{	RES1e,	{ C_XXX },		S_RESERVED,		},
	{	RES1e,	{ C_LET, C_DEC },	NID,			},
	{	RES1e,	{ 't','u','r','n' },	RES1e,			},

	/* proto reserved {if} */
	{	RES1f,	{ C_XXX },		S_RESERVED,		},
	{	RES1f,	{ C_LET, C_DEC },	NID,			},

	/* proto reserved {while} */
	{	RES1h,	{ C_XXX },		S_RESERVED,		},
	{	RES1h,	{ C_LET, C_DEC },	NID,			},
	{	RES1h,	{ 'i','l','e' },	RES1h,			},

	/* proto reserved {else} */
	{	RES1l,	{ C_XXX },		S_RESERVED,		},
	{	RES1l,	{ C_LET, C_DEC },	NID,			},
	{	RES1l,	{ 's','e' },		RES1l,			},

	/* proto reserved {inline} */
	{	RES1n,	{ C_XXX },		S_RESERVED,		},
	{	RES1n,	{ C_LET, C_DEC },	NID,			},
	{	RES1n,	{ 'l','i','n','e' },	RES1n,			},

	/* proto reserved {do,for,void} */
	{	RES1o,	{ C_XXX },		S_RESERVED,		},
	{	RES1o,	{ C_LET, C_DEC },	NID,			},
	{	RES1o,	{ 'r','i','d','N' },	RES1o,			},

	/* proto reserved {static} */
	{	RES1t,	{ C_XXX },		S_RESERVED,		},
	{	RES1t,	{ C_LET, C_DEC },	NID,			},
	{	RES1t,	{ 'a','t','i','c' },	RES1t,			},

	/* proto reserved {extern} */
	{	RES1x,	{ C_XXX },		S_RESERVED,		},
	{	RES1x,	{ C_LET, C_DEC },	NID,			},
	{	RES1x,	{ 't','e','r','n' },	RES1x,			},

	/* proto reserved {typedef} */
	{	RES1y,	{ C_XXX },		S_RESERVED,		},
	{	RES1y,	{ C_LET, C_DEC },	NID,			},
	{	RES1y,	{ 'p','e','d','f' },	RES1y,			},

	/* saw /, perhaps start of comment */
	{	COM1,	{ C_XXX },		S_CHRB,			},
	{	COM1,	{ '*' },		COM2,			},
#if PROTOMAIN
	{	COM1,	{ '/' },		COM5,			},
#endif

	/* saw / *, start of comment */
	{	COM2,	{ C_XXX },		COM2,			},
	{	COM2,	{ '\n', C_EOF },	S_COMMENT,		},
	{	COM2,	{ '/' },		COM4,			},
	{	COM2,	{ '*' },		COM3,			},
	{	COM2,	{ '#', ';', ')' },	QUAL(COM2),		},

	/* saw the * possibly ending a comment */
	{	COM3,	{ C_XXX },		COM2,			},
	{	COM3,	{ '\n', C_EOF },	S_COMMENT,		},
	{	COM3,	{ '#', ';', ')' },	QUAL(COM2),		},
	{	COM3,	{ '*' },		COM3,			},
	{	COM3,	{ '/' },		S_COMMENT,		},

	/* saw / in / * comment, possible malformed nest */
	{	COM4,	{ C_XXX },		COM2,			},
	{	COM4,	{ '*', '\n', C_EOF },	S_COMMENT,		},
	{	COM4,	{ '/' },		COM4,			},

	/* saw / /, start of comment */
	{	COM5,	{ C_XXX },		COM5,			},
	{	COM5,	{ '\n', C_EOF },	S_COMMENT,		},
	{	COM5,	{ '/' },		COM6,			},
	{	COM5,	{ '*' },		COM7,			},

	/* saw / in / / comment, possible malformed nest */
	{	COM6,	{ C_XXX },		COM5,			},
	{	COM6,	{ '*', '\n', C_EOF },	S_COMMENT,		},
	{	COM6,	{ '/' },		COM6,			},

	/* saw * in / /, possible malformed nest */
	{	COM7,	{ C_XXX },		COM5,			},
	{	COM7,	{ '\n', C_EOF },	S_COMMENT,		},
	{	COM7,	{ '*' },		COM7,			},
	{	COM7,	{ '/' },		S_COMMENT,		},

	/* normal identifier -- always a macro candidate */
	{	NID,	{ C_XXX },		S_MACRO,		},
	{	NID,	{ C_LET, C_DEC },	NID,			},

	/* saw ., operator or dbl constant */
	{	DOT,	{ C_XXX },		S_CHRB,			},
	{	DOT,	{ '.' },		DOT2,			},
	{	DOT,	{ C_DEC },		BAD1,			},

	/* saw .., possible ... */
	{	DOT2,	{ C_XXX },		BACK(T_INVALID),	},
	{	DOT2,	{ '.' },		KEEP(T_VARIADIC),	},

	/* saw L (possible start of normal wide literal) */
	{	LIT,	{ C_XXX },		S_MACRO,		},
	{	LIT,	{ C_LET, C_DEC },	NID,			},
	{	LIT,	{ '"', '\'' },		QUAL(LIT1),		},

	/* saw " or ' beginning literal */
	{	LIT1,	{ C_XXX },		LIT1,			},
	{	LIT1,	{ '"', '\'' },		S_LITEND,		},
	{	LIT1,	{ '\n', C_EOF },	S_LITEND,		},
	{	LIT1,	{ '\\' },		LIT2,			},

	/* saw \ in literal */
	{	LIT2,	{ C_XXX },		S_LITESC,		},
	{	LIT2,	{ '\n', C_EOF },	S_LITEND,		},

	/* eat malformed numeric constant */
	{	BAD1,	{ C_XXX },		BACK(T_INVALID),	},
	{	BAD1,	{ C_LET, C_DEC, '.' },	BAD1,			},
	{	BAD1,	{ 'e', 'E' },		BAD2,			},

	/* eat malformed numeric fraction|exponent */
	{	BAD2,	{ C_XXX },		BACK(T_INVALID),	},
	{	BAD2,	{ C_LET, C_DEC, '.' },	BAD1,			},
	{	BAD2,	{ '+', '-' },		BAD1,			},

	/* saw white space, eat it up */
	{	WS1,	{ C_XXX },		S_WS,			},
	{	WS1,	{ ' ', '\t' },		WS1,			},
	{	WS1,	{ '\f', '\v' },		S_VS,			},

#if !PROTOMAIN

	/* quick template */
	{	QUICK,	{ C_XXX },		QTOK,			},
	{	QUICK,	{ C_EOF, MARK },	S_CHRB,			},
	{	QUICK,	{ C_LET, C_DEC },	QID,			},
	{	QUICK,	{ 'L' },		LIT0,			},
	{	QUICK,	{ '"', '\'' },		S_LITBEG,		},
	{	QUICK,	{ '/' },		S_CHRB,			},
	{	QUICK,	{ '*' },		QCOM,			},
	{	QUICK,	{ '#' },		SHARP1,			},
	{	QUICK,	{ '\n' },		S_NL,			},
	{	QUICK,	{ '\f', '\v' },		S_VS,			},

	/* copy QUICK to QUICK+1 through MAC0+1 */
	{	OP,	{QUICK,QUICK+1,MAC0+1},	COPY,			},

	/* quick start state */
	{	QUICK,	{ C_EOF },		S_EOF,			},
	{	QUICK,	{ C_DEC },		QNUM,			},
	{	QUICK,	{ MARK },		QTOK,			},
	{	QUICK,	{ '/' },		COM1,			},
	{	QUICK,	{ ' ', '\t' },		QUICK,			},

	/* grab non-macro tokens */
	{	QTOK,	{ C_DEC },		QNUM,			},

	/* grab numeric and invalid tokens */
	{	QNUM,	{ C_LET, C_DEC, '.' },	QNUM,			},
	{	QNUM,	{ 'e', 'E' },		QEXP,			},

	/* grab exponent token */
	{	QEXP,	{ C_LET, C_DEC, '.' },	QNUM,			},
	{	QEXP,	{ '+', '-' },		QNUM,			},

	/* saw *, grab possible bad comment terminator */
	{	QCOM,	{ C_DEC },		QNUM,			},
	{	QCOM,	{ '/' },		S_COMMENT,		},

	/* saw L (possible start of wide string or first macro char) */
	{	MAC0,	{ 'L' },		QID,			},
	{	MAC0,	{ '"', '\'' },		QUAL(LIT1),		},

	/* macro candidate template */
	{	MAC0+1,	{ 'L' },		QID,			},

	/* copy MAC0+1 to MAC0+2 through MACN */
	{	OP,	{MAC0+1,MAC0+2,MACN},	COPY			},

	/* saw L (possible start of wide string or macro L) */
	{	HIT0,	{ C_XXX },		S_MACRO,		},
	{	HIT0,	{ C_LET, C_DEC },	QID,			},
	{	HIT0,	{ '"', '\'' },		QUAL(LIT1),		},

	/* macro hit template */
	{	HIT0+1,	{ C_XXX },		S_MACRO,		},
	{	HIT0+1,	{ C_LET, C_DEC },	QID,			},

	/* copy HIT0+1 to HIT0+2 through HITN */
	{	OP,	{HIT0+1,HIT0+2,HITN},	COPY			},

	/* saw L (possible start of wide literal) */
	{	LIT0,	{ C_XXX },		S_MACRO,		},
	{	LIT0,	{ C_LET, C_DEC },	QID,			},
	{	LIT0,	{ '"', '\'' },		QUAL(LIT1),		},

	/* (!PROTOMAIN COM1) saw /, perhaps start of comment or /= */
	{	COM1,	{ '=' },		KEEP(T_DIVEQ),		},

	/* normal start state */
	{	TOKEN,	{ C_XXX },		S_HUH,			},
	{	TOKEN,	{ C_EOF },		S_EOF,			},
	{	TOKEN,	{ C_DEC },		DEC1,			},
	{	TOKEN,	{ '0' },		OCT1,			},
	{	TOKEN,	{ '.' },		DOT1,			},
	{	TOKEN,	{ C_LET },		NID,			},
	{	TOKEN,	{ 'L' },		LIT,			},
	{	TOKEN,	{ '"', '\'', '<' },	S_LITBEG,		},
	{	TOKEN,	{ '/' },		COM1,			},
	{	TOKEN,	{ '\n' },		S_NL,			},
	{	TOKEN,	{ ' ', '\t' },		WS1,			},
	{	TOKEN,	{ '\f', '\v' },		S_VS,			},
	{	TOKEN,	{ '#' },		SHARP1,			},
	{	TOKEN,	{ ':' },		COLON1,			},
	{	TOKEN,	{ '%' },		PCT1,			},
	{	TOKEN,	{ '&' },		AND1,			},
	{	TOKEN,	{ '*' },		STAR1,			},
	{	TOKEN,	{ '+' },		PLUS1,			},
	{	TOKEN,	{ '-' },		MINUS1,			},
	{	TOKEN,	{ '=' },		EQ1,			},
	{	TOKEN,	{ '!' },		NOT1,			},
	{	TOKEN,	{ '>' },		GT1,			},
	{	TOKEN,	{ '^' },		CIRC1,			},
	{	TOKEN,	{ '|' },		OR1,			},
	{	TOKEN,	{ '(', ')', '[', ']' },	S_CHR,			},
	{	TOKEN,	{ '{', '}', ',', ';' },	S_CHR,			},
	{	TOKEN,	{ '~', '?' },		S_CHR,			},

	/* saw 0, possible oct|hex|dec|dbl constant */
	{	OCT1,	{ C_XXX },		BACK(T_DECIMAL),	},
	{	OCT1,	{ C_LET, C_DEC },	BAD1,			},
	{	OCT1,	{ C_OCT },		OCT2,			},
	{	OCT1,	{ 'e', 'E' },		DBL2,			},
	{	OCT1,	{ 'l', 'L', 'u', 'U' },	QUAL(DEC2),		},
	{	OCT1,	{ 'x', 'X' },		HEX1,			},
	{	OCT1,	{ '.' },		DBL1,			},

	/* saw 0<oct>, oct constant */
	{	OCT2,	{ C_XXX },		BACK(T_OCTAL),		},
	{	OCT2,	{ C_LET, C_DEC },	BAD1,			},
	{	OCT2,	{ C_OCT },		OCT2,			},
	{	OCT2,	{ 'e', 'E' },		DBL2,			},
	{	OCT2,	{ 'l', 'L', 'u', 'U' },	QUAL(OCT3),		},
	{	OCT2,	{ '.' },		DBL1,			},

	/* oct constant qualifier */
	{	OCT3,	{ C_XXX },		BACK(T_OCTAL),		},
	{	OCT3,	{ C_LET, C_DEC, '.' },	BAD1,			},
	{	OCT3,	{ 'l', 'L', 'u', 'U' },	QUAL(OCT3),		},

	/* saw 0 [xX], hex constant */
	{	HEX1,	{ C_XXX },		BACK(T_HEXADECIMAL),	},
	{	HEX1,	{ C_LET },		BAD1,			},
	{	HEX1,	{ C_HEX },		HEX1,			},
	{	HEX1,	{ 'e', 'E' },		HEX3,			},
	{	HEX1,	{ 'l', 'L', 'u', 'U' },	QUAL(HEX2),		},
	{	HEX1,	{ '.' },		HEX4,			},
	{	HEX1,	{ 'p', 'P' },		HEX5,			},

	/* hex constant qualifier */
	{	HEX2,	{ C_XXX },		BACK(T_HEXADECIMAL),	},
	{	HEX2,	{ C_LET, C_DEC, '.' },	BAD1,			},
	{	HEX2,	{ 'l', 'L', 'u', 'U' },	QUAL(HEX2),		},

	/* hex [eE][-+] botch */
	{	HEX3,	{ C_XXX },		BACK(T_HEXADECIMAL),	},
	{	HEX3,	{ C_LET, '.', '-', '+'},BAD1,			},
	{	HEX3,	{ C_HEX },		HEX1,			},
	{	HEX3,	{ 'e', 'E' },		HEX3,			},
	{	HEX3,	{ 'l', 'L', 'u', 'U' },	QUAL(HEX2),		},

	/* hex dbl fraction */
	{	HEX4,	{ C_XXX },		BACK(T_HEXDOUBLE),	},
	{	HEX4,	{ C_LET, '.' },		BAD1,			},
	{	HEX4,	{ C_HEX },		HEX4,			},
	{	HEX4,	{ 'p', 'P' },		HEX5,			},
	{	HEX4,	{ 'f', 'F', 'l', 'L' },	QUAL(HEX8),		},

	/* optional hex dbl exponent sign */
	{	HEX5,	{ C_XXX },		BACK(T_INVALID),	},
	{	HEX5,	{ C_LET, '.' },		BAD1,			},
	{	HEX5,	{ '+', '-' },		HEX6,			},
	{	HEX5,	{ C_DEC },		HEX7,			},

	/* mandatory hex dbl exponent first digit */
	{	HEX6,	{ C_XXX },		BACK(T_INVALID),	},
	{	HEX6,	{ C_LET, '.' },		BAD1,			},
	{	HEX6,	{ C_DEC },		HEX7,			},

	/* hex dbl exponent digits */
	{	HEX7,	{ C_XXX },		BACK(T_HEXDOUBLE),	},
	{	HEX7,	{ C_LET, '.' },		BAD1,			},
	{	HEX7,	{ C_DEC },		HEX7,			},
	{	HEX7,	{ 'f', 'F', 'l', 'L' },	QUAL(HEX8),		},

	/* hex dbl constant qualifier */
	{	HEX8,	{ C_XXX },		BACK(T_HEXDOUBLE),	},
	{	HEX8,	{ C_LET, '.' },		BAD1,			},
	{	HEX8,	{ 'f', 'F', 'l', 'L' },	QUAL(HEX8),		},

	/* saw <dec>, dec constant */
	{	DEC1,	{ C_XXX },		BACK(T_DECIMAL),	},
	{	DEC1,	{ C_LET },		BAD1,			},
	{	DEC1,	{ C_DEC },		DEC1,			},
	{	DEC1,	{ 'e', 'E' },		DBL2,			},
	{	DEC1,	{ 'l', 'L', 'u', 'U' },	QUAL(DEC2),		},
	{	DEC1,	{ '.' },		DBL1,			},

	/* dec constant qualifier */
	{	DEC2,	{ C_XXX },		BACK(T_DECIMAL),	},
	{	DEC2,	{ C_LET, C_DEC },	BAD1,			},
	{	DEC2,	{ 'l', 'L', 'u', 'U' },	QUAL(DEC2),		},

	/* saw ., operator or dbl constant */
	{	DOT1,	{ C_XXX },		S_CHRB,			},
	{	DOT1,	{ '.' },		DOT2,			},
	{	DOT1,	{ C_DEC },		DBL1,			},

	/* dbl fraction */
	{	DBL1,	{ C_XXX },		BACK(T_DOUBLE),		},
	{	DBL1,	{ C_LET, '.' },		BAD1,			},
	{	DBL1,	{ C_DEC },		DBL1,			},
	{	DBL1,	{ 'e', 'E' },		DBL2,			},
	{	DBL1,	{ 'f', 'F', 'l', 'L' },	QUAL(DBL5),		},

	/* optional dbl exponent sign */
	{	DBL2,	{ C_XXX },		BACK(T_INVALID),	},
	{	DBL2,	{ C_LET, '.' },		BAD1,			},
	{	DBL2,	{ '+', '-' },		DBL3,			},
	{	DBL2,	{ C_DEC },		DBL4,			},

	/* mandatory dbl exponent first digit */
	{	DBL3,	{ C_XXX },		BACK(T_INVALID),	},
	{	DBL3,	{ C_LET, '.' },		BAD1,			},
	{	DBL3,	{ C_DEC },		DBL4,			},

	/* dbl exponent digits */
	{	DBL4,	{ C_XXX },		BACK(T_DOUBLE),		},
	{	DBL4,	{ C_LET, '.' },		BAD1,			},
	{	DBL4,	{ C_DEC },		DBL4,			},
	{	DBL4,	{ 'f', 'F', 'l', 'L' },	QUAL(DBL5),		},

	/* dbl constant qualifier */
	{	DBL5,	{ C_XXX },		BACK(T_DOUBLE),		},
	{	DBL5,	{ C_LET, '.' },		BAD1,			},
	{	DBL5,	{ 'f', 'F', 'l', 'L' },	QUAL(DBL5),		},

	/* saw < starting include header */
	{	HDR1,	{ C_XXX },		HDR1,			},
	{	HDR1,	{ '>', '\n', C_EOF },	S_LITEND,		},

	/* saw <binop><space> expecting = */
	{	BIN1,	{ C_XXX },		S_HUH,			},
	{	BIN1,	{ ' ', '\t' },		BIN1,			},

	/* 2-char ops */

	{	SHARP1,	{ C_XXX },		S_SHARP,		},

	{	PCT1,	{ C_XXX },		S_CHRB,			},
	{	PCT1,	{ '=' },		KEEP(T_MODEQ),		},

	{	AND1,	{ C_XXX },		S_CHRB,			},
	{	AND1,	{ '=' },		KEEP(T_ANDEQ),		},
	{	AND1,	{ '&' },		KEEP(T_ANDAND),		},

	{	STAR1,	{ C_XXX },		S_CHRB,			},
	{	STAR1,	{ '=' },		KEEP(T_MPYEQ),		},
	{	STAR1,	{ '/' },		S_COMMENT,		},

	{	PLUS1,	{ C_XXX },		S_CHRB,			},
	{	PLUS1,	{ '=' },		KEEP(T_ADDEQ),		},
	{	PLUS1,	{ '+' },		KEEP(T_ADDADD),		},

	{	MINUS1,	{ C_XXX },		S_CHRB,			},
	{	MINUS1,	{ '=' },		KEEP(T_SUBEQ),		},
	{	MINUS1,	{ '-' },		KEEP(T_SUBSUB),		},
	{	MINUS1,	{ '>' },		KEEP(T_PTRMEM),		},

	{	COLON1,	{ C_XXX },		S_CHRB,			},
	{	COLON1,	{ '=', '>' },		S_HUH,			},

	{	LT1,	{ C_XXX },		S_CHRB,			},
	{	LT1,	{ '=' },		KEEP(T_LE),		},
	{	LT1,	{ '<' },		LSH1,			},

	{	EQ1,	{ C_XXX },		S_CHRB,			},
	{	EQ1,	{ '=' },		KEEP(T_EQ),		},

	{	NOT1,	{ C_XXX },		S_CHRB,			},
	{	NOT1,	{ '=' },		KEEP(T_NE),		},

	{	GT1,	{ C_XXX },		S_CHRB,			},
	{	GT1,	{ '=' },		KEEP(T_GE),		},
	{	GT1,	{ '>' },		RSH1,			},

	{	CIRC1,	{ C_XXX },		S_CHRB,			},
	{	CIRC1,	{ '=' },		KEEP(T_XOREQ),		},

	{	OR1,	{ C_XXX },		S_CHRB,			},
	{	OR1,	{ '=' },		KEEP(T_OREQ),		},
	{	OR1,	{ '|' },		KEEP(T_OROR),		},

	/* 3-char ops */

	{	ARROW1,	{ C_XXX },		BACK(T_PTRMEM),		},
	{	ARROW1,	{ '*' },		KEEP(T_PTRMEMREF),	},

	{	LSH1,	{ C_XXX },		BACK(T_LSHIFT),		},
	{	LSH1,	{ '=' },		KEEP(T_LSHIFTEQ),	},

	{	RSH1,	{ C_XXX },		BACK(T_RSHIFT),		},
	{	RSH1,	{ '=' },		KEEP(T_RSHIFTEQ),	},

#endif

	/* end */
	{	OP,	{ 0 },			END,			}
};

short		fsm[TERMINAL+1][MAX+1];

char		trigraph[MAX+1];

#if PROTOMAIN
static char	spl[] = { '\\', '\r', 0 };
static char	aln[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_$@";
#else
static char	spl[] = { MARK, '?', '\\', '\r', CC_sub, 0 };
static char	aln[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_";
#endif
static char*	let = &aln[10];
static char	hex[] = "fedcbaFEDCBA9876543210";
static char*	dec = &hex[12];
static char*	oct = &hex[14];

/*
 * runtime FSM modifications
 * ppfsm(FSM_INIT,0) must be called first
 */

void
ppfsm(int op, register char* s)
{
	register int			c;
	register int			n;
	register int			i;
	register short*			rp;
	register struct fsminit*	fp;
#if !PROTOMAIN
	char*				t;
	int				x;
#endif

	switch (op)
	{

#if !PROTOMAIN

	case FSM_IDADD:
		while (c = *s++)
			if (!ppisid(c))
			{
				if (fsm[TOKEN][c] == ~S_HUH)
				{
					setid(c);
					for (i = 0; i < TERMINAL; i++)
						fsm[i][c] = IDSTATE(fsm[i]['_']);
				}
				else error(2, "%c: cannot add to identifier set", c);
			}
		break;

	case FSM_IDDEL:
		while (c = *s++)
			if (ppisid(c))
			{
				clrid(c);
				for (i = 0; i < TERMINAL; i++)
					fsm[i][c] = ~S_HUH;
			}
		break;

#endif

	case FSM_INIT:
		for (fp = fsminit;; fp++)
		{
			if ((n = fp->nextstate) >= TERMINAL) n = ~n;
			if (fp->state == OP)
			{
#if !PROTOMAIN
				switch (n)
				{
				case COPY:
					c = fp->ch[0];
					n = fp->ch[2];
					for (i = fp->ch[1]; i <= n; i++)
						copy(i, c);
					continue;
				default:
					break;
				}
#endif
				break;
			}
			rp = fsm[fp->state];
			for (i = 0; i < sizeof(fp->ch) && (c = fp->ch[i]); i++)
			{
				switch (c)
				{
				case C_XXX:
					for (c = 0; c <= MAX; c++)
						rp[c] = n;
					/*FALLTHROUGH*/

				case C_EOF:
					fsm[TERMINAL][fp->state+1] = n < 0 ? ~n : n;
					continue;

				case C_LET:
					s = let;
					break;

				case C_HEX:
					s = hex;
					break;

				case C_DEC:
					s = dec;
					break;

				case C_OCT:
					s = oct;
					break;

				default:
					rp[c] = n;
					continue;
				}
				while (c = *s++)
					rp[c] = n;
			}
		}

		/*
		 * install splice special cases
		 * and same non-terminal transitions
		 */

		for (i = 0; i < TERMINAL; i++)
		{
			rp = fsm[i];
			s = spl;
			while (c = *s++)
				if (c != MARK || !INCOMMENT(rp))
				{
					if (rp[c] >= 0) rp[c] = ~rp[c];
					rp[c] &= ~SPLICE;
				}
			rp[EOB] = ~S_EOB;
			for (c = 0; c <= MAX; c++)
				if (rp[c] == i)
					rp[c] = 0;
		}
		fsm[TERMINAL][0] = ~S_EOB;

#if !PROTOMAIN

		/*
		 * default character types
		 */

		s = let;
		while (c = *s++)
			setid(c);
		s = dec;
		while (c = *s++)
			setdig(c);
		s = spl;
		do setsplice(c = *s++); while (c);

		/*
		 * trigraph map
		 */

		trigraph['='] = '#';
		trigraph['('] = '[';
		trigraph['/'] = '\\';
		trigraph[')'] = ']';
		trigraph['\''] = '^';
		trigraph['<'] = '{';
		trigraph['!'] = '|';
		trigraph['>'] = '}';
		trigraph['-'] = '~';
#endif
		break;

#if !PROTOMAIN

	case FSM_PLUSPLUS:
		if (pp.option & PLUSPLUS)
		{
			fsm[COLON1][':'] = ~KEEP(T_SCOPE);
			fsm[DOT1]['*'] = ~KEEP(T_DOTREF);
			fsm[MINUS1]['>'] = ARROW1;
			fsm[COM1]['/'] = COM5;
			t = "%<:";
			for (i = 0; i < TERMINAL; i++)
			{
				rp = fsm[i];
				if (!INCOMMENT(rp) && !INQUOTE(rp))
				{
					s = t;
					while (c = *s++)
					{
						if (rp[c] > 0) rp[c] = ~rp[c];
						else if (!rp[c]) rp[c] = ~i;
						rp[c] &= ~SPLICE;
					}
				}
			}
			s = t;
			while (c = *s++) setsplice(c);
		}
		else
		{
			fsm[COLON1][':'] = ~S_CHRB;
			fsm[DOT1]['*'] = ~S_CHRB;
			fsm[MINUS1]['>'] = ~KEEP(T_PTRMEM);
			fsm[COM1]['/'] = (pp.option & PLUSCOMMENT) ? COM5 : ~S_CHRB;
		}
		break;

#if COMPATIBLE

	case FSM_COMPATIBILITY:
		if (pp.state & COMPATIBILITY)
		{
			fsm[HEX1]['e'] = HEX1;
			fsm[HEX1]['E'] = HEX1;
			fsm[QNUM]['e'] = QNUM;
			fsm[QNUM]['E'] = QNUM;
			fsm[QNUM]['u'] = ~QUAL(QNUM);
			fsm[QNUM]['U'] = ~QUAL(QNUM);
		}
		else
		{
			fsm[HEX1]['e'] = HEX3;
			fsm[HEX1]['E'] = HEX3;
			fsm[QNUM]['e'] = QEXP;
			fsm[QNUM]['E'] = QEXP;
			fsm[QNUM]['u'] = QNUM;
			fsm[QNUM]['U'] = QNUM;
		}
		break;

#endif

	case FSM_QUOTADD:
		while (c = *s++)
			if (fsm[TOKEN][c] == ~S_HUH)
				for (i = 0; i < TERMINAL; i++)
					fsm[i][c] = fsm[i]['"'];
			else error(2, "%c: cannot add to quote set", c);
		break;

	case FSM_QUOTDEL:
		while (c = *s++)
			if (c != '"' && fsm[TOKEN][c] == fsm[TOKEN]['"'])
				for (i = 0; i < TERMINAL; i++)
					fsm[i][c] = fsm[i]['_'];
		break;

	case FSM_OPSPACE:
		n = s ? BIN1 : ~S_CHRB;
		fsm[COM1][' '] = fsm[COM1]['\t'] = n;
		fsm[AND1][' '] = fsm[AND1]['\t'] = n;
		fsm[STAR1][' '] = fsm[STAR1]['\t'] = n;
		fsm[PCT1][' '] = fsm[PCT1]['\t'] = n;
		fsm[PLUS1][' '] = fsm[PLUS1]['\t'] = n;
		fsm[MINUS1][' '] = fsm[MINUS1]['\t'] = n;
		fsm[CIRC1][' '] = fsm[CIRC1]['\t'] = n;
		fsm[OR1][' '] = fsm[OR1]['\t'] = n;
		fsm[LSH1][' '] = fsm[LSH1]['\t'] = s ? BIN1 : ~BACK(T_LSHIFT);
		fsm[RSH1][' '] = fsm[RSH1]['\t'] = s ? BIN1 : ~BACK(T_RSHIFT);
		break;

	case FSM_MACRO:
		if (pp.truncate && strlen(s) >= pp.truncate)
		{
			x = s[pp.truncate];
			s[pp.truncate] = 0;
		}
		else x = -1;
		i = MAC0 + ((c = *s++) != 'L');
		if ((n = fsm[QUICK][c]) != (i + NMAC))
		{
			n = i;
			if (!*s) n += NMAC;
		}
		if (fsm[QUICK][c] != n)
			fsm[QUICK][c] = fsm[QCOM][c] = fsm[QTOK][c] = n;
		if (c = *s++)
		{
			for (;;)
			{
				if ((i = n) < HIT0)
				{
					if (n < MACN) n++;
					if (!*s)
					{
						n += NMAC;
						break;
					}
					if (fsm[i][c] < HIT0)
						fsm[i][c] = n;
					if (fsm[i + NMAC][c] < HIT0)
						fsm[i + NMAC][c] = n;
				}
				else
				{
					if (n < HITN) n++;
					if (!*s) break;
					if (fsm[i][c] < HIT0)
					{
						n -= NMAC;
						fsm[i][c] = n;
					}
				}
				c = *s++;
			}
			if (x >= 0)
			{
				*s = x;
				for (n = CHAR_MIN; n <= CHAR_MAX; n++)
					if (ppisidig(n))
						fsm[HITN][n] = HITN;
				n = HITN;
			}
			if (fsm[i][c] < n)
				fsm[i][c] = n;
			if (i < HIT0 && fsm[i + NMAC][c] < n)
				fsm[i + NMAC][c] = n;
		}
		break;

#endif

	}
}

#if !PROTOMAIN

/*
 * file buffer refill
 * c is current input char
 */

void
refill(register int c)
{
	if (pp.in->flags & IN_eof)
	{
		pp.in->nextchr--;
		c = 0;
	}
	else
	{
		*((pp.in->nextchr = pp.in->buffer + PPBAKSIZ) - 1) = c;
		c =
#if PROTOTYPE
		(pp.in->flags & IN_prototype) ? pppread(pp.in->nextchr) :
#endif
		read(pp.in->fd, pp.in->nextchr, PPBUFSIZ);
	}
	if (c > 0)
	{
		if (pp.in->nextchr[c - 1] == '\n') pp.in->flags |= IN_newline;
		else pp.in->flags &= ~IN_newline;
#if PROTOTYPE
		if (!(pp.in->flags & IN_prototype))
#endif
		if (c < PPBUFSIZ && (pp.in->flags & IN_regular))
		{
			pp.in->flags |= IN_eof;
			close(pp.in->fd);
			pp.in->fd = -1;
		}
	}
	else
	{
		if (c < 0)
		{
			error(ERROR_SYSTEM|3, "read error");
			c = 0;
		}
		else if ((pp.in->flags ^ pp.in->prev->flags) & IN_c)
		{
			static char	ket[] = { 0, '}', '\n', 0 };

			pp.in->flags ^= IN_c;
			pp.in->nextchr = ket + 1;
			c = 2;
		}
		pp.in->flags |= IN_eof;
	}
#if CHECKPOINT
	pp.in->buflen = c;
#endif
	pp.in->nextchr[c] = 0;
	debug((-7, "refill(\"%s\") = %d = \"%-.*s%s\"", error_info.file, c, (c > 32 ? 32 : c), pp.in->nextchr, c > 32 ? "..." : ""));
	if (pp.test & 0x0080)
		sfprintf(sfstderr, "===== refill(\"%s\") = %d =====\n%s\n===== eob(\"%s\") =====\n", error_info.file, c, pp.in->nextchr, error_info.file);
}

#endif