V10/cmd/twig/machine.c

#include "common.h"
#include "code.h"
#include "sym.h"
#include "machine.h"

int gen_table2;		/* generate type two tables */
struct machine_node *machine = NULL;
static int depth;

static struct machine_node *
get_state()
{
	struct machine_node *mp = (struct machine_node *)
				malloc (sizeof (struct machine_node));
	assert (mp!=NULL);
	mp->inp.value = -1;
	mp->nst = NULL;
	mp->link = NULL;
	mp->fail = NULL;
	mp->match = NULL;
	return(mp);
}

add_match(s, tp, depth)
	struct machine_node *s;
	LabelData *tp;
	int depth;
{
	struct match *mp = (struct match *) malloc (sizeof (struct match));

	assert (mp!=NULL);
	mp->next = s->match;
	mp->depth = depth;
	mp->tree = tp;
	s->match = mp;
}
	
/*
 * Build a trie from the path strings.  Failure transition are added later.
 */
cgotofn(tp)
	LabelData *tp;
{
	struct machine_input inp;
	int ret;
	register struct machine_node *s;
	register int quit = 0;

	if (machine==NULL) {
		/* first time thru */
		machine = get_state();
	}
	s = machine;
	depth = -1;

	assert (tp->tree!=NULL);
	path_setup (tp->tree);

	for(;;) {
		ret = path_getsym(&inp);

		/* no more paths */
		if (ret == EOF)
			break;

		if (ret == NULL) {
			/*
			 * the end of the path.  Add a match to the
			 * accepting state.
			 */
			assert ((depth&01)==0); /* make sure it's even */
			add_match(s, tp, depth >> 1);
			s = machine;
			depth = -1;
			ntrees++;
			continue;
		}

		while (!MI_EQUAL(s->inp, inp)) {
			if (!MI_DEFINED(s->inp.value) || s->link == NULL) {
				if (MI_DEFINED(s->inp.value)) {
					/*
					 * The trie must be split here
					 */
					s->link = get_state();
					s = s->link;
				}
				/*
				 * Fill in the state
				 */
				s->inp = inp;
				s->nst = get_state();
				s = s->nst;
				depth++;
				break;
			} else s = s->link;
		}
		if (MI_EQUAL(s->inp, inp)) {
			s = s->nst;
			depth++;
		}
	}
}

/* print out the machine for debugging */
machine_print(mp)
	struct machine_node *mp;
{
	register struct machine_node *mp2;
	struct machine_node *fail;
	struct match *match, *p;

	if (mp==NULL)
		return;
	printf("%x %d:", mp, mp->index);
	if((fail = mp->fail))
		printf("\tfail %x", fail);
	if((match = mp->match)) {
		printf("\taccept ");
		for(p = match; p!=NULL; p = p->next) {
			LabelData *tp = p->tree;
			printf("%s ", (tp->label)->name);
		}
	}
	putchar('\n');
	for(mp2=mp; mp2!=NULL; mp2 = mp2->link) {
		if (MI_DEFINED(mp2->inp.value)) {
			if (MI_BRANCH(mp->inp.value))
				printf(" %d -> ", mp2->inp.value);
			else
				printf(" [%s] -> ", (mp2->inp.sym)->name);
			printf("%x", mp2->nst);
		}
		assert(mp2->fail==fail);
		assert(mp2->match==match);
	}
	if(MI_DEFINED(mp->inp.value))
		putchar('\n');
	for (mp2 = mp; mp2!=NULL; mp2=mp2->link) {
		machine_print(mp2->nst);
	}
}

/*
 * This routine augments the machine with failure transition.
 * The trie is examined in breadth first order.
 *	See Aho, Corasick Comm ACM 18:6 for details
 */
cfail()
{
	struct machine_node **stack, **stack2;
	struct machine_node **stackTop, **stack2Top, **temp;
	int stackCnt, stack2Cnt;
	struct machine_node *state;
	struct machine_input sym;
	register struct machine_node *s, *q;

	/* allocate stacks */
	stackTop = stack = (struct machine_node **) malloc
				(ntrees*sizeof (struct machine_node *));
	stack2 = stack2Top = (struct machine_node **) malloc
				(ntrees*sizeof (struct machine_node *));
	stackCnt = stack2Cnt = 0;
	s = machine;
	do {
		if (MI_DEFINED(s->inp.value)) {
			assert(++stackCnt <= ntrees);
			*stackTop++ = s->nst;
		}
		s = s->link;
	} while (s != NULL);

	for(;;) {
		if (stackCnt == 0) {
			/* swap stacks */
			if (stack2Cnt == 0)
				break;
			stackCnt = stack2Cnt; stack2Cnt = 0;
			stackTop = stack2Top;
			temp = stack; stack = stack2; stack2 = temp;
			stack2Top = stack2;
		}
		stackCnt--;
		s = *--stackTop;
		do {
			int startstate = 0;
			sym = s->inp;
			if (MI_DEFINED(sym.value)) {	/* g(s,c) != 0 */
			    assert (++stack2Cnt <= ntrees);
			    *stack2Top++ = (q=s->nst);
			    state = s->fail;	/* state = f(s) */
			    for(;;) {
				if (state == 0)	{
					state = machine;
					startstate++;
				}
				if (MI_EQUAL(state->inp, sym)) {
				    struct match *mp = NULL;

				    /* append any accepting matches */
				    for(mp=(state->nst)->match;mp;mp=mp->next)
					add_match(q,mp->tree, mp->depth);
				    mp = q->match;

				    do {
					/* f(q) = g(state,sym)
					 * for all q links */
					q->fail = state->nst;
					q->match = mp;
				    } while ((q = q->link) != 0);
				    break;
				}
				else if (state->link != 0)
				    state = state->link;
				else {
					state = state->fail;
					if (state==NULL&&startstate)
						break;
				}
			    }
			}
			s = s->link;
		} while (s!=NULL);
	}
}

/*
 * Called from the YACC rule pattern_spec
 */
void
machine_build()
{
	cfail();
	(void) MachineNumber(machine, 0);
	if(debug_flag&DB_MACH) {
		fputs("\n-------\n", stderr);
		machine_print(machine);
	}
}

struct match *acceptTable[MAXACCEPTS];
struct match **acceptTableTop = acceptTable;
static short int *arityTable;
int nextAcceptIndex = 0;

/*
 * This is the first pass of the process to generate the
 * flattened machine used in the walker.  Called from machine_build
 */
int
MachineNumber(mach, index)
	struct machine_node *mach;
	int index;
{
	struct machine_node *mp;

	for(mp=mach; mp!=NULL; mp = mp->link)
		if(MI_DEFINED(mp->inp.value))
			index = MachineNumber(mp->nst, index);

	mach->index = index;
	if (mach->match!=NULL) index += 2;
	if(mach->link!=NULL || mach->nst!=NULL) {
		index += 2;
		for(mp = mach; mp!=NULL; mp = mp->link)
			if(mp->nst!=NULL) 
				index += 2;
	}

	if (mach->fail!=NULL)
		index += 2;
	index++;
	return(index);
}

/*
 * Build the flattened out machine used in the walker.
 * See machine.h for description
 */
MachineGen()
{
	struct match **atp, *ap;
	short int *sp;

	oreset();
	fputs("short int mtTable[] = {\n", outfile);
	machineGen2(machine);
	fputs("};\n", outfile);

	fputs("short int mtAccept[] = {\n", outfile);

	oreset();
	for(atp = acceptTable; atp < acceptTableTop; atp++) {
		for(ap = *atp; ap!=NULL; ap = ap->next) {
			register LabelData *tree = ap->tree;

			/* if you change any of the code below you must change
			 * the increment added to nextAcceptIndex in
			 * machineGen2
			 */
			oputint(tree->treeIndex);
			oputint(ap->depth);
		}
		oputint(-1);
	}
	fprintf(outfile, "};\nshort int mtStart = %d;\n", machine->index);
}
		 
machineGen2(mach)
	struct machine_node *mach;
{
	struct machine_node *mp;
	struct match *ap;

	if (mach==NULL)
		return;

	for(mp=mach; mp!=NULL; mp = mp->link)
		if(MI_DEFINED(mp->inp.value))
			machineGen2(mp->nst);

	assert(mach->index == ointcnt());
	if (mach->match!=NULL) {
		*acceptTableTop++ = mach->match;
		oputint(ACCEPT); oputint(nextAcceptIndex);
		for(ap = mach->match; ap!=NULL; ap = ap->next)
			nextAcceptIndex += 2;
		nextAcceptIndex++;
	}
	if(mach->link!=NULL || mach->nst!=NULL) {
		if(MI_BRANCH(mp->inp.value)&&gen_table2) {
			oputint (TABLE2);
			for(mp = mach; mp!=NULL; mp = mp->link) {
				if(mp->nst!=NULL) {
					oputoct(mp->inp.value);
					oputint((mp->nst)->index);
				}
			}
		} else {
			oputint (TABLE);
			for(mp = mach; mp!=NULL; mp = mp->link) {
				if(mp->nst!=NULL) {
					oputoct(mp->inp.value);
					oputint((mp->nst)->index);
				}
			}
		}
		oputint(-1);
	}

	/* A failure transition or a -1 terminate every state */
	if (mach->fail!=NULL) {
		oputint(FAIL); oputint((mach->fail)->index);
	}
	oputint(-1);
}