V10/cmd/twig/machine.c
#include "common.h"
#include "code.h"
#include "sym.h"
#include "machine.h"
int gen_table2; /* generate type two tables */
struct machine_node *machine = NULL;
static int depth;
static struct machine_node *
get_state()
{
struct machine_node *mp = (struct machine_node *)
malloc (sizeof (struct machine_node));
assert (mp!=NULL);
mp->inp.value = -1;
mp->nst = NULL;
mp->link = NULL;
mp->fail = NULL;
mp->match = NULL;
return(mp);
}
add_match(s, tp, depth)
struct machine_node *s;
LabelData *tp;
int depth;
{
struct match *mp = (struct match *) malloc (sizeof (struct match));
assert (mp!=NULL);
mp->next = s->match;
mp->depth = depth;
mp->tree = tp;
s->match = mp;
}
/*
* Build a trie from the path strings. Failure transition are added later.
*/
cgotofn(tp)
LabelData *tp;
{
struct machine_input inp;
int ret;
register struct machine_node *s;
register int quit = 0;
if (machine==NULL) {
/* first time thru */
machine = get_state();
}
s = machine;
depth = -1;
assert (tp->tree!=NULL);
path_setup (tp->tree);
for(;;) {
ret = path_getsym(&inp);
/* no more paths */
if (ret == EOF)
break;
if (ret == NULL) {
/*
* the end of the path. Add a match to the
* accepting state.
*/
assert ((depth&01)==0); /* make sure it's even */
add_match(s, tp, depth >> 1);
s = machine;
depth = -1;
ntrees++;
continue;
}
while (!MI_EQUAL(s->inp, inp)) {
if (!MI_DEFINED(s->inp.value) || s->link == NULL) {
if (MI_DEFINED(s->inp.value)) {
/*
* The trie must be split here
*/
s->link = get_state();
s = s->link;
}
/*
* Fill in the state
*/
s->inp = inp;
s->nst = get_state();
s = s->nst;
depth++;
break;
} else s = s->link;
}
if (MI_EQUAL(s->inp, inp)) {
s = s->nst;
depth++;
}
}
}
/* print out the machine for debugging */
machine_print(mp)
struct machine_node *mp;
{
register struct machine_node *mp2;
struct machine_node *fail;
struct match *match, *p;
if (mp==NULL)
return;
printf("%x %d:", mp, mp->index);
if((fail = mp->fail))
printf("\tfail %x", fail);
if((match = mp->match)) {
printf("\taccept ");
for(p = match; p!=NULL; p = p->next) {
LabelData *tp = p->tree;
printf("%s ", (tp->label)->name);
}
}
putchar('\n');
for(mp2=mp; mp2!=NULL; mp2 = mp2->link) {
if (MI_DEFINED(mp2->inp.value)) {
if (MI_BRANCH(mp->inp.value))
printf(" %d -> ", mp2->inp.value);
else
printf(" [%s] -> ", (mp2->inp.sym)->name);
printf("%x", mp2->nst);
}
assert(mp2->fail==fail);
assert(mp2->match==match);
}
if(MI_DEFINED(mp->inp.value))
putchar('\n');
for (mp2 = mp; mp2!=NULL; mp2=mp2->link) {
machine_print(mp2->nst);
}
}
/*
* This routine augments the machine with failure transition.
* The trie is examined in breadth first order.
* See Aho, Corasick Comm ACM 18:6 for details
*/
cfail()
{
struct machine_node **stack, **stack2;
struct machine_node **stackTop, **stack2Top, **temp;
int stackCnt, stack2Cnt;
struct machine_node *state;
struct machine_input sym;
register struct machine_node *s, *q;
/* allocate stacks */
stackTop = stack = (struct machine_node **) malloc
(ntrees*sizeof (struct machine_node *));
stack2 = stack2Top = (struct machine_node **) malloc
(ntrees*sizeof (struct machine_node *));
stackCnt = stack2Cnt = 0;
s = machine;
do {
if (MI_DEFINED(s->inp.value)) {
assert(++stackCnt <= ntrees);
*stackTop++ = s->nst;
}
s = s->link;
} while (s != NULL);
for(;;) {
if (stackCnt == 0) {
/* swap stacks */
if (stack2Cnt == 0)
break;
stackCnt = stack2Cnt; stack2Cnt = 0;
stackTop = stack2Top;
temp = stack; stack = stack2; stack2 = temp;
stack2Top = stack2;
}
stackCnt--;
s = *--stackTop;
do {
int startstate = 0;
sym = s->inp;
if (MI_DEFINED(sym.value)) { /* g(s,c) != 0 */
assert (++stack2Cnt <= ntrees);
*stack2Top++ = (q=s->nst);
state = s->fail; /* state = f(s) */
for(;;) {
if (state == 0) {
state = machine;
startstate++;
}
if (MI_EQUAL(state->inp, sym)) {
struct match *mp = NULL;
/* append any accepting matches */
for(mp=(state->nst)->match;mp;mp=mp->next)
add_match(q,mp->tree, mp->depth);
mp = q->match;
do {
/* f(q) = g(state,sym)
* for all q links */
q->fail = state->nst;
q->match = mp;
} while ((q = q->link) != 0);
break;
}
else if (state->link != 0)
state = state->link;
else {
state = state->fail;
if (state==NULL&&startstate)
break;
}
}
}
s = s->link;
} while (s!=NULL);
}
}
/*
* Called from the YACC rule pattern_spec
*/
void
machine_build()
{
cfail();
(void) MachineNumber(machine, 0);
if(debug_flag&DB_MACH) {
fputs("\n-------\n", stderr);
machine_print(machine);
}
}
struct match *acceptTable[MAXACCEPTS];
struct match **acceptTableTop = acceptTable;
static short int *arityTable;
int nextAcceptIndex = 0;
/*
* This is the first pass of the process to generate the
* flattened machine used in the walker. Called from machine_build
*/
int
MachineNumber(mach, index)
struct machine_node *mach;
int index;
{
struct machine_node *mp;
for(mp=mach; mp!=NULL; mp = mp->link)
if(MI_DEFINED(mp->inp.value))
index = MachineNumber(mp->nst, index);
mach->index = index;
if (mach->match!=NULL) index += 2;
if(mach->link!=NULL || mach->nst!=NULL) {
index += 2;
for(mp = mach; mp!=NULL; mp = mp->link)
if(mp->nst!=NULL)
index += 2;
}
if (mach->fail!=NULL)
index += 2;
index++;
return(index);
}
/*
* Build the flattened out machine used in the walker.
* See machine.h for description
*/
MachineGen()
{
struct match **atp, *ap;
short int *sp;
oreset();
fputs("short int mtTable[] = {\n", outfile);
machineGen2(machine);
fputs("};\n", outfile);
fputs("short int mtAccept[] = {\n", outfile);
oreset();
for(atp = acceptTable; atp < acceptTableTop; atp++) {
for(ap = *atp; ap!=NULL; ap = ap->next) {
register LabelData *tree = ap->tree;
/* if you change any of the code below you must change
* the increment added to nextAcceptIndex in
* machineGen2
*/
oputint(tree->treeIndex);
oputint(ap->depth);
}
oputint(-1);
}
fprintf(outfile, "};\nshort int mtStart = %d;\n", machine->index);
}
machineGen2(mach)
struct machine_node *mach;
{
struct machine_node *mp;
struct match *ap;
if (mach==NULL)
return;
for(mp=mach; mp!=NULL; mp = mp->link)
if(MI_DEFINED(mp->inp.value))
machineGen2(mp->nst);
assert(mach->index == ointcnt());
if (mach->match!=NULL) {
*acceptTableTop++ = mach->match;
oputint(ACCEPT); oputint(nextAcceptIndex);
for(ap = mach->match; ap!=NULL; ap = ap->next)
nextAcceptIndex += 2;
nextAcceptIndex++;
}
if(mach->link!=NULL || mach->nst!=NULL) {
if(MI_BRANCH(mp->inp.value)&&gen_table2) {
oputint (TABLE2);
for(mp = mach; mp!=NULL; mp = mp->link) {
if(mp->nst!=NULL) {
oputoct(mp->inp.value);
oputint((mp->nst)->index);
}
}
} else {
oputint (TABLE);
for(mp = mach; mp!=NULL; mp = mp->link) {
if(mp->nst!=NULL) {
oputoct(mp->inp.value);
oputint((mp->nst)->index);
}
}
}
oputint(-1);
}
/* A failure transition or a -1 terminate every state */
if (mach->fail!=NULL) {
oputint(FAIL); oputint((mach->fail)->index);
}
oputint(-1);
}