V10/cmd/twig/rawwalker.c1
/*
* See Aho, Corasick Comm ACM 18:6, and Hoffman and O'Donell JACM 29:1
* for detail of the matching algorithm
*/
/* turn this flag on if your machine has a fast byte string zero operation */
/*#define BZERO 1*/
int mtDebug = 0;
int treedebug = 0;
extern int _machstep();
extern short int mtStart;
extern short int mtTable[], mtAccept[], mtMap[], mtPaths[], mtPathStart[];
#ifdef LINE_XREF
extern short int mtLine[];
#endif
extern NODEPTR mtGetNodes(), mtAction();
extern skeleton *_allocskel();
extern __match *_allocmatch();
extern __partial *_allocpartial();
#define MPBLOCKSIZ 3000
__match *_mpblock[MPBLOCKSIZ], **_mpbtop;
/*
* See sym.h in the preprocessor for details
* Basically used to support eh $%n$ construct.
*/
__match **
_getleaves (mp, skp)
register __match *mp; register skeleton *skp;
{
skeleton *stack[MAXDEPTH];
skeleton **stp = stack;
register short int *sip = &mtPaths[mtPathStart[mp->tree]];
register __match **mmp = _mpbtop;
__match **mmp2 = mmp;
if((_mpbtop += *sip++ + 1) > &_mpblock[MPBLOCKSIZ]) {
printf ("ich: %d\n", _mpbtop-_mpblock);
assert(0);
}
for(;;)
switch(*sip++) {
case ePUSH:
*stp++ = skp;
skp = skp->leftchild;
break;
case eNEXT:
skp = skp->sibling;
break;
case eEVAL:
if ((mp = skp->succ[M_DETAG(*sip++)])==NULL) abort();
*mmp++ = mp;
break;
case ePOP:
skp = *--stp;
break;
case eSTOP:
*mmp = NULL;
return (mmp2);
}
}
_evalleaves (mpp)
register __match **mpp;
{
for (; *mpp!=NULL; mpp++) {
register __match *mp = *mpp;
_do (mp->skel, mp, 1);
}
}
_do(sp, winner, evalall)
skeleton *sp; register __match *winner; int evalall;
{
register __match **mpp;
register skeleton *skel = winner->skel;
if(winner==NULL) {
_prskel(sp, 0);
puts ("no winner");
return;
}
if(winner->mode==xDEFER || (evalall && winner->mode!=xTOPDOWN))
REORDER (winner->lleaves);
mtSetNodes (skel->parent, skel->nson,
skel->root=mtAction (winner->tree, winner->lleaves, sp));
}
skeleton *
_walk(sp, ostate)
register skeleton *sp;
int ostate;
{
int state, nstate, nson;
register __partial *pp;
register __match *mp, *mp2;
register skeleton *nsp, *lastchild = NULL;
NODEPTR son, root;
root = sp->root;
nson = 1; sp->mincost = INFINITY;
state = _machstep (ostate, root, mtValue(root));
while((son = mtGetNodes(root, nson))!=NULL) {
nstate = _machstep (state, NULL, MV_BRANCH(nson));
nsp = _allocskel();
nsp->root = son;
nsp->parent = root;
nsp->nson = nson;
_walk(nsp, nstate);
if(COSTLESS(nsp->mincost, INFINITY)) {
assert (nsp->winner->mode==xREWRITE);
if (mtDebug || treedebug) {
printf ("rewrite\n");
_prskel (nsp, 0);
}
_do(nsp, nsp->winner, 0);
_freeskel(nsp);
continue;
}
_merge (sp, nsp);
if (lastchild==NULL) sp->leftchild = nsp;
else lastchild->sibling = nsp;
lastchild = nsp;
nson++;
}
for (pp = sp->partial; pp < &sp->partial[sp->treecnt]; pp++)
if (pp->bits&01==1) {
mp = _allocmatch();
mp->tree = pp->treeno;
_addmatches (ostate, sp, mp);
}
if(son==NULL && nson==1)
_closure (state, ostate, sp);
sp->rightchild = lastchild;
if (root==NULL) {
COST c; __match *win; int i; nsp = sp->leftchild;
c = INFINITY;
win = NULL;
for (i = 0; i < MAXLABELS; i++) {
mp = nsp->succ[i];
if (mp!=NULL && COSTLESS (mp->cost, c))
{ c = mp->cost; win = mp; }
}
if (mtDebug || treedebug)
_prskel (nsp, 0);
_do (nsp, win, 0);
}
if (mtDebug)
_prskel (sp, 0);
return(sp);
}
static short int _nodetab[MAXNDVAL], _labeltab[MAXLABELS];
/*
* Convert the start state which has a large branching factor into
* a index table. This must be called before the matcher is used.
*/
_matchinit()
{
short int *sp;
struct pair { short int val, where } *pp;
for (sp=_nodetab; sp < &_nodetab[MAXNDVAL]; sp++) *sp = HANG;
for (sp=_labeltab; sp < &_labeltab[MAXLABELS]; sp++) *sp = HANG;
sp = &mtTable[mtStart];
assert (*sp==TABLE);
for (pp = (struct pair *) ++sp; pp->val!=EOF; pp++) {
if (MI_NODE(pp->val))
_nodetab[M_DETAG(pp->val)] = pp->where;
else if (MI_LABEL(pp->val))
_labeltab[M_DETAG(pp->val)] = pp->where;
}
}
int
_machstep(state, root, input)
short int state, input;
NODEPTR root;
{
register short int *stp = &mtTable[state];
int start = 0;
if (state==HANG)
return (input==(MV_BRANCH(1)) ? mtStart : HANG);
rescan:
if (stp==&mtTable[mtStart]) {
if (MI_NODE(input)) return(_nodetab[M_DETAG(input)]);
if (MI_LABEL(input)) return(_labeltab[M_DETAG(input)]);
}
for(;;) {
if(*stp==ACCEPT) stp += 2;
if(*stp==TABLE) {
stp++;
while(*stp!=EOT) {
if(input==*stp) {
return(*++stp);
}
stp += 2;
}
stp++;
}
if(*stp!=FAIL) {
if (start) return (HANG);
else { stp = &mtTable[mtStart]; start = 1; goto rescan;}
} else {
stp++;
stp = &mtTable[*stp];
goto rescan;
}
}
}
_addmatches(ostate, sp, np)
int ostate;
register skeleton *sp;
register __match *np;
{
int label;
int state;
register __match *mp;
label = mtMap[np->tree];
/*
* this is a very poor substitute for good design of the DFA.
* What we need is a special case that allows any label to be accepted
* by the start state but we don't want the start state to recognize
* them after a failure.
*/
state = _machstep(ostate, NULL, MV_LABEL(label));
if (ostate!=mtStart && state==HANG) {
_freematch(np);
return;
}
np->lleaves = _getleaves (np, sp);
np->skel = sp;
if((np->mode = mtEvalCost(np, np->lleaves, sp))==xABORT)
{
_freematch(np);
return;
}
/*
if(np->mode==xREWRITE && COSTLESS(np->cost, sp->mincost)) {
sp->mincost = np->cost;
sp->winner = np;
}
*/
if ((mp = sp->succ[label])!=NULL) {
if (!COSTLESS (np->cost, mp->cost))
{ _freematch(np); return; }
else _freematch(mp);
}
if(COSTLESS(np->cost,sp->mincost)) {
if(np->mode==xREWRITE) {
sp->mincost = np->cost; sp->winner = np; }
else { sp->mincost = INFINITY; sp->winner = NULL; }
}
sp->succ[label] = np;
_closure(state, ostate, sp);
}
_closure(state, ostate, skp)
int state, ostate;
skeleton *skp;
{
register struct ap { short int tree, depth; } *ap;
short int *sp = &mtTable[state];
register __match *mp;
if(state==HANG || *sp!=ACCEPT) return(NULL);
for(ap = (struct ap *) &mtAccept[*++sp]; ap->tree!=-1; ap++)
if (ap->depth==0) {
mp = _allocmatch();
mp->tree = ap->tree;
_addmatches (ostate, skp, mp);
} else {
register __partial *pp, *lim = &skp->partial[skp->treecnt];
for(pp=skp->partial; pp < lim; pp++)
if(pp->treeno==ap->tree)
break;
if(pp==lim) {
skp->treecnt++;
pp->treeno = ap->tree;
pp->bits = (1<<(ap->depth));
} else pp->bits |= (1<<(ap->depth));
}
}
_merge (old, new)
skeleton *old, *new;
{
register __partial *op = old->partial, *np = new->partial;
int nson = new->nson;
register __partial *lim = np + new->treecnt;
if (nson==1) {
old->treecnt = new->treecnt;
for(; np < lim; op++, np++) {
op->treeno = np->treeno;
op->bits = np->bits/2;
}
} else {
__partial *newer = _allocpartial();
register __partial *newerp = newer;
register int cnt;
lim = op+old->treecnt;
for(cnt=new->treecnt; cnt-- ; np++) {
for(op = old->partial; op < lim; op++)
if (op->treeno == np->treeno) {
newerp->treeno = op->treeno;
newerp++->bits = op->bits & (np->bits/2);
break;
}
}
_freepartial(old->partial);
old->partial = newer;
old->treecnt = newerp-newer;
}
}
/* Save and Allocate Matches */
#define BLKF 100
__match *freep = NULL;
static int _count = 0;
static __match *_blockp = NULL;
#ifdef CHECKMEM
int x_matches, f_matches;
#endif
__match *
_allocmatch()
{
__match *mp;
if(freep!=NULL) {
mp = freep;
freep = ((__match *)((struct freeb *) freep)->next);
#ifdef CHECKMEM
f_matches--;
#endif
} else {
if(_count==0) {
_blockp = (__match *) malloc (BLKF*sizeof (__match));
_count = BLKF;
#ifdef CHECKMEM
x_matches += BLKF;
#endif
}
mp = _blockp++;
_count--;
}
return(mp);
}
_freematch(mp)
__match *mp;
{
((struct freeb *) mp)->next = (struct freeb *) freep;
freep = mp;
#ifdef CHECKMEM
f_matches++;
#endif
}
static __partial *pfreep = NULL;
static int pcount = 0;
static __partial *pblock = NULL;
#ifdef CHECKMEM
static int x_partials, f_partials;
#endif
__partial *
_allocpartial()
{
__partial *pp;
if (pfreep!=NULL) {
pp = pfreep;
pfreep = *(__partial **) pp;
#ifdef CHECKMEM
f_partials--;
#endif
} else {
if (pcount==0) {
pblock=(__partial *)malloc(BLKF*MAXTREES*sizeof(__partial));
pcount = BLKF;
#ifdef CHECKMEM
x_partials += BLKF;
#endif
}
pp = pblock;
pblock += MAXTREES;
pcount--;
}
return(pp);
}
_freepartial(pp)
__partial *pp;
{
* (__partial **)pp = pfreep;
pfreep = pp;
#ifdef CHECKMEM
f_partials++;
#endif
}
/* storage management */
static skeleton *sfreep = NULL;
static int scount = 0;
static skeleton * sblock = NULL;
skeleton *
_allocskel()
{
skeleton *sp;
int i;
if(sfreep!=NULL) {
sp = sfreep;
sfreep = sp->sibling;
} else {
if(scount==0) {
sblock = (skeleton *) malloc (BLKF * sizeof (skeleton));
scount = BLKF;
}
sp = sblock++;
scount--;
}
sp->sibling = NULL; sp->leftchild = NULL;
for (i=0; i < MAXLABELS; sp->succ[i++] = NULL);
sp->treecnt = 0;
sp->partial = _allocpartial();
return(sp);
}
_freeskel(sp)
skeleton *sp;
{
int i;
__match *mp;
if(sp==NULL)
return;
if(sp->leftchild)
_freeskel(sp->leftchild);
if(sp->sibling)
_freeskel(sp->sibling);
_freepartial (sp->partial);
for (i=0; i < MAXLABELS; i++)
if ((mp = sp->succ[i])!=NULL) _freematch (mp);
sp->sibling = sfreep;
sfreep = sp;
}
_match()
{
skeleton *sp;
sp = _allocskel();
sp->root = NULL;
_mpbtop = _mpblock;
_freeskel(_walk(sp, HANG));
#ifdef CHECKMEM
if(f_matches+_count!=x_matches) {
printf(";#m %d %d %d\n", f_matches, _count, x_matches);
assert(0);
}
if(f_partials+pcount!=x_partials) {
printf(";#p %d %d %d\n", f_partials, pcount, x_partials);
assert(0);
}
#endif
}
NODEPTR
_mtG(root,i)
NODEPTR root;
int i;
{
int *p = &i;
while(*p!=-1)
root = mtGetNodes(root, *p++);
return(root);
}
/* diagnostic routines */
_prskel (skp, lvl)
skeleton *skp; int lvl;
{
int i; __match *mp;
__partial *pp;
if(skp==NULL) return;
for (i = lvl; i > 0; i--) { putchar (' '); putchar (' '); }
printf ("###\n");
for (i = lvl; i > 0; i--) { putchar (' '); putchar (' '); }
for (i = 0; i < MAXLABELS; i++)
if ((mp=skp->succ[i])!=NULL)
#ifdef LINE_XREF
printf ("[%d<%d> %d]", mp->tree,
mtLine[mp->tree], mp->cost);
#else
printf ("[%d %d]", mp->tree, mp->cost);
#endif
putchar('\n');
for (i = lvl; i > 0; i--) { putchar (' '); putchar (' '); }
for (i = 0, pp=skp->partial; i < skp->treecnt; i++, pp++)
#ifdef LINE_XREF
printf("(%d<%d> %x)", pp->treeno, mtLine[pp->treeno],
pp->bits);
#else
printf ("(%d %x)", pp->treeno, pp->bits);
#endif
putchar('\n');
fflush(stdout);
_prskel (skp->leftchild, lvl+2);
_prskel (skp->sibling, lvl);
}