V10/cmd/twig/rawwalker.ex

/*
 * See Aho, Corasick Comm ACM 18:6, and Hoffman and O'Donell JACM 29:1
 * for detail of the matching algorithm
 */

/* turn this flag on if your machine has a fast byte string zero operation */
/*#define BZERO	1*/
int mtDebug = 0;
int treedebug = 0;
extern int _machstep();
extern short int mtStart;
extern short int mtTable[], mtAccept[], mtMap[], mtPaths[], mtPathStart[];
#ifdef LINE_XREF
	extern short int mtLine[];
#endif
extern NODEPTR mtGetNodes(), mtAction();
extern skeleton *_allocskel();
extern __match *_allocmatch();
extern __partial *_allocpartial();

#define MPBLOCKSIZ 3000
__match *_mpblock[MPBLOCKSIZ], **_mpbtop;

/*
 * See sym.h in the preprocessor for details
 * Basically used to support eh $%n$ construct.
 */
__match **
_getleaves (mp, skp)
	register __match *mp; register skeleton *skp;
{
	skeleton *stack[MAXDEPTH];
	skeleton **stp = stack;
	register short int *sip = &mtPaths[mtPathStart[mp->tree]];
	register __match **mmp = _mpbtop;
	__match **mmp2 = mmp;
	if((_mpbtop += *sip++ + 1) > &_mpblock[MPBLOCKSIZ]) {
		printf ("ich: %d\n", _mpbtop-_mpblock);
		assert(0);
	}

	for(;;)
		switch(*sip++) {
		case ePUSH:
			*stp++ = skp;
			skp = skp->leftchild;
			break;

		case eNEXT:
			skp = skp->sibling;
			break;

		case eEVAL:
			if ((mp = skp->succ[M_DETAG(*sip++)])==NULL) abort();
			*mmp++ = mp;
			break;

		case ePOP:
			skp = *--stp;
			break;

		case eSTOP:
			*mmp = NULL;
			return (mmp2);
		}
}

_evalleaves (mpp)
	register __match **mpp;
{
	for (; *mpp!=NULL; mpp++) {
		register __match *mp = *mpp;
		_do (mp->skel, mp, 1);
	}
}


_do(sp, winner, evalall)
	skeleton *sp; register __match *winner; int evalall;
{
	register __match **mpp;
	register skeleton *skel = winner->skel;
	if(winner==NULL) {
		_prskel(sp, 0);
		puts ("no winner");
		return;
	}
	if(winner->mode==xDEFER || (evalall && winner->mode!=xTOPDOWN))
		REORDER (winner->lleaves);
	mtSetNodes (skel->parent, skel->nson,
		skel->root=mtAction (winner->tree, winner->lleaves, sp));
}

skeleton *
_walk(sp, ostate)
	register skeleton *sp;
	int ostate;
{
	int state, nstate, nson;
	register __partial *pp;
	register __match *mp, *mp2;
	register skeleton *nsp, *lastchild = NULL;
	NODEPTR son, root;

	root = sp->root;
	nson = 1; sp->mincost = INFINITY;
	state = _machstep (ostate, root, mtValue(root));

	while((son = mtGetNodes(root, nson))!=NULL) {
		nstate = _machstep (state, NULL, MV_BRANCH(nson));
		nsp = _allocskel();
		nsp->root = son;
		nsp->parent = root;
		nsp->nson = nson;
		_walk(nsp, nstate);
		if(COSTLESS(nsp->mincost, INFINITY)) {
			assert (nsp->winner->mode==xREWRITE);
			if (mtDebug || treedebug) {
				printf ("rewrite\n");
				_prskel (nsp, 0);
			}
			_do(nsp, nsp->winner, 0);
			_freeskel(nsp);
			continue;
		}
		_merge (sp, nsp);
		if (lastchild==NULL) sp->leftchild = nsp;
		else lastchild->sibling = nsp;
		lastchild = nsp;
		nson++;
	}

	for (pp = sp->partial; pp < &sp->partial[sp->treecnt]; pp++)
		if (pp->bits&01==1) {
			mp = _allocmatch();
			mp->tree = pp->treeno;
			_addmatches (ostate, sp, mp);
		}
	if(son==NULL && nson==1)
		_closure (state, ostate, sp);

	sp->rightchild = lastchild;
	if (root==NULL) {
		COST c; __match *win; int i; nsp = sp->leftchild;
		c = INFINITY;
		win = NULL;
		for (i = 0; i < MAXLABELS; i++) {
			mp = nsp->succ[i];
			if (mp!=NULL && COSTLESS (mp->cost, c))
				{ c = mp->cost; win = mp; }
		}
		if (mtDebug || treedebug)
			_prskel (nsp, 0);
		_do (nsp, win, 0);
	}
	if (mtDebug)
		_prskel (sp, 0);
	return(sp);
}

static short int _nodetab[MAXNDVAL], _labeltab[MAXLABELS];

/*
 * Convert the start state which has a large branching factor into
 * a index table.  This must be called before the matcher is used.
 */
_matchinit()
{
	short int *sp;
	struct pair { short int val, where } *pp;
	for (sp=_nodetab; sp < &_nodetab[MAXNDVAL]; sp++) *sp = HANG;
	for (sp=_labeltab; sp < &_labeltab[MAXLABELS]; sp++) *sp = HANG;
	sp = &mtTable[mtStart];
	assert (*sp==TABLE);
	for (pp = (struct pair *) ++sp; pp->val!=EOF; pp++) {
		if (MI_NODE(pp->val))
			_nodetab[M_DETAG(pp->val)] = pp->where;
		else if (MI_LABEL(pp->val))
			_labeltab[M_DETAG(pp->val)] = pp->where;
	}
}

int
_machstep(state, root, input)
	short int state, input;
	NODEPTR root;
{
	register short int *stp = &mtTable[state];
	int start = 0;
	if (state==HANG)
		return (input==(MV_BRANCH(1)) ? mtStart : HANG);
rescan:
	if (stp==&mtTable[mtStart]) {
		if (MI_NODE(input)) return(_nodetab[M_DETAG(input)]);
		if (MI_LABEL(input)) return(_labeltab[M_DETAG(input)]);
	}
	
	for(;;) {
		if(*stp==ACCEPT) stp += 2;

		if(*stp==TABLE) {
			stp++;
			while(*stp!=EOT) {
				if(input==*stp) {
					return(*++stp);
				}
				stp += 2;
			}
			stp++;
		}
		if(*stp!=FAIL) {
			if (start) return (HANG);
			else { stp = &mtTable[mtStart]; start = 1;  goto rescan;}
		} else {
			stp++;
			stp = &mtTable[*stp];
			goto rescan;
		}
	}
}

_addmatches(ostate, sp, np)
	int ostate;
	register skeleton *sp;
	register __match *np;
{
	int label;
	int state, qstate;
	register __match *mp;
	static short int quick[128][MAXLABELS];
	static short int qtag[128][MAXLABELS];

        label = mtMap[np->tree];

	/*
	 * The following lines replace the line:
	 *
	 *	state = _machstep(ostate, NULL, MV_LABEL(label));
	 *
	 * Statistics show that a large percentage (approx 2/3) of calls to
	 * _machstep occur in this function.  The lines below attempt
	 * to reduce the number of these calls by using an unsophisticated
	 * but apparently adequate caching scheme.
	 */
	qstate = ((ostate>>2)+2)&0177;
	if(ostate != qtag[qstate][label]) {
		state = _machstep(ostate, NULL, MV_LABEL(label));
		quick[qstate][label] = state;
		qtag[qstate][label] = ostate;
	} else state = quick[qstate][label];

	/*
	 * this is a very poor substitute for good design of the DFA.
	 * What we need is a special case that allows any label to be accepted
	 * by the start state but we don't want the start state to recognize
	 * them after a failure.
	 */
	if (ostate!=mtStart  && state==HANG) {
		_freematch(np);
		return;
	}

	np->lleaves = _getleaves (np, sp);
	np->skel = sp;
        if((np->mode = mtEvalCost(np, np->lleaves, sp))==xABORT)
	{
		_freematch(np);
		return;
	}

	if ((mp = sp->succ[label])!=NULL) {
		if (!COSTLESS (np->cost, mp->cost))
			{ _freematch(np); return; }
		else _freematch(mp);
	}
	if(COSTLESS(np->cost,sp->mincost)) {
		if(np->mode==xREWRITE) {
			sp->mincost = np->cost; sp->winner = np; }
		else { sp->mincost = INFINITY; sp->winner = NULL; }
	}
	sp->succ[label] = np;
	_closure(state, ostate, sp);
}

_closure(state, ostate, skp)
	int state, ostate;
	skeleton *skp;
{
	register struct ap { short int tree, depth; } *ap;
	short int *sp = &mtTable[state];
	register __match *mp;

	if(state==HANG || *sp!=ACCEPT) return(NULL);

	for(ap = (struct ap *) &mtAccept[*++sp]; ap->tree!=-1; ap++)
		if (ap->depth==0) {
			mp = _allocmatch();	
			mp->tree = ap->tree;
			_addmatches (ostate, skp, mp);
		} else {
			register __partial *pp, *lim = &skp->partial[skp->treecnt];
			for(pp=skp->partial; pp < lim; pp++)
				if(pp->treeno==ap->tree)
					break;
			if(pp==lim) {
				skp->treecnt++;
				pp->treeno = ap->tree;
				pp->bits = (1<<(ap->depth));
			} else pp->bits |= (1<<(ap->depth));
		}
}

_merge (old, new)
	skeleton *old, *new;
{
	register __partial *op = old->partial, *np = new->partial;
	int nson = new->nson;
	register __partial *lim = np + new->treecnt;
	if (nson==1) {
		old->treecnt = new->treecnt;
		for(; np < lim; op++, np++) {
			op->treeno = np->treeno;
			op->bits = np->bits/2;
		}
	} else {
		__partial *newer = _allocpartial();
		register __partial *newerp = newer;
		register int cnt;
		lim = op+old->treecnt;
		for(cnt=new->treecnt; cnt-- ; np++) {
			for(op = old->partial; op < lim; op++)
				if (op->treeno == np->treeno) {
					newerp->treeno = op->treeno;
					newerp++->bits = op->bits & (np->bits/2);
					break;
				}
		}
		_freepartial(old->partial);
		old->partial = newer;
		old->treecnt = newerp-newer;
	}
}
 
/* Save and Allocate Matches */
#define BLKF	100
__match *freep = NULL;
static int _count = 0;
static __match *_blockp = NULL;
#ifdef CHECKMEM
int x_matches, f_matches;
#endif

__match *
_allocmatch()
{
	__match *mp;

	if(freep!=NULL) {
		mp = freep;
		freep = ((__match *)((struct freeb *) freep)->next);
#ifdef CHECKMEM
		f_matches--;
#endif
	} else {
		if(_count==0) {
			_blockp = (__match *) malloc (BLKF*sizeof (__match));
			_count = BLKF;
#ifdef CHECKMEM
			x_matches += BLKF;
#endif
		}
		mp = _blockp++;
		_count--;
	}
	return(mp);
}

_freematch(mp)
	__match *mp;
{
	((struct freeb *) mp)->next = (struct freeb *) freep;
	freep = mp;
#ifdef CHECKMEM
	f_matches++;
#endif
}

static __partial *pfreep = NULL;
static int pcount = 0;
static __partial *pblock = NULL;
#ifdef CHECKMEM
static int x_partials, f_partials;
#endif

__partial *
_allocpartial()
{
	__partial *pp;
	if (pfreep!=NULL) {
		pp = pfreep;
		pfreep = *(__partial **) pp;
#ifdef CHECKMEM
		f_partials--;
#endif
	} else {
		if (pcount==0) {
			pblock=(__partial *)malloc(BLKF*MAXTREES*sizeof(__partial));
			pcount = BLKF;
#ifdef CHECKMEM
			x_partials += BLKF;
#endif
		}
		pp = pblock;
		pblock += MAXTREES;
		pcount--;
	}
	return(pp);
}

_freepartial(pp)
	__partial *pp;
{
	* (__partial **)pp = pfreep;
	pfreep = pp;
#ifdef CHECKMEM
	f_partials++;
#endif
}


/* storage management */
static skeleton *sfreep = NULL;
static int scount = 0;
static skeleton * sblock = NULL;

skeleton *
_allocskel()
{
	skeleton *sp;
	int i;
	if(sfreep!=NULL) {
		sp = sfreep;
		sfreep = sp->sibling;
	} else {
		if(scount==0) {
			sblock = (skeleton *) malloc (BLKF * sizeof (skeleton));
			scount = BLKF;
		}
		sp = sblock++;
		scount--;
	}
	sp->sibling = NULL; sp->leftchild = NULL;
	for (i=0; i < MAXLABELS; sp->succ[i++] = NULL);
	sp->treecnt = 0;
	sp->partial = _allocpartial();
	return(sp);
}

_freeskel(sp)
	skeleton *sp;
{
	int i;
	__match *mp;
	if(sp==NULL)
		return;
	if(sp->leftchild)
		_freeskel(sp->leftchild);
	if(sp->sibling)
		_freeskel(sp->sibling);
	_freepartial (sp->partial);
	for (i=0; i < MAXLABELS; i++)
		if ((mp = sp->succ[i])!=NULL) _freematch (mp);
	sp->sibling = sfreep;
	sfreep = sp;
}

_match()
{
	skeleton *sp;
	sp = _allocskel();
	sp->root = NULL;
	_mpbtop = _mpblock;
	_freeskel(_walk(sp, HANG));
#ifdef CHECKMEM
	if(f_matches+_count!=x_matches) {
		printf(";#m %d %d %d\n", f_matches, _count, x_matches);
		assert(0);
	}
	if(f_partials+pcount!=x_partials) {
		printf(";#p %d %d %d\n", f_partials, pcount, x_partials);
		assert(0);
	}
#endif
}

NODEPTR
_mtG(root,i)
	NODEPTR root;
	int i;
{
	int *p = &i;
	while(*p!=-1)
		root = mtGetNodes(root, *p++);
	return(root);
}

/* diagnostic routines */

_prskel (skp, lvl)
	skeleton *skp; int lvl;
{
	int i; __match *mp;
	__partial *pp;
	if(skp==NULL) return;
	for (i = lvl; i > 0; i--) { putchar (' '); putchar (' '); }
	printf ("###\n");
	for (i = lvl; i > 0; i--) { putchar (' '); putchar (' '); }
	for (i = 0; i < MAXLABELS; i++)
		if ((mp=skp->succ[i])!=NULL)
#ifdef LINE_XREF
			printf ("[%d<%d> %d]", mp->tree,
				mtLine[mp->tree], mp->cost);
#else
			printf ("[%d %d]", mp->tree, mp->cost);
#endif
	putchar('\n');
	for (i = lvl; i > 0; i--) { putchar (' '); putchar (' '); }
	for (i = 0, pp=skp->partial; i < skp->treecnt; i++, pp++)
#ifdef LINE_XREF
			printf("(%d<%d> %x)", pp->treeno, mtLine[pp->treeno],
				pp->bits);
#else
			printf ("(%d %x)", pp->treeno, pp->bits);
#endif
	putchar('\n');
	fflush(stdout);
	_prskel (skp->leftchild, lvl+2);
	_prskel (skp->sibling, lvl);
}