V10/cmd/twig/machine.h

/*
 * The machine used here is the same as the one used in fgrep.
 * Logically, it is a trie of all the strings to be recognized.
 * In this case, the strings are the paths of tree patterns.
 * The nodes of the trie are the states of the machine and leaves
 * are accepting states.  The trie is augmented by failure transitions.
 *
 * Each machine state is a linked list of machine_node's and referred to
 * by pointing to the head of the list.  The machine_node represents a
 * transition.  The nst field points to the next state when the current
 * input is MI_EQUAL to the inp field.  If the input is not MI_EQUAL to any
 * of the inp fields in the list,  the next state is the one that fail
 * points to.  The fail field must be the same for all machine_node's in
 * a state.  Accepting states have inp.value equal to -1, and the match
 * field points to a list of match structures.  The match structures record
 * which trees have been partially matched.
 *
 * The index is used by the machine generator to convert the machine into
 * a list of integers.
 *
 * Further details about the data structure and algorithms can be
 * found in:
 *	Knuth, Morris, Pratt in SIAM J Computing 6:3
 *	Aho, Corasick in Comm ACM 18:6
 *	Hoffman, O'Donnell in JACM 29:1
 */
struct machine_input {
	short value;
	struct symbol_entry *sym;
};
#define MI_EQUAL(x,y)	((x).value==(y).value)

#include "machcomm.h"

extern int path_getsym();

struct machine_node {
	struct machine_input inp;
	struct machine_node *nst;
	struct machine_node *link;
	struct machine_node *fail;
	struct match *match;
	short int index;
};

struct match {
	struct match *next;
	short int depth;
	LabelData *tree;
};

extern struct machine_node *machine;

extern void machine_build();