V10/cmd/twig/machine.h
/*
* The machine used here is the same as the one used in fgrep.
* Logically, it is a trie of all the strings to be recognized.
* In this case, the strings are the paths of tree patterns.
* The nodes of the trie are the states of the machine and leaves
* are accepting states. The trie is augmented by failure transitions.
*
* Each machine state is a linked list of machine_node's and referred to
* by pointing to the head of the list. The machine_node represents a
* transition. The nst field points to the next state when the current
* input is MI_EQUAL to the inp field. If the input is not MI_EQUAL to any
* of the inp fields in the list, the next state is the one that fail
* points to. The fail field must be the same for all machine_node's in
* a state. Accepting states have inp.value equal to -1, and the match
* field points to a list of match structures. The match structures record
* which trees have been partially matched.
*
* The index is used by the machine generator to convert the machine into
* a list of integers.
*
* Further details about the data structure and algorithms can be
* found in:
* Knuth, Morris, Pratt in SIAM J Computing 6:3
* Aho, Corasick in Comm ACM 18:6
* Hoffman, O'Donnell in JACM 29:1
*/
struct machine_input {
short value;
struct symbol_entry *sym;
};
#define MI_EQUAL(x,y) ((x).value==(y).value)
#include "machcomm.h"
extern int path_getsym();
struct machine_node {
struct machine_input inp;
struct machine_node *nst;
struct machine_node *link;
struct machine_node *fail;
struct match *match;
short int index;
};
struct match {
struct match *next;
short int depth;
LabelData *tree;
};
extern struct machine_node *machine;
extern void machine_build();