wildmat.c, with less bugs

Bernd Felsche bernie at metapro.DIALix.oz.au
Mon Feb 25 20:25:30 AEST 1991


Thanks to the vigilance of:
	Jim Dumser <jmd at ee.umr.edu>
and
	Bernd Raichle <raichle at azu.informatik.uni-stuttgart.de>

another bug has been exterminated. Funny how these things crop up in
source code one adopts.

Jim pointed out that the routine didn't even work for the supplied
examples, and Bernd (not me, the other one), pointed out the ?*? came
up TRUE on matching with "a".

Thanks guys. And again thanks to Rich Salz for posting the original
bugs... oops, I mean, source :-)

Bernd actually worked out how the routine works and suggested a fix
which I had already implemented and sent to Jim for checking. Jim
hasn't complained yet so .....

Here's the updated code.

______________________cut with diamond along here_______________________

/*
**  Do shell-style pattern matching for ?, \, [], and * characters.
**  Might not be robust in face of malformed patterns; e.g., "foo[a-"
**  could cause a segmentation violation.  It is 8bit clean.
**
**  Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
**  Special thanks to Lars Mathiesen for the ABORT code.  This can greatly
**  speed up failing wildcard patterns.  For example:
**	pattern: -*-*-*-*-*-*-12-*-*-*-m-*-*-*
**	text 1:	 -adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1
**	text 2:	 -adobe-courier-bold-o-normal--12-120-75-75-p-70-iso8859-1
**  Text 1 matches with 51 calls, while text 2 fails with 54 calls.  Without
**  the ABORT, then it takes 22310 calls to fail.  Ugh.
**
**  bernie    613-01 91/01/04 19:34 
**  Fixed problem with terminating * not matching with (null)
**
**  bernie    597-00 91/01/08 11:24 
**  Fixed shell glob negate from '^' to '!'
**
**  bernie    597-02 91/01/21 13:43 
**	Fixed . matching * or ? on first char.
**
 *  bernie      1-00 91/02/14 10:28 
**	Fixed Star return on ABORT
*/

#define TRUE		1
#define FALSE		0
#define ABORT		-1

static int count;

static int
Star(s, p)
    register char	*s;
    register char	*p;
{
    register int stat;

    while ( (stat=DoMatch(s, p)) == FALSE) /* gobble up * match */
	if (*++s == '\0') return ABORT;
    return stat;
}


static int
DoMatch(s, p)	/* match string "s" to pattern "p" */
    register char	*s;
    register char	*p;
{
    register int 	 last;
    register int 	 matched;
    register int 	 reverse;

    count++;

    for ( ; *p; s++, p++) { /* parse the string to end */
	if (*s == '\0')
		return *p == '*' && *++p == '\0' ? TRUE : ABORT;

	switch (*p) { /* parse pattern */

	case '\\':
	    /* Literal match with following character. */
	    p++;
	    /* FALLTHROUGH */

	default: /*literal match*/
	    if (*s != *p)
		return FALSE;
	    continue;

	case '?':
	    /* Match anything. */
	    continue;

	case '*':
	    /* Trailing star matches everything. */
	    return( *++p ? Star(s, p) : TRUE );

	case '[':
	    /* [!....] means inverse character class. */
	    if (reverse = p[1] == '!') p++;

	    for (last = 0400, matched = FALSE; *++p && *p != ']'; last = *p)
		/* This next line requires a good C compiler. */
		/*     range?		(in bounds)                  (equal) */
		if ( ( *p == '-' ) ? (*s <= *++p && *s >= last ) : (*s == *p) )
		    matched = TRUE;

	    if (matched == reverse) return FALSE;
	    continue;
	}
    }
    return *s == '\0';
}


int wildmat(s, p)
    char	*s;
    char	*p;
{
	if ( (*p == '?' || *p == '*' ) && *s == '.' ) {
		return FALSE;
	} else {
		return DoMatch(s, p) == TRUE;
	}
}


char pattern[] = "-*-*-*-*-*-*-12-*-*-*-m-*-*-*",
     text1[] = "-adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1",
     text2[] = "-adobe-courier-bold-o-normal--12-120-75-75-p-70-iso8859-1";

main()
{
	int result;

	count= 0; result = wildmat(text1,pattern);
	printf("%s\n%s\n%s %d\n",
		text1, pattern, ( result ? "TRUE" : "FALSE" ), count);

	count = 0; result = wildmat(text2,pattern);
	printf("%s\n%s\n%s %d\n",
		text2, pattern, ( result ? "TRUE" : "FALSE" ), count);
}

/*
**  Do shell-style pattern matching for ?, \, [], and * characters.
**  Might not be robust in face of malformed patterns; e.g., "foo[a-"
**  could cause a segmentation violation.  It is 8bit clean.
**
**  Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
**  Special thanks to Lars Mathiesen for the ABORT code.  This can greatly
**  speed up failing wildcard patterns.  For example:
**	pattern: -*-*-*-*-*-*-12-*-*-*-m-*-*-*
**	text 1:	 -adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1
**	text 2:	 -adobe-courier-bold-o-normal--12-120-75-75-p-70-iso8859-1
**  Text 1 matches with 51 calls, while text 2 fails with 54 calls.  Without
**  the ABORT, then it takes 22310 calls to fail.  Ugh.
**
**  bernie    613-01 91/01/04 19:34 
**  Fixed problem with terminating * not matching with (null)
**
**  bernie    597-00 91/01/08 11:24 
**  Fixed shell glob negate from '^' to '!'
**
**  bernie    597-02 91/01/21 13:43 
**	Fixed . matching * or ? on first char.
*/

#define TRUE		1
#define FALSE		0
#define ABORT		-1

static int count;

static int
Star(s, p)
    register char	*s;
    register char	*p;
{
    register int stat;

    while ( (stat=DoMatch(s, p)) == FALSE) /* gobble up * match */
	if (*++s == '\0') return ABORT;
    return stat;
}


static int
DoMatch(s, p)	/* match string "s" to pattern "p" */
    register char	*s;
    register char	*p;
{
    register int 	 last;
    register int 	 matched;
    register int 	 reverse;

    count++;

    for ( ; *p; s++, p++) { /* parse the string to end */
	if (*s == '\0')
		return *p == '*' && *++p == '\0' ? TRUE : ABORT;

	switch (*p) { /* parse pattern */

	case '\\':
	    /* Literal match with following character. */
	    p++;
	    /* FALLTHROUGH */

	default: /*literal match*/
	    if (*s != *p)
		return FALSE;
	    continue;

	case '?':
	    /* Match anything. */
	    continue;

	case '*':
	    /* Trailing star matches everything. */
	    return( *++p ? Star(s, p) : TRUE );

	case '[':
	    /* [!....] means inverse character class. */
	    if (reverse = p[1] == '!') p++;

	    for (last = 0400, matched = FALSE; *++p && *p != ']'; last = *p)
		/* This next line requires a good C compiler. */
		/*     range?		(in bounds)                  (equal) */
		if ( ( *p == '-' ) ? (*s <= *++p && *s >= last ) : (*s == *p) )
		    matched = TRUE;

	    if (matched == reverse) return FALSE;
	    continue;
	}
    }
    return *s == '\0';
}


int wildmat(s, p)
    char	*s;
    char	*p;
{
	if ( (*p == '?' || *p == '*' ) && *s == '.' ) {
		return FALSE;
	} else {
		return DoMatch(s, p) == TRUE;
	}
}

/* here's the bonus test program */

char pattern[] = "-*-*-*-*-*-*-12-*-*-*-m-*-*-*",
     text1[] = "-adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1",
     text2[] = "-adobe-courier-bold-o-normal--12-120-75-75-p-70-iso8859-1";

main()
{
	int result;

	count= 0; result = wildmat(text1,pattern);
	printf("%s\n%s\n%s %d\n",
		text1, pattern, ( result ? "TRUE" : "FALSE" ), count);

	count = 0; result = wildmat(text2,pattern);
	printf("%s\n%s\n%s %d\n",
		text2, pattern, ( result ? "TRUE" : "FALSE" ), count);
}
-- 
Bernd Felsche,                 _--_|\   #include <std/disclaimer.h>
Metapro Systems,              / sale \  Fax:   +61 9 472 3337
328 Albany Highway,           \_.--._/  Phone: +61 9 362 9355
Victoria Park,  Western Australia   v   Email: bernie at metapro.DIALix.oz.au



More information about the Alt.sources mailing list