1BSD/ex-1.1/ex_re.c
#include "ex.h"
#include "ex_re.h"
/*
* Ex - a text editor
* Bill Joy UCB June/September 1977
*/
compile(eof, oknl)
int eof;
char oknl;
{
register c;
register char *ep;
char *lastep;
char bracket[NBRA], *bracketp, *rhsp;
int cclcnt;
if (letter(eof) || digit(eof))
error("Re delimiter must not be letter or digit|Regular expressions cannot be delimited by letters or digits");
ep = expbuf;
c = getchar();
if (eof == '\\')
switch (c) {
case '/':
case '?':
if (scanre.sexpbuf[0] == 0)
noscanre:
error("No previous scan re|No previous scanning regular expression");
resre(&scanre);
return (c);
case '&':
if (subre.sexpbuf[0] == 0)
nosubre:
error("No previous substitute re|No previous substitute regular expression");
resre(&subre);
return (c);
default:
error("Badly formed re|Regular expression \\ must be followed by /, ?, or &");
}
if (c == eof || c == '\n' || c == EOF) {
if (*ep == 0)
error("No previous re|No previous regular expression");
if (c == '\n' && oknl == 0)
error("Missing closing delimiter@for regular expression");
if (c == '\n')
ungetchar(c);
return (eof);
}
bracketp = bracket;
nbra = 0;
circfl = 0;
if (c == '^') {
c = getchar();
circfl++;
}
ungetchar(c);
for (;;) {
if (ep >= &expbuf[ESIZE - 2])
complex:
cerror("Re too complex|Regular expression too complicated");
c = getchar();
if (c == eof || c == EOF) {
if (bracketp != bracket)
cerror("Unmatched \\(|More \\('s than \\)'s in regular expression");
*ep++ = CEOF;
return (eof);
}
if (value(MAGIC)) {
if (c != '*' || ep == expbuf)
lastep = ep;
} else
if (c != '\\' || peekchar() != '*' || ep == expbuf)
lastep = ep;
switch (c) {
case '\\':
c = getchar();
switch (c) {
case '(':
if (nbra >= NBRA)
cerror("Awash in \\('s!|Too many \\('d subexressions in a regular expression");
*bracketp++ = nbra;
*ep++ = CBRA;
*ep++ = nbra++;
continue;
case ')':
if (bracketp <= bracket)
cerror("Extra \\)|More \\)'s than \\('s in regular expression");
*ep++ = CKET;
*ep++ = *--bracketp;
continue;
case '<':
*ep++ = CBRC;
continue;
case '>':
*ep++ = CLET;
continue;
}
if (value(MAGIC) == 0)
magic:
switch (c) {
case '.':
*ep++ = CDOT;
continue;
case '~':
rhsp = rhsbuf;
while (*rhsp) {
if (*rhsp & QUOTE) {
c = *rhsp & 0177;
if (c == '&')
error("Replacement pattern contains &@- cannot use in re");
if (c >= '1' && c <= '9')
error("Replacement pattern contains \\d@- cannot use in re");
}
if (ep >= &expbuf[ESIZE-2])
goto complex;
*ep++ = CCHR;
*ep++ = *rhsp++ & 0177;
}
continue;
case '*':
if (ep == expbuf)
break;
if (*lastep == CBRA || *lastep == CKET)
cerror("Illegal *|Can't * a \\( ... \\) in regular expression");
if (*lastep == CCHR && (lastep[1] & QUOTE))
cerror("Illegal *|Can't * a \\n in regular expression");
*lastep =| STAR;
continue;
case '[':
*ep++ = CCL;
*ep++ = 0;
cclcnt = 1;
c = getchar();
if (c == '^') {
c = getchar();
ep[-2] = NCCL;
}
if (c == ']')
cerror("Bad character class|Empty character class '[]' or '[^]' cannot match");
while (c != ']') {
if (c == '\\' && any(peekchar(), "]-^\\"))
c = getchar() | QUOTE;
if (c == '\n' || c == EOF)
cerror("Missing ]");
*ep++ = c;
cclcnt++;
if (ep >= &expbuf[ESIZE])
goto complex;
c = getchar();
}
lastep[1] = cclcnt;
continue;
}
if (c == EOF) {
ungetchar(EOF);
c = '\\';
goto defchar;
}
*ep++ = CCHR;
if (c == '\n')
cerror("No newlines in re's|Can't escape newlines into regular expressions");
if (c < '1' || c > NBRA + '1') {
*ep++ = c;
continue;
}
c =- '1';
if (c >= nbra)
cerror("Bad \\n|\\n in regular expression with n greater than the number of \\('s");
*ep++ = c | QUOTE;
continue;
case '\n':
if (oknl) {
ungetchar(c);
*ep++ = CEOF;
return (eof);
}
cerror("Badly formed re|Missing closing delimiter for regular expression");
case '$':
if (peekchar() == eof || peekchar() == EOF || oknl && peekchar() == '\n') {
*ep++ = CDOL;
continue;
}
goto defchar;
case '.':
case '~':
case '*':
case '[':
if (value(MAGIC))
goto magic;
defchar:
default:
*ep++ = CCHR;
*ep++ = c;
continue;
}
}
}
/*
 * Report an error found while an expression is being compiled.
 * The first byte of expbuf is cleared before s is handed to error();
 * compile() treats a zero first byte as "no previous expression".
 */
cerror(s)
char *s;
{
	*expbuf = 0;
	error(s);
}
/*
 * Compare two characters, ignoring case when IGNORECASE is set.
 * Returns non-zero when a and b are identical, or when the option is
 * set and they are the upper and lower case forms of one letter.
 *
 * The letter() result must be non-zero as well as equal.  letter() is
 * used elsewhere in this file as a predicate that is false for
 * non-letters, so comparing the two results alone would also pair up
 * any two non-letters that differ only in the 040 bit (a blank and the
 * NUL that ends a line, for instance).
 */
same(a, b)
register int a, b;
{
	register int la;

	if (a == b)
		return (1);
	if (!value(IGNORECASE) || (a ^ b) != ' ')
		return (0);
	la = letter(a);
	return (la != 0 && la == letter(b));
}
/*
 * Search for the compiled expression in expbuf.
 *
 * gf	zero: fetch the text with getline(*addr) and search it from its
 *	start (fails at once if addr is zero).  Non-zero: resume at
 *	loc2 in the current text, remembering that point in locs;
 *	an anchored (circfl) expression cannot match again.
 *
 * Returns 1 with loc1 at the start of the match (advance() leaves loc2
 * at its end), or 0 if there is no match.
 */
execute(gf, addr)
int *addr;
{
	register char *p1, *p2;
	register c;

	if (gf == 0) {
		if (addr == zero)
			return (0);
		p1 = getline(*addr);
		locs = 0;
	} else {
		if (circfl)
			return (0);
		p1 = loc2;
		locs = p1;
	}
	p2 = expbuf;

	/* anchored: only the starting position can match */
	if (circfl) {
		loc1 = p1;
		return (advance(p1, p2));
	}

	/* regular algorithm: try every position, the final NUL included */
	if (*p2 != CCHR) {
		for (;;) {
			if (advance(p1, p2)) {
				loc1 = p1;
				return (1);
			}
			if (*p1++ == 0)
				return (0);
		}
	}

	/*
	 * fast check for first character: the expression opens with a
	 * literal, so advance() is only worth calling where the text
	 * has that character (in either case under IGNORECASE).
	 */
	c = p2[1];
	for (;;) {
		if (c == *p1 || (value(IGNORECASE) &&
		    (c ^ *p1) == ' ' && letter(c) == letter(*p1))) {
			if (advance(p1, p2)) {
				loc1 = p1;
				return (1);
			}
		}
		if (*p1++ == 0)
			return (0);
	}
}
/*
 * True when c is a letter according to letter(), or an underscore.
 * The argument is expanded twice, so it must have no side effects.
 */
#define uletter(c) (letter(c) || (c) == '_')
/*
 * Try to match the compiled expression starting at ep against the
 * text starting at lp.  Returns 1 on a match, with loc2 left pointing
 * just past the matched text, and 0 otherwise.  Starred items are
 * matched as far as possible first and then backed off one character
 * at a time, calling advance() recursively for the rest of the
 * expression.
 *
 * The local nextep is declared but never used.
 * "=+" below is the old spelling of the "+=" assignment operator.
 */
advance(lp, ep)
register char *lp, *ep;
{
register char *curlp;
char *nextep, *sp, *sp1, c;
for (;;) switch (*ep++) {
/* Literal character, or a back reference when the operand has QUOTE set. */
case CCHR:
if (*ep & QUOTE) {
/* Re-match the text recorded between braslist[c] and braelist[c]. */
c = *ep++ & 0177;
sp = braslist[c];
sp1 = braelist[c];
while (sp < sp1) {
if (!same(*sp, *lp))
return (0);
sp++, lp++;
}
continue;
}
if (!same(*ep, *lp))
return (0);
ep++, lp++;
continue;
/* Any character except the NUL that ends the text. */
case CDOT:
if (*lp++)
continue;
return (0);
/* End of text. */
case CDOL:
if (*lp == 0)
continue;
return (0);
/* End of expression: the match succeeded. */
case CEOF:
loc2 = lp;
return (1);
/*
 * Character class.  ep points at the count byte, which counts itself,
 * so adding it to ep skips the whole member list.
 */
case CCL:
if (cclass(ep, *lp++, 1)) {
ep =+ *ep;
continue;
}
return (0);
/* Negated character class. */
case NCCL:
if (cclass(ep, *lp++, 0)) {
ep =+ *ep;
continue;
}
return (0);
/* Record where group number *ep begins ... */
case CBRA:
braslist[*ep++] = lp;
continue;
/* ... and where it ends. */
case CKET:
braelist[*ep++] = lp;
continue;
/*
 * Starred items.  Each scan below leaves lp one position beyond the
 * first character that did not match; the loop at star: steps back
 * before every attempt.
 */
case CDOT|STAR:
curlp = lp;
while (*lp++)
continue;
goto star;
case CCHR|STAR:
curlp = lp;
while (same(*lp, *ep))
lp++;
lp++;
ep++;
goto star;
case CCL|STAR:
case NCCL|STAR:
curlp = lp;
/* The third argument is 1 for CCL|STAR and 0 for NCCL|STAR. */
while (cclass(ep, *lp++, ep[-1] == (CCL|STAR)))
continue;
ep =+ *ep;
goto star;
/*
 * Back off from the longest run towards curlp (zero repetitions),
 * trying the rest of the expression at each position.  The search is
 * abandoned if lp comes back to locs.
 */
star:
do {
lp--;
if (lp == locs)
break;
if (advance(lp, ep))
return (1);
} while (lp > curlp);
return (0);
/*
 * Matches where *lp is a letter or underscore and the character before
 * it is neither that nor a digit.
 * NOTE(review): the first test compares lp, a pointer into the text,
 * with expbuf, the compiled expression.  It looks as if the start of
 * the text buffer was intended; as written lp[-1] may be read from
 * before the start of the text -- confirm against the caller.
 */
case CBRC:
if (lp == expbuf)
continue;
if (uletter(*lp) && !uletter(lp[-1]) && !digit(lp[-1]))
continue;
return (0);
/* Matches where *lp is not a letter, underscore or digit. */
case CLET:
if (!uletter(*lp) && !digit(*lp))
continue;
return (0);
/* An opcode that compile() never produces. */
default:
error("Re internal error@- if possible remember what you did and tell system staff");
}
}
/*
 * Test character c against a compiled character class.
 *
 * set	points at the class's count byte; the count includes that byte,
 *	and the members follow it.  A member followed by '-' and
 *	another member is a range.  Members are compared with the
 *	0200 bit masked off, but a '-' stored with that bit set does
 *	not form a range.
 * c	the character to test; NUL never matches, and an upper case
 *	letter is first mapped through letter() when IGNORECASE is set.
 * af	the value to return when c is in the class; !af is returned
 *	when it is not.
 */
cclass(set, c, af)
register char *set;
register c;
int af;
{
	register left;

	if (c == 0)
		return (0);
	if (value(IGNORECASE) && ucletter(c))
		c = letter(c);
	left = *set++;
	for (;;) {
		/* left counts the members still to look at, plus one */
		if (--left == 0)
			return (!af);
		if (left > 2 && set[1] == '-') {
			if (c >= (set[0] & 0177) && c <= (set[2] & 0177))
				return (af);
			/* the third byte of the range is paid for by the --left above */
			set = set + 3;
			left = left - 2;
			continue;
		}
		if ((*set++ & 0177) == c)
			return (af);
	}
}
/*
 * Copy size bytes from from to to, lowest address first.
 * Nothing is copied when size is zero or negative.
 */
copy(to, from, size)
register char *from, *to;
register int size;
{
	for (; size > 0; size--)
		*to++ = *from++;
}