4.4BSD/usr/src/contrib/nvi/nvi/ex/ex_substitute.c

Compare this file to the similar file:
Show the results in this format:

/*-
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef lint
static char sccsid[] = "@(#)ex_substitute.c	8.1 (Berkeley) 6/9/93";
#endif /* not lint */

#include <sys/types.h>

#include <ctype.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>

#include "vi.h"
#include "excmd.h"

/*
 * How substitute() was reached:
 *	AGAIN		reuse the previously saved RE (an empty pattern was
 *			given, or the caller is ex_subagain).
 *	MUSTSETR	no delimited pattern was given at all; substitute()
 *			rejects the command unless the 'r' flag is present.
 *	FIRST		a new RE was just compiled from the command; the
 *			'r' flag is rejected as meaningless.
 */
enum which {AGAIN, MUSTSETR, FIRST};

/*
 * File-local routines: checkmatchsize sizes the screen's regmatch_t array
 * for an RE, regsub expands the replacement text for one match into the
 * build buffer, and substitute is the per-line substitution engine.
 */
static int		checkmatchsize __P((SCR *, regex_t *));
static inline int	regsub __P((SCR *,
			    char *, char **, size_t *, size_t *));
static int		substitute __P((SCR *, EXF *,
			    EXCMDARG *, char *, regex_t *, enum which));

int
ex_substitute(sp, ep, cmdp)
	SCR *sp;
	EXF *ep;
	EXCMDARG *cmdp;
{
	regex_t *re, lre;
	int eval, reflags;
	char *endp, *sub;
	char delim[2];

	/*
	 * Historic vi only permitted '/' to begin the substitution command.
	 * We permit ';' as well, since users often want to operate on UNIX
	 * pathnames.  We don't just allow anything because the flag chars
	 * wouldn't work.
	 */
	if (*cmdp->string == '/' || *cmdp->string == ';') {
		/* Delimiter is the first character. */
		delim[0] = cmdp->string[0];
		delim[1] = '\0';

		/* Get the substitute string. */
		endp = cmdp->string + 1;
		sub = strsep(&endp, delim);

		/* Get the replacement string, save it off. */
		if (endp == NULL || *endp == NULL) {
			msgq(sp, M_ERR, "No replacement string specified.");
			return (1);
		}
		if (sp->repl != NULL)
			free(sp->repl);
		sp->repl = strsep(&endp, delim);
		sp->repl = strdup(sp->repl);
		sp->repl_len = strlen(sp->repl);

		/* If the substitute string is empty, use the last one. */
		if (*sub == NULL) {
			if (!F_ISSET(sp, S_RE_SET)) {
				msgq(sp, M_ERR,
				    "No previous regular expression.");
				return (1);
			}
			if (checkmatchsize(sp, &sp->sre))
				return (1);
			return (substitute(sp, ep,
			    cmdp, endp ? endp : "", &sp->sre, AGAIN));
		}

		/* Set RE flags. */
		reflags = 0;
		if (O_ISSET(sp, O_EXTENDED))
			reflags |= REG_EXTENDED;
		if (O_ISSET(sp, O_IGNORECASE))
			reflags |= REG_ICASE;

		/* Compile the RE. */
		re = &lre;
		if (eval = regcomp(re, (char *)sub, reflags)) {
			re_error(sp, eval, re);
			return (1);
		}

		/*
		 * Set saved RE.  Historic practice is that substitutes set
		 * direction as well as the RE.
		 */
		sp->sre = lre;
		sp->searchdir = FORWARD;
		F_SET(sp, S_RE_SET);

		if (checkmatchsize(sp, &sp->sre))
			return (1);
		return (substitute(sp, ep, cmdp, endp ? endp : "", re, FIRST));
	}
	return (substitute(sp, ep, cmdp, cmdp->string, &sp->sre, MUSTSETR));
}

/*
 * ex_subagain --
 *	Repeat the substitution using the screen's saved RE and saved
 *	replacement; cmdp->string carries only the flag characters.
 *
 *	Returns 0 on success, 1 on error.
 */
int
ex_subagain(sp, ep, cmdp)
	SCR *sp;
	EXF *ep;
	EXCMDARG *cmdp;
{
	if (!F_ISSET(sp, S_RE_SET)) {
		msgq(sp, M_ERR, "No previous regular expression.");
		return (1);
	}

	/*
	 * Nothing guarantees that sp->match is currently large enough for
	 * the saved RE (an earlier resize may have failed after the RE was
	 * saved), and substitute() passes re_nsub + 1 slots to regexec().
	 * Size it here, as the other saved-RE path in ex_substitute does.
	 */
	if (checkmatchsize(sp, &sp->sre))
		return (1);
	return (substitute(sp, ep, cmdp, cmdp->string, &sp->sre, AGAIN));
}

/* 
 * The nasty part of the substitution is what happens when the replacement
 * string contains newlines.  It's a bit tricky -- consider the information
 * that has to be retained for "s/f\(o\)o/^M\1^M\1/".  The solution here is
 * to build a set of newline offsets which we use to break the line up later,
 * when the replacement is done.  Don't change it unless you're pretty damned
 * confident.
 */
/*
 * NEEDNEWLINE --
 *	Make sure sp->newl has room for one more newline offset, growing
 *	the array 25 entries at a time.
 *
 *	On allocation failure an error is reported through msgq and the
 *	macro RETURNS 1 FROM THE ENCLOSING FUNCTION.  The existing array and
 *	its length are left in place (realloc does not release the old block
 *	when it fails), and the count of recorded offsets is reset so that
 *	offsets belonging to the abandoned substitution can never be applied
 *	to a later one.
 */
#define	NEEDNEWLINE(sp) {						\
	if ((sp)->newl_len == (sp)->newl_cnt) {				\
		void *neednl_new_;					\
		if ((neednl_new_ = realloc((sp)->newl,			\
		    ((sp)->newl_len + 25) * sizeof(size_t))) == NULL) {	\
			msgq(sp, M_ERR,					\
			    "Error: %s", strerror(errno));		\
			(sp)->newl_cnt = 0;				\
			return (1);					\
		}							\
		(sp)->newl = neednl_new_;				\
		(sp)->newl_len += 25;					\
	}								\
}

/*
 * BUILD --
 *	Append len bytes starting at l to the build buffer.  Operates on the
 *	enclosing function's lb (buffer), lbclen (bytes used) and lblen
 *	(bytes allocated) variables, growing the buffer by at least 256
 *	bytes whenever the new data would not fit.
 *
 *	On allocation failure an error is reported through msgq and the
 *	macro RETURNS 1 FROM THE ENCLOSING FUNCTION.  Because that return
 *	abandons lb, the old buffer is released first rather than leaked;
 *	this is only correct where lb is owned by the function using the
 *	macro, which holds for its sole user in this file, substitute().
 *
 *	This must stay a plain brace block (not do/while): it is invoked
 *	in places without a trailing semicolon.
 */
#define	BUILD(sp, l, len) {						\
	if (lbclen + (len) > lblen) {					\
		void *build_new_;					\
		if ((build_new_ = realloc(lb,				\
		    lblen + MAX(lbclen + (len), 256))) == NULL) {	\
			msgq(sp, M_ERR,					\
			    "Error: %s", strerror(errno));		\
			free(lb);					\
			lb = NULL;					\
			lbclen = lblen = 0;				\
			return (1);					\
		}							\
		lb = build_new_;					\
		lblen += MAX(lbclen + (len), 256);			\
	}								\
	memmove(lb + lbclen, (l), (len));				\
	lbclen += (len);						\
}

/*
 * NEEDSP --
 *	Make sure the build buffer has room for len more bytes.  Operates on
 *	the enclosing function's lb, lbclen and lblen variables; if the
 *	buffer is reallocated, pnt is reset to the current end of the data
 *	(lb + lbclen) in the new buffer, otherwise pnt is left alone.
 *
 *	On allocation failure an error is reported through msgq and the
 *	macro RETURNS 1 FROM THE ENCLOSING FUNCTION, leaving lb, lbclen and
 *	lblen untouched: the old buffer is still valid, and it is not
 *	released here because lb may be a copy of a pointer that the
 *	caller of the enclosing function still owns.
 */
#define	NEEDSP(sp, len, pnt) {						\
	if (lbclen + (len) > lblen) {					\
		void *needsp_new_;					\
		if ((needsp_new_ = realloc(lb,				\
		    lblen + MAX(lbclen + (len), 256))) == NULL) {	\
			msgq(sp, M_ERR,					\
			    "Error: %s", strerror(errno));		\
			return (1);					\
		}							\
		lb = needsp_new_;					\
		lblen += MAX(lbclen + (len), 256);			\
		(pnt) = lb + lbclen;					\
	}								\
}

/*
 * substitute --
 *	Do the substitution.  This stuff is *really* tricky.  There are
 *	lots of special cases, and general nastiness.  Don't mess with it
 * 	unless you're pretty confident.
 *
 *	sp, ep:	screen and file being edited.
 *	cmdp:	the ex command; lines addr1.lno through addr2.lno are
 *		processed, and cmd->usage is used for the usage message.
 *	s:	flag string: any of '#', 'c', 'g', 'l', 'p', 'r', with
 *		blanks and tabs skipped; anything else is a usage error.
 *	re:	compiled RE to match against each line.
 *	cmd:	how we were reached (see enum which).
 *
 *	The replacement text comes from sp->repl via regsub().  Each line
 *	is rebuilt in a private build buffer (lb) and written back with
 *	file_sline(); newlines in the replacement become extra lines added
 *	with file_iline().
 *
 *	Returns 0 on success, 1 on error.  Note that the BUILD macro
 *	returns 1 directly from this function on allocation failure,
 *	without passing through the cleanup at ret1.
 */
static int
substitute(sp, ep, cmdp, s, re, cmd)
	SCR *sp;
	EXF *ep;
	EXCMDARG *cmdp;
	char *s;
	regex_t *re;
	enum which cmd;
{
	MARK from, to;
	recno_t elno, lno, lastline;
	size_t blen, cnt, last, lbclen, lblen, len, offset;
	int eflags, eval, linechanged, quit;
	int cflag, gflag, lflag, nflag, pflag, rflag;
	char *bp, *lb;

	/*
	 * Historic vi permitted the '#', 'l' and 'p' options in vi mode,
	 * but it only displayed the last change and they really don't
	 * make any sense.  In the current model the problem is combining
	 * them with the 'c' flag -- the screen would have to flip back
	 * and forth between the confirm screen and the ex print screen,
	 * which would be pretty awful.  Not worth the effort.
	 */
	cflag = gflag = lflag = nflag = pflag = rflag = 0;
	for (; *s; ++s)
		switch (*s) {
		case ' ':
		case '\t':
			break;
		case '#':
			if (F_ISSET(sp, S_MODE_VI)) {
				msgq(sp, M_ERR,
				    "'#' flag not supported in vi mode.");
				return (1);
			}
			nflag = 1;
			break;
		case 'c':
			cflag = 1;
			break;
		case 'g':
			gflag = 1;
			break;
		case 'l':
			if (F_ISSET(sp, S_MODE_VI)) {
				msgq(sp, M_ERR,
				    "'l' flag not supported in vi mode.");
				return (1);
			}
			lflag = 1;
			break;
		case 'p':
			if (F_ISSET(sp, S_MODE_VI)) {
				msgq(sp, M_ERR,
				    "'p' flag not supported in vi mode.");
				return (1);
			}
			pflag = 1;
			break;
		case 'r':
			if (cmd == FIRST) {
				msgq(sp, M_ERR,
		    "Regular expression specified; r flag meaningless.");
				return (1);
			}
			if (!F_ISSET(sp, S_RE_SET)) {
				msgq(sp, M_ERR,
				    "No previous regular expression.");
				return (1);
			}
			rflag = 1;
			break;
		default:
			goto usage;
		}

	/* With no pattern given, the 'r' flag is mandatory. */
	if (rflag == 0 && cmd == MUSTSETR) {
usage:		msgq(sp, M_ERR, "Usage: %s", cmdp->cmd->usage);
		return (1);
	}

	/* Get some space. */
	GET_SPACE(sp, bp, blen, 512);

	/*
	 * lb:		build buffer pointer.
	 * lbclen:	current length of built buffer.
	 * lblen:	length of build buffer.
	 *
	 * The build buffer is owned by this function and is released at
	 * both exits below.
	 */
	lb = NULL;
	lbclen = lblen = 0;

	/*
	 * Since multiple changes can happen in a line, we only increment
	 * the change count on the first change to a line.
	 */
	lastline = OOBLNO;

	/* For each line... */
	for (quit = 0, lno = cmdp->addr1.lno,
	    elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) {

		/* Get the line. */
		if ((s = file_gline(sp, ep, lno, &len)) == NULL) {
			GETLINE_ERR(sp, lno);
			/* Leave through ret1 so the space is released. */
			goto ret1;
		}

		/*
		 * Make a local copy if doing confirmation -- when calling
		 * the confirm routine we're likely to lose our cached copy.
		 */
		if (cflag) {
			ADD_SPACE(sp, bp, blen, len)
			memmove(bp, s, len);
			s = bp;
		}

		/* Reset the buffer pointer. */
		lbclen = 0;

		/*
		 * We don't want to have to do a setline if the line didn't
		 * change -- keep track of whether or not this line changed.
		 */
		linechanged = 0;

		/* It's not nul terminated, but we pretend it is. */
		eflags = REG_STARTEND;

		/* The search area is from 's' to the end of the line. */
nextmatch:	sp->match[0].rm_so = 0;
		sp->match[0].rm_eo = len;

		/* Get the next match. */
skipmatch:	eval = regexec(re,
		    (char *)s, re->re_nsub + 1, sp->match, eflags);

		/*
		 * There wasn't a match -- if there was an error, deal with
		 * it.  If there was a previous match in this line, resolve
		 * the changes into the database.  Otherwise, just move on.
		 */
		if (eval == REG_NOMATCH) {
			if (linechanged)
				goto endmatch;
			continue;
		}
		if (eval != 0) {
			re_error(sp, eval, re);
			goto ret1;
		}

		/* Confirm change. */
		if (cflag) {
			/*
			 * Set the cursor position for confirmation.  Note,
			 * if we matched on a '$', the cursor may be past
			 * the end of line.
			 *
			 * XXX
			 * May want to "fix" this in the confirm routine;
			 * the confirm routine may be able to display a
			 * cursor past EOL.
			 */
			from.lno = lno;
			from.cno = sp->match[0].rm_so;
			to.lno = lno;
			to.cno = sp->match[0].rm_eo;
			if (len != 0) {
				if (to.cno >= len)
					to.cno = len - 1;
				if (from.cno >= len)
					from.cno = len - 1;
			}

			switch (sp->s_confirm(sp, ep, &from, &to)) {
			case YES:
				break;
			case NO:
				/*
				 * Copy the bytes before the match and the
				 * bytes in the match into the build buffer.
				 */
				BUILD(sp, s, sp->match[0].rm_eo);
				goto skip;
			case QUIT:
				/* Set the quit flag. */
				quit = 1;

				/* If in a global, pass the info back. */
				if (F_ISSET(sp, S_GLOBAL))
					F_SET(sp, S_GLOBAL_QUIT);

				/*
				 * If any changes, resolve them, otherwise
				 * return to the main loop.
				 */
				if (linechanged)
					goto endmatch;
				continue;
			}
		}

		/* Copy the bytes before the match into the build buffer. */
		BUILD(sp, s, sp->match[0].rm_so);

		/* Substitute the matching bytes. */
		if (regsub(sp, s, &lb, &lbclen, &lblen)) {
			/*
			 * A failed regsub is not guaranteed to have stored a
			 * reallocated buffer back through &lb, so the pointer
			 * we hold may be stale.  Don't hand it to free();
			 * leaking on this rare path is the safe choice.
			 */
			lb = NULL;
			goto ret1;
		}

		/* Set the change flag so we know this line was modified. */
		linechanged = 1;

		/* Move the pointers past the matched bytes. */
skip:		s += sp->match[0].rm_eo;
		len -= sp->match[0].rm_eo;

		/*
		 * If doing a global change with confirmation, we have to
		 * update the screen.  The basic idea is to store the line
		 * so the screen update routines can find it, but start at
		 * the old offset.
		 */
		if (linechanged && gflag && cflag) {
			/* Save offset. */
			offset = lbclen;

			/* Copy the suffix. */
			if (len)
				BUILD(sp, s, len)

			/* Store inserted lines, adjusting the build buffer. */
			last = 0;
			if (sp->newl_cnt) {
				for (cnt = 0; cnt < sp->newl_cnt;
				    ++cnt, ++lno, ++elno, ++lastline) {
					if (file_iline(sp, ep, lno,
					    lb + last, sp->newl[cnt] - last))
						goto ret1;
					last = sp->newl[cnt] + 1;
					++sp->rptlines[L_ADDED];
				}
				lbclen -= last;
				offset -= last;

				sp->newl_cnt = 0;
				linechanged = 1;
			}

			/* Store the changed line. */
			if (linechanged)
				if (file_sline(sp, ep, lno, lb + last, lbclen))
					goto ret1;

			/* Get a new copy of the line. */
			if ((s = file_gline(sp, ep, lno, &len)) == NULL) {
				GETLINE_ERR(sp, lno);
				goto ret1;
			}
			ADD_SPACE(sp, bp, blen, len)
			memmove(bp, s, len);
			s = bp;

			/* Restart the build. */
			lbclen = 0;

			/* Update changed line counter. */
			if (lastline != lno) {
				++sp->rptlines[L_CHANGED];
				lastline = lno;
			}

			/* Start in the middle of the line. */
			sp->match[0].rm_so = offset;
			sp->match[0].rm_eo = len;
			goto skipmatch;
		}

		/*
		 * If it's a global change, and there's something left in
		 * the line, check it.
		 *
		 * NOTE(review): a match ending at offset 0 (an empty match
		 * at the start of the remaining text) advances neither s
		 * nor len, so this appears able to loop without making
		 * progress -- confirm.
		 */
		if (len && gflag) {
			eflags |= REG_NOTBOL;
			goto nextmatch;
		}

		/* Copy any remaining bytes into the build buffer. */
endmatch:	if (len)
			BUILD(sp, s, len)

		/* Store inserted lines, adjusting the build buffer. */
		last = 0;
		if (sp->newl_cnt) {
			for (cnt = 0; cnt < sp->newl_cnt;
			    ++cnt, ++lno, ++elno, ++lastline) {
				if (file_iline(sp, ep,
				    lno, lb + last, sp->newl[cnt] - last))
					goto ret1;
				last = sp->newl[cnt] + 1;
				++sp->rptlines[L_ADDED];
			}
			lbclen -= last;

			sp->newl_cnt = 0;
			linechanged = 1;
		}

		/* Store the changed line. */
		if (linechanged)
			if (file_sline(sp, ep, lno, lb + last, lbclen))
				goto ret1;

		/*
		 * Update changed line counter.
		 *
		 * NOTE(review): this is also reached with linechanged == 0
		 * when every match on the line was declined at the confirm
		 * prompt, so such a line looks like it is counted (and
		 * displayed below) as changed -- confirm that is intended.
		 */
		if (lastline != lno) {
			++sp->rptlines[L_CHANGED];
			lastline = lno;
		}

		/* Display as necessary. */
		if (lflag || nflag || pflag) {
			from.lno = to.lno = lno;
			from.cno = to.cno = 0;
			if (lflag)
				ex_print(sp, ep, &from, &to, E_F_LIST);
			if (nflag)
				ex_print(sp, ep, &from, &to, E_F_HASH);
			if (pflag)
				ex_print(sp, ep, &from, &to, E_F_PRINT);
		}
	}

	/*
	 * Cursor moves to last line changed, unless doing confirm,
	 * in which case don't move it.
	 */
	if (!cflag && lastline != OOBLNO)
		sp->lno = lastline;

	/*
	 * Note if nothing found.  Else, if nothing displayed to the
	 * screen, put something up.
	 */
	if (sp->rptlines[L_CHANGED] == 0 && !F_ISSET(sp, S_GLOBAL))
		msgq(sp, M_INFO, "No match found.");
	else if (!lflag && !nflag && !pflag)
		F_SET(sp, S_AUTOPRINT);

	/* Release the line copy space and the build buffer. */
	FREE_SPACE(sp, bp, blen);
	if (lb != NULL)
		free(lb);
	return (0);

ret1:	FREE_SPACE(sp, bp, blen);
	if (lb != NULL)
		free(lb);
	return (1);
}

/*
 * regsub --
 * 	Do the substitution for a regular expression.
 */
static inline int
regsub(sp, ip, lbp, lbclenp, lblenp)
	SCR *sp;
	char *ip;			/* Input line. */
	char **lbp;
	size_t *lbclenp, *lblenp;
{
	size_t lbclen, lblen;		/* Local copies. */
	size_t mlen;			/* Match length. */
	size_t rpl;			/* Remaining replacement length. */
	char *rp;			/* Replacement pointer. */
	int ch;
	int no;				/* Match replacement offset. */
	char *p;			/* Build buffer pointer. */
	char *lb;			/* Local copies. */

	lb = *lbp;			/* Get local copies. */
	lbclen = *lbclenp;
	lblen = *lblenp;

	rp = sp->repl;			/* Set up replacment info. */
	rpl = sp->repl_len;
	for (p = lb + lbclen; rpl--;) {
		ch = *rp++;
		if (ch == '&') {	/* Entire pattern. */
			no = 0;
			goto sub;
					/* Partial pattern. */
		} else if (ch == '\\' && isdigit(*rp)) {
			no = *rp++ - '0';
			--rpl;
sub:			if (sp->match[no].rm_so != -1 &&
			    sp->match[no].rm_eo != -1) {
				mlen =
				    sp->match[no].rm_eo - sp->match[no].rm_so;
				NEEDSP(sp, mlen, p);
				memmove(p, ip + sp->match[no].rm_so, mlen);
				p += mlen;
				lbclen += mlen;
			}
		} else {		/* Newline, ordinary characters. */
			if (sp->special[ch] == K_CR ||
			    sp->special[ch] == K_NL) {
				NEEDNEWLINE(sp);
				sp->newl[sp->newl_cnt++] = lbclen;
			} else if (ch == '\\' && (*rp == '\\' || *rp == '&'))
 				ch = *rp++;
			NEEDSP(sp, 1, p);
 			*p++ = ch;
			++lbclen;
		}
	}

	*lbp = lb;			/* Update caller's information. */
	*lbclenp = lbclen;
	*lblenp = lblen;
	return (0);
}

/*
 * checkmatchsize --
 *	Make sure the screen's match array (sp->match, sp->matchsize
 *	entries) can hold the whole match plus every parenthesized
 *	subexpression of re, i.e. re->re_nsub + 1 entries, growing it if
 *	it is too small.  The array is never shrunk.
 *
 *	Returns 0 on success.  On allocation failure an error is reported
 *	through msgq, the existing array and size are left as they were,
 *	and 1 is returned.
 */
static int
checkmatchsize(sp, re)
	SCR *sp;
	regex_t *re;
{
	regmatch_t *nmatch;
	size_t need;

	/* Build nsub array as necessary. */
	need = re->re_nsub + 1;
	if (sp->matchsize >= need)
		return (0);

	/*
	 * Go through a temporary: a failed realloc leaves the old block
	 * allocated, so assigning its result straight to sp->match would
	 * leak that block and throw away a still-usable array.
	 */
	if ((nmatch = realloc(sp->match, need * sizeof(regmatch_t))) == NULL) {
		msgq(sp, M_ERR, "Error: %s", strerror(errno));
		return (1);
	}
	sp->match = nmatch;
	sp->matchsize = need;
	return (0);
}