OpenSolaris_b135/cmd/bdiff/bdiff.c

Compare this file to the similar file:
Show the results in this format:

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved  	*/


/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <fatal.h>
#include <signal.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#include <wait.h>

#define	ONSIG	16

/*
 *	This program segments two files into pieces of <= seglim lines
 *	(which is passed as a third argument or defaulted to some number)
 *	and then executes diff upon the pieces. The output of
 *	'diff' is then processed to make it look as if 'diff' had
 *	processed the files whole. The reason for all this is that seglim
 *	is a reasonable upper limit on the size of files that diff can
 *	process.
 *	NOTE -- by segmenting the files in this manner, it cannot be
 *	guaranteed that the 'diffing' of the segments will generate
 *	a minimal set of differences.
 *	This process is most definitely not equivalent to 'diffing'
 *	the files whole, assuming 'diff' could handle such large files.
 *
 *	'diff' is executed by a child process, generated by forking,
 *	and communicates with this program through pipes.
 */

static char Error[128];

static int seglim;	/* limit of size of file segment to be generated */

static char diff[]  =  "/usr/bin/diff";
static char tempskel[] = "/tmp/bdXXXXXX"; /* used to generate temp file names */
static char tempfile[32];
static char otmp[32], ntmp[32];
static int	fflags;
static int	fatal_num = 1;		/* exit number for fatal exit */
static offset_t	linenum;
static size_t obufsiz, nbufsiz, dbufsiz;
static char *readline(char **, size_t *, FILE *);
static void addgen(char **, size_t *, FILE *);
static void delgen(char **, size_t *, FILE *);
static void fixnum(char *);
static void fatal(char *);
static void setsig(void);
static void setsig1(int);
static char *satoi(char *, offset_t *);
static FILE *maket(char *);

static char *prognam;

int
main(int argc, char *argv[])
{
	FILE *poldfile, *pnewfile;
	char *oline, *nline, *diffline;
	char *olp, *nlp, *dp;
	int otcnt, ntcnt;
	pid_t i;
	int pfd[2];
	FILE *poldtemp, *pnewtemp, *pipeinp;
	int status;

	prognam = argv[0];
	/*
	 * Set flags for 'fatal' so that it will clean up,
	 * produce a message, and terminate.
	 */
	fflags = FTLMSG | FTLCLN | FTLEXIT;

	setsig();

	if (argc < 3 || argc > 5)
		fatal("arg count");

	if (strcmp(argv[1], "-") == 0 && strcmp(argv[2], "-") == 0)
		fatal("both files standard input");
	if (strcmp(argv[1], "-") == 0)
		poldfile = stdin;
	else
		if ((poldfile = fopen(argv[1], "r")) == NULL) {
			(void) snprintf(Error, sizeof (Error),
				"Can not open '%s'", argv[1]);
			fatal(Error);
		}
	if (strcmp(argv[2], "-") == 0)
		pnewfile = stdin;
	else
		if ((pnewfile = fopen(argv[2], "r")) == NULL) {
			(void) snprintf(Error, sizeof (Error),
				"Can not open '%s'", argv[2]);
			fatal(Error);
		}

	seglim = 3500;

	if (argc > 3) {
		if (argv[3][0] == '-' && argv[3][1] == 's')
			fflags &= ~FTLMSG;
		else {
			if ((seglim = atoi(argv[3])) == 0)
				fatal("non-numeric limit");
			if (argc == 5 && argv[4][0] == '-' &&
					argv[4][1] == 's')
				fflags &= ~FTLMSG;
		}
	}

	linenum = 0;

	/* Allocate the buffers and initialize their lengths */

	obufsiz = BUFSIZ;
	nbufsiz = BUFSIZ;
	dbufsiz = BUFSIZ;

	if ((oline = (char *)malloc(obufsiz)) == NULL ||
	    (nline = (char *)malloc(nbufsiz)) == NULL ||
	    (diffline = (char *)malloc(dbufsiz)) == NULL)
		fatal("Out of memory");

	/*
	 * The following while-loop will prevent any lines
	 * common to the beginning of both files from being
	 * sent to 'diff'. Since the running time of 'diff' is
	 * non-linear, this will help improve performance.
	 * If, during this process, both files reach EOF, then
	 * the files are equal and the program will terminate.
	 * If either file reaches EOF before the other, the
	 * program will generate the appropriate 'diff' output
	 * itself, since this can be easily determined and will
	 * avoid executing 'diff' completely.
	 */
	for (;;) {
		olp = readline(&oline, &obufsiz, poldfile);
		nlp = readline(&nline, &nbufsiz, pnewfile);

		if (!olp && !nlp)	/* EOF found on both:  files equal */
			return (0);

		if (!olp) {
			/*
			 * The entire old file is a prefix of the
			 * new file. Generate the appropriate "append"
			 * 'diff'-like output, which is of the form:
			 * 		nan, n
			 * where 'n' represents a line-number.
			 */
			addgen(&nline, &nbufsiz, pnewfile);
		}

		if (!nlp) {
			/*
			 * The entire new file is a prefix of the
			 * old file. Generate the appropriate "delete"
			 * 'diff'-like output, which is of the form:
			 * 		n, ndn
			 * where 'n' represents a line-number.
			 */
			delgen(&oline, &obufsiz, poldfile);
		}

		if (strcmp(olp, nlp) == 0)
			linenum++;
		else
			break;
	}

	/*
	 * Here, first 'linenum' lines are equal.
	 * The following while-loop segments both files into
	 * seglim segments, forks and executes 'diff' on the
	 * segments, and processes the resulting output of
	 * 'diff', which is read from a pipe.
	 */
	for (;;) {
		/* If both files are at EOF, everything is done. */
		if (!olp && !nlp)	/* finished */
			return (0);

		if (!olp) {
			/*
			 * Generate appropriate "append"
			 * output without executing 'diff'.
			 */
			addgen(&nline, &nbufsiz, pnewfile);
		}

		if (!nlp) {
			/*
			 * Generate appropriate "delete"
			 * output without executing 'diff'.
			 */
			delgen(&oline, &obufsiz, poldfile);
		}

		/*
		 * Create a temporary file to hold a segment
		 * from the old file, and write it.
		 */
		poldtemp = maket(otmp);
		otcnt = 0;
		while (olp && otcnt < seglim) {
			(void) fputs(oline, poldtemp);
			if (ferror(poldtemp) != 0) {
				fflags |= FTLMSG;
				fatal("Can not write to temporary file");
			}
			olp = readline(&oline, &obufsiz, poldfile);
			otcnt++;
		}
		(void) fclose(poldtemp);

		/*
		 * Create a temporary file to hold a segment
		 * from the new file, and write it.
		 */
		pnewtemp = maket(ntmp);
		ntcnt = 0;
		while (nlp && ntcnt < seglim) {
			(void) fputs(nline, pnewtemp);
			if (ferror(pnewtemp) != 0) {
				fflags |= FTLMSG;
				fatal("Can not write to temporary file");
			}
			nlp = readline(&nline, &nbufsiz, pnewfile);
			ntcnt++;
		}
		(void) fclose(pnewtemp);

		/* Create pipes and fork.  */
		if ((pipe(pfd)) == -1)
			fatal("Can not create pipe");
		if ((i = fork()) < (pid_t)0) {
			(void) close(pfd[0]);
			(void) close(pfd[1]);
			fatal("Can not fork, try again");
		} else if (i == (pid_t)0) {	/* child process */
			(void) close(pfd[0]);
			(void) close(1);
			(void) dup(pfd[1]);
			(void) close(pfd[1]);

			/* Execute 'diff' on the segment files. */
			(void) execlp(diff, diff, otmp, ntmp, 0);

			/*
			 * Exit code here must be > 1.
			 * Parent process treats exit code of 1 from the child
			 * as non-error because the child process "diff" exits
			 * with a status of 1 when a difference is encountered.
			 * The error here is a true error--the parent process
			 * needs to detect it and exit with a non-zero status.
			 */
			(void) close(1);
			(void) snprintf(Error, sizeof (Error),
			    "Can not execute '%s'", diff);
			fatal_num = 2;
			fatal(Error);
		} else {			/* parent process */
			(void) close(pfd[1]);
			pipeinp = fdopen(pfd[0], "r");

			/* Process 'diff' output. */
			while ((dp = readline(&diffline, &dbufsiz, pipeinp))) {
				if (isdigit(*dp))
					fixnum(diffline);
				else
					(void) printf("%s", diffline);
			}

			(void) fclose(pipeinp);

			/* EOF on pipe. */
			(void) wait(&status);
			if (status&~0x100) {
				(void) snprintf(Error, sizeof (Error),
				    "'%s' failed", diff);
				fatal(Error);
			}
		}
		linenum += seglim;

		/* Remove temporary files. */
		(void) unlink(otmp);
		(void) unlink(ntmp);
	}
}

/* Routine to save remainder of a file. */
static void
saverest(char **linep, size_t *bufsizp, FILE *iptr)
{
	char *lp;
	FILE *temptr;

	temptr = maket(tempfile);

	lp = *linep;

	while (lp) {
		(void) fputs(*linep, temptr);
		linenum++;
		lp = readline(linep, bufsizp, iptr);
	}
	(void) fclose(temptr);
}

/* Routine to write out data saved by 'saverest' and to remove the file. */
static void
putsave(char **linep, size_t *bufsizp, char type)
{
	FILE *temptr;

	if ((temptr = fopen(tempfile, "r")) == NULL) {
		(void) snprintf(Error, sizeof (Error),
		    "Can not open tempfile ('%s')", tempfile); fatal(Error);
	}

	while (readline(linep, bufsizp, temptr))
		(void) printf("%c %s", type, *linep);

	(void) fclose(temptr);

	(void) unlink(tempfile);
}

static void
fixnum(char *lp)
{
	offset_t num;

	while (*lp) {
		switch (*lp) {

		case 'a':
		case 'c':
		case 'd':
		case ',':
		case '\n':
			(void) printf("%c", *lp);
			lp++;
			break;

		default:
			lp = satoi(lp, &num);
			num += linenum;
			(void) printf("%lld", num);
		}
	}
}

static void
addgen(char **lpp, size_t *bufsizp, FILE *fp)
{
	offset_t oldline;
	(void) printf("%llda%lld", linenum, linenum+1);

	/* Save lines of new file. */
	oldline = linenum + 1;
	saverest(lpp, bufsizp, fp);

	if (oldline < linenum)
		(void) printf(",%lld\n", linenum);
	else
		(void) printf("\n");

	/* Output saved lines, as 'diff' would. */
	putsave(lpp, bufsizp, '>');

	exit(0);
}

static void
delgen(char **lpp, size_t *bufsizp, FILE *fp)
{
	offset_t savenum;

	(void) printf("%lld", linenum+1);
	savenum = linenum;

	/* Save lines of old file. */
	saverest(lpp, bufsizp, fp);

	if (savenum +1 != linenum)
		(void) printf(",%lldd%lld\n", linenum, savenum);
	else
		(void) printf("d%lld\n", savenum);

	/* Output saved lines, as 'diff' would.  */
	putsave(lpp, bufsizp, '<');

	exit(0);
}

static void
clean_up()
{
	(void) unlink(tempfile);
	(void) unlink(otmp);
	(void) unlink(ntmp);
}

static FILE *
maket(char *file)
{
	FILE *iop;
	int fd;

	(void) strcpy(file, tempskel);
	if ((fd = mkstemp(file)) == -1 ||
		(iop = fdopen(fd, "w+")) == NULL) {
		(void) snprintf(Error, sizeof (Error),
		    "Can not open/create temp file ('%s')", file);
		fatal(Error);
	}
	return (iop);
}

static void
fatal(char *msg)
/*
 *	General purpose error handler.
 *
 *	The argument to fatal is a pointer to an error message string.
 *	The action of this routine is driven completely from
 *	the "fflags" global word (see <fatal.h>).
 *
 *	The FTLMSG bit controls the writing of the error
 *	message on file descriptor 2.  A newline is written
 *	after the user supplied message.
 *
 *	If the FTLCLN bit is on, clean_up is called.
 */
{
	if (fflags & FTLMSG)
		(void) fprintf(stderr, "%s: %s\n", prognam, msg);
	if (fflags & FTLCLN)
		clean_up();
	if (fflags & FTLEXIT)
		exit(fatal_num);
}

static void
setsig()
/*
 *	General-purpose signal setting routine.
 *	All non-ignored, non-caught signals are caught.
 *	If a signal other than hangup, interrupt, or quit is caught,
 *	a "user-oriented" message is printed on file descriptor 2.
 *	If hangup, interrupt or quit is caught, that signal
 *	is set to ignore.
 *	Termination is like that of "fatal",
 *	via "clean_up()"
 */
{
	void (*act)(int);
	int j;

	for (j = 1; j < ONSIG; j++) {
		act = signal(j, setsig1);
		if (act == SIG_ERR)
			continue;
		if (act == SIG_DFL)
			continue;
		(void) signal(j, act);
	}
}

static void
setsig1(int sig)
{

	(void) signal(sig, SIG_IGN);
	clean_up();
	exit(1);
}

static char *
satoi(char *p, offset_t *ip)
{
	offset_t sum;

	sum = 0;
	while (isdigit(*p))
		sum = sum * 10 + (*p++ - '0');
	*ip = sum;
	return (p);
}

/*
 * Read a line of data from a file.  If the current buffer is not large enough
 * to contain the line, double the size of the buffer and continue reading.
 * Loop until either the entire line is read or until there is no more space
 * to be malloc'd.
 */

static char *
readline(char **bufferp, size_t *bufsizp, FILE *filep)
{
	char *bufp;
	size_t newsize;		/* number of bytes to make buffer */
	size_t oldsize;

	(*bufferp)[*bufsizp - 1] = '\t'; /* arbitrary non-zero character */
	(*bufferp)[*bufsizp - 2] = ' ';	/* arbitrary non-newline char */
	bufp = fgets(*bufferp, *bufsizp, filep);
	if (bufp == NULL)
		return (bufp);
	while ((*bufferp)[*bufsizp -1] == '\0' &&
	    (*bufferp)[*bufsizp - 2] != '\n' &&
	    strlen(*bufferp) == *bufsizp - 1) {
		newsize = 2 * (*bufsizp);
		bufp = (char *)realloc((void *)*bufferp, newsize);
		if (bufp == NULL)
			fatal("Out of memory");
		oldsize = *bufsizp;
		*bufsizp = newsize;
		*bufferp = bufp;
		(*bufferp)[*bufsizp - 1] = '\t';
		(*bufferp)[*bufsizp - 2] = ' ';
		bufp = fgets(*bufferp + oldsize -1, oldsize + 1, filep);
		if (bufp == NULL) {
			if (filep->_flag & _IOEOF) {
				bufp = *bufferp;
				break;
			} else
				fatal("Read error");
		} else
			bufp = *bufferp;
	}
	return (bufp);
}