/* Ultrix-3.1/src/cmd/csplit.c */


/**********************************************************************
 *   Copyright (c) Digital Equipment Corporation 1984, 1985, 1986.    *
 *   All Rights Reserved. 					      *
 *   Reference "/usr/src/COPYRIGHT" for applicable restrictions.      *
 **********************************************************************/

static char Sccsid[] = "@(#)csplit.c 3.0 4/21/86";

/* System 5 static char sccsid[] = "@(#)csplit.c	1.6";*/
/*
*	csplit - Context or line file splitter
*	Compile: cc -O -s -o csplit csplit.c
*/

#include <stdio.h>
#include <errno.h>
#include <signal.h>

#define LAST	0L	/* to_line argument: copy through end of input */
#define ERR	-1	/* Returned by this file's atol on a bad character */
#define FALSE	0
#define TRUE	1
#define EXPMODE	2	/* Previous argument was a regular expression */
#define LINMODE	3	/* Previous argument was a line number */
#define EXPSIZ	128	/* Size of the compiled expression buffer expbuf */
#define	LINSIZ	256	/* Size of the input line buffer linbuf */
#define MAXFLS	99	/* Highest file counter value getfile will use */

	/* Globals */

char *strrchr();		/* Declared here; no string header is included */
char linbuf[LINSIZ];		/* Input line buffer */
char expbuf[EXPSIZ];		/* Compiled expression buffer */
char tmpbuf[BUFSIZ];		/* Temporary buffer for stdin */
char file[15] = "xx";		/* File name buffer */
char *targ;			/* Arg ptr for error messages */
char *sptr;			/* Scratch ptr; findline uses it to strip the newline in linbuf */
FILE *infile, *outfile;		/* I/O file streams */
int silent, keep, create;	/* Flags: -s(ilent), -k(eep), (create) */
int errflg;			/* Bumped by main on an unknown option; forces the usage message */
extern int optind;		/* Used by main together with getopt */
extern char *optarg;		/* Used by main for the -f argument */
long offset;			/* Regular expression offset value */
long curline;			/* Current line in input file */

/*
*	These defines are needed for regexp handling (see regexp(7)).
*	They must appear before <regexp.h> is included, because that
*	header is included right after them and is expected to use them.
*	NOTE(review): "instring" and the exact call sites of these macros
*	live in <regexp.h>, which is not visible here - confirm against it.
*/
/* Declare the scan pointer, starting one character past instring */
#define INIT		register char *ptr = ++instring;
/* Fetch the next pattern character and advance */
#define GETC()		(*ptr++)
/* Look at the next pattern character without advancing */
#define PEEKC()		(*ptr)
/* Step back one character; the argument itself is ignored */
#define UNGETC(c)	(--ptr)
/* Plain return; no value is passed back */
#define RETURN()	return;
/* Any compile error is reported against the current argument (targ) */
#define ERROR()		fatal("%s: Illegal Regular Expression\n",targ);

#include <regexp.h>

/*
*	Main parses the options (-s, -k, -f prefix), opens the input file
*	(copying stdin to a temporary file when the file operand is "-"),
*	and then hands every remaining argument to the matching handler.
*	After the last argument the rest of the input goes to a final file.
*	Exits 0 on success; every error path exits 1.
*/

main(argc,argv)
int argc;
char **argv;
{
	/*
	*	NOTE(review): getfile, getline and findline are never called
	*	from main.  Presumably these declarations exist so that the
	*	return types are known before the first use further down the
	*	file - confirm before removing them.
	*/
	FILE *getfile();
	int ch, mode, sig();
	int nread;		/* Bytes obtained by one fread of stdin */
	char *ptr;
	char *getline();
	long findline();

	/*
	*	No previous argument yet: a leading {n} must be rejected by
	*	num_arg rather than applied with an indeterminate mode.
	*/
	mode = FALSE;

	while((ch=getopt(argc,argv,"skf:")) != EOF) {
		switch(ch) {
			case 'f':
				if((ptr=strrchr(optarg,'/')) == NULL)
					ptr = optarg;
				else
					ptr++;

				if(strlen(ptr) > 12)
					fatal("Prefix %s too long\n",ptr);
				/*
				*	The whole prefix, directory part included,
				*	has to fit in the file buffer together with
				*	the two digit suffix and the terminating
				*	null, so check before copying.
				*/
				if(strlen(optarg) > sizeof(file) - 3)
					fatal("Prefix %s too long\n",optarg);
				strcpy(file,optarg);
				break;
			case 's':
				silent++;
				break;
			case 'k':
				keep++;
				break;
			case '?':
				errflg++;
		}
	}

	argv = &argv[optind];
	argc -= optind;
	/* A file operand plus at least one split argument is required */
	if(argc <= 1 || errflg)
		fatal("Usage: csplit [-s] [-k] [-f prefix] file args ...\n",NULL);
	if(strcmp(*argv, "-") == 0) {
		/*
		*	The input is re-read with fseek, so stdin is first
		*	copied into a temporary file.
		*/
		if((infile = tmpfile()) == NULL) {
			fprintf(stderr, "csplit: Cannot create temporary file\n");
			exit(1);
		}

		while((nread = fread(tmpbuf, 1, BUFSIZ, stdin)) > 0) {
			/* Write only what was read; the last chunk may be short */
			if(fwrite(tmpbuf, 1, nread, infile) != nread) {
				if(errno == ENOSPC)
					fprintf(stderr, "csplit: No space left on device\n");
				else
					fprintf(stderr, "csplit: Bad write to temporary file\n");
				exit(1);
			}
		}
		rewind(infile);
	}
	else if((infile = fopen(*argv,"r")) == NULL)
		fatal("Cannot open %s\n", *argv);
	++argv;
	curline = 1L;
	signal(SIGINT,sig);

	/*
	*	The following for loop handles the different argument types.
	*	A switch is performed on the first character of the argument
	*	and each case calls the appropriate argument handling routine.
	*/

	for(; *argv; ++argv) {
		targ = *argv;
		switch(**argv) {
		case '/':
			mode = EXPMODE;
			create = TRUE;
			re_arg(*argv);
			break;
		case '%':
			/* Same search as '/', but the section is not written */
			mode = EXPMODE;
			create = FALSE;
			re_arg(*argv);
			break;
		case '{':
			num_arg(*argv,mode);
			/* A repeat count cannot itself be repeated */
			mode = FALSE;
			break;
		default:
			mode = LINMODE;
			create = TRUE;
			line_arg(*argv);
			break;
		}
	}
	create = TRUE;
	to_line(LAST);
	exit(0);
}

/*
*	Atol takes an ascii argument (str) and converts it to a long,
*	which is stored through plc.  Leading blanks and tabs are skipped
*	and one optional sign is accepted.  It returns ERR if any other
*	character than a digit follows, TRUE otherwise.  The value is
*	passed back through plc rather than returned because any value
*	for the long is legal, and this version of atol detects error
*	strings.  A string with no digits at all yields 0 and TRUE.
*/

atol(str,plc)
register char *str;
long *plc;
{
	register int f;		/* Non-zero once a minus sign was seen */
	*plc = 0;
	f = 0;
	for(;;str++) {
		switch(*str) {
		case ' ':
		case '\t':
			continue;	/* Continues the for loop: skip white space */
		case '-':
			f++;
			/* Fall through: either sign character is stepped over */
		case '+':
			str++;
		}
		break;
	}
	/* The terminator is a character, so compare with '\0', not NULL */
	for(; *str != '\0'; str++)
		if(*str >= '0' && *str <= '9')
			*plc = *plc * 10 + *str - '0';
		else
			return(ERR);
	if(f)
		*plc = -(*plc);
	return(TRUE);	/* not error */
}

/*
*	Closefile finishes the current output file.  Nothing happens
*	unless the create flag is on.  When it is, and the silent flag
*	is off, the size of the file is written to stdout: the stream is
*	positioned at its end (fseek) and the position is read back
*	(ftell).  The stream is then closed with fclose.
*/

closefile()
{
	long ftell();
	long nbytes;

	if(create) {
		if(!silent) {
			fseek(outfile,0L,2);	/* 2: relative to end of file */
			nbytes = ftell(outfile);
			printf("%ld\n",nbytes);
		}
		fclose(outfile);
	}
}

/*
*	Fatal handles error messages and cleanup.
*	"string" is a printf format and "arg" its single string argument.
*	Because "arg" can be the global file, and the cleanup processing
*	rewrites the global file, the error message is printed first.  If
*	the "keep" flag is not set, fatal unlinks all created files: the
*	two digit suffix of the current name is read back and every name
*	from that number down to 00 is removed.  If the "keep" flag is set,
*	fatal closes the current file (if there is one) via closefile.
*	Fatal never returns; it exits with a value of 1.
*
*	NOTE(review): outfile is not cleared when closefile closes it, so
*	it may refer to an already closed stream here - confirm whether
*	that can happen for errors raised between two output files.
*/

fatal(string,arg)
char *string, *arg;
{
	register char *fls;
	register int num;

	fprintf(stderr,string,arg);
	if(!keep) {
		if(outfile) {
			fclose(outfile);
			/* Find the terminating '\0' (a character, not NULL) */
			for(fls=file; *fls != '\0'; fls++);
			fls -= 2;	/* Back up onto the two digit suffix */
			for(num=atoi(fls); num >= 0; num--) {
				sprintf(fls,"%.02d",num);
				unlink(file);
			}
		}
	} else
		if(outfile)
			closefile();
	exit(1);
}

/*
*	Findline returns the line number referenced by the current argument.
*	Its arguments are a pointer to the compiled regular expression (expr),
*	and an offset (oset).  The variable lncnt is used to count the number
*	of lines searched.  First the current stream location is saved via
*	ftell(), and getline is called so that R.E. searching starts at the
*	line after the previously referenced line (except on the very first
*	call at line 1, where the first line itself is searched).  The while
*	loop reads lines without bumping curline, strips the trailing
*	newline, bumps the line count, and tries the R.E. on each line.
*	In every case the old stream location is restored before returning.
*	On a match the result is curline + lncnt + oset.  When the input
*	runs out the result is curline + lncnt + oset + 2; presumably this
*	is past the end of the input so that to_line reports the argument
*	as out of range - confirm.
*/

long findline(expr,oset)
register char *expr;
long oset;
{
	static int benhere;	/* Set once findline has been called */
	long lncnt = 0, saveloc, ftell();
	/*
	*	getline is defined further down the file; declare it here so
	*	that its char * result is not taken to be an int.
	*/
	char *getline();

	saveloc = ftell(infile);
	if(curline != 1L || benhere)		/* If first line, first time, */
		getline(FALSE);			/* then don't skip */
	else
		lncnt--;
	benhere = 1;
	while(getline(FALSE) != NULL) {
		lncnt++;
		if((sptr=strrchr(linbuf,'\n')) != NULL)
			*sptr = '\0';
		if(step(linbuf,expr)) {
			fseek(infile,saveloc,0);
			return(curline+lncnt+oset);
		}
	}
	fseek(infile,saveloc,0);
	return(curline+lncnt+oset+2);
}

/*
*	Flush uses fputs to put lines on the output file stream (outfile)
*	Since fputs does its own buffering, flush doesn't need to.
*	Flush does nothing if the create flag is not set.
*/

flush()
{
	if(create)
		fputs(linbuf,outfile);
}

/*
*	Getfile opens the next output file.
*	Getfile does nothing (returns NULL) if the create flag is not set.
*	If the create flag is set, getfile positions the file pointer (fptr)
*	at the end of the file name prefix on the first call (fptr=0).
*	Next the file counter (ctr) is tested against MAXFLS, fatal if too
*	many file creations are attempted.  Then the file counter is
*	stored in the file name as a two digit suffix and incremented.
*	If the subsequent fopen fails, the file name is copied to tfile
*	for the error message, the previous file name is restored for
*	cleanup, and fatal is called.  If the fopen succeeds, the stream
*	(opfil) is returned.
*/

FILE *getfile()
{
	static char *fptr;	/* End of the prefix inside file, set once */
	static int ctr;		/* Number to use for the next file */
	FILE *opfil;
	char tfile[15];

	if(create) {
		if(fptr == 0)
			/* Find the terminating '\0' (a character, not NULL) */
			for(fptr = file; *fptr != '\0'; fptr++);
		if(ctr > MAXFLS)
			fatal("100 file limit reached at arg %s\n",targ);
		sprintf(fptr,"%.02d",ctr++);
		if((opfil = fopen(file,"w")) == NULL) {
			strcpy(tfile,file);
			/* ctr was already bumped, so the previous file is ctr-2 */
			sprintf(fptr,"%.02d",(ctr-2));
			fatal("Cannot create %s\n",tfile);
		}
		return(opfil);
	}
	return(NULL);
}

/*
*	Getline reads one line from the input stream "infile" into
*	linbuf using fgets, so at most LINSIZ-1 characters are taken
*	per call.  A non-zero bumpcur advances the current line number
*	first (it is advanced even when the read then fails); a zero
*	bumpcur leaves it alone, which is what R.E. searching wants.
*	The result of fgets is returned unchanged.
*/

char *getline(bumpcur)
int bumpcur;
{
	if(bumpcur != 0)
		++curline;
	return(fgets(linbuf,LINSIZ,infile));
}

/*
*	Line_arg handles line number arguments.
*	Its argument is the text of the argument, which is expected to
*	be a line number.  The text is converted with this file's atol;
*	if the conversion fails the program stops with a "bad line
*	number" message, otherwise to_line is called with the number.
*/

line_arg(line)
char *line;
{
	long target;
	int status;

	status = atol(line,&target);
	if(status == ERR)
		fatal("%s: bad line number\n",line);
	to_line(target);
}

/*
*	Num_arg handles repeat arguments of the form {count}.
*	Num_arg copies the numeric argument to "rep" (error if number is
*	larger than 11 characters or } is left off).  Num_arg then converts
*	the number and checks for validity (it must not be negative).
*	Next num_arg checks the mode of the previous argument (md), and
*	applies that argument the given number of times:
*	  LINMODE - curline is used as the step; to_line is called for
*	            each further multiple of it.  The global offset is
*	            overwritten as a side effect.
*	  EXPMODE - the expression still held in expbuf is searched for
*	            again, with the same offset, each time.
*	If the mode is neither of these it is an error.
*/

num_arg(arg,md)
register char *arg;
int md;
{
	long repeat, toline;
	char rep[12];
	register char *ptr;

	ptr = rep;
	for(++arg; *arg != '}'; arg++) {
		if(ptr == &rep[11])
			fatal("%s: Repeat count too large\n",targ);
		/* End of string is the character '\0', not the pointer NULL */
		if(*arg == '\0')
			fatal("%s: missing '}'\n",targ);
		*ptr++ = *arg;
	}
	*ptr = '\0';
	if((atol(rep,&repeat) == ERR) || repeat < 0L)
		fatal("Illegal repeat count: %s\n",targ);
	if(md == LINMODE) {
		toline = offset = curline;
		for(;repeat > 0L; repeat--) {
			toline += offset;
			to_line(toline);
		}
	} else if(md == EXPMODE) {
		for(;repeat > 0L; repeat--)
			to_line(findline(expbuf,offset));
	} else
		fatal("No operation for %s\n",targ);
}

/*
*	Re_arg handles regular expression arguments.
*	Re_arg takes a csplit regular expression argument.  The first
*	character is the delimiter; the string is scanned for the matching
*	closing delimiter (a backslash protects the character after it),
*	and a missing delimiter is fatal.  Whatever follows the closing
*	delimiter is converted to the global offset (an empty string gives
*	0).  The expression is then compiled into expbuf.  Findline is
*	called with the compiled expression and offset, and returns the
*	corresponding line number, which is used as input to the to_line
*	function.
*/

re_arg(string)
char *string;
{
	register char *ptr;
	register char ch;	/* Delimiter: the first character of string */

	ch = *string;
	ptr = string;
	while(*(++ptr) != ch) {
		if(*ptr == '\\')
			++ptr;
		/* End of string is the character '\0', not the pointer NULL */
		if(*ptr == '\0')
			fatal("%s: missing delimiter\n",targ);
	}
	if(atol(++ptr,&offset) == ERR)
		fatal("%s: illegal offset\n",string);
	compile(string, expbuf, &expbuf[EXPSIZ], ch);
	to_line(findline(expbuf,offset));
}

/*
*	Sig handles breaks (it is installed for SIGINT by main).  When a
*	break occurs the signal is reset, and fatal is called to clean up
*	and print the argument which was being processed at the time the
*	interrupt occurred.  Since fatal exits, sig does not return.
*/

sig()
{
	/* Re-install the handler first, then abort through fatal */
	signal(SIGINT,sig);
	fatal("Interrupt - program aborted at arg '%s'\n",targ);
}

/*
*	To_line creates split files.
*	Its argument (ln) is the line which the current argument
*	referenced, or LAST for the final file.  A new output stream is
*	always requested from getfile first; getfile hands back nothing
*	when create is False.
*	  ln == LAST - at least one more input line must exist (error
*	               otherwise); lines are read and flushed until the
*	               end of the input.
*	  otherwise  - the current line must not already be past ln
*	               (error otherwise); lines are read and flushed
*	               until the current line reaches ln, and reaching
*	               EOF before that is an error.
*	Finally closefile is called to close the output stream.
*/

to_line(ln)
long ln;
{
	outfile = getfile();
	if(ln == LAST) {
		/* last file */
		if(getline(TRUE) == NULL)
			fatal("%s - out of range\n",targ);
		else {
			do {
				flush();
			} while(getline(TRUE) != NULL);
		}
	} else {
		if(curline > ln)
			fatal("%s - out of range\n",targ);
		while(curline < ln) {
			if(getline(TRUE) == NULL)
				fatal("%s - out of range\n",targ);
			flush();
		}
	}
	closefile();
}