OpenSolaris_b135/cmd/refer/sortbib.c

Compare this file to the similar file:
Show the results in this format:

/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved  	*/

/*
 * Copyright (c) 1980 Regents of the University of California.
 * All rights reserved. The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <ctype.h>
#include <libintl.h>
#include <locale.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* size of the line buffer and of each sort-field buffer */
#define	BUF BUFSIZ
/* maximum number of databases accepted on the command line */
#define	MXFILES 16

char tempfile[32];		/* temporary file for sorting keys */
int tmpfd = -1;			/* descriptor returned by mkstemp() for tempfile */
char *keystr = "AD";		/* default sorting on author and date */
int multauth = 0;		/* by default sort on senior author only */
int oneauth;			/* has there been author in the record? */

/* forward declarations of the file-local functions defined below */
static int article(char *);
static void deliver(FILE *[], FILE *);
static int endcomma(char *);
static void error(char *);
static void eval(char []);
static void parse(char [], char fld[][BUF]);
static void sortbib(FILE *, FILE *, int);
static void onintr(void);

/*
 * sortbib: sort bibliographic database(s) and write the sorted records
 * to standard output (the databases themselves are only read).
 *
 * Usage:  sortbib [-sKEYS] database [...]
 *
 * The work is done in two passes: sortbib() writes one key line per
 * record into a temporary file, then deliver() sorts that file and copies
 * the records out of the databases in key order.
 *
 * Returns 0 on success.  The failure paths in this function print a
 * message and exit with status 1.
 */
int
main(int argc, char *argv[])
{
	FILE *fp[MXFILES], *tfp;
	int i;

	(void) setlocale(LC_ALL, "");

#if !defined(TEXT_DOMAIN)
#define	TEXT_DOMAIN "SYS_TEST"
#endif
	(void) textdomain(TEXT_DOMAIN);

	if (argc == 1) {		/* can't use stdin for seeking anyway */
		puts(gettext("Usage:  sortbib [-sKEYS] database [...]\n\
\t-s: sort by fields in KEYS (default is AD)"));
		exit(1);
	}
	if (argc > 2 && argv[1][0] == '-' && argv[1][1] == 's') {
		/*
		 * Only a user-supplied key is passed to eval(): eval()
		 * rewrites its argument in place, which must not be done
		 * to the default key because that is a string literal.
		 * The default "AD" contains no '+', so it needs no
		 * evaluation anyway.
		 */
		if (argv[1][2] != '\0') {
			keystr = argv[1] + 2;
			eval(keystr);	/* evaluate A+ for multiple authors */
		}
		argv++; argc--;
	}
	if (argc > MXFILES+1) {	/* too many open file streams */
		fprintf(stderr,
		    gettext("sortbib: More than %d databases specified\n"),
		    MXFILES);
		exit(1);
	}
	for (i = 1; i < argc; i++)		/* open files in arg list */
		if ((fp[i-1] = fopen(argv[i], "r")) == NULL)
			error(argv[i]);
	strcpy(tempfile, "/tmp/SbibXXXXXX");	/* tempfile for sorting keys */
	if ((tmpfd = mkstemp(tempfile)) == -1)
		error(tempfile);

	(void) close(tmpfd);
	if (signal(SIGINT, SIG_IGN) != SIG_IGN)	/* remove if interrupted */
		signal(SIGINT, (void(*)())onintr);
	if ((tfp = fopen(tempfile, "w")) == NULL) {
		(void) unlink(tempfile);
		error(tempfile);
	}
	for (i = 0; i < argc-1; i++)		/* read keys from bib files */
		sortbib(fp[i], tfp, i);
	/*
	 * fclose() performs the final flush of the key file; if that fails
	 * the key list is incomplete and sorting it would silently drop
	 * records.
	 */
	if (fclose(tfp) != 0) {
		(void) unlink(tempfile);
		error(tempfile);
	}
	/*
	 * deliver() opens the key file itself and overwrites its second
	 * argument, so pass NULL rather than the now-closed stream.
	 */
	deliver(fp, NULL);	/* do disk seeks and read from biblio files */
	(void) unlink(tempfile);
	if (fflush(stdout) == EOF)	/* report a failed write of the result */
		error("sortbib");
	return (0);
}

int rsmode = 0;		/* record separator: 1 = null line, 2 = bracket */

/* read records, prepare list for sorting */
static void
sortbib(FILE *fp, FILE *tfp, int i)
{
	long offset, lastoffset = 0, ftell();	/* byte offsets in file */
	int length, newrec, recno = 0;		/* reclen, new rec'd?, number */
	char line[BUF], fld[4][BUF];		/* one line, the sort fields */

	/* measure byte offset, then get new line */
	while (offset = ftell(fp), fgets(line, BUF, fp)) {
		if (recno == 0)		/* accept record w/o initial newline */
			newrec = 1;
		if (line[0] == '\n') {	/* accept null line record separator */
			if (!rsmode)
				rsmode = 1;	/* null line mode */
			if (rsmode == 1)
				newrec = 1;
		}
		if (line[0] == '.' && line[1] == '[') {	/* also accept .[ .] */
			if (!rsmode)
				rsmode = 2;	/* bracket pair mode */
			if (rsmode == 2)
				newrec = 1;
		}
		if (newrec) {		/* by whatever means above */
			newrec = 0;
			length = offset - lastoffset;	/* measure rec len */
			if (length > BUF*8) {
				fprintf(stderr,
				gettext("sortbib: record %d longer than %d "
				    "(%d)\n"), recno, BUF*8, length);
				(void) unlink(tempfile);
				exit(1);
			}
			if (recno++) {			/* info for sorting */
				fprintf(tfp, "%d %d %d : %s %s %s %s\n",
				    i, lastoffset, length,
				    fld[0], fld[1], fld[2], fld[3]);
				if (ferror(tfp)) {
					(void) unlink(tempfile);
					error(tempfile);
				}
			}
			*fld[0] = *fld[1] = *fld[2] = *fld[3] = NULL;
			oneauth = 0;		/* reset number of authors */
			lastoffset = offset;	/* save for next time */
		}
		if (line[0] == '%')	/* parse out fields to be sorted */
			parse(line, fld);
	}
	offset = ftell(fp);		/* measure byte offset at EOF */
	length = offset - lastoffset;	/* measure final record length */
	if (length > BUF*8) {
		fprintf(stderr,
		    gettext("sortbib: record %d longer than %d (%d)\n"),
		    recno, BUF*8, length);
		(void) unlink(tempfile);
		exit(1);
	}
	if (line[0] != '\n') {		/* ignore null line just before EOF */
		fprintf(tfp, "%d %d %d : %s %s %s %s\n",
		    i, lastoffset, length, fld[0], fld[1], fld[2], fld[3]);
		if (ferror(tfp)) {
			(void) unlink(tempfile);
			error(tempfile);	/* disk error in /tmp */
		}
	}
}

/* deliver sorted entries out of database(s) */
static void
deliver(FILE *fp[], FILE *tfp)
{
	char str[BUF], buff[BUF*8];	/* for tempfile & databases */
	char cmd[80];			/* for using system sort command */
	long int offset;
	int i, length;

	/* when sorting, ignore case distinctions; tab char is ':' */
	sprintf(cmd, "sort +4f +0n +1n %s -o %s", tempfile, tempfile);
	if (system(cmd) == 127) {
		(void) unlink(tempfile);
		error("sortbib");
	}
	tfp = fopen(tempfile, "r");
	while (fgets(str, sizeof (str), tfp)) {
		/* get file pointer, record offset, and length */
		if (sscanf(str, "%d %d %d :", &i, &offset, &length) != 3)
			error(gettext("sortbib: sorting error"));
		/* seek to proper disk location in proper file */
		if (fseek(fp[i], offset, 0) == -1) {
			(void) unlink(tempfile);
			error("sortbib");
		}
		/* read exactly one record from bibliography */
		if (fread(buff, sizeof (*buff), length, fp[i]) == 0) {
			(void) unlink(tempfile);
			error("sortbib");
		}
		/* add newline between unseparated records */
		if (buff[0] != '\n' && rsmode == 1)
			putchar('\n');
		/* write record buffer to standard output */
		if (fwrite(buff, sizeof (*buff), length, stdout) == 0) {
			(void) unlink(tempfile);
			error("sortbib");
		}
	}
}

/* get fields out of line, prepare for sorting */
static void
parse(char line[], char fld[][BUF])
{
	char wd[8][BUF/4], *strcat();
	int n, i, j;

	for (i = 0; i < 8; i++)		/* zap out old strings */
		*wd[i] = NULL;
	n = sscanf(line, "%s %s %s %s %s %s %s %s",
	    wd[0], wd[1], wd[2], wd[3], wd[4], wd[5], wd[6], wd[7]);
	for (i = 0; i < 4; i++) {
		if (wd[0][1] == keystr[i]) {
			if (wd[0][1] == 'A') {
				if (oneauth && !multauth)	/* no repeat */
					break;
				else if (oneauth)		/* mult auths */
					strcat(fld[i], "~~");
				if (!endcomma(wd[n-2]))		/* surname */
					strcat(fld[i], wd[n-1]);
				else {				/* jr. or ed. */
					strcat(fld[i], wd[n-2]);
					n--;
				}
				strcat(fld[i], " ");
				for (j = 1; j < n-1; j++)
					strcat(fld[i], wd[j]);
				oneauth = 1;
			} else if (wd[0][1] == 'D') {
				strcat(fld[i], wd[n-1]);	/* year */
				if (n > 2)
					strcat(fld[i], wd[1]);	/* month */
			} else if (wd[0][1] == 'T' || wd[0][1] == 'J') {
				j = 1;
				if (article(wd[1]))	/* skip article */
					j++;
				for (; j < n; j++)
					strcat(fld[i], wd[j]);
			} else  /* any other field */
				for (j = 1; j < n; j++)
					strcat(fld[i], wd[j]);
		}
		/* %Q quorporate or queer author - unreversed %A */
		else if (wd[0][1] == 'Q' && keystr[i] == 'A')
			for (j = 1; j < n; j++)
				strcat(fld[i], wd[j]);
	}
}

/*
 * article: tell whether str is exactly one of the known articles.
 *
 * The comparison is case-sensitive and covers the whole string.
 * Returns 1 on a match and 0 otherwise.
 */
static int
article(char *str)
{
	static const char *const known[] = {
		"The", "A", "An",	/* English */
		"Le", "La",		/* French */
		"Der", "Die", "Das",	/* German */
		"El",			/* Spanish */
		"Den"			/* Scandinavian */
	};
	size_t k;

	for (k = 0; k < sizeof (known) / sizeof (known[0]); k++) {
		if (strcmp(known[k], str) == 0)
			return (1);
	}
	return (0);
}

/*
 * eval: evaluate key string for A+ marking.
 *
 * Every '+' is removed from keystr in place, and multauth is set to 1
 * if at least one was found.  keystr must be writable.
 */
static void
eval(char keystr[])
{
	int i, j;

	for (i = 0, j = 0; keystr[i] != '\0'; i++) {
		if (keystr[i] == '+') {
			multauth = 1;
			continue;	/* drop the marker, even when repeated */
		}
		keystr[j++] = keystr[i];
	}
	keystr[j] = '\0';
}

/*
 * error: exit in case of various system errors.
 *
 * s is handed to perror(), so the message printed after it is derived
 * from the current value of errno.  The program then exits with status
 * 1; this function does not return.
 */
static void
error(char *s)
{
	perror(s);
	exit(1);
}

/*
 * onintr: remove tempfile in case of interrupt.
 *
 * Prints "Interrupt" on stderr, unlinks the temporary file and exits
 * with status 1.
 *
 * NOTE(review): when this runs as a signal handler it calls fprintf(),
 * gettext() and exit(), which are presumably not async-signal-safe --
 * confirm against the platform's list before relying on it.
 */
static void
onintr(void)
{
	fprintf(stderr, gettext("\nInterrupt\n"));
	unlink(tempfile);
	exit(1);
}

/*
 * endcomma: strip a trailing comma from str.
 *
 * If the last character of str is ',', it is overwritten with the
 * terminator and 1 is returned.  Otherwise, including for the empty
 * string, str is left alone and 0 is returned.
 */
static int
endcomma(char *str)
{
	size_t len = strlen(str);

	/* the length test keeps an empty string from indexing str[-1] */
	if (len > 0 && str[len - 1] == ',') {
		str[len - 1] = '\0';
		return (1);
	}
	return (0);
}