Ultrix-3.1/src/cmd/ltf/filetype.c

Compare this file to the similar file:
Show the results in this format:


/**********************************************************************
 *   Copyright (c) Digital Equipment Corporation 1984, 1985, 1986.    *
 *   All Rights Reserved. 					      *
 *   Reference "/usr/src/COPYRIGHT" for applicable restrictions.      *
 **********************************************************************/

#ifndef lint
static	char	*sccsid = "@(#)filetype.c	3.0	(ULTRIX)	4/21/86";
#endif	lint

/**/
/*
 *
 *	File name:
 *
 *	    filetype.c
 *
 *	Source file description:
 *
 *		This file contains logic to determine the
 *		general type of file specified by the caller.
 *		ie. Is it binary, text, library, etc..
 *
 *		(further documentation below)
 *
 *		It is used by the Labeled Tape Facility (LTF)
 *		in order to determine the type that the Unix
 *		file should take on once it becomes an
 *		ANSI tape file.
 *
 *
 *	Functions:
 *
 *	ascom()		...
 *	ccom()		...
 *	english()	...
 *	Filetype()	Top level logic to determine file type.
 *	lookupe()	...
 *	troffint()	Check for  troff intermediate file
 *
 *
 *	Compile:
 *
 *	    cc -O -c filetype.c		 <- For Ultrix-32/32m
 *
 *	    cc CFLAGS=-DU11-O filetype.c <- For Ultrix-11
 *
 *
 *	Modification history:
 *	~~~~~~~~~~~~~~~~~~~~
 *
 *	revision			comments
 *	--------	-----------------------------------------------
 *	  01.0		09-April-85	Ray Glaser
 *			Create original version.
 *	
 */
/**/
/*
 * ->	Local includes
 */

#include "ltfdefs.h"	/* Common LTF definitions */

/*
 * ->	Local defines  "required for / defined by"  this module
 */

#define	MBUFSIZ	1024	/* Chunk of file to read for "typing" */
#define NL	012	/* Newline character (octal 012) */
#define	NLM	123	/* Newline limit while hunting for the ( ) { of 'c' code */

/*
 * ->	Globals for this module only
 */

/* Words looked up right after a leading '.' to recognize assembler text */
char	*as[] = {
	"globl","byte","align","text","data","comm",0};

/* Words looked up at the start of a statement to recognize assembler text */
char	*asc[] = {
	"chmk","mov","tst","clr","jmp",0};

/* Holds the chunk of the file currently being examined */
char	buf[MBUFSIZ];

/* Words whose presence marks the text as a 'C' program */
char	*c[] = {
	"int","char","float","double","struct","extern",0};

/* Words whose presence marks the text as a command file */
char	*com[] = { "alias",
	"date","cc","xref","pr","pr50","CFLAGS","lpr","rm","FILES",
	"tar","mdtar","sync","make","/etc","/bin","for","case",
	"echo","do","umask","set","stty","setenv",
	00 };

int	errno;	/* NOTE(review): not referenced by the code in this file */

/* Words whose presence marks the text as a Fortran program */
char	*fort[] = {
	"function","subroutine","common","dimension","block","integer",
	"real","data","double",0};

int	i  = 0;	/* Cursor into buf[], shared by all routines in this file */
FILE	*ifile;	/* Stream opened on the file being typed */
int	in;	/* Byte count returned by the most recent read() into buf[] */
char	*sys_errlist[];	/* NOTE(review): not referenced by the code in this file */
char	*troff[] = {	/* new troff intermediate lang */
	"x","T","res","init","font","202","V0","p1",0};

int	Type;	/* Tentative file type value */
int	x;	/* Used only by the disabled (#if 0) debugging code */
/**/
/*
 *
 * Function:
 *
 *	Filetype
 *
 * Function Description:
 *
 *	This function attempts to determine the type of Unix disk file.
 *	It returns a generalized file type indication as defined in
 *	the include file "filetypes.h".
 *
 * Arguments:
 *
 *
 *	char	*file		Points to a null terminated string
 *				assumed to be the desired file name
 *				to be typed.
 *
 *	char	*caller		Points to a string defining the desired
 *				routine name for messages to stderr.
 *
 * Return value(s):
 *
 *	int	value
 *
 *	EOF if the file could not be opened.
 *	Zero if the file type could not be determined, else
 *	one of the file types defined in "filetypes.h".
 *	Additionally, the character string variable "Tftypes"
 *	is filled with a 3 character representation of the
 *	true Ultrix disk file type (see README.1 file); it is
 *	left as "???" when the file could not be opened.
 *	
 * 
 * Side Effects:
 *
 *	This function outputs error messages to stderr if any problems
 *	occur during the attempt to access/type the given file.
 *
 */

/**/
/*
 *	FILETYPE	Determination thereof..
 */

Filetype(file,caller)

	char	*caller;
	char	*file;

{
/*
 *	Classify the file named by `file'.  The inode information is
 *	taken from the global "Inode"; the first MBUFSIZ bytes of the
 *	file are read into buf[] and run through a series of checks
 *	(magic numbers, archive headers, PRESS trailer, then keyword
 *	heuristics for command, C, Fortran, assembler, roff and English
 *	text).  "Tftypes" receives the 3 character specific type and
 *	the generalized type code is returned.  EOF is returned if the
 *	file cannot be opened; a read or seek failure is reported on
 *	behalf of `caller' and the program exits with FAIL.
 */

/*
 * ->	Local variables
 */

int	nl;	/* Newlines seen while hunting for a C function */
int	mark;	/* Remembered index into buf[].  Kept local because
		 * lookupe() loops on the shared `j', which would
		 * otherwise destroy the remembered position. */
int	got;	/* Byte count from the PRESS trailer probe */
union {
	int	magic;
	char	bytes[512];
} last;		/* Scratch copy of the last 512-byte block */
struct stat	mbuf;

/*------*\
   Code
\*------*/

strcpy(Tftypes,"???");	/* Assume we can't determine file type */
mbuf = Inode;
/* Check if this is a file that should not be open
*/
switch (mbuf.st_mode & S_IFMT) {
	case S_IFCHR:	/* Character special */
		strcpy(Tftypes,"csp");
		return(CHCTRSP);

	case S_IFBLK:	/* Block special */
		strcpy(Tftypes,"bsp");
		return(BLKSP);

	case S_IFIFO:	/* Fifo - pipe */
		strcpy(Tftypes,"pip");
		return(CHCTRSP);
#ifndef U11
	case S_IFSOCK:	/* Socket ! */
		strcpy(Tftypes,"soc");
		return(SOCKET);
#endif
}/*E switch mbuf.st_mode & S_IFMT */
if((ifile = fopen(file, "r")) == NULL) {
	PERROR "\n%s: %s %s\n", caller, CANTOPW, file);
	perror(caller);
	return(EOF);
}
switch (mbuf.st_mode & S_IFMT) {

	case S_IFLNK:	/* Symbolic link */
		strcpy(Tftypes,"sym");
		fclose(ifile);
		return(SYMLNK);

	case S_IFDIR:	/* Directory */
		strcpy(Tftypes,"dir");
		fclose(ifile);
		return(DIRECT);

}/*E switch mbuf.st_mode & S_IFMT */

/*
 *	Read in a MBUFSIZ amount of data from the file for
 *	further examination.
 */
if ((in = read(fileno(ifile), buf, MBUFSIZ)) <= 0) {
	if (in < 0) {
		PERROR "\n%s: %s %s\n", caller, CANTRD, file);
		perror(caller);
		exit(FAIL);
	}
	fclose(ifile);
	strcpy(Tftypes,"nul");
	return(EMPTY);	/* File appears to be empty */
}
/*
 *	This check is looking for files that are used as output
 *	of ltf instead of using tape.  NOTE:  This is bogus as
 *	the rest of all these checks are.  This is the bare 
 *	minimum of checking.  Should check not only for VOL1,
 *	but also HDR1, etc., tape mark, then EOF1, etc., tape
 *	mark.  Thus this checks for VOL1 in the first 4 characters,
 *	and makes the file type binary.
 */
if (!strncmp(buf, "VOL1", 4)) {
	fclose(ifile);
	strcpy(Tftypes,"bin");
	return(BINARY);	/* Data file of some type */
}
switch(*(int *)buf) {

	case 0407:	/* Old impure Format on Ultrix-11 should
			 * be PDP-11 normal (I space only).
			 * On Ultrix-32 - seems to indicate
			 * a  .o  (object) file. */

		fclose(ifile);
#ifdef U11
		/* check to see if relocation info is stripped */
		if (((int *)buf)[7])
		    strcpy(Tftypes,"exe");
		else
#endif
		    strcpy(Tftypes,"bin");
		return(BINARY);

	case 0410:	/* Read-only/shared text */
	case 0413:	/* Demand Load Format */
#ifdef U11
	case 0401:	/* PDP-11 Standalone executable */
	case 0411:	/* PDP-11 Separated I & D */
	case 0430:	/* PDP-11 7 Overlays */
	case 0431:	/* PDP-11 7 Overlays */
	case 0450:	/* PDP-11 15 Overlays */
	case 0451:	/* PDP-11 15 Overlays */
#endif
		fclose(ifile);
		strcpy(Tftypes,"exe");
		return(BINARY);	

	case 0177555:	/* Very old archive */
	case 0177545:	/* Old archive */
		fclose(ifile);
		strcpy(Tftypes,"oar");
		return(BINARY);	

	case 070707:	/* CPIO data */
		fclose(ifile);
		strcpy(Tftypes,"cpi");
		return(CPIO);

}/*E switch(*(int *)buf) */

if(strncmp(buf, "!<arch>\n__.SYMDEF", 17) == 0 ) {

	fclose(ifile);
	strcpy(Tftypes,"arl");
	return(BINARY);	/* Archive Random Library */
}
if (strncmp(buf, "!<arch>\n", 8)==0) {

	fclose(ifile);
	strcpy(Tftypes,"arc");
	return(BINARY);	/* Archive */
}
if (mbuf.st_size % 512 == 0) {	/* it may be a PRESS file */
	/*
	 * Look at the last 512-byte block.  The seek must go through
	 * the descriptor (ifile is a stdio stream, not a descriptor),
	 * and the block is read into a scratch area so that buf[] and
	 * `in' keep describing the START of the file, which is what
	 * every heuristic below expects to examine.
	 */
	if (lseek(fileno(ifile), -512L, 2) >= 0) {
		got = read(fileno(ifile), last.bytes, sizeof(last.bytes));
		if (got < 0) {
			PERROR "\n%s: %s %s\n", caller, CANTRD, file);
			perror(caller);
			exit(FAIL);
		}
		if (got >= (int)sizeof(last.magic) && last.magic == 12138) {
			fclose(ifile);
			strcpy(Tftypes,"cmp");
			return(BINARY);	/* Press file ..*/
		}
	}
	/*
	 * Go back to where the first read left off so the final
	 * "make certain it is text" pass resumes at the right place.
	 */
	if (lseek(fileno(ifile), (long)in, 0) < 0) {
		PERROR "\n%s: %s %s\n", caller, CANTRD, file);
		perror(caller);
		exit(FAIL);
	}
}/*E if mbuf.st_size ..*/

/*
 * See if it looks like a command file.
 */
i = 0;
while(buf[i] == ' ' || buf[i] == '#' || buf[i] == '!') {
	while(buf[i++] != '\n')
		if(i >= in)	goto notcom;

}/*E while buf[i] ..*/

#if 0 /*for debugging*/
printf("\ngoing to lookupe \n");
for (x=i;x<i+80;x++)
  printf("%c",buf[x]);
#endif 0

if (lookupe(com)==1) {
	Type = TEXT;
	strcpy(Tftypes,"com");
	goto outa;
}

notcom:
i = 0;
if (!ccom()) {
	goto notc;
}

/* Step over preprocessor lines (and comments between them) */
while(buf[i] == '#') {
	mark = i;
	while(buf[i++] != '\n') {
		if(i - mark > 255) {
			/* No newline within 255 bytes: not source text */
			fclose(ifile);
			strcpy(Tftypes,"adf");
			return(BINARY);	/* Data file of some type */
		}
		if(i >= in) {
			goto notc;
		}
	}/* while buf[i++] ...*/

	if(!ccom()) {
		goto notc;
	}
}/*E while buf[i] ..*/

/*
 */
check:

if(lookupe(c) == 1) {
	while((ch = buf[i++]) != ';' && ch != '{') {
		if(i >= in) {
			goto notc;
		}
	}
	strcpy(Tftypes,"cc ");
	Type = TEXT;	/* 'C' program text */
	goto outa;

}/*E if lookupe(c) */

nl = 0;

/* No keyword: look for the  name ( ... ) {  shape of a function */
while(buf[i] != '(') {
	if(buf[i] <= 0)
		goto notas;
	if(buf[i] == ';') {
		i++; 
		goto check; 
	}
	if(buf[i++] == '\n') {
		if(nl++ > NLM) {
			goto notc;
		}
	}
	if(i >= in) {
		goto notc;
	}

}/*E while(buf[i] ..*/

while(buf[i] != ')') {
	if(buf[i++] == '\n') {
		if(nl++ > NLM) {
			goto notc;
		}
	}
	if(i >= in) {
		goto notc;
	}

}/*E while buf[i] ..*/

while(buf[i] != '{') {
	if(buf[i++] == '\n') {
		if(nl++ > NLM) {
			goto notc;
		}
	}
	if(i >= in) {
		goto notc;
	}

}/*E while buf[i] ..*/

strcpy(Tftypes,"cc ");
Type = TEXT;	/* 'C' program text */
goto outa;

/*
 */
notc:

i = 0;
while(buf[i] == 'c' || buf[i] == '#') {

	while(buf[i++] != '\n')
		if(i >= in)	goto notfort;

}/*E while buf[i] ..*/

if(lookupe(fort) == 1) {
	strcpy(Tftypes,"for");
	Type = TEXT;	/* Fortran program text */
	goto outa;

}/*E if lookupe fort */

/*
 */
notfort:
i=0;
if (!ascom()) goto notas;

mark = i-1;	/* Character just before the possible '.' */
if (buf[i] == '.') {
	i++;
	if(lookupe(as) == 1) {
		strcpy(Tftypes,"asm");
		Type = TEXT;	/* Assembler program text */
		goto outa;

	}/*T if lookupe as .. */
	/* mark is -1 when the '.' is the very first byte of buf[] */
	else	if(mark >= 0 && buf[mark] == '\n' && isalpha(buf[mark+2])) {

			strcpy(Tftypes,"rof");
			Type = TEXT;	/* roff, nroff or eqn
					 * input text.
					 */
			goto outa;
		}
}/*E if buf[i] ..*/

while (!lookupe(asc)) {
	if (!ascom()) goto notas;

	while(buf[i] != '\n' && buf[i++] != ':')
		if(i >= in) goto notas;

	while(buf[i] == '\n' || buf[i] == ' ' || buf[i] == '\t'
		|| buf[i] == '\f' || buf[i] == NL)
		if(i++ >= in) goto notas;

	mark = i-1;
	if(buf[i] == '.') {
		i++;
		if(lookupe(as) == 1) {

			strcpy(Tftypes,"asm");
			Type = TEXT; /* ASM program text */
			goto outa;
		}
		else if(mark >= 0 && buf[mark] == '\n' && isalpha(buf[mark+2])) {

				strcpy(Tftypes,"rof");
				Type = TEXT;	/* roff, nroff, or
						 * eqn input text.
						 */
				goto outa;
		}
	}/*E if buf[i] == . */
}/* while lookupe(asc) ..*/

strcpy(Tftypes,"asm");
Type = TEXT;	/* Assembler program text */	
goto outa;

/*
 */
notas:

/* Any byte with the high bit set means the chunk is not text */
for (i=0; i < in; i++) {
	if (buf[i] & 0200) {
		if (buf[0]=='\100' && buf[1]=='\357') {
			fclose(ifile);
			strcpy(Tftypes,"rof");
			return(BINARY);	/* troff (CAT) output */
		}

		fclose(ifile);
		strcpy(Tftypes,"bin");
		return(BINARY);	/* Data file of some type */
	}
}/*E for i=0 ..*/

if (mbuf.st_mode&((S_IEXEC)|(S_IEXEC>>3)|(S_IEXEC>>6))) {

	strcpy(Tftypes,"com");
	Type = TEXT;	/* Commands text */
	goto outa;
}
	else if (troffint(buf, in)) {

		strcpy(Tftypes,"rof");
		Type = BINARY; /* troff intermediate output text */
		}
	else if (english(buf, in)) {

		strcpy(Tftypes,"eng");
		Type = ENGLISH;	/* English text */
		}
	else	{
		strcpy(Tftypes,"asc");
		Type = TEXT;	/* Ascii text */
		}
/*
 */
outa:

/* Check whatever part of the chunk the heuristics did not consume */
while(i < in)
	if((!buf[i]) || ((buf[i++] & 0377) > 0176)) {

		/* With garbage, assume binary
		 */
		fclose(ifile);
		strcpy(Tftypes,"bin");
		return(BINARY);
	}

if (((Type == TEXT) || (Type == ENGLISH))) {
	/*
	 *	We don't want to be misled. If we think this is
	 *	a 'text' file... Make certain of it..
	 */
	while((in = read(fileno(ifile),buf,MBUFSIZ)) > 0) {

		for(i = 0; i < in; i++)
			if((!buf[i]) || ((buf[i] & 0377) > 0176)) {

				/* With garbage, assume binary
				 */
				fclose(ifile);
				strcpy(Tftypes,"bin");
				return(BINARY);
			}
		/* If it doesn't meet the English frequency test,
		 * again assume it is TEXT of some kind.
		 */
		if (Type == ENGLISH)
			if (!(english(buf, in))) {
				/*
				 * If it fails English freq test,
				 * switch over to TEXT and continue
				 * checking for BINARY data.
				 */
				strcpy(Tftypes,"asc");
				Type = TEXT;
			}
	}/*E while in .. */
	if (in < 0) {
	    PERROR "\n%s: %s %s\n", caller, CANTRD, file);
	    perror(caller);
	    exit(FAIL);
	}

}/*E if Type == TEXT */

	/* If the above logic didn't change our mind,
	 * return the tentative file type as the actual.
	 * Convert tentative specifics to generalized types
	 * as/if required.
	 */
fclose(ifile);
if (Type == ENGLISH)
	return(TEXT);
return(Type);
}/*E Filetype() */
/**/
/*
 *
 * Function:
 *
 *	ascom
 *
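 * Function Description:
 *
 *	Skips lines that begin with '/' at the shared buffer
 *	cursor "i", together with any newlines that follow them.
 *
 * Arguments:
 *
 *	none
 *
 * Return values:
 *
 *	0 if the end of the data read ("in") is reached while
 *	skipping, otherwise 1.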
 *
 *
 * Side Effects:
 *
 */

ascom()
{
/*
 *	Works on the shared buf[] / i / in.  Each pass of the outer
 *	loop discards one line that starts with '/' and then the
 *	run of newlines after it.  Running out of data gives 0.
 */

for (;;) {
	if (buf[i] != '/')
		return(1);	/* cursor is not on a '/' line */

	i++;

	/* throw away the remainder of this line */
	for (;;) {
		if (buf[i++] == '\n')
			break;
		if (i >= in)
			return(0);
	}

	/* and the newlines that follow it */
	while (buf[i] == '\n') {
		if (i >= in) {
			i++;
			return(0);
		}
		i++;
	}
}

}/*E ascom */
/**/
/*
 *
 * Function:
 *
 *	ccom
 *
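 * Function Description:
 *
 *	Skips white space and then one C comment at the shared
 *	buffer cursor "i"; if a newline follows, it calls itself
 *	to skip further white space and comments.
 *
 * Arguments:
 *
 *	none
 *
 * Return values:
 *
 *	0 if the end of the data read ("in") is reached while
 *	skipping, otherwise 1.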
 *
 *
 * Side Effects:
 *
 *	
 */

ccom()
{

/*------*\
   Code
\*------*/

/* Pass over blanks, tabs, newlines and form feeds */
for (;;) {
	ch = buf[i];
	if (!(ch == ' ' || ch == '\t' || ch == '\n'
		|| ch == '\f' || ch == NL))
		break;
	if (i++ >= in)
		return(0);
}

/* Pass over one comment, if the cursor sits on its opener */
if (buf[i] == '/' && buf[i+1] == '*') {
	i += 2;
	for (;;) {
		if (buf[i] == '*' && buf[i+1] == '/')
			break;		/* found the closer */

		/* a backslash takes the next character with it */
		i += (buf[i] == '\\') ? 2 : 1;

		if (i >= in)
			return(0);
	}

	i += 2;		/* step past the closer */
	if (i >= in)
		return(0);
}/*E if buf[i] ..*/

/* More may follow on the next line; go round again */
if (buf[i] != '\n')
	return(1);

return(ccom() == 0 ? 0 : 1);

}/*E ccom() */
/**/
/*
 *
 * Function:
 *
 *	english
 *
 * Function Description:
 *
 *	This routine attempts to determine if the file contains
 *	english text based on the frequency (or lack thereof)
 *	of key letter usage in the English language.	
 *
 * Arguments:
 *
 *	char	*bp	Pointer to the buffer of text
 *	int	n	Number of characters in the buffer.
 *
 * Return values:
 *
 *	Zero	if the buffer doesn't look like english text.
 *	Non-zero if buffer appears to be English.
 *
 *
 * Side Effects:
 *
 *	none	
 */

english(bp, n)
	char	*bp;
	int	n;
{
/*
 *	Letter-frequency test: counts the characters in bp[0..n-1]
 *	(letters folded to lower case by or-ing in 040) and the
 *	punctuation marks that are not followed by white space.
 *	Returns non-zero only if the text has enough vowels, few
 *	rare letters, and not too many ';' '<' '>' '/' characters.
 *	Buffers shorter than 50 characters always give zero.
 */

/* Local variables
 */

int	ct[NASC], freq, rare, vow;
int	badpun = 0, punct = 0;
int	k;	/* Local index, so the shared `j' is left alone */

/*-*/

if (n<50)
	return(0); /* no point in statistics on squibs */

for(k=0; k<NASC; k++)
	ct[k]=0;

for(k=0; k<n; k++) {
	/*
	 * Where char is signed, a byte with the high bit set is
	 * negative; it would pass a bare "< NASC" test and then
	 * index ct[] below its start.  Count only 0..NASC-1.
	 */
	if (bp[k] >= 0 && bp[k] < NASC)
		ct[bp[k]|040]++;

	switch (bp[k]) {

		case '.': 
		case ',': 
		case ')': 
		case '%':
		case ';': 
		case ':': 
		case '?':
			punct++;
			/* Punctuation in prose is followed by white space */
			if ( k < n-1 &&
			    bp[k+1] != ' '  &&
			    bp[k+1] != '\f' &&
			    bp[k+1] != '\t' &&
			    bp[k+1] != NL   &&
			    bp[k+1] != '\n')
				badpun++;

	}/*E switch bp[k] */
}/*E for k=0 ..*/

if (badpun*5 > punct)
	return(0);

vow = ct['a'] + ct['e'] + ct['i'] + ct['o'] + ct['u'];
freq = ct['e'] + ct['t'] + ct['a'] + ct['i'] + ct['o'] + ct['n'];
rare = ct['v'] + ct['j'] + ct['k'] + ct['q'] + ct['x'] + ct['z'];

if (2*ct[';'] > ct['e'])
	return(0);

if ( (ct['>']+ct['<']+ct['/'])>ct['e'])
	return(0); /* shell file test */

return (vow*5 >= n-ct[' '] && freq >= 10*rare);

}/*E english() */
/**/
/*
 *
 * Function:
 *
 *	lookupe
 *
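 * Function Description:
 *
 *	Skips white space at the shared buffer cursor "i", then
 *	checks whether the text there is one of the words in
 *	"tab" followed by a blank, tab, newline, form feed,
 *	'{' or '/'.
 *
 * Arguments:
 *
 *	char	*tab[]	Table of words, ended by a 0 entry
 *
 * Return values:
 *
 *	1 if a word matched ("i" is moved to the character after
 *	the word), otherwise 0.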
 *
 *
 * Side Effects:
 *
 *	
 */

lookupe(tab)
	char	*tab[];
{
/*
 *	Skip white space at buf[i], then try each word of the
 *	0-terminated table `tab' against the text at that point.
 *	A word only counts if it is followed by a blank, tab,
 *	newline, form feed, '{' or '/'.  On a match i is moved to
 *	that following character and 1 is returned; otherwise 0 is
 *	returned with i left after the skipped white space.
 */

/* Local variables
 */

int	k, l;
int	t;	/* Index into tab[].  Deliberately local: Filetype()
		 * remembers a position in `j' across calls to this
		 * routine and troffint() counts lines with `j', so
		 * looping on that shared variable here would destroy
		 * their values. */

/*-*/

while(buf[i] == ' ' || buf[i] == '\t' || buf[i] == '\n'
	|| buf[i] == '\f' || buf[i] == NL)
	i++;

for(t=0; tab[t] != 0; t++) {
	l=0;
	/* Walk the word and the buffer together; ch ends up '\0'
	 * only if the whole word matched. */
	for(k=i; ((ch=tab[t][l++]) == buf[k] && ch != '\0');k++)
		;
	if(ch == '\0')
		if(buf[k] == ' ' || buf[k] == '\n' || buf[k] == '\t'
		    || buf[k] == '\f' || buf[k] == '{' ||
			 buf[k] == NL || buf[k] == '/') {
			i=k;
			return(1);

		}/*E if buf[k] ..*/
}/*E for t=0 ..*/

return(0);

}/*E lookupe() */
/**/
/*
 *
 * Function:
 *
 *	troffint
 *
 * Function Description:
 *
 *	See if the file appears to be  troff  intermediate text
 *
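 * Arguments:
 *
 *	char	*bp	Buffer of text (the code examines the
 *			module buffer "buf" directly)
 *	int	n	Number of characters in the buffer
 *
 * Return values:
 *
 *	0 as soon as a line does not begin with two words from
 *	the troff[] table or the data runs out, otherwise 1.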
 *
 *
 * Side Effects:
 *
 *	
 */

troffint(bp, n)
	char	*bp;
	int	n;
{
/*
 *	Decide whether the buffer looks like troff intermediate
 *	text: starting from the top, each of six lines must begin
 *	with two words from the troff[] table.  Returns 1 if so,
 *	0 at the first line that does not or if the n characters
 *	run out first.
 *
 *	The scan goes through the shared buf[] / i (that is what
 *	lookupe() reads), so `bp' is expected to be buf.
 */

int	line;	/* Lines accepted so far.  Must be local: lookupe()
		 * loops on the shared `j', so counting with `j'
		 * here would have the count overwritten on each call. */

i = 0;
for (line = 0; line < 6; line++) {
	if (lookupe(troff) == 0)
		return(0);
	if (lookupe(troff) == 0)
		return(0);
	/* move to the end of this line */
	while (i < n && buf[i] != '\n')
		i++;
	if (i++ >= n)
		return(0);

}/*E for line=0 .. */

return(1);

}/*E troffint() */


/**\\**\\**\\**\\**\\**  EOM  filetype.c  **\\**\\**\\**\\**\\*/
/**\\**\\**\\**\\**\\**  EOM  filetype.c  **\\**\\**\\**\\**\\*/