/********************************************************************** * Copyright (c) Digital Equipment Corporation 1984, 1985, 1986. * * All Rights Reserved. * * Reference "/usr/src/COPYRIGHT" for applicable restrictions. * **********************************************************************/ #ifndef lint static char *sccsid = "@(#)filetype.c 3.0 (ULTRIX) 4/21/86"; #endif lint /**/ /* * * File name: * * filetype.c * * Source file description: * * This file contains logic to determine the * general type of file specified by the caller. * ie. Is it binary, text, library, etc.. * * (further documentation below) * * It is used by the Labeled Tape Facility (LTF) * in order to determine the type that the Unix * file should take on whence it becomes an * ANSI tape file. * * * Functions: * * ascom() ... * ccom() ... * english() ... * Filetype() Top level logic to determine file type. * lookupe() ... * troffint() Check for troff intermediate file * * * Compile: * * cc -O -c filetype.c <- For Ultrix-32/32m * * cc CFLAGS=-DU11-O filetype.c <- For Ultrix-11 * * * Modification history: * ~~~~~~~~~~~~~~~~~~~~ * * revision comments * -------- ----------------------------------------------- * 01.0 09-April-85 Ray Glaser * Create orginal version. * */ /**/ /* * -> Local includes */ #include "ltfdefs.h" /* Common LTF definitions */ /* * -> Local defines "required for / defined by" this module */ #define MBUFSIZ 1024 /* Chunk of file to read for "typing" */ #define NL 012 /* Newline character */ #define NLM 123 /* Used to check for 'c' code */ /* * -> Globals for this module only */ char *as[] = { "globl","byte","align","text","data","comm",0}; char *asc[] = { "chmk","mov","tst","clr","jmp",0}; char buf[MBUFSIZ]; char *c[] = { "int","char","float","double","struct","extern",0}; char *com[] = { "alias", "date","cc","xref","pr","pr50","CFLAGS","lpr","rm","FILES", "tar","mdtar","sync","make","/etc","/bin","for","case", "echo","do","umask","set","stty","setenv", 00 }; int errno; char *fort[] = { "function","subroutine","common","dimension","block","integer", "real","data","double",0}; int i = 0; FILE *ifile; int in; char *sys_errlist[]; char *troff[] = { /* new troff intermediate lang */ "x","T","res","init","font","202","V0","p1",0}; int Type; /* Tentative file type value */ int x; /**/ /* * * Function: * * Filetype * * Function Description: * * This function attempts to determine the type of Unix disk file. * It returns a generalized file type indication as defined in * the include file "filetypes.h". * * Arguments: * * * char *file Points to a null terminated string * assumed to be the desired file name * to be typed. * * char *caller Points to a string defining the desired * routine name for messages to stderr. * * Return value(s): * * int value * * Zero if the file type could not be determined, else: * one of the file types defined in "filetypes.h". * Additionally, the character string variables "Tftypes" * is filled with a 3 character representation of the * true Ultrix disk file type. (see README.1 file). * * * Side Effects: * * This function outputs error messages to stderr if any problems * occur during the attempt to access/type the given file. * */ /**/ /* * FILETYPE Determination thereof.. */ Filetype(file,caller) char *caller; char *file; { /* * -> Local variables */ int nl; struct stat mbuf; /*------*\ Code \*------*/ strcpy(Tftypes,"???"); /* Assume we can't determine file type */ mbuf = Inode; /* Check if this is a file that should not be open */ switch (mbuf.st_mode & S_IFMT) { case S_IFCHR: /* Charater special */ strcpy(Tftypes,"csp"); return(CHCTRSP); case S_IFBLK: /* Block special */ strcpy(Tftypes,"bsp"); return(BLKSP); case S_IFIFO: /* Fifo - pipe */ strcpy(Tftypes,"pip"); return(CHCTRSP); #ifndef U11 case S_IFSOCK: /* Socket ! */ strcpy(Tftypes,"soc"); return(SOCKET); #endif }/*E switch mbuf.st_mode & S_IFMT */ if((ifile = fopen(file, "r")) == NULL) { PERROR "\n%s: %s %s\n", caller, CANTOPW, file); perror(caller); return(EOF); } switch (mbuf.st_mode & S_IFMT) { case S_IFLNK: /* Symbolic link */ strcpy(Tftypes,"sym"); fclose(ifile); return(SYMLNK); case S_IFDIR: /* Directory */ strcpy(Tftypes,"dir"); fclose(ifile); return(DIRECT); }/*E switch mbuf.st_mode & S_IFMT */ /* * Read in a MBUFSIZ amount of data from the file for * further examination. */ if ((in = read(fileno(ifile), buf, MBUFSIZ)) <= 0) if (in < 0) { PERROR "\n%s: %s %s\n", caller, CANTRD, file); perror(caller); exit(FAIL); } else { fclose(ifile); strcpy(Tftypes,"nul"); return(EMPTY); /* File appears to be empty */ } /* * This check is looking for files that are used as output * of ltf instead of using tape. NOTE: This is bogus as * the rest of all these checks are. This is the bare * minimum of checking. Should check not only for VOL1, * but also HDR1, etc., tape mark, then EOF1, etc., tape * mark. Thus this checks for VOL1 in the first 4 characters, * and makes the file type binary. */ if (!strncmp(buf, "VOL1", 4)) { fclose(ifile); strcpy(Tftypes,"bin"); return(BINARY); /* Data file of some type */ } switch(*(int *)buf) { case 0407: /* Old impure Format on Ultrix-11 should * be PDP-11 normal (I space only). * On Ultrix-32 - seems to indicate * a .o (object) file. */ fclose(ifile); #ifdef U11 /* check to see if relocation info is stripped */ if (((int *)buf)[7]) strcpy(Tftypes,"exe"); else #endif strcpy(Tftypes,"bin"); return(BINARY); case 0410: /* Read-only/shared text */ case 0413: /* Demand Load Format */ #ifdef U11 case 0401: /* PDP-11 Standalone executable */ case 0411: /* PDP-11 Separated I & D */ case 0430: /* PDP-11 7 Overlays */ case 0431: /* PDP-11 7 Overlays */ case 0450: /* PDP-11 15 Overlays */ case 0451: /* PDP-11 15 Overlays */ #endif fclose(ifile); strcpy(Tftypes,"exe"); return(BINARY); case 0177555: /* Very old archive */ case 0177545: /* Old archive */ fclose(ifile); strcpy(Tftypes,"oar"); return(BINARY); case 070707: /* CPIO data */ fclose(ifile); strcpy(Tftypes,"cpi"); return(CPIO); }/*E switch(*(int *)buf) */ if(strncmp(buf, "!<arch>\n__.SYMDEF", 17) == 0 ) { fclose(ifile); strcpy(Tftypes,"arl"); return(BINARY); /* Archive Random Library */ } if (strncmp(buf, "!<arch>\n", 8)==0) { fclose(ifile); strcpy(Tftypes,"arc"); return(BINARY); /* Archive */ } if (mbuf.st_size % 512 == 0) { /* it may be a PRESS file */ lseek(ifile, -512L, 2); /* last block */ if ((in = read(fileno(ifile), buf, MBUFSIZ)) <= 0) { if (in < 0) { PERROR "\n%s: %s %s\n", caller, CANTRD, file); perror(caller); exit(FAIL); } } if (in > 0 && *(int *)buf == 12138) { fclose(ifile); strcpy(Tftypes,"cmp"); return(BINARY); /* Press file ..*/ } }/*E if mbuf.st_size ..*/ /* * See if it looks like a command file. */ i = 0; while(buf[i] == ' ' || buf[i] == '#' || buf[i] == '!') { while(buf[i++] != '\n') if(i >= in) goto notcom; }/*E while buf[i] ..*/ #if 0 /*for debugging*/ printf("\ngoing to lookupe \n"); for (x=i;x<i+80;x++) printf("%c",buf[x]); #endif 0 if (lookupe(com)==1) { Type = TEXT; strcpy(Tftypes,"com"); goto outa; } notcom: i = 0; if (!ccom()) { goto notc; } while(buf[i] == '#') { j = i; while(buf[i++] != '\n') { if(i - j > 255) { fclose(ifile); strcpy(Tftypes,"adf"); return(BINARY); /* Data file of some type */ } if(i >= in) { goto notc; } }/* while buf[i++] ...*/ if(!ccom()) { goto notc; } }/*E while buf[i] ..*/ /* */ check: if(lookupe(c) == 1) { while((ch = buf[i++]) != ';' && ch != '{') if(i >= in) { goto notc; } strcpy(Tftypes,"cc "); Type = TEXT; /* 'C' program text */ goto outa; }/*E if lookupe(c) */ nl = 0; while(buf[i] != '(') { if(buf[i] <= 0) goto notas; if(buf[i] == ';') { i++; goto check; } if(buf[i++] == '\n') if(nl++ > NLM) { goto notc; } if(i >= in) { goto notc; } }/*E while(buf[i] ..*/ while(buf[i] != ')') { if(buf[i++] == '\n') if(nl++ > NLM) { goto notc; } if(i >= in) { goto notc; } }/*E while buf[i] ..*/ while(buf[i] != '{') { if(buf[i++] == '\n') if(nl++ > NLM) { goto notc; } if(i >= in) { goto notc; } }/*E while buf[i] ..*/ strcpy(Tftypes,"cc "); Type = TEXT; /* 'C' program text */ goto outa; /* */ notc: i = 0; while(buf[i] == 'c' || buf[i] == '#') { while(buf[i++] != '\n') if(i >= in) goto notfort; }/*E while buf[i] ..*/ if(lookupe(fort) == 1) { strcpy(Tftypes,"for"); Type = TEXT; /* Fortran program text */ goto outa; }/*E if lookupe fort */ /* */ notfort: i=0; if (!ascom()) goto notas; j = i-1; if (buf[i] == '.') { i++; if(lookupe(as) == 1) { strcpy(Tftypes,"asm"); Type = TEXT; /* Assembler program text */ goto outa; }/*T if lookupe as .. */ else if(buf[j] == '\n' && isalpha(buf[j+2])) { strcpy(Tftypes,"rof"); Type = TEXT; /* roff, nroff or eqn * input text. */ goto outa; } }/*E if buf[i] ..*/ while (!lookupe(asc)) { if (!ascom()) goto notas; while(buf[i] != '\n' && buf[i++] != ':') if(i >= in) goto notas; while(buf[i] == '\n' || buf[i] == ' ' || buf[i] == '\t' || buf[i] == '\f' || buf[i] == NL) if(i++ >= in) goto notas; j = i-1; if(buf[i] == '.') { i++; if(lookupe(as) == 1) { strcpy(Tftypes,"asm"); Type = TEXT; /* ASM program text */ goto outa; } else if(buf[j] == '\n' && isalpha(buf[j+2])) { strcpy(Tftypes,"rof"); Type = TEXT; /* roff, nroff, or * eqn input text. */ goto outa; } }/*E if buf[i] == . */ }/* while lookupe(asc) ..*/ strcpy(Tftypes,"asm"); Type = TEXT; /* Assembler program text */ goto outa; /* */ notas: for (i=0; i < in; i++) if (buf[i] & 0200) { if (buf[0]=='\100' && buf[1]=='\357') { fclose(ifile); strcpy(Tftypes,"rof"); return(BINARY); /* troff (CAT) output */ } fclose(ifile); strcpy(Tftypes,"bin"); return(BINARY); /* Data file of some type */ }/*E for i=0 ..*/ if (mbuf.st_mode&((S_IEXEC)|(S_IEXEC>>3)|(S_IEXEC>>6))) { strcpy(Tftypes,"com"); Type = TEXT; /* Commands text */ goto outa; } else if (troffint(buf, in)) { strcpy(Tftypes,"rof"); Type = BINARY; /* troff intermediate output text */ } else if (english(buf, in)) { strcpy(Tftypes,"eng"); Type = ENGLISH; /* English text */ } else { strcpy(Tftypes,"asc"); Type = TEXT; /* Ascii text */ } /* */ outa: while(i < in) if((!buf[i]) || ((buf[i++] & 0377) > 0176)) { /* With garbage, assume binary */ fclose(ifile); strcpy(Tftypes,"bin"); return(BINARY); } if (((Type == TEXT) || (Type == ENGLISH))) { /* * We don't want to be mislead. If we think this is * a 'text' file... Make certain of it.. */ while((in = read(fileno(ifile),buf,MBUFSIZ)) > 0) { for(i = 0; i < in; i++) if((!buf[i]) || ((buf[i] & 0377) > 0176)) { /* With garbage, assume binary */ fclose(ifile); strcpy(Tftypes,"bin"); return(BINARY); } /* If it doesn't meet the English frequency test, * again assume it is TEXT of some kind. */ if (Type == ENGLISH) if (!(english(buf, in))) { /* * If it fails English freq test, * switch over to TEXT and continue * checking for BINARY data. */ strcpy(Tftypes,"asc"); Type = TEXT; } }/*E while in .. */ if (in < 0) { PERROR "\n%s: %s %s\n", caller, CANTRD, file); perror(caller); exit(FAIL); } }/*E if Type == TEXT */ /* If the above logic didn't change our mind, * return the tentative file type as the actual. * Convert tentative specifics to generalized types * as/if required. */ if (Type == ENGLISH) { fclose(ifile); return(TEXT); } else { fclose(ifile); return(Type); } }/*E Filetype() */ /**/ /* * * Function: * * ascom * * Function Description: * * * * Arguments: * * none * * Return values: * * Indication of what value(s) are returned. * * * Side Effects: * */ ascom() { while(buf[i] == '/') { i++; while(buf[i++] != '\n')if(i >= in) return(0); while(buf[i] == '\n')if(i++ >= in) return(0); }/*E while buf[i] ..*/ return(1); }/*E ascom */ /**/ /* * * Function: * * ccom * * Function Description: * * This section will provide a description of the function. * * Arguments: * * none * * Return values: * * Indication of what value(s) are returned. * * * Side Effects: * * */ ccom() { /*------*\ Code \*------*/ while((ch = buf[i]) == ' ' || ch == '\t' || ch == '\n' || ch == '\f' || ch == NL) if(i++ >= in) { return(0); } if(buf[i] == '/' && buf[i+1] == '*') { i += 2; while(buf[i] != '*' || buf[i+1] != '/') { if(buf[i] == '\\') i += 2; else i++; if(i >= in) { return(0); } }/* while buf[i] ..*/ if((i += 2) >= in) { return(0); } }/*E if buf[i] ..*/ if(buf[i] == '\n') if(ccom() == 0) { return(0); } return(1); }/*E ccom() */ /**/ /* * * Function: * * english * * Function Description: * * This routine attempts to determine if the file contains * english text based on the frequency (or lack thereof) * of key letter usage in the English language. * * Arguments: * * char *bp Pointer to the buffer of text * int n Number of characters in the buffer. * * Return values: * * Zero if the buffer doesn't look like english text. * Non-zero if buffer appears to be English. * * * Side Effects: * * none */ english(bp, n) char *bp; int n; { /* Local variables */ int ct[NASC], freq, rare, vow; int badpun = 0, punct = 0; /*-*/ if (n<50) return(0); /* no point in statistics on squibs */ for(j=0; j<NASC; j++) ct[j]=0; for(j=0; j<n; j++) { if (bp[j]<NASC) ct[bp[j]|040]++; switch (bp[j]) { case '.': case ',': case ')': case '%': case ';': case ':': case '?': punct++; if ( j < n-1 && bp[j+1] != ' ' && bp[j+1] != '\f' && bp[j+1] != '\t' && bp[j+1] != NL && bp[j+1] != '\n') badpun++; }/*E switch bp[j] */ }/*E for j=0 ..*/ if (badpun*5 > punct) return(0); vow = ct['a'] + ct['e'] + ct['i'] + ct['o'] + ct['u']; freq = ct['e'] + ct['t'] + ct['a'] + ct['i'] + ct['o'] + ct['n']; rare = ct['v'] + ct['j'] + ct['k'] + ct['q'] + ct['x'] + ct['z']; if (2*ct[';'] > ct['e']) return(0); if ( (ct['>']+ct['<']+ct['/'])>ct['e']) return(0); /* shell file test */ return (vow*5 >= n-ct[' '] && freq >= 10*rare); }/*E english() */ /**/ /* * * Function: * * lookupe * * Function Description: * * This section will provide a description of the function. * * Arguments: * * char *tab ?? * * Return values: * * Indication of what value(s) are returned. * * * Side Effects: * * */ lookupe(tab) char *tab[]; { /* Local variables */ int k, l; /*-*/ while(buf[i] == ' ' || buf[i] == '\t' || buf[i] == '\n' || buf[i] == '\f' || buf[i] == NL) i++; for(j=0; tab[j] != 0; j++) { l=0; for(k=i; ((ch=tab[j][l++]) == buf[k] && ch != '\0');k++) ; if(ch == '\0') if(buf[k] == ' ' || buf[k] == '\n' || buf[k] == '\t' || buf[k] == '\f' || buf[k] == '{' || buf[k] == NL || buf[k] == '/') { i=k; return(1); }/*E if buf[k] ..*/ }/*E for j=0 ..*/ return(0); }/*E lookupe() */ /**/ /* * * Function: * * troffint * * Function Description: * * See if the file appears to be troff intermediate text * * Arguments: * * char *bp ? * int n ? * * Return values: * * Indication of what value(s) are returned. * * * Side Effects: * * */ troffint(bp, n) char *bp; int n; { i = 0; for (j = 0; j < 6; j++) { if (lookupe(troff) == 0) return(0); if (lookupe(troff) == 0) return(0); while (i < n && buf[i] != '\n') i++; if (i++ >= n) return(0); }/*E for j=0 .. */ return(1); }/*E troffint() */ /**\\**\\**\\**\\**\\** EOM filetype.c **\\**\\**\\**\\**\\*/ /**\\**\\**\\**\\**\\** EOM filetype.c **\\**\\**\\**\\**\\*/