V10/cmd/spitbol/dif.spt
-TITLE SPITBOL TEST PROGRAM #10 -- FILE COMPARATOR
-IN80
* THIS PROGRAM PERFORMS THE INVALUABLE SERVICE OF COMPARING TWO
* TEXT FILES FOR DIFFERENCES WHICH ARE PRINTED OUT IF FOUND. ITS
* SEMANTICS ARE THOSE OF THE DEC-10 PROGRAM FILCOM, BUT THIS VERSION
* BEING IN SPITBOL IS PORTABLE.
*
** THE PROGRAM IS BASICALLY THE WORK OF
** P.R. TALLETT
** DATACALL LTD
** KIRKSTALL RD
** LEEDS, ENGLAND.
**
** IT HAS BEEN ELABORATED SOMEWHAT BY A.P. MCCANN.
*
************************************************************************
** INSTRUCTIONS FOR USE *
** THE PROGRAM STARTS BY ATTEMPTING TO READ A COMMAND LINE FROM THE *
** STANDARD INPUT FILE. IF THIS FAILS, A "*" IS PRINTED ON THE *
** TERMINAL AS A PROMPT FOR A REPLY. THE COMMAND LINE SUPPLIED *
** EITHER IN A BATCH OR TERMINAL RUN SHOULD CONSIST OF A COMMAND *
** LINE OF FORM *
** OFILE=INFILE1,INFILE2 *
** WHERE OFILE IS THE FILE TO RECEIVE LIST OF DIFFERENCES, INFILE1 *
** AND INFILE2 ARE FILES TO BE COMPARED. *
** *
** IN ADDITION, THE COMMAND LINE MAY CONTAIN AT ANY POINT, *
** SWITCHES TO CONTROL OPTIONS. THESE ARE LISTED BELOW, WHERE () *
** ENCLOSES OPTIONAL ITEMS AND N STANDS FOR AN INTEGER. *
** *
** /B BLANK LINES MUST MATCH IN COMPARED FILES. DEFAULT *
** IS THAT BLANK LINES ARE IGNORED IN MATCHING PROCESS. *
** *
** /L(N)(+) N LINES MUST BE FOUND IDENTICAL BEFORE A *
** DIFFERENCE LIST IS TERMINATED. DEFAULT IS N=3. *
** *
** + SIGNIFIES THAT THESE N LINES ARE TO BE LISTED. DEFAULT *
** IS THAT ONLY THE FIRST OF THE N LINES IS LISTED. *
** *
** /D(N)(-) A DOUBLE COLUMN LISTING FORMAT IS USED FOR THE *
** DIFFERENCES, GIVING THE FIRST N CHARACTERS OF DIFFERING LINES (OR *
** THE WHOLE LINE IF LESS THAN N IN LENGTH). THE TOTAL LINE LENGTH *
** IS 2N+2 INCLUDING SEPARATING CHARACTERS. DEFAULT IS N=65. *
** *
** - SIGNIFIES THAT DIFFERENCES SHOULD BE SEPARATED BY A SHORT *
** MARKER LINE. DEFAULT IS TO USE A SUFFICIENTLY LONG MARKER LINE *
** TO SEPARATE DIFFERENCES TO GIVE A NEAT "BOXED" PRINTER LISTING. *
** THE SHORTER FORMAT IS USEFUL ON SLOW TERMINALS. *
** *
** /RN ACCEPT INPUT LINES OR PRINT OUTPUT RECORDS OF *
** MAXIMUM LENGTH N CHARACTERS. DEFAULT IS 133 CHARACTERS. *
** *
** AFTER TASK COMPLETION, AN ATTEMPT IS MADE TO READ A NEW *
** COMMAND LINE FOR ANOTHER SET OF FILES. THE RUN IS TERMINATED BY *
** SUPPLYING A NULL COMMAND LINE. *
************************************************************************
*
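* IF THE PROGRAM IS RUN FROM A TERMINAL, ERRORS IN THE COMMAND LINE
* CAN BE CORRECTED INTERACTIVELY. IT MAY ALSO BE RUN AS A BATCH JOB
* (COMMAND LINE READ FROM THE STANDARD INPUT FILE), BUT IN THIS CASE
* ERROR RECOVERY IS NOT POSSIBLE AND ONLY ONE COMMAND LINE IS
* PROCESSED PER RUN.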
*
* NOTE IMPORTANT COMMENT PRECEDING START OF MAIN PROGRAM.
*
*
* GLOBAL SETUP: KEYWORDS, CHARACTER SETS, THE TWO COMMAND LINE
* PATTERNS AND THE ERROR INTERCEPT.
*
* ALL PATTERN MATCHES IN THE PROGRAM RUN ANCHORED AND INPUT RECORDS
* ARE TRIMMED. THE STATEMENT LIMIT IS RAISED TO 999999.
*
&ANCHOR = &TRIM = 1; &STLIMIT = 999999
*
* BLD IS THE CHARACTER SET ACCEPTED FOR THE NUMERIC ARGUMENT OF THE
* /L AND /D SWITCHES: THE DIGITS PLUS BLANK.
*
DIGIT = "0123456789"; BLD = " " DIGIT
*
* SWPAT MATCHES THE COMMAND LINE UP TO AND INCLUDING ITS FIRST SWITCH.
* ALL RESULTS ARE DELIVERED BY IMMEDIATE ASSIGNMENT ($) OR BY DEFERRED
* EXPRESSIONS (*) EVALUATED DURING THE MATCH:
*   SL     CLEARED AT THE START OF THE MATCH, THEN SET TO "/" ONCE A
*          SLASH HAS BEEN MATCHED. A NON-NULL SL AFTER A FAILED MATCH
*          THEREFORE MEANS A SLASH WAS FOUND BUT WAS NOT FOLLOWED BY A
*          RECOGNISED SWITCH.
*   N      THE TEXT PRECEDING THE SLASH.
*   /L     NN RECEIVES THE OPTIONAL NUMBER (OTHERWISE KEEPS ITS
*          CURRENT VALUE) AND IS CONVERTED TO INTEGER; NNP = "+" IF A
*          PLUS SIGN FOLLOWS.
*   /D     LEN RECEIVES THE NUMBER CONVERTED TO INTEGER, OR 65 IF NO
*          NUMBER IS GIVEN; LENM = "-" IF A MINUS SIGN FOLLOWS.
*   /R     RECL RECEIVES "R" FOLLOWED BY THE DIGITS (DIGITS REQUIRED).
*   /B     BLANKS RECEIVES "B".
* NOTE(REVIEW): RECL IS SET HERE AND AT INIT BUT NO OTHER STATEMENT IN
* THIS LISTING READS IT - CONFIRM WHETHER /R HAS ANY EFFECT.
*
SWPAT = "" $ SL BREAK("/") $ N "/" $ SL
+ ("L" (SPAN(BLD) $ NN | "") ("+" $ NNP | "")
+ *?(NN = CONVERT(NN,"INTEGER")) |
+ "D" (SPAN(BLD) $ LEN *?(LEN = CONVERT(LEN,"INTEGER")) |
+ *?(LEN = 65)) ("-" $ LENM | "") |
+ ("R" SPAN(DIGIT)) $ RECL |
+ "B" $ BLANKS)
*
* CMDPAT SPLITS "OFILE=INFILE1,INFILE2" INTO F1 (TEXT BEFORE "="),
* F2 (TEXT BETWEEN "=" AND ",") AND F3 (THE REMAINDER). THE LEN(1)
* CALLS HERE ARE THE PATTERN FUNCTION, NOT THE VARIABLE LEN USED FOR
* THE /D COLUMN WIDTH.
*
CMDPAT = BREAK("=") $ F1 LEN(1) BREAK(",") $ F2 LEN(1) REM $ F3
*
* SHORT MARKER PASSED TO OUTFIL AHEAD OF THE FILE 2 LINES IN THE
* SINGLE COLUMN LISTING.
*
STARS = "****"
*
* ROUTE SPITBOL RUN-TIME ERRORS TO LABEL ERR; ERROR LIMIT IS 5.
*
SETEXIT(.ERR); &ERRLIMIT = 5
*
* HERE ARE THE DATATYPES USED TO MAINTAIN THE LIST OF LINES WHICH ARE
* NOT YET COMPLETELY PROCESSED, TOGETHER WITH POINTERS INTO THEM.
*
* ELEM IS ONE NODE OF A SINGLY LINKED LIST:
*   OBJECT  THE TEXT OF THE LINE.
*   LINK    THE NEXT ELEM, OR NULL AT THE END OF THE LIST.
*
* LIST DESCRIBES ONE INPUT FILE:
*   HEAD, CURRENT, TAIL  POINTERS TO THE INITIAL LINE, THE CURRENT
*           LINE FOR MATCHING, AND THE LAST LINE IN THE LINKED LIST
*           OF LINES BUILT FROM ELEMS.
*   COUNT   NO. OF LINES IN THE LIST.
*   EOF     SET NON-NULL WHEN END OF FILE IS MET.
*   INP     NAME OF THE INPUT-ASSOCIATED VARIABLE (.IN1 OR .IN2) FROM
*           WHICH THIS FILE'S LINES ARE READ BY $INP(...).
*
DATA("ELEM(OBJECT,LINK)")
DATA("LIST(HEAD,CURRENT,TAIL,COUNT,EOF,INP)")
*
* ADD(LISTX,OBJ) - APPEND THE LINE OBJ TO THE LIST LISTX.
*
* LISTX  A LIST RECORD.
* OBJ    THE LINE TEXT TO BE STORED.
* RETURNS THE NEWLY CREATED ELEM (WHICH IS ALSO THE NEW TAIL).
* IF BLANKS IS NULL, IT DOES NOT ADD BLANK LINES BUT FINDS
* AND ADDS THE NEXT NON-BLANK LINE READ FROM $INP(LISTX).
* FAILS IF INPUT IS EXHAUSTED IN THIS PROCESS.
* READS THE GLOBAL BLANKS (SET BY THE /B SWITCH).
*
DEFINE("ADD(LISTX,OBJ)") :(ADDEND)
*
* ACCEPT THE LINE IF BLANK LINES ARE SIGNIFICANT OR THE LINE IS
* NON-BLANK. COUNT IS BUMPED ONLY WHEN THE LINE IS ACCEPTED.
*
ADD COUNT(LISTX) = (DIFFER(BLANKS),DIFFER(OBJ)) COUNT(LISTX) + 1:S(ADD0)
*
* LINE WAS AN IGNORABLE BLANK: REPLACE IT BY THE NEXT INPUT LINE AND
* RETRY, OR FAIL AT END OF FILE.
*
OBJ = $INP(LISTX) :S(ADD)F(FRETURN)
*
* EMPTY LIST (HEAD NULL): THE NEW ELEM BECOMES BOTH HEAD AND TAIL.
* ONLY HEAD IS TESTED, SO A STALE TAIL IS HARMLESS.
*
ADD0 ADD = TAIL(LISTX) = HEAD(LISTX) =
+ IDENT(HEAD(LISTX)) ELEM(OBJ) :S(RETURN)
* ADD TO EXISTING LIST IF ARRIVE HERE: CHAIN THE NEW ELEM AFTER THE
* OLD TAIL AND MAKE IT THE TAIL.
ADD = TAIL(LISTX) = LINK(TAIL(LISTX)) = ELEM(OBJ):(RETURN)
ADDEND
*
* MATCH(OBJ) - FIND THE LINE OBJ IN THATL. FAILS IF NOT FOUND,
* OTHERWISE RETURNS A POINTER INTO THATL TO THE MATCHING ITEM.
*
* A CANDIDATE IS ACCEPTED ONLY IF THE LINES FOLLOWING CURRENT(THISL)
* ALSO AGREE, ONE FOR ONE, WITH THE LINES FOLLOWING THE CANDIDATE IN
* THATL, RIGHT TO THE END OF THISL.
*
* USES THE GLOBALS THISL AND THATL. ALL WORK VARIABLES ARE GLOBAL
* (THE DEFINE DECLARES NO LOCALS) AND TWO OF THEM ARE RESULTS WHICH
* THE CALLER RELIES ON AFTER A SUCCESSFUL RETURN:
*   THAPT  LAST ELEM OF THATL THAT TOOK PART IN THE COMPARISON.
*   CDEC   POSITION OF THAPT IN THATL, COUNTING HEAD(THATL) AS 1.
* THAP, THIPT AND CD ARE SCRATCH.
*
DEFINE("MATCH(OBJ)") :(MATEND)
*
* NOTHING TO SEARCH IF THATL IS EMPTY. CD IS THE POSITION OF THAP.
*
MATCH IDENT(THAP = HEAD(THATL)) :S(FRETURN)
CD = 1
*
* ATTEMPT TO MATCH OBJECT WITH THAT POINTED AT IN THATL. NOTE POINTER
* (BOTH AS THE FUNCTION RESULT AND IN THAPT) WHEN THE LINES AGREE.
*
MATC1 MATCH = THAPT = IDENT(OBJ,OBJECT(THAP)) THAP :S(MATC3)
*
* ADVANCE DOWN THE LIST TO TRY AGAIN. FAIL WHEN THATL IS EXHAUSTED.
*
MATC2 CD = CD + 1
IDENT(THAP = LINK(THAP)) :F(MATC1)S(FRETURN)
*
* FIRST LINE MATCHED. CHECK THE REMAINING LINES TO THE END OF THISL.
* THIS ROUTINE NEVER REFERENCES NN ITSELF; HOW MANY LINES ARE
* COMPARED DEPENDS ON HOW MANY THE CALLER HAS QUEUED AFTER
* CURRENT(THISL).
*
MATC3 THIPT = CURRENT(THISL)
CDEC = CD
*
* MATCHED IF RUN OFF END OF THISL BUT NOT OFF END OF THATL.
* RUNNING OFF THATL FIRST IS A FAILURE OF THE WHOLE SEARCH. A
* MISMATCH RESUMES THE SEARCH AT THE NEXT CANDIDATE AFTER THAP.
*
MATC4 IDENT(THIPT = LINK(THIPT)) :S(RETURN)
IDENT(THAPT = LINK(THAPT)) :S(FRETURN)
CDEC = CDEC + 1
IDENT(OBJECT(THIPT),OBJECT(THAPT)) :S(MATC4)F(MATC2)
MATEND
*
* OUTFIL(LISTX,MARK,STARS) - PRINT ONE FILE'S SHARE OF A DIFFERENCE
* IN THE SINGLE COLUMN FORMAT.
*
* LISTX  THE LIST TO PRINT.
* MARK   PREFIX PLACED BEFORE EVERY LINE ("1) " OR "2) ").
* STARS  SEPARATOR LINE WRITTEN BEFORE THE LINES. THIS PARAMETER
*        HIDES THE GLOBAL STARS WHILE THE ROUTINE RUNS.
* X      LOCAL SCRATCH POINTER.
*
* IT OUTPUTS THE LIST FROM HEAD(LISTX) UP TO AND INCLUDING
* CURRENT(LISTX), ADVANCING HEAD(LISTX) PAST EACH LINE PRINTED, AND
* STOPS EARLY IF THE LIST RUNS OUT. ALWAYS RETURNS SUCCESSFULLY.
* ON FIRST ENTRY ONLY, IT PLACES THE INPUT FILE NAMES ON THE LISTING.
*
DEFINE("OUTFIL(LISTX,MARK,STARS)X") :(OUTEND)
OUTFIL OUT = "FILE 1) " F2
OUT = "FILE 2) " F3
*
* RE-DEFINE OUTFIL SO THAT LATER CALLS ENTER AT OUTFIL2 AND SKIP THE
* FILE NAME HEADING. ASSIGNING THE DEFINE RESULT TO OUT ALSO WRITES
* AN OUTPUT LINE AFTER THE HEADING.
*
OUT = DEFINE("OUTFIL(LISTX,MARK,STARS)X",.OUTFIL2)
*
* THIS ENTRY IS USED ON ALL OCCASIONS AFTER FIRST
*
OUTFIL2 OUT = STARS
*
* PRINT THE HEAD LINE (RETURN IF THE LIST IS EMPTY), UNLINK IT, AND
* STOP ONCE THE LINE JUST PRINTED WAS THE CURRENT ONE.
*
OUTFIL3 OUT = MARK OBJECT(DIFFER(X = HEAD(LISTX)) X):F(RETURN)
HEAD(LISTX) = LINK(X)
IDENT(CURRENT(LISTX),X) :S(RETURN)F(OUTFIL3)
OUTEND
*
* PUT() - WRITE ONE DIFFERENCE TO THE OUTPUT FILE AND COUNT IT.
*
* TAKES NO ARGUMENTS; WORKS ON THE GLOBAL LISTS FILE1 AND FILE2 AND
* PRINTS EACH FROM ITS HEAD UP TO AND INCLUDING ITS CURRENT LINE.
* THE GLOBAL LEN SELECTS THE FORMAT: NULL GIVES THE SINGLE COLUMN
* LISTING (VIA OUTFIL), NON-NULL GIVES THE DOUBLE COLUMN LISTING
* WITH LEN CHARACTERS PER COLUMN. DIFFS IS INCREMENTED ON EVERY CALL.
* LOCALS: X,Y WALK THE TWO LISTS; XO,YO HOLD THE TEXT TO PRINT.
*
* THE GOTO ON THE DEFINE SKIPS TO ERREND, I.E. OVER BOTH THIS BODY
* AND THE ERR ROUTINE WHICH FOLLOWS IT.
*
DEFINE("PUT()X,Y,XO,YO") :(ERREND)
PUT DIFFER(LEN) :S(PUTDB)
*
* SINGLE COLUMN FORMAT: FILE 1 LINES BEHIND THE LONG MARKER STSTARS,
* THEN FILE 2 LINES BEHIND THE SHORT GLOBAL MARKER STARS.
*
DIFFS = DIFFS + 1
OUTFIL(FILE1,"1) ",STSTARS)
OUTFIL(FILE2,"2) ",STARS) :(RETURN)
*
* HERE TO OUTPUT DIFFERENCES IN DOUBLE COLUMN FORMAT.
* FIRST CALL ONLY: WRITE THE TWO FILE NAMES AS COLUMN HEADINGS AND
* RE-DEFINE PUT SO THAT LATER CALLS ENTER DIRECTLY AT PUTD.
*
PUTDB OUT = RPAD("FILE 1) " F2,LEN) "| " "FILE 2) " F3
DEFINE("PUT()X,Y,XO,YO",.PUTD)
*
* ENTRY USED FOR DOUBLE COLUMN FORMAT AFTER INITIAL ENTRY
*
PUTD DIFFS = DIFFS + 1; OUT = STSTARS
XO = X = HEAD(FILE1); YO = Y = HEAD(FILE2)
*
* CHECK WHETHER BOTH LISTS FINISHED (X AND Y BOTH NULL). IF SO LEAVE
* EACH HEAD AT ITS CURRENT LINE AND RETURN. NOTE THAT THIS DIFFERS
* FROM OUTFIL, WHICH LEAVES HEAD ONE PAST THE CURRENT LINE.
*
PUTD1 HEAD(FILE1) = IDENT(X,IDENT(Y)) CURRENT(FILE1) :F(PUTD2)
HEAD(FILE2) = CURRENT(FILE2) :(RETURN)
*
* PRINT A LINE GIVING DIFFERENCES IN ADJACENT COLUMNS. THE LEFT
* COLUMN IS TRUNCATED OR PADDED TO LEN CHARACTERS; THE RIGHT COLUMN
* IS TRUNCATED TO LEN BUT NOT PADDED. WHEN A SIDE IS FINISHED ITS
* TEXT (XO OR YO) HAS BEEN CLEARED SO THE COLUMN PRINTS EMPTY.
*
PUTD2 OUT = (DIFFER(X) GT(SIZE(XO = OBJECT(X)),LEN)
+ SUBSTR(XO,1,LEN),RPAD(XO,LEN))
+ "| "
+ (DIFFER(Y) GT(SIZE(YO = OBJECT(Y)),LEN) SUBSTR(YO,1,LEN),YO)
*
* STEP EACH SIDE: A FINISHED SIDE STAYS NULL; OTHERWISE MOVE TO THE
* NEXT ELEM UNLESS THE CURRENT LINE HAS JUST BEEN PRINTED, IN WHICH
* CASE THE SIDE IS MARKED FINISHED AND ITS TEXT CLEARED.
* NOTE(REVIEW): IF A LIST ENDS BEFORE ITS CURRENT ELEM IS REACHED,
* THE TEXT IS NOT CLEARED AND WOULD BE REPEATED - CONFIRM THAT
* CURRENT IS ALWAYS REACHABLE FROM HEAD WHEN PUT IS CALLED.
*
X = (IDENT(X),(DIFFER(CURRENT(FILE1),X) LINK(X),XO = ))
Y = (IDENT(Y),(DIFFER(CURRENT(FILE2),Y) LINK(Y),YO = )):(PUTD1)
*
* THIS ROUTINE IS ENTERED (THROUGH SETEXIT) IF A SPITBOL ERROR
* OCCURS - USED FOR BUGS.
*
* ERROR TYPE 116 IS TREATED AS A FAULT IN THE COMMAND LINE: THE
* INTERCEPT IS RE-ARMED AND CONTROL PASSES TO CMER, WHICH REPORTS A
* COMMAND ERROR AND RESTARTS AT INIT.
* NOTE(REVIEW): PRESUMABLY 116 IS THE ERROR RAISED FOR AN UNUSABLE
* FILE NAME IN INPUT/OUTPUT - CONFIRM AGAINST THE ERROR LIST OF THE
* SPITBOL IMPLEMENTATION IN USE.
*
ERR SETEXIT(EQ(&ERRTYPE,116) .ERR) :S(CMER)
*
* ANY OTHER ERROR: REPORT THE MESSAGE AND STATEMENT NUMBER ON BOTH
* TERMINAL AND OUTPUT, PRODUCE A DUMP AND END THE RUN.
*
TERMINAL = OUTPUT = "ERROR: " &ERRTEXT " IN STMT " &LASTNO
COLLECT()
DUMP(2) :(END)
ERREND
*
*
* MAIN PROGRAM
* ============
*
* NORMAL ENTRY POINT TO MAIN PROGRAM. ALSO RE-ENTERED AFTER EVERY
* COMPLETED COMPARISON AND AFTER EVERY COMMAND ERROR.
* RESET ALL SWITCH SETTINGS AND THE DIFFERENCE COUNT TO NULL, THEN
* APPLY THE DEFAULTS NN = 3 AND RECL = "R133".
*
INIT LEN = LENM = NNP = DIFFS = BLANKS =
NN = 3; RECL = "R133"
*
* READ COMMAND LINE EITHER FROM INPUT FILE OR TERMINAL.
* BATCH IS NON-NULL ONCE A COMMAND LINE HAS BEEN READ FROM INPUT.
* ARRIVING HERE WITH BATCH ALREADY SET MEANS THE BATCH COMMAND HAS
* BEEN DEALT WITH (SUCCESSFULLY OR NOT): COPY THE LAST MESSAGE HELD
* IN TERMINAL TO OUTPUT AND STOP.
*
OUTPUT = DIFFER(BATCH) TERMINAL :S(END)
*
* TRY THE STANDARD INPUT FILE FIRST. IF A LINE IS READ THIS IS A
* BATCH RUN, SO TERMINAL IS DETACHED AND FROM THEN ON BEHAVES AS AN
* ORDINARY VARIABLE HOLDING THE LATEST MESSAGE.
*
X = BATCH = INPUT :F(TERMI)
DETACH(.TERMINAL) :(DIFFX)
*
* READ COMMAND LINE FROM TERMINAL AFTER PROMPTING WITH "*"
*
TERMI TERMINAL = "*"; X = TERMINAL :F(END)
*
* CHECK FOR NULL COMMAND LINE, WHICH ENDS THE RUN
*
DIFFX DIFFER(X) :F(END)
*
* LOOP TO PROCESS SWITCHES. NO. OF LINES IN MATCH IS 3 BY DEFAULT.
* EACH SUCCESSFUL MATCH REPLACES THE MATCHED TEXT (EVERYTHING UP TO
* AND INCLUDING THE SWITCH) BY N, THE TEXT WHICH PRECEDED THE SLASH,
* SO THE SWITCH IS REMOVED FROM X. THE LABEL SWPAT AND THE PATTERN
* VARIABLE SWPAT ARE DISTINCT THINGS WITH THE SAME NAME.
*
SWPAT X SWPAT = N :S(SWPAT)
*
* THE MATCH FAILED. IF SL IS NON-NULL A SLASH WAS SEEN BUT NO VALID
* SWITCH FOLLOWED IT. OTHERWISE PARSE THE FILE NAMES INTO F1,F2,F3.
*
TERMINAL = DIFFER(SL) "? INVALID SWITCH" :S(INIT)
X CMDPAT :S(GO)
*
* MERGE FROM ERR IF BAD FILENAMES
*
CMER TERMINAL = "?COMMAND ERROR IN " X :(INIT)
*
* ATTACH THE THREE FILES: OUT WRITES THE LISTING TO F1, IN1 AND IN2
* READ THE FILES TO BE COMPARED. ANY FAILURE IS REPORTED AND THE
* COMMAND IS ABANDONED.
*
GO OUTPUT(.OUT,3,F1) :S(GO1)
TERMINAL = "?CAN'T ENTER OUTPUT FILE " F1 :(INIT)
GO1 INPUT(.IN1,1,F2) :S(GO2)
TERMINAL = "?CAN'T READ INPUT FILE 1 " F2 :(INIT)
GO2 INPUT(.IN2,2,F3) :S(START)
TERMINAL = "?CAN'T READ INPUT FILE 2 " F3 :(INIT)
*
* START OF A COMPARISON. WRITE AN EMPTY LINE TO THE TERMINAL.
*
START TERMINAL =
*
* STSTARS IS THE MARKER LINE PRINTED AHEAD OF EACH DIFFERENCE: THE
* FULL DOUBLE COLUMN WIDTH (2 * LEN + 2) WHEN /D WAS GIVEN WITHOUT
* "-", OTHERWISE 12 ASTERISKS.
*
STSTARS = DUPL("*",(DIFFER(LEN,IDENT(LENM)) 2 * LEN + 2,12))
*
* CREATE AN EMPTY LIST FOR EACH INPUT FILE. FILE1/FILE2 ALWAYS NAME
* THE SAME FILE; THISL/THATL ARE WORKING ROLES WHICH GET SWAPPED.
*
THISL = FILE1 = LIST(,,,,,.IN1)
THATL = FILE2 = LIST(,,,,,.IN2)
*
* HUNT THROUGH FILES TILL DIFFERING LINES FOUND (IF ANY).
* ONE LINE IS READ FROM EACH FILE AND THE PAIR COMPARED. THE LABELS
* THAT AND IDENT SHARE THEIR NAMES WITH A VARIABLE AND A FUNCTION.
*
PHASE1 THIS = $INP(THISL) :F(THISEND)
*
THAT THAT = $INP(THATL) :F(THATEND)
*
IDENT IDENT(THIS,THAT) :S(PHASE1)
*
* MAKE SURE DIFFERENCE IS NOT MERELY BLANK LINES IF BLANKS NULL.
* A BLANK LINE ON THE THAT SIDE IS SKIPPED BY READING ANOTHER THAT;
* A BLANK LINE ON THE THIS SIDE IS SKIPPED BY READING ANOTHER THIS
* AND COMPARING AGAIN.
* NOTE(REVIEW): IF THE LAST READ BELOW HITS END OF FILE, THAT STILL
* HOLDS A NON-BLANK LINE WHICH HAS NOT BEEN ENTERED IN ANY LIST WHEN
* CONTROL GOES TO THISEND - CONFIRM WHETHER THAT LINE IS LOST.
*
DIFFER(BLANKS) :S(DIFF)
IDENT(THAT) :S(THAT)
DIFFER(THIS) :S(DIFF)
THIS = $INP(THISL) :S(IDENT)F(THISEND)
*
* ARRIVE HERE WHEN A DIFFERENCE IS ENCOUNTERED. PHASE 2
* ASSESSES HOW MUCH DIFFERENCE THERE IS AND PRINTS DIFFERENCES.
* IT OPERATES BY READING A LINE ALTERNATELY FROM EACH OF THE
* TWO INPUT FILES, ADDING IT TO THE APPROPRIATE LIST AND ATTEMPTING
* TO MATCH A CURRENT LINE AGAINST LINES HELD FOR THE OTHER FILE.
* AT ALL TIMES SUFFICIENT LINES ARE KEPT FOLLOWING THE CURRENT LINE
* SO THAT A COMPLETE MATCH CHECK CAN BE MADE.
*
* THE DIFFERING LINE OF THATL BECOMES ITS FIRST AND CURRENT LINE.
*
DIFF CURRENT(THATL) = ADD(THATL,THAT)
*
* MERGE AGAIN AFTER DEALING WITH ONE DIFFERENCE TO DO ANOTHER.
* THE DIFFERING LINE OF THISL BECOMES ITS FIRST AND CURRENT LINE.
*
ENTER CURRENT(THISL) = ADD(THISL,THIS)
*
* ENTER ANOTHER LINE INTO THISL LIST, ADVANCE CURRENT(THISL)
* TO CORRESPOND AND SEE IF NEW CURRENT LINE MATCHES
* ANY LINE IN OTHER LIST. THIS FILE IS EXHAUSTED EITHER IF
* CURRENT IS ALREADY NULL OR IF LINK OF CURRENT IS NULL.
* A FAILED READ OR ADD IN THE FIRST STATEMENT IS SIMPLY IGNORED;
* EXHAUSTION IS DETECTED BY THE POINTER TEST WHICH FOLLOWS. ON
* FALLING THROUGH, X IS THE NEW CURRENT ELEM OF THISL.
*
PHASE2 ADD(THISL,THIS = $INP(THISL))
(IDENT(X = CURRENT(THISL)),IDENT(X = CURRENT(THISL)
+ = LINK(X))) :S(THISEND)
*
* LOOP HERE TILL NN LINES AVAILABLE INCLUDING "CURRENT" LINE
* OF EACH LIST SO THAT MATCH TEST CAN BE DONE. THISL IS FILLED TO
* NN + 1 LINES AND THATL TO NN LINES; EITHER LOOP ALSO ENDS WHEN
* ITS FILE RUNS OUT (THE ADD STATEMENT FAILS).
*
ADD1 LE(NN + 1,COUNT(THISL)) :S(ADD2)
ADD(THISL,$INP(THISL)) :S(ADD1)
ADD2 LE(NN,COUNT(THATL)) :S(TRYMAT)
ADD(THATL,$INP(THATL)) :S(ADD2)
*
* ATTEMPT A MATCH. ON SUCCESS X BECOMES THE MATCHING ELEM IN THATL.
*
TRYMAT X = MATCH(OBJECT(X)) :S(PHASE3)
*
* ARRIVE HERE WHILST STILL ATTEMPTING MATCH. IF END FILE NOT READ
* ON THAT LIST THEN SWAP LISTS AND TRY MATCHING USING IT INSTEAD.
* IF THATL IS AT END OF FILE THE ROLES ARE LEFT AS THEY ARE.
*
DIFFER(EOF(THATL)) :S(PHASE2)
X = THISL; THISL = THATL; THATL = X :(PHASE2)
*
* MATCH ATTEMPT WAS SUCCESSFUL SO OUTPUT THIS LIST
* AND PORTION OF THAT LIST UP TO AND INCLUDING MATCHING LINE.
* THEN ADVANCE HEAD OF THATL PAST THE FURTHER NN-1 LINES MATCHED
* WHICH ARE NO LONGER OF INTEREST AND CLEAR THISL SINCE ALL ITS LINES
* ARE DEALT WITH.
*
* ON ENTRY X IS THE MATCHING ELEM IN THATL; THAPT AND CDEC ARE AS
* LEFT BY MATCH (LAST ELEM COMPARED AND ITS POSITION IN THATL).
* CURRENT OF EACH LIST IS SET TEMPORARILY TO MARK WHERE PUT SHOULD
* STOP PRINTING: WITHOUT "+" (NNP NULL) THATL IS PRINTED AS FAR AS
* THE FIRST MATCHED LINE; WITH "+" THATL IS PRINTED AS FAR AS THAPT
* AND THISL AS FAR AS ITS TAIL. Y PRESERVES CURRENT(THATL).
*
PHASE3 Y = CURRENT(THATL)
CURRENT(THATL) = (IDENT(NNP) X,THAPT)
CURRENT(THISL) = DIFFER(NNP) TAIL(THISL)
PUT()
CURRENT(THATL) = Y
*
* DISCARD FROM THATL EVERYTHING UP TO AND INCLUDING THAPT AND REDUCE
* ITS COUNT BY CDEC. IF FEWER THAN NN LINES REMAIN, CURRENT(THATL)
* IS RESET TO THE NEW HEAD. THE VARIABLE COUNT HERE IS A PLAIN
* GLOBAL, SEPARATE FROM THE FIELD FUNCTION COUNT().
*
HEAD(THATL) = LINK(THAPT)
COUNT = COUNT(THATL) = COUNT(THATL) - CDEC
CURRENT(THATL) = LT(COUNT,NN) HEAD(THATL)
*
* EMPTY THISL. TAIL AND CURRENT ARE NOT CLEARED HERE.
*
HEAD(THISL) = COUNT(THISL) =
*
* NOW POSITION FILES TO ANOTHER DIFFERENCE OR UNTIL THATL EMPTY.
* RIGHT NOW, THISL IS EMPTY.
* LINES STILL QUEUED IN THATL ARE COMPARED ONE AT A TIME WITH FRESH
* LINES READ FOR THISL. WHILE THEY AGREE THE QUEUED LINE IS DROPPED;
* WHEN THATL IS EMPTY PHASE1 RESUMES; ON THE FIRST DISAGREEMENT A
* NEW DIFFERENCE IS STARTED AT ENTER WITH THE LINE JUST READ.
*
POSIT IDENT(X = HEAD(THATL)) :S(PHASE1)
THIS = $INP(THISL) :F(THISEND)
IDENT(THIS,OBJECT(X)) :F(ENTER)
HEAD(THATL) = LINK(X)
CURRENT(THATL) = LT(COUNT(THATL) = COUNT(THATL) - 1,NN) HEAD(THATL)
+ :(POSIT)
*
* HERE TO SET EOF FLAG FOR THISL. DONE BY SWAPPING LISTS
* AND JUMPING INTO CODE FOR THATL.
*
THISEND X = THISL; THISL = THATL; THATL = X :(EOF)
*
* MARK ENDFILE OF THATL BY SETTING EOF NON-NULL.
* ON ARRIVAL HERE FIRST STORE LINE JUST READ FROM THIS FILE
* (THE READ FROM THE OTHER FILE FAILED, SO THIS LINE HAS NO PARTNER).
*
THATEND CURRENT(THISL) = ADD(THISL,THIS)
*
* TEST FOR END CONDITION WHICH IS THAT WE HAVE EOF ON BOTH LISTS.
* WE KNOW WE HAVE EOF ON THAT LIST SINCE WE ARE ABOUT TO SET FLAG.
* IF THE OTHER FILE IS NOT YET AT EOF, PHASE2 CARRIES ON WITH IT.
* THE LABEL EOF SHARES ITS NAME WITH THE FIELD FUNCTION EOF().
*
*
EOF EOF(THATL) = "EOF"
IDENT(EOF(THISL)) :S(PHASE2)
*
* EOF REACHED ON BOTH FILES. OUTPUT BOTH LISTS AND PACK UP.
* IF BOTH LISTS ARE EMPTY THERE IS NOTHING LEFT TO REPORT. OTHERWISE
* SET EACH CURRENT TO ITS TAIL SO THAT PUT PRINTS EVERYTHING STILL
* QUEUED AS A FINAL DIFFERENCE, THEN FALL INTO CLOSE.
*
IDENT(HEAD(THISL),IDENT(HEAD(THATL))) :S(CLOSE)
CURRENT(THISL) = TAIL(THISL)
CURRENT(THATL) = TAIL(THATL)
PUT()
*
* FINISH BY TYPING OUT DIFFERENCE COUNT.
* DIFFS IS NULL IF PUT WAS NEVER CALLED. IN A BATCH RUN TERMINAL HAS
* BEEN DETACHED, SO THE MESSAGE IS ALSO COPIED TO THE LISTING FILE
* THROUGH OUT. THE LABEL DIFFS SHARES ITS NAME WITH THE COUNTER.
*
CLOSE TERMINAL = IDENT(DIFFS) "NO DIFFERENCES FOUND" :F(DIFFS)
OUT = DIFFER(BATCH) TERMINAL :(STOP)
*
* AT LEAST ONE DIFFERENCE: CLOSE THE LISTING WITH A MARKER LINE AND
* REPORT THE COUNT, WITH "S" ADDED UNLESS THE COUNT IS EXACTLY 1.
*
DIFFS OUT = STSTARS
TERMINAL = "% " DIFFS " DIFFERENCE" (NE(DIFFS,1) "S",) " FOUND"
OUT = DIFFER(BATCH) TERMINAL
*
* RESTORE THE ORIGINAL ENTRY POINTS OF OUTFIL AND PUT (BOTH RE-DEFINE
* THEMSELVES ON FIRST USE) SO THAT THE NEXT COMPARISON PRINTS ITS
* FILE NAME HEADING AGAIN, THEN GO BACK FOR ANOTHER COMMAND LINE.
*
STOP TERMINAL = DEFINE("OUTFIL(LISTX,MARK,STARS)X")
+ DEFINE("PUT()X,Y,XO,YO") :(INIT)
END