V10/cmd/spitbol/dif.spt

-TITLE SPITBOL TEST PROGRAM #10 -- FILE COMPARATOR
-IN80
*   THIS PROGRAM PERFORMS THE INVALUABLE SERVICE OF COMPARING TWO
*   TEXT FILES FOR DIFFERENCES WHICH ARE PRINTED OUT IF FOUND. ITS
*   SEMANTICS ARE THOSE OF THE DEC-10 PROGRAM FILCOM, BUT THIS VERSION
*   BEING IN SPITBOL IS PORTABLE.
*
**      THE PROGRAM IS BASICALLY THE WORK OF
**      P.R. TALLETT
**      DATACALL LTD
**      KIRKSTALL RD
**      LEEDS, ENGLAND.
**
**      IT HAS BEEN ELABORATED SOMEWHAT BY A.P. MCCANN.
*
************************************************************************
**                       INSTRUCTIONS FOR USE                          *
**  THE PROGRAM STARTS BY ATTEMPTING TO READ A COMMAND LINE FROM  THE  *
**  STANDARD  INPUT  FILE.   IF  THIS  FAILS, A "*" IS PRINTED ON THE  *
**  TERMINAL AS A PROMPT FOR A REPLY.  WHETHER SUPPLIED IN A BATCH OR  *
**  A TERMINAL RUN, THE COMMAND LINE SHOULD HAVE THE FORM              *
**  OFILE=INFILE1,INFILE2                                              *
**  WHERE OFILE IS THE FILE TO RECEIVE LIST OF  DIFFERENCES,  INFILE1  *
**  AND INFILE2 ARE FILES TO BE COMPARED.                              *
**                                                                     *
**       IN ADDITION, THE COMMAND LINE  MAY  CONTAIN  AT  ANY  POINT,  *
**  SWITCHES  TO  CONTROL  OPTIONS.  THESE ARE LISTED BELOW, WHERE ()  *
**  ENCLOSES OPTIONAL ITEMS AND N STANDS FOR AN INTEGER.               *
**                                                                     *
**       /B         BLANK LINES MUST MATCH IN COMPARED FILES. DEFAULT  *
**       IS THAT BLANK LINES ARE IGNORED IN MATCHING PROCESS.          *
**                                                                     *
**       /L(N)(+)   N  LINES  MUST  BE  FOUND  IDENTICAL   BEFORE   A  *
**  DIFFERENCE LIST IS TERMINATED.  DEFAULT IS N=3.                    *
**                                                                     *
**       +  SIGNIFIES THAT THESE N LINES ARE TO BE  LISTED.   DEFAULT  *
**  IS THAT ONLY THE FIRST OF THE N LINES IS LISTED.                   *
**                                                                     *
**       /D(N)(-)   A DOUBLE COLUMN LISTING FORMAT IS  USED  FOR  THE  *
**  DIFFERENCES, GIVING THE FIRST N CHARACTERS OF DIFFERING LINES (OR  *
**  THE WHOLE LINE IF LESS THAN N IN LENGTH).  THE TOTAL LINE  LENGTH  *
**  IS 2N+2 INCLUDING SEPARATING CHARACTERS.  DEFAULT IS N=65.         *
**                                                                     *
**       -  SIGNIFIES THAT DIFFERENCES SHOULD BE SEPARATED BY A SHORT  *
**  MARKER  LINE.   DEFAULT IS TO USE A SUFFICIENTLY LONG MARKER LINE  *
**  TO SEPARATE DIFFERENCES TO GIVE A NEAT "BOXED"  PRINTER  LISTING.  *
**  THE SHORTER FORMAT IS USEFUL ON SLOW TERMINALS.                    *
**                                                                     *
**       /RN        ACCEPT INPUT LINES OR  PRINT  OUTPUT  RECORDS  OF  *
**  MAXIMUM LENGTH N CHARACTERS.  DEFAULT IS 133 CHARACTERS.           *
**                                                                     *
**       AFTER TASK COMPLETION, AN ATTEMPT IS  MADE  TO  READ  A  NEW  *
**  COMMAND  LINE FOR ANOTHER SET OF FILES.  THE RUN IS TERMINATED BY  *
**  SUPPLYING A NULL COMMAND LINE.                                     *
************************************************************************
*
*   IF PROGRAM IS RUN FROM A TERMINAL, ERRORS IN COMMAND LINE
*   CAN BE CORRECTED INTERACTIVELY. IT MAY ALSO BE RUN AS A BATCH JOB,
*   BUT IN THIS CASE ERROR RECOVERY IS NOT POSSIBLE.
*
*   NOTE IMPORTANT COMMENT PRECEDING START OF MAIN PROGRAM.
*
*
        &ANCHOR = &TRIM = 1; &STLIMIT = 999999
        DIGIT   = "0123456789"; BLD = " " DIGIT
        SWPAT   = "" $ SL BREAK("/") $ N "/" $ SL
+           ("L" (SPAN(BLD) $ NN | "") ("+" $ NNP | "")
+           *?(NN = CONVERT(NN,"INTEGER")) |
+           "D" (SPAN(BLD) $ LEN *?(LEN = CONVERT(LEN,"INTEGER")) |
+           *?(LEN = 65)) ("-" $ LENM | "") |
+           ("R" SPAN(DIGIT)) $ RECL |
+           "B" $ BLANKS)
        CMDPAT  = BREAK("=") $ F1 LEN(1) BREAK(",") $ F2 LEN(1) REM $ F3
        STARS   = "****"
        SETEXIT(.ERR); &ERRLIMIT = 5
*
*   DATATYPES USED TO HOLD THE LINES OF EACH FILE WHICH ARE NOT YET
*   COMPLETELY PROCESSED, TOGETHER WITH POINTERS INTO THEM.
*
*   ELEM IS ONE CELL OF A SINGLY LINKED LIST -
*     OBJECT   THE TEXT OF THE LINE.
*     LINK     THE NEXT ELEM, OR NULL AT THE END OF THE LIST.
*
*   LIST DESCRIBES ONE FILE -
*     HEAD     FIRST ELEM STILL HELD (NULL IF THE LIST IS EMPTY).
*     CURRENT  ELEM CURRENTLY BEING USED FOR MATCHING.
*     TAIL     LAST ELEM IN THE LIST.
*     COUNT    NO. OF LINES IN THE LIST.
*     EOF      SET NON-NULL WHEN END OF FILE IS MET.
*     INP      NAME OF THE INPUT-ASSOCIATED VARIABLE (.IN1 OR .IN2);
*              A LINE IS READ BY EVALUATING $INP(LIST).
*
        DATA("ELEM(OBJECT,LINK)")
        DATA("LIST(HEAD,CURRENT,TAIL,COUNT,EOF,INP)")
*
*   ADD(LISTX,OBJ) -- APPEND THE LINE OBJ TO THE LIST LISTX.
*   RETURNS THE NEWLY CREATED ELEM AND INCREMENTS COUNT(LISTX).
*   IF BLANKS IS NULL (BLANK LINES NOT SIGNIFICANT) AND OBJ IS NULL,
*   IT DOES NOT ADD THE BLANK LINE BUT READS ON VIA $INP(LISTX) AND
*   ADDS THE NEXT NON-BLANK LINE INSTEAD. FAILS (FRETURN) IF INPUT
*   IS EXHAUSTED IN THIS PROCESS, IN WHICH CASE NOTHING IS ADDED.
*
        DEFINE("ADD(LISTX,OBJ)")                        :(ADDEND)
*   THE COUNT IS BUMPED ONLY IF BLANKS ARE SIGNIFICANT OR THE LINE
*   IS NON-NULL. OTHERWISE READ A REPLACEMENT LINE AND TEST AGAIN.
ADD     COUNT(LISTX) = (DIFFER(BLANKS),DIFFER(OBJ)) COUNT(LISTX) + 1:S(ADD0)
        OBJ     = $INP(LISTX)                           :S(ADD)F(FRETURN)
*
*   EMPTY LIST (HEAD NULL) - THE NEW ELEM BECOMES BOTH HEAD AND TAIL.
*   A STALE TAIL LEFT FROM AN EARLIER LIST IS OVERWRITTEN HERE.
ADD0    ADD     = TAIL(LISTX) = HEAD(LISTX) =
+           IDENT(HEAD(LISTX)) ELEM(OBJ)                :S(RETURN)
*   ADD TO EXISTING LIST IF ARRIVE HERE - LINK THE NEW ELEM AFTER
*   THE OLD TAIL AND MAKE IT THE TAIL.
        ADD     = TAIL(LISTX) = LINK(TAIL(LISTX)) = ELEM(OBJ):(RETURN)
ADDEND
*
*   MATCH(OBJ) -- FIND THE LINE OBJ IN THATL, REQUIRING ALSO THAT
*   THE LINES FOLLOWING CURRENT(THISL) AGREE WITH THE LINES FOLLOWING
*   THE CANDIDATE IN THATL. THE CALLER PASSES OBJECT(CURRENT(THISL)).
*   FAILS IF NOT FOUND, OTHERWISE RETURNS POINTER INTO THATL OF THE
*   MATCHING ITEM.
*
*   GLOBALS SET (AND USED LATER BY PHASE3) -
*     THAP    CANDIDATE ELEM IN THATL.
*     CD      POSITION OF THAP IN THATL COUNTING THE HEAD AS 1.
*     THAPT   ON SUCCESS, LAST ELEM OF THATL TAKING PART IN THE MATCH.
*     CDEC    POSITION OF THAPT IN THATL COUNTING THE HEAD AS 1.
*     THIPT   SCAN POINTER INTO THISL.
*
        DEFINE("MATCH(OBJ)")                            :(MATEND)
*   AN EMPTY THATL CANNOT MATCH.
MATCH   IDENT(THAP = HEAD(THATL))                       :S(FRETURN)
        CD      = 1
*
*   ATTEMPT TO MATCH OBJECT WITH THAT POINTED AT IN THATL. NOTE POINTER
*   IN BOTH THE FUNCTION RESULT AND THAPT.
*
MATC1   MATCH   = THAPT = IDENT(OBJ,OBJECT(THAP)) THAP  :S(MATC3)
*
*   ADVANCE DOWN THE LIST TO TRY AGAIN. FAIL WHEN THATL IS USED UP.
*
MATC2   CD      = CD + 1
        IDENT(THAP = LINK(THAP))                        :F(MATC1)S(FRETURN)
*
*   FIRST LINE MATCHED. CHECK EVERY LINE WHICH FOLLOWS CURRENT(THISL),
*   TO THE END OF THISL, AGAINST THE LINES FOLLOWING THAP.
*
MATC3   THIPT   = CURRENT(THISL)
        CDEC    = CD
*
*   MATCHED IF RUN OFF END OF THISL BUT NOT OFF END OF THATL.
*   RUNNING OFF THATL FIRST IS A FAILURE OF THE WHOLE SEARCH.
*   A MISMATCH RESUMES THE SEARCH AT THE ELEM AFTER THAP.
*
MATC4   IDENT(THIPT = LINK(THIPT))                      :S(RETURN)
        IDENT(THAPT = LINK(THAPT))                      :S(FRETURN)
        CDEC    = CDEC + 1
        IDENT(OBJECT(THIPT),OBJECT(THAPT))              :S(MATC4)F(MATC2)
MATEND
*
*   OUTFIL(LISTX,MARK,STARS) -- PRINT ONE FILE'S SIDE OF A DIFFERENCE.
*   WRITES THE MARKER LINE STARS TO OUT, THEN EACH LINE OF LISTX FROM
*   HEAD(LISTX) UP TO AND INCLUDING CURRENT(LISTX), EACH PREFIXED BY
*   MARK. HEAD(LISTX) IS ADVANCED PAST EVERY LINE PRINTED; COUNT(LISTX)
*   IS NOT TOUCHED. X IS A LOCAL.
*   ON FIRST ENTRY ONLY, IT PLACES INPUT FILE NAMES ON THE LISTING.
*
        DEFINE("OUTFIL(LISTX,MARK,STARS)X")             :(OUTEND)
OUTFIL  OUT     = "FILE 1)  " F2
        OUT     = "FILE 2)  " F3
*   REDEFINE OUTFIL SO THAT LATER CALLS ENTER AT OUTFIL2 AND SKIP THE
*   HEADING. THE VALUE OF THE DEFINE CALL (PRESUMABLY NULL, GIVING A
*   BLANK LINE) IS WRITTEN TO OUT.
        OUT     = DEFINE("OUTFIL(LISTX,MARK,STARS)X",.OUTFIL2)
*
*   THIS ENTRY IS USED ON ALL OCCASIONS AFTER FIRST
*
OUTFIL2 OUT     = STARS
*   PRINT THE HEAD LINE; RETURN IF THE LIST IS EMPTY.
OUTFIL3 OUT     = MARK OBJECT(DIFFER(X = HEAD(LISTX)) X):F(RETURN)
        HEAD(LISTX) = LINK(X)
*   STOP ONCE THE LINE JUST PRINTED WAS THE CURRENT ONE.
        IDENT(CURRENT(LISTX),X)                         :S(RETURN)F(OUTFIL3)
OUTEND
*
*   PUT() -- WRITE ONE DIFFERENCE TO THE OUTPUT FILE AND COUNT IT IN
*   DIFFS. THE LINES WRITTEN ARE THOSE FROM HEAD TO CURRENT OF FILE1
*   AND OF FILE2. IF LEN IS NULL THE TWO GROUPS ARE LISTED ONE AFTER
*   THE OTHER USING OUTFIL; IF LEN IS NON-NULL (SWITCH /D) THEY ARE
*   LISTED SIDE BY SIDE. X, Y, XO, YO ARE LOCALS.
*   NOTE THAT THE GOTO ON THE DEFINE SKIPS BOTH THIS ROUTINE AND THE
*   ERR ROUTINE WHICH FOLLOWS IT.
*
        DEFINE("PUT()X,Y,XO,YO")                        :(ERREND)
PUT     DIFFER(LEN)                                     :S(PUTDB)
        DIFFS   = DIFFS + 1
*   FILE 1 LINES FOLLOW THE LONG MARKER, FILE 2 LINES THE SHORT ONE.
        OUTFIL(FILE1,"1)   ",STSTARS)
        OUTFIL(FILE2,"2)   ",STARS)                     :(RETURN)
*
*   HERE TO OUTPUT DIFFERENCES IN DOUBLE COLUMN FORMAT. THE FIRST CALL
*   WRITES A HEADING WITH BOTH FILE NAMES, THEN REDEFINES PUT SO THAT
*   LATER CALLS ENTER AT PUTD.
*
PUTDB   OUT     = RPAD("FILE 1) " F2,LEN) "| " "FILE 2) " F3
        DEFINE("PUT()X,Y,XO,YO",.PUTD)
*
*   ENTRY USED FOR DOUBLE COLUMN FORMAT AFTER INITIAL ENTRY
*   X AND Y SCAN FILE1 AND FILE2; XO AND YO HOLD THE TEXT TO PRINT.
*
PUTD    DIFFS   = DIFFS + 1; OUT = STSTARS
        XO      = X = HEAD(FILE1); YO = Y = HEAD(FILE2)
*
*   CHECK WHETHER BOTH LISTS FINISHED (X AND Y BOTH NULL). IF SO MOVE
*   EACH HEAD UP TO ITS CURRENT LINE AND RETURN.
*
PUTD1   HEAD(FILE1) = IDENT(X,IDENT(Y))  CURRENT(FILE1) :F(PUTD2)
        HEAD(FILE2) = CURRENT(FILE2)                    :(RETURN)
*
*   PRINT A LINE GIVING DIFFERENCES IN ADJACENT COLUMNS. THE LEFT
*   COLUMN IS CUT OR PADDED TO LEN CHARACTERS; THE RIGHT COLUMN IS CUT
*   TO LEN BUT NOT PADDED. A FINISHED SIDE PRINTS ITS (NULLED) XO/YO.
*
PUTD2   OUT     = (DIFFER(X) GT(SIZE(XO = OBJECT(X)),LEN)
+           SUBSTR(XO,1,LEN),RPAD(XO,LEN))
+           "| "
+           (DIFFER(Y) GT(SIZE(YO = OBJECT(Y)),LEN) SUBSTR(YO,1,LEN),YO)
*   STEP EACH SIDE ON - A NULL POINTER STAYS NULL; OTHERWISE MOVE TO
*   THE NEXT ELEM, OR, HAVING JUST PRINTED THE CURRENT LINE, NULL BOTH
*   THE TEXT AND THE POINTER SO THAT SIDE PRINTS AS EMPTY FROM NOW ON.
        X       = (IDENT(X),(DIFFER(CURRENT(FILE1),X) LINK(X),XO = ))
        Y       = (IDENT(Y),(DIFFER(CURRENT(FILE2),Y) LINK(Y),YO = )):(PUTD1)
*
*   THIS ROUTINE IS ENTERED (VIA SETEXIT) IF A SPITBOL ERROR OCCURS.
*   ERROR TYPE 116 IS TREATED AS RECOVERABLE - THE EXIT IS RE-ARMED
*   AND CONTROL GOES TO CMER TO REPORT A COMMAND ERROR AND RESTART.
*   NOTE(REVIEW): THE COMMENT AT CMER SUGGESTS 116 ARISES FROM BAD
*   FILE NAMES - CONFIRM AGAINST THE ERROR LIST OF THE SPITBOL IN USE.
*   ANY OTHER ERROR IS TAKEN TO BE A PROGRAM BUG - THE ERROR TEXT AND
*   STATEMENT NUMBER ARE REPORTED, A DUMP IS TAKEN AND THE RUN ENDS.
*
ERR     SETEXIT(EQ(&ERRTYPE,116) .ERR)                  :S(CMER)
        TERMINAL = OUTPUT =  "ERROR: " &ERRTEXT " IN STMT " &LASTNO
        COLLECT()
        DUMP(2)                                         :(END)
ERREND
*
*
*   MAIN PROGRAM
*   ============
*
*   NORMAL ENTRY POINT TO MAIN PROGRAM. ALSO RE-ENTERED AFTER EACH
*   COMPLETED COMPARISON AND AFTER EACH COMMAND ERROR.
*   RESET ALL SWITCH VALUES AND THE DIFFERENCE COUNT TO NULL, THEN
*   SET THE DEFAULTS NN = 3 AND RECL = "R133".
*
INIT    LEN     = LENM = NNP = DIFFS = BLANKS =
        NN      = 3; RECL = "R133"
*
*   READ COMMAND LINE EITHER FROM INPUT FILE OR TERMINAL.
*   IF ERROR AND IF BATCH, PRINT ERROR MESSAGE AND STOP.
*   BATCH IS NON-NULL ONCE A COMMAND HAS BEEN READ FROM INPUT. ON ANY
*   LATER ARRIVAL HERE THE VALUE LAST ASSIGNED TO TERMINAL (DETACHED
*   BELOW, SO PRESUMABLY AN ORDINARY VARIABLE BY THEN) IS COPIED TO
*   OUTPUT AND THE RUN ENDS.
*   NOTE(REVIEW): THIS MEANS A BATCH RUN HANDLES ONE COMMAND ONLY,
*   EVEN WHEN THAT COMMAND SUCCEEDED - CONFIRM THIS IS INTENDED.
*
        OUTPUT  = DIFFER(BATCH) TERMINAL                :S(END)
        X       = BATCH = INPUT                         :F(TERMI)
        DETACH(.TERMINAL)                               :(DIFFX)
*
*   READ COMMAND LINE FROM TERMINAL, PROMPTING WITH "*"
*
TERMI   TERMINAL = "*"; X = TERMINAL                    :F(END)
*
*   CHECK FOR NULL COMMAND LINE, WHICH TERMINATES THE RUN
*
DIFFX   DIFFER(X)                                       :F(END)
*
*   LOOP TO PROCESS SWITCHES. NO. OF LINES IN MATCH IS 3 BY DEFAULT.
*   EACH PASS REMOVES ONE SWITCH FROM X, KEEPING THE TEXT (N) WHICH
*   PRECEDED IT. WHEN THE PATTERN FINALLY FAILS, A NON-NULL SL MEANS
*   A "/" WAS FOUND BUT WAS NOT FOLLOWED BY A VALID SWITCH.
*
SWPAT   X SWPAT = N                                     :S(SWPAT)
        TERMINAL = DIFFER(SL) "? INVALID SWITCH"        :S(INIT)
*   SPLIT WHAT REMAINS INTO F1 (OUTPUT), F2 AND F3 (INPUTS).
        X CMDPAT                                        :S(GO)
*
*   MERGE FROM ERR IF BAD FILENAMES
*
CMER    TERMINAL = "?COMMAND ERROR IN " X               :(INIT)
*
*   ATTACH THE OUTPUT FILE TO OUT ON CHANNEL 3 AND THE INPUT FILES TO
*   IN1 AND IN2 ON CHANNELS 1 AND 2. ANY FAILURE RESTARTS AT INIT.
*   NOTE(REVIEW): RECL (SWITCH /RN) IS SET BUT IS NOT PASSED TO ANY
*   OF THESE CALLS, SO THE RECORD LENGTH SWITCH APPEARS TO HAVE NO
*   EFFECT IN THIS VERSION - CONFIRM.
*
GO      OUTPUT(.OUT,3,F1)                      :S(GO1)
        TERMINAL = "?CAN'T ENTER OUTPUT FILE " F1       :(INIT)
GO1     INPUT(.IN1,1,F2)                :S(GO2)
        TERMINAL = "?CAN'T READ INPUT FILE 1 " F2       :(INIT)
GO2     INPUT(.IN2,2,F3)                :S(START)
        TERMINAL = "?CAN'T READ INPUT FILE 2 "  F3      :(INIT)
*
*   START OF A COMPARISON. STSTARS IS THE LONG MARKER LINE - 2*LEN+2
*   STARS FOR THE BOXED DOUBLE COLUMN FORMAT (LEN SET, LENM NULL),
*   OTHERWISE 12 STARS. ONE EMPTY LIST IS CREATED PER INPUT FILE.
*   THISL AND THATL START AS FILE1 AND FILE2 BUT ARE SWAPPED FREELY
*   LATER; FILE1 AND FILE2 ALWAYS KEEP THEIR ORIGINAL MEANING.
START   TERMINAL =
        STSTARS = DUPL("*",(DIFFER(LEN,IDENT(LENM)) 2 * LEN + 2,12))
        THISL   = FILE1 = LIST(,,,,,.IN1)
        THATL   = FILE2 = LIST(,,,,,.IN2)
*
*   HUNT THROUGH FILES TILL DIFFERING LINES FOUND (IF ANY). ONE LINE
*   IS READ FROM EACH FILE IN TURN; NOTHING IS STORED WHILE THEY AGREE.
*
PHASE1  THIS    = $INP(THISL)                           :F(THISEND)
*
THAT    THAT    = $INP(THATL)                           :F(THATEND)
*
IDENT   IDENT(THIS,THAT)                                :S(PHASE1)
*
*   MAKE SURE DIFFERENCE IS NOT MERELY BLANK LINES IF BLANKS NULL.
*   A NULL THAT IS SKIPPED BY READING THE NEXT THAT; A NULL THIS IS
*   SKIPPED BY READING THE NEXT THIS. THE PAIR IS THEN COMPARED AGAIN.
*
        DIFFER(BLANKS)                                  :S(DIFF)
        IDENT(THAT)                                     :S(THAT)
        DIFFER(THIS)                                    :S(DIFF)
        THIS    = $INP(THISL)                           :S(IDENT)F(THISEND)
*
*   ARRIVE HERE WHEN A DIFFERENCE IS ENCOUNTERED. PHASE 2
*   ASSESSES HOW MUCH DIFFERENCE THERE IS AND PRINTS DIFFERENCES.
*   IT OPERATES BY READING A LINE ALTERNATELY FROM EACH OF THE
*   TWO INPUT FILES, ADDING IT TO THE APPROPRIATE LIST AND ATTEMPTING
*   TO MATCH A CURRENT LINE AGAINST LINES HELD FOR THE OTHER FILE.
*   AT ALL TIMES SUFFICIENT LINES ARE KEPT FOLLOWING THE CURRENT LINE
*   SO THAT A COMPLETE MATCH CHECK CAN BE MADE.
*   THE DIFFERING LINE OF THATL IS STORED FIRST AND MADE CURRENT.
*
DIFF    CURRENT(THATL) = ADD(THATL,THAT)
*
*   MERGE AGAIN AFTER DEALING WITH ONE DIFFERENCE TO DO ANOTHER.
*   THE LINE THIS BECOMES THE FIRST AND CURRENT LINE OF THISL. IF THE
*   ADD FAILS, CURRENT(THISL) IS LEFT AS IT WAS.
*
ENTER   CURRENT(THISL) = ADD(THISL,THIS)
*
*   ENTER ANOTHER LINE INTO THISL LIST, ADVANCE CURRENT(THISL)
*   TO CORRESPOND AND SEE IF NEW CURRENT LINE MATCHES
*   ANY LINE IN OTHER LIST. THIS FILE IS EXHAUSTED EITHER IF
*   CURRENT IS ALREADY NULL OR IF LINK OF CURRENT IS NULL.
*   A FAILED READ SIMPLY LEAVES THE LIST UNCHANGED. X IS LEFT HOLDING
*   THE NEW CURRENT ELEM OF THISL.
*
PHASE2  ADD(THISL,THIS = $INP(THISL))
        (IDENT(X = CURRENT(THISL)),IDENT(X = CURRENT(THISL)
+           = LINK(X)))                                 :S(THISEND)
*
*   LOOP HERE, READING AHEAD, UNTIL THISL HOLDS AT LEAST NN+1 LINES
*   AND THATL AT LEAST NN LINES (OR INPUT RUNS OUT) SO THAT MATCH
*   TEST CAN BE DONE.
*
ADD1    LE(NN + 1,COUNT(THISL))                         :S(ADD2)
        ADD(THISL,$INP(THISL))                          :S(ADD1)
ADD2    LE(NN,COUNT(THATL))                             :S(TRYMAT)
        ADD(THATL,$INP(THATL))                          :S(ADD2)
*
*   ATTEMPT A MATCH. ON SUCCESS X BECOMES THE MATCHING ELEM OF THATL.
*
TRYMAT  X       = MATCH(OBJECT(X))                      :S(PHASE3)
*
*   ARRIVE HERE WHILST STILL ATTEMPTING MATCH. IF END FILE NOT READ
*   ON THAT LIST THEN SWAP LISTS AND TRY MATCHING USING IT INSTEAD.
*
        DIFFER(EOF(THATL))                              :S(PHASE2)
        X       = THISL; THISL = THATL; THATL = X       :(PHASE2)
*
*   MATCH ATTEMPT WAS SUCCESSFUL SO OUTPUT  THIS LIST
*   AND PORTION OF THAT LIST UP TO AND INCLUDING MATCHING LINE.
*   THEN ADVANCE HEAD OF THATL PAST THE FURTHER NN-1 LINES MATCHED
*   WHICH ARE NO LONGER OF INTEREST AND CLEAR THISL SINCE ALL ITS LINES
*   ARE DEALT WITH.
*
*   ON ENTRY X IS THE MATCHING ELEM IN THATL, THAPT THE LAST ELEM OF
*   THATL WHICH TOOK PART IN THE MATCH AND CDEC ITS POSITION IN THATL
*   (ALL SET BY MATCH). Y SAVES CURRENT(THATL) WHILE IT IS BORROWED
*   TO TELL PUT HOW FAR TO LIST.
*
PHASE3  Y       = CURRENT(THATL)
*   WITHOUT "+" ON /L LIST ONLY AS FAR AS THE FIRST MATCHED LINE OF
*   EACH FILE. WITH IT, LIST ALL THE MATCHED LINES OF BOTH FILES.
        CURRENT(THATL) = (IDENT(NNP) X,THAPT)
        CURRENT(THISL) = DIFFER(NNP) TAIL(THISL)
        PUT()
        CURRENT(THATL) = Y
*   DROP EVERYTHING IN THATL UP TO AND INCLUDING THE LAST MATCHED LINE.
        HEAD(THATL) = LINK(THAPT)
        COUNT   = COUNT(THATL) = COUNT(THATL) - CDEC
*   IF FEWER THAN NN LINES REMAIN, RESTART CURRENT AT THE NEW HEAD.
        CURRENT(THATL) = LT(COUNT,NN) HEAD(THATL)
*   EMPTY THISL COMPLETELY. CURRENT MUST BE CLEARED ALONG WITH HEAD
*   AND COUNT - OTHERWISE IT GOES ON POINTING INTO THE DISCARDED LIST,
*   AND IF THE NEXT ADD AT ENTER FAILS (ONLY BLANK LINES LEFT BEFORE
*   END OF FILE, BLANKS NOT SIGNIFICANT) PHASE2 WOULD ADVANCE ALONG
*   AND MATCH AGAINST LINES WHICH HAVE ALREADY BEEN DEALT WITH.
        CURRENT(THISL) = HEAD(THISL) = COUNT(THISL) =
*
*   NOW POSITION FILES TO ANOTHER DIFFERENCE OR UNTIL THATL EMPTY.
*   RIGHT NOW, THISL IS EMPTY.
*   LINES ALREADY HELD IN THATL ARE COMPARED ONE BY ONE WITH FRESH
*   LINES READ FROM THISL. EACH AGREEING LINE IS DROPPED FROM THATL.
*   ONCE THATL IS EMPTY CONTROL RETURNS TO PHASE1. A DISAGREEMENT
*   STARTS A NEW DIFFERENCE AT ENTER WITH THE LINE JUST READ IN THIS.
*
POSIT   IDENT(X = HEAD(THATL))                          :S(PHASE1)
        THIS    = $INP(THISL)                           :F(THISEND)
        IDENT(THIS,OBJECT(X))                           :F(ENTER)
        HEAD(THATL) = LINK(X)
*   COUNT DOWN THATL. WHEN FEWER THAN NN LINES REMAIN, RESTART
*   CURRENT(THATL) AT THE NEW HEAD; OTHERWISE LEAVE IT ALONE.
        CURRENT(THATL) = LT(COUNT(THATL) = COUNT(THATL) - 1,NN) HEAD(THATL)
+                                                       :(POSIT)
*
*   HERE TO SET EOF FLAG FOR THISL. DONE BY SWAPPING LISTS
*   AND JUMPING INTO CODE FOR THATL.
*
THISEND X       = THISL; THISL = THATL; THATL = X       :(EOF)
*
*   MARK ENDFILE OF THATL BY SETTING EOF NON-NULL.
*   ON ARRIVAL HERE FIRST STORE LINE JUST READ FROM THIS FILE
*   AND MAKE IT CURRENT. IF THE ADD FAILS, CURRENT(THISL) IS LEFT
*   AS IT WAS.
*
THATEND CURRENT(THISL) = ADD(THISL,THIS)
*
*   TEST FOR END CONDITION WHICH IS THAT WE HAVE EOF ON BOTH LISTS.
*   WE KNOW WE HAVE EOF ON THAT LIST SINCE WE ARE ABOUT TO SET FLAG.
*   IF THE OTHER FILE IS NOT YET FLAGGED, GO ON PROCESSING IT IN
*   PHASE2.
*
EOF     EOF(THATL) = "EOF"
        IDENT(EOF(THISL))                               :S(PHASE2)
*
*   EOF REACHED ON BOTH FILES. OUTPUT BOTH LISTS AND PACK UP.
*   IF BOTH LISTS ARE EMPTY THERE IS NOTHING LEFT TO REPORT. OTHERWISE
*   MAKE EACH CURRENT POINT AT ITS TAIL SO THAT PUT LISTS EVERYTHING
*   STILL HELD.
*
        IDENT(HEAD(THISL),IDENT(HEAD(THATL)))           :S(CLOSE)
        CURRENT(THISL) = TAIL(THISL)
        CURRENT(THATL) = TAIL(THATL)
        PUT()
*
*   FINISH BY TYPING OUT DIFFERENCE COUNT. DIFFS IS STILL NULL IF PUT
*   WAS NEVER CALLED. IN A BATCH RUN (BATCH NON-NULL) THE MESSAGE IS
*   ALSO COPIED TO THE OUTPUT FILE; OTHERWISE THAT STATEMENT FAILS
*   AND WRITES NOTHING.
*
CLOSE   TERMINAL = IDENT(DIFFS) "NO DIFFERENCES FOUND"  :F(DIFFS)
        OUT     = DIFFER(BATCH) TERMINAL                :(STOP)
*
*   CLOSE THE LISTING WITH A MARKER LINE AND REPORT THE COUNT, USING
*   THE PLURAL UNLESS THE COUNT IS EXACTLY 1.
DIFFS   OUT     = STSTARS
        TERMINAL = "% " DIFFS "  DIFFERENCE" (NE(DIFFS,1) "S",) " FOUND"
        OUT     = DIFFER(BATCH) TERMINAL
*
*   RESTORE OUTFIL AND PUT TO THEIR FIRST-CALL ENTRY POINTS SO THAT
*   THE NEXT COMPARISON PRINTS ITS FILE NAME HEADING AGAIN, THEN GO
*   BACK FOR ANOTHER COMMAND LINE.
*   NOTE(REVIEW): CHANNELS 1, 2 AND 3 ARE NOT CLOSED HERE BEFORE THE
*   NEXT COMMAND RE-USES THEM - CONFIRM HOW THE SPITBOL IN USE TREATS
*   A SECOND INPUT/OUTPUT CALL ON A CHANNEL WHICH IS ALREADY OPEN.
STOP    TERMINAL = DEFINE("OUTFIL(LISTX,MARK,STARS)X")
+           DEFINE("PUT()X,Y,XO,YO")                    :(INIT)
END