home *** CD-ROM | disk | FTP | other *** search
/ Oakland CPM Archive / oakcpm.iso / cpm / forth-83 / f83v2-80.ark / HUFFMAN.BLK < prev    next >
Text File  |  1987-02-05  |  43KB  |  1 lines

  1. \               The Rest is Silence                   09APR84HHL*************************************************************   *************************************************************   ***                                                       ***   ***    Please direct all questions, comments, and         ***   ***    miscellaneous personal abuse to:                   ***   ***                                                       ***   ***    Henry Laxen          or    Michael Perry           ***   ***    1259 Cornell Avenue        1125 Bancroft Way       ***   ***    Berkeley, California       Berkeley, California    ***   ***    94706                      94702                   ***   ***                                                       ***   ***    (415) 525-8582             (415) 644-3421          ***   ***                                                       ***   *************************************************************   *************************************************************   \ Load Screen for Huffman Encoding/Decoding           29MAY84HHL                                                                3 20 THRU   FORTH                                               CR .( Huffman Utility Loaded )      EXIT                                                                                                               USAGE                                    To compress a file type:                                           COMPRESS INFILE1.EXT OUTFILE1.EXT                            To expand a file type:                                             EXPAND   INFILE2.EXT OUTFILE2.EXT                                                                                            Where INFILE2.EXT had better be the OUTFILE1.EXT of a prior     compression.  After either a COMPRESS or EXPAND executing       EMPTY will reset the dictionary back to its original state.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     \ Shared primitives - Double Number Helpers           29MAR84HHL: D+!   (S d# addr -- )                                            DUP 2@   ROT >R   D+   R> 2!   ;                             : ARRAY                                                            CREATE   (S n -- )   2* ALLOT                                   DOES>    (S n -- addr )   SWAP 2* +   ;                      : 2ARRAY                                                           CREATE   (S n -- )   4 * ALLOT                                  DOES>    (S n -- addr )   SWAP   2* 2* +   ;                 : 0OR1      (S n -- 0 | 1 )   0<> 1 AND   ;                                                                                     : HAPPY     (S -- )                                                DOES>   DUP @ IF   OFF   ."  ...Working "                                     ELSE  ON   12 SPACES   THEN                       12 BACKSPACES   ;  HAPPY                                                                                                     \ Read and Write bytes from a file                    29MAR84HHLVARIABLE READING?    ( True=reading False=writing )             : IO-ARRAY   (S n -- )                                             CREATE   DUP ,   2* ALLOT                                       DOES>   DUP @   READING? @ IF  DROP  ELSE  +  THEN  2+  ;                                                                      2 IO-ARRAY #BITS   ( Counts up to 8 )                           4 IO-ARRAY #BYTES  ( Number of bytes sent so far )              8 IO-ARRAY BIT-BUFFER   ( Holds 1 byte worth of bits )        128 IO-ARRAY IO-BUFFER    ( Holds sectors worth of data )         2 IO-ARRAY >FILE        ( Points to file control block )                                                                      : INIT-IO   (S -- )                                                READING? OFF   2 0 DO   0 #BITS !   0. #BYTES 2!                   BIT-BUFFER 8 ERASE  IO-BUFFER 128 CONTROL Z FILL             READING? ON   LOOP   ;                                       \ File System Interface                               10Apr84map: PERFORM-IO   (S -- )   [ DOS ]                                   HAPPY   IO-BUFFER SET-DMA   >FILE @                             READING? @ IF   READ   ELSE   WRITE   THEN     ;             : FILE-SIZE   (S -- d# )   [ DOS ]                                 >FILE @   FILE-SIZE  128 UM*   ;                             : REWIND      (S fcb -- )   [ DOS ]                                DUP CLOSE                                                       DUP 12 + 21 ERASE  ( Clean up the FCB )                             15 BDOS DROP ( Open )   ;                                : CLOSE   (S -- )   [ DOS ]                                        >FILE @   CLOSE  ;                                                                                                                                                                                                                                                                                                           \ File System Interface                               10Apr84map: INPUT   (S -- )   READING? ON   ;                             : OUTPUT  (S -- )   READING? OFF  ;                             CREATE IN-FCB B/FCB ALLOT   CREATE OUT-FCB B/FCB ALLOT          : IN&OUT   (S -- )                                                 FILE @                                                          INPUT IN-FCB >FILE !   OUTPUT OUT-FCB >FILE !   [ DOS ]         IN-FCB !FCB   OUT-FCB !FCB                                      IN-FCB      15 BDOS DOS-ERR? ABORT" Can't open file "           OUT-FCB DUP DELETE DROP  MAKE-FILE                              FILE !   ;                                                                                                                                                                                                                                                                                                                                                                                   \ Read and write bytes to a file                      10Apr84map: @BYTE   (S -- n )                                                INPUT   #BYTES 2@ DROP   127 AND                                DUP 0= IF   PERFORM-IO   THEN   IO-BUFFER +  C@                 1. #BYTES D+!   ;                                            : !BYTE   (S n -- )                                                OUTPUT   #BYTES 2@ DROP  127 AND IO-BUFFER + C!                 1. #BYTES D+!  #BYTES 2@ DROP  127 AND                          0= IF   PERFORM-IO   THEN   ;                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                \ Convert bytes into bits                             10Apr84map: (!BYTE)   (S -- )                                                0 BIT-BUFFER 8 BOUNDS DO   2* I C@ +    LOOP   !BYTE   ;     : FLUSH-BYTE   (S -- )                                             OUTPUT   #BITS @ IF   (!BYTE)   THEN                            #BYTES 2@ DROP   127 AND IF   PERFORM-IO   THEN  CLOSE  ;    : (@BYTE)   (S -- )                                                @BYTE   BIT-BUFFER 8 BOUNDS DO                                    DUP 128 AND 0OR1   I C!  2*   LOOP   DROP ;                                                                                                                                                                                                                                                                                                                                                                                                                                                                                \ Read or Write a Bitstream                           03JUN84HHL: !BIT   (S n -- )                                                 OUTPUT   0OR1   #BITS @ BIT-BUFFER + C!   1 #BITS +!            #BITS @ 8 = IF   (!BYTE)   #BITS OFF   THEN   ;              : @BIT   (S -- n )                                                 INPUT   #BITS @ 0= IF   (@BYTE)   8 #BITS !   THEN              8 #BITS @ - BIT-BUFFER + C@   -1 #BITS +!   ;                : !BITS   (S c n -- )                                              OUTPUT                                                          TUCK 16 SWAP - 0 ?DO   2*   LOOP                                SWAP 0 ?DO   DUP 32768 AND !BIT   2*   LOOP   DROP   ;       : @BITS   (S n -- c )                                              INPUT                                                           0 SWAP   0 ?DO   2* @BIT +   LOOP   ;                                                                                                                                                        \ Build a Frequency Table                             29MAR84HHLVOCABULARY COMPRESSING    COMPRESSING DEFINITIONS                                                                               256 2ARRAY FREQUENCY-TABLE                                      : INCLUDE   (S char -- )                                           FREQUENCY-TABLE   1. ROT D+!   ;                                                                                             256 ARRAY HUFFMAN                                               2VARIABLE  MIN1   2VARIABLE  MIN2                                VARIABLE >MIN1    VARIABLE >MIN2                                                                                                                                                                                                                                                                                                                                                                                                                               \ Construct a Huffman Code                            29MAR84HHL: MINIMUMS   (S -- f )                                             -1. MIN1 2!   -1. MIN2 2!                                       256 0 DO   I FREQUENCY-TABLE 2@   2DUP D0= NOT IF                  MIN1 2@ 2OVER DU< NOT IF                                           MIN1 2@ MIN2 2!   >MIN1 @ >MIN2 !                               2DUP    MIN1 2!   I       >MIN1 !                            ELSE   MIN2 2@ 2OVER DU< NOT IF                                    2DUP    MIN2 2!   I       >MIN2 !   THEN                  THEN   THEN   2DROP   LOOP   MIN2 2@ -1. D= NOT   ;                                                                                                                                                                                                                                                                                                                                                                                                          \ Construct a Huffman Code                            04DEC83HHL: JOIN-MINIMUMS   (S -- )                                          MIN1 2@ MIN2 2@ D+   ( new value )                                 >MIN1 @ FREQUENCY-TABLE 2!                                   0. >MIN2 @ FREQUENCY-TABLE 2! ( remove old value )   ;       : ENCODE-MINIMUMS   (S -- )                                        HERE   >MIN1 @ HUFFMAN @ ,   >MIN2 @ HUFFMAN @ ,                       >MIN1 @ 256 *   >MIN2 @ + ,                              >MIN1 @ HUFFMAN !   ;                                        : ENCODE   (S -- )                                                 BEGIN   MINIMUMS WHILE                                             ENCODE-MINIMUMS   JOIN-MINIMUMS                              REPEAT   ;                                                                                                                                                                                                                                                   \ Display a Huffman Code                              13APR84HHLCREATE >HLD  128 ALLOT  VARIABLE HLD   0 HLD !                  : +HOLD     >HLD HLD @ + C!   1 HLD +!   ;                      : -HOLD     -1 HLD +!     ;                                     256 ARRAY H-CODE                                                : .HOLD  (S char -- )                                              HERE SWAP H-CODE !   HLD @ C,   >HLD HERE HLD @                 DUP ALLOT CMOVE   ;                                          : DECODE   RECURSIVE   (S addr -- )                                        0 +HOLD   DUP    @ IF   DUP    @ DECODE                       ELSE   DUP 4 + @ FLIP 255 AND  .HOLD   THEN               -HOLD   1 +HOLD   DUP 2+ @ IF   DUP 2+ @ DECODE                       ELSE   DUP 4 + @      255 AND  .HOLD   THEN               -HOLD   DROP   ;                                             : FLATTEN   (S -- )                                                >MIN1 @ HUFFMAN @    DECODE ;                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                \ Compress a string into its Huffman Equivalent       29MAY84HHL: COMPRESS-BYTE   (S n -- )                                        H-CODE @ COUNT   BOUNDS ?DO   I C@ !BIT   LOOP ;             : COMPRESS-ENCODING   (S -- )                                      256 0 DO   I H-CODE @ COUNT                                        DUP IF   1 !BIT   DUP 7 !BITS                                      BOUNDS DO   I C@ !BIT   LOOP                                 ELSE   0 !BIT   2DROP   THEN                                LOOP   ;                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      \ Read and file and Encode and Compress it            29MAR84HHL: COMPRESS-FILE   (S d# -- )                                       BEGIN   2DUP D0= NOT WHILE                                         1. D-   @BYTE COMPRESS-BYTE                                  REPEAT   FLUSH-BYTE   2DROP   ;                              : ENCODE-FILE   (S d# -- )                                         BEGIN   2DUP D0= NOT WHILE                                         1. D-   @BYTE INCLUDE                                        REPEAT  2DROP   ;                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            \ Read the input file and write the compessed file    29MAY84HHLCREATE ZERO 0 ,                                                 : INITIALIZE   (S -- )                                             256 0 DO                                                           0. I FREQUENCY-TABLE 2!  0 I HUFFMAN !   ZERO I H-CODE !     LOOP   INIT-IO   IN&OUT   ;                                  FORTH DEFINITIONS                                               : COMPRESS   (S -- )                                               [ COMPRESSING ]   INITIALIZE  INPUT  FILE-SIZE 2DUP             ENCODE-FILE   ENCODE  FLATTEN                                   INIT-IO   INPUT   >FILE @ REWIND                                12345 16 !BITS    2DUP 16 !BITS  16 !BITS                       COMPRESS-ENCODING  2DUP COMPRESS-FILE  2DROP   ;                                                                                                                                                                                                             \ Expand a compressed file                            03JUN84HHLVOCABULARY EXPANDING   EXPANDING DEFINITIONS                    VARIABLE ROOT                                                   : EXPAND-BITS   (S len char  -- )                                  -256 +    ROOT @    ROT 0 DO                                       @BIT 2* +    DUP @   DUP IF   NIP                               ELSE   DROP   HERE DUP ROT !   0 , 0 , 0 ,   THEN            LOOP   4 + !   ;                                                                                                             : EXPAND-ENCODING   (S -- )                                        HERE ROOT !   0 , 0 , 0 ,                                       256 0 DO   @BIT IF   7 @BITS I EXPAND-BITS   THEN               LOOP   ;                                                                                                                                                                                                                                                     \ Expand the input stream                             04Jun84map: LEAF?   (S addr -- f )                                           4 + @ -256 AND  ;                                            : EXPAND-BYTE   (S -- char )                                       ROOT @ BEGIN   @BIT 2* + @   DUP LEAF? UNTIL                    4 + @ 255 AND  ;                                             : EXPAND-FILE   (S d# -- )                                         BEGIN   2DUP D0= NOT WHILE                                         1. D-   EXPAND-BYTE !BYTE                                    REPEAT   FLUSH-BYTE   2DROP   ;                                                                                                                                                                                                                                                                                                                                                                                                                              \ Expand a Compressed File                            28MAY84HHL: INITIALIZE   (S -- )                                            INIT-IO   IN&OUT  ;                                           FORTH DEFINITIONS                                               : EXPAND                                                           [ EXPANDING ]   INITIALIZE                                      16 @BITS 12345 <> ABORT" Not a Compressed file "                16 @BITS 16 @BITS  SWAP   EXPAND-ENCODING                       EXPAND-FILE   ;                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              \               Huffman File Compression              29MAY84HHL                                                                                                                                   This application was written by Henry Laxen and is in the    public domain.  Please credit the author when distributing it.  You are free to make copies, modify, publish, or ignore this    as the fancy suits you.                                            I apologize for the speed of this program (actually the lack thereof) but I wanted it to be totally transportable across     different machines, and hence all of the bit twiddling is       done in high level.  You could speed this up substantially by   writing @BIT and !BIT in code.                                  My thanks to Andrea Fischel for showing me how to recreate the  Huffman tree based on the compressed bit encoding.                                                                                                                                              \ Load Screen for Huffman Encoding/Decoding           09APR84HHL                                                                The purpose of this utility is to COMPRESS and EXPAND files     in order to save disk space.  A Huffman encoding is used in     order to achieve this compression.  An excellant description    of how Huffman codes work can be found in Volume 1 of Knuth.    The general idea is that a frequency table is built which       contains the number of occurances of each character in the      file to be compressed.  Based on this frequency table, each     8 bit byte is encoded as a variable length bit pattern.         Obviously, the frequently occuring bytes are encoded in less    than 8 bits, and the rarely occuring bytes are encoded in more  than 8 bits.  Very dramatic compression can be achieved with    this scheme.  In particular, BLK files can be substantially     compressed because of the large number of blanks present.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       \ Shared primitives - Double Number Helpers           09APR84HHLD+!   (S d# addr -- )                                              Increment the double number at addr by the d# on the stack.  ARRAY                                                              Define a word sized array in memory.  At runtime given the      index into the array, return the address of the element.     2ARRAY                                                             Define a double work sized array in memory.  Acts like          ARRAY above.                                                 0OR1      (S n -- 0 | 1 )  map 0 -> 0 and, all others -> 1                                                                      HAPPY     (S -- )                                                  Alternately print the string "...Working" or a string           of blanks, each time it is called.  This keeps the user         happy, since he believes the machine is still working.                                                                       \ Read and Write bytes from a file                    09APR84HHLREADING?   Used by file interface to distinguish read & write   IO-ARRAY   (S n -- )                                               Allows you to use the same name for a read or write version     of an array or variable.  Returns corresponding address.                                                                     #BITS        The number of bits mod 8 sent so far               #BYTES       Total number of bytes sent so far.                 BIT-BUFFER   Used to buffer 1 byte worth of bits for IO         IO-BUFFER    Used to hold 1 sector's worth of data              >FILE        Points to FCB of file to read or write                                                                             INIT-IO   (S -- )                                                  Initialize all of the IO variables defined above, and           set the initial state to reading.                                                                                            \ File System Interface                               09APR84HHLPERFORM-IO   (S -- )                                               Let user know we are still alive, and either read or write      a sector, depending on the IO direction.                     FILE-SIZE   (S -- d# )                                             Return the size in bytes of the current file.                REWIND      (S fcb -- )                                            Allows you to reread a file for the second pass.  Closes it     at sets up the FCB so that the next read occurs at the          beginning of the file.                                       CLOSE   (S -- )                                                    Close the currently open file.                                                                                                                                                                                                                                                                                               \ File System Interface                               09APR84HHLINPUT   (S -- )   Set IO state to reading.                      OUTPUT  (S -- )   Set IO state to writing.                      IN-FCB OUT-FCB      Reserved for input & output FCBs            IN&OUT   (S -- )                                                   Save the current Screen file, and read the input stream         for the name of the input and output file.  These names         are parsed and the fcbs are placed in the arrays above.         The input file is opened, and the output file is deleted        and created.  If an error occurs, the user is notified.         Finally the current Screen file is restored.                                                                                                                                                                                                                                                                                                                                                 \ Read and write bytes to a file                      09APR84HHL@BYTE   (S -- n )                                                  Read a byte from the input file, and place in on the stack.     This is the primitive through which all reads must pass,        since only it performs any actual IO.                        !BYTE   (S n -- )                                                  Take the byte from the stack and add it to the output file.     This is the primitive through which all writes must pass,       since only it performs any actual IO.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        \ Convert bytes into bits                             09APR84HHL(!BYTE)   (S -- )                                                  Pack together the bits in the bit buffer, and write result.  FLUSH-BYTE   (S -- )                                               If there are any leftover bits to write, write them, and        then perhaps flush the partially completed sector to disk.   (@BYTE)   (S -- )                                                  Read the next byte from the input file and unpack the bits      into the bit buffer.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         \ Read or Write a Bitstream                           09APR84HHL!BIT   (S n -- )                                                   Write a single bit to the output file.                                                                                       @BIT   (S -- n )                                                   Read a single bit from the input file.                                                                                       !BITS   (S c n -- )                                                Write up to 16 bits to the output file.  All bit level          write operations should use this word, and not !BIT above.                                                                   @BITS   (S n -- c )                                                Read up to 16 bits from the input file.  All bit level          read operations should use this word, and not @BIT above.                                                                                                                                    \ Build a Frequency Table                             09APR84HHLCOMPRESSING    Segregate the COMPRESSING portion of the utility                                                                 FREQUENCY-TABLE   Contains the number of occurances of each byteINCLUDE   (S char -- )                                             Increment the count in the frequency table for this char.                                                                    HUFFMAN         Used to build the tree of codes                  MIN1     MIN2  Contains the 2 smallest values in the Freq. Tab.>MIN1    >MIN2  Contains the index to the 2 smallest values                                                                                                                                                                                                                                                                                                                                                                                                     \ Construct a Huffman Code                            09APR84HHLMINIMUMS   (S -- f )                                               Run through the frequency table and find the two smallest       entries in it.  Since these are counts, we use an unsigned      comparison.  The minimum values found are stored in the         double variables MIN1 and MIN2.  The index into the table       of these values is stored in variables >MIN1 and >MIN2.         The flag returned is true if two minimums exist, and false      if there is only one entry left in the table.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                \ Construct a Huffman Code                            09APR84HHLJOIN-MINIMUMS   (S -- )                                            Combine the two minimum values found in the frequency table     into a new value which is the sum of the previous values.       Set the other minimum to zero, removing it.                  ENCODE-MINIMUMS   (S -- )                                          Generate the Huffman tree based on the two new minimum          values found in the frequency table.  The character values      are packed, two to a word.                                   ENCODE   (S -- )                                                   While minimums exist in the frequency table, we construct       our tree and combine them.  The end result of ENCODE is a       full tree, whose leaves contain characters.                                                                                                                                                                                                                  \ Display a Huffman Code                              29MAY84HHL>HLD  HLD    Collect the path data while searching the tree     +HOLD        Append the character to the path string            -HOLD        Delete the character from the path string          H-CODE       An array which points to the encoding for a char.  .HOLD        Write the collected string to the dictionary.      WHICH        Holds the character we are looking for             DECODE       A recursive, inorder search of the Huffman Tree       The leftmost nodes are searched for a leaf node.  The path      taken is collected using the HOLD mechanism set up above.       If the leaf does not match the character we are searching       for, we back up the path string and back up the path one        level, and search the right node.  When we do match, we         write out the collected path string.                         FLATTEN                                                            Flatten the Huffman Tree into an array indexed by character.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 \ Compress a string into its Huffman Equivalent       09APR84HHLCOMPRESS-BYTE   (S n -- )                                          Write the huffman code for the given byte to output file.    COMPRESS-ENCODING   (S -- )                                        We represent the encoding as follows:  If the character is      not present in the file, ie. the length of the huffman code     is zero, then we write out a 0 bit.  If the character is        present, we write a 1 bit, followed by 7 bits representing      the length of the encoding, followed by the encoding itself.                                                                                                                                                                                                                                                                                                                                                                                                                                                                 \ Read and file and Encode and Compress it            09APR84HHLCOMPRESS-FILE   (S d# -- )                                         Read through a file containing d# bytes of data, and            compress each byte per its Huffman code.                                                                                     ENCODE-FILE   (S d# -- )                                           Read through a file containing d# bytes of data, and            build a frequency table for it.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              \ Read the input file and write the compessed file    29MAY84HHLINITIALIZE   (S -- )                                               Initialize all of the relevant variables in order to compress   a file into its Huffman equivalent.                                                                                                                                                          COMPRESS   (S -- )                                                 Takes two arguments from the input stream, the input file       name and the output file name.  The input file is read and      compressed into the output file.  Every file created by         compress starts with two bytes containing 12345 followed by     32 bits of file length in bytes, followed by the compression.                                                                                                                                                                                                                                                                \ Expand a compressed file                            29MAY84HHLEXPANDING     Segregate the words associated with expanding.    ROOT       Points to the root of the rebuilt tree.              EXPAND-BITS   (S len char  -- )                                    Add a leaf to the tree containing char.   We read len bits      and either follow or create tree nodes depeding on the value    of the bit.  A leaf has a hex ff in byte 5, data in 4.       EXPAND-ENCODING   (S -- )                                          Initialize the tree to have 1 node, the root.                   Read through the compressed encoding description and            create the corresponding tree.                                                                                                                                                                                                                                                                                                                                                               \ Expand the input stream                             29MAY84HHLLEAF?   (S addr -- f )                                             Return non-zero if the node at addr is a leaf of the tree.   EXPAND-BYTE   (S -- )                                              Read bits from the file and follow the branches of the tree     until we hit a leaf.  Return the corresponding char.         EXPAND-FILE   (S d# -- )                                           Read d# bytes from the input file and expand them, writing      the expanded data to the output file.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        \ Expand a Compressed File                            09APR84HHLINITIALIZE   (S -- )                                               Initialize variables, and get file names from input stream                                                                   EXPAND                                                             The first 16 bits of the file to expand must be 12345, or       else we are trying to expand a file that we did not             compress.  This would be fatal.  The length is in the           next 32 bits, followed by the encoding & the data.