home *** CD-ROM | disk | FTP | other *** search
/ Source Code 1992 March / Source_Code_CD-ROM_Walnut_Creek_March_1992.iso / usenet / altsrcs / 3 / 3330 < prev    next >
Internet Message Format  |  1991-05-15  |  22KB

  1. From: GHGAEA8@cc1.kuleuven.ac.be
  2. Newsgroups: alt.sources
  3. Subject: LZRW1/KH
  4. Message-ID: <91135.105825GHGAEA8@cc1.kuleuven.ac.be>
  5. Date: 15 May 91 10:56:24 GMT
  6.  
  7. ---------------------------------------------------------------
  8.     This posting includes the sources for the Turbo Pascal
  9. version of the LZRW1/KH compression algoritm.
  10. ---------------------------------------------------------------
  11. File #1 : The LZRW1KH unit
  12. --------------------------
  13.  
  14. {    ###################################################################   }
  15. {    ##                                                               ##   }
  16. {    ##      ##    ##### #####  ##   ##  ##      ## ##  ## ##  ##     ##   }
  17. {    ##      ##      ### ##  ## ## # ## ###     ##  ## ##  ##  ##     ##   }
  18. {    ##      ##     ###  #####  #######  ##    ##   ####   ######     ##   }
  19. {    ##      ##    ###   ##  ## ### ###  ##   ##    ## ##  ##  ##     ##   }
  20. {    ##      ##### ##### ##  ## ##   ## #### ##     ##  ## ##  ##     ##   }
  21. {    ##                                                               ##   }
  22. {    ##   EXTREMELY FAST AND EASY TO UNDERSTAND COMPRESSION ALGORITM  ##   }
  23. {    ##                                                               ##   }
  24. {    ###################################################################   }
  25. {    ##                                                               ##   }
  26. {    ##   This unit implements the updated LZRW1/KH algoritm which    ##   }
  27. {    ##   also implements  some RLE coding  which is usefull  when    ##   }
  28. {    ##   compress files  containing  a lot  of consecutive  bytes    ##   }
  29. {    ##   having the same value.   The algoritm is not as good  as    ##   }
  30. {    ##   LZH, but can compete with Lempel-Ziff.   It's the fasted    ##   }
  31. {    ##   one I've encountered upto now.                              ##   }
  32. {    ##                                                               ##   }
  33. {    ##                                                               ##   }
  34. {    ##                                                               ##   }
  35. {    ##                                                Kurt HAENEN    ##   }
  36. {    ##                                                               ##   }
  37. {    ###################################################################   }
  38.  
  39. UNIT LZRW1KH;
  40.  
  41. INTERFACE
  42.  
  43. CONST     BufferMaxSize  = 32768;
  44.           BufferMax      = BufferMaxSize-1;
  45.           FLAG_Copied    = $80;
  46.           FLAG_Compress  = $40;
  47.  
  48. TYPE BufferIndex    = 0..BufferMax;
  49.      BufferSize     = 0..BufferMaxSize;
  50.      BufferArray    = ARRAY [BufferIndex] OF BYTE;
  51.      BufferPtr      = ^BufferArray;
  52.  
  53. FUNCTION  Compression    (    Source,Dest    : BufferPtr;
  54.                               SourceSize     : BufferSize   )    : BufferSize;
  55.  
  56. FUNCTION  Decompression  (    Source,Dest    : BufferPtr;
  57.                               SourceSize     : BufferSize   )    : BufferSize;
  58.  
  59. IMPLEMENTATION
  60.  
  61. TYPE HashTable      = ARRAY [0..4095] OF INTEGER;
  62.  
  63. FUNCTION  GetMatch       (    Source         : BufferPtr;
  64.                               X              : BufferIndex;
  65.                               SourceSize     : BufferSize;
  66.                           VAR Hash           : HashTable;
  67.                           VAR Size           : WORD;
  68.                           VAR Pos            : BufferIndex  )    : BOOLEAN;
  69. VAR  HashValue      : WORD;
  70. BEGIN
  71.      HashValue := (40543*((((Source^[X] SHL 4) XOR Source^[X+1]) SHL 4) XOR
  72.                  Source^[X+2]) SHR 4) AND $0FFF;
  73.      GetMatch := FALSE;
  74.      IF (Hash[HashValue] <> -1) AND (X-Hash[HashValue] < 4096) THEN
  75.           BEGIN
  76.                Pos := Hash[HashValue];
  77.                Size := 0;
  78.                WHILE ((Size < 18) AND (Source^[X+Size] = Source^[Pos+Size])
  79.                     AND (X+Size < SourceSize)) DO
  80.                     INC(Size);
  81.                GetMatch := (Size >= 3)
  82.           END;
  83.      Hash[HashValue] := X
  84. END;
  85.  
  86.  
  87. FUNCTION  Compression    (    Source,Dest    : BufferPtr;
  88.                               SourceSize     : BufferSize   )    : BufferSize;
  89. VAR  Hash                     : HashTable;
  90.      Key,Bit,Command,Size     : WORD;
  91.      X,Y,Z,Pos                : BufferIndex;
  92. BEGIN
  93.      FOR Key := 0 TO 4095 DO Hash[Key] := -1;
  94.      Dest^[0] := FLAG_Compress;
  95.      X := 0;
  96.      Y := 3;
  97.      Z := 1;
  98.      Bit := 0;
  99.      Command := 0;
  100.      WHILE (X < SourceSize) AND (Y <= SourceSize) DO
  101.           BEGIN
  102.                IF (Bit > 15) THEN
  103.                     BEGIN
  104.                          Dest^[Z] := HI(Command);
  105.                          Dest^[Z+1] := LO(Command);
  106.                          Z := Y;
  107.                          Bit := 0;
  108.                          INC(Y,2)
  109.                     END;
  110.                Size := 1;
  111.                WHILE ((Source^[X] = Source^[X+Size]) AND (Size < $FFF)
  112.                     AND (X+Size < SourceSize)) DO
  113.                          INC(Size);
  114.                IF (Size >= 16)
  115.                     THEN BEGIN
  116.                               Dest^[Y] := 0;
  117.                               Dest^[Y+1] := HI(Size-16);
  118.                               Dest^[Y+2] := LO(Size-16);
  119.                               Dest^[Y+3] := Source^[X];
  120.                               INC(Y,4);
  121.                               INC(X,Size);
  122.                               Command := (Command SHL 1) + 1
  123.                          END
  124.                     ELSE IF (GetMatch(Source,X,SourceSize,Hash,Size,Pos))
  125.                               THEN BEGIN
  126.                                         Key := ((X-Pos) SHL 4) + (Size-3);
  127.                                         Dest^[Y] := HI(Key);
  128.                                         Dest^[Y+1] := LO(Key);
  129.                                         INC(Y,2);
  130.                                         INC(X,Size);
  131.                                         Command := (Command SHL 1) + 1
  132.                                    END
  133.                               ELSE BEGIN
  134.                                         Dest^[Y] := Source^[X];
  135.                                         INC(Y);
  136.                                         INC(X);
  137.                                         Command := Command SHL 1
  138.                                    END;
  139.                INC(Bit);
  140.           END;
  141.      Command := Command SHL (16-Bit);
  142.      Dest^[Z] := HI(Command);
  143.      Dest^[Z+1] := LO(Command);
  144.      IF (Y > SourceSize) THEN
  145.           BEGIN
  146.                MOVE(Source^[0],Dest^[1],SourceSize);
  147.                Dest^[0] := FLAG_Copied;
  148.                Y := SUCC(SourceSize)
  149.           END;
  150.      Compression := Y
  151. END;
  152.  
  153. FUNCTION  Decompression  (    Source,Dest    : BufferPtr;
  154.                               SourceSize     : BufferSize   )    : BufferSize;
  155. VAR  X,Y,Pos                  : BufferIndex;
  156.      Command,Size,K           : WORD;
  157.      Bit                      : BYTE;
  158. BEGIN
  159.      IF (Source^[0] = FLAG_Copied)
  160.           THEN FOR Y := 1 TO PRED(SourceSize) DO
  161.                     Dest^[PRED(Y)] := Source^[Y]
  162.           ELSE BEGIN
  163.                     Y := 0;
  164.                     X := 3;
  165.                     Command := (Source^[1] SHL 8)+Source^[2];
  166.                     Bit := 16;
  167.                     WHILE (X < SourceSize) DO
  168.                          BEGIN
  169.                               IF (Bit = 0) THEN
  170.                                    BEGIN
  171.                                         Command := (Source^[X] SHL 8)
  172.                                                       +Source^[X+1];
  173.                                         Bit := 16;
  174.                                         INC(X,2)
  175.                                    END;
  176.                               IF ((Command AND $8000) = 0)
  177.                                    THEN BEGIN
  178.                                              Dest^[Y] := Source^[X];
  179.                                              INC(X);
  180.                                              INC(Y)
  181.                                         END
  182.                                    ELSE BEGIN
  183.                                              Pos := ((Source^[X] SHL 4)
  184.                                                     +(Source^[X+1] SHR 4));
  185.                                              IF (Pos = 0)
  186.                                                   THEN BEGIN
  187. {----------------------------------------------------------------}
  188.      Size := (Source^[X+1] SHL 8) + Source^[X+2] + 15;
  189.      FOR K := 0 TO Size DO
  190.           Dest^[Y+K] := Source^[X+3];
  191.      INC(X,4);
  192.      INC(Y,Size+1)
  193. {----------------------------------------------------------------}
  194.                                                        END
  195.                                                   ELSE BEGIN
  196. {----------------------------------------------------------------}
  197.      Size := (Source^[X+1] AND $0F)+2;
  198.      FOR K := 0 TO Size DO
  199.           Dest^[Y+K] := Dest^[Y-Pos+K];
  200.      INC(X,2);
  201.      INC(Y,Size+1)
  202. {----------------------------------------------------------------}
  203.                                                        END;
  204.                                         END;
  205.                               Command := Command SHL 1;
  206.                               DEC(Bit)
  207.                          END
  208.                END;
  209.      Decompression := Y
  210. END;
  211.  
  212. END.
  213.  
  214. -------------------------------------------------------------------
  215. End of File # 1
  216.  
  217.  
  218.  
  219. File #2 : A small demonstration
  220. -------------------------------
  221.  
  222.  
  223. {    ###################################################################   }
  224. {    ##                                                               ##   }
  225. {    ##      ##    ##### #####  ##   ##  ##      ## ##  ## ##  ##     ##   }
  226. {    ##      ##      ### ##  ## ## # ## ###     ##  ## ##  ##  ##     ##   }
  227. {    ##      ##     ###  #####  #######  ##    ##   ####   ######     ##   }
  228. {    ##      ##    ###   ##  ## ### ###  ##   ##    ## ##  ##  ##     ##   }
  229. {    ##      ##### ##### ##  ## ##   ## #### ##     ##  ## ##  ##     ##   }
  230. {    ##                                                               ##   }
  231. {    ##   EXTREMELY FAST AND EASY TO UNDERSTAND COMPRESSION ALGORITM  ##   }
  232. {    ##                                                               ##   }
  233. {    ###################################################################   }
  234. {    ##                                                               ##   }
  235. {    ##   In an earlier posting I've already presented a 680x0        ##   }
  236. {    ##   assembler routine to implement optimized LZRW1 compression. ##   }
  237. {    ##   I've chosen then name LZRW1/KH for this optimized           ##   }
  238. {    ##   algoritm, to distinguish it from the original one.  The     ##   }
  239. {    ##   changes can be found in the maximum length for a match,     ##   }
  240. {    ##   which is 16 in the original algoritm, but 18 in the         ##   }
  241. {    ##   optimized one.  This is not a big change, but nevertheless  ##   }
  242. {    ##   it can increase the compression by 1/8.  Another thing      ##   }
  243. {    ##   I've tried to do is to make this program easy to            ##   }
  244. {    ##   understand.  Although I have some knowledge of C, I always  ##   }
  245. {    ##   find it difficult to understand someone else his programs.  ##   }
  246. {    ##   Especially if they depend on the SHORT CIRCUIT BOOLEAN      ##   }
  247. {    ##   evaluation of C : Test || Test || Test will only be         ##   }
  248. {    ##   executed fully if Test was TRUE the first three times ...   ##   }
  249. {    ##   Took me awhile to figure it out, although it seems quite    ##   }
  250. {    ##   natural now ... Enough of this, let's see some code ...     ##   }
  251. {    ##                                                               ##   }
  252. {    ##   Sorry, no list of people to thank this time ... It hasn't   ##   }
  253. {    ##   changed since my last posting.                              ##   }
  254. {    ##                                                Kurt Haenen    ##   }
  255. {    ##                                                               ##   }
  256. {    ###################################################################   }
  257.  
  258. PROGRAM   CompressionDemo(input,output);
  259.  
  260. USES LZRW1KH;
  261.  
  262. CONST     CompIdentifier : LONGINT      = (((((((ORD('L') SHL 8)+ORD('Z'))
  263.                                    SHL 8)+ORD('R')) SHL 8)+ORD('W')) SHL 8);
  264.  
  265. VAR  SRCFP,DSTFP         : FILE;
  266.      SRCBuf,DSTBuf       : ARRAY [0..16390] OF BYTE;
  267.      SRCSize,DSTSize     : WORD;
  268.      Tmp                 : WORD;
  269.      Identifier          : LONGINT;
  270.      InSize,OutSize      : LONGINT;
  271.  
  272. BEGIN
  273.      IF ((PARAMCOUNT <> 2) AND ((PARAMCOUNT <> 3) OR (PARAMSTR(1) <> '-D')))
  274.              THEN
  275.           BEGIN
  276.                WRITELN;
  277.                WRITELN('USAGE : COMPRESS [-D] <Source File> <Dest File>');
  278.                WRITELN('        (The -D option is case sensitive !)');
  279.                WRITELN;
  280.                HALT
  281.           END;
  282.      IF (PARAMCOUNT = 2)
  283.           THEN BEGIN
  284.                     WRITELN('TRYING TO COMPRESS ',PARAMSTR(1),' TO ',
  285.                                     PARAMSTR(2),'.');
  286.                     ASSIGN(SRCFP,PARAMSTR(1));
  287.                     ASSIGN(DSTFP,PARAMSTR(2));
  288.                     RESET(SRCFP,1);
  289.                     IF (IOResult <> 0) THEN
  290.                          BEGIN
  291.                               WRITELN;
  292.                               WRITELN('FILE ',PARAMSTR(1),' NOT FOUND !');
  293.                               WRITELN;
  294.                               HALT
  295.                          END;
  296.                     REWRITE(DSTFP,1);
  297.                     IF (IOResult <> 0) THEN
  298.                          BEGIN
  299.                               CLOSE(SRCFP);
  300.                               WRITELN;
  301.                               WRITELN('FILE ',PARAMSTR(2),' COULD NOT '+
  302.                                                    'BE OPENED !');
  303.                               WRITELN;
  304.                               HALT
  305.                          END;
  306.                     BLOCKWRITE(DSTFP,CompIdentifier,SIZEOF(LONGINT),Tmp);
  307.                     IF (Tmp <> SIZEOF(LONGINT)) THEN
  308.                          BEGIN
  309.                               CLOSE(SRCFP);
  310.                               CLOSE(DSTFP);
  311.                               ERASE(DSTFP);
  312.                               WRITELN;
  313.                               WRITELN('ERROR WRITING TO ',PARAMSTR(2),' !');
  314.                               WRITELN;
  315.                               HALT
  316.                          END;
  317.                     SRCSize := 16384;
  318.                     InSize := 0;
  319.                     OutSize := SIZEOF(LONGINT);
  320.                     WHILE (SRCSize = 16384) DO
  321.                          BEGIN
  322.                               BLOCKREAD(SRCFP,SRCBuf[0],16384,SRCSize);
  323.                               INC(InSize,SRCSize);
  324.                               WRITE('READ : ',InSize,'  WRITTEN : ',
  325.                                             OutSize,#13);
  326.                               DSTSize := Compression(ADDR(SRCBuf[0]),
  327.                                               ADDR(DSTBuf[0]),SRCSize);
  328.                               BLOCKWRITE(DSTFP,DSTSize,SIZEOF(WORD),Tmp);
  329.                               INC(OutSize,Tmp);
  330.                               WRITE('READ : ',InSize,'  WRITTEN : ',
  331.                                              OutSize,#13);
  332.                               IF (Tmp <> SIZEOF(WORD)) THEN
  333.                                    BEGIN
  334.                                         CLOSE(SRCFP);
  335.                                         CLOSE(DSTFP);
  336.                                         ERASE(DSTFP);
  337.                                         WRITELN;
  338.                                         WRITELN('ERROR WRITING TO ',
  339.                                                PARAMSTR(2),' !');
  340.                                         WRITELN;
  341.                                         HALT
  342.                                    END;
  343.                               BLOCKWRITE(DSTFP,DSTBuf[0],DSTSize,Tmp);
  344.                               INC(OutSize,Tmp);
  345.                               WRITE('READ : ',InSize,'  WRITTEN : ',
  346.                                    OutSize,#13);
  347.                               IF (Tmp <> DSTSize) THEN
  348.                                    BEGIN
  349.                                         CLOSE(SRCFP);
  350.                                         CLOSE(DSTFP);
  351.                                         ERASE(DSTFP);
  352.                                         WRITELN;
  353.                                         WRITELN('ERROR WRITING TO ',
  354.                                                    PARAMSTR(2),' !');
  355.                                         WRITELN;
  356.                                         HALT
  357.                                    END;
  358.                          END;
  359.                     WRITELN;
  360.                     WRITELN('FILE SUCCESFULLY COMPRESSED !');
  361.                     CLOSE(SRCFP);
  362.                     CLOSE(DSTFP)
  363.                END
  364.           ELSE BEGIN
  365.                     WRITELN('TRYING TO DECOMPRESS ',PARAMSTR(2),' TO ',
  366.                                        PARAMSTR(3),'.');
  367.                     ASSIGN(SRCFP,PARAMSTR(2));
  368.                     ASSIGN(DSTFP,PARAMSTR(3));
  369.                     RESET(SRCFP,1);
  370.                     IF (IOResult <> 0) THEN
  371.                          BEGIN
  372.                               WRITELN;
  373.                               WRITELN('FILE ',PARAMSTR(2),' NOT FOUND !');
  374.                               WRITELN;
  375.                               HALT
  376.                          END;
  377.                     REWRITE(DSTFP,1);
  378.                     IF (IOResult <> 0) THEN
  379.                          BEGIN
  380.                               CLOSE(SRCFP);
  381.                               WRITELN;
  382.                               WRITELN('FILE ',PARAMSTR(3),' COULD NOT '+
  383.                                                       'BE OPENED !');
  384.                               WRITELN;
  385.                               HALT
  386.                          END;
  387.                     BLOCKREAD(SRCFP,Identifier,SIZEOF(LONGINT),Tmp);
  388.                     IF (Tmp <> SIZEOF(LONGINT)) THEN
  389.                          BEGIN
  390.                               CLOSE(SRCFP);
  391.                               CLOSE(DSTFP);
  392.                               ERASE(DSTFP);
  393.                               WRITELN;
  394.                               WRITELN('ERROR READING FROM ',PARAMSTR(2),' !');
  395.                               WRITELN;
  396.                               HALT
  397.                          END;
  398.                     IF (Identifier <> CompIdentifier) THEN
  399.                          BEGIN
  400.                               CLOSE(SRCFP);
  401.                               CLOSE(DSTFP);
  402.                               ERASE(DSTFP);
  403.                               WRITELN;
  404.                               WRITELN('FILE ',PARAMSTR(2),
  405.                                             ' IS NOT A COMPRESSED FILE !');
  406.                               WRITELN;
  407.                               HALT
  408.                          END;
  409.                     DSTSize := 16384;
  410.                     InSize := SIZEOF(LONGINT);
  411.                     OutSize := 0;
  412.                     WHILE (DSTSize = 16384) DO
  413.                          BEGIN
  414.                               BLOCKREAD(SRCFP,SRCSize,SIZEOF(WORD),Tmp);
  415.                               IF (Tmp <> SIZEOF(WORD)) THEN
  416.                                    BEGIN
  417.                                         CLOSE(SRCFP);
  418.                                         CLOSE(DSTFP);
  419.                                         ERASE(DSTFP);
  420.                                         WRITELN;
  421.                                         WRITELN('ERROR READING FROM ',
  422.                                               PARAMSTR(2),' !');
  423.                                         WRITELN;
  424.                                         HALT
  425.                                    END;
  426.                               BLOCKREAD(SRCFP,SRCBuf[0],SRCSize,Tmp);
  427.                               INC(InSize,Tmp+SIZEOF(WORD));
  428.                               WRITE('READ : ',InSize,'  WRITTEN : ',
  429.                                         OutSize,#13);
  430.                               IF (Tmp <> SRCSize) THEN
  431.                                    BEGIN
  432.                                         CLOSE(SRCFP);
  433.                                         CLOSE(DSTFP);
  434.                                         ERASE(DSTFP);
  435.                                         WRITELN;
  436.                                         WRITELN('ERROR READING FROM ',
  437.                                                   PARAMSTR(2),' !');
  438.                                         WRITELN;
  439.                                         HALT
  440.                                    END;
  441.                               DSTSize := Decompression(ADDR(SRCBuf[0]),
  442.                                                ADDR(DstBuf[0]),SRCSize);
  443.                               BLOCKWRITE(DSTFP,DSTBuf[0],DSTSize,Tmp);
  444.                               INC(OutSize,Tmp);
  445.                               WRITE('READ : ',InSize,'  WRITTEN : ',
  446.                                             OutSize,#13);
  447.                               IF (Tmp <> DSTSize) THEN
  448.                                    BEGIN
  449.                                         CLOSE(SRCFP);
  450.                                         CLOSE(DSTFP);
  451.                                         ERASE(DSTFP);
  452.                                         WRITELN;
  453.                                         WRITELN('ERROR WRITING TO ',
  454.                                                    PARAMSTR(3),' !');
  455.                                         WRITELN;
  456.                                         HALT
  457.                                    END;
  458.                          END;
  459.                     WRITELN;
  460.                     WRITELN('FILE SUCCESFULLY DECOMPRESSED !');
  461.                     CLOSE(SRCFP);
  462.                     CLOSE(DSTFP)
  463.                END
  464. END.
  465.  
  466. -------------------------------------------------------------------
  467. End of file # 2
  468.  
  469.  
  470. Ok, that was the listing ...   I hope everything is ok.  Some of
  471. you may get some word-wraps etc. because of the long lines I used
  472. in the source.  I tried to make them as readable as possible, but
  473. sometime I had to include some backwards indented blocks, which
  474. are separated from the rest of the program by {----}.  I hope this
  475. still remains readable ... Hope to hear from you all soon ...
  476.  
  477.                                                     Kurt Haenen
  478.