GEMini Atari

home *** CD-ROM | disk | FTP | other *** search

/ GEMini Atari / GEMini_Atari_CD-ROM_Walnut_Creek_December_1993.iso / files / archiver / arc / arcsqs.c < prev next >

Wrap

C/C++ Source or Header | 1993-07-07 | 12KB | 469 lines

/* * $Header: arcsqs.c,v 1.3 88/07/31 18:54:14 hyc Exp $ */ /* ARC - Archive utility - SQUASH (C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED This is a quick hack to ARCLZW to make it handle squashed archives. Dan Lanciani (ddl@harvard.*) July 87 */ /* * $Header: arcsqs.c,v 1.3 88/07/31 18:54:14 hyc Exp $ */ #include <stdio.h> #include "arc.h" #if MSDOS char *setmem(); #else #ifndef __STDC__ char *memset(); #endif #endif int getc_unp(); void putc_pak(), putc_unp(); static void putcode(); /* definitions for the new dynamic Lempel-Zev crunching */ #define BITS 13 /* maximum bits per code */ #define HSIZE 10007 /* 80% occupancy */ #define INIT_BITS 9 /* initial number of bits/code */ static int n_bits; /* number of bits/code */ static int maxcode; /* maximum code, given n_bits */ #define MAXCODE(n) ((1<<(n)) - 1) /* maximum code calculation */ static int maxcodemax = 1 << BITS; /* largest possible code (+1) */ static unsigned char buf[BITS]; /* input/output buffer */ static unsigned char lmask[9] = /* left side masks */ {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00}; static unsigned char rmask[9] = /* right side masks */ {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; static int offset; /* byte offset for code output */ static long in_count; /* length of input */ static long bytes_out; /* length of compressed output */ static unsigned short ent; long htab[HSIZE]; /* hash code table (crunch) */ unsigned short codetab[HSIZE]; /* string code table (crunch) */ static unsigned short *prefix = codetab; /* prefix code table (uncrunch) */ static unsigned char *suffix=(unsigned char *)htab; /* suffix table (uncrunch) */ static int free_ent; /* first unused entry */ static int firstcmp; /* true at start of compression */ unsigned char stack[HSIZE]; /* local push/pop stack */ /* * block compression parameters -- after all codes are used up, * and compression rate changes, start over. */ static int clear_flg; static long ratio; #define CHECK_GAP 10000 /* ratio check interval */ static long checkpoint; /* * the next two codes should not be changed lightly, as they must not * lie within the contiguous general code space. */ #define FIRST 257 /* first free entry */ #define CLEAR 256 /* table clear output code */ static void cl_block(t) /* table clear for block compress */ FILE *t; /* our output file */ { long rat; checkpoint = in_count + CHECK_GAP; if (in_count > 0x007fffffL) { /* shift will overflow */ rat = bytes_out >> 8; if (rat == 0) /* Don't divide by zero */ rat = 0x7fffffffL; else rat = in_count / rat; } else rat = (in_count << 8) / bytes_out; /* 8 fractional bits */ if (rat > ratio) ratio = rat; else { ratio = 0; setmem(htab, HSIZE * sizeof(long), 0xff); free_ent = FIRST; clear_flg = 1; putcode(CLEAR, t); } } /***************************************************************** * * Output a given code. * Inputs: * code: A n_bits-bit integer. If == -1, then EOF. This assumes * that n_bits =< (long)wordsize - 1. * Outputs: * Outputs code to the file. * Assumptions: * Chars are 8 bits long. * Algorithm: * Maintain a BITS character long buffer (so that 8 codes will * fit in it exactly). When the buffer fills up empty it and start over. */ static void putcode(code, t) /* output a code */ int code; /* code to output */ FILE *t; /* where to put it */ { int r_off = offset; /* right offset */ int bits = n_bits; /* bits to go */ unsigned char *bp = buf; /* buffer pointer */ int n; /* index */ register int ztmp; if (code >= 0) { /* if a real code *//* Get to the first byte. */ bp += (r_off >> 3); r_off &= 7; /* * Since code is always >= 8 bits, only need to mask the * first hunk on the left. */ ztmp = (code << r_off) & lmask[r_off]; *bp = (*bp & rmask[r_off]) | ztmp; bp++; bits -= (8 - r_off); code >>= (8 - r_off); /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ if (bits >= 8) { *bp++ = code; code >>= 8; bits -= 8; } /* Last bits. */ if (bits) *bp = code; offset += n_bits; if (offset == (n_bits << 3)) { bp = buf; bits = n_bits; bytes_out += bits; do putc_pak(*bp++, t); while (--bits); offset = 0; } /* * If the next entry is going to be too big for the code * size, then increase it, if possible. */ if (free_ent > maxcode || clear_flg > 0) { /* Write the whole * buffer, because the * input side won't * discover the size * increase until after * it has read it. */ if (offset > 0) { bp = buf; /* reset pointer for writing */ bytes_out += n = n_bits; while (n--) putc_pak(*bp++, t); } offset = 0; if (clear_flg) { /* reset if clearing */ maxcode = MAXCODE(n_bits = INIT_BITS); clear_flg = 0; } else {/* else use more bits */ n_bits++; if (n_bits == BITS) maxcode = maxcodemax; else maxcode = MAXCODE(n_bits); } } } else { /* dump the buffer on EOF */ bytes_out += n = (offset + 7) / 8; if (offset > 0) while (n--) putc_pak(*bp++, t); offset = 0; } } /***************************************************************** * * Read one code from the standard input. If EOF, return -1. * Inputs: * cmpin * Outputs: * code or -1 is returned. */ static int getcode(f) /* get a code */ FILE *f; /* file to get from */ { int code; static int offset = 0, size = 0; int r_off, bits; unsigned char *bp = buf; if (clear_flg > 0 || offset >= size || free_ent > maxcode) { /* If the next entry will be too big for the current code * size, then we must increase the size. This implies reading * a new buffer full, too. */ if (free_ent > maxcode) { n_bits++; if (n_bits == BITS) maxcode = maxcodemax; /* won't get any bigger * now */ else maxcode = MAXCODE(n_bits); } if (clear_flg > 0) { maxcode = MAXCODE(n_bits = INIT_BITS); clear_flg = 0; } for (size = 0; size < n_bits; size++) { if ((code = getc_unp(f)) == EOF) break; else buf[size] = code; } if (size <= 0) return -1; /* end of file */ offset = 0; /* Round size down to integral number of codes */ size = (size << 3) - (n_bits - 1); } r_off = offset; bits = n_bits; /* * Get to the first byte. */ bp += (r_off >> 3); r_off &= 7; /* Get first part (low order bits) */ code = (*bp++ >> r_off); bits -= 8 - r_off; r_off = 8 - r_off; /* now, offset into code word */ /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ if (bits >= 8) { code |= *bp++ << r_off; r_off += 8; bits -= 8; } /* high order bits. */ code |= (*bp & rmask[bits]) << r_off; offset += n_bits; return code; } /* * compress a file * * Algorithm: use open addressing double hashing (no chaining) on the * prefix code / next character combination. We do a variant of Knuth's * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime * secondary probe. Here, the modular division first probe is gives way * to a faster exclusive-or manipulation. Also do block compression with * an adaptive reset, where the code table is cleared when the compression * ratio decreases, but after the table fills. The variable-length output * codes are re-sized at this point, and a special CLEAR code is generated * for the decompressor. */ void sqinit_cm() /* initialize for compression */ { offset = 0; bytes_out = 0; clear_flg = 0; ratio = 0; in_count = 1; checkpoint = CHECK_GAP; maxcode = MAXCODE(n_bits = INIT_BITS); free_ent = FIRST; setmem(htab, HSIZE * sizeof(long), 0xff); n_bits = INIT_BITS; /* set starting code size */ firstcmp = 1; /* next byte will be first */ } void sqputc_cm(c, t) /* compress a character */ unsigned char c; /* character to compress */ FILE *t; /* where to put it */ { static long fcode; static int hshift; int i; int disp; if (firstcmp) { /* special case for first byte */ ent = c; /* remember first byte */ hshift = 0; for (fcode = (long) HSIZE; fcode < 65536L; fcode *= 2L) hshift++; hshift = 8 - hshift; /* set hash code range bund */ firstcmp = 0; /* no longer first */ return; } in_count++; fcode = (long) (((long) c << BITS) + ent); i = (c << hshift) ^ ent;/* xor hashing */ if (htab[i] == fcode) { ent = codetab[i]; return; } else if (htab[i] < 0) /* empty slot */ goto nomatch; disp = HSIZE - i; /* secondary hash (after G.Knott) */ if (i == 0) disp = 1; probe: if ((i -= disp) < 0) i += HSIZE; if (htab[i] == fcode) { ent = codetab[i]; return; } if (htab[i] > 0) goto probe; nomatch: putcode(ent, t); ent = c; if (free_ent < maxcodemax) { codetab[i] = free_ent++; /* code -> hashtable */ htab[i] = fcode; } else if ((long) in_count >= checkpoint) cl_block(t); } long sqpred_cm(t) /* finish compressing a file */ FILE *t; /* where to put it */ { putcode(ent, t); /* put out the final code */ putcode(-1, t); /* tell output we are done */ return bytes_out; /* say how big it got */ } /* * Decompress a file. This routine adapts to the codes in the file * building the string table on-the-fly; requiring no table to be stored * in the compressed file. The tables used herein are shared with those of * the compress() routine. See the definitions above. */ void sqdecomp(f, t) /* decompress a file */ FILE *f; /* file to read codes from */ FILE *t; /* file to write text to */ { unsigned char *stackp; int finchar; int code, oldcode, incode; n_bits = INIT_BITS; /* set starting code size */ clear_flg = 0; /* * As above, initialize the first 256 entries in the table. */ maxcode = MAXCODE(n_bits = INIT_BITS); for (code = 255; code >= 0; code--) { prefix[code] = 0; suffix[code] = (unsigned char) code; } free_ent = FIRST; finchar = oldcode = getcode(f); if (oldcode == -1) /* EOF already? */ return; /* Get out of here */ putc_unp((char) finchar, t); /* first code must be 8 bits=char */ stackp = stack; while ((code = getcode(f)) > -1) { if (code == CLEAR) { for (code = 255; code >= 0; code--) prefix[code] = 0; clear_flg = 1; free_ent = FIRST - 1; if ((code = getcode(f)) == -1) /* O, untimely death! */ break; } incode = code; /* * Special case for KwKwK string. */ if (code >= free_ent) { if (code > free_ent) { if (warn) { printf("Corrupted compressed file.\n"); printf("Invalid code %d when max is %d.\n", code, free_ent); } nerrs++; return; } *stackp++ = finchar; code = oldcode; } /* * Generate output characters in reverse order */ while (code >= 256) { *stackp++ = suffix[code]; code = prefix[code]; } *stackp++ = finchar = suffix[code]; /* * And put them out in forward order */ do putc_unp(*--stackp, t); while (stackp > stack); /* * Generate the new entry. */ if ((code = free_ent) < maxcodemax) { prefix[code] = (unsigned short) oldcode; suffix[code] = finchar; free_ent = code + 1; } /* * Remember previous code. */ oldcode = incode; } }