home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
The Fred Fish Collection 1.5
/
ffcollection-1-5-1992-11.iso
/
ff_disks
/
400-499
/
ff434.lzh
/
Backup
/
compress.c
< prev
next >
Wrap
C/C++ Source or Header
|
1991-01-18
|
12KB
|
531 lines
/*
* COMPRESS.C
*
*/
#include "defs.h"
#define ngetchar() oreadchar()
#define nputchar(n) mputc(n)
#ifndef min
#define min(a,b) ((a>b) ? b : a)
#endif
#define BITS 13
#ifdef _DCC
#define HSIZE 9001
#else
#if BITS == 16
#define HSIZE 69001 /* 95% occupancy */
#endif
#if BITS == 15
#define HSIZE 35023 /* 94% occupancy */
#endif
#if BITS == 14
#define HSIZE 18013 /* 91% occupancy */
#endif
#if BITS == 13
#define HSIZE 9001 /* 91% occupancy */
#endif
#if BITS <= 12
#define HSIZE 5003 /* 80% occupancy */
#endif
#endif
typedef long code_int;
typedef long count_int;
typedef unsigned char char_type;
#define MAXCODE(n_bits) ((1 << (n_bits)) - 1)
#define INIT_BITS 9 /* initial number of bits/code */
int n_bits; /* number of bits/code */
int maxbits; /* user settable max # bits/code */
code_int maxcode; /* maximum code, given n_bits */
code_int maxmaxcode; /* should NEVER generate this code */
__far count_int htab[HSIZE];
__far uword codetab[HSIZE];
#define htabof(i) htab[i]
#define codetabof(i) codetab[i]
code_int hsize = HSIZE; /* for dynamic table sizing */
#define tab_prefixof(i) codetabof(i)
#define tab_suffixof(i) ((char_type *)(htab))[i]
#define de_stack ((char_type *)&tab_suffixof(1<<BITS))
code_int free_ent; /* first unused entry */
code_int getcode();
void output (code_int);
void cl_block(void);
void cl_hash(count_int);
#define CHECK_GAP 10000 /* ratio check interval */
int block_compress = 1;
int clear_flg;
long ratio;
count_int checkpoint;
/*
* the next two codes should not be changed lightly, as they must not
* lie within the contiguous general code space.
*/
#define FIRST 257 /* first free entry */
#define CLEAR 256 /* table clear output code */
static int offset;
long int in_count = 1; /* length of input */
/*
* Compress a file to memory-buffers and return TRUE if the compressed
* size is smaller than the actual size.
*/
long
CompressFile(name, fsize)
char *name;
long fsize;
{
long fcode;
code_int i = 0;
int c;
code_int ent;
int disp;
code_int hsize_reg;
int hshift;
{
register NODE *node;
for (node = (NODE *)CSList.mlh_Head; node->ln_Succ; node = node->ln_Succ) {
if (WildCmp(node->ln_Name, name)) {
if (Verbose)
printf(" Will not compress %s\n", name);
return(0);
}
}
}
if (WildCmp("*.Z", name) || WildCmp("*.ARC", name) || WildCmp("*.ZOO", name)) {
if (Verbose)
printf(" Will not compress %s\n", name);
return(0);
}
CLen = 0;
CWrite = GetHead(&CList);
if (CWrite == NULL)
CWrite = NewSComp();
CWrite->N = 0;
setmem(htab, sizeof(htab), 0);
setmem(codetab, sizeof(codetab), 0);
hsize = HSIZE;
if ( fsize < (1 << 12) )
hsize = min ( 5003, HSIZE );
else if ( fsize < (1 << 13) )
hsize = min ( 9001, HSIZE );
else if ( fsize < (1 << 14) )
hsize = min ( 18013, HSIZE );
else if ( fsize < (1 << 15) )
hsize = min ( 35023, HSIZE );
else if ( fsize < 47000 )
hsize = min ( 50021, HSIZE );
offset = clear_flg = ratio = 0;
in_count = 1;
checkpoint = CHECK_GAP;
n_bits = INIT_BITS; /* number of bits/code */
maxbits = BITS; /* user settable max # bits/code */
maxcode = MAXCODE(INIT_BITS); /* maximum code, given n_bits */
maxmaxcode = 1 << BITS; /* should NEVER generate this code */
free_ent = ((block_compress) ? FIRST : 256 );
ent = ngetchar();
hshift = 0;
for ( fcode = (long) hsize; fcode < 65536L; fcode *= 2L )
hshift++;
hshift = 8 - hshift; /* set hash code range bound */
hsize_reg = hsize;
cl_hash((count_int)hsize_reg); /* clear hash table */
/*printf("hshift %d hsr %d,%d\n", hshift, hsize_reg, hsize);*/
while ((c = ngetchar()) != EOF) {
in_count++;
fcode = (long) (((long) c << maxbits) + ent);
i = ((c << hshift) ^ ent); /* xor hashing */
/*printf("c %02x fcode %08lx i %d ent %08lx\n", c, fcode, i, ent);*/
if (i >= hsize_reg) {
/*printf("compress error A1 : %d on $%02x ent %lx\n", i, c, ent);*/
if (i >= HSIZE) {
puts("XXX1");
continue;
}
}
if (htabof (i) == fcode) {
ent = codetabof(i);
continue;
} else if ((long)htabof (i) < 0) /* empty slot */
goto nomatch;
disp = hsize_reg - i; /* secondary hash (after G. Knott) */
if (i == 0)
disp = 1;
probe:
if ((i -= disp) < 0)
i += hsize_reg;
if (i >= hsize_reg || i < 0) {
/*printf("compress error A2 : %d on $%02x\n", i, c);*/
if (i >= HSIZE) {
puts("XXX2");
continue;
}
}
if (htabof (i) == fcode) {
ent = codetabof(i);
continue;
}
if ((long)htabof (i) > 0)
goto probe;
nomatch:
output ((code_int) ent);
ent = c;
if (free_ent < maxmaxcode) {
codetabof(i) = free_ent++; /* code -> hashtable */
htabof(i) = fcode;
}
else if ((count_int)in_count >= checkpoint && block_compress)
cl_block ();
}
/*
* Put out the final code.
*/
output((code_int)ent);
output((code_int)-1);
return(CLen < fsize);
}
static char buf[BITS];
char_type lmask[9] = {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00};
char_type rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
void
output( code )
code_int code;
{
register int r_off = offset, bits= n_bits;
register char * bp = buf;
if ( code >= 0 ) {
/*
* Get to the first byte.
*/
bp += (r_off >> 3);
r_off &= 7;
/*
* Since code is always >= 8 bits, only need to mask the first
* hunk on the left.
*/
*bp = (*bp & rmask[r_off]) | (code << r_off) & lmask[r_off];
bp++;
bits -= (8 - r_off);
code >>= 8 - r_off;
/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
if ( bits >= 8 ) {
*bp++ = code;
code >>= 8;
bits -= 8;
}
/* Last bits. */
if(bits)
*bp = code;
offset += n_bits;
if (offset == (n_bits << 3)) {
bp = buf;
bits = n_bits;
mwrite(bp, bits);
bp += bits;
bits = 0;
offset = 0;
}
/*
* If the next entry is going to be too big for the code size,
* then increase it, if possible.
*/
if (free_ent > maxcode || (clear_flg > 0)) {
/*
* Write the whole buffer, because the input side won't
* discover the size increase until after it has read it.
*/
if (offset > 0)
mwrite(buf, n_bits);
offset = 0;
if (clear_flg) {
n_bits = INIT_BITS;
maxcode = MAXCODE(INIT_BITS);
clear_flg = 0;
} else {
n_bits++;
if (n_bits == maxbits)
maxcode = maxmaxcode;
else
maxcode = MAXCODE(n_bits);
}
}
} else {
/*
* At EOF, write the rest of the buffer.
*/
if (offset > 0)
mwrite(buf, (offset + 7) / 8);
offset = 0;
}
}
char *
xrindex(s, c) /* For those who don't have it in libc.a */
register char *s, c;
{
char *p;
for (p = NULL; *s; s++) {
if (*s == c)
p = s;
}
return(p);
}
void
cl_block() /* table clear for block compress */
{
register long int rat;
checkpoint = in_count + CHECK_GAP;
if (in_count > 0x007fffff) { /* shift will overflow */
rat = CLen >> 8;
if (rat == 0) { /* Don't divide by zero */
rat = 0x7fffffff;
} else {
rat = in_count / rat;
}
} else {
rat = (in_count << 8) / CLen; /* 8 fractional bits */
}
if (rat > ratio) {
ratio = rat;
} else {
ratio = 0;
cl_hash ( (count_int) hsize );
free_ent = FIRST;
clear_flg = 1;
output ( (code_int) CLEAR );
}
}
void
cl_hash(hsize) /* reset code table */
register count_int hsize;
{
register count_int *htab_p = htab+hsize;
register long i;
register long m1 = -1;
i = hsize - 16;
do { /* might use Sys V memset(3) here */
*(htab_p-16) = m1;
*(htab_p-15) = m1;
*(htab_p-14) = m1;
*(htab_p-13) = m1;
*(htab_p-12) = m1;
*(htab_p-11) = m1;
*(htab_p-10) = m1;
*(htab_p-9) = m1;
*(htab_p-8) = m1;
*(htab_p-7) = m1;
*(htab_p-6) = m1;
*(htab_p-5) = m1;
*(htab_p-4) = m1;
*(htab_p-3) = m1;
*(htab_p-2) = m1;
*(htab_p-1) = m1;
htab_p -= 16;
} while ((i -= 16) >= 0);
for ( i += 16; i > 0; i-- )
*--htab_p = m1;
}
void
UnCompressFile(insize)
long insize;
{
register char_type *stackp;
register int finchar;
register code_int code, oldcode, incode;
/*
* As above, initialize the first 256 entries in the table.
*/
setmem(htab, sizeof(htab), 0);
setmem(codetab, sizeof(codetab), 0);
offset = clear_flg = ratio = 0;
in_count = 1;
checkpoint = CHECK_GAP;
n_bits = INIT_BITS; /* number of bits/code */
maxbits = BITS; /* user settable max # bits/code */
maxcode = MAXCODE(INIT_BITS); /* maximum code, given n_bits */
maxmaxcode = 1 << BITS; /* should NEVER generate this code */
for ( code = 255; code >= 0; code-- ) {
tab_prefixof(code) = 0;
tab_suffixof(code) = (char_type)code;
}
free_ent = ((block_compress) ? FIRST : 256 );
finchar = oldcode = getcode();
if (oldcode == -1) /* EOF already? */
return; /* Get out of here */
oputc((char)finchar); /* first code must be 8 bits = char */
stackp = de_stack;
while ((code = getcode()) > -1) {
if ((code == CLEAR) && block_compress) {
for (code = 255; code >= 0; code--)
tab_prefixof(code) = 0;
clear_flg = 1;
free_ent = FIRST - 1;
if ((code = getcode()) == -1) /* O, untimely death! */
break;
}
incode = code;
/*
* Special case for KwKwK string.
*/
if (code >= free_ent) {
*stackp++ = finchar;
code = oldcode;
}
/*
* Generate output characters in reverse order
*/
while ( code >= 256 ) {
*stackp++ = tab_suffixof(code);
code = tab_prefixof(code);
}
*stackp++ = finchar = tab_suffixof(code);
/*
* And put them out in forward order
*/
do
oputc (*--stackp);
while (stackp > de_stack);
/*
* Generate the new entry.
*/
if ((code=free_ent) < maxmaxcode) {
tab_prefixof(code) = (unsigned short)oldcode;
tab_suffixof(code) = finchar;
free_ent = code+1;
}
/*
* Remember previous code.
*/
oldcode = incode;
}
}
code_int
getcode()
{
/*
* On the VAX, it is important to have the register declarations
* in exactly the order given, or the asm will break.
*/
register code_int code;
static int offset = 0, size = 0;
static char_type buf[BITS];
register int r_off, bits;
register char_type *bp = buf;
if (clear_flg > 0 || offset >= size || free_ent > maxcode) {
/*
* If the next entry will be too big for the current code
* size, then we must increase the size. This implies reading
* a new buffer full, too.
*/
if ( free_ent > maxcode ) {
n_bits++;
if ( n_bits == maxbits )
maxcode = maxmaxcode; /* won't get any bigger now */
else
maxcode = MAXCODE(n_bits);
}
if ( clear_flg > 0) {
maxcode = MAXCODE (n_bits = INIT_BITS);
clear_flg = 0;
}
size = oread(buf, n_bits);
if (size <= 0)
return -1; /* end of file */
offset = 0;
size = (size << 3) - (n_bits - 1);
}
r_off = offset;
bits = n_bits;
/*
* Get to the first byte.
*/
bp += (r_off >> 3);
r_off &= 7;
/* Get first part (low order bits) */
code = (*bp++ >> r_off);
bits -= (8 - r_off);
r_off = 8 - r_off; /* now, offset into code word */
/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
if ( bits >= 8 ) {
code |= *bp++ << r_off;
r_off += 8;
bits -= 8;
}
/* high order bits. */
code |= (*bp & rmask[bits]) << r_off;
offset += n_bits;
return code;
}