home *** CD-ROM | disk | FTP | other *** search
- From: ddl@husc6.UUCP (Dan Lanciani)
- Newsgroups: comp.sources.misc
- Subject: arc that UnSquashes 2/2
- Message-ID: <2934@ncoast.UUCP>
- Date: 18 Jul 87 00:39:22 GMT
- Sender: allbery@ncoast.UUCP
- Organization: Harvard University Computer Services
- Lines: 685
- Approved: allbery@ncoast.UUCP
- X-Archive: comp.sources.misc/8707/55
-
- -----------continue here------------
-
- static unsigned INT eolist(index) /* find last duplicate */
- unsigned INT index;
- {
- INT temp;
-
- while(temp=string_tab[index].next) /* while more duplicates */
- index = temp;
-
- return index;
- }
-
- /* The hash() routine is used to find a spot in the hash table for a new
- entry. It performs a "hash and linear probe" lookup, using h() to
- calculate the starting hash value and eolist() to perform the linear
- probe. This routine DOES NOT detect a table full condition. That
- MUST be checked for elsewhere.
- */
-
- static unsigned INT hash(pred,foll) /* find spot in the string table */
- unsigned INT pred; /* code for preceeding string */
- unsigned char foll; /* char following string */
- {
- unsigned INT local, tempnext; /* scratch storage */
- struct entry *ep; /* allows faster table handling */
-
- local = (*h)(pred,foll); /* get initial hash value */
-
- if(!string_tab[local].used) /* if that spot is free */
- return local; /* then that's all we need */
-
- else /* else a collision has occured */
- { local = eolist(local); /* move to last duplicate */
-
- /* We must find an empty spot. We start looking 101 places
- down the table from the last duplicate.
- */
-
- tempnext = (local+101) & 0x0FFF;
- ep = &string_tab[tempnext]; /* initialize pointer */
-
- while(ep->used) /* while empty spot not found */
- { if(++tempnext==TABSIZE) /* if we are at the end */
- { tempnext = 0; /* wrap to beginning of table*/
- ep = string_tab;
- }
- else ++ep; /* point to next element in table */
- }
-
- /* local still has the pointer to the last duplicate, while
- tempnext has the pointer to the spot we found. We use
- this to maintain the chain of pointers to duplicates.
- */
-
- string_tab[local].next = tempnext;
-
- return tempnext;
- }
- }
-
- /* The unhash() function is used to search the hash table for a given key.
- Like hash(), it performs a hash and linear probe search. It returns
- either the number of the entry (if found) or NOT_FND (if not found).
- */
-
- static unsigned INT unhash(pred,foll) /* search string table for a key */
- unsigned INT pred; /* code of preceeding string */
- unsigned char foll; /* character following string */
- {
- unsigned INT local, offset; /* scratch storage */
- struct entry *ep; /* this speeds up access */
-
- local = (*h)(pred,foll); /* initial hash */
-
- while(1)
- { ep = &string_tab[local]; /* speed up table access */
-
- if((ep->predecessor==pred) && (ep->follower==foll))
- return local; /* we have a match */
-
- if(!ep->next) /* if no more duplicates */
- return NOT_FND; /* then key is not listed */
-
- local = ep->next; /* move on to next duplicate */
- }
- }
-
- /* The init_tab() routine is used to initialize our hash table.
- You realize, of course, that "initialize" is a complete misnomer.
- */
-
- static INT init_tab() /* set ground state in hash table */
- {
- unsigned INT i; /* table index */
- INT upd_tab();
-
- setmem((char *)string_tab,sizeof(string_tab),0);
-
- for(i=0; i<256; i++) /* list all single byte strings */
- upd_tab(NO_PRED,i);
-
- inbuf = EMPTY; /* nothing is in our buffer */
- }
-
- /* The upd_tab routine is used to add a new entry to the string table.
- As previously stated, no checks are made to ensure that the table
- has any room. This must be done elsewhere.
- */
-
- INT upd_tab(pred,foll) /* add an entry to the table */
- unsigned INT pred; /* code for preceeding string */
- unsigned INT foll; /* character which follows string */
- {
- struct entry *ep; /* pointer to current entry */
-
- /* calculate offset just once */
-
- ep = &string_tab[hash(pred,foll)];
-
- ep->used = TRUE; /* this spot is now in use */
- ep->next = 0; /* no duplicates after this yet */
- ep->predecessor = pred; /* note code of preceeding string */
- ep->follower = foll; /* note char after string */
- }
-
- /* This algorithm encoded a file into twelve bit strings (three nybbles).
- The gocode() routine is used to read these strings a byte (or two)
- at a time.
- */
-
- static INT gocode(fd) /* read in a twelve bit code */
- FILE *fd; /* file to get code from */
- {
- unsigned INT localbuf, returnval;
-
- if(inbuf==EMPTY) /* if on a code boundary */
- { if((localbuf=getc_unp(fd))==EOF) /* get start of next code */
- return EOF; /* pass back end of file status */
- localbuf &= 0xFF; /* mask down to true byte value */
- if((inbuf=getc_unp(fd))==EOF) /* get end of code, start of next */
- return EOF; /* this should never happen */
- inbuf &= 0xFF; /* mask down to true byte value */
-
- returnval = ((localbuf<<4)&0xFF0) + ((inbuf>>4)&0x00F);
- inbuf &= 0x000F; /* leave partial code pending */
- }
-
- else /* buffer contains first nybble */
- { if((localbuf=getc_unp(fd))==EOF)
- return EOF;
- localbuf &= 0xFF;
-
- returnval = localbuf + ((inbuf<<8)&0xF00);
- inbuf = EMPTY; /* note no hanging nybbles */
- }
- return returnval; /* pass back assembled code */
- }
-
- static INT push(c) /* push char onto stack */
- INT c; /* character to push */
- {
- stack[sp] = ((char) c); /* coerce integer into a char */
-
- if(++sp >= TABSIZE)
- abort("Stack overflow\n");
- }
-
- static INT pop() /* pop character from stack */
- {
- if(sp>0)
- return ((INT) stack[--sp]); /* leave ptr at next empty slot */
-
- else return EMPTY;
- }
-
- /***** LEMPEL-ZEV DECOMPRESSION *****/
-
- static INT code_count; /* needed to detect table full */
- static unsigned INT code; /* where we are so far */
- static INT firstc; /* true only on first character */
-
- INT init_ucr(new) /* get set for uncrunching */
- INT new; /* true to use new hash function */
- {
- if(new) /* set proper hash function */
- h = newh;
- else h = oldh;
-
- sp = 0; /* clear out the stack */
- init_tab(); /* set up atomic code definitions */
- code_count = TABSIZE - 256; /* note space left in table */
- firstc = 1; /* true only on first code */
- }
-
- INT getc_ucr(f) /* get next uncrunched byte */
- FILE *f; /* file containing crunched data */
- {
- unsigned INT c; /* a character of input */
- INT code, newcode;
- static INT oldcode, finchar;
- struct entry *ep; /* allows faster table handling */
-
- if(firstc) /* first code is always known */
- { firstc = FALSE; /* but next will not be first */
- oldcode = gocode(f);
- return finchar = string_tab[oldcode].follower;
- }
-
- if(!sp) /* if stack is empty */
- { if((code=newcode=gocode(f))==EOF)
- return EOF;
-
- ep = &string_tab[code]; /* initialize pointer */
-
- if(!ep->used) /* if code isn't known */
- { code = oldcode;
- ep = &string_tab[code]; /* re-initialize pointer */
- push(finchar);
- }
-
- while(ep->predecessor!=NO_PRED)
- { push(ep->follower); /* decode string backwards */
- code = ep->predecessor;
- ep = &string_tab[code];
- }
-
- push(finchar=ep->follower); /* save first character also */
-
- /* The above loop will terminate, one way or another,
- with string_tab[code].follower equal to the first
- character in the string.
- */
-
- if(code_count) /* if room left in string table */
- { upd_tab(oldcode,finchar);
- --code_count;
- }
-
- oldcode = newcode;
- }
-
- return pop(); /* return saved character */
- }
- SHAR_EOF
- fi # end of overwriting check
- if test -f 'arcm.h'
- then
- echo shar: will not over-write existing file "'arcm.h'"
- else
- cat << \SHAR_EOF > 'arcm.h'
- /*
- * $Header: arcm.h,v 1.2 86/07/15 07:53:40 turner Exp $
- */
-
- /*
- * $Log: arcm.h,v $
- * Revision 1.2 86/07/15 07:53:40 turner
- *
- * Revision 1.1 86/06/26 15:01:25 turner
- * initial version
- *
- */
-
- /*
- *
- * ARC archive utility - standard MACRO insert file.
- *
- * parameters:
- *
- */
-
- #define ARCMARK 26 /* special archive marker */
- #define ARCVER 9 /* archive header version code */
- #define STRLEN 100 /* system standard string length */
- #define FNLEN 13 /* file name length */
- #define MAXARG 25 /* maximum number of arguments */
-
- #define ARC 1
- #define XARC 0
- SHAR_EOF
- fi # end of overwriting check
- if test -f 'arcmatch.c'
- then
- echo shar: will not over-write existing file "'arcmatch.c'"
- else
- cat << \SHAR_EOF > 'arcmatch.c'
- static char *RCSid = "$Header: arcmatch.c,v 1.2 86/07/15 07:53:42 turner Exp $";
-
- /*
- * $Log: arcmatch.c,v $
- * Hack-attack 1.3 86/12/20 01:23:45 wilhite@usceast.uucp
- * Bludgeoned into submission for VAX 11/780 BSD4.2
- * (ugly code, but fewer core dumps)
- *
- * Revision 1.2 86/07/15 07:53:42 turner
- *
- * Revision 1.1 86/06/26 15:00:34 turner
- * initial version
- *
- */
-
- /* ARC - Archive utility - ARCMATCH
-
- $define(tag,$$segment(@1,$$index(@1,=)+1))#
- $define(version,Version $tag(
- TED_VERSION DB =2.17), created on $tag(
- TED_DATE DB =12/17/85) at $tag(
- TED_TIME DB =20:32:18))#
- $undefine(tag)#
- $version
-
- (C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED
-
- By: Thom Henderson
-
- Description:
- This file contains service routines needed to maintain an archive.
-
- Language:
- Computer Innovations Optimizing C86
- */
- #include <stdio.h>
- #include "arc.h"
-
- INT match(n,t) /* test name against template */
- char *n; /* name to test */
- char *t; /* template to test against */
- {
-
- #if MSDOS
- upper(n); upper(t); /* avoid case problems */
- #endif
-
- /* first match name part */
-
- while((*n && *n!='.') || (*t && *t!='.'))
- { if(*n!=*t && *t!='?') /* match fail? */
- { if(*t!='*') /* wildcard fail? */
- return 0; /* then no match */
- else /* else jump over wildcard */
- { while(*n && *n!='.')
- n++;
- while(*t && *t!='.')
- t++;
- break; /* name part matches wildcard */
- }
- }
- else /* match good for this char */
- { n++; /* advance to next char */
- t++;
- }
- }
-
- if(*n && *n=='.') n++; /* skip extension delimiters */
- if(*t && *t=='.') t++;
-
- /* now match name part */
-
- while(*n || *t)
- { if(*n!=*t && *t!='?') /* match fail? */
- { if(*t!='*') /* wildcard fail? */
- return 0; /* then no match */
- else return 1; /* else good enough */
- }
- else /* match good for this char */
- { n++; /* advance to next char */
- t++;
- }
- }
-
- return 1; /* match worked */
- }
-
- INT rempath(nargs,arg) /* remove paths from filenames */
- INT nargs; /* number of names */
- char *arg[]; /* pointers to names */
- {
- char *i, *rindex(); /* string index, reverse indexer */
- INT n; /* index */
-
- for(n=0; n<nargs; n++) /* for each supplied name */
- { if(!(i=rindex(arg[n],'\\'))) /* search for end of path */
- if(!(i=rindex(arg[n],'/')))
- i=rindex(arg[n],':');
- if(i) /* if path was found */
- arg[n] = i+1; /* then skip it */
- }
- }
- SHAR_EOF
- fi # end of overwriting check
- if test -f 'arcmisc.c'
- then
- echo shar: will not over-write existing file "'arcmisc.c'"
- else
- cat << \SHAR_EOF > 'arcmisc.c'
- #include <stdio.h>
- #include "arc.h"
-
- /* split up a file name (subroutine for makefnam)
- *
- * Hack-attack 1.3 86/12/20 01:23:45 wilhite@usceast.uucp
- * Bludgeoned into submission for VAX 11/780 BSD4.2
- * (ugly code, but fewer core dumps)
- */
-
- static INT _makefn(source,dest)
- unsigned char *source;
- unsigned char *dest;
- {
- INT j;
-
- setmem (dest, 17, 0); /* clear result field */
- if (strlen (source) > 1 && source[1] == ':')
- for (j = 0; j < 2;)
- dest[j++] = *source++;
- for (j = 3; *source && *source != '.'; ++source)
- if (j < 11)
- dest[j++] = *source;
- for (j = 12; *source; ++source)
- if (j < 16)
- dest[j++] = *source;
- }
- /* make a file name using a template
- */
-
- char *makefnam(rawfn,template,result)
- unsigned char *rawfn; /* the original file name */
- unsigned char *template; /* the template data */
- unsigned char *result; /* where to place the result */
- {
- unsigned char et[17],er[17];
-
- _makefn(template,et);
- _makefn(rawfn,er);
- *result=0; /* assure no data */
- strcat(result,er[0]?er:et);
- strcat(result,er[3]?er+3:et+3);
- strcat(result,er[12]?er+12:et+12);
- return ((char *)&result[0]);
- }
-
- INT freedir(dirs)
- register struct direct **dirs;
- {
- register INT ii;
-
- if(dirs == (struct direct **)0)
- return(-1);
- for(ii = 0; dirs[ii] != (struct direct *)0; ii++)
- free(dirs[ii]);
- free(dirs);
- return(0);
- }
-
- #if MSDOS
- #include <dir.h>
-
- INT alphasort(dirptr1, dirptr2)
- struct direct **dirptr1, **dirptr2;
- {
- return(strcmp((*dirptr1)->d_name, (*dirptr2)->d_name));
- }
-
- #endif
- SHAR_EOF
- fi # end of overwriting check
- if test -f 'arcpack.c'
- then
- echo shar: will not over-write existing file "'arcpack.c'"
- else
- cat << \SHAR_EOF > 'arcpack.c'
- static char *RCSid = "$Header: arcpack.c,v 1.2 86/07/15 07:53:48 turner Exp $";
-
- /*
- * $Log: arcpack.c,v $
- * Hack-attack 1.3 86/12/20 01:23:45 wilhite@usceast.uucp
- * Bludgeoned into submission for VAX 11/780 BSD4.2
- * (ugly code, but fewer core dumps)
- *
- * Revision 1.2 86/07/15 07:53:48 turner
- *
- * Revision 1.1 86/06/26 15:00:37 turner
- * initial version
- *
- */
-
- /* ARC - Archive utility - ARCPACK
-
- $define(tag,$$segment(@1,$$index(@1,=)+1))#
- $define(version,Version $tag(
- TED_VERSION DB =3.37), created on $tag(
- TED_DATE DB =02/03/86) at $tag(
- TED_TIME DB =22:58:01))#
- $undefine(tag)#
- $version
-
- (C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED
-
- By: Thom Henderson
-
- Description:
- This file contains the routines used to compress a file
- when placing it in an archive.
-
- Language:
- Computer Innovations Optimizing C86
- */
- #include <stdio.h>
- #include "arc.h"
-
- /* stuff for non-repeat packing */
-
- #define DLE 0x90 /* repeat sequence marker */
-
- static unsigned char state; /* current packing state */
-
- /* non-repeat packing states */
-
- #define NOHIST 0 /* don't consider previous input*/
- #define SENTCHAR 1 /* lastchar set, no lookahead yet */
- #define SENDNEWC 2 /* run over, send new char next */
- #define SENDCNT 3 /* newchar set, send count next */
-
- /* packing results */
-
- static long stdlen; /* length for standard packing */
- static INT crcval; /* CRC check value */
-
- INT pack(f,t,hdr) /* pack file into an archive */
- FILE *f, *t; /* source, destination */
- struct heads *hdr; /* pointer to header data */
- {
- INT c; /* one character of stream */
- long ncrlen; /* length after packing */
- long huflen; /* length after squeezing */
- long lzwlen; /* length after crunching */
- long pred_sq(), file_sq(); /* stuff for squeezing */
- long pred_cm(); /* dynamic crunching cleanup */
- char tnam[STRLEN]; /* temporary name buffer */
- char *makefnam(); /* filename fixer upper */
- FILE *crn = NULL; /* temporary crunch file */
- INT getch();
- INT getc_ncr();
- INT putc_pak();
-
- /* first pass - see which method is best */
-
- if(!nocomp) /* if storage kludge not active */
- { if(note)
- { printf(" analyzing, "); fflush(stdout);}
-
- if(arctemp) /* use temp area if specified */
- sprintf(tnam,"%s.CRN",arctemp);
- else makefnam("$ARCTEMP.CRN",arcname,tnam);
- #if MSDOS
- crn = fopen(tnam,"wrb");
- #endif
- #if BSD | ST
- crn = fopen(tnam,"w+");
- #endif
- state = NOHIST; /* initialize ncr packing */
- stdlen = ncrlen = 0; /* reset size counters */
- crcval = 0; /* initialize CRC check value */
- setcode(); /* initialize encryption */
-
- init_cm(f,crn); /* initialize for crunching */
- init_sq(); /* initialize for squeeze scan */
-
- while((c=getc_ncr(f))!=EOF) /* for each byte of file */
- { ncrlen++; /* one more packed byte */
- scan_sq(c); /* see what squeezing can do */
- putc_cm(c,crn); /* see what crunching can do */
- }
- huflen = pred_sq(); /* finish up after squeezing */
- lzwlen = pred_cm(crn); /* finish up after crunching */
- }
- else /* else kludge the method */
- { stdlen = 0; /* make standard look best */
- ncrlen = huflen = lzwlen = 1;
- }
-
- /* standard set-ups common to all methods */
-
- fseek(f,0L,0); /* rewind input */
- hdr->crc = crcval; /* note CRC check value */
- hdr->length = stdlen; /* set actual file length */
- state = NOHIST; /* reinitialize ncr packing */
- setcode(); /* reinitialize encryption */
-
- /* choose and use the shortest method */
-
- if(stdlen<=ncrlen && stdlen<=huflen && stdlen<=lzwlen)
- { if(kludge) /*DEBUG*/
- printf("(%ld) ",lzwlen-stdlen);
- if(note)
- { printf("storing, "); fflush(stdout);}/* store w/out compression */
- hdrver = 2; /* note packing method */
- stdlen = crcval = 0; /* recalc these for kludge */
- while((c=getch(f))!=EOF) /* store it straight */
- putc_pak(c,t);
- hdr->crc = crcval;
- hdr->length = hdr->size = stdlen;
- }
-
- else if(ncrlen<huflen && ncrlen<lzwlen)
- { if(kludge) /*DEBUG*/
- printf("(%ld) ",lzwlen-ncrlen);
- if(note)
- { printf("packing, ");
- fflush(stdout);} /* pack w/repeat suppression */
- hdrver = 3; /* note packing method */
- hdr->size = ncrlen; /* set data length */
- while((c=getc_ncr(f))!=EOF)
- putc_pak(c,t);
- }
-
- else if(huflen<lzwlen)
- { if(kludge) /*DEBUG*/
- printf("(%ld) ",lzwlen-huflen);
- if(note)
- { printf("squeezing, "); fflush(stdout);}
- hdrver = 4; /* note packing method */
- hdr->size = file_sq(f,t); /* note final size */
- }
-
- else
- { if(kludge) /*DEBUG*/
- printf("(%ld) ",huflen-lzwlen);
- if(note)
- { printf("crunching, "); fflush(stdout);}
- hdrver = 8;
- hdr->size = lzwlen; /* size should not change */
- if(crn) /* if temp was created */
- { fseek(crn,0L,0); /* then copy over crunched temp */
- while((c=fgetc(crn))!=EOF)
- putc_tst(c,t);
- }
- else /* else re-crunch */
- { init_cm(f,t);
- while((c=getc_ncr(f))!=EOF)
- putc_cm(c,t);
- pred_cm(t); /* finish up after crunching */
- }
- }
-
- /* standard cleanups common to all methods */
-
- if(crn) /* get rid of crunch temporary */
- { fclose(crn);
- if(unlink(tnam) && warn)
- { printf("Cannot delete temporary file %s\n",tnam);
- nerrs++;
- }
- }
- if(note)
- printf("done.\n");
- }
-
- /* Non-repeat compression - text is passed through normally, except that
- a run of more than two is encoded as:
-
- <char> <DLE> <count>
-
- Special case: a count of zero indicates that the DLE is really a DLE,
- not a repeat marker.
- */
-
- INT getc_ncr(f) /* get bytes with collapsed runs */
- FILE *f; /* file to get from */
- {
- static INT lastc; /* value returned on last call */
- static INT repcnt; /* repetition counter */
- static INT c; /* latest value seen */
-
- switch(state) /* depends on our state */
- {
- case NOHIST: /* no relevant history */
- state = S
-