The Datafile PD-CD 1 Issue 2

home *** CD-ROM | disk | FTP | other *** search

/ The Datafile PD-CD 1 Issue 2 / PDCD-1 - Issue 02.iso / _comms / comms / _uuexplode / !uuexplode_c_uuexplode < prev next >

Wrap

Text File | 1993-08-31 | 23.3 KB | 666 lines

/* * uuexplode-1.5.c (based on kiss 1.0) * Author kiss 1.0: Kevin Yang * Code cleanup and v1.5: Michael Bergman (euambn@eua.ericsson.se) * Released to the Public Domain, no warranty whatsoever implied. * * VMS/Archimedes port : Martin Glanvill (mcg@waikato.ac.nz) * This C program (UNIX) takes an unlimited number of uuencoded files * and removes garbage lines. The uuencoded lines are decoded and written * to specific output files. It is quite verbose as opposed to e.g. * uuconvert and tells you when things go well and when they don't. * It can also handle garbage between the `/SPC line and the end line and * warns if it suspects there is garbage between the last short data line * and the SPC-line. * * New in v 1.3: * Better format of warning messages, e.g. line numbers provided. * Bogus space lines in data and at end of a uucode block is handled. * See details below in ExplodeFile() if you're interested in exactly how * this is done. * New is v 1.4: * Secure option implemented. * New in v 1.5: * Changed string and I/O buffer size. 128 char string buffers * is not enough for long path names in news headers. This made * the line numbers in warning messages be wrong. * * WARNING: * If there is garbage between the short data line (if any) and the * SPC-line, the output file *might* be corrupted, but most likely not! * * The format of the uucode is assumed to be more or less as described * in uuencode(5) in the online UNIX manual with some exceptions: * An M-line can be longer than necessary. Some coders put an extra * checksum (character) after each line, this is ignored by uuexplode. * It is assumed though that all lines have the same number of checksum * chars after them. * The last data line before end-line can be either SPC\n or `\n * since SPC is equivalent to ` when decoding (both give 0). * * Any char in the range [a-~] is illegal uucode to this program * until someone has shown that there are COMMONLY used uuencode * programs that produce correct uucode with small letters in it. * There are such uuencode, e.g. for the Macintosh(?), which use the * the range [>-}] by adding 96 to [0-29] and 32 to [30-63]. * Note that this code is transparent to the standard uudecode. * * It is assumed that the end line of a uucode-block is in the * same input file as the corresponding begin and that the parts * are in correct order (of course). * * Some diagnostics mean that the current output file is removed. * Others leave the output file, but check the input to be sure! * * To speed up the file handling, the file I/O buffers are re-set. * I chose 16 KB because it is 4 whole pages in a SunOS 4.x system. * The file I/O is by default buffered. If it is not in your system, * change to setvbuf() instead. This is also the appropriate action * to take if you don't have setbuffer(). * * To compile: "cc -O -o uuexplode uuexplode-1.x.c". * -------------------------------------------------------- *-------------------------------------------------------- * Modifications: MCG - increase buffer to 256k * VMS: cc/opt=inline uuexplode.c * link uuexplode * *-------------| LOGIN.COM insert |------------------ *$ ext*ract :== "$<path>uuexplode" *$![<path> is the directory in which you place the final .exe file] *------------------------------------------------------------------- * then use as per UNIX..... */ #include <stdio.h> #include <ctype.h> #include <stdlib.h> #define OUTPATH getenv("Out$path") /* Some garbage lines such as paths are very long, almost 200 chars */ #define LEN 256 #define BUFFER_SIZE 262144 #define D(x) #define DISPLAY if (verbose) printf /* #define DISPLAY(X) if (verbose) printf(X) */ /* Most variables are declared globally, in order to * speed up function calls. */ static char input_buf[LEN], b[LEN] = " "; /* Empty marker. If b is decoded when it is empty, nothing will happen since length is 0 */ static char out_buf[LEN], dest[LEN], dummy[LEN]; static char *ibuf = input_buf, *bp = b, *obuf = out_buf, *tmp, *p; static char *filebuf1 = NULL, *filebuf2 = NULL; /* Used for I/O buffering */ static int line, len, mlen, dlen, count, mode, newdata, hasdata, bquote, bquoteline, end, verbose=1, secure=0, ambiguous, truncation=0, t, i, x, y, z; static int siz = LEN; static FILE *ofp; /* Forward declaratons, defined after main() */ int ExplodeFile(); int ExplodeLine(); int ExplodeLineCheck(); /* This function returns 1 if the string s only has * chars which are valid in uuencoded data, 0 otherwise. * The string is assumed not to have any ASCII < 32. */ int uuchk(char *s) { int i, top; top = len-(mlen-62); for(i=1; i<top; i++) /* Check only those chars that are data */ if (*s++ >= 'a') return 0; /* This string is normally not valid uucode */ return 1; } main(int argc, char *argv[]) { FILE *ifp; char *pname; pname = argv[0]; while (argc > 1 && *argv[1] == '-') /* There is an option */ { if (*(argv[1]+1) == 'q') /* Quiet option */ { verbose = 0; argc--; argv++; } else if (*(argv[1]+1) == 'h') /* Help option, print usage and quit */ { printf("Usage: %s [-q | -h | -s] [file1 [file2...]]\n", argv[0]); printf("-q quiet\n-h usage\n-s secure, check all data lines\n"); printf("When you get warnings check the input.\n"); return; } else if (*(argv[1]+1) == 's') /* Secure option */ { secure = 1; argc--; argv++; } } D(printf("verbose=%d, secure=%d\n", verbose, secure)); if (argc < 2) { line = 0; /* Not that one has much use of linenumbers in stdin... */ ExplodeFile(stdin, "stdin"); /* Standard input, probably a pipe */ } else { if ((filebuf1 = malloc(BUFFER_SIZE)) == NULL || (filebuf2 = malloc(BUFFER_SIZE)) == NULL) fprintf(stderr, "%s: buffer allocation failed.\n", pname); for (; --argc && ++argv; ) { if ((ifp = fopen(*argv,"r")) == NULL) { perror(*argv); continue; } if (filebuf1 != NULL) setbuf(ifp, filebuf1); line = 0; /* reset line number */ ExplodeFile(ifp, *argv); /* uudecodes the data in this input file*/ /* Uncomment the next line when you want to save */ /* disk space, it simply deletes the input file */ /* unlink(*argv); */ fclose(ifp); } } return; } /* This function scans the input file until all parts are decoded. */ int ExplodeFile(FILE *ifp, char *fname) { hasdata = mlen = 0; /* On EOF, fgets returns NULL, not its first argument */ while (fgets(ibuf, siz, ifp) != NULL) { while (1) { line++; if (! strncmp(ibuf, "begin ", 6) && isdigit(ibuf[6]) && isdigit(ibuf[7]) && isdigit(ibuf[8])) { break; /* Found a begin line */ } if (fgets(ibuf, siz, ifp) == NULL) /* EOF was reached */ { if (! hasdata) printf("%s: no begin line\n", fname); return; } } /* Extract filename and file mode */ if ((sscanf(ibuf, "begin %o%[ ]%s", &mode, dummy, dest)) != 3) { len = strlen(ibuf); ibuf[len-1] = 0; /* Replace the \n with a 0 */ printf("%s: %s: Invalid mode or filename\n", fname, ibuf); continue; /* Search for next begin line */ } hasdata = 1; /* Found first valid begin line */ if (verbose) { printf("Extracting <%s>\n", dest); /* By default stdout is line buffered so I must flush */ fflush(stdout); } /* prepare output file */ if ((ofp = fopen(strcat(OUTPATH,dest), "w")) == NULL) { perror(dest); continue; } /* if (chmod(dest, mode) == -1) Set file-mode perror(dest); */ if (filebuf2 != NULL) setbuf(ofp, filebuf2); /* The line after the begin is assumed to be a valid data line. * If it is an M-line, the length is saved, unless it is < 62, which * is considered a fatal error. * If it begins with [!-L], it is decoded as the short data line, * if the next line is not SPC/`, a warning is issued. * If it is a SPC or `, the outputfile is simply closed. * (The code below was added to handle very small uuencoded files.) */ if (fgets(ibuf, siz, ifp) == NULL) /* EOF encountered */ { DISPLAY("\nEOF after begin\n"); fclose(ofp); /* unlink(dest); Remove the output file and */ return; /* proceed with next input file */ } line++; len = strlen(ibuf); if (*ibuf == 'M') { if ((mlen = len) < 62) { DISPLAY("\ninvalid first data line %d\n", line); fclose(ofp); /* unlink(dest); */ continue; /* Search for next begin line */ } if (! secure) ExplodeLine(ibuf); else if (! ExplodeLineCheck(ibuf)) { DISPLAY("\nillegal chars in first data line %d\n", line); fclose(ofp); /* unlink(dest); */ continue; /* Search for next begin line */ } } else if (*ibuf <= 'L' && *ibuf > ' ') /* No special checks here! */ { if (! secure) ExplodeLine(ibuf); else if (! ExplodeLineCheck(ibuf)) { DISPLAY("\nillegal chars in short data line %d\n", line); fclose(ofp); continue; /* Search for next begin line */ } /* Now read in line after short data line */ if (fgets(ibuf, siz, ifp) == NULL) { DISPLAY("\nwarning: EOF after short data line\n"); fclose(ofp); return; /* Proceed with next input file */ } len = strlen(ibuf); /* This next line depends on lazy evaluation */ if (len != 2 && len != 3 || *ibuf != ' ' && *ibuf != '`') { DISPLAY("\nwarning: garbage at end %d\n", line+1); /* Back up, it may have been a new begin line */ fseek(ifp, (long)-strlen(ibuf), 1); fclose(ofp); } else { line++; DISPLAY("-- done\n"); fclose(ofp); } continue; /* Search for next begin line */ } else if ((*ibuf == ' ' || *ibuf == '`') && (len == 2 || len == 3)) /* uuencoded empty file */ { DISPLAY("-- done\n"); fclose(ofp); continue; /* Search for next begin line */ } else { DISPLAY("\ngarbage at first line %d\n", line); fclose(ofp); /* unlink(dest); */ continue; /* Search for next begin line */ } /* When a [!-L] line is found, a length and char check is made to * validate it as the short data line. If the next valid uucode line * is SPC, the line is decoded. If it is not, a warning is printed, * but the line is still decoded (most likely correctly). Check the * input file to be sure. * * Whenever a valid M-line is found, it is assumed that any previously * seen potentially valid short lines were in fact garbage, this is * done by setting ambiguous=0. * * When a SPC is found, one of two strategies are used: * 1. If a short line has been seen, this SPC is assumed valid and I * never bother about the end line. * 2. If not, look for the end-line. If next valid uucode line is * end -- finish output file, no warnings * M-line -- SPC was bogus, continue & issue warning. * SPC -- continue & issue warning * EOF or begin -- finish output file with "end line not found" * short data line -- SPC was bogus, continue & issue warning * * The reason for the last behaviour is that P(no short data line) ~= 2% * (see below) and P(bogus short line) is very small. So if a valid * short line is seen after a SPC, it is more likely that it is valid * and the SPC was bogus. * A problem here is how to know if the file has ` or SPC as last * 0 length data line. It is one or the other and if it is SPC, I can * always just ignore spurious '-lines and vice versa. I decided not * to distinguish between them. This is a bit vaulnerable, but saves * overhead. * * The above SPC-line checking is new in v 1.3 and was added to handle * cases like this (which do occur sometimes): * M-lines * [garbage] * bogus SPC * [garbage] * M-lines (belonging to the same file) * * As a bonus, it now also handles (I've never seen this happen): * M-lines * [garbage] * bogus SPC * [garbage] * short data line * SPC * * Of course things like this cannot be correctly decoded: * M-line * bogus SPC * [garbage] * bogus short line * [garbage] * M-line (belonging to the same file) * but the prob. that this would happen is rather small! * * There is still only *one* pathological case where this function * would issue no warning at all and still produce corrupted output: * M-lines * bogus short-line (exactly one, falling through the checks) * space/` * [garbage] * M-lines (belonging to the same file) * This case is obviously theoretically undetectable, so in this case I * don't gain anything by looking for the end-line. * * X = original filesize % 45 * Assumption: X is uniform(0..44) (This seems reasonable) * P(no short-line) = P(X=0) = 1/45 ~= 2% * Even if no short line is found in a block, no warning about this * is printed. This was changed because in this case I always look for * the end line. If it is found, the output is very likely correct. * If not, a warning "no end line" is given, so the previuos warning * is now obsolete. * * If there is a garbage line beginning with M and of correct length, * the output will be corrupted. A check for this is turned on with * -s which looks for any chars that cannot belong to a valid M-line. * This check is done in ExplodeLineCheck() writing to an array * instead of directly to the file. If all goes well, the array is * written to the file. * This solution costs less if the M-line is valid (which is probable) * than checking the line first and then uudecoding it. */ /* Reset the buffer for short data lines to empty */ *bp = ' '; /* Reset a couple of indicators */ count = ambiguous = truncation = 0; bquote = bquoteline = 0; end = 0; while (fgets(ibuf, siz, ifp) != NULL) { if ((newdata = ! strncmp(ibuf, "begin ", 6) && isdigit(ibuf[6]) && isdigit(ibuf[7]) && isdigit(ibuf[8])) ) /* No SPC line in this block, or no end line */ { /* Return the begin line to input stream */ fseek(ifp, (long)-strlen(ibuf), 1); ExplodeLine(bp); break; /* get out of inner while loop */ } line++; len = strlen(ibuf); if (*ibuf <= 'L' && *ibuf > ' ') /* It may be a short data line */ { dlen = *ibuf-32; /* Actual data length, bytes */ t = dlen/3; if (dlen%3) t++; t = t << 2; if (len == t+2+(mlen-62)) /* Allows for extra chars checksum? */ if (uuchk(ibuf)) /* All chars are valid */ { tmp = bp; /* Keep this line in b[] */ bp = ibuf; ibuf = tmp; if (bquote) /* There is a bogus SPC line above */ { DISPLAY("\nwarning: bogus SPC-line %d ", bquoteline); bquote = bquoteline = 0; /* I must also reset the truncation flag in * case "truncated" message has been displayed. */ truncation = 0; } ambiguous++; /* No of seen valid short-lines between */ /* last M-line and first SPC-line */ continue; /* Get next input line */ } if (*bp != ' ') count++; /* * This was a garbage line after a potential short line, * get next input line */ continue; } else if ((*ibuf == ' ' || *ibuf == '`' ) && (len == 2 || len == 3)) { bquote = 1; /* Mark that a SPC line has been seen */ if (bquoteline) /* There was a previous SPC line */ { DISPLAY("\nwarning: bogus SPC-line %d ", bquoteline); truncation = 0; } bquoteline = line; /* Save the line number */ if (ambiguous) /* At least one short line has been seen */ { if (count > 0) DISPLAY("\nwarning: garbage at end %d ", line); if (ambiguous > 1) DISPLAY("\nambiguous data at end %d ", line); ExplodeLine(bp); break; /* Finished, ignore the end line */ } } else if ((end = !strncmp(ibuf, "end\n", 4)) && ! ambiguous && bquote) { /* Mark that the end line has been seen, end==1 */ break; /* Finished, get out */ } else if (*ibuf == 'M') /* This is probably a valid data line */ { if (len != mlen) { if (verbose && uuchk(ibuf)) /* All chars are valid */ { if (! truncation) { printf("\nwarning: truncated data line %d ", line); fflush(stdout); truncation = 1; } else { printf("%d ", line); fflush(stdout); } } } else { mode = 1; /* Used temporary as a flag */ if (! secure) ExplodeLine(ibuf); else { mode = ExplodeLineCheck(ibuf); D(if (! mode) printf("\nIllegal char %d", line)); } if (mode) { /* Reset the garbage indicators */ count = ambiguous = 0; if (bquote) /* There is a bogus SPC line above */ { DISPLAY("\nwarning: bogus SPC-line %d ", bquoteline); bquoteline = bquote = 0; /* I must also reset the truncation flag in case * the "truncated" message has been displayed. It * would otherwise be "broken off" by this message. */ truncation = 0; } } } } } /*while*/ fclose(ofp); if (! ambiguous && ! end) DISPLAY("\nwarning: no end line "); if (! bquote) {DISPLAY("\nend of uucode not found\n");} else {DISPLAY("-- done\n");} } /*while*/ } /* This function decodes one line of information, written by Kevin Yang. * The code is slightly different from that of uuconvert.c * or uudecode.c. I use pointers instead of array indices * to avoid time consuming address multiplications. The * condition tests are modified and eliminates 2 tests * for every 4 bytes. Characters are decoded only when * one of the three conditions matches, in which complicated * mathematical computation are eliminated for the last few * bytes. * I hope this can speed up the decoding! */ #define DECODE(C) (((C) - ' ') & 077) ExplodeLine(char *str) { i = DECODE(*str); /* Always 0 for a line beginning with SPC or ` */ p = ++str; while (i > 0) { if (i >= 3) { x = (DECODE(*p) << 2); p++; x |= (DECODE(*p) >> 4); y = (DECODE(*p) << 4); p++; y |= (DECODE(*p) >> 2); z = (DECODE(*p) << 6); p++; z |= (DECODE(*p)); putc(x, ofp); putc(y, ofp); putc(z, ofp); } else if (i >= 2) { x = (DECODE(*p) << 2); p++; x |= (DECODE(*p) >> 4); y = (DECODE(*p) << 4); p++; y |= (DECODE(*p) >> 2); putc(x, ofp); putc(y, ofp); } else if (i >= 1) { x = (DECODE(*p) << 2); p++; x |= (DECODE(*p) >> 4); putc(x, ofp); } str += 4; p = str; i -= 3; } /*while*/ } /* This array is used to hold decoded data. The entire buffer is * written to the output file when the line is decoded. * It is assumed that a full data line is 45 bytes. */ unsigned char data[50]; /* * Returns: * 1 -- OK, wrote to file * 0 -- this is no data line, illegal chars found */ int ExplodeLineCheck(char *str) { unsigned char *d; i = DECODE(*str); /* Always 0 for a line beginning with SPC or ` */ p = ++str; d = data; while (i > 0) { if (i >= 3) { x = (DECODE(*p) << 2); p++; x |= (DECODE(*p) >> 4); y = (DECODE(*p) << 4); p++; y |= (DECODE(*p) >> 2); z = (DECODE(*p) << 6); p++; z |= (DECODE(*p)); *d++ = x; *d++ = y; *d++ = z; /* Check if these chars were valid uucode */ if (*p >= 'a' || *(p-1) >= 'a' || *(p-2) >= 'a' || *(p-3) >= 'a') return(0); } else if (i >= 2) { x = (DECODE(*p) << 2); p++; x |= (DECODE(*p) >> 4); y = (DECODE(*p) << 4); p++; y |= (DECODE(*p) >> 2); *d++ = x; *d++ = y; /* Check if these chars were valid uucode */ if (*p >= 'a' || *(p-1) >= 'a' || *(p-2) >= 'a') return(0); } else if (i >= 1) { x = (DECODE(*p) << 2); p++; x |= (DECODE(*p) >> 4); *d++ = x; if (*p >= 'a' || *(p-1) >= 'a') return(0); } str += 4; p = str; i -= 3; } /*while*/ *d = 0; /* End the string */ fwrite(data, 1, d-data, ofp); return(1); }