home *** CD-ROM | disk | FTP | other *** search
- /*
- * uuexplode-1.5.c (based on kiss 1.0)
- * Author kiss 1.0: Kevin Yang
- * Code cleanup and v1.5: Michael Bergman (euambn@eua.ericsson.se)
- * Released to the Public Domain, no warranty whatsoever implied.
- *
- * VMS/Archimedes port : Martin Glanvill (mcg@waikato.ac.nz)
- * This C program (UNIX) takes an unlimited number of uuencoded files
- * and removes garbage lines. The uuencoded lines are decoded and written
- * to specific output files. It is quite verbose as opposed to e.g.
- * uuconvert and tells you when things go well and when they don't.
- * It can also handle garbage between the `/SPC line and the end line and
- * warns if it suspects there is garbage between the last short data line
- * and the SPC-line.
- *
- * New in v 1.3:
- * Better format of warning messages, e.g. line numbers provided.
- * Bogus space lines in data and at end of a uucode block is handled.
- * See details below in ExplodeFile() if you're interested in exactly how
- * this is done.
- * New in v 1.4:
- * Secure option implemented.
- * New in v 1.5:
- * Changed string and I/O buffer size. 128 char string buffers
- * is not enough for long path names in news headers. This made
- * the line numbers in warning messages be wrong.
- *
- * WARNING:
- * If there is garbage between the short data line (if any) and the
- * SPC-line, the output file *might* be corrupted, but most likely not!
- *
- * The format of the uucode is assumed to be more or less as described
- * in uuencode(5) in the online UNIX manual with some exceptions:
- * An M-line can be longer than necessary. Some coders put an extra
- * checksum (character) after each line, this is ignored by uuexplode.
- * It is assumed though that all lines have the same number of checksum
- * chars after them.
- * The last data line before end-line can be either SPC\n or `\n
- * since SPC is equivalent to ` when decoding (both give 0).
- *
- * Any char in the range [a-~] is illegal uucode to this program
- * until someone has shown that there are COMMONLY used uuencode
- * programs that produce correct uucode with small letters in it.
- * There are such uuencode programs, e.g. for the Macintosh(?), which use
- * the range [>-}] by adding 96 to [0-29] and 32 to [30-63].
- * Note that this code is transparent to the standard uudecode.
- *
- * It is assumed that the end line of a uucode-block is in the
- * same input file as the corresponding begin and that the parts
- * are in correct order (of course).
- *
- * Some diagnostics mean that the current output file is removed.
- * Others leave the output file, but check the input to be sure!
- *
- * To speed up the file handling, the file I/O buffers are re-set.
- * I chose 16 KB because it is 4 whole pages in a SunOS 4.x system.
- * The file I/O is by default buffered. If it is not in your system,
- * change to setvbuf() instead. This is also the appropriate action
- * to take if you don't have setbuffer().
- *
- * To compile: "cc -O -o uuexplode uuexplode-1.x.c".
- * --------------------------------------------------------
- *--------------------------------------------------------
- * Modifications: MCG - increase buffer to 256k
- * VMS: cc/opt=inline uuexplode.c
- * link uuexplode
- *
- *-------------| LOGIN.COM insert |------------------
- *$ ext*ract :== "$<path>uuexplode"
- *$![<path> is the directory in which you place the final .exe file]
- *-------------------------------------------------------------------
- * then use as per UNIX.....
- */
-
-
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-
/* Output directory prefix. Evaluates getenv() on every use and yields
 * NULL when the variable "Out$path" is not set, so callers must check
 * the result and must never write through the returned pointer.
 */
#define OUTPATH getenv("Out$path")
/* Some garbage lines such as paths are very long, almost 200 chars */
#define LEN 256
/* Size in bytes of each of the two stdio buffers allocated in main() */
#define BUFFER_SIZE 262144

/* Debug statements: D(x) expands to nothing, so x is compiled out */
#define D(x)
/* printf-style output that is only produced when `verbose' is non-zero.
 * Written as "if (!verbose) {} else printf" so that the macro carries
 * its own else: a following `else' at the call site binds to the
 * caller's `if' and not to the one hidden inside the macro.
 */
#define DISPLAY if (!verbose) {} else printf
/* #define DISPLAY(X) if (verbose) printf(X) */

/* Most variables are declared globally, in order to
 * speed up function calls.
 */

/* Line buffers */
static char input_buf[LEN];
static char b[LEN] = " ";  /* Empty marker. If b is decoded when it is
                              empty, nothing will happen since length
                              is 0 */
static char out_buf[LEN];  /* only referenced through obuf below */
static char dest[LEN];     /* output file name from the begin line */
static char dummy[LEN];    /* receives the blanks matched by %[ ] */

/* ibuf is the line currently being read, bp the remembered short data
 * line; ExplodeFile() swaps the two pointers instead of copying.
 */
static char *ibuf = input_buf;
static char *bp = b;
static char *obuf = out_buf;
static char *tmp;          /* scratch for the ibuf/bp swap */
static char *p;

static char *filebuf1 = NULL;  /* Used for I/O buffering (input)  */
static char *filebuf2 = NULL;  /* Used for I/O buffering (output) */

static int line;           /* current input line number */
static int len;            /* strlen() of the current line */
static int mlen;           /* length of the first M-line of a block */
static int dlen;           /* data length announced by a short line */
static int count;          /* garbage lines seen after a short line */
static int mode;           /* file mode; reused as a flag later on */
static int newdata;        /* a new begin line ended the block */
static int hasdata;        /* a valid begin line has been found */
static int bquote;         /* a SPC/` line has been seen */
static int bquoteline;     /* line number of that SPC/` line */
static int end;            /* the end line has been seen */
static int verbose = 1;    /* cleared by -q */
static int secure = 0;     /* set by -s */
static int ambiguous;      /* number of candidate short data lines */
static int truncation = 0; /* "truncated data line" message is open */
static int t, i, x, y, z;  /* scratch */

static int siz = LEN;      /* buffer size handed to fgets() */
static FILE *ofp;          /* current output file */

/* Forward declarations, defined after main() */
int ExplodeFile(FILE *ifp, char *fname);
int ExplodeLine(char *str);
int ExplodeLineCheck(char *str);
-
-
- /* This function returns 1 if the string s only has
- * chars which are valid in uuencoded data, 0 otherwise.
- * The string is assumed not to have any ASCII < 32.
- */
- int uuchk(char *s)
- {
- int i, top;
-
- top = len-(mlen-62);
- for(i=1; i<top; i++) /* Check only those chars that are data */
- if (*s++ >= 'a') return 0; /* This string is normally not valid uucode */
- return 1;
- }
-
-
- main(int argc, char *argv[])
- {
- FILE *ifp;
- char *pname;
-
- pname = argv[0];
- while (argc > 1 && *argv[1] == '-') /* There is an option */
- {
- if (*(argv[1]+1) == 'q') /* Quiet option */
- {
- verbose = 0;
- argc--;
- argv++;
- }
- else if (*(argv[1]+1) == 'h') /* Help option, print usage and quit */
- {
- printf("Usage: %s [-q | -h | -s] [file1 [file2...]]\n", argv[0]);
- printf("-q quiet\n-h usage\n-s secure, check all data lines\n");
- printf("When you get warnings check the input.\n");
- return;
- }
- else if (*(argv[1]+1) == 's') /* Secure option */
- {
- secure = 1;
- argc--;
- argv++;
- }
- }
- D(printf("verbose=%d, secure=%d\n", verbose, secure));
- if (argc < 2)
- {
- line = 0; /* Not that one has much use of linenumbers in stdin... */
- ExplodeFile(stdin, "stdin"); /* Standard input, probably a pipe */
- }
- else
- {
- if ((filebuf1 = malloc(BUFFER_SIZE)) == NULL
- || (filebuf2 = malloc(BUFFER_SIZE)) == NULL)
- fprintf(stderr, "%s: buffer allocation failed.\n", pname);
-
- for (; --argc && ++argv; )
- {
- if ((ifp = fopen(*argv,"r")) == NULL)
- {
- perror(*argv);
- continue;
- }
- if (filebuf1 != NULL)
- setbuf(ifp, filebuf1);
- line = 0; /* reset line number */
- ExplodeFile(ifp, *argv); /* uudecodes the data in this input file*/
-
- /* Uncomment the next line when you want to save */
- /* disk space, it simply deletes the input file */
- /* unlink(*argv); */
- fclose(ifp);
- }
- }
- return;
- }
-
-
- /* This function scans the input file until all parts are decoded. */
-
- int ExplodeFile(FILE *ifp, char *fname)
- {
- hasdata = mlen = 0;
-
- /* On EOF, fgets returns NULL, not its first argument */
- while (fgets(ibuf, siz, ifp) != NULL)
- {
- while (1)
- {
- line++;
- if (! strncmp(ibuf, "begin ", 6)
- && isdigit(ibuf[6])
- && isdigit(ibuf[7])
- && isdigit(ibuf[8]))
- {
- break; /* Found a begin line */
- }
- if (fgets(ibuf, siz, ifp) == NULL) /* EOF was reached */
- {
- if (! hasdata) printf("%s: no begin line\n", fname);
- return;
- }
- }
-
- /* Extract filename and file mode */
- if ((sscanf(ibuf, "begin %o%[ ]%s", &mode, dummy, dest)) != 3)
- {
- len = strlen(ibuf);
- ibuf[len-1] = 0; /* Replace the \n with a 0 */
- printf("%s: %s: Invalid mode or filename\n", fname, ibuf);
- continue; /* Search for next begin line */
- }
- hasdata = 1; /* Found first valid begin line */
- if (verbose)
- {
- printf("Extracting <%s>\n", dest);
- /* By default stdout is line buffered so I must flush */
- fflush(stdout);
- }
-
- /* prepare output file */
- if ((ofp = fopen(strcat(OUTPATH,dest), "w")) == NULL)
- {
- perror(dest);
- continue;
- }
- /* if (chmod(dest, mode) == -1) Set file-mode
- perror(dest); */
- if (filebuf2 != NULL)
- setbuf(ofp, filebuf2);
-
- /* The line after the begin is assumed to be a valid data line.
- * If it is an M-line, the length is saved, unless it is < 62, which
- * is considered a fatal error.
- * If it begins with [!-L], it is decoded as the short data line,
- * if the next line is not SPC/`, a warning is issued.
- * If it is a SPC or `, the outputfile is simply closed.
- * (The code below was added to handle very small uuencoded files.)
- */
- if (fgets(ibuf, siz, ifp) == NULL) /* EOF encountered */
- {
- DISPLAY("\nEOF after begin\n");
- fclose(ofp);
- /* unlink(dest); Remove the output file and */
- return; /* proceed with next input file */
- }
- line++;
- len = strlen(ibuf);
- if (*ibuf == 'M')
- {
- if ((mlen = len) < 62)
- {
- DISPLAY("\ninvalid first data line %d\n", line);
- fclose(ofp);
- /* unlink(dest); */
- continue; /* Search for next begin line */
- }
- if (! secure)
- ExplodeLine(ibuf);
- else if (! ExplodeLineCheck(ibuf))
- {
- DISPLAY("\nillegal chars in first data line %d\n", line);
- fclose(ofp);
- /* unlink(dest); */
- continue; /* Search for next begin line */
- }
- }
- else if (*ibuf <= 'L' && *ibuf > ' ') /* No special checks here! */
- {
- if (! secure)
- ExplodeLine(ibuf);
- else if (! ExplodeLineCheck(ibuf))
- {
- DISPLAY("\nillegal chars in short data line %d\n", line);
- fclose(ofp);
- continue; /* Search for next begin line */
- }
-
- /* Now read in line after short data line */
- if (fgets(ibuf, siz, ifp) == NULL)
- {
- DISPLAY("\nwarning: EOF after short data line\n");
- fclose(ofp);
- return; /* Proceed with next input file */
- }
- len = strlen(ibuf);
- /* This next line depends on lazy evaluation */
- if (len != 2 && len != 3 || *ibuf != ' ' && *ibuf != '`')
- {
- DISPLAY("\nwarning: garbage at end %d\n", line+1);
- /* Back up, it may have been a new begin line */
- fseek(ifp, (long)-strlen(ibuf), 1);
- fclose(ofp);
- }
- else
- {
- line++;
- DISPLAY("-- done\n");
- fclose(ofp);
- }
- continue; /* Search for next begin line */
- }
- else if ((*ibuf == ' ' || *ibuf == '`') && (len == 2 || len == 3))
- /* uuencoded empty file */
- {
- DISPLAY("-- done\n");
- fclose(ofp);
- continue; /* Search for next begin line */
- }
- else
- {
- DISPLAY("\ngarbage at first line %d\n", line);
- fclose(ofp);
- /* unlink(dest); */
- continue; /* Search for next begin line */
- }
-
- /* When a [!-L] line is found, a length and char check is made to
- * validate it as the short data line. If the next valid uucode line
- * is SPC, the line is decoded. If it is not, a warning is printed,
- * but the line is still decoded (most likely correctly). Check the
- * input file to be sure.
- *
- * Whenever a valid M-line is found, it is assumed that any previously
- * seen potentially valid short lines were in fact garbage, this is
- * done by setting ambiguous=0.
- *
- * When a SPC is found, one of two strategies are used:
- * 1. If a short line has been seen, this SPC is assumed valid and I
- * never bother about the end line.
- * 2. If not, look for the end-line. If next valid uucode line is
- * end -- finish output file, no warnings
- * M-line -- SPC was bogus, continue & issue warning.
- * SPC -- continue & issue warning
- * EOF or begin -- finish output file with "end line not found"
- * short data line -- SPC was bogus, continue & issue warning
- *
- * The reason for the last behaviour is that P(no short data line) ~= 2%
- * (see below) and P(bogus short line) is very small. So if a valid
- * short line is seen after a SPC, it is more likely that it is valid
- * and the SPC was bogus.
- * A problem here is how to know if the file has ` or SPC as last
- * 0 length data line. It is one or the other and if it is SPC, I can
- * always just ignore spurious '-lines and vice versa. I decided not
- * to distinguish between them. This is a bit vaulnerable, but saves
- * overhead.
- *
- * The above SPC-line checking is new in v 1.3 and was added to handle
- * cases like this (which do occur sometimes):
- * M-lines
- * [garbage]
- * bogus SPC
- * [garbage]
- * M-lines (belonging to the same file)
- *
- * As a bonus, it now also handles (I've never seen this happen):
- * M-lines
- * [garbage]
- * bogus SPC
- * [garbage]
- * short data line
- * SPC
- *
- * Of course things like this cannot be correctly decoded:
- * M-line
- * bogus SPC
- * [garbage]
- * bogus short line
- * [garbage]
- * M-line (belonging to the same file)
- * but the prob. that this would happen is rather small!
- *
- * There is still only *one* pathological case where this function
- * would issue no warning at all and still produce corrupted output:
- * M-lines
- * bogus short-line (exactly one, falling through the checks)
- * space/`
- * [garbage]
- * M-lines (belonging to the same file)
- * This case is obviously theoretically undetectable, so in this case I
- * don't gain anything by looking for the end-line.
- *
- * X = original filesize % 45
- * Assumption: X is uniform(0..44) (This seems reasonable)
- * P(no short-line) = P(X=0) = 1/45 ~= 2%
- * Even if no short line is found in a block, no warning about this
- * is printed. This was changed because in this case I always look for
- * the end line. If it is found, the output is very likely correct.
- * If not, a warning "no end line" is given, so the previuos warning
- * is now obsolete.
- *
- * If there is a garbage line beginning with M and of correct length,
- * the output will be corrupted. A check for this is turned on with
- * -s which looks for any chars that cannot belong to a valid M-line.
- * This check is done in ExplodeLineCheck() writing to an array
- * instead of directly to the file. If all goes well, the array is
- * written to the file.
- * This solution costs less if the M-line is valid (which is probable)
- * than checking the line first and then uudecoding it.
- */
-
- /* Reset the buffer for short data lines to empty */
- *bp = ' ';
- /* Reset a couple of indicators */
- count = ambiguous = truncation = 0;
- bquote = bquoteline = 0;
- end = 0;
- while (fgets(ibuf, siz, ifp) != NULL)
- {
-
- if ((newdata = ! strncmp(ibuf, "begin ", 6)
- && isdigit(ibuf[6])
- && isdigit(ibuf[7])
- && isdigit(ibuf[8])) ) /* No SPC line in this block, or
- no end line */
- { /* Return the begin line to input stream */
- fseek(ifp, (long)-strlen(ibuf), 1);
- ExplodeLine(bp);
- break; /* get out of inner while loop */
- }
- line++;
-
- len = strlen(ibuf);
- if (*ibuf <= 'L' && *ibuf > ' ') /* It may be a short data line */
- {
- dlen = *ibuf-32; /* Actual data length, bytes */
- t = dlen/3;
- if (dlen%3) t++;
- t = t << 2;
- if (len == t+2+(mlen-62)) /* Allows for extra chars checksum? */
- if (uuchk(ibuf)) /* All chars are valid */
- {
- tmp = bp; /* Keep this line in b[] */
- bp = ibuf;
- ibuf = tmp;
- if (bquote) /* There is a bogus SPC line above */
- {
- DISPLAY("\nwarning: bogus SPC-line %d ", bquoteline);
- bquote = bquoteline = 0;
- /* I must also reset the truncation flag in
- * case "truncated" message has been displayed.
- */
- truncation = 0;
- }
- ambiguous++; /* No of seen valid short-lines between */
- /* last M-line and first SPC-line */
- continue; /* Get next input line */
- }
- if (*bp != ' ') count++;
- /*
- * This was a garbage line after a potential short line,
- * get next input line
- */
- continue;
- }
- else if ((*ibuf == ' ' || *ibuf == '`' ) && (len == 2 || len == 3))
- {
- bquote = 1; /* Mark that a SPC line has been seen */
- if (bquoteline) /* There was a previous SPC line */
- {
- DISPLAY("\nwarning: bogus SPC-line %d ", bquoteline);
- truncation = 0;
- }
- bquoteline = line; /* Save the line number */
- if (ambiguous) /* At least one short line has been seen */
- {
- if (count > 0)
- DISPLAY("\nwarning: garbage at end %d ", line);
- if (ambiguous > 1)
- DISPLAY("\nambiguous data at end %d ", line);
- ExplodeLine(bp);
- break; /* Finished, ignore the end line */
- }
- }
- else if ((end = !strncmp(ibuf, "end\n", 4)) && ! ambiguous && bquote)
- {
- /* Mark that the end line has been seen, end==1 */
- break; /* Finished, get out */
- }
- else if (*ibuf == 'M') /* This is probably a valid data line */
- {
- if (len != mlen)
- {
- if (verbose && uuchk(ibuf)) /* All chars are valid */
- {
- if (! truncation)
- {
- printf("\nwarning: truncated data line %d ", line);
- fflush(stdout);
- truncation = 1;
- }
- else
- {
- printf("%d ", line);
- fflush(stdout);
- }
- }
- }
- else
- {
- mode = 1; /* Used temporary as a flag */
- if (! secure)
- ExplodeLine(ibuf);
- else
- {
- mode = ExplodeLineCheck(ibuf);
- D(if (! mode) printf("\nIllegal char %d", line));
- }
- if (mode)
- {
- /* Reset the garbage indicators */
- count = ambiguous = 0;
- if (bquote) /* There is a bogus SPC line above */
- {
- DISPLAY("\nwarning: bogus SPC-line %d ", bquoteline);
- bquoteline = bquote = 0;
- /* I must also reset the truncation flag in case
- * the "truncated" message has been displayed. It
- * would otherwise be "broken off" by this message.
- */
- truncation = 0;
- }
- }
- }
- }
- } /*while*/
- fclose(ofp);
-
- if (! ambiguous && ! end) DISPLAY("\nwarning: no end line ");
- if (! bquote)
- {DISPLAY("\nend of uucode not found\n");}
- else
- {DISPLAY("-- done\n");}
- } /*while*/
- }
-
-
- /* This function decodes one line of information, written by Kevin Yang.
- * The code is slightly different from that of uuconvert.c
- * or uudecode.c. I use pointers instead of array indices
- * to avoid time consuming address multiplications. The
- * condition tests are modified and eliminates 2 tests
- * for every 4 bytes. Characters are decoded only when
- * one of the three conditions matches, in which complicated
- * mathematical computation are eliminated for the last few
- * bytes.
- * I hope this can speed up the decoding!
- */
- #define DECODE(C) (((C) - ' ') & 077)
-
- ExplodeLine(char *str)
- {
- i = DECODE(*str); /* Always 0 for a line beginning with SPC or ` */
- p = ++str;
-
- while (i > 0)
- {
- if (i >= 3)
- {
- x = (DECODE(*p) << 2); p++;
- x |= (DECODE(*p) >> 4);
- y = (DECODE(*p) << 4); p++;
- y |= (DECODE(*p) >> 2);
- z = (DECODE(*p) << 6); p++;
- z |= (DECODE(*p));
- putc(x, ofp);
- putc(y, ofp);
- putc(z, ofp);
- }
- else if (i >= 2)
- {
- x = (DECODE(*p) << 2); p++;
- x |= (DECODE(*p) >> 4);
- y = (DECODE(*p) << 4); p++;
- y |= (DECODE(*p) >> 2);
- putc(x, ofp);
- putc(y, ofp);
- }
- else if (i >= 1)
- {
- x = (DECODE(*p) << 2); p++;
- x |= (DECODE(*p) >> 4);
- putc(x, ofp);
- }
-
- str += 4;
- p = str;
- i -= 3;
- } /*while*/
- }
-
-
- /* This array is used to hold decoded data. The entire buffer is
- * written to the output file when the line is decoded.
- * It is assumed that a full data line is 45 bytes.
- */
- unsigned char data[50];
-
- /*
- * Returns:
- * 1 -- OK, wrote to file
- * 0 -- this is no data line, illegal chars found
- */
- int ExplodeLineCheck(char *str)
- {
- unsigned char *d;
-
- i = DECODE(*str); /* Always 0 for a line beginning with SPC or ` */
- p = ++str;
- d = data;
- while (i > 0)
- {
- if (i >= 3)
- {
- x = (DECODE(*p) << 2); p++;
- x |= (DECODE(*p) >> 4);
- y = (DECODE(*p) << 4); p++;
- y |= (DECODE(*p) >> 2);
- z = (DECODE(*p) << 6); p++;
- z |= (DECODE(*p));
- *d++ = x; *d++ = y; *d++ = z;
- /* Check if these chars were valid uucode */
- if (*p >= 'a' || *(p-1) >= 'a' || *(p-2) >= 'a' || *(p-3) >= 'a')
- return(0);
- }
- else if (i >= 2)
- {
- x = (DECODE(*p) << 2); p++;
- x |= (DECODE(*p) >> 4);
- y = (DECODE(*p) << 4); p++;
- y |= (DECODE(*p) >> 2);
- *d++ = x; *d++ = y;
- /* Check if these chars were valid uucode */
- if (*p >= 'a' || *(p-1) >= 'a' || *(p-2) >= 'a')
- return(0);
- }
- else if (i >= 1)
- {
- x = (DECODE(*p) << 2); p++;
- x |= (DECODE(*p) >> 4);
- *d++ = x;
- if (*p >= 'a' || *(p-1) >= 'a')
- return(0);
- }
- str += 4;
- p = str;
- i -= 3;
- } /*while*/
- *d = 0; /* End the string */
- fwrite(data, 1, d-data, ofp);
- return(1);
- }
-
-