- Subject: v11i066: Word counts, checksums, etc.
- Newsgroups: comp.sources.unix
- Sender: sources
- Approved: rs@uunet.UU.NET
- Submitted-by: Alan Silverstein <hpda!hpfcla!hpfcdt!ajs>
- Posting-number: Volume 11, Issue 66
- Archive-name: vitals
- This program was developed by Hewlett-Packard and will be part of our
- HP-UX product offering. We have found it useful. It is most useful
- when most widely and commonly shared, so here it is.
- We have not tested it except on HP-UX, which is mainly AT&T-compatible.
- However, it should be pretty portable. Caveat emptor. Oh, and the
- usual disclaimer that I'm not really an official HP spokesperson.
- Alan Silverstein, Hewlett-Packard Systems Software Operation, Fort Collins,
- Colorado; {ihnp4 | hplabs}!hpfcla!ajs; 303-229-3053; (lat-long on request :-)
- # This is a shell archive. Remove anything before this line,
- # then unpack it by saving it in a file and typing "sh file".
- #
- # This archive contains:
- # Makefile vitals.1 vitals.c
- #
- # Error checking via wc(1) will be performed.
- echo x - Makefile
- cat >Makefile <<'@EOF'
- all: vitals
- install: all vitals.1
- @echo install in appropriate directories.
- vitals: vitals.c
- $(CC) $(CFLAGS) -o vitals vitals.c
- @EOF
- echo Assuming Makefile is OK -- the moderator did not repack the kit...
- echo x - vitals.1
- cat >vitals.1 <<'@EOF'
- .\" $Header: vitals.1,v 1.1 87/08/10 12:07:47 $
- .TH VITALS 1 ""
- .ad b
- vitals \- crc, sum, line, word, and character counts
- .B vitals
- [
- .B \-rslwcb
- ] [
- .I file ...
- ]
- .I Vitals
- checks data integrity by
- computing vital statistics related to the data in the given
- file(s) or standard input (by default).
- The statistics include a four-digit hex CRC, a 16-bit byte sum (similar to
- .IR sum (1)
- without the block count)
- and line, word, and character counts (similar to
- .IR wc (1)).
- One line is printed for each input file or standard input, consisting of
- five statistics fields
- followed by the file name, if known.
- If a file is specified as "\fB\-\fR", the program reads standard
- input at that
- point and shows "\fB\-\fR" as the file name.
- .SS Options
- .TP 15
- .B \-r
- Compute only the CRC: a true 16-bit cyclic redundancy check
- that can
- detect, among other things, exchanged characters.
- .TP
- .B \-s
- Compute only the byte sum: the modulo-2\(**\(**16, unsigned, circular sum
- of all the bytes, each taken as an
- .B int
- (therefore normally in the range 0-255).
- .TP
- .B \-l
- Compute only the line count: the number of newline characters in the input.
- .TP
- .B \-w
- Compute only the word count: the number of
- character sequences
- delimited by blanks, tabs, or newlines.
- As in
- .IR wc (1),
- non-printable characters are totally ignored when looking for words.
- .TP
- .B \-c
- Compute only the character count.
- .TP
- .B \-b
- Print only file basenames, not full path names.
- .PP
- These options can be used in any combination.
- The default is
- .BR \-rslwc ,
- that is, all but
- .BR \-b .
- .PP
- .I Vitals
- is more efficient than separate commands tied together
- with a shell script because the input data is read only once.
- Unlike
- .IR wc (1),
- this program does not compute totals, since it is intended for fast data
- validation, not size counting.
- If any file open or read fails, the program writes a message to standard
- error when the problem is encountered, and continues with the next file.
- It ultimately returns non-zero if any error occurs, else zero.
- .I Vitals
- was developed by Hewlett-Packard.
- sum(1), wc(1).
- @EOF
if test "`wc -lwc <vitals.1`" != ' 80 375 2154'
- then
- echo ERROR: wc results of vitals.1 are `wc -lwc <vitals.1` should be 80 375 2154
- fi
- chmod 444 vitals.1
- echo x - vitals.c
- cat >vitals.c <<'@EOF'
- static char *HPUX_ID = "@(#)27.1 85/02/04";
- /* HPUX_ID: @(#)vitals.c 27.1 85/02/04 */
- /*
- * Compute vital statistics of data: crc, sum, line, word, and character
- * counts. See the manual entry for details.
- *
- * Compile with -DTABLE to produce an alternate program, which prints a
- * CRC table for use in this program.
- *
- * The CRC code was lifted from a USENET posting:
- *
- * hpfcla:net.sources / hcrvax!sft / 8:17 pm Nov 15, 1984
- *
- * The following program is a command to calculate the CRC of files.
- * It is useful for the same purposes as sum(1). It calculates the
- * true CRC16 (unlike CP/M utilities that say they calculate CRCs
- * but really just hash). Crc detects more errors than old sum(1);
- * for example, it detects exchanges of characters. It is also
- * (now) in the public domain.
- *
- * CRC16 polynomial: x**0 + x**2 + x**15 + x**16 (0xA001)
- * (CCITT polynomial: x**0 + x**5 + x**12 + x**16 (0x8408))
- * Initial condition: 0
- *
- * D. Hugh Redelmeier, 1983 April 15; latest change: 1984 April 2.
- */
- #include <stdio.h>
- char *USAGE = "usage: %s [-rslwcb] [files...]\n"; /* printf format; %s is the command name */
- #define proc /* expands to nothing; marks function definitions so they are easy to grep for */
- #define chNull ('\0')
- #define chNewline ('\n')
- #define sbNull ((char *) NULL)
- #define fileNull ((FILE *) NULL)
- #define false 0
- #define true 1
- char *myname; /* how command invoked (argv[0]); prefixes error messages */
- int rflag = false; /* -r (crc) option */
- int sflag = false; /* -s (sum) option */
- int lflag = false; /* -l (lines) option */
- int wflag = false; /* -w (words) option */
- int cflag = false; /* -c (chars) option */
- int bflag = false; /* -b (basenames) opt */
- int retval = 0; /* exit status; set to 1 when a file cannot be opened or read */
- char *defaultargs[] = {"-", sbNull}; /* argument list used when no files are named: read stdin */
- /*
-  * Write "<myname>: <part1> <part2>" and a newline to standard error.
-  * Expands to a single expression with no trailing semicolon, so the
-  * caller's own ';' ends the statement and the macro can safely be the
-  * body of an unbraced if/else.
-  */
- #define PrintErr(part1,part2) fprintf (stderr, "%s: %s %s\n", myname, (part1), (part2))
- /************************************************************************
-  * M A I N
-  *
-  * Parse the option letters, fall back to standard input when no file
-  * arguments remain, then open each named file in turn and hand it to
-  * DoFile().  An argument of "-" means standard input.
-  *
-  * Exits with retval, which starts at 0 and is set to 1 when a file
-  * cannot be opened or read.  An unrecognized option prints the usage
-  * message and exits 1.  When compiled with -DTABLE the program only
-  * prints the CRC table; that build now also exits through exit (retval)
-  * instead of falling off the end of main with an undefined status.
-  */
- proc main (argc, argv)
-     register int argc;
-     register char **argv;
- {
- #ifdef TABLE                            /* just print table */
-     PrintTable();
- #else
-     extern int optind;                  /* for getopt() */
-     int optchar;                        /* from getopt() */
-     register FILE *filep;               /* file to read */
-     /*
-      * Check arguments.  If no statistics option was given, turn them
-      * all on; -b alone does not count as a statistics option.
-      */
-     myname = *argv;
-     while ((optchar = getopt (argc, argv, "rslwcb")) != EOF)
-     {
-         switch (optchar)
-         {
-         case 'r': rflag = true; break;
-         case 's': sflag = true; break;
-         case 'l': lflag = true; break;
-         case 'w': wflag = true; break;
-         case 'c': cflag = true; break;
-         case 'b': bflag = true; break;
-         default: fprintf (stderr, USAGE, myname); exit (1);
-         }
-     }
-     if (! (rflag || sflag || lflag || wflag || cflag))
-         rflag = sflag = lflag = wflag = cflag = true;
-     argc -= optind;
-     argv += optind;
-     if (argc < 1)                       /* use default arguments */
-         argv = defaultargs;
-     /*
-      * Process each file argument.
-      *
-      * Be careful to keep stdin open for filenames of "-".
-      * Argc is not altered; if < 1, it means no file args were given,
-      * which tells DoFile() not to print a file name.
-      */
-     for ( ; *argv != sbNull; argv++)    /* each argument */
-     {
-         if (strcmp (*argv, "-") == 0)   /* read stdin */
-             filep = stdin;
-         else if ((filep = fopen (*argv, "r")) == fileNull)
-         {
-             PrintErr ("can't open", *argv);
-             retval = 1;
-             continue;                   /* go on to the next file */
-         }
-         DoFile (filep, (argc < 1), *argv);
-         if (filep != stdin)             /* keep stdin open for reuse */
-             fclose (filep);
-     }
- #endif /* not TABLE */
-     exit (retval);                      /* reached by both builds */
- } /* main */
- #ifdef TABLE
- /************************************************************************
-  * P R I N T T A B L E
-  *
-  * Write the 256-entry CRC lookup table to standard output as a C array
-  * declaration, eight entries per line, ready to be pasted back into
-  * this program.  Building the table at startup would be simpler, but
-  * emitting it once saves that work on every run.
-  *
-  * Each entry is its own index pushed through eight shift-right steps,
-  * XORing in 0xA001 after any step whose low bit was set beforehand.
-  *
-  * Assumes unsigned and short are at least 16 bits.
-  */
- proc PrintTable()
- {
-     register unsigned slot;             /* table position */
-     register unsigned value;            /* entry being built */
-     register int step;                  /* shift steps done */
-     printf ("unsigned short CRCtable [256] = {");
-     for (slot = 0; slot < 256; slot++)
-     {
-         if (slot != 0)                  /* separate from previous entry */
-             putchar (',');
-         if ((slot % 8) == 0)            /* eight entries per line */
-             putchar (chNewline);
-         value = slot;
-         for (step = 0; step < 8; step++)
-         {
-             if (value & 1)              /* low bit set */
-                 value = (value >> 1) ^ 0xA001;
-             else
-                 value >>= 1;
-         }
-         printf ("\t0x%4.4x", value);
-     }
-     printf ("\n};\n");
- } /* PrintTable */
- #else /* not TABLE */
- /************************************************************************
- * CRCtable[], output from PrintTable() (the -DTABLE build of this
- * program), is used for CRC calculation: DoFile() indexes it with the
- * low eight bits of (crc ^ byte) to fold each input byte into the CRC.
- * Entry i is the value i after eight shift-right steps, with 0xA001
- * XORed in after any step whose low bit was set beforehand.
- *
- * The structures declared after the table are meant for circular
- * mod-2**16 byte sums. They assume that two shorts == one long!
- */
- unsigned short CRCtable [256] = {
- 0x0000, 0xc0c1, 0xc181, 0x0140, 0xc301, 0x03c0, 0x0280, 0xc241,
- 0xc601, 0x06c0, 0x0780, 0xc741, 0x0500, 0xc5c1, 0xc481, 0x0440,
- 0xcc01, 0x0cc0, 0x0d80, 0xcd41, 0x0f00, 0xcfc1, 0xce81, 0x0e40,
- 0x0a00, 0xcac1, 0xcb81, 0x0b40, 0xc901, 0x09c0, 0x0880, 0xc841,
- 0xd801, 0x18c0, 0x1980, 0xd941, 0x1b00, 0xdbc1, 0xda81, 0x1a40,
- 0x1e00, 0xdec1, 0xdf81, 0x1f40, 0xdd01, 0x1dc0, 0x1c80, 0xdc41,
- 0x1400, 0xd4c1, 0xd581, 0x1540, 0xd701, 0x17c0, 0x1680, 0xd641,
- 0xd201, 0x12c0, 0x1380, 0xd341, 0x1100, 0xd1c1, 0xd081, 0x1040,
- 0xf001, 0x30c0, 0x3180, 0xf141, 0x3300, 0xf3c1, 0xf281, 0x3240,
- 0x3600, 0xf6c1, 0xf781, 0x3740, 0xf501, 0x35c0, 0x3480, 0xf441,
- 0x3c00, 0xfcc1, 0xfd81, 0x3d40, 0xff01, 0x3fc0, 0x3e80, 0xfe41,
- 0xfa01, 0x3ac0, 0x3b80, 0xfb41, 0x3900, 0xf9c1, 0xf881, 0x3840,
- 0x2800, 0xe8c1, 0xe981, 0x2940, 0xeb01, 0x2bc0, 0x2a80, 0xea41,
- 0xee01, 0x2ec0, 0x2f80, 0xef41, 0x2d00, 0xedc1, 0xec81, 0x2c40,
- 0xe401, 0x24c0, 0x2580, 0xe541, 0x2700, 0xe7c1, 0xe681, 0x2640,
- 0x2200, 0xe2c1, 0xe381, 0x2340, 0xe101, 0x21c0, 0x2080, 0xe041,
- 0xa001, 0x60c0, 0x6180, 0xa141, 0x6300, 0xa3c1, 0xa281, 0x6240,
- 0x6600, 0xa6c1, 0xa781, 0x6740, 0xa501, 0x65c0, 0x6480, 0xa441,
- 0x6c00, 0xacc1, 0xad81, 0x6d40, 0xaf01, 0x6fc0, 0x6e80, 0xae41,
- 0xaa01, 0x6ac0, 0x6b80, 0xab41, 0x6900, 0xa9c1, 0xa881, 0x6840,
- 0x7800, 0xb8c1, 0xb981, 0x7940, 0xbb01, 0x7bc0, 0x7a80, 0xba41,
- 0xbe01, 0x7ec0, 0x7f80, 0xbf41, 0x7d00, 0xbdc1, 0xbc81, 0x7c40,
- 0xb401, 0x74c0, 0x7580, 0xb541, 0x7700, 0xb7c1, 0xb681, 0x7640,
- 0x7200, 0xb2c1, 0xb381, 0x7340, 0xb101, 0x71c0, 0x7080, 0xb041,
- 0x5000, 0x90c1, 0x9181, 0x5140, 0x9301, 0x53c0, 0x5280, 0x9241,
- 0x9601, 0x56c0, 0x5780, 0x9741, 0x5500, 0x95c1, 0x9481, 0x5440,
- 0x9c01, 0x5cc0, 0x5d80, 0x9d41, 0x5f00, 0x9fc1, 0x9e81, 0x5e40,
- 0x5a00, 0x9ac1, 0x9b81, 0x5b40, 0x9901, 0x59c0, 0x5880, 0x9841,
- 0x8801, 0x48c0, 0x4980, 0x8941, 0x4b00, 0x8bc1, 0x8a81, 0x4a40,
- 0x4e00, 0x8ec1, 0x8f81, 0x4f40, 0x8d01, 0x4dc0, 0x4c80, 0x8c41,
- 0x4400, 0x84c1, 0x8581, 0x4540, 0x8701, 0x47c0, 0x4680, 0x8641,
- 0x8201, 0x42c0, 0x4380, 0x8341, 0x4100, 0x81c1, 0x8081, 0x4040
- };
- struct shorts { /* a pair of unsigned shorts; as used, order is irrelevant because the two are only added together */
- unsigned short high, low;
- };
- typedef union { /* to map one long to two shorts; only valid where a long is exactly two shorts wide */
- unsigned long sum; /* the value viewed whole */
- struct shorts shorts; /* the same storage viewed as two shorts */
- } convert;
- /************************************************************************
-  * D O F I L E
-  *
-  * Read one file to EOF, accumulate its statistics, and print one output
-  * line for it on standard output.
-  *
-  * filep     open stream to read; it is not closed here.
-  * nullname  non-zero when no file name was given on the command line:
-  *           no name is printed, and a read error is reported against
-  *           "stdin".
-  * filename  name printed after the statistics; with -b only the part
-  *           after the last '/' is printed.
-  *
-  * If the read fails, a message goes to standard error, retval is set
-  * to 1, and no statistics line is printed.
-  *
-  * ch is int, not char, to be sure it can distinguish 0xff and EOF.  As
-  * a result, all normal values of ch should be positive, 0..255.
-  */
- proc DoFile (filep, nullname, filename)
-     register FILE *filep;               /* file to read */
-     int nullname;                       /* ignore filename? */
-     char *filename;                     /* name of filep */
- {
-     register int ch;                    /* current char */
-     register unsigned crc = 0;          /* crc sum */
-     register unsigned long sum = 0;     /* byte sum; unsigned so wraparound is defined */
-     register long lines = 0L;           /* line count */
-     register long words = 0L;           /* word count */
-     register long chars = 0L;           /* char count */
-     register int notword = true;        /* not in word? */
-     /*
-      * Read the data.
-      *
-      * CRCtable[] values have 16 bits, so the masking is necessary before
-      * each repeated index into the array.  sum is allowed to grow past
-      * 16 bits; the overflow is folded back in when it is printed.  Line
-      * and char counts are accumulated regardless of whether they are
-      * needed; it's faster not to check.  Words are counted in the same
-      * strange way as wc(1): only characters strictly between ' ' and
-      * '\177' start or continue a word, only blank, tab and newline end
-      * one, and every other character is ignored.
-      */
-     while ((ch = getc (filep)) != EOF)
-     {
-         if (rflag)
-             crc = (crc >> 8) ^ (CRCtable [(crc ^ ch) & 0xff]);
-         if (sflag)
-             sum += ch;
-         if (ch == chNewline)
-             lines++;
-         chars++;
-         if (wflag)
-         {
-             if ((' ' < ch) && (ch < '\177'))    /* word char */
-             {
-                 if (notword)                    /* start of word */
-                 {
-                     words++;
-                     notword = false;
-                 }
-             }
-             else if ((ch == ' ') || (ch == '\t') || (ch == chNewline))
-             {
-                 notword = true;
-             }
-         }
-     } /* while */
-     /*
-      * Report a read error, or else print the requested fields.
-      */
-     if (ferror (filep))
-     {
-         PrintErr ("read failed from", nullname ? "stdin" : filename);
-         retval = 1;
-     }
-     else
-     {
-         if (rflag) printf (" %4.4x", crc);
-         if (sflag)
-         {
-             /*
-              * Add the overflow (bits 16..31) back onto the low 16 bits.
-              * Done with shifts and masks on the low 32 bits, so the
-              * result does not depend on byte order or on a long being
-              * exactly two shorts wide.  The fold is applied once, so
-              * the printed value can itself need a 17th bit.
-              */
-             printf (" %5u",
-                 (unsigned) (((sum >> 16) & 0xffff) + (sum & 0xffff)));
-         }
-         if (lflag) printf (" %6ld", lines);
-         if (wflag) printf (" %6ld", words);
-         if (cflag) printf (" %6ld", chars);
-         if (! nullname)                 /* have name to print */
-         {
-             if (bflag)                  /* basename only */
-             {
-                 char *cp = filename;
-                 while (*cp != chNull)   /* till end of name */
-                 {
-                     if (*cp++ == '/')   /* directory level */
-                         filename = cp;  /* set past '/' */
-                 }
-             }
-             printf (" %s", filename);
-         }
-         putchar (chNewline);
-     } /* else */
- } /* DoFile */
- #endif /* not TABLE */
- @EOF
