home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Usenet 1994 October
/
usenetsourcesnewsgroupsinfomagicoctober1994disk2.iso
/
unix
/
volume11
/
vitals
< prev
next >
Wrap
Text File
|
1987-09-20
|
14KB
|
480 lines
Subject: v11i066: Word counts, checksums, etc.
Newsgroups: comp.sources.unix
Sender: sources
Approved: rs@uunet.UU.NET
Submitted-by: Alan Silverstein <hpda!hpfcla!hpfcdt!ajs>
Posting-number: Volume 11, Issue 66
Archive-name: vitals
This program was developed by Hewlett-Packard and will be part of our
HP-UX product offering. We have found it useful. It is most useful
when most widely and commonly shared, so here it is.
We have not tested it except on HP-UX, which is mainly AT&T-compatible.
However, it should be pretty portable. Caveat emptor. Oh, and the
usual disclaimer that I'm not really an official HP spokesperson.
Alan Silverstein, Hewlett-Packard Systems Software Operation, Fort Collins,
Colorado; {ihnp4 | hplabs}!hpfcla!ajs; 303-229-3053; (lat-long on request :-)
# This is a shell archive. Remove anything before this line,
# then unpack it by saving it in a file and typing "sh file".
#
# This archive contains:
# Makefile vitals.1 vitals.c
#
# Error checking via wc(1) will be performed.
echo x - Makefile
cat >Makefile <<'@EOF'
all: vitals
install: all vitals.1
	@echo install in appropriate directories.
vitals: vitals.c
	$(CC) $(CFLAGS) -o vitals vitals.c
@EOF
echo Assuming Makefile is OK -- the moderator did not repack the kit...
echo x - vitals.1
cat >vitals.1 <<'@EOF'
.\" $Header: vitals.1,v 1.1 87/08/10 12:07:47 $
.TH VITALS 1 ""
.ad b
.SH NAME
vitals \- crc, sum, line, word, and character counts
.SH SYNOPSIS
.B vitals
[
.B \-rslwcb
] [
.I file ...
]
.SH DESCRIPTION
.I Vitals
checks data integrity by
computing vital statistics related to the data in the given
file(s) or standard input (by default).
The statistics include a four-digit hex CRC, a 16-bit byte sum (similar to
.IR sum (1)
without the block count)
and line, word, and character counts (similar to
.IR wc (1)).
One line is printed for each input file or standard input, consisting of
five statistics fields
followed by the file name, if known.
If a file is specified as "\fB\-\fR", the program reads standard
input at that
point and shows "\fB\-\fR" as the file name.
.SS Options
.TP 15
.B \-r
Compute only the CRC: a true 16-bit cyclic redundancy check
that can
detect, among other things, exchanged characters.
.TP
.B \-s
Compute only the byte sum: the modulo-2\(**\(**16, unsigned, circular sum
of all the bytes, each taken as an
.B int
(therefore normally in the range 0-255).
.TP
.B \-l
Compute only the line count: the number of newline characters in the input.
.TP
.B \-w
Compute only the word count: the number of
character sequences
delimited by blanks, tabs, or newlines.
As in
.IR wc (1),
non-printable characters are totally ignored when looking for words.
.TP
.B \-c
Compute only the character count.
.TP
.B \-b
Print only file basenames, not full path names.
.PP
These options can be used in any combination.
The default is
.BR \-rslwc ,
that is, all but
.BR \-b .
.PP
.I Vitals
is more efficient than separate commands tied together
with a shell script because the input data is read only once.
Unlike
.IR wc (1),
this program does not compute totals, since it is intended for fast data
validation, not size counting.
.SH DIAGNOSTICS
If any file open or read fails, the program writes a message to standard
error when the problem is encountered, and continues with the next file.
It ultimately returns non-zero if any error occurs, else zero.
.SH AUTHOR
.I Vitals
was developed by Hewlett-Packard.
.SH "SEE ALSO"
sum(1), wc(1).
@EOF
if test "`wc -lwc <vitals.1`" != ' 80 375 2154'
then
echo ERROR: wc results of vitals.1 are `wc -lwc <vitals.1` should be 80 375 2154
fi
chmod 444 vitals.1
echo x - vitals.c
cat >vitals.c <<'@EOF'
static char *HPUX_ID = "@(#)27.1 85/02/04";
/* HPUX_ID: @(#)vitals.c 27.1 85/02/04 */
/*
* Compute vital statistics of data: crc, sum, line, word, and character
* counts. See the manual entry for details.
*
* Compile with -DTABLE to produce an alternate program, which prints a
* CRC table for use in this program.
*
* The CRC code was lifted from a USENET posting:
*
* hpfcla:net.sources / hcrvax!sft / 8:17 pm Nov 15, 1984
*
* The following program is a command to calculate the CRC of files.
* It is useful for the same purposes as sum(1). It calculates the
* true CRC16 (unlike CP/M utilities that say they calculate CRCs
* but really just hash). Crc detects more errors than old sum(1);
* for example, it detects exchanges of characters. It is also
* (now) in the public domain.
*
* CRC16 polynomial: x**0 + x**2 + x**15 + x**16 (0xA001)
* (CCITT polynomial: x**0 + x**5 + x**12 + x**16 (0x8408))
* Initial condition: 0
*
* D. Hugh Redelmeier, 1983 April 15; latest change: 1984 April 2.
*/
#include <stdio.h>
char *USAGE = "usage: %s [-rslwcb] [files...]\n";	/* %s is filled with myname */

#define proc				/* null; easy to grep for procs */

#define chNull		('\0')
#define chNewline	('\n')
#define sbNull		((char *) NULL)
#define fileNull	((FILE *) NULL)

#define false	0
#define true	1

char *myname;				/* how command invoked	*/

int rflag = false;			/* -r (crc)	  option */
int sflag = false;			/* -s (sum)	  option */
int lflag = false;			/* -l (lines)	  option */
int wflag = false;			/* -w (words)	  option */
int cflag = false;			/* -c (chars)	  option */
int bflag = false;			/* -b (basenames) opt	 */

int retval = 0;				/* return value		*/

char *defaultargs[] = {"-", sbNull};	/* read stdin by default */

/*
 * Write "<myname>: <part1> <part2>" and a newline to stderr.
 *
 * The expansion is a single expression with no trailing semicolon; the
 * caller's own semicolon ends the statement.  That keeps the macro usable
 * as the unbraced body of an if that is followed by an else.
 */
#define PrintErr(part1,part2) \
	fprintf (stderr, "%s: %s %s\n", myname, part1, part2)
/************************************************************************
* M A I N
*
* Initialize, check arguments, open files, and call another routine to
* do each file.
*/
proc main (argc, argv)
	register int	argc;
	register char	**argv;
{
	extern int	optind;		/* for getopt() */
	int		optchar;	/* from getopt() */
	register FILE	*filep;		/* file to read */

#ifdef TABLE				/* just print table */

	PrintTable();
	exit (0);			/* defined exit status for the table build */

#else

/*
 * CHECK ARGUMENTS:
 *
 * Each option letter just sets its flag; anything getopt() rejects prints
 * the usage message and exits with status 1.
 */

	myname = *argv;

	while ((optchar = getopt (argc, argv, "rslwcb")) != EOF)
	    switch (optchar)
	    {
		case 'r':	rflag = true;	break;
		case 's':	sflag = true;	break;
		case 'l':	lflag = true;	break;
		case 'w':	wflag = true;	break;
		case 'c':	cflag = true;	break;
		case 'b':	bflag = true;	break;

		default:	fprintf (stderr, USAGE, myname); exit (1);
	    }

	/* No statistic selected means all five; -b alone does not count. */
	if (! (rflag || sflag || lflag || wflag || cflag))
	    rflag = sflag = lflag = wflag = cflag = true;

	argc -= optind;			/* now the number of file arguments */
	argv += optind;

	if (argc < 1)			/* use default arguments */
	    argv = defaultargs;

/*
 * OPEN AND DO EACH INPUT FILE:
 *
 * Be careful to keep stdin open for filenames of "-".
 * argc is not changed again after the adjustment above, so inside the
 * loop (argc < 1) still means no file arguments were given; DoFile() is
 * then told not to print a file name.
 */

	for ( ; *argv != sbNull; argv++)	/* each argument */
	{
	    if (strcmp (*argv, "-") == 0)	/* read stdin */
		filep = stdin;

	    else if ((filep = fopen (*argv, "r")) == fileNull)
	    {
		PrintErr ("can't open", *argv);
		retval = 1;			/* remember failure, keep going */
		continue;
	    }

	    DoFile (filep, (argc < 1), *argv);

	    if (filep != stdin)		/* keep stdin open for reuse */
		fclose (filep);
	}

	exit (retval);

#endif /* not TABLE */

} /* main */
#ifdef TABLE
/************************************************************************
* P R I N T T A B L E
*
* Print table needed for CRC computation, as a C array declaration.
* The output can then be included in this program. It would be easy
* to just build the table in memory each time the program is run, but
* what the heck -- this way is a little more complicated, but already
* done, and shaves off a bit of startup time.
*
* Assumes unsigned and short are at least 16 bits.
*/
proc PrintTable()
{
	register unsigned	slot;	/* table position, 0..255	*/
	register unsigned	value;	/* entry being built for slot	*/
	register int		bit;	/* bits of slot processed	*/

	printf ("unsigned short CRCtable [256] = {");

	for (slot = 0; slot < 256; slot++)
	{
	    if (slot > 0)		/* separate from previous entry */
		putchar (',');

	    if ((slot % 8) == 0)	/* eight entries per output line */
		putchar (chNewline);

	    /*
	     * Run the slot number through eight shift steps, XORing in the
	     * polynomial whenever a one bit is about to be shifted out.
	     */
	    value = slot;

	    for (bit = 0; bit < 8; bit++)
		value = (value & 1) ? ((value >> 1) ^ 0xA001) : (value >> 1);

	    printf ("\t0x%4.4x", value);
	}

	printf ("\n};\n");

} /* PrintTable */
#else /* not TABLE */
/************************************************************************
* CRC TABLE AND BYTE SUM STRUCTS
*
* CRCtable[], output from PrintTable(), is used for CRC calculation.
* Structures are used for circular mod-2**16 byte sums. They assume
* that two shorts == one long!
*/
/*
 * Byte-indexed lookup table for the CRC update step in DoFile().
 * Entry i is the value PrintTable() generates for index i: i shifted
 * right eight times, with 0xA001 XORed in after each shift that drops
 * a one bit.  Regenerate it by compiling with -DTABLE rather than
 * editing it by hand.
 */
unsigned short CRCtable [256] = {
0x0000, 0xc0c1, 0xc181, 0x0140, 0xc301, 0x03c0, 0x0280, 0xc241,
0xc601, 0x06c0, 0x0780, 0xc741, 0x0500, 0xc5c1, 0xc481, 0x0440,
0xcc01, 0x0cc0, 0x0d80, 0xcd41, 0x0f00, 0xcfc1, 0xce81, 0x0e40,
0x0a00, 0xcac1, 0xcb81, 0x0b40, 0xc901, 0x09c0, 0x0880, 0xc841,
0xd801, 0x18c0, 0x1980, 0xd941, 0x1b00, 0xdbc1, 0xda81, 0x1a40,
0x1e00, 0xdec1, 0xdf81, 0x1f40, 0xdd01, 0x1dc0, 0x1c80, 0xdc41,
0x1400, 0xd4c1, 0xd581, 0x1540, 0xd701, 0x17c0, 0x1680, 0xd641,
0xd201, 0x12c0, 0x1380, 0xd341, 0x1100, 0xd1c1, 0xd081, 0x1040,
0xf001, 0x30c0, 0x3180, 0xf141, 0x3300, 0xf3c1, 0xf281, 0x3240,
0x3600, 0xf6c1, 0xf781, 0x3740, 0xf501, 0x35c0, 0x3480, 0xf441,
0x3c00, 0xfcc1, 0xfd81, 0x3d40, 0xff01, 0x3fc0, 0x3e80, 0xfe41,
0xfa01, 0x3ac0, 0x3b80, 0xfb41, 0x3900, 0xf9c1, 0xf881, 0x3840,
0x2800, 0xe8c1, 0xe981, 0x2940, 0xeb01, 0x2bc0, 0x2a80, 0xea41,
0xee01, 0x2ec0, 0x2f80, 0xef41, 0x2d00, 0xedc1, 0xec81, 0x2c40,
0xe401, 0x24c0, 0x2580, 0xe541, 0x2700, 0xe7c1, 0xe681, 0x2640,
0x2200, 0xe2c1, 0xe381, 0x2340, 0xe101, 0x21c0, 0x2080, 0xe041,
0xa001, 0x60c0, 0x6180, 0xa141, 0x6300, 0xa3c1, 0xa281, 0x6240,
0x6600, 0xa6c1, 0xa781, 0x6740, 0xa501, 0x65c0, 0x6480, 0xa441,
0x6c00, 0xacc1, 0xad81, 0x6d40, 0xaf01, 0x6fc0, 0x6e80, 0xae41,
0xaa01, 0x6ac0, 0x6b80, 0xab41, 0x6900, 0xa9c1, 0xa881, 0x6840,
0x7800, 0xb8c1, 0xb981, 0x7940, 0xbb01, 0x7bc0, 0x7a80, 0xba41,
0xbe01, 0x7ec0, 0x7f80, 0xbf41, 0x7d00, 0xbdc1, 0xbc81, 0x7c40,
0xb401, 0x74c0, 0x7580, 0xb541, 0x7700, 0xb7c1, 0xb681, 0x7640,
0x7200, 0xb2c1, 0xb381, 0x7340, 0xb101, 0x71c0, 0x7080, 0xb041,
0x5000, 0x90c1, 0x9181, 0x5140, 0x9301, 0x53c0, 0x5280, 0x9241,
0x9601, 0x56c0, 0x5780, 0x9741, 0x5500, 0x95c1, 0x9481, 0x5440,
0x9c01, 0x5cc0, 0x5d80, 0x9d41, 0x5f00, 0x9fc1, 0x9e81, 0x5e40,
0x5a00, 0x9ac1, 0x9b81, 0x5b40, 0x9901, 0x59c0, 0x5880, 0x9841,
0x8801, 0x48c0, 0x4980, 0x8941, 0x4b00, 0x8bc1, 0x8a81, 0x4a40,
0x4e00, 0x8ec1, 0x8f81, 0x4f40, 0x8d01, 0x4dc0, 0x4c80, 0x8c41,
0x4400, 0x84c1, 0x8581, 0x4540, 0x8701, 0x47c0, 0x4680, 0x8641,
0x8201, 0x42c0, 0x4380, 0x8341, 0x4100, 0x81c1, 0x8081, 0x4040
};
/*
 * Overlay of one unsigned long on a pair of unsigned shorts, so that a
 * value stored through .sum can be read back as two halves through
 * .shorts.  The two shorts cover the whole long only where
 * sizeof (long) == 2 * sizeof (short); on other platforms part of the
 * long is not visible through .shorts.
 */
struct shorts { /* as used, order is irrelevant */
unsigned short high, low;
};
typedef union { /* to map one long to two shorts */
unsigned long sum;
struct shorts shorts;
} convert;
/************************************************************************
* D O F I L E
*
* Read one file, calculate values, and print an output line.
* Sets retval if necessary. ch is int, not char, to be sure
* it can distinguish 0xff and EOF. As a result, all normal
* values of ch should be positive, 0..255.
*/
proc DoFile (filep, nullname, filename)
	register FILE	*filep;		/* file to read		*/
	int		nullname;	/* ignore filename?	*/
	char		*filename;	/* name of filep	*/
{
	register int		ch;		/* current char	 */
	register unsigned	crc   = 0;	/* crc sum	 */
	register unsigned long	sum   = 0L;	/* byte sum	 */
	register long		lines = 0L;	/* line count	 */
	register long		words = 0L;	/* word count	 */
	register long		chars = 0L;	/* char count	 */
	register int		notword = true;	/* not in word?	 */

/*
 * READ FILE AND COMPUTE SUMS:
 *
 * CRCtable[] values have 16 bits, so the masking is necessary before each
 * repeated index into the array.  Line and char counts are accumulated
 * regardless whether they are needed; it's faster not to check.  Words are
 * counted in the same strange way as wc(1), ignoring special chars.
 *
 * sum is allowed to grow past 16 bits and is folded (high part added back
 * into the low 16 bits) only when it gets large, so it can never overflow
 * however long the input is.  An early fold does not change the final
 * answer: folding preserves the value modulo 65535 and never turns a
 * non-zero sum into zero.
 */

	while ((ch = getc (filep)) != EOF)
	{
	    if (rflag)
		crc = (crc >> 8) ^ (CRCtable [(crc ^ ch) & 0xff]);

	    if (sflag)
	    {
		sum += ch;

		if (sum >= 0x40000000L)		/* well short of 32 bits */
		    sum = (sum & 0xffffL) + (sum >> 16);
	    }

	    if (ch == chNewline)
		lines++;

	    chars++;

	    if (wflag)
	    {
		if ((' ' < ch) && (ch < '\177'))	/* word char */
		{
		    if (notword)			/* start of word */
		    {
			words++;
			notword = false;
		    }
		}
		else if ((ch == ' ') || (ch == '\t') || (ch == chNewline))
		{
		    notword = true;
		}
		/* any other char neither starts nor ends a word */
	    }
	} /* while */

/*
 * REPORT ERROR OR PRINT TOTALS:
 *
 * A read error sets retval and suppresses the statistics line.
 */

	if (ferror (filep))
	{
	    PrintErr ("read failed from", nullname ? "stdin" : filename);
	    retval = 1;
	}
	else
	{
	    /*
	     * Add the overflow back into the low 16 bits until nothing is
	     * left above them.  One pass is not always enough (0x1ffff
	     * folds to 0x10000, which must fold again to 1), and doing it
	     * arithmetically works whatever the sizes of short and long.
	     */
	    while (sum > 0xffffL)
		sum = (sum & 0xffffL) + (sum >> 16);

	    if (rflag)	printf (" %4.4x", crc);
	    if (sflag)	printf (" %5u",	  (unsigned) sum);
	    if (lflag)	printf (" %6ld",  lines);
	    if (wflag)	printf (" %6ld",  words);
	    if (cflag)	printf (" %6ld",  chars);

	    if (! nullname)			/* have name to print */
	    {
		if (bflag)			/* basename only */
		{
		    char *cp = filename;

		    while (*cp != chNull)	/* till end of name */
			if (*cp++ == '/')	/* directory level  */
			    filename = cp;	/* set past '/'	    */
		}
		printf (" %s", filename);
	    }
	    putchar (chNewline);

	} /* else */

} /* DoFile */
#endif /* not TABLE */
@EOF
if test "`wc -lwc <vitals.c`" != ' 338 1577 9781'
then
echo ERROR: wc results of vitals.c are `wc -lwc <vitals.c` should be 338 1577 9781
fi
chmod 444 vitals.c
exit 0