home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
The Fred Fish Collection 1.5
/
ffcollection-1-5-1992-11.iso
/
ff_progs
/
txttools
/
bgrep.lzh
/
BGREP
/
BGREP.C
< prev
next >
Wrap
C/C++ Source or Header
|
1991-10-31
|
9KB
|
457 lines
/* bgrep --- grep using Boyer-Moore pattern matcher */
/*
* All the wrapper code (argument and file handling, printing, etc.) by
* Arnold Robbins, gatech!arnold
*
* Boyer-Moore pattern matching, coded by Roy Mongiovi, gatech!gitpyr!roy
* and edited some by Arnold Robbins.
*
* No warranty of suitability for any purpose, either expressed
* or implied, is made.
*/
#include <stdio.h>
#include <ctype.h>
#define TRUE 1
#define FALSE 0
#define MAXPATS 120 /* (arbitrary maximum number of patterns */
#define MAXLINE (256 + 1)
/*
* command line options
*/
int Allbut = FALSE; /* print lines that don't match pattern */
int Exact = FALSE; /* only print lines that match exactly */
int Countlines = FALSE; /* only print a count of matching lines */
int Listnames = FALSE; /* only list file names that match */
int Numberlines = FALSE; /* print relative line number */
int Silent = FALSE; /* don't print anything, just exit */
int Monocase = FALSE; /* ignore case distinctions */
/*
* variables
*/
long Curline = 0; /* current file input line */
long Lines_matched = 0; /* how many lines matched the pattern */
int Lotsafiles = FALSE; /* are there more than one file? */
int Pat_length[MAXPATS]; /* length of pattern */
int Line_length = 0; /* length of line */
int Couldnt_open_files = FALSE; /* one or more files could not be opened */
int Exit_val = 0; /* return code status */
int Curpat = 0; /* current pattern comparing against */
int Numpats = 0; /* total number of patterns */
char Inbuf[MAXLINE]; /* input buffer */
char Pattern[MAXPATS][MAXLINE]; /* pattern to be matched; init = NULs */
char *Program = NULL; /* program name */
int Argc; /* make argc and argv global */
char **Argv;
extern char *basename(); /* leaf file of a pathname */
main (argc, argv, envp)
int argc;
char **argv, **envp;
{
register int i, j;
char *index();
Program = basename (argv[0]);
Argc = argc;
Argv = argv;
parse_args (); /* deal with command line */
if (Pattern[0][0] == '\0') /* not from -f or -e */
{
if (Argv[0] == NULL) /* no string given */
usage ();
setpats (Argv[0]);
Argc--;
Argv++;
}
for (Curpat = 0; Curpat <= Numpats; Curpat++)
{
if (Monocase)
mapdown (Pattern[Curpat]);
Pat_length[Curpat] = strlen (Pattern[Curpat]);
initialize (); /* set up necessary tables */
}
Lotsafiles = (Argc > 1); /* more than one file left */
if (Argc == 0) /* search stdin */
process ("-");
else
for (i = 0; Argv[i] != NULL; i++)
process (Argv[i]);
if (! Silent && Countlines)
printf ("%ld\n", Lines_matched);
if (Lines_matched == 0)
exit (1);
else if (Couldnt_open_files)
exit (2);
else
exit (0);
}
/* process --- start doing the work on each file */
process (infile)
register char *infile;
{
FILE *fp;
int c;
int success;
long prev_lines_matched = Lines_matched; /* save count */
Curline = 0; /* reset for each file */
if (infile[0] == '-' && infile[1] == '\0')
fp = stdin;
else if ((fp = fopen (infile, "r")) == NULL)
{
Couldnt_open_files = TRUE;
perror (infile);
return;
}
while (fgets (Inbuf, sizeof Inbuf, fp) != NULL)
{
Curline++;
if (Monocase)
mapdown (Inbuf);
Line_length = strlen (Inbuf);
/* first, throw away rest of a truncated input line */
if (Inbuf[Line_length - 1] != '\n')
while ((c = getc (fp)) != '\n')
;
else
Inbuf[--Line_length] = '\0';
/* newline is there, nuke it */
for (Curpat = 0; Curpat <= Numpats; Curpat++)
{
if (success = match ())
Lines_matched++;
/* do any necessary output */
if (! Silent && ! Countlines && ! Listnames &&
((success != FALSE) ^ (Allbut != FALSE)))
/* either success, or Allbut, but not both,
and not neither */
{
if (Lotsafiles)
printf ("%s: ", infile);
if (Numberlines)
printf ("%ld: ", Curline);
printf ("%s\n", Inbuf);
}
}
}
fclose (fp);
if (! Silent && Listnames && prev_lines_matched < Lines_matched)
printf ("%s\n", infile);
}
/* parse_args --- check out command line arguments */
parse_args ()
{
register int i,j;
if (Argc == 1)
usage ();
for (Argc--, Argv++; Argv[0] != NULL && Argv[0][0] == '-'; Argc--, Argv++)
{
int cheat = FALSE;
for (j = 1; Argv[0][j] != '\0'; j++)
{
switch (Argv[0][j]) {
case 'c':
Countlines = TRUE;
break;
case 'e':
strcpy (Pattern[0], Argv[1]);
Pattern[0][sizeof Pattern[0] - 1] = '\0';
cheat = TRUE;
continue;
case 'f':
patfromfile (Argv[1]);
cheat = TRUE;
continue;
case 'i':
case 'y':
Monocase = TRUE;
break;
case 'l':
Listnames = TRUE;
break;
case 'n':
Numberlines = TRUE;
break;
case 's':
Silent = TRUE;
break;
case 'v':
Allbut = TRUE;
break;
case 'x':
Exact = TRUE;
break;
default:
usage ();
break;
}
}
if (cheat)
{
cheat = FALSE;
Argc--;
Argv++;
/* boy is this stuff a kludge! */
}
}
/* check for argument conflicts */
if (
(Silent &&
(Allbut || Exact || Countlines || Listnames ||
Numberlines))
||
(Allbut && Exact)
||
(Countlines && Listnames)
)
{
fprintf (stderr, "%s: argument conflict -- see the man page\n",
Program);
usage (); /* will exit */
}
}
/* mapdown --- remove case distinctions in a string */
mapdown (str)
register char *str;
{
register int i;
for (i = 0; str[i] != '\0'; i++)
if (isupper (str[i]))
str[i] = tolower (str[i]);
}
/* return basename part of a pathname, if '/'s are present */
char *basename (str)
register char *str;
{
register int i = 0;
register int j = 0;
for (; str[i] != '\0'; i++)
if (str[i] == '/')
j = i;
if (j != 0)
return (& str[++j]);
else
return (str);
}
/* usage --- print usage message and die */
usage ()
{
fprintf (stderr, "usage: %s [-vxclnisef] [string] [files]\n",
Program);
exit (2);
}
/* index --- do index by hand so it'll work on any Unix */
char *index (str, c)
char *str, c;
{
for (; *str; str++)
if (*str == c)
return (str);
return (NULL);
}
/* patfromfile --- retrieve the pattern from the named file */
patfromfile (infile)
char *infile;
{
register int i, j;
register FILE *fp;
register int c;
if ((fp = fopen (infile, "r")) == NULL ||
(c = getc (fp)) == EOF)
{
perror (infile); /* be like standard grep */
exit (2);
}
else
ungetc (c, fp);
for (i = 0; fgets (Pattern[i], MAXLINE, fp) != NULL; i++)
{
if (i >= 120)
{
fprintf (stderr, "%s: Only %d strings allowed\n",
Program, MAXPATS);
exit (2);
}
j = strlen (Pattern[i]);
if (Pattern[i][j - 1] == '\n')
Pattern[i][--j] = '\0';
}
Numpats = i - 1;
fclose (fp);
}
/* setpats --- set up the patterns from a string */
setpats (str)
register char *str;
{
register int i, j;
while (*str == '\n' || *str == '\r')
str++;
for (i = j = 0; *str; str++)
{
if (*str == '\n')
{
Pattern[i][j] = '\0';
j = 0;
i++;
}
else
Pattern[i][j++] = *str;
}
Numpats = i;
}
/* begin magic stuff for Boyer-Moore pattern matching */
int D1[MAXPATS][128];
int D2[MAXPATS][128];
int F[MAXPATS][128];
initialize ()
{
register int i, t;
for (i = 0; i < 128; i++)
D1[Curpat][i] = Pat_length[Curpat];
for (i = 0; i < Pat_length[Curpat]; i++)
D1[Curpat][Pattern[Curpat][i]] = Pat_length[Curpat] - i - 1;
for (i = 0; i < Pat_length[Curpat]; i++)
D2[Curpat][i] = (Pat_length[Curpat] << 1) - i - 1;
for (i = (t = Pat_length[Curpat]) - 1; i >= 0; i--, t--)
for (F[Curpat][i] = t; t < Pat_length[Curpat]
&& Pattern[Curpat][i] != Pattern[Curpat][t];
t = F[Curpat][t])
if (Pat_length[Curpat] - i - 1 < D2[Curpat][t])
D2[Curpat][t] = Pat_length[Curpat] - i - 1;
for (i = 0; i <= t; i++)
if (Pat_length[Curpat] + t - i < D2[Curpat][i])
D2[Curpat][i] = Pat_length[Curpat] + t - i;
}
/* match --- do Boyer-Moore pattern search on input buffer */
match ()
{
register int i, j;
if (Exact && Pat_length[Curpat] != Line_length)
return FALSE;
i = Pat_length[Curpat] - 1;
while (i < Line_length)
{
j = Pat_length[Curpat] - 1;
while (j >= 0)
{
if (Inbuf[i] == Pattern[Curpat][j])
i--, j--;
else
break;
}
if (j < 0)
{
/* found a match */
return TRUE;
/*
* note: if we were going to seach for further matches
* on the input line, we would do this:
*
* i += Pat_length[Curpat] + 1;
*
* which shifts right by Pat_length[Curpat] + 1 places
*/
}
else
{
j = (D1[Curpat][Inbuf[i]] >= D2[Curpat][j]) ?
D1[Curpat][Inbuf[i]]
:
D2[Curpat][j];
i += j;
/* shift right by j places */
}
}
return FALSE;
}
#ifdef AMIGA
perror (msg)
char *msg;
{
if (msg && *msg) {
fprintf (stderr, "%s: ", msg);
}
fprintf (stderr, "<unknown error>\n");
}
#endif