home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Hot Shareware 35
/
hot35.iso
/
ficheros
/
9TXT
/
ZE32V270.ZIP
/
GREP.C_
/
GREP.C
Wrap
C/C++ Source or Header
|
1998-04-02
|
21KB
|
761 lines
/*
*
* The information in this document is subject to change
* without notice and should not be construed as a commitment
* by Digital Equipment Corporation or by DECUS.
*
* Neither Digital Equipment Corporation, DECUS, nor the authors
* assume any responsibility for the use or reliability of this
* document or the described software.
*
* Copyright (C) 1980, DECUS
*
* General permission to copy or modify, but not for profit, is
* hereby granted, provided that the above copyright notice is
* included and reference made to the fact that reproduction
* privileges were granted by DECUS.
*
*/
#include <dir.h>
#include <dos.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
/*
* grep.
*
* Runs on the Decus compiler or on vms.
* Converted for BDS compiler (under CP/M-80), 20-Jan-83, by Chris Kern.
*
* Converted to IBM PC with CI-C86 C Compiler June 1983 by David N. Smith
*
* On vms, define as:
*
* grep :== "$disk:[account]grep" (native)
* grep :== "$disk:[account]grep grep" (Decus)
*
* See below for more information.
*
*/
/*****************************************************************/
/* Changed by JAJ 5/96 to write to STDOUT and not to STDERR so */
/* that the output is easier to capture to file. A basic form */
/* of wildcard support was also added */
/*****************************************************************/
/*****************************************************************/
/* Changed by BKB 5/97 to support wildcards along with a path */
/* Now prints the path along with the filename */
/* Now is case sensitive by default, ignore case with -i option */
/* All non ANSI-C code is rewritten */
/* Don't run if no files are given (don't use stdin) */
/*****************************************************************/
/*****************************************************************/
/* Changed by BKB 11/97 to support searching subdirectories */
/*****************************************************************/
/*****************************************************************/
/* Changed by BKB 03/98 */
/* Fixed a bug when searching for a string in uppercase and */
/* the ignore_case flag is set */
/* Now don't print the name of the program to stdout, in order */
/* to prevent Zeus from opening the exe file, when using */
/* StandardOutputPrevious */
/*****************************************************************/
/* Forward declarations for the file-local functions defined below. */
/* Messages and termination */
static void usage(char *s);
static void help(char **hp);
static void cant(char *s);
/* Pattern compilation */
static void compile(char *source);
static void badpat(char *message, char *source, char *stop);
/* Matching and file scanning */
static int match(void);
static void grep(char *fn);
/* File-specification handling */
static char *split_file_name(char *file_name);
static int find_files(char *pattern, char *path);
static void error(char *s);
static void file(char *s);
/* Helpers used by compile() and pmatch() */
static void store(char op);
static char *cclass(char *source, char *src);
static char *pmatch(char *line, char *pattern);
static char *grep_strtok(char **str, char *match, char *found);
static char grep_tolower(char ch);
/*
 * Usage text printed by help(): one output line per entry, terminated by a
 * null pointer.
 */
static char *documentation[] =
{
    "grep searches files for a given pattern. Execute by",
    " grep [flags] regular_expression file_list",
    "",
    "Flags are single characters preceeded by '-':",
    " -c Only a count of matching lines is printed",
    " -f Print file name for matching lines switch, see below",
    " -n Each line is preceeded by its line number",
    " -v Only print non-matching lines",
    " -i Ignore case",
    " -d Search subdirectories",
    "",
    /* -- JAJ "The file_list is a list of files (wildcards are acceptable on RSX modes).", */
    "The file_list is a list of files (wildcards are acceptable modes).",
    /* Backslashes are doubled: "\p" and "\*" are not valid C escape sequences. */
    "Eg. [file1.c file2.c] or [*.c *.h] or [\\prog\\*.c;*.h]",
    0
};
/*
 * Description of the pattern syntax, printed by help(): one output line per
 * entry, terminated by a null pointer.  Every entry must be followed by a
 * comma; adjacent string literals without one are concatenated by the
 * compiler into a single line.
 */
char *patdoc[] =
{
    "The regular_expression defines the pattern to search for.",
    "Blank lines never match.",
    "The expression should be quoted to prevent file-name translation.",
    "x An ordinary character (not mentioned below) matches that character.",
    "'\\' The backslash quotes any character. \"\\$\" matches a dollar-sign.",
    "'^' A circumflex at the beginning of an expression matches the",
    " beginning of a line.",
    "'$' A dollar-sign at the end of an expression matches the end of a line.",
    "'.' A period matches any character except \"new-line\".",
    "':a' A colon matches a class of characters described by the following",
    "':d' character. \":a\" matches any alphabetic, \":d\" matches digits,",
    "':n' \":n\" matches alphanumerics, \": \" matches spaces, tabs, and",
    "': ' other control characters, such as new-line.",
    "'*' An expression followed by an asterisk matches zero or more",
    " occurrances of that expression: \"fo*\" matches \"f\", \"fo\"",
    " \"foo\", etc.",
    "'+' An expression followed by a plus sign matches one or more",
    " occurrances of that expression: \"fo+\" matches \"fo\", etc.",
    "'-' An expression followed by a minus sign optionally matches",
    " the expression.",
    "'[]' A string enclosed in square brackets matches any character in",
    " that string, but no others. If the first character in the",
    " string is a circumflex, the expression matches any character",
    " except \"new-line\" and the characters in the string. For",
    " example, \"[xyz]\" matches \"xx\" and \"zyx\", while \"[^xyz]\"",
    " matches \"abc\" but not \"axb\". A range of characters may be",
    " specified by two characters separated by \"-\". Note that,",
    " [a-z] matches alphabetics, while [z-a] never matches.",
    "The concatenation of regular expressions is a regular expression.",
    0
};
/* Buffer sizes */
#define LMAX 512 /* size of lbuf[], the input line buffer */
#define PMAX 256 /* size of pbuf[], the compiled pattern buffer */
/* Opcodes stored in pbuf[] by compile()/cclass() and interpreted by pmatch() */
#define CHAR 1 /* literal character; the character follows the opcode */
#define BOL 2 /* '^' : beginning of line */
#define EOL 3 /* '$' : end of line */
#define ANY 4 /* '.' : any character */
#define CLASS 5 /* '[...]' : followed by a count byte and the members */
#define NCLASS 6 /* '[^...]' : negated class, same layout as CLASS */
#define STAR 7 /* '*' : zero or more of the operand that follows */
#define PLUS 8 /* '+' : one or more of the operand that follows */
#define MINUS 9 /* '-' : optional occurrence of the operand that follows */
#define ALPHA 10 /* ':a' */
#define DIGIT 11 /* ':d' */
#define NALPHA 12 /* ':n' */
#define PUNCT 13 /* ': ' */
#define RANGE 14 /* inside a class: followed by range start and range end */
#define ENDPAT 15 /* ends the pattern and the operand of STAR/PLUS/MINUS */
/* Option flags; main() increments each one when its letter is seen. */
static int cflag; /* -c: grep() prints only a count of selected lines */
static int fflag; /* -f: grep() prints the file name via file() */
static int nflag; /* -n: grep() prints the line number before each line */
static int vflag; /* -v: grep() selects the lines that do NOT match */
static int nfile; /* argc - 1 minus the flag arguments and the pattern; 0 means no files */
static int ignore_case_flag = 0; /* BKB: -i, makes grep_tolower() fold to lower case */
static int search_subdirectories = 0; /* -d: find_files() recurses into subdirectories */
static int debug = 0; /* Set for debug code (-t) */
static char *pp; /* next free position in pbuf[]; set by compile(), advanced by store() */
static char lbuf[LMAX]; /* line currently being matched; filled by grep() */
static char pbuf[PMAX]; /* compiled pattern produced by compile() */
/*******************************************************/
/*
 * Program entry point.
 *
 * Echoes the command line, processes the flag arguments, compiles the first
 * non-flag argument as the pattern and treats every remaining argument as a
 * ';'-separated list of file specifications to search.
 *
 * Returns 1 after printing the help text for a lone "?" argument and 0
 * after a normal run.  Every error goes through usage(), which exits.
 */
int main(int argc, char *argv[])
{
    char *arg;                  /* argument or file-spec token in hand */
    char *rest;                 /* unparsed remainder of a file list */
    char *dir;                  /* directory from split_file_name() */
    char delim;                 /* delimiter reported by grep_strtok() */
    int ch;
    int idx;
    int have_pattern = 0;
    int files_found = 0;

    if(argc <= 1)
        usage("No arguments");
    if(argc == 2 && strcmp(argv[1], "?") == 0)
    {
        help(documentation);
        help(patdoc);
        return 1;
    }

    /* Echo the arguments (but not the program name) */
    printf("\nGrep ");
    for(idx = 1; idx < argc; idx++)
        printf("%s ", argv[idx]);
    printf("\n");

    /* Consume flags and the pattern; what is left in argv are file specs */
    nfile = argc - 1;
    for(idx = 1; idx < argc; ++idx)
    {
        arg = argv[idx];
        if(*arg != '-')
        {
            if(have_pattern)
                continue;       /* a file spec: leave it in place */
            compile(arg);
            have_pattern = 1;
        }
        else
        {
            for(++arg; (ch = *arg) != 0; ++arg)
            {
                switch(ch)
                {
                case '?':
                    help(documentation);
                    break;
                case 'c':
                    ++cflag;
                    break;
                case 't':
                    ++debug;
                    break;
                case 'f':
                    ++fflag;
                    break;
                case 'n':
                    ++nflag;
                    break;
                case 'v':
                    ++vflag;
                    break;
                case 'i':
                    ignore_case_flag++;
                    break;
                case 'd':
                    search_subdirectories++;
                    break;
                default:
                    usage("Unknown flag");
                    break;
                }
            }
        }
        argv[idx] = 0;          /* mark the argument as consumed */
        --nfile;
    }

    if(!have_pattern)
        usage("No pattern");
    if(nfile == 0)
        usage("No files");      /* terminate rather than read stdin (BKB) */

    for(idx = 1; idx < argc; idx++)
    {
        if(argv[idx] == NULL)
            continue;
        rest = argv[idx];
        arg = grep_strtok(&rest, ";", &delim);
        /* The directory of the first token is used for the whole list */
        dir = split_file_name(arg);
        for(; arg != NULL; arg = grep_strtok(&rest, ";", &delim))
            files_found += find_files(arg, dir);
    }
    if(files_found == 0)
        usage("No files found");
    return 0;
}
/*
 * Split a file specification into a directory and a wildcard pattern.
 *
 * file_name is rewritten in place so that it holds only the part after the
 * last '\\' or ':' (the pattern).  The return value points to a static
 * buffer that holds the upper-cased directory part without a trailing
 * backslash, so callers can append "\\" and a name to it.  When file_name
 * has no directory part, the current working directory is returned instead.
 *
 * Calls error(), which exits, if the directory cannot be determined or does
 * not fit into the buffer.
 */
static char *split_file_name(char *file_name)
{
    static char path[256];      /* Path must be static since it is used by main BKB */
    size_t len = strlen(file_name);
    size_t sep = len;           /* index of the last separator; len if none */
    size_t dir_len;
    size_t i;

    /* Find the last separator without ever reading before file_name[0] */
    for(i = len; i > 0; i--)
    {
        if(file_name[i - 1] == '\\' || file_name[i - 1] == ':')
        {
            sep = i - 1;
            break;
        }
    }

    if(sep == len)
    {   /* If no path is specified get the current working directory */
        if(!getcwd(path, sizeof path))
            error("Unable to get dir");
        /* "X:\" would yield a doubled backslash once "\\name" is appended */
        if(strlen(path) == 3 && path[1] == ':')
            path[2] = '\0';
        return path;
    }

    /* A drive-only prefix ("C:") keeps its colon; a '\\' is dropped */
    dir_len = (file_name[sep] == ':') ? sep + 1 : sep;
    if(dir_len + 2 > sizeof path)
        error("Path too long\n");
    for(i = 0; i < dir_len; i++)
        path[i] = (char)toupper((unsigned char)file_name[i]);
    /* "C:." refers to the current directory of the drive, so that
       "C:.\\name" stays relative to it */
    if(file_name[sep] == ':')
        path[dir_len++] = '.';
    path[dir_len] = '\0';

    /* Put the search pattern into file_name; the regions overlap, so
       memmove is required.  len - sep bytes include the terminator. */
    memmove(file_name, file_name + sep + 1, len - sep);
    return path;
}
/*******************************************************/
/*
 * Print the file name s as the first column of an output line: left
 * justified in a field of at least 13 characters, followed by " |".
 * No newline is written.  (JAJ: format chosen for Zeus.)
 */
static void file(char *s)
{
    fprintf(stdout, "%-13s |", s);
}
/*******************************************************/
/*
 * Report that the file named s could not be opened.  The message goes to
 * stdout rather than stderr so that Zeus can capture it (JAJ).
 */
static void cant(char *s)
{
    printf("%s: cannot open\n", s);
}
/*******************************************************/
/*
 * Print a help text.  hp is an array of strings terminated by a null
 * pointer; each string is written to stdout on a line of its own.
 */
static void help(char **hp)
{
    char **line = hp;

    while(*line != NULL)
    {
        puts(*line);
        line++;
    }
}
/*******************************************************/
/*
 * Print the error message s followed by a one-line usage summary, then
 * terminate the program with exit status 1.  Output goes to stdout rather
 * than stderr so that Zeus can capture it (JAJ).
 */
static void usage(char *s)
{
    printf("?GREP-E-%s\n", s);
    printf("Usage: grep [-cfnvid] pattern [file ...]. grep ? for help\n");
    exit(1);
}
/*******************************************************/
/*
 * Compile the pattern text in source into the global pbuf[].
 *
 * Each pattern element is stored as an opcode, optionally followed by
 * operand bytes.  A closure operator ('*', '+' or '-') is inserted in front
 * of the element it applies to, and that element is then closed with
 * ENDPAT.  The compiled pattern ends with ENDPAT and a zero byte.
 *
 * Malformed patterns are reported through badpat(), which does not return.
 */
static void compile(char *source)   /* Pattern to compile */
{
    char *s = source;           /* Source string pointer */
    char *lp = NULL;            /* Start of the last compiled element */
    char *dp;                   /* Debug dump pointer */
    char *spp;                  /* Save end of pattern */
    int c;                      /* Current character */

    if(debug)
        printf("Pattern = \"%s\"\n", s);
    pp = pbuf;
    while((c = *s++) != 0)
    {   /* STAR, PLUS and MINUS are special. */
        if(c == '*' || c == '+' || c == '-')
        {
            /*
             * The operator needs an operand that is neither an anchor nor
             * already a closure.  The opcode at *lp is inspected rather
             * than pp[-1]: the last byte stored is operand data (or the
             * ENDPAT of an earlier closure), so it neither identifies the
             * element nor detects a doubled operator such as "a**".
             */
            if(lp == NULL || *lp == BOL || *lp == EOL ||
               *lp == STAR || *lp == PLUS || *lp == MINUS)
                badpat("Illegal occurrance op.", source, s);
            store(ENDPAT);
            store(ENDPAT);
            spp = pp;           /* Save pattern end */
            while(--pp > lp)    /* Move pattern down */
                *pp = pp[-1];   /* one byte */
            *pp = (c == '*') ? STAR : (c == '-') ? MINUS : PLUS;
            pp = spp;           /* Restore pattern end */
            continue;
        }
        /* All the rest. */
        lp = pp;                /* Remember start */
        switch(c)
        {
        case '^':
            store(BOL);
            break;
        case '$':
            store(EOL);
            break;
        case '.':
            store(ANY);
            break;
        case '[':
            s = cclass(source, s);
            break;
        case ':':
            if(*s == '\0')
                badpat("No : type", source, s);     /* does not return */
            c = *s++;
            switch(c)
            {
            case 'a':
            case 'A':
                store(ALPHA);
                break;
            case 'd':
            case 'D':
                store(DIGIT);
                break;
            case 'n':
            case 'N':
                store(NALPHA);
                break;
            case ' ':
                store(PUNCT);
                break;
            default:
                badpat("Unknown : type", source, s);
                break;
            }
            break;
        case '\\':
            if(*s)              /* a trailing backslash stands for itself */
                c = *s++;
            /* fall through: store the quoted character as a literal */
        default:
            store(CHAR);
            store(c);
            break;
        }
    }
    store(ENDPAT);
    store(0);                   /* Terminate string */
    if(debug)
    {
        /* Dump the compiled pattern: opcodes/control bytes in octal */
        for(dp = pbuf; dp < pp;)
        {
            if((c = (*dp++ & 0377)) < ' ')
                printf("\\%o ", (unsigned)c);
            else
                printf("%c ", c);
        }
        printf("\n");
    }
}
/*******************************************************/
/*
 * Compile a character class.  src points just past the opening '[';
 * source is the whole pattern and is only used for error messages.
 *
 * Stored layout: CLASS or NCLASS, a count byte, then the members.  A range
 * "a-z" is stored as RANGE, start, end.  The count is the number of bytes
 * from the count byte to the end of the class, so it includes the count
 * byte itself and an empty class has a count of 1.
 *
 * Returns a pointer just past the closing ']'.  Malformed classes are
 * reported through badpat(), which does not return.
 */
static char *cclass(char *source, char *src)
{
    char *s = src;              /* Source pointer */
    char *cp;                   /* Position of the count byte */
    int c;                      /* Current character */
    int o = CLASS;              /* Opcode to store */

    if(*s == '^')
    {
        ++s;
        o = NCLASS;
    }
    store(o);
    cp = pp;
    store(0);                   /* Byte count, filled in below */
    while((c = *s++) != 0 && c != ']')
    {
        if(c == '\\')           /* Store quoted char */
        {
            if((c = *s++) == '\0')  /* Gotta get something */
                badpat("Class terminates badly", source, s);
            else
                store(c);
        }
        else if(c == '-' && (pp - cp) > 1 && *s != ']' && *s != '\0')
        {
            /* A '-' between two members turns the previous one into a range */
            c = pp[-1];         /* Range start */
            pp[-1] = RANGE;     /* Range signal */
            store(c);           /* Re-store start */
            c = *s++;           /* Get end char and */
            store(c);           /* store it */
        }
        else
        {
            store(c);           /* Store normal char */
        }
    }
    if(c != ']')
        badpat("Unterminated class", source, s);
    if((c = (int)(pp - cp)) >= 256)
        badpat("Class too large", source, s);
    /* The count includes the count byte, so 1 (not 0) means "no members" */
    if(c <= 1)
        badpat("Empty class", source, s);
    *cp = (char)c;
    return s;
}
/*******************************************************/
/*
 * Append one byte to the compiled pattern at pp and advance pp.  Calls
 * error(), which exits, when pbuf[] is already full.
 */
static void store(char op)
{
    if(pp - pbuf >= PMAX)
        error("Pattern too complex\n");
    *pp = op;
    pp++;
}
/*******************************************************/
/*
 * Report a malformed pattern and terminate through error().
 *
 * message describes the problem, source is the complete pattern text and
 * stop points just past the character at which compilation stopped.
 * Output goes to stdout rather than stderr so that Zeus can capture it
 * (JAJ).
 */
static void badpat(char *message, char *source, char *stop)
{
    fprintf(stdout, "-GREP-E-%s, pattern is\"%s\"\n", message, source);
    /* A pointer difference is a ptrdiff_t; convert it for "%d" */
    fprintf(stdout, "-GREP-E-Stopped at byte %d, '%c'\n",
            (int)(stop - source), stop[-1]);
    error("?GREP-E-Bad pattern\n");
}
/*
 * Scan the file named fn for the pattern in pbuf[].
 *
 * Every selected line (matching lines, or non-matching lines with -v) is
 * printed, optionally preceded by the file name (-f) and the line number
 * (-n).  With -c only the number of selected lines is printed.  A file that
 * cannot be opened is reported through cant() and skipped.
 *
 * Lines are read with fgets() into lbuf[], so a line longer than LMAX - 1
 * characters is processed as several separate lines.
 */
static void grep(char *fn)
{
    int lno = 0;                /* current line number */
    int count = 0;              /* number of selected lines */
    int m;
    size_t len;
    FILE *fp = fopen(fn, "r");

    if(!fp)
    {
        cant(fn);
        return;                 /* nothing to read from */
    }
    while(fgets(lbuf, LMAX, fp))
    {
        ++lno;
        /* Remove the '\n' before matching, so that '$' can match the end
           of the line; it is not printed either.  The length check guards
           against a line that starts with a NUL byte. */
        len = strlen(lbuf);
        if(len > 0 && lbuf[len - 1] == '\n')
            lbuf[len - 1] = '\0';
        m = match();
        if((m && !vflag) || (!m && vflag))
        {
            ++count;
            if(!cflag)
            {
                if(fflag && fn)
                    file(fn);
                if(nflag)
                    printf(" %5d | ", lno);
                printf("%s\n", lbuf);
            }
        }
    }
    if(cflag)
    {
        if(fflag && fn)
            file(fn);
        printf(" Count: %d\n", count);
    }
    fclose(fp);
}
/*******************************************************/
/*
 * Try the compiled pattern in pbuf[] at every position of the current line
 * in lbuf[].  Returns 1 as soon as pmatch() succeeds at some position and 0
 * if it succeeds at none; an empty line is never tried.
 */
static int match(void)
{
    char *start = lbuf;         /* position at which the match is attempted */

    while(*start != '\0')
    {
        if(pmatch(start, pbuf) != NULL)
            return 1;
        start++;
    }
    return 0;
}
/*******************************************************/
static char *pmatch(char *line, char *pattern)
{
register char *l; /* Current line pointer */
register char *p; /* Current pattern pointer */
register char c; /* Current character */
char *e; /* End for STAR and PLUS match */
int op; /* Pattern operation */
int n; /* Class counter */
char *are; /* Start of STAR match */
l = line;
if(debug > 1)
printf("pmatch(\"%s\")\n", line);
p = pattern;
while ((op = *p++) != ENDPAT)
{
if(debug > 1)
printf("byte[%d] = 0%o, '%c', op = 0%o\n", l-line, *l, *l, op);
switch(op)
{
case CHAR:
if(grep_tolower(*l) != grep_tolower(*p++))
return(0);
l++;
break;
case BOL:
if(l != lbuf)
return(0);
break;
case EOL:
if(*l != '\0')
return(0);
break;
case ANY:
if(*l++ == '\0')
return(0);
break;
case DIGIT:
if((c = *l++) < '0' || (c > '9'))
return(0);
break;
case ALPHA:
c = grep_tolower(*l);
l++;
if(c < 'a' || c > 'z')
return(0);
break;
case NALPHA:
c = grep_tolower(*l);
l++;
if(c >= 'a' && c <= 'z')
break;
else if(c < '0' || c > '9')
return(0);
break;
case PUNCT:
c = *l++;
if(c == 0 || c > ' ')
return(0);
break;
case CLASS:
case NCLASS:
c = grep_tolower(*l);
l++;
n = *p++ & 0377;
do
{
if(*p == RANGE)
{
p += 3;
n -= 2;
if(c >= p[-2] && c <= p[-1])
break;
}
else if(c == *p++)
break;
}
while (--n > 1);
if((op == CLASS) == (n <= 1))
return(0);
if(op == CLASS)
p += n - 2;
break;
case MINUS:
e = pmatch(l, p); /* Look for a match */
while (*p++ != ENDPAT); /* Skip over pattern */
if(e) /* Got a match? */
l = e; /* Yes, update string */
break; /* Always succeeds */
case PLUS: /* One or more ... */
if((l = pmatch(l, p)) == 0)
return(0); /* Gotta have a match */
case STAR: /* Zero or more ... */
are = l; /* Remember line start */
while(*l != 0 && (e = pmatch(l, p)) != 0)
l = e; /* Get longest match */
while (*p++ != ENDPAT); /* Skip over pattern */
while (l >= are)
{ /* Try to match rest */
if((e = pmatch(l, p)) != 0)
return(e);
--l; /* Nope, try earlier */
}
return(0); /* Nothing else worked */
default:
printf("Bad op code %d\n", op);
error("Cannot happen -- match\n");
}
}
return(l);
}
/*******************************************************/
/*
 * Print the message s exactly as given (no newline is added) and terminate
 * the program with exit status 1.  Output goes to stdout rather than
 * stderr so that Zeus can capture it (JAJ).
 */
static void error(char *s)
{
    fputs(s, stdout);
    exit(1);
}
static int find_files(char *pattern, char *path)
{
struct ffblk ffblk;
int done;
char *str;
char *t_path;
int count = 0;
str = malloc(256);
t_path = malloc(256);
if(!str || !t_path)
error("Out of mem");
if(search_subdirectories)
{ /* First scan all subdirectories when the -d option is set */
strcpy(str, path);
strcat(str, "\\*.*");
done = findfirst(str, &ffblk, FA_DIREC);
while(!done)
{
if(ffblk.ff_attrib & FA_DIREC)
{
if(strcmp(ffblk.ff_name, ".") && strcmp(ffblk.ff_name, ".."))
{
strcpy(t_path, path);
strcat(t_path, "\\");
strcat(t_path, ffblk.ff_name);
count += find_files(pattern, t_path);
}
}
done = findnext(&ffblk);
}
}
strcpy(str, path);
strcat(str, "\\");
strcat(str, pattern);
done = findfirst(str, &ffblk, 0);
while(!done)
{
strcpy(t_path, path);
strcat(t_path, "\\");
strcat(t_path, ffblk.ff_name);
grep(t_path);
count++;
done = findnext(&ffblk);
}
free(str);
free(t_path);
return count;
}
/*
 * Return the next token of *str, where tokens are separated by any of the
 * characters in match.
 *
 * The delimiter that ends the token is overwritten with '\0' and *str is
 * advanced past it; when the token ends at the end of the string, *str is
 * set to NULL.  *found receives the delimiter that ended the token, or '\0'
 * if the end of the string did.  Returns NULL (leaving *found untouched)
 * once *str is NULL.
 */
static char *grep_strtok(char **str, char *match, char *found)
{
    char *token = *str;
    char *end;

    if(token == NULL)
        return NULL;

    /* First delimiter, or the terminator if the token has none */
    end = token + strcspn(token, match);
    *found = *end;
    if(*end == '\0')
    {
        *str = NULL;
    }
    else
    {
        *end = '\0';
        *str = end + 1;
    }
    return token;
}
/*
 * Return ch converted to lower case when the -i option is set, and ch
 * unchanged otherwise.
 */
static char grep_tolower(char ch)
{
    /* tolower() requires a value representable as unsigned char (or EOF);
       a plain char above 127 may be negative */
    return ignore_case_flag ? (char)tolower((unsigned char)ch) : ch;
}