home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
The Fred Fish Collection 1.5
/
ffcollection-1-5-1992-11.iso
/
ff_progs
/
txttools
/
bawk.lzh
/
BAWK
/
BAWK.C
< prev
next >
Wrap
C/C++ Source or Header
|
1991-08-16
|
13KB
|
678 lines
/*
* Bawk main program
*/
#define MAIN 1
#include <stdio.h>
#include "bawk.h"
/*
 * Program text taken from the command line (or a built-in default action).
 * While this is non-NULL, getcharacter() reads from this string instead of
 * from Fileptr.
 */
static char *pattern_arg = NULL; /* Command line bawk program pattern */
/*
 * Single-character pushback slot used by ungetcharacter() while pattern_arg
 * is set; 0 means the slot is empty.
 */
static int ungetc_arg = 0;
/*
 * Set by getline() when end of input is reached after a non-empty final
 * record; the next getline() call then closes the file. Cleared by endfile().
 */
static char eof_seen = 0;
/* Number of field buffers parse() has allocated so far. */
static int max_field_count = 0;
/*
* Main program
*/
main( argc, argv )
register int argc;
register char **argv;
{
	/*
	 * Usage: bawk { action | - | -f <actfile> } <file> ...
	 *
	 * The rules come from the first non-option argument (taken as
	 * program text), from "-f <actfile>", or from standard input
	 * when "-" is given before any rules were read.  Every argument
	 * after the rules names an input file ("-" meaning standard
	 * input).  If no input file is named, standard input is processed.
	 * The BEGIN action runs once before the first input, the END
	 * action once after the last.  Returns 0.
	 */
	register char gotrules = 0, didfile = 0, getstdin = 0;
	register char rule_file_flag = 0;
	DBUG_ENTER("main");
	/*
	 * Initialize global variables:
	 */
	Beginact = 0;
	Endact = 0;
	Rules = 0;
	Rulep = 0;
	Filename = 0;
	Linecount = 0;
	Saw_break = 0;
	Stackptr = Stackbtm - 1;
	Stacktop = Stackbtm + MAXSTACKSZ;
	Nextvar = Vartab;
	init_pop_array();
	strcpy( Fieldsep, " \t" );
	strcpy( Recordsep, "\n" );
	/*
	 * Parse command line
	 */
	while ( --argc )
	{
		if ( **(++argv) == '-' )
		{
			/*
			 * Process dash options.
			 */
			switch ( tolower( (unsigned char) argv[0][1] ) )
			{
			case '#':
				DBUG_PUSH(&argv[0][2]);
				continue;
			case 'f':
				/*
				 * "-f" consumes the following argument as the
				 * rules file name.  Only consume it when one is
				 * really there: otherwise argc would reach 0
				 * here and the loop's "--argc" would go negative
				 * and run past the end of argv.
				 */
				if ( !gotrules && argc > 1 )
				{
					rule_file_flag++;
					argv++;
					argc--;
				}
				else
					usage();
				break;
			case 0:
				/*
				 * A lone "-": read from standard input, either
				 * the rules (if none yet) or the data.
				 */
				if ( !gotrules )
					rule_file_flag++;
				getstdin++;
				break;
			default:
				usage();
			}
		}
		if ( gotrules )
		{
			/*
			 * Already read rules file - assume this is
			 * is a text file for processing.
			 */
			if ( !didfile )
			{
				/*
				 * didfile is kept as a plain flag rather than
				 * a counter so that it cannot wrap around when
				 * many input files are given (which would run
				 * the BEGIN action again).
				 */
				didfile = 1;
				if ( Beginact )
					doaction( Beginact );
			}
			if ( getstdin )
			{
				getstdin--;
				newfile( 0 );
			}
			else
				newfile( *argv );
			process();
		}
		else
		{
			/*
			 * No rules yet: this argument is either the rules
			 * file (or stdin) or the program text itself.
			 */
			if ( rule_file_flag )
			{
				if ( getstdin )
				{
					getstdin--;
					newfile( 0 );
				}
				else
					newfile( *argv );
			}
			else
				pattern_arg = *argv;
			compile();
			pattern_arg = NULL;
			gotrules = 1;
		}
	}
	if ( !gotrules )
		usage();
	if ( !didfile )
	{
		/*
		 * Didn't process any files yet - process stdin.
		 */
		newfile( 0 );
		if ( Beginact )
			doaction( Beginact );
		process();
	}
	if ( Endact )
		doaction( Endact );
	DBUG_RETURN(0);
}
/*
* Regular expression/action file compilation routines.
*/
void compile()
{
	/*
	 * Read the bawk program from the current input (see
	 * getcharacter()) and build the Rules chain from it, plus the
	 * special Beginact / Endact parse trees.  Rules that end up
	 * without an action get a default "print the record" action.
	 * The input is closed with endfile() when done.
	 */
	register int ch;
	register EXPR_NODE *tree;
	DBUG_ENTER("compile");
	while ( (ch = getcharacter()) != -1 )
	{
		switch ( ch )
		{
		case ' ':
		case '\t':
		case '\n':
			/* whitespace between rules is ignored */
			break;
		case '#':
			/* comment: discard up to end of line (or end of input) */
			do
				ch = getcharacter();
			while ( ch != -1 && ch != '\n' );
			break;
		case '{':
			DBUG_PRINT("compile",("action"));
			/*
			 * An action.  Put the brace back so that the action
			 * compiler sees the complete block.
			 */
			ungetcharacter( (char) '{' );
			if ( Rulep && Rulep->action )
			{
				/* current rule already has an action: chain a new rule */
				Rulep->nextrule = (RULE *)
					get_clear_memory( sizeof( *Rulep ) );
				Rulep = Rulep->nextrule;
			}
			if ( !Rulep )
			{
				/* very first rule: start the chain */
				Rules = Rulep = (RULE *)
					get_clear_memory( sizeof( *Rulep ) );
			}
			Rulep->action = act_compile( Workbuf );
			break;
		case ',':
			DBUG_PRINT("compile",("stop pattern"));
			/*
			 * Second half of a "start,stop" pattern pair; the
			 * comma itself has already been consumed.
			 */
			if ( !Rulep || !Rulep->pattern.start )
				error( "stop pattern without a start",
					RE_ERROR );
			if ( Rulep->pattern.stop )
				error( "already have a stop pattern",
					RE_ERROR );
			Rulep->pattern.stop = pat_compile( Workbuf );
			break;
		default:
			/*
			 * Anything else is taken to begin a pattern.
			 */
			DBUG_PRINT("compile",("start pattern"));
			ungetcharacter( (char) ch );
			tree = pat_compile( Workbuf );
			if ( *Workbuf == T_BEGIN )
			{
				/* "BEGIN": the following action becomes Beginact */
				Beginact = act_compile( Workbuf );
			}
			else if ( *Workbuf == T_END )
			{
				/* "END": the following action becomes Endact */
				Endact = act_compile( Workbuf );
			}
			else
			{
				if ( Rulep )
				{
					/* a rule already exists: chain a new one */
					Rulep->nextrule = (RULE *)
						get_clear_memory( sizeof( *Rulep ) );
					Rulep = Rulep->nextrule;
				}
				if ( !Rulep )
				{
					/* very first rule: start the chain */
					Rules = Rulep = (RULE *)
						get_clear_memory( sizeof( *Rulep ) );
				}
				if ( Rulep->pattern.start )
					error( "already have a start pattern",
						RE_ERROR );
				Rulep->pattern.start = tree;
			}
			break;
		}
	}
	/*
	 * Give every rule that has no action the default one.  The default
	 * action text is fed to the action compiler through pattern_arg,
	 * the same channel used for command line programs.
	 */
	Rulep = Rules;
	while ( Rulep )
	{
		if ( !Rulep->action )
		{
			pattern_arg = "{printf \"%s\n\", $0}";
			Rulep->action = act_compile( Workbuf );
			pattern_arg = NULL;
		}
		Rulep = Rulep->nextrule;
	}
	endfile();
	DBUG_VOID_RETURN;
}
/*
* Text file main processing loop.
*/
void process()
{
	/*
	 * Main loop over the current input: fetch one record at a time
	 * with getline(), split it into Fields, then run every rule in
	 * the Rules chain against it.
	 */
	register int i;
	DBUG_ENTER("process");
	Recordcount = 0;
	while ( getline() )
	{
		/*
		 * With an empty record separator, newline also acts as a
		 * field separator.
		 */
		if ( *Recordsep == 0 )
			strcpy( Fieldsep, " \t\n" );
		Fieldcount = parse( Linebuf, Fields, Fieldsep );
		DBUG_PRINT("process",( "parsed %d words:", Fieldcount ));
		DBUG_EXECUTE("process",for(i=0; i<Fieldcount; ++i )DBUG_PRINT("process",("<%s>",Fields[i])););
		for ( Rulep = Rules; Rulep; Rulep = Rulep->nextrule )
		{
			if ( !Rulep->pattern.start )
			{
				/* rule without a pattern: applies to every record */
				doaction( Rulep->action );
				continue;
			}
			if ( Rulep->pattern.startseen )
			{
				/*
				 * Inside a start/stop range: run the action,
				 * then leave the range if this record matches
				 * the stop pattern.
				 */
				doaction( Rulep->action );
				if ( dopattern( Rulep->pattern.stop ) )
					Rulep->pattern.startseen = 0;
				continue;
			}
			if ( dopattern( Rulep->pattern.start ) )
			{
				/*
				 * Start pattern matched: run the action, and
				 * open a range if the rule has a stop pattern.
				 */
				doaction( Rulep->action );
				if ( Rulep->pattern.stop )
					Rulep->pattern.startseen = 1;
			}
		}
	}
	DBUG_VOID_RETURN;
}
/*
* Miscellaneous functions
*/
parse( str, wrdlst, delim )
register char *str;
char *wrdlst[];
char *delim;
{
	/*
	 * Split "str" into words and copy them into the buffers of
	 * "wrdlst".  Words are separated by runs of one or more characters
	 * from "delim".  Returns the number of words stored.
	 *
	 * Word buffers are allocated on demand, MAXLINELEN+1 bytes each so
	 * that a longer string can later be copied into a field, and are
	 * reused on subsequent calls; the count of allocated buffers lives
	 * in the file-static max_field_count.
	 */
	register int nwords = 0;
	register char *dp, *out;
	char word[ MAXLINELEN+1 ];
	DBUG_ENTER("parse");
	for ( ;; )
	{
		/* step over any delimiters in front of the next word */
		for ( ; *str; ++str )
		{
			for ( dp = delim; *dp && *dp != *str; ++dp )
				;
			if ( !*dp )
				break;		/* not a delimiter */
		}
		if ( !*str )
			break;			/* nothing left */
		/* gather characters up to the next delimiter or the end */
		out = word;
		for ( ; *str; ++str )
		{
			for ( dp = delim; *dp && *dp != *str; ++dp )
				;
			if ( *dp )
				break;		/* hit a delimiter */
			*out++ = *str;
		}
		*out = 0;
		if ( nwords == max_field_count )
		{
			/* first time this many fields are seen: add a buffer */
			wrdlst[ nwords ] = getmemory( MAXLINELEN+1 );
			max_field_count++;
		}
		strcpy( wrdlst[ nwords ], word );
		++nwords;
	}
	DBUG_RETURN(nwords);
}
void unparse( wrdlst, wrdcnt, str, delim )
char *wrdlst[];
register int wrdcnt;
register char *str;
char *delim;
{
/*
* Replace all the words in "str" with the words in "wrdlst",
* maintaining the same word seperation distance as found in
* the string.
* A "word" is a sequence of characters delimited by one or more
* of the characters found in the string "delim".
*/
register int wc;
register char *sp, *cp, c;
char strbuf[ MAXLINELEN+1 ], *start;
DBUG_ENTER("unparse");
wc = 0; /* next word in "wrdlst" */
sp = strbuf; /* points to our local string */
start = str; /* save start address of "str" for later... */
while ( *str )
{
/*
* Copy the field delimiters from the original string to
* our local version.
*/
while(c = *str++)
{
cp = delim;
while(*cp && c != *cp)
cp++;
if(!*cp)
break;
*sp++ = c;
}
str--;
if ( !*str )
break;
/*
* Skip over the field in the original string and...
*/
while(c = *str++)
{
cp = delim;
while(*cp && c != *cp)
cp++;
if(*cp)
break;
}
str--;
if ( wc < wrdcnt )
{
/*
* ...copy in the field in the wordlist instead.
*/
cp = wrdlst[ wc++ ];
while(*sp++ = *cp++);
sp--;
}
}
/*
* Tie off the local string, then copy it back to caller's string.
*/
*sp = 0;
strcpy( start, strbuf );
DBUG_VOID_RETURN;
}
char *
getmemory( len )
register unsigned len;
{
	/*
	 * Allocate "len" bytes with malloc().  On failure, report
	 * "out of memory" through error() with MEM_ERROR and, should
	 * error() return, hand back NULL.
	 */
	register char *block;
	DBUG_ENTER("getmemory");
	block = malloc( len );
	if ( !block )
	{
		error( "out of memory", MEM_ERROR );
		DBUG_RETURN(NULL);
	}
	DBUG_RETURN(block);
}
char *
get_clear_memory( len )
register unsigned len;
{
	/*
	 * Allocate "len" zero-filled bytes with calloc().  On failure,
	 * report "out of memory" through error() with MEM_ERROR and,
	 * should error() return, hand back NULL.
	 */
	register char *cp;
	/* Debug tag names this function (it used to say "getmemory"). */
	DBUG_ENTER("get_clear_memory");
	cp = calloc( 1, len );
	if ( cp )
	{
		DBUG_RETURN(cp);
	}
	error( "out of memory", MEM_ERROR );
	DBUG_RETURN(NULL);
}
EXPR_NODE *get_expr_node(operator)
char operator;
{
	/*
	 * Allocate an expression tree node via getmemory(), give it the
	 * supplied operator and no children, and return it.
	 */
	register EXPR_NODE *fresh;
	DBUG_ENTER("get_expr_node");
	fresh = (EXPR_NODE *) getmemory(sizeof(EXPR_NODE));
	fresh->operator = operator;
	fresh->right = NULL;
	fresh->left = NULL;
	DBUG_PRINT("get_expr_node",("operator = '%s'",token_name[operator]));
	DBUG_RETURN(fresh);
}
void newfile( s )
register char *s;
{
	/*
	 * Make "s" the current input file: reset Linecount, record the
	 * name in Filename and open the file on Fileptr.  A null "s"
	 * selects standard input.  A file that cannot be opened is
	 * reported through error() with FILE_ERROR.
	 */
	DBUG_ENTER("newfile");
	Linecount = 0;
	Filename = s;
	if ( !s )
	{
		/*
		 * No file name given - process standard input.
		 */
		Fileptr = stdin;
		Filename = "standard input";
		DBUG_VOID_RETURN;
	}
#ifdef BDS_C
	if ( fopen( s, Fileptr = Curfbuf ) == -1 )
#else
	if ( !(Fileptr = fopen( s, "r" )) )
#endif
		error( "file not found", FILE_ERROR );
	DBUG_VOID_RETURN;
}
getline()
{
/*
* Read one record from the current input (via getcharacter()) into
* Linebuf and NUL-terminate it.
* Returns 1 when a record was read (Recordcount is incremented), or 0
* at end of input, in which case endfile() has been called.
* "len" counts the characters accepted into the record; reaching
* MAXLINELEN reports "Input record too long" with RECORD_ERROR.
*/
register int rtn, len = 0;
register char *cp = Linebuf, *last_nl, *sep = Recordsep;
DBUG_ENTER("getline");
/*
* The previous call returned the final record and saw end of input
* while doing so: close the input now and report the end.
*/
if(eof_seen)
{
endfile();
DBUG_RETURN(0);
}
if(*sep)
{
/*
* Non-empty record separator: store characters until one equals the
* first character of Recordsep, or until end of input.
*/
while((*cp++ = rtn = getcharacter()) != *sep++ && rtn != -1)
{
/*
* NOTE(review): this scans the remaining characters of Recordsep, but
* the break only leaves this inner loop, so as written only
* Recordsep[0] ends a record - confirm whether that is intended.
*/
while(*sep)
{
if(rtn == *sep++)
break;
}
if( ++len == MAXLINELEN )
error("Input record too long", RECORD_ERROR);
/* rewind to the start of the separator for the next character */
sep = Recordsep;
}
} else /* Treat an empty line as record separator. */
{
/*
* Read whole lines, newlines included, until an empty line or end of
* input.
* NOTE(review): the newline ending each line is stored but not counted
* in "len", so the MAXLINELEN check does not bound the total number of
* bytes stored in this mode - verify against the size of Linebuf.
*/
while(1)
{
/* remember where this line starts so an empty line can be detected */
last_nl = cp;
while((*cp++ = rtn = getcharacter()) != '\n' &&
rtn != -1)
{
if( ++len == MAXLINELEN )
error("Input record too long",
RECORD_ERROR);
}
/* exactly one character stored for this line: it was empty (or EOF) */
if(((cp - last_nl) == 1) || (rtn == -1))
break;
}
}
/* overwrite the last stored character (the terminator) with a NUL */
*(--cp) = 0;
if ( rtn == -1 )
{
/*
* End of input.  If some characters were collected, return them as
* a final record and remember to finish on the next call; otherwise
* close the input and report the end right away.
*/
if(len)
eof_seen = 1;
else
{
endfile();
DBUG_RETURN(0);
}
}
++Recordcount;
DBUG_RETURN(1);
}
int getcharacter()
{
	/*
	 * Fetch the next input character.
	 *
	 * While pattern_arg is set the characters come from that string
	 * (a pushed-back character in ungetc_arg takes precedence) and
	 * EOF is returned once the string is exhausted.  Otherwise the
	 * character is read from Fileptr and Linecount is bumped on every
	 * newline.
	 *
	 * WARNING: your getc() must convert lines that end with CR+LF
	 * to LF and CP/M's EOF character (^Z) to a -1.
	 * Also, getc() must return a -1 when attempting to read from
	 * an unopened file.
	 */
	register int ch;
	DBUG_ENTER("getcharacter");
	if ( !pattern_arg )
	{
#ifdef BDS_C
		/*
		 * BDS C doesn't do CR+LF to LF and ^Z to -1 conversions
		 * <gag>
		 */
		if ( (ch = getc( Fileptr )) == '\r' )
		{
			if ( (ch = getc( Fileptr )) != '\n' )
			{
				ungetc( ch );
				ch = '\r';
			}
		}
		else if ( ch == 26 )	/* ^Z */
			ch = -1;
#else
		ch = getc( Fileptr );
#endif
		if ( ch == '\n' )
			++Linecount;
	}
	else if ( ungetc_arg )
	{
		/* hand back the pushed-back character first */
		ch = ungetc_arg;
		ungetc_arg = 0;
	}
	else
		ch = *pattern_arg ? *pattern_arg++ : EOF;
	DBUG_PRINT("getcharacter",("'%c'", ch));
	DBUG_RETURN(ch);
}
ungetcharacter( c )
register char c;
{
	/*
	 * Return character "c" to the input so that the next
	 * getcharacter() call yields it again.  Recordcount is decremented
	 * when "c" equals the first record separator character, and
	 * Linecount when "c" is a newline.  While pattern_arg is set the
	 * character is parked in ungetc_arg; otherwise it is pushed back
	 * onto Fileptr with ungetc().  Returns the pushed-back character
	 * (or whatever ungetc() returns).
	 */
	DBUG_ENTER("ungetcharacter");
	if ( *Recordsep == c )
		Recordcount--;
	if ( '\n' == c )
		Linecount--;
	DBUG_PRINT("ungetcharacter",("'%c'", c));
	if ( !pattern_arg )
	{
		DBUG_RETURN(ungetc( c, Fileptr ));
	}
	DBUG_RETURN(ungetc_arg = c);
}
void endfile()
{
	/*
	 * Finish with the current input: close Fileptr and reset the
	 * per-file state (eof_seen, Filename, Linecount).
	 *
	 * compile() calls this even when the program text came from the
	 * command line and no file was ever opened, so Fileptr may still
	 * be null here; passing a null stream to fclose() is undefined
	 * behaviour, hence the guard.
	 */
	DBUG_ENTER("endfile");
	if ( Fileptr )
		fclose( Fileptr );
	eof_seen = 0;
	Filename = NULL;
	Linecount = 0;
	DBUG_VOID_RETURN;
}
void error( s, severe )
register char *s;
register int severe;
{
	/*
	 * Print message "s" on stderr, prefixed with the current file
	 * name and line number when they are known.  A non-zero "severe"
	 * ends the program with that value as exit status; with zero the
	 * function simply returns.
	 */
	DBUG_ENTER("error");
	if ( Filename != NULL )
	{
		fprintf( stderr, "%s:", Filename );
	}
	if ( Linecount != 0 )
	{
		fprintf( stderr, " line %d:", Linecount );
	}
	fprintf( stderr, " %s\n", s );
	if ( severe != 0 )
	{
		exit( severe );
	}
	DBUG_VOID_RETURN;
}
void usage()
{
DBUG_ENTER("usage");
error( "Usage: bawk { action | - | -f <actfile> } <file> ...",
USAGE_ERROR );
DBUG_VOID_RETURN;
}