home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Usenet 1994 October
/
usenetsourcesnewsgroupsinfomagicoctober1994disk2.iso
/
unix
/
volume27
/
jam
/
part03
< prev
next >
Wrap
Text File
|
1993-11-14
|
62KB
|
2,657 lines
Newsgroups: comp.sources.unix
From: seiwald@vix.com (Christopher Seiwald)
Subject: v27i083: jam - just another make, Part03/05
References: <1.753385306.22859@gw.home.vix.com>
Sender: unix-sources-moderator@gw.home.vix.com
Approved: vixie@gw.home.vix.com
Submitted-By: seiwald@vix.com (Christopher Seiwald)
Posting-Number: Volume 27, Issue 83
Archive-Name: jam/part03
Submitted-by: seiwald@vix.com
Archive-name: jam - make(1) redux/part03
#!/bin/sh
# This is part 03 of jam - make(1) redux
# ============= lists.c ==============
if test -f 'lists.c' -a X"$1" != X"-c"; then
echo 'x - skipping lists.c (File already exists)'
else
echo 'x - extracting lists.c (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'lists.c' &&
X/*
X * Copyright 1993 Christopher Seiwald.
X */
X
X# include "jam.h"
X# include "newstr.h"
X# include "lists.h"
X
X/*
X * lists.c - maintain lists of strings
X *
X * The whole of jam relies on lists of strings as a datatype. This
X * module, in conjunction with newstr.c, handles these relatively
X * efficiently.
X *
X * External routines:
X *
X * list_new() - tack a string onto the end of a list of strings
X * list_copy() - copy a whole list of strings
X * list_sublist() - copy a subset of a list of strings
X * list_free() - free a list of strings
X * list_print() - print a list of strings to stdout
X *
X * This implementation essentially uses a singly linked list, but
X * guarantees that the head element of every list has a valid pointer
X * to the tail of the list, so the new elements can efficiently and
X * properly be appended to the end of a list.
X *
X * To avoid massive allocation, list_free() just tacks the whole freed
X * chain onto freelist and list_new() looks on freelist first for an
X * available list struct. list_free() does not free the strings in the
X * chain: it lazily lets list_new() do so.
X */
X
Xstatic LIST *freelist = 0; /* junkpile for list_free() */
X
X/*
X * list_new() - tack a string onto the end of a list of strings
X *
X * head    list to extend, or 0 to start a new list
X * string  element to store; the pointer itself is kept, not duplicated
X *
X * Returns the head of the list: the new node when head was 0, otherwise
X * head itself.  The head's tail pointer is updated to the new node.
X * Terminates the program if no memory is available for a new node.
X */
X
XLIST *
Xlist_new( head, string )
XLIST *head;
Xchar *string;
X{
X    LIST *l;
X
X    if( DEBUG_LISTS )
X        printf( "list > %s <\n", string );
X
X    /* Reuse a node from the freelist when one is available.  Its old */
X    /* string is released here because list_free() defers that work. */
X    /* Otherwise allocate a fresh node. */
X
X    if( freelist )
X    {
X        l = freelist;
X        freestr( l->string );
X        freelist = freelist->next;
X    }
X    else
X    {
X        l = (LIST *)malloc( sizeof( *l ) );
X
X        /* Without this check the stores below would write through */
X        /* a null pointer when memory is exhausted. */
X
X        if( !l )
X        {
X            printf( "fatal error: out of memory\n" );
X            exit( -1 );
X        }
X    }
X
X    /* If first on chain, head points here. */
X    /* If adding to chain, tack us on. */
X    /* Tail must point to this new, last element. */
X
X    if( !head )
X        head = l;
X    else
X        head->tail->next = l;
X
X    head->tail = l;
X    l->next = 0;
X    l->string = string;
X
X    return head;
X}
X
X/*
X * list_copy() - append a copy of every element of nl to l
X *
X * Returns the head of the extended list: l itself, or the first new
X * node when l was empty.
X */
X
XLIST *
Xlist_copy( l, nl )
XLIST *l;
XLIST *nl;
X{
X    while( nl )
X    {
X        l = list_new( l, copystr( nl->string ) );
X        nl = list_next( nl );
X    }
X
X    return l;
X}
X
X/*
X * list_sublist() - copy a subset of a list of strings
X */
X
XLIST *
Xlist_sublist( l, start, count )
XLIST *l;
X{
X LIST *nl = 0;
X
X for( ; l && start--; l = list_next( l ) )
X ;
X
X for( ; l && count--; l = list_next( l ) )
X nl = list_new( nl, copystr( l->string ) );
X
X return nl;
X}
X
X/*
X * list_free() - hand a whole list back for reuse
X *
X * Nothing is released here: the chain is spliced in front of freelist
X * and list_new() frees each string when it recycles the node.
X */
X
Xvoid
Xlist_free( head )
XLIST *head;
X{
X    if( !head )
X        return;
X
X    head->tail->next = freelist;
X    freelist = head;
X}
X
X/*
X * list_print() - write each string of a list to stdout
X *
X * Every element, including the last, is followed by one blank.
X */
X
Xvoid
Xlist_print( l )
XLIST *l;
X{
X    while( l )
X    {
X        printf( "%s ", l->string );
X        l = list_next( l );
X    }
X}
SHAR_EOF
chmod 0444 lists.c ||
echo 'restore of lists.c failed'
Wc_c="`wc -c < 'lists.c'`"
test 2752 -eq "$Wc_c" ||
echo 'lists.c: original size 2752, current size' "$Wc_c"
fi
# ============= lists.h ==============
if test -f 'lists.h' -a X"$1" != X"-c"; then
echo 'x - skipping lists.h (File already exists)'
else
echo 'x - extracting lists.h (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'lists.h' &&
X/*
X * Copyright 1993 Christopher Seiwald.
X */
X
X/*
X * lists.h - the LIST structure and routines to manipulate them
X */
X
X/*
X * LIST - list of strings
X */
X
X/* Declared ahead of the struct so its members can use the LIST name. */
Xtypedef struct _list LIST;
X
X/* One node of a singly linked chain of strings. */
Xstruct _list {
X LIST *next; /* following node; list_new() sets 0 in the last node */
X LIST *tail; /* last node of the list; only valid in head node */
X char *string; /* element text; list_new() calls freestr() on it at reuse */
X} ;
X
XLIST *list_copy();
XLIST *list_new();
Xvoid list_free();
Xvoid list_print();
XLIST *list_sublist();
X
X# define list_next( l ) ((l)->next)
SHAR_EOF
chmod 0444 lists.h ||
echo 'restore of lists.h failed'
Wc_c="`wc -c < 'lists.h'`"
test 427 -eq "$Wc_c" ||
echo 'lists.h: original size 427, current size' "$Wc_c"
fi
# ============= make.c ==============
if test -f 'make.c' -a X"$1" != X"-c"; then
echo 'x - skipping make.c (File already exists)'
else
echo 'x - extracting make.c (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'make.c' &&
X/*
X * Copyright 1993 Christopher Seiwald.
X */
X
X/*
X * make.c - bring a target up to date, once rules are in place
X *
X * This module controls the execution of rules to bring a target and
X * its dependencies up to date. It is invoked after the targets, rules,
X * et al. described in rules.h are created by the interpreting of the
X * jam files.
X *
X * External routines:
X * make() - make a target, given its name
X *
X * Internal routines:
X * make0() - bind and scan everything to make a TARGET
X * make1() - execute commands to update a TARGET
X * make1a() - execute all actions to build a target
X * make1b() - execute single command to update a target
X * make1c() - execute a (piecemeal) piece of a command to update a target
X * make1u() - remove targets after interrupted command
X * makexlist() - turn a list of targets into a LIST, for $(<) and $(>)
X */
X
X# include "jam.h"
X
X# include "lists.h"
X# include "parse.h"
X# include "variable.h"
X# include "rules.h"
X
X# include "search.h"
X# include "newstr.h"
X# include "make.h"
X# include "headers.h"
X# include "execcmd.h"
X
Xstatic void make0();
Xstatic void make1();
Xstatic int make1a();
Xstatic int make1b();
Xstatic int make1c();
Xstatic int make1chunk();
Xstatic void make1u();
Xstatic LIST *makexlist();
X
X# define max( a,b ) ((a)>(b)?(a):(b))
X
X/* Tallies collected by make0() and make1() for the trace messages. */
Xtypedef struct {
X int temp; /* targets whose fate came out T_FATE_ISTMP */
X int updating; /* targets with actions and a fate above T_FATE_ISTMP */
X int dontknow; /* targets whose fate came out T_FATE_DONTKNOW */
X int targets; /* targets that make0() finished examining */
X int made; /* targets whose actions make1() has run */
X} COUNTS ;
X
X# define DONTCARE 0
X# define DOCARE 1
X
X/*
X * Printable fate names, indexed by t->fate in make0()'s trace output.
X * NOTE(review): the order presumably mirrors the T_FATE_* values, which
X * are defined outside this file -- confirm before reordering either.
X */
Xstatic char *target_fate[] =
X{
X "init",
X "making",
X "ok",
X "touched",
X "temp",
X "missing",
X "old",
X "update",
X "can't"
X} ;
X
X# define spaces(x) ( " " + 16 - ( x > 16 ? 16 : x ) )
X
X/*
X * make() - make a list of targets, given their names
X *
X * Runs make0() over every named target to decide what needs doing,
X * reports the tallies when tracing is on, then runs make1() over the
X * same targets to carry the work out.
X */
X
Xvoid
Xmake( n_targets, targets )
Xint n_targets;
Xchar **targets;
X{
X    COUNTS tally;
X    COUNTS *counts = &tally;
X    int n;
X
X    memset( (char *)counts, 0, sizeof( *counts ) );
X
X    /* First pass: bind the targets and determine their fates. */
X
X    n = 0;
X    while( n < n_targets )
X    {
X        make0( bindtarget( targets[n] ), (time_t)0, 0, counts );
X        n++;
X    }
X
X    if( DEBUG_MAKEQ )
X    {
X        if( counts->targets )
X        {
X            printf( "...found %d target(s)...\n", counts->targets );
X        }
X    }
X
X    if( DEBUG_MAKE )
X    {
X        if( counts->temp )
X        {
X            printf( "...using %d temp target(s)...\n", counts->temp );
X        }
X        if( counts->updating )
X        {
X            printf( "...updating %d target(s)...\n", counts->updating );
X        }
X        if( counts->dontknow )
X        {
X            printf( "...can't make %d target(s)...\n", counts->dontknow );
X        }
X    }
X
X    /* Second pass: execute whatever the first pass called for. */
X
X    n = 0;
X    while( n < n_targets )
X    {
X        make1( bindtarget( targets[n] ), counts );
X        n++;
X    }
X}
X
X/*
X * make0() - bind and scan everything to make a TARGET
X *
X * Make0() recursively binds a target, searches for #included headers,
X * calls itself on those headers, and calls itself on any dependents.
X *
X * t       target to examine; its fate, time, htime and hfate are set
X * parent  timestamp handed down by the caller (0 from make())
X * depth   recursion depth, used only to indent the trace output
X * counts  tallies updated once per target examined
X */
X
Xstatic void
Xmake0( t, parent, depth, counts )
XTARGET *t;
Xtime_t parent;
Xint depth;
XCOUNTS *counts;
X{
X TARGETS *c;
X int fate, hfate;
X time_t last, hlast;
X char *flag = "";
X
X if( DEBUG_MAKEPROG )
X printf( "make\t--\t%s%s\n", spaces( depth ), t->name );
X
X /*
X * Step 1: don't remake if already trying or tried
X */
X
X switch( t->fate )
X {
X case T_FATE_MAKING:
X /* Still being examined further up the recursion: a cycle. */
X printf( "warning: %s depends on itself\n", t->name );
X return;
X
X default:
X /* Fate already decided by an earlier visit. */
X return;
X
X case T_FATE_INIT:
X break;
X }
X
X t->fate = T_FATE_MAKING;
X
X /*
X * Step 2: under the influence of "on target" variables,
X * bind the target and search for headers.
X */
X
X /* Step 2a: set "on target" variables. */
X
X pushsettings( t->settings );
X
X /* Step 2b: find and timestamp the target file (if it's a file). */
X
X if( t->binding == T_BIND_UNBOUND && !( t->flags & T_FLAG_NOTIME ) )
X {
X t->boundname = search( t->name, &t->time );
X t->binding = t->time ? T_BIND_EXISTS : T_BIND_MISSING;
X }
X
X /* If temp file doesn't exist, use parent */
X
X if( t->binding == T_BIND_MISSING && t->flags & T_FLAG_TEMP && parent )
X {
X t->time = parent;
X t->binding = t->time ? T_BIND_TEMP : T_BIND_MISSING;
X }
X
X /* Step 2c: If it's a file, search for headers. */
X
X if( t->binding == T_BIND_EXISTS )
X headers( t );
X
X /* Step 2d: reset "on target" variables */
X
X popsettings( t->settings );
X
X /*
X * Step 3: recursively make0() dependents
X */
X
X /* Step 3a: recursively make0() dependents */
X /* Track the newest time and the largest fate among them. */
X
X last = 0;
X fate = T_FATE_STABLE;
X
X for( c = t->deps; c; c = c->next )
X {
X make0( c->target, t->time, depth + 1, counts );
X last = max( last, c->target->time );
X last = max( last, c->target->htime );
X fate = max( fate, c->target->fate );
X fate = max( fate, c->target->hfate );
X }
X
X /* Step 3b: recursively make0() headers */
X /* Headers receive this target's own parent time, not t->time. */
X
X hlast = 0;
X hfate = T_FATE_STABLE;
X
X for( c = t->headers; c; c = c->next )
X {
X make0( c->target, parent, depth + 1, counts );
X hlast = max( hlast, c->target->time );
X hlast = max( hlast, c->target->htime );
X hfate = max( hfate, c->target->fate );
X hfate = max( hfate, c->target->hfate );
X }
X
X /*
X * Step 4: aftermath: determine fate and propagate dependents' time
X * and fate.
X */
X
X /* Step 4a: determine fate: rebuild target or what? */
X /* If children newer than target or */
X /* If target doesn't exist, rebuild. */
X /* The tests run in priority order; the first that holds wins. */
X
X if( fate > T_FATE_STABLE )
X {
X fate = T_FATE_UPDATE;
X }
X else if( t->binding == T_BIND_MISSING )
X {
X fate = T_FATE_MISSING;
X }
X else if( t->binding == T_BIND_EXISTS && last > t->time )
X {
X fate = T_FATE_OUTDATED;
X }
X else if( t->binding == T_BIND_TEMP && last > t->time )
X {
X fate = T_FATE_OUTDATED;
X }
X else if( t->binding == T_BIND_EXISTS && t->flags & T_FLAG_TEMP )
X {
X fate = T_FATE_ISTMP;
X }
X else if( t->flags & T_FLAG_TOUCHED )
X {
X fate = T_FATE_TOUCHED;
X }
X
X /* Step 4b: handle missing files */
X /* If it's missing and there are no actions to create it, boom. */
X /* If we can't make a target we don't care about, 'sokay */
X
X if( fate == T_FATE_MISSING && !t->actions && !t->deps )
X {
X if( t->flags & T_FLAG_NOCARE )
X {
X fate = T_FATE_STABLE;
X }
X else
X {
X fate = T_FATE_DONTKNOW;
X printf( "don't know how to make %s\n", t->name );
X }
X }
X
X /* Step 4c: propagate dependents' time & fate. */
X /* The header results are kept apart in htime and hfate. */
X
X t->time = max( t->time, last );
X t->fate = fate;
X
X t->htime = hlast;
X t->hfate = hfate;
X
X /*
X * Step 5: a little harmless tabulating for tracing purposes
X */
X
X if( !( ++counts->targets % 1000 ) && DEBUG_MAKE )
X printf( "...patience...\n" );
X
X if( fate > T_FATE_ISTMP && t->actions )
X counts->updating++;
X else if( fate == T_FATE_ISTMP )
X counts->temp++;
X else if( fate == T_FATE_DONTKNOW )
X counts->dontknow++;
X
X /* Mark targets that exist and are newer than the parent time. */
X
X if( t->binding == T_BIND_EXISTS && parent && t->time > parent )
X flag = "*";
X
X if( DEBUG_MAKEPROG )
X printf( "make%s\t%s\t%s%s\n",
X flag, target_fate[ t->fate ],
X spaces( depth ), t->name );
X}
X
X/*
X * make1() - execute commands to update a TARGET
X *
X * t       target to update; its progress field records the outcome
X * counts  tallies; `made' is bumped each time a target's actions are run
X *
X * Headers and dependents are brought up to date first.  The target's
X * own actions run only if none of them failed or was interrupted.
X */
X
Xstatic void
Xmake1( t, counts )
XTARGET *t;
XCOUNTS *counts;
X{
X TARGETS *c;
X char *failed = "dependents";
X
X /* Don't remake if already trying or tried */
X
X if( t->progress != T_MAKE_INIT )
X return;
X
X t->progress = T_MAKE_STABLE;
X
X /* recursively make1() headers */
X /* Keep the largest progress value seen and note which child gave it. */
X /* Stop early once an interrupt has been recorded. */
X
X for( c = t->headers; c && t->progress != T_MAKE_INTR; c = c->next )
X {
X make1( c->target, counts );
X
X if( c->target->progress > t->progress )
X {
X t->progress = c->target->progress;
X failed = c->target->name;
X }
X }
X
X /* recursively make1() dependents */
X
X for( c = t->deps; c && t->progress != T_MAKE_INTR; c = c->next )
X {
X make1( c->target, counts );
X
X if( c->target->progress > t->progress )
X {
X t->progress = c->target->progress;
X failed = c->target->name;
X }
X }
X
X /* If it's missing and there are no actions to create it, boom. */
X /* if reasonable, execute all actions to make target */
X /* NOTE(review): the switch has no case for T_FATE_TOUCHED and no */
X /* default, so such a target keeps the progress set above -- confirm */
X /* that this is intended. */
X
X if( t->progress == T_MAKE_FAIL )
X {
X printf( "%s skipped for lack of %s\n", t->name, failed );
X }
X else if( t->progress == T_MAKE_INTR )
X {
X return;
X }
X else switch( t->fate )
X {
X case T_FATE_INIT:
X case T_FATE_MAKING:
X /* shouldn't happen */ ;
X /* falls through to the do-nothing case below */
X
X case T_FATE_STABLE:
X break;
X
X case T_FATE_ISTMP:
X if( DEBUG_MAKEQ )
X printf( "using %s\n", t->name );
X t->progress = T_MAKE_OK;
X break;
X
X case T_FATE_MISSING:
X case T_FATE_OUTDATED:
X case T_FATE_UPDATE:
X /* Set "on target" vars, execute actions, unset vars */
X
X pushsettings( t->settings );
X t->progress = make1a( t->name, t->actions );
X popsettings( t->settings );
X
X if( !( ++counts->made % 100 ) && DEBUG_MAKE )
X printf( "...on %dth target...\n", counts->made );
X
X break;
X
X case T_FATE_DONTKNOW:
X t->progress = T_MAKE_FAIL;
X break;
X }
X}
X
X/*
X * make1a() - execute all actions to build a target
X *
X * Executes all actions to build a given target, if the actions haven't
X * been executed previously.
X *
X * name     target name, used only in the trace message
X * actions  chain of actions attached to the target
X *
X * Returns:
X * T_MAKE_FAIL execution of command failed
X * T_MAKE_OK execution successful
X * Any other status produced by make1b() (such as T_MAKE_INTR) is
X * passed back unchanged.
X */
X
Xstatic int
Xmake1a( name, actions )
Xchar *name;
XACTIONS *actions;
X{
X /* Step through actions */
X /* Actions may be shared with other targets or grouped with */
X /* RULE_TOGETHER, so actions already executed are expected. */
X
X for( ; actions; actions = actions->next )
X {
X ACTION *action = actions->action;
X RULE *rule = action->rule;
X LIST *targets;
X LIST *sources;
X ACTIONS *a1;
X
X /* Only do rules with commands to execute. */
X /* If this action has already been executed, use saved progress */
X
X if( !rule->actions )
X continue;
X
X switch( action->progress )
X {
X case T_MAKE_OK: continue;
X case T_MAKE_FAIL: return T_MAKE_FAIL;
X case T_MAKE_INIT: /* fall through */;
X }
X
X /* Make LISTS of targets and sources */
X /* If `execute together` has been specified for this rule, tack */
X /* on sources from each instance of this rule for this target. */
X
X targets = makexlist( (LIST *)0, action->targets, 0 );
X sources = makexlist( (LIST *)0, action->sources,
X rule->flags & RULE_NEWSRCS );
X
X if( rule->flags & RULE_TOGETHER )
X for( a1 = actions->next; a1; a1 = a1->next )
X if( a1->action->rule == rule )
X {
X sources = makexlist( sources, a1->action->sources,
X rule->flags & RULE_NEWSRCS );
X }
X
X /* Execute single command, saving progress */
X /* If `execute together` has been specified for this rule, */
X /* distribute progress to each instance of this rule. */
X
X if( rule->flags & RULE_QUIETLY ? DEBUG_MAKEQ : DEBUG_MAKE )
X printf( "%s %s\n", rule->name, name );
X
X action->progress = make1b( rule, targets, sources );
X
X if( rule->flags & RULE_TOGETHER )
X for( a1 = actions->next; a1; a1 = a1->next )
X if( a1->action->rule == rule )
X {
X a1->action->progress = action->progress;
X }
X
X /* Free target & source lists */
X
X list_free( targets );
X list_free( sources );
X
X /* Abandon target if any rule fails. */
X
X if( action->progress != T_MAKE_OK )
X return action->progress;
X }
X
X return T_MAKE_OK;
X}
X
X/*
X * make1b() - execute single command to update a target
X *
X * For a RULE_PIECEMEAL rule the sources are fed to make1c() in slices
X * of the size suggested by make1chunk(); otherwise make1c() runs once
X * with the full source list.
X *
X * Returns the status of the last make1c() call, or T_MAKE_OK when no
X * call was made.
X */
X
Xstatic int
Xmake1b( rule, targets, sources )
XRULE *rule;
XLIST *targets;
XLIST *sources;
X{
X    int status = T_MAKE_OK;
X    int chunk = 0;
X    int start = 0;
X    LIST *somes;
X
X    /* Slice size for piecemeal rules; 0 means run it in one go. */
X
X    if( rule->flags & RULE_PIECEMEAL )
X        chunk = make1chunk( rule->actions, targets, sources );
X
X    if( DEBUG_EXEC && chunk )
X        printf( "%d arguments per invocation\n", chunk );
X
X    if( chunk )
X    {
X        /* One invocation per slice; stop at the first non-OK status */
X        /* or when the sources run out. */
X
X        while( ( somes = list_sublist( sources, start, chunk ) ) != 0 )
X        {
X            status = make1c( rule, targets, somes );
X            list_free( somes );
X
X            if( status != T_MAKE_OK )
X                break;
X
X            start += chunk;
X        }
X    }
X    else
X    {
X        status = make1c( rule, targets, sources );
X    }
X
X    /* After an interrupt, remove the targets unless the rule */
X    /* carries RULE_TOGETHER. */
X
X    if( status == T_MAKE_INTR && !( rule->flags & RULE_TOGETHER ) )
X        make1u( targets );
X
X    return status;
X}
X
X/*
X * make1c() - execute a (piecemeal) piece of a command to update a target
X *
X * Expands the rule's action text with var_string() and passes it to
X * execcmd(), unless globs.noexec is set.
X *
X * Returns T_MAKE_OK, T_MAKE_FAIL or T_MAKE_INTR.  A failed command
X * counts as OK when the rule carries RULE_IGNORE.
X */
X
Xstatic int
Xmake1c( rule, targets, sources )
XRULE *rule;
XLIST *targets;
XLIST *sources;
X{
X    char buf[ MAXCMD ];
X    int len;
X    int result;
X
X    len = var_string( rule->actions, buf, targets, sources );
X
X    if( len > MAXCMD )
X    {
X        /* Can't do much here - we just blew our stack! */
X        printf( "fatal error: command too long\n" );
X        exit( -1 );
X    }
X
X    if( DEBUG_EXEC )
X        printf( "%s\n", buf );
X
X    if( globs.noexec )
X        return T_MAKE_OK;
X
X    if( DEBUG_MAKE )
X        fflush( stdout );
X
X    result = execcmd( buf );
X
X    if( result == EXEC_CMD_OK )
X        return T_MAKE_OK;
X
X    if( result == EXEC_CMD_INTR )
X    {
X        printf( "...interrupted\n" );
X        return T_MAKE_INTR;
X    }
X
X    if( result == EXEC_CMD_FAIL && ( rule->flags & RULE_IGNORE ) )
X        return T_MAKE_OK;
X
X    /* A plain failure, or a status execcmd() is not expected to give. */
X
X    return T_MAKE_FAIL;
X}
X
Xstatic int
Xmake1chunk( cmd, targets, sources )
Xchar *cmd;
XLIST *targets;
XLIST *sources;
X{
X int onesize;
X int onediff;
X int chunk = 0;
X LIST *somes;
X char buf[ MAXCMD ];
X
X somes = list_sublist( sources, 0, 1 );
X onesize = var_string( cmd, buf, targets, somes );
X list_free( somes );
X
X somes = list_sublist( sources, 0, 2 );
X onediff = var_string( cmd, buf, targets, somes ) - onesize;
X list_free( somes );
X
X if( onediff > 0 )
X chunk = 3 * ( MAXCMD - onesize ) / 4 / onediff + 1;
X
X return chunk;
X}
X
X/*
X * make1u() - remove targets after interrupted command
X *
X * Tries to unlink every name in the list and reports each file that
X * was actually removed; failures are silent.
X */
X
Xstatic void
Xmake1u( targets )
XLIST *targets;
X{
X    while( targets )
X    {
X        if( unlink( targets->string ) == 0 )
X            printf( "%s removed\n", targets->string );
X
X        targets = list_next( targets );
X    }
X}
X
X/*
X * makexlist() - turn a list of targets into a LIST, for $(<) and $(>)
X *
X * l        list to append to (0 starts a new one)
X * targets  chain of targets whose bound names are wanted
X * newonly  when nonzero, take only targets with a fate above
X *          T_FATE_STABLE
X *
X * Returns the head of the extended list.
X */
X
Xstatic LIST *
Xmakexlist( l, targets, newonly )
XLIST *l;
XTARGETS *targets;
Xint newonly;
X{
X    for( ; targets; targets = targets->next )
X    {
X        TARGET *t = targets->target;
X        int unbound = t->binding == T_BIND_UNBOUND &&
X                      !( t->flags & T_FLAG_NOTIME );
X
X        /*
X         * spot the kludge!  A target outside the dependency tree was
X         * never bound by make0(), so it has to be bound here, under
X         * its own "on target" settings.
X         */
X
X        if( unbound )
X        {
X            printf( "warning: using independent target %s\n", t->name );
X            pushsettings( t->settings );
X            t->boundname = search( t->name, &t->time );
X            t->binding = t->time ? T_BIND_EXISTS : T_BIND_MISSING;
X            popsettings( t->settings );
X        }
X
X        if( newonly && t->fate <= T_FATE_STABLE )
X            continue;
X
X        l = list_new( l, copystr( t->boundname ) );
X    }
X
X    return l;
X}
SHAR_EOF
chmod 0444 make.c ||
echo 'restore of make.c failed'
Wc_c="`wc -c < 'make.c'`"
test 14415 -eq "$Wc_c" ||
echo 'make.c: original size 14415, current size' "$Wc_c"
fi
# ============= make.h ==============
if test -f 'make.h' -a X"$1" != X"-c"; then
echo 'x - skipping make.h (File already exists)'
else
echo 'x - extracting make.h (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'make.h' &&
X/*
X * Copyright 1993 Christopher Seiwald.
X */
X
X/*
X * make.h - bring a target up to date, once rules are in place
X */
X
Xvoid make();
SHAR_EOF
chmod 0444 make.h ||
echo 'restore of make.h failed'
Wc_c="`wc -c < 'make.h'`"
test 131 -eq "$Wc_c" ||
echo 'make.h: original size 131, current size' "$Wc_c"
fi
# ============= newstr.c ==============
if test -f 'newstr.c' -a X"$1" != X"-c"; then
echo 'x - skipping newstr.c (File already exists)'
else
echo 'x - extracting newstr.c (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'newstr.c' &&
X/*
X * Copyright 1993 Christopher Seiwald.
X */
X
X# include "jam.h"
X# include "newstr.h"
X# include "hash.h"
X
X/*
X * newstr.c - string manipulation routines
X *
X * To minimize string copying, string creation, copying, and freeing
X * is done through newstr.
X *
X * External functions:
X *
X * newstr() - return a malloc'ed copy of a string
X * copystr() - return a copy of a string previously returned by newstr()
X * freestr() - free a string returned by newstr() or copystr()
X * donestr() - free string tables
X *
X * Once a string is passed to newstr(), the returned string is readonly.
X *
X * This implementation builds a hash table of all strings, so that multiple
X * calls of newstr() on the same string allocate memory for the string once.
X * Strings are never actually freed.
X */
X
Xtypedef char *STRING;
X
Xstatic struct hash *strhash = 0;
Xstatic int strtotal = 0;
X
X/*
X * newstr() - return a malloc'ed copy of a string
X *
X * string  text to look up; it is only read
X *
X * The string table is created on first use.  When hashenter() reports
X * a new entry, a private copy of the text is allocated and stored
X * through s; otherwise the pointer already held there is returned.
X * Terminates the program if the copy cannot be allocated.
X */
X
Xchar *
Xnewstr( string )
Xchar *string;
X{
X    STRING str, *s = &str;
X
X    if( !strhash )
X        strhash = hashinit( sizeof( STRING ), "strings" );
X
X    *s = string;
X
X    if( hashenter( strhash, (HASHDATA **)&s ) )
X    {
X        int l = strlen( string );
X        char *m = (char *)malloc( l + 1 );
X
X        /* memcpy() into a null pointer would be undefined behavior. */
X
X        if( !m )
X        {
X            printf( "fatal error: out of memory\n" );
X            exit( -1 );
X        }
X
X        strtotal += l + 1;
X        memcpy( m, string, l + 1 );
X        *s = m;
X    }
X
X    return *s;
X}
X
X/*
X * copystr() - return a copy of a string previously returned by newstr()
X *
X * No new storage is made: the same pointer is handed back.
X */
X
Xchar *
Xcopystr( s )
Xchar *s;
X{
X return s;
X}
X
X/*
X * freestr() - free a string returned by newstr() or copystr()
X *
X * Deliberately empty: in this implementation strings are never
X * actually freed.
X */
X
Xvoid
Xfreestr( s )
Xchar *s;
X{
X}
X
X/*
X * donestr() - free string tables
X *
X * Hands the string hash to hashdone() and, when memory tracing is on,
X * reports how many kilobytes newstr() allocated for string copies.
X */
X
Xvoid
Xdonestr()
X{
X    int kbytes;
X
X    hashdone( strhash );
X
X    kbytes = strtotal / 1024;
X
X    if( DEBUG_MEM )
X    {
X        printf( "%dK in strings\n", kbytes );
X    }
X}
SHAR_EOF
chmod 0444 newstr.c ||
echo 'restore of newstr.c failed'
Wc_c="`wc -c < 'newstr.c'`"
test 1671 -eq "$Wc_c" ||
echo 'newstr.c: original size 1671, current size' "$Wc_c"
fi
# ============= newstr.h ==============
if test -f 'newstr.h' -a X"$1" != X"-c"; then
echo 'x - skipping newstr.h (File already exists)'
else
echo 'x - extracting newstr.h (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'newstr.h' &&
X/*
X * Copyright 1993 Christopher Seiwald.
X */
X
X/*
X * newstr.h - string manipulation routines
X */
X
Xchar *newstr();
Xchar *copystr();
Xvoid freestr();
X
SHAR_EOF
chmod 0444 newstr.h ||
echo 'restore of newstr.h failed'
Wc_c="`wc -c < 'newstr.h'`"
test 148 -eq "$Wc_c" ||
echo 'newstr.h: original size 148, current size' "$Wc_c"
fi
# ============= option.c ==============
if test -f 'option.c' -a X"$1" != X"-c"; then
echo 'x - skipping option.c (File already exists)'
else
echo 'x - extracting option.c (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'option.c' &&
X/*
X * Copyright 1993 Christopher Seiwald.
X */
X
X# include "jam.h"
X# include "option.h"
X
X/*
X * option.c - command line option processing
X *
X * {o >o
X * \<>) "Process command line options as defined in <option.h>.
X * Return the number of argv[] elements used up by options,
X * or -1 if an invalid option flag was given or an argument
X * was supplied for an option that does not require one."
X */
X
Xint
Xgetoptions(argc, argv, opts, optv)
Xchar **argv;
Xchar *opts;
Xoption *optv;
X{
X int i;
X int optc = N_OPTS;
X
X memset( (char *)optv, '\0', sizeof( *optv ) * N_OPTS );
X
X for( i = 0; i < argc; i++ )
X {
X char *arg;
X
X if( argv[i][0] != '-' || !isalpha( argv[i][1] ) )
X break;
X
X if( !optc-- )
X {
X printf( "too many options\n" );
X return -1;
X }
X
X for( arg = &argv[i][1]; *arg; arg++ )
X {
X char *f;
X
X for( f = opts; *f; f++ )
X if( *f == *arg )
X break;
X
X if( !*f )
X {
X printf( "Invalid option: -%c\n", *arg );
X return -1;
X }
X
X optv->flag = *f;
X
X if( f[1] != ':' )
X {
X optv++->val = "true";
X }
X else if( arg[1] )
X {
X optv++->val = &arg[1];
X break;
X }
X else if( ++i < argc )
X {
X optv++->val = argv[i];
X break;
X }
X else
X {
X printf( "option: -%c needs argument\n", *f );
X return -1;
X }
X }
X }
X
X return i;
X}
X
X/*
X * getoptval() - find an option given its character
X *
X * optv    array of N_OPTS entries filled in by getoptions()
X * opt     flag letter to look for
X * subopt  which occurrence to return, counting from 0
X *
X * Returns the stored value of that occurrence, or 0 if there are not
X * that many entries carrying the flag.
X */
X
Xchar *
Xgetoptval( optv, opt, subopt )
Xoption *optv;
Xchar opt;
Xint subopt;
X{
X    option *end = optv + N_OPTS;
X
X    for( ; optv < end; optv++ )
X    {
X        if( optv->flag != opt )
X            continue;
X
X        if( subopt-- == 0 )
X            return optv->val;
X    }
X
X    return 0;
X}
SHAR_EOF
chmod 0444 option.c ||
echo 'restore of option.c failed'
Wc_c="`wc -c < 'option.c'`"
test 1584 -eq "$Wc_c" ||
echo 'option.c: original size 1584, current size' "$Wc_c"
fi
# ============= option.h ==============
if test -f 'option.h' -a X"$1" != X"-c"; then
echo 'x - skipping option.h (File already exists)'
else
echo 'x - extracting option.h (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'option.h' &&
X/*
X * Copyright 1993 Christopher Seiwald.
X */
X
X/*
X * option.h - command line option processing
X *
X * {o >o
X * \ -) "Command line option."
X */
X
X/* One option flag collected from the command line by getoptions(). */
Xtypedef struct option
X{
X char flag; /* flag letter, filled in by getoptions() */
X char *val; /* the flag's value, or the string "true" if it takes none */
X} option;
X
X# define N_OPTS 10
X
Xint getoptions( /* int argc, char **argv, char *opts, option *optv */ );
Xchar *getoptval( /* option *optv, char opt, int subopt */ );
SHAR_EOF
chmod 0444 option.h ||
echo 'restore of option.h failed'
Wc_c="`wc -c < 'option.h'`"
test 428 -eq "$Wc_c" ||
echo 'option.h: original size 428, current size' "$Wc_c"
fi
# ============= parse.c ==============
if test -f 'parse.c' -a X"$1" != X"-c"; then
echo 'x - skipping parse.c (File already exists)'
else
echo 'x - extracting parse.c (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'parse.c' &&
X/*
X * Copyright 1993 Christopher Seiwald.
X */
X
X# include "jam.h"
X# include "lists.h"
X# include "parse.h"
X# include "newstr.h"
X
X/*
X * parse.c - make and destroy parse trees as driven by the parser
X */
X
XPARSE *
Xparse_make( func, left, right, string, string1, llist, rlist, num )
Xvoid (*func)();
XPARSE *left;
XPARSE *right;
Xchar *string;
Xchar *string1;
XLIST *llist;
XLIST *rlist;
Xint num;
X{
X PARSE *p = (PARSE *)malloc( sizeof( PARSE ) );
X
X p->func = func;
X p->left = left;
X p->right = right;
X p->string = string;
X p->string1 = string1;
X p->llist = llist;
X p->rlist = rlist;
X p->num = num;
X
X return p;
X}
X
Xvoid
Xparse_free( p )
XPARSE *p;
X{
X if( p->string )
X freestr( p->string );
X if( p->string1 )
X freestr( p->string1 );
X if( p->llist )
X list_free( p->llist );
X if( p->rlist )
X list_free( p->rlist );
X if( p->left )
X parse_free( p->left );
X if( p->right )
X parse_free( p->right );
X
X free( (char *)p );
X}
SHAR_EOF
chmod 0444 parse.c ||
echo 'restore of parse.c failed'
Wc_c="`wc -c < 'parse.c'`"
test 923 -eq "$Wc_c" ||
echo 'parse.c: original size 923, current size' "$Wc_c"
fi
# ============= parse.h ==============
if test -f 'parse.h' -a X"$1" != X"-c"; then
echo 'x - skipping parse.h (File already exists)'
else
echo 'x - extracting parse.h (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'parse.h' &&
X/*
X * Copyright 1993 Christopher Seiwald.
X */
X
X/*
X * parse.h - make and destroy parse trees as driven by the parser
X */
X
X/*
X * parse tree node
X */
X
X/* Declared ahead of the struct so its members can use the PARSE name. */
Xtypedef struct _PARSE PARSE;
X
X/* Fields are filled in by parse_make() and released by parse_free(). */
Xstruct _PARSE {
X void (*func)(); /* function pointer stored by parse_make() */
X PARSE *left; /* subtree, freed recursively; may be 0 */
X PARSE *right; /* subtree, freed recursively; may be 0 */
X char *string; /* released with freestr() when set */
X char *string1; /* released with freestr() when set */
X LIST *llist; /* released with list_free() when set */
X LIST *rlist; /* released with list_free() when set */
X int num; /* integer stored by parse_make() */
X} ;
X
XPARSE *parse_make();
Xvoid parse_free();
SHAR_EOF
chmod 0444 parse.h ||
echo 'restore of parse.h failed'
Wc_c="`wc -c < 'parse.h'`"
test 354 -eq "$Wc_c" ||
echo 'parse.h: original size 354, current size' "$Wc_c"
fi
# ============= patchlevel.h ==============
if test -f 'patchlevel.h' -a X"$1" != X"-c"; then
echo 'x - skipping patchlevel.h (File already exists)'
else
echo 'x - extracting patchlevel.h (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'patchlevel.h' &&
X#define VERSION 1
X#define PATCHLEVEL 1
SHAR_EOF
chmod 0444 patchlevel.h ||
echo 'restore of patchlevel.h failed'
Wc_c="`wc -c < 'patchlevel.h'`"
test 39 -eq "$Wc_c" ||
echo 'patchlevel.h: original size 39, current size' "$Wc_c"
fi
# ============= regexp.c ==============
if test -f 'regexp.c' -a X"$1" != X"-c"; then
echo 'x - skipping regexp.c (File already exists)'
else
echo 'x - extracting regexp.c (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'regexp.c' &&
X/*
X * regcomp and regexec -- regsub and regerror are elsewhere
X *
X * Copyright (c) 1986 by University of Toronto.
X * Written by Henry Spencer. Not derived from licensed software.
X *
X * Permission is granted to anyone to use this software for any
X * purpose on any computer system, and to redistribute it freely,
X * subject to the following restrictions:
X *
X * 1. The author is not responsible for the consequences of use of
X * this software, no matter how awful, even if they arise
X * from defects in it.
X *
X * 2. The origin of this software must not be misrepresented, either
X * by explicit claim or by omission.
X *
X * 3. Altered versions must be plainly marked as such, and must not
X * be misrepresented as being the original software.
X *** THIS IS AN ALTERED VERSION. It was altered by John Gilmore,
X *** hoptoad!gnu, on 27 Dec 1986, to add \n as an alternative to |
X *** to assist in implementing egrep.
X *** THIS IS AN ALTERED VERSION. It was altered by John Gilmore,
X *** hoptoad!gnu, on 27 Dec 1986, to add \< and \> for word-matching
X *** as in BSD grep and ex.
X *** THIS IS AN ALTERED VERSION. It was altered by John Gilmore,
X *** hoptoad!gnu, on 28 Dec 1986, to optimize characters quoted with \.
X *** THIS IS AN ALTERED VERSION. It was altered by James A. Woods,
X *** ames!jaw, on 19 June 1987, to quash a regcomp() redundancy.
X *** THIS IS AN ALTERED VERSION. It was altered by Christopher Seiwald
X *** seiwald@vix.com, on 28 August 1993, for use in jam. Regmagic.h
X *** was moved into regexp.h, and the include of regexp.h now uses "'s
X *** to avoid conflicting with the system regexp.h. Const, bless its
X *** soul, was removed so it can compile everywhere. The declaration
X *** of strchr() was in conflict on AIX, so it was removed (as it is
X *** happily defined in string.h).
X *
X * Beware that some of this code is subtly aware of the way operator
X * precedence is structured in regular expressions. Serious changes in
X * regular-expression syntax might require a total rethink.
X */
X#include "regexp.h"
X#include <stdio.h>
X#include <ctype.h>
X#ifndef ultrix
X#include <stdlib.h>
X#endif
X#include <string.h>
X
X/*
X * The "internal use only" fields in regexp.h are present to pass info from
X * compile to execute that permits the execute phase to run lots faster on
X * simple cases. They are:
X *
X * regstart char that must begin a match; '\0' if none obvious
X * reganch is the match anchored (at beginning-of-line only)?
X * regmust string (pointer into program) that match must include, or NULL
X * regmlen length of regmust string
X *
X * Regstart and reganch permit very fast decisions on suitable starting points
X * for a match, cutting down the work a lot. Regmust permits fast rejection
X * of lines that cannot possibly match. The regmust tests are costly enough
X * that regcomp() supplies a regmust only if the r.e. contains something
X * potentially expensive (at present, the only such thing detected is * or +
X * at the start of the r.e., which can involve a lot of backup). Regmlen is
X * supplied because the test in regexec() needs it and regcomp() is computing
X * it anyway.
X */
X
X/*
X * Structure for regexp "program". This is essentially a linear encoding
X * of a nondeterministic finite-state machine (aka syntax charts or
X * "railroad normal form" in parsing technology). Each node is an opcode
X * plus a "next" pointer, possibly plus an operand. "Next" pointers of
X * all nodes except BRANCH implement concatenation; a "next" pointer with
X * a BRANCH on both ends of it is connecting two alternatives. (Here we
X * have one of the subtle syntax dependencies: an individual BRANCH (as
X * opposed to a collection of them) is never concatenated with anything
X * because of operator precedence.) The operand of some types of node is
X * a literal string; for others, it is a node leading into a sub-FSM. In
X * particular, the operand of a BRANCH node is the first node of the branch.
X * (NB this is *not* a tree structure: the tail of the branch connects
X * to the thing following the set of BRANCHes.) The opcodes are:
X */
X
X/* definition number opnd? meaning */
X#define END 0 /* no End of program. */
X#define BOL 1 /* no Match "" at beginning of line. */
X#define EOL 2 /* no Match "" at end of line. */
X#define ANY 3 /* no Match any one character. */
X#define ANYOF 4 /* str Match any character in this string. */
X#define ANYBUT 5 /* str Match any character not in this string. */
X#define BRANCH 6 /* node Match this alternative, or the next... */
X#define BACK 7 /* no Match "", "next" ptr points backward. */
X#define EXACTLY 8 /* str Match this string. */
X#define NOTHING 9 /* no Match empty string. */
X#define STAR 10 /* node Match this (simple) thing 0 or more times. */
X#define PLUS 11 /* node Match this (simple) thing 1 or more times. */
X#define WORDA 12 /* no Match "" at wordchar, where prev is nonword (written \< in a pattern) */
X#define WORDZ 13 /* no Match "" at nonwordchar (written \> in a pattern); regmatch() does not check the previous char */
X#define OPEN 20 /* no Mark this point in input as start of #n. */
X /* OPEN+1 is number 1, etc.; regmatch() handles OPEN+1 through OPEN+9. */
X#define CLOSE 30 /* no Analogous to OPEN: marks the end of #n (CLOSE+1 through CLOSE+9). */
X
X/*
X * Opcode notes:
X *
X * BRANCH The set of branches constituting a single choice are hooked
X * together with their "next" pointers, since precedence prevents
X * anything being concatenated to any individual branch. The
X * "next" pointer of the last BRANCH in a choice points to the
X * thing following the whole choice. This is also where the
X * final "next" pointer of each individual branch points; each
X * branch starts with the operand node of a BRANCH node.
X *
X * BACK Normal "next" pointers all implicitly point forward; BACK
X * exists to make loop structures possible.
X *
X * STAR,PLUS '?', and complex '*' and '+', are implemented as circular
X * BRANCH structures using BACK. Simple cases (one character
X * per match) are implemented with STAR and PLUS for speed
X * and to minimize recursive plunges.
X *
X * OPEN,CLOSE ...are numbered at compile time.
X */
X
X/*
X * A node is one char of opcode followed by two chars of "next" pointer.
X * "Next" pointers are stored as two 8-bit pieces, high order first. The
X * value is a positive offset from the opcode of the node containing it.
X * An operand, if any, simply follows the node. (Note that much of the
X * code generation knows about this implicit relationship.)
X *
X * Using two bytes for the "next" pointer is vast overkill for most things,
X * but allows patterns to get big without disasters.
X *
X * A stored offset of 0 means "no next node": regnext() turns it into
X * NULL. For a BACK node regnext() subtracts the offset instead of
X * adding it, which is how backward links are expressed with a positive
X * value.
X */
X#define OP(p) (*(p)) /* Opcode byte of node p. */
X#define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377)) /* Raw 16-bit offset, high byte first. */
X#define OPERAND(p) ((p) + 3) /* First byte after the 3-byte node header. */
X
X/*
X * See regmagic.h for one further detail of program structure.
X */
X
X
X/*
X * Utility definitions.
X *
X * UCHARAT(p) fetches the char at p as an int that is not sign-extended,
X * either through an unsigned char cast or, when CHARBITS is defined, by
X * masking with CHARBITS.
X *
X * FAIL(m) reports m through regerror() and then returns NULL from the
X * enclosing function, so it can only be used inside functions that
X * return a pointer. It expands to a brace block rather than to a
X * single expression.
X *
X * ISMULT(c) is true when c is one of the repetition operators.
X */
X#ifndef CHARBITS
X#define UCHARAT(p) ((int)*(unsigned char *)(p))
X#else
X#define UCHARAT(p) ((int)*(p)&CHARBITS)
X#endif
X
X#define FAIL(m) { regerror(m); return(NULL); }
X#define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?')
X
X/*
X * Flags to be passed up and down.
X *
X * Each parsing routine (reg, regbranch, regpiece, regatom) describes the
X * construct it just parsed by storing a combination of these bits
X * through its flagp argument. regcomp() looks at SPSTART of the whole
X * expression to decide whether to compute regmust.
X *
X * NOTE(review): regpiece() sets SPSTART for an operand followed by
X * star or question mark, and HASWIDTH (without SPSTART) for plus, which
X * does not quite agree with the wording of the SPSTART comment below.
X */
X#define HASWIDTH 01 /* Known never to match null string. */
X#define SIMPLE 02 /* Simple enough to be STAR/PLUS operand. */
X#define SPSTART 04 /* Starts with * or +. */
X#define WORST 0 /* Worst case. */
X
X/*
X * Global work variables for regcomp().
X *
X * regcomp() runs the parser twice. While regcode points at regdummy
X * the emit routines store nothing and only advance regsize; once
X * regcode points into the allocated program they store real code.
X */
Xstatic char *regparse; /* Input-scan pointer. */
Xstatic int regnpar; /* () count. */
Xstatic char regdummy; /* Placeholder whose address marks the sizing pass. */
Xstatic char *regcode; /* Code-emit pointer; &regdummy = don't. */
Xstatic long regsize; /* Code size. */
X
X/*
X * Forward declarations for regcomp()'s friends.
X *
X * These are old-style declarations without parameter lists, matching
X * the K&R-style definitions used throughout this file. STATIC defaults
X * to static but may be predefined by the build to something else.
X * The strcspn() declaration is only wanted when STRCSPN is defined,
X * i.e. when the replacement at the end of this file is compiled in.
X */
X#ifndef STATIC
X#define STATIC static
X#endif
XSTATIC char *reg();
XSTATIC char *regbranch();
XSTATIC char *regpiece();
XSTATIC char *regatom();
XSTATIC char *regnode();
XSTATIC char *regnext();
XSTATIC void regc();
XSTATIC void reginsert();
XSTATIC void regtail();
XSTATIC void regoptail();
X#ifdef STRCSPN
XSTATIC int strcspn();
X#endif
X
X/*
X - regcomp - compile a regular expression into internal code
X *
X * We can't allocate space until we know how big the compiled form will be,
X * but we can't compile it (and thus know how big it is) until we've got a
X * place to put the code. So we cheat: we compile it twice, once with code
X * generation turned off and size counting turned on (regcode pointing at
X * regdummy), and once "for real" into the freshly allocated program.
X * This also means that we don't allocate space until we are sure that the
X * thing really will compile successfully, and we never have to move the
X * code and thus invalidate pointers into it. (Note that it has to be in
X * one piece because free() must be able to free it all.)
X *
X * Returns the compiled regexp, obtained from a single malloc(), or NULL
X * after reporting the problem through regerror(): NULL argument, syntax
X * error, program too big, or out of memory. Should the second pass
X * fail, the allocation is released before returning.
X *
X * Beware that the optimization-preparation code in here knows about some
X * of the structure of the compiled regexp.
X */
Xregexp *
Xregcomp(exp)
Xchar *exp;
X{
X    register regexp *r;
X    register char *scan;
X    register char *longest;
X    register int len;
X    register int curlen;
X    int flags;
X
X    if (exp == NULL)
X        FAIL("NULL argument");
X
X    /* First pass: determine size, legality. */
X#ifdef notdef
X    if (exp[0] == '.' && exp[1] == '*') exp += 2; /* aid grep */
X#endif
X    regparse = (char *)exp;
X    regnpar = 1;
X    regsize = 0L;
X    regcode = &regdummy;        /* Sizing pass: count, don't emit. */
X    regc(MAGIC);
X    if (reg(0, &flags) == NULL)
X        return(NULL);
X
X    /* Small enough for pointer-storage convention? */
X    if (regsize >= 32767L)      /* Probably could be 65535L. */
X        FAIL("regexp too big");
X
X    /* Allocate space. */
X    r = (regexp *)malloc(sizeof(regexp) + (unsigned)regsize);
X    if (r == NULL)
X        FAIL("out of space");
X
X    /* Second pass: emit code. */
X    regparse = (char *)exp;
X    regnpar = 1;
X    regcode = r->program;
X    regc(MAGIC);
X    if (reg(0, &flags) == NULL) {
X        free((char *)r);        /* Don't leak the program on failure. */
X        return(NULL);
X    }
X
X    /* Dig out information for optimizations. */
X    r->regstart = '\0';         /* Worst-case defaults. */
X    r->reganch = 0;
X    r->regmust = NULL;
X    r->regmlen = 0;
X    scan = r->program+1;        /* First BRANCH. */
X    if (OP(regnext(scan)) == END) {     /* Only one top-level choice. */
X        scan = OPERAND(scan);
X
X        /* Starting-point info. */
X        if (OP(scan) == EXACTLY)
X            r->regstart = *OPERAND(scan);
X        else if (OP(scan) == BOL)
X            r->reganch++;
X
X        /*
X         * If there's something expensive in the r.e., find the
X         * longest literal string that must appear and make it the
X         * regmust. Resolve ties in favor of later strings, since
X         * the regstart check works with the beginning of the r.e.
X         * and avoiding duplication strengthens checking. Not a
X         * strong reason, but sufficient in the absence of others.
X         */
X        if (flags&SPSTART) {
X            longest = NULL;
X            len = 0;
X            for (; scan != NULL; scan = regnext(scan)) {
X                if (OP(scan) != EXACTLY)
X                    continue;
X                /* Measure once, and compare as ints. */
X                curlen = (int)strlen(OPERAND(scan));
X                if (curlen >= len) {
X                    longest = OPERAND(scan);
X                    len = curlen;
X                }
X            }
X            r->regmust = longest;
X            r->regmlen = len;
X        }
X    }
X
X    return(r);
X}
X
X/*
X - reg - regular expression, i.e. main body or parenthesized thing
X *
X * Caller must absorb opening parenthesis.
X *
X * Parses a list of alternatives separated by '|' (or newline), chains
X * the BRANCH nodes together, and terminates the lot with END, or with
X * CLOSE+n for a parenthesized group. Returns the first node, or NULL
X * on error. *flagp keeps HASWIDTH only if every alternative has width,
X * and picks up SPSTART from any alternative that has it.
X *
X * Handling parentheses here as well as the top level is a little
X * forced, but the tails of the branches have to be tied to whatever
X * follows, and this is the one place that knows what that is.
X */
Xstatic char *
Xreg(paren, flagp)
Xint paren; /* Parenthesized? */
Xint *flagp;
X{
X    register char *head;        /* First node of this expression. */
X    register char *alt;         /* Alternative being linked in. */
X    register char *closer;      /* The END or CLOSE+n node. */
X    register int group = 0;     /* Group number; used only if paren. */
X    int altflags;
X
X    *flagp = HASWIDTH;          /* Until an alternative says otherwise. */
X    head = NULL;
X
X    /* A parenthesized group starts with its OPEN node. */
X    if (paren) {
X        if (regnpar >= NSUBEXP)
X            FAIL("too many ()");
X        group = regnpar++;
X        head = regnode(OPEN+group);
X    }
X
X    /* Collect the alternatives, one per trip round the loop. */
X    for (;;) {
X        alt = regbranch(&altflags);
X        if (alt == NULL)
X            return(NULL);
X        if (head == NULL)
X            head = alt;                 /* Top level: first branch leads. */
X        else
X            regtail(head, alt);         /* OPEN -> first, BRANCH -> BRANCH. */
X        if ((altflags&HASWIDTH) == 0)
X            *flagp &= ~HASWIDTH;
X        *flagp |= altflags&SPSTART;
X
X        if (*regparse != '|' && *regparse != '\n')
X            break;
X        regparse++;
X    }
X
X    /* Emit the terminator and append it to the chain. */
X    closer = regnode(paren ? CLOSE+group : END);
X    regtail(head, closer);
X
X    /* Every branch's own tail must lead to the terminator too. */
X    alt = head;
X    while (alt != NULL) {
X        regoptail(alt, closer);
X        alt = regnext(alt);
X    }
X
X    /* Make sure we stopped where we should have. */
X    if (paren) {
X        if (*regparse++ != ')')
X            FAIL("unmatched ()");
X    } else if (*regparse != '\0') {
X        if (*regparse == ')')
X            FAIL("unmatched ()");
X        FAIL("junk on end");    /* "Can't happen". */
X        /* NOTREACHED */
X    }
X
X    return(head);
X}
X
X/*
X - regbranch - one alternative of an | operator
X *
X * Implements the concatenation operator: emits a BRANCH node and then
X * pieces until the alternative ends, linking each piece to the one
X * before it. An empty alternative gets a NOTHING node as its body.
X * Returns the BRANCH node, or NULL if a piece failed to parse.
X * *flagp gets HASWIDTH if any piece has width, and SPSTART if the
X * first piece has it.
X */
Xstatic char *
Xregbranch(flagp)
Xint *flagp;
X{
X    register char *branch;
X    register char *prev;        /* Piece before the current one. */
X    register char *piece;
X    int pieceflags;
X
X    *flagp = WORST;             /* Nothing known yet. */
X    branch = regnode(BRANCH);
X    prev = NULL;
X
X    while (!(*regparse == '\0' || *regparse == ')' ||
X             *regparse == '\n' || *regparse == '|')) {
X        piece = regpiece(&pieceflags);
X        if (piece == NULL)
X            return(NULL);
X        *flagp |= pieceflags&HASWIDTH;
X        if (prev != NULL)
X            regtail(prev, piece);
X        else
X            *flagp |= pieceflags&SPSTART;       /* First piece only. */
X        prev = piece;
X    }
X
X    /* No pieces at all: give the branch an empty body. */
X    if (prev == NULL)
X        (void) regnode(NOTHING);
X
X    return(branch);
X}
X
X/*
X - regpiece - something followed by possible [*+?]
X *
X * Parses one atom and, if a repetition operator follows it, wraps the
X * atom accordingly. Simple atoms get a STAR or PLUS node inserted in
X * front; anything else is built out of BRANCH, BACK and NOTHING nodes.
X * Returns the first node of the piece, or NULL on error.
X *
X * The branching sequences for ? and for the general forms of * and +
X * share one NOTHING node, which serves both as the endmarker of the
X * branch list and as the body of the last branch. The endmarker role
X * is what keeps that node from being redundant.
X */
Xstatic char *
Xregpiece(flagp)
Xint *flagp;
X{
X    register char *atom;
X    register char op;
X    register char *alt;
X    int atomflags;
X
X    atom = regatom(&atomflags);
X    if (atom == NULL)
X        return(NULL);
X
X    /* No operator: the piece is just the atom. */
X    op = *regparse;
X    if (!ISMULT(op)) {
X        *flagp = atomflags;
X        return(atom);
X    }
X
X    if (op != '?' && !(atomflags&HASWIDTH))
X        FAIL("*+ operand could be empty");
X    if (op == '+')
X        *flagp = WORST|HASWIDTH;
X    else
X        *flagp = WORST|SPSTART;
X
X    switch (op) {
X    case '*':
X        if (atomflags&SIMPLE) {
X            reginsert(STAR, atom);
X        } else {
X            /* Emit x* as (x&|), where & means "self". */
X            reginsert(BRANCH, atom);            /* Either x */
X            regoptail(atom, regnode(BACK));     /* and loop */
X            regoptail(atom, atom);              /* back */
X            regtail(atom, regnode(BRANCH));     /* or */
X            regtail(atom, regnode(NOTHING));    /* null. */
X        }
X        break;
X    case '+':
X        if (atomflags&SIMPLE) {
X            reginsert(PLUS, atom);
X        } else {
X            /* Emit x+ as x(&|), where & means "self". */
X            alt = regnode(BRANCH);              /* Either */
X            regtail(atom, alt);
X            regtail(regnode(BACK), atom);       /* loop back */
X            regtail(alt, regnode(BRANCH));      /* or */
X            regtail(atom, regnode(NOTHING));    /* null. */
X        }
X        break;
X    case '?':
X        /* Emit x? as (x|) */
X        reginsert(BRANCH, atom);                /* Either x */
X        regtail(atom, regnode(BRANCH));         /* or */
X        alt = regnode(NOTHING);                 /* null. */
X        regtail(atom, alt);
X        regoptail(atom, alt);
X        break;
X    }
X
X    /* Step over the operator; a second one in a row is an error. */
X    regparse++;
X    if (ISMULT(*regparse))
X        FAIL("nested *?+");
X
X    return(atom);
X}
X
X/*
X - regatom - the lowest level
X *
X * Parses one atom starting at regparse: ^, $, ., a bracketed character
X * class, a parenthesized group (handed to reg()), the word anchors \<
X * and \>, or a run of literal characters. Returns the node emitted for
X * it, or NULL (through FAIL) on a syntax error. *flagp receives
X * HASWIDTH and SIMPLE where they apply, and HASWIDTH/SPSTART as passed
X * up from a parenthesized group.
X *
X * Optimization: gobbles an entire sequence of ordinary characters so that
X * it can turn them into a single node, which is smaller to store and
X * faster to run. Backslashed characters are exceptions, each becoming a
X * separate node; the code is simpler that way and it's not worth fixing.
X */
Xstatic char *
Xregatom(flagp)
Xint *flagp;
X{
X register char *ret;
X int flags;
X
X *flagp = WORST; /* Tentatively. */
X
X switch (*regparse++) { /* Consume one char and dispatch on it. */
X /* FIXME: these chars only have meaning at beg/end of pat? */
X case '^':
X ret = regnode(BOL);
X break;
X case '$':
X ret = regnode(EOL);
X break;
X case '.':
X ret = regnode(ANY);
X *flagp |= HASWIDTH|SIMPLE;
X break;
X case '[': {
X register int class;
X register int classend;
X
X if (*regparse == '^') { /* Complement of range. */
X ret = regnode(ANYBUT);
X regparse++;
X } else
X ret = regnode(ANYOF);
X if (*regparse == ']' || *regparse == '-') /* A leading ] or - is taken literally. */
X regc(*regparse++);
X while (*regparse != '\0' && *regparse != ']') {
X if (*regparse == '-') {
X regparse++;
X if (*regparse == ']' || *regparse == '\0')
X regc('-'); /* A trailing - is taken literally. */
X else {
X class = UCHARAT(regparse-2)+1; /* Low end was already emitted; start one past it. */
X classend = UCHARAT(regparse); /* High end, inclusive. */
X if (class > classend+1)
X FAIL("invalid [] range");
X for (; class <= classend; class++)
X regc(class);
X regparse++;
X }
X } else
X regc(*regparse++);
X }
X regc('\0'); /* Terminate the operand string. */
X if (*regparse != ']')
X FAIL("unmatched []");
X regparse++;
X *flagp |= HASWIDTH|SIMPLE;
X }
X break;
X case '(':
X ret = reg(1, &flags); /* Parenthesized group; reg() eats the closing paren. */
X if (ret == NULL)
X return(NULL);
X *flagp |= flags&(HASWIDTH|SPSTART);
X break;
X case '\0':
X case '|':
X case '\n':
X case ')':
X FAIL("internal urp"); /* Supposed to be caught earlier. */
X break;
X case '?':
X case '+':
X case '*':
X FAIL("?+* follows nothing");
X break;
X case '\\':
X switch (*regparse++) {
X case '\0':
X FAIL("trailing \\");
X break;
X case '<':
X ret = regnode(WORDA);
X break;
X case '>':
X ret = regnode(WORDZ);
X break;
X /* FIXME: Someday handle \1, \2, ... */
X default:
X /* Handle general quoted chars in exact-match routine */
X goto de_fault;
X }
X break;
X de_fault:
X default:
X /*
X * Encode a string of characters to be matched exactly.
X *
X * This is a bit tricky due to quoted chars and due to
X * '*', '+', and '?' taking the SINGLE char previous
X * as their operand.
X *
X * On entry, the char at regparse[-1] is going to go
X * into the string, no matter what it is. (It could be
X * following a \ if we are entered from the '\' case.)
X *
X * Basic idea is to pick up a good char in ch and
X * examine the next char. If it's *+? then we twiddle.
X * If it's \ then we frozzle. If it's other magic char
X * we push ch and terminate the string. If none of the
X * above, we push ch on the string and go around again.
X *
X * regprev is used to remember where "the current char"
X * starts in the string, if due to a *+? we need to back
X * up and put the current char in a separate, 1-char, string.
X * When regprev is NULL, ch is the only char in the
X * string; this is used in *+? handling, and in setting
X * flags |= SIMPLE at the end.
X */
X {
X char *regprev;
X register char ch;
X
X regparse--; /* Look at cur char */
X ret = regnode(EXACTLY);
X for ( regprev = 0 ; ; ) {
X ch = *regparse++; /* Get current char */
X switch (*regparse) { /* look at next one */
X
X default:
X regc(ch); /* Add cur to string */
X break;
X
X case '.': case '[': case '(':
X case ')': case '|': case '\n':
X case '$': case '^':
X case '\0':
X /* FIXME, $ and ^ should not always be magic */
X magic:
X regc(ch); /* dump cur char */
X goto done; /* and we are done */
X
X case '?': case '+': case '*':
X if (!regprev) /* If just ch in str, */
X goto magic; /* use it */
X /* End mult-char string one early */
X regparse = regprev; /* Back up parse */
X goto done;
X
X case '\\':
X regc(ch); /* Cur char OK */
X switch (regparse[1]){ /* Look after \ */
X case '\0':
X case '<':
X case '>':
X /* FIXME: Someday handle \1, \2, ... */
X goto done; /* Not quoted */
X default:
X /* Backup point is \, scan * point is after it. */
X regprev = regparse;
X regparse++;
X continue; /* NOT break; */
X }
X }
X regprev = regparse; /* Set backup point */
X }
X done:
X regc('\0');
X *flagp |= HASWIDTH;
X if (!regprev) /* One char? */
X *flagp |= SIMPLE;
X }
X break;
X }
X
X return(ret);
X}
X
X/*
X - regnode - emit a node
X *
X * Writes a three-byte node (opcode op followed by a null "next"
X * pointer) at regcode and advances regcode past it. During the sizing
X * pass, when regcode points at regdummy, nothing is written and regsize
X * grows by 3 instead. Returns the location of the node, which during
X * the sizing pass is &regdummy.
X */
Xstatic char * /* Location. */
Xregnode(op)
Xchar op;
X{
X    register char *ret;
X    register char *ptr;
X
X    ret = regcode;
X    if (ret == &regdummy) {
X        regsize += 3;
X        return(ret);
X    }
X
X    ptr = ret;
X    *ptr++ = op;
X    *ptr++ = '\0';      /* Null "next" pointer. */
X    *ptr++ = '\0';
X    regcode = ptr;
X
X    return(ret);
X}
X
X/*
X - regc - emit (if appropriate) a byte of code
X *
X * Stores b at regcode and advances regcode, unless this is the sizing
X * pass (regcode points at regdummy), in which case only regsize is
X * incremented.
X */
Xstatic void
Xregc(b)
Xchar b;
X{
X    if (regcode != &regdummy)
X        *regcode++ = b;
X    else
X        regsize++;
X}
X
X/*
X - reginsert - insert an operator in front of already-emitted operand
X *
X * Means relocating the operand: everything from opnd up to regcode is
X * moved three bytes towards higher addresses, and a node with opcode op
X * and a null "next" pointer is written where the operand used to
X * start. During the sizing pass (regcode points at regdummy) only
X * regsize is increased by 3.
X */
Xstatic void
Xreginsert(op, opnd)
Xchar op;
Xchar *opnd;
X{
X    register char *src;
X    register char *dst;
X    register char *place;
X
X    if (regcode == &regdummy) {
X        regsize += 3;
X        return;
X    }
X
X    /* Copy backwards, since source and destination overlap. */
X    src = regcode;
X    regcode += 3;
X    dst = regcode;
X    while (src > opnd)
X        *--dst = *--src;
X
X    place = opnd;       /* Op node, where operand used to be. */
X    *place++ = op;
X    *place++ = '\0';
X    *place++ = '\0';
X}
X
X/*
X - regtail - set the next-pointer at the end of a node chain
X *
X * Follows the "next" pointers from p to the last node of the chain and
X * makes that node point at val. The offset is stored in two bytes,
X * high order first; for a BACK node it is the distance backwards from
X * the node to val. Does nothing during the sizing pass, when p is
X * &regdummy.
X */
Xstatic void
Xregtail(p, val)
Xchar *p;
Xchar *val;
X{
X    register char *scan;
X    register char *temp;
X    register int offset;
X
X    if (p == &regdummy)
X        return;
X
X    /* Find last node. */
X    scan = p;
X    for (;;) {
X        temp = regnext(scan);
X        if (temp == NULL)
X            break;
X        scan = temp;
X    }
X
X    if (OP(scan) == BACK)
X        offset = scan - val;
X    else
X        offset = val - scan;
X    *(scan+1) = (offset>>8)&0377;
X    *(scan+2) = offset&0377;
X}
X
X/*
X - regoptail - regtail on operand of first argument; nop if operandless
X *
X * Only BRANCH nodes are treated as having an operand chain. A NULL p,
X * the sizing-pass placeholder &regdummy, and any non-BRANCH node are
X * all left alone.
X */
Xstatic void
Xregoptail(p, val)
Xchar *p;
Xchar *val;
X{
X    /* "Operandless" and "op != BRANCH" are synonymous in practice. */
X    if (p == NULL || p == &regdummy || OP(p) != BRANCH)
X        return;
X    regtail(OPERAND(p), val);
X}
X
X/*
X * regexec and friends
X */
X
X/*
X * Global work variables for regexec().
X *
X * regbol is set once per regexec() call. The other three are reset by
X * regtry() at the start of every match attempt and then used by
X * regmatch() and regrepeat().
X */
Xstatic char *reginput; /* String-input pointer. */
Xstatic char *regbol; /* Beginning of input, for ^ check. */
Xstatic char **regstartp; /* Pointer to startp array. */
Xstatic char **regendp; /* Ditto for endp. */
X
X/*
X * Forwards.
X */
XSTATIC int regtry();
XSTATIC int regmatch();
XSTATIC int regrepeat();
X
X#ifdef DEBUG
Xint regnarrate = 0; /* Nonzero makes regmatch() trace each node on stderr. */
Xvoid regdump();
XSTATIC char *regprop();
X#endif
X
X/*
X - regexec - match a regexp against a string
X *
X * Returns 1 if prog matches somewhere in string and 0 if it does not.
X * A NULL argument or a program that does not start with MAGIC is
X * reported through regerror() and counts as no match. The precomputed
X * regmust, reganch and regstart fields are used to skip hopeless
X * starting points before regtry() is asked to do the real work.
X */
Xint
Xregexec(prog, string)
Xregister regexp *prog;
Xregister char *string;
X{
X    register char *s;
X
X    /* Refuse NULL arguments. */
X    if (prog == NULL || string == NULL) {
X        regerror("NULL parameter");
X        return(0);
X    }
X
X    /* Refuse anything that is not a compiled program. */
X    if (UCHARAT(prog->program) != MAGIC) {
X        regerror("corrupted program");
X        return(0);
X    }
X
X    /* The "must appear" string, if any, has to be in there somewhere. */
X    if (prog->regmust != NULL) {
X        for (s = strchr((char *)string, prog->regmust[0]);
X             s != NULL;
X             s = strchr(s + 1, prog->regmust[0])) {
X            if (strncmp(s, prog->regmust, prog->regmlen) == 0)
X                break;          /* Found it. */
X        }
X        if (s == NULL)          /* Not present. */
X            return(0);
X    }
X
X    /* Remember where the line begins, for ^ . */
X    regbol = (char *)string;
X
X    /* An anchored pattern can only match at the very start. */
X    if (prog->reganch)
X        return(regtry(prog, string));
X
X    /* Unanchored: try each candidate starting point in turn. */
X    if (prog->regstart != '\0') {
X        /* Only positions holding the known first char qualify. */
X        for (s = strchr((char *)string, prog->regstart);
X             s != NULL;
X             s = strchr(s + 1, prog->regstart)) {
X            if (regtry(prog, s))
X                return(1);
X        }
X    } else {
X        /* Every position qualifies, the terminating NUL included. */
X        for (s = (char *)string; ; s++) {
X            if (regtry(prog, s))
X                return(1);
X            if (*s == '\0')
X                break;
X        }
X    }
X
X    /* Nothing matched. */
X    return(0);
X}
X
X/*
X - regtry - try match at specific point
X *
X * Clears all NSUBEXP startp/endp slots of prog, points the matcher's
X * work variables at them and at string, and runs regmatch() on the
X * program (skipping the leading magic byte). On success slot 0 records
X * where the whole match starts and ends.
X */
Xstatic int /* 0 failure, 1 success */
Xregtry(prog, string)
Xregexp *prog;
Xchar *string;
X{
X    register int slot;
X
X    reginput = string;
X    regstartp = prog->startp;
X    regendp = prog->endp;
X
X    /* Forget whatever an earlier attempt recorded. */
X    for (slot = 0; slot < NSUBEXP; slot++) {
X        prog->startp[slot] = NULL;
X        prog->endp[slot] = NULL;
X    }
X
X    if (!regmatch(prog->program + 1))
X        return(0);
X
X    prog->startp[0] = string;
X    prog->endp[0] = reginput;
X    return(1);
X}
X
X/*
X - regmatch - main matching routine
X *
X * Conceptually the strategy is simple: check to see whether the current
X * node matches, call self recursively to see whether the rest matches,
X * and then act accordingly. In practice we make some effort to avoid
X * recursion, in particular by going through "ordinary" nodes (that don't
X * need to know whether the rest of the match failed) by a loop instead of
X * by recursion.
X *
X * prog is the node to start from. The input position is the global
X * reginput, which is advanced as characters are consumed. Returns 1 if
X * the rest of the program matches from here and 0 if it does not.
X *
X * Characters handed to isalnum() go through UCHARAT() first, because
X * passing a negative plain char to a <ctype.h> function is undefined.
X */
Xstatic int /* 0 failure, 1 success */
Xregmatch(prog)
Xchar *prog;
X{
X    register char *scan;        /* Current node. */
X    char *next;                 /* Next node. */
X
X    scan = prog;
X#ifdef DEBUG
X    if (scan != NULL && regnarrate)
X        fprintf(stderr, "%s(\n", regprop(scan));
X#endif
X    while (scan != NULL) {
X#ifdef DEBUG
X        if (regnarrate)
X            fprintf(stderr, "%s...\n", regprop(scan));
X#endif
X        next = regnext(scan);
X
X        switch (OP(scan)) {
X        case BOL:
X            if (reginput != regbol)
X                return(0);
X            break;
X        case EOL:
X            if (*reginput != '\0')
X                return(0);
X            break;
X        case WORDA:
X            /* Must be looking at a letter, digit, or _ */
X            if (!isalnum(UCHARAT(reginput)) && *reginput != '_')
X                return(0);
X            /* Prev must be BOL or nonword */
X            if (reginput > regbol &&
X                (isalnum(UCHARAT(reginput-1)) || reginput[-1] == '_'))
X                return(0);
X            break;
X        case WORDZ:
X            /* Must be looking at non letter, digit, or _ */
X            if (isalnum(UCHARAT(reginput)) || *reginput == '_')
X                return(0);
X            /* We don't care what the previous char was */
X            break;
X        case ANY:
X            if (*reginput == '\0')
X                return(0);
X            reginput++;
X            break;
X        case EXACTLY: {
X            register int len;
X            register char *opnd;
X
X            opnd = OPERAND(scan);
X            /* Inline the first character, for speed. */
X            if (*opnd != *reginput)
X                return(0);
X            len = strlen(opnd);
X            if (len > 1 && strncmp(opnd, reginput, len) != 0)
X                return(0);
X            reginput += len;
X            }
X            break;
X        case ANYOF:
X            if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) == NULL)
X                return(0);
X            reginput++;
X            break;
X        case ANYBUT:
X            if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) != NULL)
X                return(0);
X            reginput++;
X            break;
X        case NOTHING:
X            break;
X        case BACK:
X            break;
X        case OPEN+1:
X        case OPEN+2:
X        case OPEN+3:
X        case OPEN+4:
X        case OPEN+5:
X        case OPEN+6:
X        case OPEN+7:
X        case OPEN+8:
X        case OPEN+9: {
X            register int no;
X            register char *save;
X
X            no = OP(scan) - OPEN;
X            save = reginput;
X
X            if (regmatch(next)) {
X                /*
X                 * Don't set startp if some later
X                 * invocation of the same parentheses
X                 * already has.
X                 */
X                if (regstartp[no] == NULL)
X                    regstartp[no] = save;
X                return(1);
X            } else
X                return(0);
X            }
X            break;
X        case CLOSE+1:
X        case CLOSE+2:
X        case CLOSE+3:
X        case CLOSE+4:
X        case CLOSE+5:
X        case CLOSE+6:
X        case CLOSE+7:
X        case CLOSE+8:
X        case CLOSE+9: {
X            register int no;
X            register char *save;
X
X            no = OP(scan) - CLOSE;
X            save = reginput;
X
X            if (regmatch(next)) {
X                /*
X                 * Don't set endp if some later
X                 * invocation of the same parentheses
X                 * already has.
X                 */
X                if (regendp[no] == NULL)
X                    regendp[no] = save;
X                return(1);
X            } else
X                return(0);
X            }
X            break;
X        case BRANCH: {
X            register char *save;
X
X            if (OP(next) != BRANCH)             /* No choice. */
X                next = OPERAND(scan);           /* Avoid recursion. */
X            else {
X                /* Try each alternative from the same input position. */
X                do {
X                    save = reginput;
X                    if (regmatch(OPERAND(scan)))
X                        return(1);
X                    reginput = save;
X                    scan = regnext(scan);
X                } while (scan != NULL && OP(scan) == BRANCH);
X                return(0);
X                /* NOTREACHED */
X            }
X            }
X            break;
X        case STAR:
X        case PLUS: {
X            register char nextch;
X            register int no;
X            register char *save;
X            register int min;
X
X            /*
X             * Lookahead to avoid useless match attempts
X             * when we know what character comes next.
X             */
X            nextch = '\0';
X            if (OP(next) == EXACTLY)
X                nextch = *OPERAND(next);
X            min = (OP(scan) == STAR) ? 0 : 1;
X            save = reginput;
X            no = regrepeat(OPERAND(scan));
X            /* Greedy: take the most, then give back one at a time. */
X            while (no >= min) {
X                /* If it could work, try it. */
X                if (nextch == '\0' || *reginput == nextch)
X                    if (regmatch(next))
X                        return(1);
X                /* Couldn't or didn't -- back up. */
X                no--;
X                reginput = save + no;
X            }
X            return(0);
X            }
X            break;
X        case END:
X            return(1);          /* Success! */
X            break;
X        default:
X            regerror("memory corruption");
X            return(0);
X            break;
X        }
X
X        scan = next;
X    }
X
X    /*
X     * We get here only if there's trouble -- normally "case END" is
X     * the terminating point.
X     */
X    regerror("corrupted pointers");
X    return(0);
X}
X
X/*
X - regrepeat - repeatedly match something simple, report how many
X *
X * p is the operand node of a STAR or PLUS: ANY, EXACTLY, ANYOF or
X * ANYBUT. Starting at reginput, consumes as many characters as that
X * node matches, leaves reginput just past them and returns the count.
X * Any other node type is reported through regerror() and counts as
X * zero matches.
X */
Xstatic int
Xregrepeat(p)
Xchar *p;
X{
X    register char *cursor = reginput;
X    register char *set = OPERAND(p);
X    register int matched = 0;
X
X    switch (OP(p)) {
X    case ANY:
X        /* Everything up to the terminator qualifies. */
X        matched = strlen(cursor);
X        cursor += matched;
X        break;
X    case EXACTLY:
X        /* Only the first operand char is compared. */
X        for (; *cursor == *set; cursor++)
X            matched++;
X        break;
X    case ANYOF:
X        for (; *cursor != '\0' && strchr(set, *cursor) != NULL; cursor++)
X            matched++;
X        break;
X    case ANYBUT:
X        for (; *cursor != '\0' && strchr(set, *cursor) == NULL; cursor++)
X            matched++;
X        break;
X    default:            /* Oh dear. Called inappropriately. */
X        regerror("internal foulup");
X        matched = 0;    /* Best compromise. */
X        break;
X    }
X    reginput = cursor;
X
X    return(matched);
X}
X
X/*
X - regnext - dig the "next" pointer out of a node
X *
X * Returns the node that p links to, or NULL if p is the sizing-pass
X * placeholder &regdummy or its stored offset is 0. The offset is
X * subtracted from p for a BACK node and added to p otherwise.
X */
Xstatic char *
Xregnext(p)
Xregister char *p;
X{
X    register int offset;
X
X    if (p == &regdummy)
X        return(NULL);
X
X    offset = NEXT(p);
X    if (offset == 0)
X        return(NULL);
X
X    if (OP(p) == BACK)
X        return(p-offset);
X    else
X        return(p+offset);
X}
X
X#ifdef DEBUG
X
XSTATIC char *regprop();
X
X/*
X - regdump - dump a regexp onto stdout in vaguely comprehensible form
X *
X * Prints one line per node: its offset within the program, its name as
X * given by regprop(), the offset of its "next" node in parentheses (0
X * if none) and, for ANYOF, ANYBUT and EXACTLY, the operand string.
X * A final line shows the regstart, reganch and regmust header fields
X * that are set.
X *
X * Offsets are pointer differences, so they are converted to int
X * before being handed to the %d conversions.
X */
Xvoid
Xregdump(r)
Xregexp *r;
X{
X    register char *s;
X    register char op = EXACTLY; /* Arbitrary non-END op. */
X    register char *next;
X
X
X    s = r->program + 1;
X    while (op != END) { /* While that wasn't END last time... */
X        op = OP(s);
X        printf("%2d%s", (int)(s-r->program), regprop(s));  /* Where, what. */
X        next = regnext(s);
X        if (next == NULL)       /* Next ptr. */
X            printf("(0)");
X        else
X            printf("(%d)", (int)((s-r->program)+(next-s)));
X        s += 3;
X        if (op == ANYOF || op == ANYBUT || op == EXACTLY) {
X            /* Literal string, where present. */
X            while (*s != '\0') {
X                putchar(*s);
X                s++;
X            }
X            s++;
X        }
X        putchar('\n');
X    }
X
X    /* Header fields of interest. */
X    if (r->regstart != '\0')
X        printf("start `%c' ", r->regstart);
X    if (r->reganch)
X        printf("anchored ");
X    if (r->regmust != NULL)
X        printf("must have \"%s\"", r->regmust);
X    printf("\n");
X}
X
X/*
X - regprop - printable representation of opcode
X *
X * Returns a pointer to a static buffer holding ":" followed by the
X * name of the opcode of node op ("OPENn"/"CLOSEn" for the numbered
X * ones). The buffer is overwritten by the next call. An unknown
X * opcode is reported through regerror() and yields just ":".
X */
Xstatic char *
Xregprop(op)
Xchar *op;
X{
X    register char *p;
X    static char buf[50];
X
X    (void) strcpy(buf, ":");
X
X    switch (OP(op)) {
X    case BOL:
X        p = "BOL";
X        break;
X    case EOL:
X        p = "EOL";
X        break;
X    case ANY:
X        p = "ANY";
X        break;
X    case ANYOF:
X        p = "ANYOF";
X        break;
X    case ANYBUT:
X        p = "ANYBUT";
X        break;
X    case BRANCH:
X        p = "BRANCH";
X        break;
X    case EXACTLY:
X        p = "EXACTLY";
X        break;
X    case NOTHING:
X        p = "NOTHING";
X        break;
X    case BACK:
X        p = "BACK";
X        break;
X    case END:
X        p = "END";
X        break;
X    case OPEN+1:
X    case OPEN+2:
X    case OPEN+3:
X    case OPEN+4:
X    case OPEN+5:
X    case OPEN+6:
X    case OPEN+7:
X    case OPEN+8:
X    case OPEN+9:
X        sprintf(buf+strlen(buf), "OPEN%d", OP(op)-OPEN);
X        p = NULL;       /* Name already written into buf. */
X        break;
X    case CLOSE+1:
X    case CLOSE+2:
X    case CLOSE+3:
X    case CLOSE+4:
X    case CLOSE+5:
X    case CLOSE+6:
X    case CLOSE+7:
X    case CLOSE+8:
X    case CLOSE+9:
X        sprintf(buf+strlen(buf), "CLOSE%d", OP(op)-CLOSE);
X        p = NULL;       /* Name already written into buf. */
X        break;
X    case STAR:
X        p = "STAR";
X        break;
X    case PLUS:
X        p = "PLUS";
X        break;
X    case WORDA:
X        p = "WORDA";
X        break;
X    case WORDZ:
X        p = "WORDZ";
X        break;
X    default:
X        regerror("corrupted opcode");
X        p = NULL;       /* Nothing to append; p must not stay unset. */
X        break;
X    }
X    if (p != NULL)
X        (void) strcat(buf, p);
X    return(buf);
X}
X#endif
X
X/*
X * The following is provided for those people who do not have strcspn() in
X * their C libraries. They should get off their butts and do something
X * about it; at least one public-domain implementation of those (highly
X * useful) string routines has been published on Usenet.
X */
X#ifdef STRCSPN
X/*
X * strcspn - find length of initial segment of s1 consisting entirely
X * of characters not from s2
X *
X * Returns the index of the first char of s1 that also occurs in s2, or
X * the length of s1 if there is no such char.
X */
X
Xstatic int
Xstrcspn(s1, s2)
Xchar *s1;
Xchar *s2;
X{
X    register char *subject;
X    register char *reject;
X    register int span = 0;
X
X    for (subject = s1; *subject != '\0'; subject++, span++) {
X        for (reject = s2; *reject != '\0'; reject++) {
X            if (*reject == *subject)
X                return(span);
X        }
X    }
X    return(span);
X}
X#endif
SHAR_EOF
chmod 0444 regexp.c ||
echo 'restore of regexp.c failed'
Wc_c="`wc -c < 'regexp.c'`"
test 31610 -eq "$Wc_c" ||
echo 'regexp.c: original size 31610, current size' "$Wc_c"
fi
true || echo 'restore of regexp.h failed'
echo End of part 3, continue with part 4
exit 0