home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Usenet 1994 October
/
usenetsourcesnewsgroupsinfomagicoctober1994disk2.iso
/
unix
/
volume19
/
flex2
/
part03
< prev
next >
Wrap
Text File
|
1989-06-21
|
37KB
|
1,433 lines
Subject: v19i057: Flex, a fast LEX replacement, Part03/07
Newsgroups: comp.sources.unix
Sender: sources
Approved: rsalz@uunet.UU.NET
Submitted-by: Vern Paxson <vern@csam.lbl.gov>
Posting-number: Volume 19, Issue 57
Archive-name: flex2/part03
#! /bin/sh
# This is a shell archive. Remove anything before this line, then unpack
# it by saving it into a file and typing "sh file". To overwrite existing
# files, type "sh file -c". You can also feed this as standard input via
# unshar, or by typing "sh <file", e.g. If this archive is complete, you
# will see the following message at the end:
# "End of archive 3 (of 7)."
# Contents: flex/main.c flex/nfa.c
# Wrapped by rsalz@prune.bbn.com on Thu Jun 22 19:01:46 1989
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
if test -f 'flex/main.c' -a "${1}" != "-c" ; then
echo shar: Will not clobber existing file \"'flex/main.c'\"
else
echo shar: Extracting \"'flex/main.c'\" \(16556 characters\)
sed "s/^X//" >'flex/main.c' <<'END_OF_FILE'
X/* flex - tool to generate fast lexical analyzers
X *
X *
X * Copyright (c) 1989 The Regents of the University of California.
X * All rights reserved.
X *
X * This code is derived from software contributed to Berkeley by
X * Vern Paxson.
X *
X * The United States Government has rights in this work pursuant to
X * contract no. DE-AC03-76SF00098 between the United States Department of
X * Energy and the University of California.
X *
X * Redistribution and use in source and binary forms are permitted
X * provided that the above copyright notice and this paragraph are
X * duplicated in all such forms and that any documentation,
X * advertising materials, and other materials related to such
X * distribution and use acknowledge that the software was developed
X * by the University of California, Berkeley. The name of the
X * University may not be used to endorse or promote products derived
X * from this software without specific prior written permission.
X * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
X * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
X * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
X *
X */
X
X#ifndef lint
X
Xstatic char copyright[] =
X "@(#) Copyright (c) 1989 The Regents of the University of California.\n";
Xstatic char CR_continuation[] = "@(#) All rights reserved.\n";
X
Xstatic char rcsid[] =
X "@(#) $Header: main.c,v 2.2 89/06/20 16:36:26 vern Exp $ (LBL)";
X
X#endif
X
X
X#include "flexdef.h"
X
X/* version string printed in the -v usage statistics banner by flexend() */
Xstatic char flex_version[] = "2.1 (beta)";
X
X
X/* these globals are all defined and commented in flexdef.h */
Xint printstats, syntaxerror, eofseen, ddebug, trace, spprdflt;
Xint interactive, caseins, useecs, fulltbl, usemecs;
Xint fullspd, gen_line_dirs, performance_report, backtrack_report;
Xint yymore_used, reject, real_reject, continued_action;
Xint yymore_really_used, reject_really_used;
Xint datapos, dataline, linenum;
XFILE *skelfile = NULL;
Xchar *infilename = NULL;
Xint onestate[ONE_STACK_SIZE], onesym[ONE_STACK_SIZE];
Xint onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp;
Xint current_mns, num_rules, current_max_rules, lastnfa;
Xint *firstst, *lastst, *finalst, *transchar, *trans1, *trans2;
Xint *accptnum, *assoc_rule, *state_type, *rule_type, *rule_linenum;
Xint current_state_type;
Xint variable_trailing_context_rules;
Xint numtemps, numprots, protprev[MSP], protnext[MSP], prottbl[MSP];
Xint protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE];
Xint numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs, tecfwd[CSIZE + 1];
Xint tecbck[CSIZE + 1];
Xint lastsc, current_max_scs, *scset, *scbol, *scxclu, *sceof, *actvsc;
Xchar **scname;
Xint current_max_dfa_size, current_max_xpairs;
Xint current_max_template_xpairs, current_max_dfas;
Xint lastdfa, *nxt, *chk, *tnxt;
Xint *base, *def, tblend, firstfree, **dss, *dfasiz;
Xunion dfaacc_union *dfaacc;
Xint *accsiz, *dhash, numas;
Xint numsnpairs, jambase, jamstate;
Xint lastccl, current_maxccls, *cclmap, *ccllen, *cclng, cclreuse;
Xint current_max_ccl_tbl_size;
Xchar *ccltbl;
Xchar *starttime, *endtime, nmstr[MAXLINE];
Xint sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs;
Xint tmpuses, totnst, peakpairs, numuniq, numdup, hshsave;
Xint num_backtracking, bol_needed;
XFILE *temp_action_file;
XFILE *backtrack_file;
Xint end_of_buffer_state;
X/* name template that flexinit() hands to mktemp(); the resulting name is
X * the temporary action file, which flexend() closes and unlinks
X */
X#ifndef SHORT_FILE_NAMES
Xchar action_file_name[] = "/tmp/flexXXXXXX";
X#else
Xchar action_file_name[] = "flexXXXXXX.tmp";
X#endif
X
X/* name of the file that flexinit() redirects stdout to unless -t is given */
X#ifndef SHORT_FILE_NAMES
Xstatic char outfile[] = "lex.yy.c";
X#else
Xstatic char outfile[] = "lexyy.c";
X#endif
X/* set to 1 once stdout has been redirected to outfile, so that flexend()
X * knows to remove the file when exiting with a non-zero status
X */
Xstatic int outfile_created = 0;
X
X
X/* main - drive one complete run of flex
X *
X * synopsis (from the shell)
X *     flex [-v] [file ...]
X *
X * Initializes from the command line, reads the input, settles the final
X * yymore/REJECT settings, rejects option combinations that cannot work
X * together, converts the NFA to a DFA, generates the tables and then
X * leaves through flexend().
X */
X
Xmain( argc, argv )
Xint argc;
Xchar **argv;
X{
X    flexinit( argc, argv );
X    readin();
X
X    if ( syntaxerror )
X        flexend( 1 );
X
X    /* a definite "really used" / "really not used" verdict overrides
X     * whatever value the plain flag holds at this point
X     */
X    if ( yymore_really_used == REALLY_USED )
X        yymore_used = true;
X    else if ( yymore_really_used == REALLY_NOT_USED )
X        yymore_used = false;
X
X    if ( reject_really_used == REALLY_USED )
X        reject = true;
X    else if ( reject_really_used == REALLY_NOT_USED )
X        reject = false;
X
X    if ( performance_report ) {
X        if ( yymore_used )
X            fprintf( stderr,
X                     "yymore() entails a minor performance penalty\n" );
X
X        if ( interactive )
X            fprintf( stderr,
X                     "-I (interactive) entails a minor performance penalty\n" );
X
X        if ( reject )
X            fprintf( stderr,
X                     "REJECT entails a large performance penalty\n" );
X
X        if ( variable_trailing_context_rules )
X            fprintf( stderr,
X                     "Variable trailing context rules entail a large performance penalty\n" );
X    }
X
X    /* remember whether REJECT itself was requested before variable
X     * trailing context forces the reject flag on
X     */
X    if ( reject )
X        real_reject = true;
X
X    if ( variable_trailing_context_rules )
X        reject = true;
X
X    if ( reject && (fulltbl || fullspd) )
X        flexerror( real_reject ?
X                   "REJECT cannot be used with -f or -F" :
X                   "variable trailing context rules cannot be used with -f or -F" );
X
X    /* convert the ndfa to a dfa */
X    ntod();
X
X    /* generate the C state transition tables from the DFA */
X    make_tables();
X
X    /* flexend() exits with its argument as status; it does not return */
X    flexend( 0 );
X
X    /*NOTREACHED*/
X}
X
X
X/* flexend - terminate flex
X *
X * synopsis
X *     int status;
X *     flexend( status );
X *
X * status is the exit status passed to exit() (status + 1 under VMS).
X *
X * Closes the skeleton file, closes and unlinks the temporary action file,
X * removes the generated output file when status is non-zero, writes the
X * backtracking report (-b) and the usage statistics (-v) if they were
X * requested, and then exits.
X *
X * note
X *     This routine does not return.
X */
X
Xflexend( status )
Xint status;
X{
X    int tblsiz;
X    char *flex_gettime();
X
X    if ( skelfile != NULL )
X        (void) fclose( skelfile );
X
X    if ( temp_action_file ) {
X        (void) fclose( temp_action_file );
X        (void) unlink( action_file_name );
X    }
X
X    /* don't leave a partial scanner behind after a failure */
X    if ( status != 0 && outfile_created ) {
X        (void) fclose( stdout );
X        (void) unlink( outfile );
X    }
X
X    /* flexinit() sets backtrack_report while parsing flags but only opens
X     * backtrack_file afterwards, and it reports a failed open through
X     * flexerror().  If we are reached on such an early error path
X     * (NOTE(review): presumably flexerror() exits via flexend() - confirm)
X     * the stream is still NULL, so it must not be written to or closed.
X     */
X    if ( backtrack_report && backtrack_file != NULL ) {
X        if ( num_backtracking == 0 )
X            fprintf( backtrack_file, "No backtracking.\n" );
X        else if ( fullspd || fulltbl )
X            fprintf( backtrack_file,
X                     "%d backtracking (non-accepting) states.\n",
X                     num_backtracking );
X        else
X            fprintf( backtrack_file, "Compressed tables always backtrack.\n" );
X
X        (void) fclose( backtrack_file );
X    }
X
X    if ( printstats ) {
X        endtime = flex_gettime();
X
X        fprintf( stderr, "flex version %s usage statistics:\n", flex_version );
X        fprintf( stderr, " started at %s, finished at %s\n",
X                 starttime, endtime );
X
X        fprintf( stderr, " %d/%d NFA states\n", lastnfa, current_mns );
X        fprintf( stderr, " %d/%d DFA states (%d words)\n", lastdfa,
X                 current_max_dfas, totnst );
X        fprintf( stderr, " %d rules\n", num_rules - 1 /* - 1 for def. rule */ );
X
X        if ( num_backtracking == 0 )
X            fprintf( stderr, " No backtracking\n" );
X        else if ( fullspd || fulltbl )
X            fprintf( stderr, " %d backtracking (non-accepting) states\n",
X                     num_backtracking );
X        else
X            fprintf( stderr, " compressed tables always backtrack\n" );
X
X        if ( bol_needed )
X            fprintf( stderr, " Beginning-of-line patterns used\n" );
X
X        fprintf( stderr, " %d/%d start conditions\n", lastsc,
X                 current_max_scs );
X        fprintf( stderr, " %d epsilon states, %d double epsilon states\n",
X                 numeps, eps2 );
X
X        if ( lastccl == 0 )
X            fprintf( stderr, " no character classes\n" );
X        else
X            fprintf( stderr,
X                     " %d/%d character classes needed %d/%d words of storage, %d reused\n",
X                     lastccl, current_maxccls,
X                     cclmap[lastccl] + ccllen[lastccl],
X                     current_max_ccl_tbl_size, cclreuse );
X
X        fprintf( stderr, " %d state/nextstate pairs created\n", numsnpairs );
X        fprintf( stderr, " %d/%d unique/duplicate transitions\n",
X                 numuniq, numdup );
X
X        /* tblsiz accumulates the total number of table entries; the
X         * starting figure depends on the table representation chosen
X         */
X        if ( fulltbl ) {
X            tblsiz = lastdfa * numecs;
X            fprintf( stderr, " %d table entries\n", tblsiz );
X        }
X
X        else {
X            tblsiz = 2 * (lastdfa + numtemps) + 2 * tblend;
X
X            fprintf( stderr, " %d/%d base-def entries created\n",
X                     lastdfa + numtemps, current_max_dfas );
X            fprintf( stderr, " %d/%d (peak %d) nxt-chk entries created\n",
X                     tblend, current_max_xpairs, peakpairs );
X            fprintf( stderr,
X                     " %d/%d (peak %d) template nxt-chk entries created\n",
X                     numtemps * nummecs, current_max_template_xpairs,
X                     numtemps * numecs );
X            fprintf( stderr, " %d empty table entries\n", nummt );
X            fprintf( stderr, " %d protos created\n", numprots );
X            fprintf( stderr, " %d templates created, %d uses\n",
X                     numtemps, tmpuses );
X        }
X
X        if ( useecs ) {
X            tblsiz = tblsiz + CSIZE;
X            fprintf( stderr, " %d/%d equivalence classes created\n",
X                     numecs, CSIZE );
X        }
X
X        if ( usemecs ) {
X            tblsiz = tblsiz + numecs;
X            fprintf( stderr, " %d/%d meta-equivalence classes created\n",
X                     nummecs, CSIZE );
X        }
X
X        fprintf( stderr, " %d (%d saved) hash collisions, %d DFAs equal\n",
X                 hshcol, hshsave, dfaeql );
X        fprintf( stderr, " %d sets of reallocations needed\n", num_reallocs );
X        fprintf( stderr, " %d total table entries needed\n", tblsiz );
X    }
X
X#ifndef VMS
X    exit( status );
X#else
X    exit( status + 1 );
X#endif
X}
X
X
X/* flexinit - initialize flex
X *
X * synopsis
X *     int argc;
X *     char **argv;
X *     flexinit( argc, argv );
X *
X * Resets the option flags, parses the leading "-" arguments, checks for
X * incompatible option combinations, redirects stdout to the output file
X * (unless -t), opens the input, backtrack, skeleton and temporary action
X * files, zeroes the statistics counters, sets up the initial equivalence
X * classes and finally allocates the internal tables.
X *
X * flags recognized (several may be bundled, except -c and -S)
X *     -b        backtrack_report
X *     -c<opts>  compression options: e (useecs), f (fulltbl),
X *               F (fullspd), m (usemecs); must be given separately
X *     -d        ddebug
X *     -f, -F    fulltbl / fullspd, both without (meta-)equivalence classes
X *     -I, -i    interactive / caseins
X *     -L        turns gen_line_dirs off
X *     -p        performance_report
X *     -S<file>  skeleton file name; must be given separately
X *     -s        spprdflt
X *     -t        write the scanner to stdout
X *     -T        trace
X *     -v        printstats
X */
X
Xflexinit( argc, argv )
Xint argc;
Xchar **argv;
X{
X    int i, sawcmpflag, use_stdout;
X    char *arg, *skelname = NULL, *flex_gettime(), clower(), *mktemp();
X
X    printstats = syntaxerror = trace = spprdflt = interactive = caseins = false;
X    backtrack_report = performance_report = ddebug = fulltbl = fullspd = false;
X    yymore_used = continued_action = reject = false;
X    yymore_really_used = reject_really_used = false;
X    gen_line_dirs = usemecs = useecs = true;
X
X    sawcmpflag = false;
X    use_stdout = false;
X
X    /* read flags */
X    for ( --argc, ++argv; argc ; --argc, ++argv ) {
X        /* the first argument that is not a flag (or is a lone "-")
X         * ends flag processing
X         */
X        if ( argv[0][0] != '-' || argv[0][1] == '\0' )
X            break;
X
X        arg = argv[0];
X
X        for ( i = 1; arg[i] != '\0'; ++i )
X            switch ( arg[i] ) {
X            case 'b':
X                backtrack_report = true;
X                break;
X
X            case 'c':
X                if ( i != 1 )
X                    flexerror( "-c flag must be given separately" );
X
X                /* the first -c seen clears the default compression */
X                if ( ! sawcmpflag ) {
X                    useecs = false;
X                    usemecs = false;
X                    fulltbl = false;
X                    sawcmpflag = true;
X                }
X
X                for ( ++i; arg[i] != '\0'; ++i ) {
X                    /* 'F' has to be recognized before the letter is
X                     * folded to lower case; otherwise it is taken for
X                     * 'f' and -cF selects fulltbl instead of fullspd.
X                     * The other letters stay case-insensitive.
X                     */
X                    if ( arg[i] == 'F' )
X                        fullspd = true;
X
X                    else
X                        switch ( clower( arg[i] ) ) {
X                        case 'e':
X                            useecs = true;
X                            break;
X
X                        case 'f':
X                            fulltbl = true;
X                            break;
X
X                        case 'm':
X                            usemecs = true;
X                            break;
X
X                        default:
X                            lerrif( "unknown -c option %c",
X                                    (int) arg[i] );
X                            break;
X                        }
X                }
X
X                goto get_next_arg;
X
X            case 'd':
X                ddebug = true;
X                break;
X
X            case 'f':
X                useecs = usemecs = false;
X                fulltbl = true;
X                break;
X
X            case 'F':
X                useecs = usemecs = false;
X                fullspd = true;
X                break;
X
X            case 'I':
X                interactive = true;
X                break;
X
X            case 'i':
X                caseins = true;
X                break;
X
X            case 'L':
X                gen_line_dirs = false;
X                break;
X
X            case 'p':
X                performance_report = true;
X                break;
X
X            case 'S':
X                if ( i != 1 )
X                    flexerror( "-S flag must be given separately" );
X
X                /* the rest of this argument is the skeleton file name */
X                skelname = arg + i + 1;
X                goto get_next_arg;
X
X            case 's':
X                spprdflt = true;
X                break;
X
X            case 't':
X                use_stdout = true;
X                break;
X
X            case 'T':
X                trace = true;
X                break;
X
X            case 'v':
X                printstats = true;
X                break;
X
X            default:
X                lerrif( "unknown flag %c", (int) arg[i] );
X                break;
X            }
X
Xget_next_arg: /* used by -c and -S flags in lieu of a "continue 2" control */
X        ;
X    }
X
X    if ( (fulltbl || fullspd) && usemecs )
X        flexerror( "full table and -cm don't make sense together" );
X
X    if ( (fulltbl || fullspd) && interactive )
X        flexerror( "full table and -I are (currently) incompatible" );
X
X    if ( fulltbl && fullspd )
X        flexerror( "full table and -F are mutually exclusive" );
X
X    if ( ! skelname ) {
X        static char skeleton_name_storage[400];
X
X        skelname = skeleton_name_storage;
X        (void) strcpy( skelname, DEFAULT_SKELETON_FILE );
X    }
X
X    if ( ! use_stdout ) {
X        FILE *prev_stdout = freopen( outfile, "w", stdout );
X
X        /* report the name actually used: it differs under
X         * SHORT_FILE_NAMES
X         */
X        if ( prev_stdout == NULL )
X            lerrsf( "could not create %s", outfile );
X
X        outfile_created = 1;
X    }
X
X    if ( argc ) {
X        if ( argc > 1 )
X            flexerror( "extraneous argument(s) given" );
X
X        yyin = fopen( infilename = argv[0], "r" );
X
X        if ( yyin == NULL )
X            lerrsf( "can't open %s", argv[0] );
X    }
X
X    else
X        yyin = stdin;
X
X    if ( backtrack_report ) {
X#ifndef SHORT_FILE_NAMES
X        static char backtrack_name[] = "lex.backtrack";
X#else
X        static char backtrack_name[] = "lex.bck";
X#endif
X
X        backtrack_file = fopen( backtrack_name, "w" );
X
X        if ( backtrack_file == NULL )
X            lerrsf( "could not create %s", backtrack_name );
X    }
X
X    else
X        backtrack_file = NULL;
X
X
X    lastccl = 0;
X    lastsc = 0;
X
X    /* initialize the statistics */
X    starttime = flex_gettime();
X
X    if ( (skelfile = fopen( skelname, "r" )) == NULL )
X        lerrsf( "can't open skeleton file %s", skelname );
X
X    /* NOTE(review): mktemp() only generates a name, so another process
X     * could create the file before the fopen() below - consider a safer
X     * primitive where one is available
X     */
X    (void) mktemp( action_file_name );
X
X    if ( (temp_action_file = fopen( action_file_name, "w" )) == NULL )
X        lerrsf( "can't open temporary action file %s", action_file_name );
X
X    lastdfa = lastnfa = num_rules = numas = numsnpairs = tmpuses = 0;
X    numecs = numeps = eps2 = num_reallocs = hshcol = dfaeql = totnst = 0;
X    numuniq = numdup = hshsave = eofseen = datapos = dataline = 0;
X    num_backtracking = onesp = numprots = 0;
X    variable_trailing_context_rules = bol_needed = false;
X
X    linenum = sectnum = 1;
X    firstprot = NIL;
X
X    /* used in mkprot() so that the first proto goes in slot 1
X     * of the proto queue
X     */
X    lastprot = 1;
X
X    if ( useecs ) {
X        /* set up doubly-linked equivalence classes: every character
X         * starts out chained to its neighbours in one list
X         */
X        ecgroup[1] = NIL;
X
X        for ( i = 2; i <= CSIZE; ++i ) {
X            ecgroup[i] = i - 1;
X            nextecm[i - 1] = i;
X        }
X
X        nextecm[CSIZE] = NIL;
X    }
X
X    else {
X        /* put everything in its own equivalence class */
X        for ( i = 1; i <= CSIZE; ++i ) {
X            ecgroup[i] = i;
X            nextecm[i] = BAD_SUBSCRIPT; /* to catch errors */
X        }
X    }
X
X    set_up_initial_allocations();
X}
X
X
X/* readin - emit the scanner preamble and parse the input file(s)
X *
X * synopsis
X *     readin();
X *
X * Writes the #define lines selecting debug mode and the table style,
X * copies out the next part of the skeleton, emits a line directive,
X * runs the parser and then settles the number of equivalence classes.
X */
X
Xreadin()
X{
X    char *table_style;
X
X    if ( ddebug )
X        puts( "#define FLEX_DEBUG" );
X
X    /* exactly one table-style macro is written */
X    if ( fulltbl )
X        table_style = "#define FLEX_FULL_TABLE";
X    else if ( fullspd )
X        table_style = "#define FLEX_FAST_COMPRESSED";
X    else
X        table_style = "#define FLEX_COMPRESSED";
X
X    puts( table_style );
X
X    skelout();
X
X    line_directive_out( stdout );
X
X    if ( yyparse() )
X        lerrif( "fatal parse error at line %d", linenum );
X
X    if ( ! useecs )
X        numecs = CSIZE;
X
X    else {
X        numecs = cre8ecs( nextecm, ecgroup, CSIZE );
X        ccl2ecl();
X    }
X}
X
X
X
X/* set_up_initial_allocations - allocate memory for internal tables
X *
X * Each group below first records the initial capacity in its current_*
X * variable and then allocates every array that is sized by it.
X */
X
Xset_up_initial_allocations()
X{
X    /* arrays sized by current_mns */
X    current_mns = INITIAL_MNS;
X    firstst    = allocate_integer_array( current_mns );
X    lastst     = allocate_integer_array( current_mns );
X    finalst    = allocate_integer_array( current_mns );
X    transchar  = allocate_integer_array( current_mns );
X    trans1     = allocate_integer_array( current_mns );
X    trans2     = allocate_integer_array( current_mns );
X    accptnum   = allocate_integer_array( current_mns );
X    assoc_rule = allocate_integer_array( current_mns );
X    state_type = allocate_integer_array( current_mns );
X
X    /* arrays sized by current_max_rules */
X    current_max_rules = INITIAL_MAX_RULES;
X    rule_type    = allocate_integer_array( current_max_rules );
X    rule_linenum = allocate_integer_array( current_max_rules );
X
X    /* arrays sized by current_max_scs */
X    current_max_scs = INITIAL_MAX_SCS;
X    scset  = allocate_integer_array( current_max_scs );
X    scbol  = allocate_integer_array( current_max_scs );
X    scxclu = allocate_integer_array( current_max_scs );
X    sceof  = allocate_integer_array( current_max_scs );
X    scname = allocate_char_ptr_array( current_max_scs );
X    actvsc = allocate_integer_array( current_max_scs );
X
X    /* arrays sized by current_maxccls */
X    current_maxccls = INITIAL_MAX_CCLS;
X    cclmap = allocate_integer_array( current_maxccls );
X    ccllen = allocate_integer_array( current_maxccls );
X    cclng  = allocate_integer_array( current_maxccls );
X
X    current_max_ccl_tbl_size = INITIAL_MAX_CCL_TBL_SIZE;
X    ccltbl = allocate_character_array( current_max_ccl_tbl_size );
X
X    /* only the limit is recorded here; nothing is allocated from it */
X    current_max_dfa_size = INITIAL_MAX_DFA_SIZE;
X
X    /* arrays sized by current_max_xpairs */
X    current_max_xpairs = INITIAL_MAX_XPAIRS;
X    nxt = allocate_integer_array( current_max_xpairs );
X    chk = allocate_integer_array( current_max_xpairs );
X
X    current_max_template_xpairs = INITIAL_MAX_TEMPLATE_XPAIRS;
X    tnxt = allocate_integer_array( current_max_template_xpairs );
X
X    /* arrays sized by current_max_dfas */
X    current_max_dfas = INITIAL_MAX_DFAS;
X    base   = allocate_integer_array( current_max_dfas );
X    def    = allocate_integer_array( current_max_dfas );
X    dfasiz = allocate_integer_array( current_max_dfas );
X    accsiz = allocate_integer_array( current_max_dfas );
X    dhash  = allocate_integer_array( current_max_dfas );
X    dss    = allocate_int_ptr_array( current_max_dfas );
X    dfaacc = allocate_dfaacc_union( current_max_dfas );
X}
END_OF_FILE
if test 16556 -ne `wc -c <'flex/main.c'`; then
echo shar: \"'flex/main.c'\" unpacked with wrong size!
fi
# end of 'flex/main.c'
fi
if test -f 'flex/nfa.c' -a "${1}" != "-c" ; then
echo shar: Will not clobber existing file \"'flex/nfa.c'\"
else
echo shar: Extracting \"'flex/nfa.c'\" \(17293 characters\)
sed "s/^X//" >'flex/nfa.c' <<'END_OF_FILE'
X/* nfa - NFA construction routines */
X
X/*
X * Copyright (c) 1989 The Regents of the University of California.
X * All rights reserved.
X *
X * This code is derived from software contributed to Berkeley by
X * Vern Paxson.
X *
X * The United States Government has rights in this work pursuant to
X * contract no. DE-AC03-76SF00098 between the United States Department of
X * Energy and the University of California.
X *
X * Redistribution and use in source and binary forms are permitted
X * provided that the above copyright notice and this paragraph are
X * duplicated in all such forms and that any documentation,
X * advertising materials, and other materials related to such
X * distribution and use acknowledge that the software was developed
X * by the University of California, Berkeley. The name of the
X * University may not be used to endorse or promote products derived
X * from this software without specific prior written permission.
X * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
X * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
X * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
X */
X
X#ifndef lint
X
Xstatic char copyright[] =
X "@(#) Copyright (c) 1989 The Regents of the University of California.\n";
Xstatic char CR_continuation[] = "@(#) All rights reserved.\n";
X
Xstatic char rcsid[] =
X "@(#) $Header: nfa.c,v 2.0 89/06/20 15:50:05 vern Locked $ (LBL)";
X
X#endif
X
X#include "flexdef.h"
X
X/* add_accept - add an accepting state to a machine
X *
X * synopsis
X *
X *     add_accept( mach, accepting_number );
X *
X * mach             - the machine to receive the accepting number
X * accepting_number - becomes mach's accepting number
X *
X * The machine is updated in place through the global state arrays; no
X * value is returned.
X */
X
Xadd_accept( mach, accepting_number )
Xint mach, accepting_number;
X{
X    int final_state = finalst[mach];
X
X    /* hang the accepting number off an epsilon state. if it is associated
X     * with a state that has a non-epsilon out-transition, then the state
X     * will accept BEFORE it makes that transition, i.e., one character
X     * too soon
X     */
X    if ( transchar[final_state] == SYM_EPSILON )
X        accptnum[final_state] = accepting_number;
X
X    else {
X        int astate = mkstate( SYM_EPSILON );
X
X        accptnum[astate] = accepting_number;
X
X        /* link_machines() updates mach's entries in the state arrays;
X         * mach is passed by value, so there is no point storing the
X         * result back into it
X         */
X        (void) link_machines( mach, astate );
X    }
X}
X
X
X/* copysingl - chain together a number of copies of a singleton machine
X *
X * synopsis
X *
X *     newsng = copysingl( singl, num );
X *
X * newsng - a new machine: an epsilon state followed by num copies of singl
X * singl  - a singleton machine
X * num    - how many copies of singl to link in; none if num < 1
X */
X
Xint copysingl( singl, num )
Xint singl, num;
X{
X    int chain, remaining;
X
X    chain = mkstate( SYM_EPSILON );
X
X    for ( remaining = num; remaining > 0; --remaining )
X        chain = link_machines( chain, dupmachine( singl ) );
X
X    return ( chain );
X}
X
X
X/* dumpnfa - debugging routine to write out an nfa
X *
X * synopsis
X *     int state1;
X *     dumpnfa( state1 );
X *
X * state1 is only reported in the banner line; every state from 1 through
X * lastnfa is listed on stderr with its transition symbol, its two
X * out-transitions and, when it has one, its accepting number.
X */
X
Xdumpnfa( state1 )
Xint state1;
X{
X    int ns;
X
X    fprintf( stderr, "\n\n********** beginning dump of nfa with start state %d\n",
X             state1 );
X
X    /* we probably should loop starting at firstst[state1] and going to
X     * lastst[state1], but they're not maintained properly when we "or"
X     * all of the rules together. So we use our knowledge that the machine
X     * starts at state 1 and ends at lastnfa.
X     */
X    for ( ns = 1; ns <= lastnfa; ++ns ) {
X        fprintf( stderr, "state # %4d\t", ns );
X        fprintf( stderr, "%3d: %4d, %4d",
X                 transchar[ns], trans1[ns], trans2[ns] );
X
X        if ( accptnum[ns] != NIL )
X            fprintf( stderr, " [%d]", accptnum[ns] );
X
X        fprintf( stderr, "\n" );
X    }
X
X    fprintf( stderr, "********** end of dump\n" );
X}
X
X
X/* dupmachine - make a duplicate of a given machine
X *
X * synopsis
X *
X * copy = dupmachine( mach );
X *
X * copy - holds duplicate of mach
X * mach - machine to be duplicated
X *
X * note that the copy of mach is NOT an exact duplicate; rather, all the
X * transition states values are adjusted so that the copy is self-contained,
X * as the original should have been.
X *
X * also note that the original MUST be contiguous, with its low and high
X * states accessible by the arrays firstst and lastst
X *
X * a machine with no states (firstst[mach] > lastst[mach]) is reported
X * through flexfatal()
X */
X
Xint dupmachine( mach )
Xint mach;
X
X {
X int i, init, state_offset;
X int state = 0;
X /* read the upper bound once, before mkstate() starts adding states */
X int last = lastst[mach];
X
X /* create one new state per original state, in the same order */
X for ( i = firstst[mach]; i <= last; ++i )
X {
X state = mkstate( transchar[i] );
X
X if ( trans1[i] != NO_TRANSITION )
X {
X /* state - i is the distance between original state i and its copy,
X * so adding it re-targets the transition into the copy
X */
X mkxtion( finalst[state], trans1[i] + state - i );
X
X if ( transchar[i] == SYM_EPSILON && trans2[i] != NO_TRANSITION )
X mkxtion( finalst[state], trans2[i] + state - i );
X }
X
X accptnum[state] = accptnum[i];
X }
X
X /* state is still 0 only if the loop body never ran */
X if ( state == 0 )
X flexfatal( "empty machine in dupmachine()" );
X
X /* the loop leaves i one past the last original state, so this is the
X * distance from the original's last state to the copy's last state
X */
X state_offset = state - i + 1;
X
X /* shift the machine number and its first/final/last entries by the
X * same distance
X */
X init = mach + state_offset;
X firstst[init] = firstst[mach] + state_offset;
X finalst[init] = finalst[mach] + state_offset;
X lastst[init] = lastst[mach] + state_offset;
X
X return ( init );
X }
X
X/* finish_rule - finish up the processing for a rule
X *
X * synopsis
X *
X *     finish_rule( mach, variable_trail_rule, headcnt, trailcnt );
X *
X * An accepting number is added to the given machine. If variable_trail_rule
X * is true then the rule has trailing context and both the head and trail
X * are variable size. Otherwise if headcnt or trailcnt is non-zero then
X * the machine recognizes a pattern with trailing context and headcnt is
X * the number of characters in the matched part of the pattern, or zero
X * if the matched part has variable length. trailcnt is the number of
X * trailing context characters in the pattern, or zero if the trailing
X * context has variable length.
X *
X * The "case N:" label for the rule's action, any code needed to back up
X * over fixed-length trailing context, and a line directive are written
X * to temp_action_file.
X */
X
Xfinish_rule( mach, variable_trail_rule, headcnt, trailcnt )
Xint mach, variable_trail_rule, headcnt, trailcnt;
X{
X    add_accept( mach, num_rules );
X
X    /* we did this in new_rule(), but it often gets the wrong
X     * number because we do it before we start parsing the current rule
X     */
X    rule_linenum[num_rules] = linenum;
X
X    fprintf( temp_action_file, "case %d:\n", num_rules );
X
X    if ( variable_trail_rule ) {
X        rule_type[num_rules] = RULE_VARIABLE;
X
X        if ( performance_report )
X            fprintf( stderr, "Variable trailing context rule at line %d\n",
X                     rule_linenum[num_rules] );
X
X        variable_trailing_context_rules = true;
X    }
X
X    else {
X        rule_type[num_rules] = RULE_NORMAL;
X
X        if ( headcnt > 0 || trailcnt > 0 ) {
X            /* do trailing context magic to not match the trailing characters */
X            char *scanner_cp = "yy_c_buf_p = yy_cp";
X            char *scanner_bp = "yy_bp";
X
X            fprintf( temp_action_file,
X                     "*yy_cp = yy_hold_char; /* undo effects of setting up yytext */\n" );
X
X            /* a fixed-length head is measured forward from the start of
X             * the match; otherwise the fixed-length trail is backed off
X             * from the end.  (The old nested repeat of this test had an
X             * else branch that could never run; it has been removed.)
X             */
X            if ( headcnt > 0 )
X                fprintf( temp_action_file, "%s = %s + %d;\n",
X                         scanner_cp, scanner_bp, headcnt );
X
X            else
X                fprintf( temp_action_file,
X                         "%s -= %d;\n", scanner_cp, trailcnt );
X
X            fprintf( temp_action_file,
X                     "YY_DO_BEFORE_ACTION; /* set up yytext again */\n" );
X        }
X    }
X
X    line_directive_out( temp_action_file );
X}
X
X
X/* link_machines - concatenate two machines
X *
X * synopsis
X *
X *     new = link_machines( first, last );
X *
X * new   - a machine constructed by connecting first to last
X * first - the machine whose successor is to be last
X * last  - the machine whose predecessor is to be first
X *
X * The result matches first's pattern followed by last's pattern. If
X * either argument is NIL the other one is returned untouched. Otherwise
X * the entries of "first" in the state arrays are updated to describe the
X * combined machine and first is returned; last's entries are not changed.
X */
X
Xint link_machines( first, last )
Xint first, last;
X{
X    if ( first == NIL )
X        return ( last );
X
X    if ( last == NIL )
X        return ( first );
X
X    /* hook the end of first onto the start of last, then widen first's
X     * bookkeeping so that it covers both machines
X     */
X    mkxtion( finalst[first], last );
X
X    finalst[first] = finalst[last];
X    lastst[first] = max( lastst[first], lastst[last] );
X    firstst[first] = min( firstst[first], firstst[last] );
X
X    return ( first );
X}
X
X
X/* mark_beginning_as_normal - mark each "beginning" state in a machine
X *                            as being a "normal" (i.e., not trailing
X *                            context-associated) state
X *
X * synopsis
X *
X *     mark_beginning_as_normal( mach )
X *
X * mach - machine to mark
X *
X * The "beginning" states are the epsilon closure of the first state.
X * The walk is recursive and stops at states that are already normal.
X */
X
Xmark_beginning_as_normal( mach )
Xregister int mach;
X{
X    int type = state_type[mach];
X
X    if ( type == STATE_NORMAL )
X        /* oh, we've already visited here */
X        return;
X
X    if ( type != STATE_TRAILING_CONTEXT ) {
X        flexerror( "bad state type in mark_beginning_as_normal()" );
X        return;
X    }
X
X    state_type[mach] = STATE_NORMAL;
X
X    /* only epsilon transitions belong to the closure */
X    if ( transchar[mach] != SYM_EPSILON )
X        return;
X
X    if ( trans1[mach] != NO_TRANSITION )
X        mark_beginning_as_normal( trans1[mach] );
X
X    if ( trans2[mach] != NO_TRANSITION )
X        mark_beginning_as_normal( trans2[mach] );
X}
X
X
X/* mkbranch - make a machine that branches to two machines
X *
X * synopsis
X *
X *     branch = mkbranch( first, second );
X *
X * branch        - a machine which matches either first's pattern or second's
X * first, second - machines whose patterns are to be or'ed (the | operator)
X *
X * If either argument is NO_TRANSITION the other is returned as is.
X * Otherwise a new epsilon state with a transition to each machine is
X * returned. Neither first nor second is destroyed by the operation. Also,
X * the resulting machine CANNOT be used with any other "mk" operation
X * except more mkbranch's. Compare with mkor()
X */
X
Xint mkbranch( first, second )
Xint first, second;
X{
X    int fork_state;
X
X    if ( first == NO_TRANSITION )
X        return ( second );
X
X    if ( second == NO_TRANSITION )
X        return ( first );
X
X    fork_state = mkstate( SYM_EPSILON );
X
X    mkxtion( fork_state, first );
X    mkxtion( fork_state, second );
X
X    return ( fork_state );
X}
X
X
X/* mkclos - convert a machine into a closure
X *
X * synopsis
X *     new = mkclos( state );
X *
X * new - a new machine which matches the closure of "state"
X *
X * The closure is built as an optional positive closure.
X */
X
Xint mkclos( state )
Xint state;
X{
X    int one_or_more = mkposcl( state );
X
X    return ( mkopt( one_or_more ) );
X}
X
X
X/* mkopt - make a machine optional
X *
X * synopsis
X *
X *     new = mkopt( mach );
X *
X * new  - a machine which optionally matches whatever mach matched
X * mach - the machine to make optional
X *
X * notes:
X *     1. mach must be the last machine created
X *     2. mach is destroyed by the call
X */
X
Xint mkopt( mach )
Xint mach;
X{
X    int tail_eps, head_eps;
X
X    /* make sure the machine ends in an epsilon state that is free to
X     * take another transition
X     */
X    if ( ! SUPER_FREE_EPSILON(finalst[mach]) ) {
X        tail_eps = mkstate( SYM_EPSILON );
X        mach = link_machines( mach, tail_eps );
X    }
X
X    /* can't skimp on the following if FREE_EPSILON(mach) is true because
X     * some state interior to "mach" might point back to the beginning
X     * for a closure
X     */
X    head_eps = mkstate( SYM_EPSILON );
X    mach = link_machines( head_eps, mach );
X
X    /* the bypass: straight from the new first state to the final state */
X    mkxtion( mach, finalst[mach] );
X
X    return ( mach );
X}
X
X
X/* mkor - make a machine that matches either one of two machines
X *
X * synopsis
X *
X * new = mkor( first, second );
X *
X * new - a machine which matches either first's pattern or second's
X * first, second - machines whose patterns are to be or'ed (the | operator)
X *
X * if either argument is NIL the other one is returned unchanged
X *
X * note that first and second are both destroyed by the operation
X * the code is rather convoluted because an attempt is made to minimize
X * the number of epsilon states needed
X */
X
Xint mkor( first, second )
Xint first, second;
X
X {
X /* orend is the state at which both alternatives end up */
X int eps, orend;
X
X if ( first == NIL )
X return ( second );
X
X else if ( second == NIL )
X return ( first );
X
X else
X {
X /* see comment in mkopt() about why we can't use the first state
X * of "first" or "second" if they satisfy "FREE_EPSILON"
X */
X eps = mkstate( SYM_EPSILON );
X
X /* the new epsilon state becomes the start of first ... */
X first = link_machines( eps, first );
X
X /* ... and gets its other out-transition to second */
X mkxtion( first, second );
X
X /* reuse first's final state as the common end if the macro accepts it
X * and it carries no accepting number
X */
X if ( SUPER_FREE_EPSILON(finalst[first]) &&
X accptnum[finalst[first]] == NIL )
X {
X orend = finalst[first];
X mkxtion( finalst[second], orend );
X }
X
X /* otherwise try second's final state under the same conditions */
X else if ( SUPER_FREE_EPSILON(finalst[second]) &&
X accptnum[finalst[second]] == NIL )
X {
X orend = finalst[second];
X mkxtion( finalst[first], orend );
X }
X
X /* neither can be reused: append a fresh epsilon state to first and
X * point second's final state at it
X */
X else
X {
X eps = mkstate( SYM_EPSILON );
X
X first = link_machines( first, eps );
X orend = finalst[first];
X
X mkxtion( finalst[second], orend );
X }
X }
X
X /* only reached when both machines were non-NIL, so orend is always set */
X finalst[first] = orend;
X return ( first );
X }
X
X
X/* mkposcl - convert a machine into a positive closure
X *
X * synopsis
X *     new = mkposcl( state );
X *
X * new - a machine matching the positive closure of "state"
X *
X * The loop back to the start is hung off the machine's final state when
X * SUPER_FREE_EPSILON accepts it; otherwise a new epsilon state carrying
X * the loop is linked onto the end.
X */
X
Xint mkposcl( state )
Xint state;
X{
X    int loop_eps;
X
X    if ( ! SUPER_FREE_EPSILON(finalst[state]) ) {
X        loop_eps = mkstate( SYM_EPSILON );
X        mkxtion( loop_eps, state );
X        return ( link_machines( state, loop_eps ) );
X    }
X
X    mkxtion( finalst[state], state );
X    return ( state );
X}
X
X
X/* mkrep - make a replicated machine
X *
X * synopsis
X *     new = mkrep( mach, lb, ub );
X *
X * new - a machine that matches whatever "mach" matched from "lb"
X *       number of times to "ub" number of times
X *
X * note
X *     if "ub" is INFINITY then "new" matches "lb" or more occurrences
X *     of "mach"
X *
X * The result is mach itself, followed by lb - 1 mandatory copies,
X * followed by a tail that is either a closure of a copy (unbounded) or
X * a nest of ub - lb optional copies.
X */
X
Xint mkrep( mach, lb, ub )
Xint mach, lb, ub;
X{
X    int required, tail, dup, count;
X
X    required = copysingl( mach, lb - 1 );
X
X    if ( ub == INFINITY ) {
X        dup = dupmachine( mach );
X        tail = mkclos( dup );
X    }
X
X    else {
X        tail = mkstate( SYM_EPSILON );
X
X        /* each pass wraps one more optional copy around the tail */
X        for ( count = lb; count < ub; ++count ) {
X            dup = dupmachine( mach );
X            tail = mkopt( link_machines( dup, tail ) );
X        }
X    }
X
X    return ( link_machines( mach, link_machines( required, tail ) ) );
X}
X
X
X/* mkstate - create a state with a transition on a given symbol
X *
X * synopsis
X *
X *     state = mkstate( sym );
X *
X * state - a new state matching sym
X * sym   - the symbol the new state is to have an out-transition on
X *
X * note that this routine makes new states in ascending order through the
X * state array (and increments LASTNFA accordingly). The routine DUPMACHINE
X * relies on machines being made in ascending order and that they are
X * CONTIGUOUS. Change it and you will have to rewrite DUPMACHINE (kludge
X * that it admittedly is)
X *
X * The per-state arrays are grown by MNS_INCREMENT when they are full;
X * reaching MAXIMUM_MNS is reported through lerrif().
X */
X
Xint mkstate( sym )
Xint sym;
X{
X    ++lastnfa;
X
X    if ( lastnfa >= current_mns ) {
X        current_mns += MNS_INCREMENT;
X
X        if ( current_mns >= MAXIMUM_MNS )
X            lerrif( "input rules are too complicated (>= %d NFA states)",
X                    current_mns );
X
X        ++num_reallocs;
X
X        firstst    = reallocate_integer_array( firstst, current_mns );
X        lastst     = reallocate_integer_array( lastst, current_mns );
X        finalst    = reallocate_integer_array( finalst, current_mns );
X        transchar  = reallocate_integer_array( transchar, current_mns );
X        trans1     = reallocate_integer_array( trans1, current_mns );
X        trans2     = reallocate_integer_array( trans2, current_mns );
X        accptnum   = reallocate_integer_array( accptnum, current_mns );
X        assoc_rule = reallocate_integer_array( assoc_rule, current_mns );
X        state_type = reallocate_integer_array( state_type, current_mns );
X    }
X
X    /* a fresh state is a one-state machine with no out-transitions */
X    firstst[lastnfa] = lastnfa;
X    finalst[lastnfa] = lastnfa;
X    lastst[lastnfa] = lastnfa;
X    transchar[lastnfa] = sym;
X    trans1[lastnfa] = NO_TRANSITION;
X    trans2[lastnfa] = NO_TRANSITION;
X    accptnum[lastnfa] = NIL;
X    assoc_rule[lastnfa] = num_rules;
X    state_type[lastnfa] = current_state_type;
X
X    /* fix up equivalence classes base on this transition. Note that any
X     * character which has its own transition gets its own equivalence class.
X     * Thus only characters which are only in character classes have a chance
X     * at being in the same equivalence class. E.g. "a|b" puts 'a' and 'b'
X     * into two different equivalence classes. "[ab]" puts them in the same
X     * equivalence class (barring other differences elsewhere in the input).
X     *
X     * A negative sym needs nothing here: we don't have to update the
X     * equivalence classes since that was already done when the ccl was
X     * created for the first time.
X     */
X    if ( sym >= 0 ) {
X        if ( sym == SYM_EPSILON )
X            ++numeps;
X
X        else if ( useecs )
X            mkechar( sym, nextecm, ecgroup );
X    }
X
X    return ( lastnfa );
X}
X
X
X/* mkxtion - make a transition from one state to another
X *
X * synopsis
X *
X *     mkxtion( statefrom, stateto );
X *
X * statefrom - the state from which the transition is to be made
X * stateto   - the state to which the transition is to be made
X *
X * The first transition of any state goes into trans1. Only an epsilon
X * state may take a second one, in trans2; any other request is reported
X * through flexfatal().
X */
X
Xmkxtion( statefrom, stateto )
Xint statefrom, stateto;
X{
X    if ( trans1[statefrom] == NO_TRANSITION ) {
X        trans1[statefrom] = stateto;
X        return;
X    }
X
X    if ( transchar[statefrom] == SYM_EPSILON &&
X         trans2[statefrom] == NO_TRANSITION ) {
X        /* second out-transition for an epsilon state */
X        ++eps2;
X        trans2[statefrom] = stateto;
X    }
X
X    else
X        flexfatal( "found too many transitions in mkxtion()" );
X}
X
X/* new_rule - initialize for a new rule
X *
X * synopsis
X *
X *     new_rule();
X *
X * the global num_rules is incremented and any corresponding dynamic
X * arrays (such as rule_type[]) are grown as needed. Going past MAX_RULE
X * is reported through lerrif(). The current line number is recorded as
X * the rule's line number.
X */
X
Xnew_rule()
X{
X    ++num_rules;
X
X    if ( num_rules >= current_max_rules ) {
X        ++num_reallocs;
X        current_max_rules += MAX_RULES_INCREMENT;
X
X        rule_type =
X            reallocate_integer_array( rule_type, current_max_rules );
X        rule_linenum =
X            reallocate_integer_array( rule_linenum, current_max_rules );
X    }
X
X    if ( num_rules > MAX_RULE )
X        lerrif( "too many rules (> %d)!", MAX_RULE );
X
X    rule_linenum[num_rules] = linenum;
X}
END_OF_FILE
if test 17293 -ne `wc -c <'flex/nfa.c'`; then
echo shar: \"'flex/nfa.c'\" unpacked with wrong size!
fi
# end of 'flex/nfa.c'
fi
echo shar: End of archive 3 \(of 7\).
# Leave an empty marker file recording that part 3 has been unpacked.
cp /dev/null ark3isdone
# Build the list of parts whose marker file is not present.
MISSING=""
for I in 1 2 3 4 5 6 7 ; do
if test ! -f ark${I}isdone ; then
MISSING="${MISSING} ${I}"
fi
done
# Nothing missing: announce completion and remove the marker files.
# Otherwise list the parts that still have to be unpacked.
if test "${MISSING}" = "" ; then
echo You have unpacked all 7 archives.
rm -f ark[1-9]isdone
else
echo You still need to unpack the following archives:
echo " " ${MISSING}
fi
## End of shell archive.
# Stop here so that anything following the archive is not run as shell input.
exit 0