home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Acorn User 11
/
AUCD11B.iso
/
LANGUAGES
/
WraithSet
/
AwkStuff
/
MawkSrc
/
rexp
/
c
/
rexp3
< prev
next >
Wrap
Text File
|
1993-07-24
|
7KB
|
340 lines
/********************************************
rexp3.c
copyright 1991, Michael D. Brennan
This is a source file for mawk, an implementation of
the AWK programming language.
Mawk is distributed without warranty under the terms of
the GNU General Public License, version 2, 1991.
********************************************/
/*$Log: rexp3.c,v $
* Revision 1.3 1993/07/24 17:55:15 mike
* more cleanup
*
* Revision 1.2 1993/07/23 13:21:48 mike
* cleanup rexp code
*
* Revision 1.1.1.1 1993/07/03 18:58:28 mike
* move source to cvs
*
* Revision 3.6 1992/12/24 00:44:53 mike
* fixed potential LMDOS bozo with M_STR+U_ON+END_ON
* fixed minor bug in M_CLASS+U_ON+END_ON
*
* Revision 3.5 1992/01/21 17:33:20 brennan
* added some casts so that character classes work with signed chars
*
* Revision 3.4 91/10/29 10:54:09 brennan
* SIZE_T
*
* Revision 3.3 91/08/13 09:10:18 brennan
* VERSION .9994
*
* Revision 3.2 91/06/10 16:18:17 brennan
* changes for V7
*
* Revision 3.1 91/06/07 10:33:28 brennan
* VERSION 0.995
*
* Revision 1.4 91/05/31 10:56:32 brennan
* stack_empty hack for DOS large model
*
*/
/* match a string against a machine */
#include "rexp.h"
extern RT_STATE *RE_run_stack_base ;
extern RT_STATE *RE_run_stack_limit ;
extern RT_STATE *RE_run_stack_empty ;
RT_STATE *RE_new_run_stack() ;
#define push(mx,sx,ssx,ux) if (++stackp == RE_run_stack_limit)\
stackp = RE_new_run_stack() ;\
stackp->m=(mx);stackp->s=(sx);stackp->ss=(ssx);\
stackp->u = (ux)
#define CASE_UANY(x) case x + U_OFF : case x + U_ON
/* returns start of first longest match and the length by
reference. If no match returns NULL and length zero */
char *REmatch(str, machine, lenp)
char *str ;
PTR machine ;
unsigned *lenp ;
{
register STATE *m = (STATE *) machine ;
register char *s = str ;
char *ss ;
register RT_STATE *stackp ;
int u_flag, t ;
char *str_end, *ts ;
/* state of current best match stored here */
char *cb_ss ; /* the start */
char *cb_e ; /* the end , pts at first char not matched */
*lenp = 0 ;
/* check for the easy case */
if ((m + 1)->type == M_ACCEPT && m->type == M_STR)
{
if (ts = str_str(s, m->data.str, m->len)) *lenp = m->len ;
return ts ;
}
u_flag = U_ON ; cb_ss = ss = str_end = (char *) 0 ;
stackp = RE_run_stack_empty ;
goto reswitch ;
refill :
if (stackp == RE_run_stack_empty)
{
if (cb_ss) *lenp = cb_e - cb_ss ;
return cb_ss ;
}
ss = stackp->ss ;
s = stackp--->s ;
if (cb_ss) /* does new state start too late ? */
{
if (ss)
{
if (cb_ss < ss) goto refill ;
}
else if (cb_ss < s) goto refill ;
}
m = (stackp + 1)->m ;
u_flag = (stackp + 1)->u ;
reswitch :
switch (m->type + u_flag)
{
case M_STR + U_OFF + END_OFF:
if (strncmp(s, m->data.str, m->len)) goto refill ;
if (!ss)
{
if (cb_ss && s > cb_ss) goto refill ;
else ss = s ;
}
s += m->len ; m++ ;
goto reswitch ;
case M_STR + U_OFF + END_ON:
if (strcmp(s, m->data.str)) goto refill ;
if (!ss)
{
if (cb_ss && s > cb_ss) goto refill ;
else ss = s ;
}
s += m->len ; m++ ;
goto reswitch ;
case M_STR + U_ON + END_OFF:
if (!(s = str_str(s, m->data.str, m->len))) goto refill ;
push(m, s + 1, ss, U_ON) ;
if (!ss)
{
if (cb_ss && s > cb_ss) goto refill ;
else ss = s ;
}
s += m->len ; m++ ; u_flag = U_OFF ;
goto reswitch ;
case M_STR + U_ON + END_ON:
if (!str_end) str_end = s + strlen(s) ;
t = (str_end - s) - m->len ;
if (t < 0 || memcmp(ts = s + t, m->data.str, m->len))
goto refill ;
if (!ss)
{
if (cb_ss && ts > cb_ss) goto refill ;
else ss = ts ;
}
s = str_end ; m++ ; u_flag = U_OFF ;
goto reswitch ;
case M_CLASS + U_OFF + END_OFF:
if (!ison(*m->data.bvp, s[0])) goto refill ;
if (!ss)
{
if (cb_ss && s > cb_ss) goto refill ;
else ss = s ;
}
s++ ; m++ ;
goto reswitch ;
case M_CLASS + U_OFF + END_ON:
if (s[1] || !ison(*m->data.bvp, s[0])) goto refill ;
if (!ss)
{
if (cb_ss && s > cb_ss) goto refill ;
else ss = s ;
}
s++ ; m++ ;
goto reswitch ;
case M_CLASS + U_ON + END_OFF:
while (!ison(*m->data.bvp, s[0]))
{
if (s[0] == 0) goto refill ;
else s++ ;
}
s++ ;
push(m, s, ss, U_ON) ;
if (!ss)
{
if (cb_ss && s - 1 > cb_ss) goto refill ;
else ss = s - 1 ;
}
m++ ; u_flag = U_OFF ;
goto reswitch ;
case M_CLASS + U_ON + END_ON:
if (!str_end) str_end = s + strlen(s) ;
if (s[0] == 0 || !ison(*m->data.bvp, str_end[-1]))
goto refill ;
if (!ss)
{
if (cb_ss && str_end - 1 > cb_ss) goto refill ;
else ss = str_end - 1 ;
}
s = str_end ; m++ ; u_flag = U_OFF ;
goto reswitch ;
case M_ANY + U_OFF + END_OFF:
if (s[0] == 0) goto refill ;
if (!ss)
{
if (cb_ss && s > cb_ss) goto refill ;
else ss = s ;
}
s++ ; m++ ;
goto reswitch ;
case M_ANY + U_OFF + END_ON:
if (s[0] == 0 || s[1] != 0) goto refill ;
if (!ss)
{
if (cb_ss && s > cb_ss) goto refill ;
else ss = s ;
}
s++ ; m++ ;
goto reswitch ;
case M_ANY + U_ON + END_OFF:
if (s[0] == 0) goto refill ;
s++ ;
push(m, s, ss, U_ON) ;
if (!ss)
{
if (cb_ss && s - 1 > cb_ss) goto refill ;
else ss = s - 1 ;
}
m++ ; u_flag = U_OFF ;
goto reswitch ;
case M_ANY + U_ON + END_ON:
if (s[0] == 0) goto refill ;
if (!str_end) str_end = s + strlen(s) ;
if (!ss)
{
if (cb_ss && str_end - 1 > cb_ss) goto refill ;
else ss = str_end - 1 ;
}
s = str_end ; m++ ; u_flag = U_OFF ;
goto reswitch ;
case M_START + U_OFF + END_OFF:
case M_START + U_ON + END_OFF:
if (s != str) goto refill ;
ss = s ;
m++ ; u_flag = U_OFF ;
goto reswitch ;
case M_START + U_OFF + END_ON:
case M_START + U_ON + END_ON:
if (s != str || s[0] != 0) goto refill ;
ss = s ;
m++ ; u_flag = U_OFF ;
goto reswitch ;
case M_END + U_OFF:
if (s[0] != 0) goto refill ;
if (!ss)
{
if (cb_ss && s > cb_ss) goto refill ;
else ss = s ;
}
m++ ; goto reswitch ;
case M_END + U_ON:
s = str_end ? str_end : (str_end = s + strlen(s)) ;
if (!ss)
{
if (cb_ss && s > cb_ss) goto refill ;
else ss = s ;
}
m++ ; u_flag = U_OFF ;
goto reswitch ;
CASE_UANY(M_U):
if (!ss)
{
if (cb_ss && s > cb_ss) goto refill ;
else ss = s ;
}
u_flag = U_ON ; m++ ;
goto reswitch ;
CASE_UANY(M_1J):
m += m->data.jump ;
goto reswitch ;
CASE_UANY(M_2JA): /* take the non jump branch */
push(m + m->data.jump, s, ss, u_flag) ;
m++ ;
goto reswitch ;
CASE_UANY(M_2JB): /* take the jump branch */
push(m + 1, s, ss, u_flag) ;
m += m->data.jump ;
goto reswitch ;
case M_ACCEPT + U_OFF:
if (!ss) ss = s ;
if (!cb_ss || ss < cb_ss || ss == cb_ss && s > cb_e)
{
/* we have a new current best */
cb_ss = ss ; cb_e = s ;
}
goto refill ;
case M_ACCEPT + U_ON:
if (!ss) ss = s ;
else s = str_end ? str_end : (str_end = s + strlen(s)) ;
if (!cb_ss || ss < cb_ss || ss == cb_ss && s > cb_e)
{
/* we have a new current best */
cb_ss = ss ; cb_e = s ;
}
goto refill ;
default:
RE_panic("unexpected case in REmatch") ;
}
}