home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Fresh Fish 8
/
FreshFishVol8-CD2.bin
/
bbs
/
gnu
/
sed-2.05-src.lha
/
sed-2.05
/
rx.h
< prev
next >
Wrap
C/C++ Source or Header
|
1994-05-13
|
33KB
|
1,054 lines
#if !defined(RXH) || defined(RX_WANT_SE_DEFS)
#define RXH
/* Copyright (C) 1992, 1993 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this software; see the file COPYING. If not, write to
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
/* t. lord Wed Sep 23 18:20:57 1992 */
#ifndef RX_WANT_SE_DEFS
/* This page: Bitsets */
#ifndef RX_subset
typedef unsigned int RX_subset;
#define RX_subset_bits (32)
#define RX_subset_mask (RX_subset_bits - 1)
#endif
typedef RX_subset * rx_Bitset;
#ifdef __STDC__
typedef void (*rx_bitset_iterator) (void *, int member_index);
#else
typedef void (*rx_bitset_iterator) ();
#endif
#define rx_bitset_subset(N) ((N) / RX_subset_bits)
#define rx_bitset_subset_val(B,N) ((B)[rx_bitset_subset(N)])
#define RX_bitset_access(B,N,OP) \
((B)[rx_bitset_subset(N)] OP rx_subset_singletons[(N) & RX_subset_mask])
#define RX_bitset_member(B,N) RX_bitset_access(B, N, &)
#define RX_bitset_enjoin(B,N) RX_bitset_access(B, N, |=)
#define RX_bitset_remove(B,N) RX_bitset_access(B, N, &= ~)
#define RX_bitset_toggle(B,N) RX_bitset_access(B, N, ^= )
#define rx_bitset_numb_subsets(N) (((N) + RX_subset_bits - 1) / RX_subset_bits)
#define rx_sizeof_bitset(N) (rx_bitset_numb_subsets(N) * sizeof(RX_subset))
/* This page: Splay trees. */
#ifdef __STDC__
typedef int (*rx_sp_comparer) (void * a, void * b);
#else
typedef int (*rx_sp_comparer) ();
#endif
struct rx_sp_node
{
void * key;
void * data;
struct rx_sp_node * kids[2];
};
#ifdef __STDC__
typedef void (*rx_sp_key_data_freer) (struct rx_sp_node *);
#else
typedef void (*rx_sp_key_data_freer) ();
#endif
/* giant inflatable hash trees */
struct rx_hash_item
{
struct rx_hash_item * next_same_hash;
struct rx_hash * table;
unsigned long hash;
void * data;
void * binding;
};
struct rx_hash
{
struct rx_hash * parent;
int refs;
struct rx_hash * children[13];
struct rx_hash_item * buckets [13];
int bucket_size [13];
};
struct rx_hash_rules;
#ifdef __STDC__
/* should return like == */
typedef int (*rx_hash_eq)(void *, void *);
typedef struct rx_hash * (*rx_alloc_hash)(struct rx_hash_rules *);
typedef void (*rx_free_hash)(struct rx_hash *,
struct rx_hash_rules *);
typedef struct rx_hash_item * (*rx_alloc_hash_item)(struct rx_hash_rules *,
void *);
typedef void (*rx_free_hash_item)(struct rx_hash_item *,
struct rx_hash_rules *);
#else
typedef int (*rx_hash_eq)();
typedef struct rx_hash * (*rx_alloc_hash)();
typedef void (*rx_free_hash)();
typedef struct rx_hash_item * (*rx_alloc_hash_item)();
typedef void (*rx_free_hash_item)();
#endif
struct rx_hash_rules
{
rx_hash_eq eq;
rx_alloc_hash hash_alloc;
rx_free_hash free_hash;
rx_alloc_hash_item hash_item_alloc;
rx_free_hash_item free_hash_item;
};
/* Matchers decide what to do by examining a series of these.
* Instruction types are described below.
*/
struct rx_inx
{
void * inx;
void * data;
void * data_2;
void * fnord;
};
/* Struct RX holds a compiled regular expression - that is, an nfa ready to be
* converted on demand to a more efficient nfa. This is for the low level interface.
* The high-level interface incloses this in a `struct re_pattern_buffer'.
*/
struct rx_cache;
#ifdef __STDC__
struct rx_se_list;
struct rx;
typedef int (*rx_se_list_order) (struct rx *,
struct rx_se_list *, struct rx_se_list *);
#else
typedef int (*rx_se_list_order) ();
#endif
struct rx_superset;
struct rx
{
int rx_id; /* Every edition numbered and signed by eclose_nfa. */
struct rx_cache * cache; /* Where superstates come from */
/* Every regex defines the size of its own character set. */
int local_cset_size;
void * buffer; /* Malloced memory for the nfa. */
unsigned long allocated; /* Size of that memory. */
/* How much buffer space to save for external uses. After compilation,
* this space will be available at (buffer + allocated - reserved)
*/
unsigned long reserved;
/* --------- The remaining fields are for internal use only. --------- */
/* --------- But! they should be initialized to 0. --------- */
/* NODEC is the number of nodes in the NFA with non-epsilon
* orx transitions.
*/
int nodec;
/* EPSNODEC is the number of nodes with only epsilon (orx) transitions. */
int epsnodec;
/* The sum of NODEC & EPSNODEC is the total number of states in the
* compiled NFA.
*/
/* side_effect_progs temporarily holds a tree of side effect lists. */
struct rx_hash se_list_memo;
/* A memo for sets of states in the possible_future lists of an nfa: */
struct rx_hash set_list_memo;
/* The instruction table is indexed by the enum of instructions defined in
* rxrun.h. The values in the table are used to fill in the `inx'
* slot of instruction frames (see rxrun.h).
*/
void ** instruction_table;
struct rx_nfa_state *nfa_states;
struct rx_nfa_state *start;
/* This orders the search through super-nfa paths. */
rx_se_list_order se_list_cmp;
struct rx_superset * start_set;
};
/* An RX NFA may contain epsilon edges labeled with side effects.
* These side effects represent match actions that can not normally be
* defined in a `pure' NFA; for example, recording the location at
* which a paren is crossed in a register structure.
*
* A matcher is supposed to find a particular path
* through the NFA (such as leftmost-longest), and then to execute the
* side effects along that path. Operationally, the space of paths is
* searched and side effects are carried out incrementally, and with
* backtracking.
*
* As the NFA is manipulated during matching sets of side effects.
* Simple lists are used to hold side effect lists.
*/
typedef void * rx_side_effect;
struct rx_se_list
{
rx_side_effect car;
struct rx_se_list * cdr;
};
/* Struct rexp_node holds an expression tree that represents a regexp.
* In this expression tree, every node has a type, and some parameters
* appropriate to that type.
*/
enum rexp_node_type
{
r_cset, /* Match from a character set. `a' or `[a-z]'*/
r_concat, /* Concat two regexps. `ab' */
r_alternate, /* Choose one of two regexps. `a\|b' */
r_opt, /* Optional regexp. `a?' */
r_star, /* Repeated regexp. `a*' */
r_2phase_star, /* hard to explain */
r_side_effect, /* Matches the empty string, but
* implies that a side effect must
* take place. These nodes are used
* by the parser to implement parens,
* backreferences etc.
*/
r_data /* R_DATA is soley for the convenience
* of parsers or other rexp
* transformers that want to
* (temporarily) introduce new node
* types in rexp structures. These
* must be eliminated
* by the time build_nfa is called.
*/
};
struct rexp_node
{
enum rexp_node_type type;
union
{
rx_Bitset cset;
rx_side_effect side_effect;
struct
{
struct rexp_node *left;
struct rexp_node *right;
} pair;
void * data;
} params;
};
/* This defines the structure of the NFA into which rexps are compiled. */
struct rx_nfa_state
{
int id;
struct rx_nfa_edge *edges;
struct rx_possible_future *futures;
unsigned int is_final:1;
unsigned int is_start:1;
unsigned int eclosure_needed:1;
struct rx_nfa_state *next;
unsigned int mark:1;
};
enum rx_nfa_etype
{
ne_cset,
ne_epsilon,
ne_side_effect /* A special kind of epsilon. */
};
struct rx_nfa_edge
{
struct rx_nfa_edge *next;
enum rx_nfa_etype type;
struct rx_nfa_state *dest;
union
{
rx_Bitset cset;
rx_side_effect side_effect;
} params;
};
struct rx_nfa_state_set
{