home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Simtel MSDOS 1992 June
/
SIMTEL_0692.cdr
/
msdos
/
ddjmag
/
ddj8702.arc
/
FLOYD.LST
< prev
next >
Wrap
File List
|
1980-01-02
|
10KB
|
340 lines
{Listing 1: Declarations of data structures
used by hashing and symbol table routines.}
CONST
symbol_hash_size = 100;
{Number of buckets - 1 in the hash table.
I believe it should be a prime - 1.}
TYPE
str255 = String [255]; {General large str}
symbol_data = RECORD
{Data to be associated with identifier}
usecount: INTEGER;
END;
symbol_name = String [255];
{Symbol identifier is a string}
symbol_ptr = ^symbol_Type;
symbol_range = 0..symbol_hash_size;
symbol_Type = RECORD
{identifier and its data}
sym_chain: symbol_ptr;
{Ptr to next symbol in list}
sym_data: symbol_data;
{Type declared in the main program}
sym_name: symbol_name;
{Symbol name or identifier}
END;
symbol_control = RECORD
{Declare one of these in main program for
each symbol table to be used}
symbols, searches, notfound: INTEGER;
probes: REAL;
{Real because some counts exceed 32767}
this_bucket: symbol_range;
{Bucket # of last referenced symbol}
this_symbol: symbol_ptr;
{Pointer to last referenced symbol}
sym_ptr: ARRAY [symbol_range] OF symbol_ptr;
{Buckets}
END;
{Listing 2: Routines to initialize the symbol
table, insert a symbol, and locate a symbol,
without MTF.}
FUNCTION symbol_size
(VAR s_name: symbol_name): INTEGER;
{Return the size of memory required to contain
a symbol named in s_name.}
BEGIN
symbol_size := SIZEOF (symbol_ptr)
+ SIZEOF (symbol_data)
+ SUCC (LENGTH (s_name));
END;
PROCEDURE symbol_init (VAR sym: symbol_control);
{Initialize symbol control pointers. Call this
before the first use of a Symbol_Control area.}
VAR
i: symbol_range;
BEGIN
WITH sym DO BEGIN
FOR i := 0 TO symbol_hash_size
DO sym_ptr [i] := NIL;
this_bucket := 0;
this_symbol := NIL;
symbols := 0;
searches := 0;
probes := 0.0;
notfound := 0;
END;
END;
PROCEDURE symbol_put (VAR sym: symbol_control;
s_name: symbol_name; VAR s_data: symbol_data);
{Insert symbol name and data in table. This routine does not check for duplicate symbol.}
BEGIN
WITH sym DO BEGIN
this_bucket := symbol_hash (s_name);
GETMEM (this_symbol, symbol_size (s_name));
WITH this_symbol^ DO BEGIN
sym_chain := sym_ptr [this_bucket];
sym_data := s_data;
sym_name := s_name;
sym_ptr [this_bucket] := this_symbol;
END;
symbols := SUCC (symbols);
END;
END;
FUNCTION symbol_get
(VAR sym: symbol_control; s_name: symbol_name;
VAR s_data: symbol_data): BOOLEAN;
{Retrieve a symbol. If the symbol is found,
set s_data to the data stored by the last call
to symbol_put specifying the same symbol name,
point this_symbol to the symbol table entry, and
return TRUE. If the symbol is not found leave
s_data unchanged, leave this_symbol undefined,
and return FALSE. This version does NOT
implement the MTF algorithm.}
VAR
p: symbol_ptr; {work pointer}
BEGIN
WITH sym DO BEGIN
this_bucket := symbol_hash (s_name);
p := sym_ptr [this_bucket];
symbol_get := FALSE;
searches := SUCC (searches);
IF p = NIL THEN
notfound := SUCC (notfound);
WHILE p <> NIL DO WITH p^ DO BEGIN
probes := probes + 1.0;
IF s_name = sym_name THEN BEGIN
{found it!}
s_data := sym_data;
this_symbol := p;
p := NIL;
symbol_get := TRUE;
END ELSE BEGIN
{not this one, chain to the next}
p := sym_chain;
if p = NIL THEN
notfound := SUCC (notfound);
END;
END;
END;
END;
{Listing 3: Hash functions, presented as a
single Pascal function with case statement
controlled by a global variable ``hashtype''
to select one of the four routines.}
{First the table used by the CRC-16 routine,
this from a public domain file uncompression
program: DeArc, by Bela Lubkin.}
const crctab : array [0..255] of integer =
($0000, $C0C1, $C181, $0140, $C301, $03C0, $0280,
$C241, $C601, $06C0, $0780, $C741, $0500, $C5C1,
$C481, $0440, $CC01, $0CC0, $0D80, $CD41, $0F00,
$CFC1, $CE81, $0E40, $0A00, $CAC1, $CB81, $0B40,
$C901, $09C0, $0880, $C841, $D801, $18C0, $1980,
$D941, $1B00, $DBC1, $DA81, $1A40, $1E00, $DEC1,
$DF81, $1F40, $DD01, $1DC0, $1C80, $DC41, $1400,
$D4C1, $D581, $1540, $D701, $17C0, $1680, $D641,
$D201, $12C0, $1380, $D341, $1100, $D1C1, $D081,
$1040, $F001, $30C0, $3180, $F141, $3300, $F3C1,
$F281, $3240, $3600, $F6C1, $F781, $3740, $F501,
$35C0, $3480, $F441, $3C00, $FCC1, $FD81, $3D40,
$FF01, $3FC0, $3E80, $FE41, $FA01, $3AC0, $3B80,
$FB41, $3900, $F9C1, $F881, $3840, $2800, $E8C1,
$E981, $2940, $EB01, $2BC0, $2A80, $EA41, $EE01,
$2EC0, $2F80, $EF41, $2D00, $EDC1, $EC81, $2C40,
$E401, $24C0, $2580, $E541, $2700, $E7C1, $E681,
$2640, $2200, $E2C1, $E381, $2340, $E101, $21C0,
$2080, $E041, $A001, $60C0, $6180, $A141, $6300,
$A3C1, $A281, $6240, $6600, $A6C1, $A781, $6740,
$A501, $65C0, $6480, $A441, $6C00, $ACC1, $AD81,
$6D40, $AF01, $6FC0, $6E80, $AE41, $AA01, $6AC0,
$6B80, $AB41, $6900, $A9C1, $A881, $6840, $7800,
$B8C1, $B981, $7940, $BB01, $7BC0, $7A80, $BA41,
$BE01, $7EC0, $7F80, $BF41, $7D00, $BDC1, $BC81,
$7C40, $B401, $74C0, $7580, $B541, $7700, $B7C1,
$B681, $7640, $7200, $B2C1, $B381, $7340, $B101,
$71C0, $7080, $B041, $5000, $90C1, $9181, $5140,
$9301, $53C0, $5280, $9241, $9601, $56C0, $5780,
$9741, $5500, $95C1, $9481, $5440, $9C01, $5CC0,
$5D80, $9D41, $5F00, $9FC1, $9E81, $5E40, $5A00,
$9AC1, $9B81, $5B40, $9901, $59C0, $5880, $9841,
$8801, $48C0, $4980, $8941, $4B00, $8BC1, $8A81,
$4A40, $4E00, $8EC1, $8F81, $4F40, $8D01, $4DC0,
$4C80, $8C41, $4400, $84C1, $8581, $4540, $8701,
$47C0, $4680, $8641, $8201, $42C0, $4380, $8341,
$4100, $81C1, $8081, $4040 );
FUNCTION symbol_hash
(VAR s_name: symbol_name): symbol_range;
{Hash the symbol name to a number between
0 and the hash table size.}
VAR
i, j: INTEGER;
BEGIN
CASE hashtype OF
1: BEGIN {Sum of the characters + length}
j := 0;
FOR i := 0 to LENGTH (s_name) DO
j := j + ORD (s_name [i]);
symbol_hash :=
j MOD SUCC (symbol_hash_size);
END;
2: BEGIN {First + Last + Length}
symbol_hash :=
((ORD (s_name [1]) SHL 8)
+ ORD (s_name [Length (s_name)])
+ Length (s_name))
MOD SUCC (symbol_hash_size);
END;
3: BEGIN {HashPJW}
j := 0;
FOR i := 1 TO LENGTH (s_name) DO BEGIN
j := (j SHL 4) + ORD (s_name [i]);
IF (j AND $F000) <> 0 THEN
j := j XOR (j SHR 12) AND $0FFF;
END;
symbol_hash := (j AND $7FFF)
MOD SUCC (symbol_hash_size);
END;
4: BEGIN {CRC-16}
j := 0;
FOR i := 1 TO LENGTH (s_name) DO
j := (j SHR 8) XOR
crctab [(j XOR ORD
(s_name [i])) AND $00FF];
symbol_hash := (j AND $7FFF)
MOD SUCC (symbol_hash_size);
END;
else symbol_hash := 0;
{Not specified, punish the user}
END;
END;
{Listing 4: Symbol distribution function U(h,t)}
FUNCTION symbol_distribution
(VAR sym: symbol_control): REAL;
{Compute the distribution test as outlined in
Aho, et al. This function approaches 1.0 as
the ``randomness'' of the hashing improves.}
VAR
p: symbol_ptr;
b, n, m, r: REAL;
i: symbol_range;
j: INTEGER;
BEGIN
r := 0.0;
WITH sym DO BEGIN
FOR i := 0 to symbol_hash_size DO BEGIN
p := sym_ptr [i];
j := 0;
WHILE p <> NIL DO
WITH p^ DO BEGIN {count the list}
p := sym_chain;
j := SUCC (j);
END;
b := j;
r := r + (b * (b + 1.0)) / 2.0;
END;
m := SUCC (symbol_hash_size);
n := symbols;
symbol_distribution := r /
((n / (2.0 * m)) * (n + 2.0 * m - 1.0));
END;
END;
{Listing 5: Symbol search with MTF}
{Note: for the purposes of the test program, the
application of MTF is controlled by a global
boolean variable ``mtf'' set by the main program.
The test for this boolean should be removed in a
production version of the routine. MTF with all
its performance advantages is accomplished with
the addition of seven lines of code!}
FUNCTION symbol_get
(VAR sym: symbol_control; s_name: symbol_name;
VAR s_data: symbol_data): BOOLEAN;
{Retrieve a symbol. If the found, s_data is set
to the data stored by the last call to symbol_put
specifying the same symbol name, this_symbol
points to the symbol table found, the symbol is
moved to the front of the chain, and the function
returns TRUE. If the symbol is not found s_data
is unchanged, this_symbol is undefined, and the
function returns FALSE.}
VAR
p: symbol_ptr; {work pointer}
BEGIN
WITH sym DO BEGIN
this_bucket := symbol_hash (s_name);
p := sym_ptr [this_bucket];
symbol_get := FALSE;
this_symbol := NIL;
searches := SUCC (searches);
IF p = NIL THEN notfound := SUCC (notfound);
WHILE p <> NIL DO WITH p^ DO BEGIN
probes := probes + 1.0;
IF s_name = sym_name THEN BEGIN
{found it!}
IF this_symbol <> NIL THEN IF mtf THEN
BEGIN {Move it to the front}
this_symbol^.sym_chain := sym_chain;
sym_chain := sym_ptr [this_bucket];
sym_ptr [this_bucket] := p;
END;
s_data := sym_data;
this_symbol := p;
p := NIL;
symbol_get := TRUE;
END ELSE BEGIN
{not this one, chain to the next}
this_symbol := p;
p := sym_chain;
if p = NIL THEN
notfound := SUCC (notfound);
END;
END;
END;
END;