home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Source Code 1992 March
/
Source_Code_CD-ROM_Walnut_Creek_March_1992.iso
/
usenet
/
altsrcs
/
3
/
3582
< prev
next >
Wrap
Text File
|
1991-07-02
|
20KB
|
732 lines
Newsgroups: alt.sources
From: goer@ellis.uchicago.edu (Richard L. Goerwitz)
Subject: kjv browser, part 8 of 11
Message-ID: <1991Jul3.065222.28343@midway.uchicago.edu>
Date: Wed, 3 Jul 1991 06:52:22 GMT
---- Cut Here and feed the following to sh ----
#!/bin/sh
# this is bibleref.08 (part 8 of a multipart archive)
# do not concatenate these parts, unpack them in order with /bin/sh
# file findre.icn continued
#
# The sequence file must already exist and be readable (the message
# below indicates it is created by unpacking part 1).
test -r _shar_seq_.tmp || {
echo 'Please unpack part 1 first!'
exit 1
}
# The sequence file holds the number of the part expected next; this
# part is number 8.  The check runs in a subshell so that only its
# exit status decides whether the script goes on.
(read Scheck
test "$Scheck" = 8 && exit 0
echo Please unpack part "$Scheck" next!
exit 1
) < _shar_seq_.tmp || exit 1
if test ! -f _shar_wnt_.tmp; then
echo 'x - still skipping findre.icn'
else
echo 'x - continuing file findre.icn'
sed 's/^X//' << 'SHAR_EOF' >> 'findre.icn' &&
X tmp := tab(many('*?+')) | &null
X if upto('*?',\tmp)
X then put(token_list,-ord("*"))
X else put(token_list,-ord("+"))
X }
X "?" : {
X tmp := tab(many('*?+')) | &null
X if upto('*+',\tmp)
X then put(token_list,-ord("*"))
X else put(token_list,-ord("?"))
X }
X "(" : {
X tab(many('*+?'))
X put(token_list,-ord("("))
X }
X default: {
X put(token_list,-ord(chr))
X }
X }
X }
X else {
X case chr of {
X # More egrep compatibility stuff.
X "[" : {
X b_loc := find("[") | *&subject+1
X every next_one := find("]",,,b_loc)
X \next_one ~= &pos | err_out(s,2,chr)
X put(token_list,-ord(chr))
X }
X "]" : {
X if &pos = (\next_one+1)
X then put(token_list,-ord(chr)) &
X next_one := &null
X else put(token_list,ord(chr))
X }
X default: put(token_list,ord(chr))
X }
X }
X }
X }
X
X token_list := UnMetaBrackets(token_list)
X
X fixed_length_token_list := list(*token_list)
X every i := 1 to *token_list
X do fixed_length_token_list[i] := token_list[i]
X return fixed_length_token_list
X
Xend
X
X
X
Xprocedure UnMetaBrackets(l)
X
X    # Since brackets delineate a cset, it doesn't make any sense to
X    # have metacharacters inside of them.  UnMetaBrackets returns a
X    # copy of the token list l in which every token lying between a
X    # meta "[" (-ord("[")) and the next meta "]" (-ord("]")) has been
X    # replaced by its absolute value, i.e. turned into a literal.
X    # The bracket tokens themselves are copied unchanged.
X    #
X    # A "[" token that is never followed by a "]" token is reported
X    # through err_out() rather than indexing past the end of l
X    # forever.
X
X    local tmplst, i, Lb, Rb
X
X    tmplst := list(); i := 0
X    Lb := -ord("[")
X    Rb := -ord("]")
X
X    while (i +:= 1) <= *l do {
X        put(tmplst,l[i])
X        if l[i] = Lb then {
X            # Copy the bracket's contents, stripping any meta status.
X            while (i +:= 1) <= *l & l[i] ~= Rb
X            do put(tmplst,abs(l[i]))
X            # Running off the end means the "[" was never closed.
X            i <= *l | err_out(l,2,"[")
X            # Copy the closing bracket token itself.
X            put(tmplst,l[i])
X        }
X    }
X    return tmplst
X
Xend
X
X
X
Xprocedure MakeFSTN(l,INI,FIN)
X
X # MakeFSTN recursively descends through the tree structure
X # implied by the tokenized string, l, recording in (global)
X # fstn_table a list of operations to be performed, and the
X # initial and final states which apply to them.
X #
X # l   - list of integer tokens.  Metacharacters are stored as
X #       negative values (-ord(char)); literal characters as
X #       positive values.
X # INI - state from which l is to be matched.  When it is omitted
X #       (&null) INI becomes 1 and state_table, the NextState()
X #       counter and biggest_nonmeta_str are all reset.
X # FIN - state to move to once l has been matched; defaults to 0.
X #
X # NOTE(review): the entries are actually stored in state_table;
X # the name fstn_table used above does not occur in this procedure.
X # Each entry is built with o_a_s(operation, argument, state) -
X # presumably a record declared elsewhere; confirm there.
X #
X # The procedure returns no useful value.  Every section below
X # either handles the front of l and recurses on the remainder,
X # or falls through to the next section.
X
X local i, inter, inter2, tmp, Op, Arg
X static Lp, Rp, Sl, Lb, Rb, Caret_inside, Dot, Dollar, Caret_outside
X # global biggest_nonmeta_str, slash_present, parends_present
X initial {
X # Token values for the metacharacters.  Caret_inside is positive
X # because it is compared against a token found inside brackets.
X Lp := -ord("("); Rp := -ord(")")
X Sl := -ord("|")
X Lb := -ord("["); Rb := -ord("]"); Caret_inside := ord("^")
X Dot := -ord("."); Dollar := -ord("$"); Caret_outside := -ord("^")
X }
X
X # If INI was supplied, /INI fails and the whole conjunction (and
X # with it the resetting of the shared state) is skipped.
X /INI := 1 & state_table := table() &
X NextState("new") & biggest_nonmeta_str := ""
X /FIN := 0
X
X # I haven't bothered to test for empty lists everywhere.
X if *l = 0 then {
X /state_table[INI] := []
X put(state_table[INI],o_a_s(zSucceed,&null,FIN))
X return
X }
X
X # HUNT DOWN THE SLASH (ALTERNATION OPERATOR)
X every i := 1 to *l do {
X # tab_bal() generates the positions at which the parentheses seen
X # so far are balanced, so the test succeeds only for a bar that
X # is not nested inside a parenthesized group.
X if l[i] = Sl & tab_bal(l,Lp,Rp) = i then {
X if i = 1 then err_out(l,2,char(abs(l[i]))) else {
X /slash_present := "yes"
X inter := NextState()
X inter2:= NextState()
X # Build each alternative separately, then give INI one entry
X # per alternative.
X MakeFSTN(l[1:i],inter2,FIN)
X MakeFSTN(l[i+1:0],inter,FIN)
X /state_table[INI] := []
X put(state_table[INI],o_a_s(apply_FSTN,inter2,0))
X put(state_table[INI],o_a_s(apply_FSTN,inter,0))
X return
X }
X }
X }
X
X # HUNT DOWN PARENTHESES
X if l[1] = Lp then {
X # i is the index of the parenthesis that closes the one at l[1].
X i := tab_bal(l,Lp,Rp) | err_out(l,2,"(")
X inter := NextState()
X # 0 > l[i+1] fails for a literal token and l[i+1] fails past the
X # end of the list, so only a metacharacter following the group
X # is tested for being one of * + ?.
X if any('*+?',char(abs(0 > l[i+1]))) then {
X case l[i+1] of {
X -ord("*") : {
X /state_table[INI] := []
X put(state_table[INI],o_a_s(apply_FSTN,inter,0))
X MakeFSTN(l[2:i],INI,INI)
X MakeFSTN(l[i+2:0],inter,FIN)
X return
X }
X -ord("+") : {
X inter2 := NextState()
X /state_table[inter2] := []
X MakeFSTN(l[2:i],INI,inter2)
X put(state_table[inter2],o_a_s(apply_FSTN,inter,0))
X MakeFSTN(l[2:i],inter2,inter2)
X MakeFSTN(l[i+2:0],inter,FIN)
X return
X }
X -ord("?") : {
X /state_table[INI] := []
X put(state_table[INI],o_a_s(apply_FSTN,inter,0))
X MakeFSTN(l[2:i],INI,inter)
X MakeFSTN(l[i+2:0],inter,FIN)
X return
X }
X }
X }
X else {
X # Plain group: contents lead from INI to inter, the rest of l
X # from inter to FIN.
X MakeFSTN(l[2:i],INI,inter)
X MakeFSTN(l[i+1:0],inter,FIN)
X return
X }
X }
X else { # I.E. l[1] NOT = Lp (left parenthesis as -ord("("))
X # Split l in front of the first left parenthesis and handle the
X # two pieces separately; a right parenthesis met first is an error.
X every i := 1 to *l do {
X case l[i] of {
X Lp : {
X inter := NextState()
X MakeFSTN(l[1:i],INI,inter)
X /parends_present := "yes"
X MakeFSTN(l[i:0],inter,FIN)
X return
X }
X Rp : err_out(l,2,")")
X }
X }
X }
X
X # NOW, HUNT DOWN BRACKETS
X if l[1] = Lb then {
X i := tab_bal(l,Lb,Rb) | err_out(l,2,"[")
X inter := NextState()
X # Collect the tokens between the brackets into a string, then
X # turn it into a cset (complemented if it starts with a caret).
X tmp := ""; every tmp ||:= char(l[2 to i-1])
X if Caret_inside = l[2]
X then tmp := ~cset(Expand(tmp[2:0]))
X else tmp := cset(Expand(tmp))
X if any('*+?',char(abs(0 > l[i+1]))) then {
X case l[i+1] of {
X -ord("*") : {
X /state_table[INI] := []
X put(state_table[INI],o_a_s(apply_FSTN,inter,0))
X put(state_table[INI],o_a_s(any,tmp,INI))
X MakeFSTN(l[i+2:0],inter,FIN)
X return
X }
X -ord("+") : {
X inter2 := NextState()
X /state_table[INI] := []
X put(state_table[INI],o_a_s(any,tmp,inter2))
X /state_table[inter2] := []
X put(state_table[inter2],o_a_s(apply_FSTN,inter,0))
X put(state_table[inter2],o_a_s(any,tmp,inter2))
X MakeFSTN(l[i+2:0],inter,FIN)
X return
X }
X -ord("?") : {
X /state_table[INI] := []
X put(state_table[INI],o_a_s(apply_FSTN,inter,0))
X put(state_table[INI],o_a_s(any,tmp,inter))
X MakeFSTN(l[i+2:0],inter,FIN)
X return
X }
X }
X }
X else {
X /state_table[INI] := []
X put(state_table[INI],o_a_s(any,tmp,inter))
X MakeFSTN(l[i+1:0],inter,FIN)
X return
X }
X }
X else { # I.E. l[1] not = Lb
X # Same splitting strategy as for parentheses above.
X every i := 1 to *l do {
X case l[i] of {
X Lb : {
X inter := NextState()
X MakeFSTN(l[1:i],INI,inter)
X MakeFSTN(l[i:0],inter,FIN)
X return
X }
X Rb : err_out(l,2,"]")
X }
X }
X }
X
X # FIND INITIAL SEQUENCES OF POSITIVE INTEGERS, CONCATENATE THEM
X if i := match_positive_ints(l) then {
X inter := NextState()
X tmp := Ints2String(l[1:i])
X # if a slash has been encountered already, forget optimizing
X # in this way; if parends are present, too, then forget it,
X # unless we are at the beginning or end of the input string
X #
X # (& binds more loosely than |, so the test below groups as
X # (INI = 1 | FIN = 2 | /parends_present) & /slash_present & ...)
X if INI = 1 | FIN = 2 | /parends_present &
X /slash_present & *tmp > *biggest_nonmeta_str
X then biggest_nonmeta_str := tmp
X /state_table[INI] := []
X put(state_table[INI],o_a_s(match,tmp,inter))
X MakeFSTN(l[i:0],inter,FIN)
X return
X }
X
X # OKAY, CLEAN UP ALL THE JUNK THAT'S LEFT
X # Every path through the loop body below either returns or exits
X # through err_out(), so only the first token is ever handled here;
X # the remainder of l is dealt with by the recursive calls.
X i := 0
X while (i +:= 1) <= *l do {
X case l[i] of {
X Dot : { Op := any; Arg := &cset }
X Dollar : { Op := pos; Arg := 0 }
X Caret_outside: { Op := pos; Arg := 1 }
X # 0 < l[i] fails for any other (negative) metacharacter token,
X # which makes the whole case expression fail and so reports
X # an error.
X default : { Op := match; Arg := char(0 < l[i]) }
X } | err_out(l,2,char(abs(l[i])))
X inter := NextState()
X if any('*+?',char(abs(0 > l[i+1]))) then {
X case l[i+1] of {
X -ord("*") : {
X /state_table[INI] := []
X put(state_table[INI],o_a_s(apply_FSTN,inter,0))
X put(state_table[INI],o_a_s(Op,Arg,INI))
X MakeFSTN(l[i+2:0],inter,FIN)
X return
X }
X -ord("+") : {
X inter2 := NextState()
X /state_table[INI] := []
X put(state_table[INI],o_a_s(Op,Arg,inter2))
X /state_table[inter2] := []
X put(state_table[inter2],o_a_s(apply_FSTN,inter,0))
X put(state_table[inter2],o_a_s(Op,Arg,inter2))
X MakeFSTN(l[i+2:0],inter,FIN)
X return
X }
X -ord("?") : {
X /state_table[INI] := []
X put(state_table[INI],o_a_s(apply_FSTN,inter,0))
X put(state_table[INI],o_a_s(Op,Arg,inter))
X MakeFSTN(l[i+2:0],inter,FIN)
X return
X }
X }
X }
X else {
X /state_table[INI] := []
X put(state_table[INI],o_a_s(Op,Arg,inter))
X MakeFSTN(l[i+1:0],inter,FIN)
X return
X }
X }
X
X # WE SHOULD NOW BE DONE INSERTING EVERYTHING INTO state_table
X # IF WE GET TO HERE, WE'VE PARSED INCORRECTLY!
X err_out(l,4)
X
Xend
X
X
X
Xprocedure NextState(new)
X
X    # Hands out state numbers.  A non-null argument restarts the
X    # numbering, so that call returns 1; a call without an argument
X    # returns one more than the number handed out previously.
X
X    static nextstate
X
X    nextstate := (if /new then nextstate + 1 else 1)
X    return nextstate
X
Xend
X
X
X
Xprocedure err_out(x,i,elem)
X
X    # Writes a one-line parsing error message to &errout and then
X    # terminates the program with exit status i.  x is the object
X    # that was being parsed (shown via image()); elem, when supplied,
X    # is the offending element, otherwise "(?)" is shown instead.
X
X    local where
X
X    where := image(\elem) | "(?)"
X    write(&errout,"Error number ",i," parsing ",image(x)," at ",where,".")
X    exit(i)
X
Xend
X
X
X
Xprocedure zSucceed()
X
X    # Always succeeds, returning the current scanning position as a
X    # plain value rather than as the assignable keyword &pos.
X
X    local here
X
X    here := &pos
X    return here
X
Xend
X
X
X
Xprocedure Expand(s)
X
X # Expands the character ranges in s, the text found between a pair
X # of brackets: a sequence c1-c2 in which c1 is lexically less than
X # c2 is replaced by every character from c1 through c2.  Returns
X # the expanded string.  A "-" that ends the string (other than a
X # leading one) is reported through err_out().
X
X local s2, c1, c2
X
X s2 := ""
X s ? {
X # A leading "^" and/or "-" is copied through as a literal.
X s2 ||:= ="^"
X s2 ||:= ="-"
X # Copy everything up to, but not including, the character just
X # before the next "-"; that character is the candidate c1.
X while s2 ||:= tab(find("-")-1) do {
X # The moves are undone by backtracking if any part of the test
X # fails, so the else branch starts again at c1.
X if (c1 := move(1), ="-",
X c2 := move(1),
X c1 << c2)
X then every s2 ||:= char(ord(c1) to ord(c2))
X # Not an ascending range: copy c1 and the "-" literally, unless
X # that leaves nothing after the "-", which is an error.
X else s2 ||:= 1(move(2), not(pos(0))) | err_out(s,2,"-")
X }
X # Whatever follows the last "-" is copied unchanged.
X s2 ||:= tab(0)
X }
X return s2
X
Xend
X
X
X
Xprocedure tab_bal(l,i1,i2)
X
X    # Generator.  Walks the list l from left to right keeping a count
X    # of the elements equal to i1 and of those equal to i2, and
X    # suspends every index at which the two counts are the same.
X    # Fails once the list has been exhausted.
X
X    local idx, opens, closes
X
X    opens := closes := 0
X    every idx := 1 to *l do {
X        if l[idx] === i1 then opens +:= 1
X        else if l[idx] === i2 then closes +:= 1
X        if opens = closes then suspend idx
X    }
X
Xend
X
X
Xprocedure match_positive_ints(l)
X
X    # Matches the longest run of positive integers at the start of l
X    # that neither contains nor is directly followed by a negative
X    # integer, and returns the position just past that run.  For
X    # [55, 55, 55, -42, 55] the result is 3.  If the first negative
X    # integer sits at index 3 or lower the procedure FAILS (so
X    # [55, -42] fails; it does not return 1).  A list holding no
X    # negative integer at all yields *l + 1.
X
X    local k
X
X    k := 0
X    while (k +:= 1) <= *l do
X        if l[k] < 0 then {
X            # Give up the integer just before the negative one too.
X            if k > 3 then return k - 1
X            fail
X        }
X    return *l + 1
X
Xend
X
X
Xprocedure Ints2String(l)
X
X    # Converts a list of character codes into the string made up of
X    # the corresponding characters, in list order.
X
X    local str, k
X
X    str := ""
X    every k := 1 to *l do
X        str ||:= char(l[k])
X    return str
X
Xend
X
X
Xprocedure StripChar(s,s2)
X
X    # Returns a copy of s from which s2 has been stripped: at every
X    # occurrence of s2 the following run of characters belonging to
X    # cset(s2) is removed.  If s2 is empty or does not occur in s,
X    # s is returned unchanged.
X
X    local tmp
X
X    # Nothing to do (and, for an empty s2, nothing that could ever
X    # advance the scan below).
X    (*s2 > 0 & find(s2,s)) | return s
X
X    tmp := ""
X    s ? {
X        # Search for the VALUE of s2.  (This used to search for the
X        # literal string "s2", so the requested characters were
X        # never the ones located.)
X        while tmp ||:= tab(find(s2))
X        do tab(many(cset(s2)))
X        tmp ||:= tab(0)
X    }
X    return tmp
X
Xend
SHAR_EOF
echo 'File findre.icn is complete' &&
true || echo 'restore of findre.icn failed'
rm -f _shar_wnt_.tmp
fi
# ============= kjv2rtv.icn ==============
if test -f 'kjv2rtv.icn' -a X"$1" != X"-c"; then
echo 'x - skipping kjv2rtv.icn (File already exists)'
rm -f _shar_wnt_.tmp
else
> _shar_wnt_.tmp
echo 'x - extracting kjv2rtv.icn (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'kjv2rtv.icn' &&
X############################################################################
X#
X# Name: kjv2rtv.icn
X#
X# Title: kjv2rtv (KJV -> retrieve format converter)
X#
X# Author: Richard L. Goerwitz
X#
X# Version: 1.5
X#
X############################################################################
X#
X# Program for converting PD KJV biblical texts into retrieve format.
X# Reads standard input. Writes reformatted text to standard output.
X# Assumes the specific PC-SIG KJV format for input files. If you
X# have a KJV text that has been "tampered" with, this program may not
X# work correctly. And then again....
X#
X############################################################################
X#
X# Links: complete.icn ./convertr.icn ./name2num.icn
X#
X############################################################################
X
X
Xprocedure main()
X
X    # Reads KJV text from standard input and writes it to standard
X    # output in retrieve format: for each line that opens with a
X    # reference accepted by convertr(), a line consisting of "::"
X    # followed by the converted reference, then the text of that
X    # verse (joined from all of its input lines) on a single line.
X
X    local line, bitmap, verse
X
X    # While you can read lines from stdin...
X    while line := read() do {
X
X        # ...scan them for book ch:vs references, and output these in
X        # retrieve format, along with corresponding text.
X        line ? {
X
X            # Housekeeping.
X            pos(0) & next            # skip past empty lines
X            ="\x1F"                  # tab past ASCII 31 (if present)
X            tab(many('\t '))         # tab past whitespace (if present)
X
X            # If the line begins with a book ch:vs reference, then
X            # write out the text of the preceding verse (if in fact
X            # there *was* a preceding verse).  Finally, write out the
X            # new book ch:vs reference (in retrieve format).
X            if bitmap := convertr(tab(find(" "))) then {
X                # Collapse double spaces left over from joining lines.
X                # (The search string must be TWO spaces; replacing one
X                # space by one space would do nothing.)
X                write(REplace("" ~== trim(\verse, '\t \x0D'), "  ", " "))
X                write("::", bitmap)
X                tab(many(' \t'))
X                verse := trim(tab(0), '\t \x0D')
X            } else {
X                # Dump the (rest of) the line onto verse.  Text seen
X                # before the first reference has no verse to belong
X                # to and is skipped; appending to a null verse would
X                # otherwise abort with a run-time error.
X                if \verse then
X                    verse ||:= " " || ("" ~== trim(tab(0), '\t \x0D'))
X            }
X        }
X    }
X    # Flush the "verse" buffer.
X    write(REplace("" ~== trim(\verse, '\t \x0D'), "  ", " "))
X
X    exit(0)
X
Xend
X
X
X#
X# From strings.icn in the IPL (written by Ralph Griswold).
X#
Xprocedure REplace(s1,s2,s3)
X
X    # Returns a copy of s1 in which every occurrence of s2 (found in
X    # a single left-to-right pass) has been replaced by s3.  Fails if
X    # s2 is the empty string, since an empty search string matches
X    # without ever advancing the scan.
X
X    local result, i
X
X    i := *s2
X    if i = 0 then fail          # would loop forever on empty string
X    result := ""
X
X    s1 ? {
X        while result ||:= tab(find(s2)) do {
X            result ||:= s3
X            move(i)             # step over the occurrence just replaced
X        }
X        return result || tab(0)
X    }
X
Xend
SHAR_EOF
true || echo 'restore of kjv2rtv.icn failed'
rm -f _shar_wnt_.tmp
fi
# ============= convertr.icn ==============
if test -f 'convertr.icn' -a X"$1" != X"-c"; then
echo 'x - skipping convertr.icn (File already exists)'
rm -f _shar_wnt_.tmp
else
> _shar_wnt_.tmp
echo 'x - extracting convertr.icn (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'convertr.icn' &&
X############################################################################
X#
X# Name: convertr.icn
X#
X# Title: convert KJV book chap:verse reference to a
X# writable bitmap suitable for a retrieve text-base
X# file
X#
X# Author: Richard L. Goerwitz
X#
X# Version: 1.3
X#
X############################################################################
X#
X# Links: complete.icn, ./name2num.icn
X#
X############################################################################
X
Xprocedure convertr(s)
X
X    # Converts a KJV reference string s (book name followed by two
X    # numeric fields) into a colon-separated string: the value
X    # name2num() gives for the book, then each numeric field.
X    #
X    # Fails if s does not start with a book name that name2num()
X    # accepts.  If the book is recognized but the number of numeric
X    # fields that follow is not exactly two, the program is
X    # terminated through stop().
X
X    local bitmap, bookname, book_numeric, no
X
X    no := 2                     # numeric fields still expected
X    bookname := ""
X
X    s ? {
X
X        # Find book name, convert it to an integer.  The name may
X        # start with one of the digits 1-4, optionally followed by
X        # blanks.
X        bookname ||:= tab(any('1234')); tab(many(' '))
X        bookname ||:= tab(many(&letters++&digits)) | fail
X        book_numeric := name2num(bookname) | fail
X        bitmap := book_numeric || ":"
X
X        # Get the numeric fields that follow the book name.  Tack
X        # them onto bitmap, counting each one off.
X        while tab(upto(&digits)) do {
X            no -:= 1
X            bitmap ||:= tab(many(&digits)) || ":"
X        }
X        # Too few fields leave no above 0, too many take it below 0.
X        no ~= 0 & stop("convertr: impossible reference ",image(&subject))
X    }
X
X    # Drop the trailing colon.
X    return trim(bitmap, ':')
X
Xend
SHAR_EOF
true || echo 'restore of convertr.icn failed'
rm -f _shar_wnt_.tmp
fi
# ============= makeind.icn ==============
if test -f 'makeind.icn' -a X"$1" != X"-c"; then
echo 'x - skipping makeind.icn (File already exists)'
rm -f _shar_wnt_.tmp
else
> _shar_wnt_.tmp
echo 'x - extracting makeind.icn (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'makeind.icn' &&
X############################################################################
X#
X# Name: makeind.icn
X#
X# Title: makeind.icn
X#
X# Author: Richard L. Goerwitz
X#
X# Version: 1.24
X#
X############################################################################
X#
X# This file, makeind.icn, compiles into an indexing program which
X# creates a series of files offering the user rapid access to
X# individual elements (usually words) within a text file. Access is
X# gained through a set of basic retrieval utilities contained in the
X# file retrieve.icn, bmp2text.icn, retrops.icn, and others included
X# with this package. In order to be indexable, files must interleave
X# string coded bitfield-style designators with text in the following
X# manner:
X#
X# ::001:001:001
X# This is text.
X# ::001:001:002
X# This is more text.
X#
X# The lines beginning with :: (a double colon) mark bitfield-style
X# location-designators. Location designators are strings with digit
X# fields of fixed number and length separated either by nothing (as
X# in, say 001001002), or better yet by non-digits (e.g. 001:001:002).
X# NOTE WELL: The bitmaps must come in ascending order. For example,
X# if we assume three-field bitmaps, 002:001:014 would come before
X# 003:001:013. If your file is not sorted properly, then use the
X# utility, sorttxt provided as a part of this distribution.
X#
X# usage: makeind -f filename -m int -n int [-l int] [-s]
X#
X# When calling makeind, you must specify the filename to be indexed
X# (-f filename), the maximum field value (-m max-value; e.g. if
X# fields can go from 0 to 255, then -m 255 would be used), and the
X# number of fields (-n field-number). The -s switch directs makeind
X# to create a case-sensitive index. The default is case-insensitive.
X# -l [int] tells makeind to create a .LIM file, which is only needed
X# if you want to retrieve text by location marker, and not just via
X# the index (for this, you'll need something to translate human-
X# readable references into retrieve's native format).
X#
X# BUGS: This indexing routine is going to eat up a _tremendous_
X# amount of memory when used on large files, since every token in the
X# input file gets its own entry in wordtbl, and each entry gets a set
X# as its corresponding value. If you don't have the memory, then you
X# could use strings instead of sets (the insert routines will be just
X# a tiny bit more complicated). Intermediate files could also be
X# used. Drop me a line if you want help. Otherwise, make sure you
X# have at *least* two megabytes core for every megabyte of text in
X# the file you wish to index (or else a very, very good virtual
X# memory management system).
X#
X# NOTE: The -S [field-sep] option is currently disabled because using
X# it slows things down drastically. If you want to be able to
X# specify what separator to use when breaking files down into
X# individual words, consult ./gettokens.icn.
X#
X############################################################################
X#
X# Links: options.icn, codeobj.icn, ./indexutl.icn ./gettokens.icn
X#
X# See also: retrieve.icn, bmp2text.icn, expandrf.icn
X#
X############################################################################
X
X# IPL files to be linked in at compile time.
Xlink options, codeobj
X
X# Global variable (for OS-dependencies).
X# global IS # declared in indexutl.icn
X
X# Is is a record containing vital information on an indexed file, such
X# as the field separator, the string-length of fields, etc. I've re-
X# moved the record declaration from this file, and placed it in index-
X# utl.icn.
X# record is(FS, s_len, len, no, is_case_sensitive, r_field)
X
X#
X# Main procedure.
X#
Xprocedure main(a)
X
X local usage, opt_table, fname, rollover_field, index_fname,
X bitmap_fname, upto_field, bofname, bitmap_offset_table,
X out_IS, limits_fname
X # global IS # IS contains stats for file being indexed
X
X #
X # Initialize global OS-related parameters, such as the directory
X # separator (_slash) and the maximum permissible filename length
X # minus four (to make room for extensions makeind tacks on).
X #
X initialize_os_params()
X
X #
X # Read in and check command argument list. Insert FS and no
X # parameters into (global) record IS. Calculate s_len, len, and
X # bitmap_length parameters as well. Returns table of options
SHAR_EOF
true || echo 'restore of makeind.icn failed'
fi
echo 'End of part 8'
echo 'File makeind.icn is continued in part 9'
echo 9 > _shar_seq_.tmp
exit 0
--
-Richard L. Goerwitz goer%sophist@uchicago.bitnet
goer@sophist.uchicago.edu rutgers!oddjob!gide!sophist!goer