home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Source Code 1992 March
/
Source_Code_CD-ROM_Walnut_Creek_March_1992.iso
/
usenet
/
altsrcs
/
3
/
3585
< prev
next >
Wrap
Text File
|
1991-07-02
|
20KB
|
639 lines
Newsgroups: alt.sources
From: goer@ellis.uchicago.edu (Richard L. Goerwitz)
Subject: kjv browser, part 9 of 11
Message-ID: <1991Jul3.065253.28409@midway.uchicago.edu>
Date: Wed, 3 Jul 1991 06:52:53 GMT
---- Cut Here and feed the following to sh ----
#!/bin/sh
# this is bibleref.09 (part 9 of a multipart archive)
# do not concatenate these parts, unpack them in order with /bin/sh
# file makeind.icn continued
#
# Sequencing guard.  Refuse to run unless the sequence file left behind
# by the earlier parts is readable.
test -r _shar_seq_.tmp || {
  echo 'Please unpack part 1 first!'
  exit 1
}
# The sequence file holds the number of the part expected next; this
# part is number 9.  The comparison runs in a subshell fed from the
# file, and a non-zero status from it aborts the whole script.
(
  read Scheck
  case "$Scheck" in
    9) exit 0 ;;
    *) echo Please unpack part "$Scheck" next!
       exit 1 ;;
  esac
) < _shar_seq_.tmp || exit 1
if test ! -f _shar_wnt_.tmp; then
echo 'x - still skipping makeind.icn'
else
echo 'x - continuing file makeind.icn'
sed 's/^X//' << 'SHAR_EOF' >> 'makeind.icn' &&
X # (keys are option letters).
X #
X usage:= "usage: makeind -f filename -m int -n int [-l int] [-s]"
X opt_table := initialize_IS(a)
X fname := \opt_table["f"] | stop(usage)
X rollover_field := opt_table["l"] # (optional)
X
X #
X # Begin the process of tokenizing, recording token locations, and
X # of storing this information in two separate files.
X #
X # Read input file, making a table of words and their locations.
X index_table := create_index(fname)
X
X #
X # Write keys to one file, with pointers into another file
X # containing the bitmaps for each key.
X #
X index_fname := dir_name(fname)||create_fname(fname, "IND")
X bitmap_fname := dir_name(fname)||create_fname(fname, "BMP")
X write_tokens_and_offsets(index_fname, bitmap_fname, index_table)
X
X #
X # Re-open fname and store the locations for each chunk of text
X # marked by a ::location marker. This could certainly be
X # incorporated into the indexing routines, but only at the great
X # expense of clarity.
X #
X upto_field := 1 < (IS.no * 2) / 3 | 1
X bofname := dir_name(fname)||create_fname(fname, "OFS")
X bitmap_offset_table :=
X store_bitmaps_and_offsets(fname, upto_field)
X # store in .OFS file
X write_bitmaps_and_offsets(bofname, bitmap_offset_table, upto_field)
X
X #
X # Re-open fname again, and store the pre-rollover bitmaps in the
X # .LIM file. Obviously this procedure could be stuffed into
X # another one above (e.g. store_bitmaps_and_offsets()).
X #
X if \rollover_field then {
X #
X # Let's say we are using the Bible as our text, and we want to
X # create all the bitmaps for Genesis 1:9-2:10. We need to know
X # what verse chapter 1 goes up to. By supplying makeind
X # with a "-l 3" argument, you are telling it to store this in-
X # formation for later use by expandrf().
X #
X limits_fname := dir_name(fname)||create_fname(fname, "LIM")
X write_limits(limits_fname, fname, rollover_field)
X IS.r_field := rollover_field
X }
X
X #
X # Write IS record to the .IS file.
X #
X out_IS := open(dir_name(fname)||create_fname(fname, "IS"), "w") |
X abort("makeind","can't open .IS file",2)
X writes(out_IS, encode(IS))
X close(out_IS)
X
X # All is well. Exit with zero status.
X exit(0)
X
Xend
X
X
X#
X# initialize_IS
X#
X# Sets up main parameters for the current index file, such as the
X# field separator to be used in tokenizing the file, the string and
X# bit lengths of bitmap fields, the number of fields, and the size of
X# the actual bitmaps (in bytes) as written to disk (comes out to the
X# smallest multiple of eight greater than or equal to the field length
X# times the field number). The marker length has to be set in the main
X# procedure, so initialize_IS leaves it null for now.
X#
Xprocedure initialize_IS(a)
X
X    # Parses the argument list a, creates the global IS record, and
X    # fills in the fields that depend only on the options.  Stops with
X    # the usage message when the option count is out of range or when
X    # one of the mandatory -n / -m options is missing.  Returns the
X    # option table (keys are option letters).
X
X    local usage, opt_table
X    # global IS
X
X    usage:="usage: makeind -f filename -m int -n int [-l int] [-s]"
X
X    IS := is()    # set up some IS fields
X    opt_table := options(a, "f:m:n+sS:l+")
X    3 <= *opt_table <= 6 | stop(usage)
X    IS.no := \opt_table["n"] | stop(usage)
X    # -m is mandatory as well: without it the two length calculations
X    # below have nothing to work on and would silently leave the IS
X    # fields unset.
X    \opt_table["m"] | stop(usage)
X    IS.FS := \opt_table["S"] | "['.]?[^-0-9A-Za-z']+'?"
X    IS.is_case_sensitive := opt_table["s"]    # normally is &null
X
X    #
X    # Calculate string representation length for fields, as well as
X    # the number of bits required for their integer representation.
X    # I.e. if the opt_table["m"] value is 99, this will take two chars to
X    # represent as a string ("99"), but 7 binary "digits" to represent
X    # internally as a base-two integer.
X    #
X    IS.s_len := *string(opt_table["m"])
X    IS.len := *exbase10(opt_table["m"], 2)
X
X    return opt_table
X
Xend
X
X
X#
X# create_index
X#
X# Creates a table containing all tokens in the file fname, with the
X# set of each token's locations recorded as values for those tokens.
X# IS.FS is a nawk-style field separator regular expression.
X# If &null, defaults to ~(&digits++&letters). IS.s_len
X# is the location marker string-representation field length. IS.len
X# is the number of binary digits needed for an
X# integer representation of a given field. IS.no is
X# the number of fields.
X#
Xprocedure create_index(fname)
X
X    # Reads fname line by line.  A line beginning with "::" sets the
X    # current location bitmap; every token found on any other line is
X    # filed in the result table under that bitmap.  Returns the table
X    # (token -> set of bitmaps).
X
X    local infile, tokens, textline, location, word
X
X    infile := open(fname) |
X        abort("create_index","can't open index file, "||fname, 9)
X    tokens := table()
X
X    while textline := read(infile) do
X        textline ? {
X            if ="::" then
X                location := digits_2_bitmap(tab(0))    # in indexutl.icn
X            else every word := gettokens(IS.is_case_sensitive) do {
X                # gettokens() resides in a separate file, gettokens.icn
X                /tokens[word] := set()
X                # Text may only appear after a location marker.
X                if /location then
X                    abort("create_index","text before location-marker",8)
X                else insert(tokens[word], location)
X            }
X        }
X
X    # textline keeps the last line read; it is still null only if the
X    # file yielded no lines at all.
X    \textline | abort("create_index", "empty input file, "||fname, 8)
X    close(infile)
X    return tokens
X
Xend
X
X
X#
X# write_tokens_and_offsets
X#
X# Writes to one file a list of all tokens collected from the input
X# file, one to a line, followed by a tab, and then a byte offset into
X# another file where the bitmaps for that token are kept.
X#
X# token tab offset
X#
X# A seek to "offset" in the bitmap file will put you at the start of a
X# block of bitmaps.
X#
Xprocedure write_tokens_and_offsets(index_fname, bitmap_fname, t)
X
X    # index_fname  - file receiving one "token<tab>offset" line per key
X    # bitmap_fname - file receiving each key's block of bitmaps
X    # t            - table whose keys are tokens and whose values are
X    #                the collections of bitmaps recorded for them
X    # Returns the number of keys written.
X
X    local outtokens, outbitmaps, index_lst, i, bitmap_length
X
X    outtokens := open(index_fname, "w") |
X        abort("write_tokens_and_offsets","can't open "||index_fname,6)
X    outbitmaps := open(bitmap_fname, "w") |
X        abort("write_tokens_and_offsets","can't open "||bitmap_fname,5)
X    # Calculate the length of bitmaps (must be the smallest multiple of
X    # 8 >= (IS.len * IS.no)).
X    bitmap_length := ((IS.len * IS.no) <= seq(0,8))
X    # sort(t, 3) flattens the table into a list of alternating keys and
X    # values ordered by key, hence the step of 2 in the loop below.
X    index_lst := sort(t, 3)
X
X    every i := 1 to *index_lst-1 by 2 do {
X
X        # Write token to index file with the offset of that token's
X        # bitmaps in the bitmap file.
X        write(outtokens, index_lst[i], "\t", where(outbitmaps))
X
X        # Now write the bitmaps for the above token to the bitmap file.
X        # First write out the number of bitmaps in this block.  Two bytes
X        # are allotted to hold this count (16 bits).
X        if *index_lst[i+1] > 65535 then {    # just in case
X            abort("write_tokens_and_offsets",
X                  "too many bitmaps for "||index_lst[i], 16)
X        }
X        write_int(outbitmaps, *index_lst[i+1], 16)
X        # Having written the bitmap count, now write the bitmaps proper
X        # to the bitmap file.
X        every write_int(outbitmaps, !index_lst[i+1], bitmap_length)
X    }
X
X    # Close files.  Return number of keys processed (any better ideas??)
X    every close(outtokens | outbitmaps)
X    return *index_lst / 2    # return number of keys in index file
X
Xend
X
X
X
X#
X# store_bitmaps_and_offsets
X#
X# Runs through the file called fname, finding all the location
X# markers, and recording the offset of the text they precede. Returns
X# a table of bitmap : offset pairs, which write_bitmaps_and_offsets
X# later writes to the .OFS file. Note that the full bitmap is
X# not stored. Rather only the first upto_field fields are stored.
X# Normally upto_field = IS.no - 1.
X#
Xprocedure store_bitmaps_and_offsets(fname, upto_field)
X
X    # Returns a table keyed by truncated bitmap (only the first
X    # upto_field fields are kept).  Each value is the file offset at
X    # which the "::" marker line opening a run of markers with that
X    # truncated bitmap begins.
X
X    local intext, current_location, last_major_division,
X          major_division, bitmap_offset_table, line
X
X    intext := open(fname) |
X        abort("store_bitmaps_and_offsets","can't open "||fname,5)
X    bitmap_offset_table := table()
X
X    # where() is sampled before each read, so current_location is the
X    # offset of the start of the line about to be examined.
X    while (current_location := where(intext), line := read(intext)) do {
X        line ? {
X            if ="::" then {
X                # A negative count makes ishift() shift right, dropping
X                # the trailing (IS.no - upto_field) fields.
X                major_division :=
X                    ishift(digits_2_bitmap(tab(0)),    # in indexutl.icn
X                           -((IS.no - upto_field) * IS.len))
X                if \last_major_division = major_division then
X                    next
X                else {
X                    insert(
X                        bitmap_offset_table, major_division, current_location)
X                    last_major_division := major_division
X                }
X            }
X        }
X    }
X
X    # Release the input file; the other procedures in this file close
X    # theirs as well.
X    close(intext)
X    return bitmap_offset_table
X
Xend
X
X
X#
X# write_bitmaps_and_offsets
X#
X# Does the actual writing of bitmaps and offsets to a file. Receives
X# a table of bitmaps cut down to upto_field fields. Shrinking the
X# bitmaps lessens the size of the resulting file, but requires a bit
X# more I/O when it comes time to look something up.
X#
Xprocedure write_bitmaps_and_offsets(bofname, t, upto_field)
X
X    # bofname    - name of the output file
X    # t          - table of truncated bitmap -> offset pairs
X    # upto_field - number of fields kept in each truncated bitmap
X    # Each record written is: one byte holding the record's size in
X    # bytes, then the bitmap, then the offset.
X
X    local outtext, tmp_list, i, offset_length,
X          block_size, stored_bitmap_length
X
X    outtext := open(bofname, "w") |
X        abort("write_bitmaps_and_offsets","can't open "||bofname,5)
X    # Bits per stored bitmap, rounded up to a multiple of 8.
X    stored_bitmap_length := ((IS.len * upto_field) <= seq(0,8))
X    # sort(t, 3) yields a list of alternating keys and values ordered
X    # by key, hence the step of 2 in the loop below.
X    tmp_list := sort(t, 3)
X
X    every i := 1 to *tmp_list-1 by 2 do {
X
X        # Number of bits needed to hold offset.
X        offset_length := (*exbase10(tmp_list[i+1], 2) <= seq(0,8))
X        # Number of bytes needed to hold bitmap and offset (both).
X        block_size := (stored_bitmap_length + offset_length) / 8
X
X        # We could just code the length of the offset, since the bitmap's
X        # length is fixed (and known).  Seems better to code the block's
X        # total length just in case something gets screwed up.  An 8-bit
X        # limit means the bitmap+offset length cannot exceed 2^8-1 (255)
X        # characters.
X        if block_size > 255 then
X            abort("write_bitmaps_and_offsets","bitmap+offset too big",15)
X        write_int(outtext, block_size, 8)
X        write_int(outtext, tmp_list[i], stored_bitmap_length)
X        write_int(outtext, tmp_list[i+1], offset_length)
X
X    }
X
X    # Close the output file explicitly, as the other writers here do.
X    close(outtext)
X    return
X
Xend
X
X#
X# write_limits
X#
X# Writes out the bitmaps that will be needed in order for expandrf()
X# to be able to know when the rollover field rolls over.
X#
Xprocedure write_limits(out_fname, in_fname, r_field)
X
X    # Scans in_fname for "::" location markers and writes to out_fname
X    # the last full bitmap seen before each change in the fields that
X    # precede the rollover field r_field, plus the final bitmap of the
X    # file.  Returns the number of markers read.
X
X    local src, dest, drop_bits, full_length, marker_count,
X          textline, current, current_prefix, previous
X
X    src := open(in_fname) |
X        abort("write_limits","can't open "||in_fname,5)
X    dest := open(out_fname, "w") |
X        abort("write_limits","can't open "||out_fname,5)
X    r_field <= IS.no |
X        abort("write_limits","-l value should not exceed that of -n",50)
X
X    # Negative count: ishift() shifts right, discarding the rollover
X    # field and every field after it.
X    drop_bits := -(((IS.no-r_field)+ 1) * IS.len)
X    full_length := ((IS.len * IS.no) <= seq(0,8))
X    marker_count := 0
X
X    while textline := read(src) do
X        textline ? {
X            if ="::" then {
X                marker_count +:= 1
X                current := digits_2_bitmap(tab(0))    # in indexutl.icn
X                current_prefix := ishift(current, drop_bits)
X                # Leading fields changed: the previous marker was the
X                # last one before the rollover.
X                if ishift(\previous, drop_bits) ~== current_prefix
X                then write_int(dest, previous, full_length)
X                previous := current
X            }
X        }
X
X    # The final marker in the file is always a limit.
X    write_int(dest, \previous, full_length)
X    close(src)
X    close(dest)
X    return marker_count
X
Xend
SHAR_EOF
echo 'File makeind.icn is complete' &&
true || echo 'restore of makeind.icn failed'
rm -f _shar_wnt_.tmp
fi
# ============= gettokens.icn ==============
if test -f 'gettokens.icn' -a X"$1" != X"-c"; then
echo 'x - skipping gettokens.icn (File already exists)'
rm -f _shar_wnt_.tmp
else
> _shar_wnt_.tmp
echo 'x - extracting gettokens.icn (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'gettokens.icn' &&
X############################################################################
X#
X# Name: gettokens.icn
X#
X# Title: get tokens from text-base file
X#
X# Author: Richard L. Goerwitz
X#
X# Version: 1.2
X#
X############################################################################
X#
X# Tokenizing routine used by makeind.icn to create index.
X#
X############################################################################
X#
X# See also: ./makeind.icn
X#
X#############################################################################
X
X# declared in ./indexutl.icn (q.v.)
X# global IS
X#
X# One idea for gettokens, good for small indices. Uses field separator
X# (IS.FS). Also uses (slow) findre. Farther below is a less flexible
X# version of gettokens which runs faster.
X#
X#procedure gettokens(is_case_sensitive)
X#
X# # Used within a scanning expression. Returns tokens in
X# # &subject[&pos:0] (&pos normally = 1). Tokens are stretches of
X# # text separated by the IS.FS field separator. This
X# # field separator is a nawk style FS regular expression. If null,
X# # it gets defined as ~(&digits++&letters).
X#
X# local token
X# static non_alphanums
X# initial non_alphanums := ~(&digits ++ &letters ++ '-')
X#
X# /IS.FS := non_alphanums
X#
X# while token := tab(findre(IS.FS)) do {
X# tab(__endpoint)
X# tab(many('\'')) # unfortunate by-product of findre's weakness
X# if \is_case_sensitive
X# then suspend "" ~== trim(token,'\t ')
X# else suspend map("" ~== trim(token,'\t '))
X# }
X#
X# # Return the rest of &subject. Even though we're not tabbing
X# # upto FS, this is normally what the user intends.
X# if \is_case_sensitive
X# then return "" ~== trim(tab(0),'\t ')
X# else return map("" ~== trim(tab(0),'\t '))
X#
X#end
X
Xprocedure gettokens(is_case_sensitive)
X
X    # Generator meant to be invoked inside a string-scanning
X    # expression; it works on &subject from &pos onward.  A token is
X    # a run of letters, digits, dashes and apostrophes that starts at
X    # a letter, digit or dash.  Trailing apostrophes and dashes are
X    # trimmed off, and tokens left empty by the trimming are skipped.
X    # Tokens are mapped to lower case unless is_case_sensitive is
X    # non-null.
X
X    local word
X    static startchars, bodychars
X    initial {
X        startchars := &digits ++ &letters ++ '-'
X        bodychars := startchars ++ '\''
X    }
X
X    tab(upto(startchars))
X    while word := trim(tab(many(bodychars)), '\t \'-') do {
X        if *word > 0 then {
X            if /is_case_sensitive
X            then suspend map(word)
X            else suspend word
X        }
X        tab(upto(startchars))
X    }
X
Xend
SHAR_EOF
true || echo 'restore of gettokens.icn failed'
rm -f _shar_wnt_.tmp
fi
# ============= Makefile.dist ==============
if test -f 'Makefile.dist' -a X"$1" != X"-c"; then
echo 'x - skipping Makefile.dist (File already exists)'
rm -f _shar_wnt_.tmp
else
> _shar_wnt_.tmp
echo 'x - extracting Makefile.dist (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'Makefile.dist' &&
X##########################################################################
X#
X# Makefile.dist for bibleref.
X#
X##########################################################################
X#
X# User-modifiable section. Read carefully! You will almost
X# certainly have to change some settings here.
X#
X
X#
X# Destination directory for binaries; library directory for auxiliary
X# files. Owner and group for public executables. Leave the trailing
X# slash off of directory names.
X#
XDESTDIR = /usr/local/bin
X# DESTDIR = $(HOME)/bin
XLIBDIR = /usr/local/lib/$(PROGNAME)
X# LIBDIR = $(HOME)/$(PROGNAME)
X# LIBDIR = /usr/local/share/lib/$(PROGNAME)
X# Owner and group of the installed files; "bin" is the usual
X# alternative.  Keep remarks on their own lines: make treats the
X# blanks in front of a trailing "#" as part of the value.
XOWNER = root
XGROUP = root
X
X#
X# Name of your icon compiler and compiler flags.
X#
XICONC = /usr/icon/v8/bin/icont
XIFLAGS = -Sc 200 -Si 1000 -Sn 2000 -SF 30
X
X#
X# Names of KJV files as packaged in the PC-SIG disk set (19 discs).
X# Mine were snarfed from helens.stanford.edu (36.0.2.99) as kjv.tar.Z.
X# You will need to link these to the current directory. Please don't
X# copy them all over, or if you do, be sure to delete them afterwards.
X# They aren't needed after you are done indexing.
X#
XRAWFILES = gen.txt exo.txt lev.txt num.txt deu.txt jos.txt jdg.txt \
X rth.txt sa1.txt sa2.txt ki1.txt ki2.txt ch1.txt ch2.txt \
X ezr.txt neh.txt est.txt job.txt psa.txt pro.txt ecc.txt \
X son.txt isa.txt jer.txt lam.txt eze.txt dan.txt hos.txt \
X joe.txt amo.txt oba.txt jon.txt mic.txt nah.txt hab.txt \
X zep.txt hag.txt zec.txt mal.txt mat.txt mar.txt luk.txt \
X joh.txt act.txt rom.txt co1.txt co2.txt gal.txt eph.txt \
X phi.txt col.txt th1.txt th2.txt ti1.txt ti2.txt tit.txt \
X phm.txt heb.txt jam.txt pe1.txt pe2.txt jo1.txt jo2.txt \
X jo3.txt jud.txt rev.txt
X#
X# If you have your KJV in a single file, that's fine. Just be sure
X# the books are in their correct order (as above), and are in the PC-SIG
X# disk-set format.
X# RAWFILES = ./kjv.Z
X
X#
X# If you've compressed your KJV file(s), use zcat; otherwise use cat.
X#
XCAT = cat
X# CAT = zcat
X
X#
X# Change these only if you're pretty sure of what you're doing.
X#
XSHELL = /bin/sh
XMAKE = make
X
X
X###########################################################################
X#
X# Don't change anything below this line.
X#
X
X# Text file written by $(CONVERTER) and then handed to $(INDEXER).
XRTVFILE = kjv.rtv
X
X# Conversion program and the Icon sources it is compiled from.
XCONVERTER = kjv2rtv
XCONVERTSRC = $(CONVERTER).icn convertr.icn name2num.icn complete.icn
X
X# Indexing program and the Icon sources it is compiled from.
XINDEXER = makeind
XINDEXSRC = $(INDEXER).icn gettokens.icn indexutl.icn
X
X# DUMMY_FILE is a stamp touched once conversion and indexing are done.
X# PROGNAME names the search program and the library subdirectory.
XDUMMY_FILE = index.done
XPROGNAME = bibleref
X
X# Icon sources of the search program; $(PROGNAME).icn is itself
X# generated from $(PROGNAME).src by a rule below.
XSEARCHSRC = $(PROGNAME).icn ref2bmap.icn name2num.icn convertb.icn \
X listutil.icn passutil.icn srchutil.icn complete.icn \
X ipause.icn rewrap.icn binsrch.icn bmp2text.icn initfile.icn \
X retrieve.icn indexutl.icn retrops.icn whatnext.icn iolib.icn \
X iscreen.icn findre.icn
X
Xall: $(DUMMY_FILE) $(PROGNAME)
X
X# These names are commands, not files to be built.
X.PHONY: all install tar clean clobber
X
X# Stamp target: builds the converter and the indexer, converts the raw
X# text and indexes it, then records completion by touching the stamp.
X# It deliberately has no prerequisites, so a stamp file that already
X# exists is never considered out of date.  The freshly built tools are
X# run as ./name so the build does not rely on "." being in PATH.
X$(DUMMY_FILE):
X	@echo ""
X	@echo "This may take a while (about 1 minute/MB on a Sun4)."
X	@echo ""
X	@sleep 2
X	$(ICONC) $(IFLAGS) -o $(CONVERTER) $(CONVERTSRC)
X	$(CAT) $(RAWFILES) | ./$(CONVERTER) > $(RTVFILE)
X	@echo ""
X	@echo "This may take a long time (c. 20 min./MB on a Sun4)."
X	@echo "Kids, don't even *think* of trying this at home."
X	@echo ""
X	@sleep 2
X	$(ICONC) $(IFLAGS) -o $(INDEXER) $(INDEXSRC)
X	./$(INDEXER) -f $(RTVFILE) -m 200 -n 3 -l 3
X	touch $(DUMMY_FILE)
X
X# The search program itself.
X$(PROGNAME): $(SEARCHSRC)
X	$(ICONC) $(IFLAGS) -o $@ $(SEARCHSRC)
X
X# Generate the main source with the configured data-file location
X# substituted for the default path.
X$(PROGNAME).icn: $(PROGNAME).src
X	sed "s|/usr/local/lib/bibleref/kjv.rtv|$(LIBDIR)/$(RTVFILE)|" $(PROGNAME).src > $@
X
X# Stand-alone builds of the two helper programs.
X$(CONVERTER): $(CONVERTSRC)
X	$(ICONC) $(IFLAGS) -o $@ $(CONVERTSRC)
X
X$(INDEXER): $(INDEXSRC)
X	$(ICONC) $(IFLAGS) -o $@ $(INDEXSRC)
X
X
X##########################################################################
X#
X# Pseudo-target names (install, clean, clobber)
X#
X
X# Pessimistic assumptions regarding the environment (in particular,
X# I don't assume you have the BSD "install" shell script).
X# Copies the program into $(DESTDIR) and moves the index files and
X# $(RTVFILE) into $(LIBDIR), setting group and owner on each.  The
X# braces make chmod apply only to a directory that was just created;
X# without them the shell reads "a || b && c" as "(a || b) && c" and
X# would change the mode of a directory that already exists.
Xinstall: all
X	-test -d $(DESTDIR) || { mkdir $(DESTDIR) && chmod 755 $(DESTDIR); }
X	cp $(PROGNAME) $(DESTDIR)/$(PROGNAME)
X	chgrp $(GROUP) $(DESTDIR)/$(PROGNAME)
X	chown $(OWNER) $(DESTDIR)/$(PROGNAME)
X	-test -d $(LIBDIR) || { mkdir $(LIBDIR) && chmod 755 $(LIBDIR); }
X	mv xxx* $(RTVFILE) $(LIBDIR)/
X	chgrp $(GROUP) $(LIBDIR)
X	chown $(OWNER) $(LIBDIR)
X	chgrp $(GROUP) $(LIBDIR)/xxx* $(LIBDIR)/$(RTVFILE)
X	chown $(OWNER) $(LIBDIR)/xxx* $(LIBDIR)/$(RTVFILE)
X	@echo ""
X	@echo "Done."
X	@echo ""
X
X#
X# For storing the pre-indexed files. All that needs to be done here
X# is to unpack the archive on another machine, and make $(PROGNAME).
X#
X# NOTE(review): AUXILSRC is never assigned in this Makefile, so it
X# expands to nothing here -- confirm which files it was meant to list.
Xtar: all
X	tar -cf ./$(PROGNAME).tar $(PROGNAME).src $(DUMMY_FILE) $(AUXILSRC) \
X	    Makefile.dist README
X
X#
X# Cleanup
X#
X# Remove the three compiled programs only.
Xclean:
X	rm -f $(CONVERTER) $(INDEXER) $(PROGNAME)
X
X# Be careful; use this target, and you'll be back to square one.
X# Besides everything clean removes, this deletes the raw text files,
X# the xxx*.??? files, $(RTVFILE), the stamp file and the generated
X# $(PROGNAME).icn.
Xclobber: clean
X	@echo "Okay, you asked for it."
X	rm -f $(RAWFILES) xxx*.??? $(RTVFILE) $(DUMMY_FILE) $(PROGNAME).icn
SHAR_EOF
true || echo 'restore of Makefile.dist failed'
rm -f _shar_wnt_.tmp
fi
# ============= README ==============
if test -f 'README' -a X"$1" != X"-c"; then
echo 'x - skipping README (File already exists)'
rm -f _shar_wnt_.tmp
else
> _shar_wnt_.tmp
echo 'x - extracting README (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'README' &&
X--------
SHAR_EOF
true || echo 'restore of README failed'
fi
echo 'End of part 9'
echo 'File README is continued in part 10'
echo 10 > _shar_seq_.tmp
exit 0
--
-Richard L. Goerwitz goer%sophist@uchicago.bitnet
goer@sophist.uchicago.edu rutgers!oddjob!gide!sophist!goer