home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Source Code 1992 March
/
Source_Code_CD-ROM_Walnut_Creek_March_1992.iso
/
usenet
/
altsrcs
/
1
/
1200
< prev
next >
Wrap
Internet Message Format
|
1990-12-28
|
15KB
From: scott@cs.odu.edu (Scott Yelich)
Newsgroups: comp.sources.wanted,comp.lang.c,alt.sources
Subject: Re: key word searches in text files
Message-ID: <SCOTT.90Apr18133215@croaker.cs.odu.edu>
Date: 18 Apr 90 17:32:15 GMT
>
> We maintain a large mass of text files on the board and I would like
> for a caller to be able to look for key words in those files WITHOUT having
> to use [e]grep to bang away at the files... This is the method we're
> currently using. It works, but is slow not to mention the wear and
> tear on the HD.
>
> Are there any programs (or suggestions) that will take a text file and
> create a key-word list, ignore the usual non-essential words (and,
> the, at , etc)?
I call this program ``lookfor'' and I use it on my text file databases...
I was going to submit it later this year... perhaps you could modify
it to your liking...
Currently this program is used to keep an ALTERNATE database of
``help'' files for consultants. It works off a file such as this:
------------------------------------------------------------------------------
SUBJECT : Scott D. Yelich
TITLE : Scott D. Yelich
INFO : Wednesday- September 6, 1989 | Wednesday- September 6, 1989 [scott]
KEY WORDS: scott d. yelich information mail call lives house home where why
when etc
Permanent mailing address:
Scott D. Yelich
4 Kensington Ct.
Williamsburg, Va. 23185
(804) 565-1811
------------------------------------------------------------------------------
Of course, it is currently configured for OUR local system... but it
should work without too many problems on almost any unix system.
I am currently working on rewriting this to make the code faster.
Have fun!
-----------------------------------------------------------------------------
Scott D. Yelich scott@cs.odu.edu [128.82.8.1]
After he pushed me off the cliff, he asked me, as I fell, ``Why'd you jump?''
Administrator of: Game design requests to <game-design-request@cs.odu.edu>
-----------------------------------------------------------------------------
#!/bin/sh
# This is a shell archive.
# Run the file through sh to extract its contents.
# shar: Shell Archiver
# Run the following text with /bin/sh to create:
# ./lookfor
# This archive created: Wed Apr 18 13:26:37 1990
cat << \SHAR_EOF > ./lookfor
#!/bin/sh
# +--------------------------------------------------------------------------+
# | Scott D. Yelich xanth.uucp |
# | C.S. Department scott@cs.odu.edu |
# | Old Dominion University scott@xanth.cs.odu.edu |
# | Norfolk, VA 23529-0612 [ 128.82.8.1 ] |
# +--------------------------------------------------------------------------+
# +--------------------------------------------------------------------------+
# | Copyright 1989 Scott D. Yelich. All rights reserved. Last Mod: 8/8/89 |
# +--------------------------------------------------------------------------+
INITIALIZE_VARIABLES ()
{
#
# Function: INITIALIZE_VARIABLES
#
# Sets up every global used by the other functions: absolute paths of the
# external tools, the option flags (all cleared first), the database and
# temporary file locations, and finally the flags and search words parsed
# from $VARS.
#
# Reads:  VARS and UPROGNAME (set by the start-up code), PAGER, EDITOR.
# Note:   also empties PATH and sets the umask to 117.
#
#
# Initialize external executable paths (Who needs a path variable anyway!).
#
  PATH=
  LS=/bin/ls
  TR=/bin/tr
  CP=/bin/cp
  MV=/bin/mv
  RM=/bin/rm
  AWK=/bin/awk
  # Deliberately not called PWD: shells that follow POSIX maintain PWD
  # themselves, so it must not be reused for a command path.
  PWDCMD=/bin/pwd
  CAT=/bin/cat
  SED=/bin/sed
  EXPR=/bin/expr
  SORT=/bin/sort
  ECHO=/bin/echo
  DIFF=/bin/diff
  TAIL=/usr/ucb/tail
  MAIL=/usr/ucb/mail
  EGREP=/bin/egrep
  COLRM=/usr/ucb/colrm
  HELPMAN=/usr/ucb/man
  YPWHICH=/bin/ypwhich
  BASENAME=/bin/basename
#
# Initialize internal variables and flags...
#
  SHOW=
  SYNC=
  TSKGI=
  NAIVE=
  OUTPUT=
  LOCATE=
  HEADER=
  MANUAL=
  VERBOSE=
  OPTIONS=
  ADD_FILE=
  CONFIGURE=
  INITIALIZE=
  UPDATE_FILE=
  MATCH_WORDS=
  FIND_KEYWORD=
  LIST_KEYWORDS=
#
# Initialize others and miscellaneous...
#
  # NOTE(review): the definition of CR was truncated in this copy of the
  # script (its closing quote was missing). It is rebuilt here as a
  # backslash followed by a newline because VIEW uses it as sed replacement
  # text, where that sequence produces a line break - confirm against an
  # original copy if one is available.
  CR='\
'
  UNIQ=$$
  TMP=/tmp
  NULL=/dev/null
  umask 117
  TABS=" "
  INFO=":INFO :"
  TITLE=":TITLE :"
  SUBJECT=":SUBJECT :"
  KEYWORD=":KEY WORDS:"
  WHOIAM=`/usr/ucb/whoami`
  SERVERS="helios yucca tuna nansen granite hengest"
  MASTER_LIB=/usr/amon-re/local/lib/sysman
  MASTER_KEYWORDS=$MASTER_LIB/KEYWORDS
  MASTER_FILENAMES=$MASTER_LIB/FILENAMES
  NFSERVER=`$YPWHICH | $TR "." " " | $AWK '{print $1}'`
  LIB=/usr/$NFSERVER/local/lib/sysman
  PWD_DIRECTORY=`$PWDCMD`
  TEMP_1=$TMP/"$UPROGNAME"_"$UNIQ"_TEMP_1
  TEMP_2=$TMP/"$UPROGNAME"_"$UNIQ"_TEMP_2
  TEMP_3=$TMP/"$UPROGNAME"_"$UNIQ"_TEMP_3
#
# Parse the command line.  The loop runs one word behind: OPTION holds the
# previous word and VAR the current one, so an option that takes a value
# finds that value in VAR.  OPTIONS records what happened to OPTION:
#   Yes   - it was an option that consumed VAR as its value
#   empty - it was a plain flag
#   No    - it was not an option at all, so it is a search word
# The start-up code appends $UPROGNAME to VARS so that the last real word
# also gets its turn as OPTION.
#
  for VAR in $VARS
  do
    OPTIONS=No
    case $OPTION in
      "-add" | "-a")                 ADD_FILE=$VAR; OPTIONS=Yes ;;
      "-list" | "-l")                SHOW=Yes; OPTIONS= ;;
      "-help" | "-h")                MANUAL=Yes; OPTIONS= ;;
      "-find" | "-f")                FIND_KEYWORD=Yes; OPTIONS= ;;
      # -guru selects the "G" key that LIST and VIEW already handle.
      "-guru" | "-g")                TSKGI="G $TSKGI"; OPTIONS= ;;
      "-info" | "-i")                TSKGI="I $TSKGI"; OPTIONS= ;;
      "-match" | "-m")               MATCH_WORDS=Yes; OPTIONS= ;;
      "-title" | "-t")               TSKGI="T $TSKGI"; OPTIONS= ;;
      "-update" | "-up" | "-u")      UPDATE_FILE=$MASTER_LIB/$VAR; OPTIONS=Yes ;;
      "-output" | "-out" | "-o")     OUTPUT=$VAR; OPTIONS=Yes ;;
      "-subject" | "-sub" | "-s")    TSKGI="S $TSKGI"; OPTIONS= ;;
      "-keyword" | "-key" | "-k")    TSKGI="K $TSKGI"; OPTIONS= ;;
      "-verbose" | "-verb" | "-v")   VERBOSE=Yes; OPTIONS= ;;
      "-database" | "-data" | "-d")  LIB=$VAR; OPTIONS=Yes ;;
      "-configure" | "-conf" | "-c") CONFIGURE=Yes; OPTIONS= ;;
      "-elaborate" | "-elab" | "-e") LOCATE="$LOCATE$VAR "; OPTIONS=Yes ;;
      "--conf" | "--c")              CONFIGURE=No; OPTIONS= ;;
      "-sync" | "-synk")             SYNC=Yes; OPTIONS= ;;
      "-naive" | "-n")               NAIVE=Yes; OPTIONS= ;;
    esac
    if [ "$OPTIONS" = "No" ]; then
      # Two separate tests: a single "[ x -a y != z ]" is misparsed when the
      # word itself looks like a test operator (for example "-z").
      if [ -n "$OPTION" ] && [ "$OPTION" != "$UPROGNAME" ]; then
        LOCATE="$LOCATE$OPTION "
      fi
    fi
    if [ "$OPTIONS" != "Yes" ]; then
      OPTION=$VAR
    else
      OPTION=
    fi
  done
  # Derived only now, so that a -database option moves the index files
  # together with the directory they live in.
  KEYWORDS=$LIB/KEYWORDS
  FILENAMES=$LIB/FILENAMES
  if [ -z "$PAGER" ]; then
    PAGER=/usr/new/less
  fi
  if [ -z "$EDITOR" ]; then
    EDITOR="/usr/new/emacs -nw"
  fi
  # No search words at all: fall back to showing the manual page.
  if [ ! "$LOCATE" ]; then
    MANUAL=Yes
  fi
  # No field selected: search the key word lines.
  if [ ! "$TSKGI" ]; then
    TSKGI="K"
  fi
  if [ "$VERBOSE" ]; then
    $ECHO "$UPROGNAME: Executing."
    $ECHO "$UPROGNAME: Initializing."
  fi
: ; }
MANPAGE ()
{
#
# Function: MANPAGE
#
# When MANUAL is set, runs $HELPMAN on $LPROGNAME and exits the program
# with status 0.  Otherwise it returns straight away.  Progress messages
# are printed only when VERBOSE is set.
#
  test -z "$VERBOSE" ||
    $ECHO "$UPROGNAME: Checking variables for displaying manual page."
  # Nothing to show: hand control back to the caller.
  if test -z "$MANUAL"; then
    return 0
  fi
  test -z "$VERBOSE" ||
    $ECHO "$UPROGNAME: Displaying the manual page for $UPROGNAME."
  $HELPMAN $LPROGNAME
  exit 0
}
NOTIFY ()
{
#
# Function: NOTIFY
#
# Sends a message with the subject "Sysman updated: $FILENAME" to the
# address "system" through $MAIL, discarding the mailer's standard output.
# The message body names $FILENAME and ends with a "~r $UPDATE_FILE" line.
#
  if test -n "$VERBOSE"; then
    $ECHO "$UPROGNAME: Notify systems."
  fi
  $MAIL -s "Sysman updated: $FILENAME" system >$NULL <<END_OF_MESSAGE
I updated sysman...
Here is the new "$FILENAME", please take a look at it.
If you want to change this file, use "sysman -v -u $FILENAME"
Thank you.
---8<--[ CUT HERE ]----------------------------------------------------------
~r $UPDATE_FILE
END_OF_MESSAGE
  # The mailer's exit status is not reported to the caller.
  return 0
}
INSTALL ()
{
#
# Function: INSTALL
#
# Copies $UPDATE_FILE to /usr/<machine>/local/lib/sysman/$FILENAME for
# every machine named in $SERVERS.  When FILENAME is empty it is first set
# to the base name of $UPDATE_FILE.
#
  test -n "$FILENAME" || FILENAME=`$BASENAME $UPDATE_FILE`
  test -z "$VERBOSE" || $ECHO "$UPROGNAME: Installing- $FILENAME"
  for MACHINE in $SERVERS
  do
    $CP $UPDATE_FILE /usr/$MACHINE/local/lib/sysman/$FILENAME
  done
  # Copy failures are not reported to the caller.
  return 0
}
ADD ()
{
#
# Function: ADD
#
# When ADD_FILE is set, creates the next numbered entry for it in
# $MASTER_LIB: a four-line header template followed by the contents of
# $ADD_FILE.  The entry is named <base name of ADD_FILE>.<n>, where <n> is
# one more than the highest numeric suffix already present (1 if there is
# none).
#
# Sets: FILE, DUMMY, APPENDAGE, FILENAME, UPDATE_FILE, and CONFIGURE
#       (to Yes, unless it is already No).
#
  if [ "$VERBOSE" ]; then
    $ECHO "$UPROGNAME: Add $ADD_FILE"
  fi
  if [ "$ADD_FILE" ]; then
    FILE=`$BASENAME $ADD_FILE`
    # Highest existing suffix.  Only the text after the LAST dot is looked
    # at, so dots in the directory or in the file name itself do not upset
    # the numbering; a non-numeric suffix counts as 0.  With no existing
    # entry ls prints nothing (its complaint goes to $NULL) and awk
    # reports 0.
    DUMMY=`($LS $MASTER_LIB/$FILE.* ) 2>$NULL | $AWK -F. 'BEGIN { max = 0 } { n = $NF + 0; if (n > max) max = n } END { print max + 0 }'`
    APPENDAGE=`$EXPR $DUMMY + 1`
    FILENAME=$FILE.$APPENDAGE
    UPDATE_FILE=$MASTER_LIB/$FILENAME
    $CAT >$UPDATE_FILE <<EOF
SUBJECT : Subject
TITLE : Title
INFO : WeekDay- Month Day, Year | WeekDay- Month Day, Year [Guru]
KEY WORDS: KeyWords
EOF
    $CAT $ADD_FILE >>$UPDATE_FILE 2>$NULL
    # A new entry means the index must be rebuilt, unless the caller
    # explicitly turned that off.
    if [ "$CONFIGURE" != "No" ]; then
      CONFIGURE=Yes
    fi
  fi
: ; }
UPDATE ()
{
#
# Function: UPDATE
#
# Does nothing unless UPDATE_FILE or ADD_FILE is set.  Otherwise
# $UPDATE_FILE must be writable: it is opened in $EDITOR, the files
# matching $MASTER_LIB/*~ are removed, and INSTALL and NOTIFY are run.
# CONFIGURE is then set to Yes unless it is already No.  When the file is
# not writable an error is printed and the program exits with status 1.
#
  if test -n "$VERBOSE"; then
    $ECHO "$UPROGNAME: Update $UPDATE_FILE"
  fi
  # Neither an update nor an add was requested.
  if test -z "$UPDATE_FILE" && test -z "$ADD_FILE"; then
    return 0
  fi
  if test ! -w "$UPDATE_FILE"; then
    $ECHO "$UPROGNAME*ERROR: File \"$UPDATE_FILE\" does not exist!"
    exit 1
  fi
  $EDITOR $UPDATE_FILE
  $RM -rf $MASTER_LIB/*~
  INSTALL
  NOTIFY
  case "$CONFIGURE" in
    No) ;;
    *) CONFIGURE=Yes ;;
  esac
  return 0
}
SYNCRONIZE ()
{
#
# Function: SYNCRONIZE
#
# When SYNC is set: lists $MASTER_LIB and each server's
# /usr/<machine>/local/lib/sysman directory, collects the names that diff
# marks with "<" (present in the master listing only), runs INSTALL once
# for each distinct name, and exits the program with status 0.
# Without SYNC it only prints its progress message (if VERBOSE is set).
#
  test -z "$VERBOSE" || $ECHO "$UPROGNAME: Syncronizing databases..."
  if test -z "$SYNC"; then
    return 0
  fi
  $RM -rf $TEMP_3
  $LS $MASTER_LIB >$TEMP_1
  for MACHINE in $SERVERS
  do
    $LS /usr/$MACHINE/local/lib/sysman >$TEMP_2
    # Keep the "< name" lines of the diff and take the name from them.
    $DIFF $TEMP_1 $TEMP_2 | $EGREP '<' | $AWK '{print $2}' >>$TEMP_3
  done
  $SORT -u $TEMP_3 >$TEMP_1
  for FILENAME in `$CAT $TEMP_1`
  do
    UPDATE_FILE=$MASTER_LIB/$FILENAME
    INSTALL
  done
  exit 0
}
MAKE_DATA ()
{
#
# Function: MAKE_DATA
#
# CONFIGURE = Yes: rebuilds the two index files from the entries in
#   $MASTER_LIB, installs both through INSTALL, and exits with status 0.
#     $MASTER_FILENAMES - every SUBJECT / TITLE / INFO / KEY WORD line,
#                         as printed by egrep over all entries
#     $MASTER_KEYWORDS  - the sorted, distinct alphabetic words found on
#                         the KEY WORD lines
#   If $MASTER_LIB cannot be entered, an error is printed and the program
#   exits with status 1 without touching or installing anything.
# CONFIGURE = No:  exits with status 0 without rebuilding.
# Otherwise:       returns to the caller.
#
  if [ "$VERBOSE" ]; then
    $ECHO "$UPROGNAME: Checking master database for files."
  fi
#
# CONFIGURE the database?
#
  if [ "$CONFIGURE" = "Yes" ]; then
    if [ "$VERBOSE" ]; then
      $ECHO "$UPROGNAME: Creating new database information-- PLEASE WAIT..."
    fi
    # Everything below works on "*" in the current directory, so a failed
    # cd must stop the rebuild: otherwise an index built from the wrong
    # directory would be pushed to every server.
    cd $MASTER_LIB || {
      $ECHO "$UPROGNAME*ERROR: Cannot change directory to \"$MASTER_LIB\"!"
      exit 1
    }
    $RM -rf $MASTER_KEYWORDS
    $RM -rf $MASTER_FILENAMES
    $EGREP "^KEY WORD|^SUBJECT|^TITLE|^INFO" * >$MASTER_FILENAMES
#
# I would think it would be possible to speed up this next loop somehow.
# Old runtime: 15 minutes...
# New runtime: 32 seconds!!!
#
    $EGREP -h "^KEY WORD" * >$TEMP_1
    # One word per line: every run of non-letters becomes a single newline.
    $TR -cs A-Za-z '\012' <$TEMP_1 >$TEMP_2
    $SORT -u $TEMP_2 >$MASTER_KEYWORDS
    UPDATE_FILE=$MASTER_KEYWORDS
    FILENAME=`$BASENAME $UPDATE_FILE`
    INSTALL
    UPDATE_FILE=$MASTER_FILENAMES
    FILENAME=`$BASENAME $UPDATE_FILE`
    INSTALL
    # The program ends here, so remove the scratch files now.
    $RM -f $TEMP_1 $TEMP_2
    exit 0
  else
    if [ "$CONFIGURE" = "No" ]; then
      if [ "$VERBOSE" ]; then
        $ECHO "$UPROGNAME: --conf. Exiting."
      fi
      exit 0
    fi
  fi
: ; }
LIST ()
{
#
# Function: LIST
#
# Builds the list of candidate lines in $TEMP_2.  For each key in $TSKGI
# (T, S, K, G, I) the lines of $FILENAMES carrying the matching tag are
# appended to $TEMP_1; those lines are then narrowed to the ones that
# contain at least one of the words in $LOCATE (case-insensitive).  With no
# words at all, every collected line is kept.
#
# NOTE(review): the first step removes everything matching
# $TMP/$UPROGNAME*, not only this process's own temporary files.
#
  test -z "$VERBOSE" || $ECHO "$UPROGNAME: List."
  $RM -rf $TMP/$UPROGNAME*
  for KEY in $TSKGI
  do
    case "$KEY" in
      T)     $EGREP "$TITLE" $FILENAMES >>$TEMP_1 ;;
      S)     $EGREP "$SUBJECT" $FILENAMES >>$TEMP_1 ;;
      K)     $EGREP "$KEYWORD" $FILENAMES >>$TEMP_1 ;;
      # Both the guru and the info key select the INFO lines.
      G | I) $EGREP "$INFO" $FILENAMES >>$TEMP_1 ;;
    esac
  done
  # Turn "word1 word2 " into the egrep alternation "word1|word2".
  MATCH_KEY=`$ECHO $LOCATE | $TR " " "|"`
  if test -z "$MATCH_KEY"; then
    $MV $TEMP_1 $TEMP_2
  else
    $EGREP -i $MATCH_KEY $TEMP_1 >$TEMP_2
  fi
  return 0
}
CHECK ()
{
#
# Function: CHECK
#
# Sorts the words of $LOCATE into two lists: VALID for words that match at
# least one line of $FILENAMES (case-insensitive), NOT_VALID for words that
# match nothing.  A warning is printed for every word in NOT_VALID.
# BOGUS is cleared as well.
#
  if [ "$VERBOSE" ]; then
    $ECHO "$UPROGNAME: Matching: "$LOCATE
  fi
  BOGUS=
  VALID=
  NOT_VALID=
  for WORD in $LOCATE
  do
    # egrep's exit status already says whether anything matched, so its
    # output is simply discarded.  The word is quoted so that it reaches
    # egrep as one unmodified pattern.
    if $EGREP -i "$WORD" $FILENAMES >$NULL; then
      VALID="$VALID$WORD "
    else
      $ECHO "$UPROGNAME*WARNING: Word \"$WORD\" is not recognized [IGNORED]."
      NOT_VALID="$NOT_VALID$WORD "
    fi
  done
# If the following code is included.... people just run sysman without the
# offending keyword..... lets just make sysman do that automatically.
# if [ "$NOT_VALID" ]; then
# exit 1
# fi
: ; }
MATCH ()
{
#
# Function: MATCH
#
# When MATCH_WORDS is set, prints for each word in $LOCATE the lines of
# $KEYWORDS that contain it (case-insensitive) and then exits the program
# with status 0.  Otherwise it returns to the caller.
#
  test -z "$VERBOSE" ||
    $ECHO "$UPROGNAME: Checking variables to match a sub string."
  if test -z "$MATCH_WORDS"; then
    return 0
  fi
  test -z "$VERBOSE" ||
    $ECHO "$UPROGNAME: Matching strings : "$LOCATE
  for WORD in $LOCATE
  do
    $EGREP -i $WORD $KEYWORDS
  done
  exit 0
}
SEARCH ()
{
#
# Function: SEARCH
#
# Narrows the candidate lines in $TEMP_2 one word at a time, taking the
# words from "$VALID $NOT_VALID" (which also becomes the new LOCATE).
#
# Without ``NAIVE'' ALL keywords must be matched: the result of every
# filtering step replaces $TEMP_2, even when it is empty.
# With ``NAIVE'' the keywords are significant from the left: a word whose
# filtering step leaves nothing is skipped with a warning, and $TEMP_2
# keeps its previous contents.
#
  test -z "$VERBOSE" ||
    $ECHO "$UPROGNAME: Searching the database for matching files."
  LOCATE="$VALID $NOT_VALID"
  for WORD in $LOCATE
  do
    $EGREP -i $WORD $TEMP_2 >$TEMP_1
    if test -z "$NAIVE" || test -s "$TEMP_1"; then
      $MV $TEMP_1 $TEMP_2
    else
      $ECHO "$UPROGNAME*WARNING: Duh, ignoring \`\`$WORD'' "
    fi
  done
  return 0
}
VIEW ()
{
#
# Function: VIEW
#
# Presents the search result held in $TEMP_2 (first cut down with
# "$COLRM 79 9999").
#
# SHOW set:   for each key in $TSKGI, prints the lines carrying that tag
#             with the tag replaced by $CR$TABS, then exits with status 0.
# No matches: prints an error naming the searched fields and the words in
#             $LOCATE, then exits with status 1.
# Otherwise:  reduces the lines to the distinct file names in front of the
#             first ":" and either appends each file from $LIB, followed
#             by a line of underscores, to $OUTPUT, or hands all the names
#             to $PAGER from inside $LIB.
#
  test -z "$VERBOSE" || $ECHO "$UPROGNAME: View."
  $COLRM 79 9999 <$TEMP_2 >$TEMP_1
  if test -n "$SHOW"; then
    for KEY in $TSKGI
    do
      case "$KEY" in
        T)     $EGREP "$TITLE" $TEMP_1 | $SED "s/$TITLE/$CR$TABS/g" ;;
        S)     $EGREP "$SUBJECT" $TEMP_1 | $SED "s/$SUBJECT/$CR$TABS/g" ;;
        K)     $EGREP "$KEYWORD" $TEMP_1 | $SED "s/$KEYWORD/$CR$TABS/g" ;;
        G | I) $EGREP "$INFO" $TEMP_1 | $SED "s/$INFO/$CR$TABS/g" ;;
      esac
    done
    exit 0
  fi
  test -z "$VERBOSE" ||
    $ECHO "$UPROGNAME: Displaying the list of answer files."
  if test ! -s $TEMP_1; then
    # Build the message piece by piece on one output line.
    $ECHO -n "$UPROGNAME*ERROR: No matches for "
    for KEY in $TSKGI
    do
      case "$KEY" in
        T) $ECHO -n "[title] " ;;
        S) $ECHO -n "[subject] " ;;
        K) $ECHO -n "[keyword] " ;;
        G) $ECHO -n "[guru] " ;;
        I) $ECHO -n "[info] " ;;
      esac
    done
    $ECHO ": $LOCATE"
    exit 1
  fi
  # Each line starts with "name:"; keep every name once.
  $TR ":" " " <$TEMP_1 | $AWK '{print $1}' | $SORT -u >$TEMP_2
  if [ "$VERBOSE" -a "$OUTPUT" ]; then
    $ECHO "$UPROGNAME: Saving all answers to: $OUTPUT"
  fi
  cd $LIB
  if test -z "$OUTPUT"; then
    $PAGER `$CAT $TEMP_2`
  else
    # Go back first, so that $OUTPUT is opened from the directory recorded
    # in PWD_DIRECTORY.
    cd $PWD_DIRECTORY
    while read COPY
    do
      $CAT $LIB/$COPY >>$OUTPUT
      $ECHO "_______________________________________________________________________________" >>$OUTPUT
    done <$TEMP_2
  fi
  return 0
}
CLEAN ()
{
#
# Function: CLEAN
#
# Removes everything matching $TMP/$UPROGNAME* and changes back to
# $PWD_DIRECTORY.  Anything the function writes to standard error is sent
# to $NULL.
#
  test -z "$VERBOSE" || $ECHO "$UPROGNAME: Cleaning up after $UPROGNAME."
  $RM -rf $TMP/$UPROGNAME*
  cd $PWD_DIRECTORY
  :
} 2>$NULL
MAIN ()
{
#
# Function: MAIN
#
# Runs the stages of the program in a fixed order.  Several stages end the
# program themselves (they call exit) when the flag they serve is set, so
# the stages after them run only if none of the earlier ones applied.
#
# Set tool paths and defaults, then parse $VARS into flags and search words.
INITIALIZE_VARIABLES
# Create a new numbered entry when ADD_FILE is set.
ADD
# Edit, install and announce an entry when UPDATE_FILE or ADD_FILE is set.
UPDATE
# Rebuild the index files when CONFIGURE is Yes; exits for Yes and for No.
MAKE_DATA
# Copy entries missing on the servers when SYNC is set, then exit.
SYNCRONIZE
# Collect the tagged lines selected by TSKGI into the temporary files.
LIST
# Show the manual page and exit when MANUAL is set.
MANPAGE
# Split the search words into VALID and NOT_VALID.
CHECK
# Print matching key words and exit when MATCH_WORDS is set.
MATCH
# Narrow the collected lines word by word.
SEARCH
# Show, page or save the result; exits in list mode and when nothing matched.
VIEW
# Remove the temporary files.
CLEAN
: ; }
#
# This next part of the code sets up the initial hook.
#
# WPROGNAME: the name the script was invoked under, exactly as given.
WPROGNAME=$0
# LPROGNAME: that name without its directory part; MANPAGE hands it to the
# manual page command.
LPROGNAME=`/bin/basename $0`
# UPROGNAME: the same name in upper case.  It prefixes every message and
# the temporary file names, and marks the end of the argument list below.
UPROGNAME=`/bin/basename $LPROGNAME | /bin/tr "[a-z]" "[A-Z]"`
# VARS: the command-line words followed by UPROGNAME.  The option loop in
# INITIALIZE_VARIABLES works one word behind, so it needs this extra word
# to process the last real argument; a word equal to UPROGNAME is never
# added to the search words.
VARS="$@ $UPROGNAME"
MAIN
SHAR_EOF
# End of shell archive
exit 0
--
-----------------------------------------------------------------------------
Scott D. Yelich scott@cs.odu.edu [128.82.8.1]
After he pushed me off the cliff, he asked me, as I fell, ``Why'd you jump?''
Administrator of: Game design requests to <game-design-request@cs.odu.edu>
-----------------------------------------------------------------------------