home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
The Fred Fish Collection 1.5
/
ffcollection-1-5-1992-11.iso
/
ff_disks
/
100-199
/
ff191.lzh
/
ISpell
/
unix.zoo
/
fixdict
< prev
next >
Wrap
Text File
|
1989-02-22
|
3KB
|
82 lines
#!/bin/sh
#
# Add capitalization information to an ispell dictionary
#
# Usage:
#
# fixdict dict-file
#
# Requires availability of UNIX spell. The new dictionary is
# rewritten in place. A list of words that couldn't be
# resolved (because spell doesn't know them) is written to
# standard output. This list appears in lowercase in the
# dictionary, and if there are any errors they must be edited
# by hand.
#
# The final dictionary appears in expanded form and must be
# passed through munchlist to regenerate suffixes.
#
#
# Configuration.  LIBDIR holds the four sed scripts (isexp1..isexp4) that
# the pipelines in this script apply, in that order, to the dictionary.
#
LIBDIR=/tools/sources/ispell
EXPAND1=${LIBDIR}/isexp1.sed
EXPAND2=${LIBDIR}/isexp2.sed
EXPAND3=${LIBDIR}/isexp3.sed
EXPAND4=${LIBDIR}/isexp4.sed
#
# Scratch space.  The intermediate word lists are written with plain
# redirections, so they must not live under a guessable name in a
# world-writable directory.  mktemp creates a private directory with an
# unpredictable name; ${TMP} stays a filename *prefix* (${TMP}a,
# ${TMP}b, ...) inside it.  The base directory remains /tmp; TMPDIR is
# not consulted.
#
# TDIR=${TMPDIR:-/tmp}
TDIR=$(mktemp -d /tmp/fixdict.XXXXXX) || exit 1
TMP=${TDIR}/fix
#
# Remove the scratch directory on every exit path.  The signal trap only
# needs to call exit: that in turn runs the exit (0) trap.
#
trap '/bin/rm -rf "${TDIR}"' 0
trap 'exit 1' 1 2 15
#
# Insist on exactly one readable dictionary file before doing any work.
# Without this check a missing argument makes sed read standard input
# and leaves the final in-place rewrite without a target.
#
if [ $# -ne 1 ] || [ ! -r "$1" ]; then
    echo "Usage: fixdict dict-file" 1>&2
    exit 2
fi
#
# Run the dictionary through the four sed scripts, fold everything to
# lowercase, and let spell pick out the words it rejects.  The sorted
# rejects go to ${TMP}a.  Expansions are quoted so that a dictionary
# path containing blanks or glob characters is passed through intact.
#
sed -f "${EXPAND1}" "$1" | sed -f "${EXPAND2}" \
  | sed -f "${EXPAND3}" | sed -f "${EXPAND4}" \
  | tr '[A-Z]' '[a-z]' \
  | spell \
  | sort > "${TMP}a"
#
# At this point ${TMP}a lists every lowercase word that spell rejected.
# Some of those are rejected only because of their case; others are
# simply unknown to spell.  Retry the whole list in uppercase: whatever
# spell still rejects is unknown.  Fold those back to lowercase and keep
# them in ${TMP}b.
#
tr '[a-z]' '[A-Z]' < "${TMP}a" \
  | spell \
  | tr '[A-Z]' '[a-z]' \
  > "${TMP}b"
#
# Words present in ${TMP}a but absent from ${TMP}b were accepted by spell
# once uppercased, so only their capitalization is wrong.  List them with
# the first letter capitalized and save the result in ${TMP}c.  Each sed
# expression handles one initial letter; at most one can match per line.
#
comm -23 "${TMP}a" "${TMP}b" \
  | sed -e 's/^a/A/' -e 's/^b/B/' -e 's/^c/C/' -e 's/^d/D/' \
        -e 's/^e/E/' -e 's/^f/F/' -e 's/^g/G/' -e 's/^h/H/' \
        -e 's/^i/I/' -e 's/^j/J/' -e 's/^k/K/' -e 's/^l/L/' \
        -e 's/^m/M/' -e 's/^n/N/' -e 's/^o/O/' -e 's/^p/P/' \
        -e 's/^q/Q/' -e 's/^r/R/' -e 's/^s/S/' -e 's/^t/T/' \
        -e 's/^u/U/' -e 's/^v/V/' -e 's/^w/W/' -e 's/^x/X/' \
        -e 's/^y/Y/' -e 's/^z/Z/' \
  > "${TMP}c"
#
# Check the initial-capital forms with spell.  The ones it still rejects
# are saved in ${TMP}d.
#
spell "${TMP}c" > "${TMP}d"
#
# Build ${TMP}e from two pieces:
#   - the entries of ${TMP}c that are not in ${TMP}d, i.e. the words that
#     were right with just the first letter capitalized;
#   - the entries of ${TMP}d converted to all capitals.
# The combined list is sorted into ${TMP}e.
#
{
    comm -23 "${TMP}c" "${TMP}d"
    tr '[a-z]' '[A-Z]' < "${TMP}d"
} | sort -o "${TMP}e"
#
# Now ${TMP}b holds the words spell does not know in any form, and
# ${TMP}e holds the words with their capitalization fixed.  The two
# intermediate lists are no longer needed.
#
/bin/rm "${TMP}"[cd]
#
# Put it all together, rewriting the dictionary in place:
#   1. expand and lowercase the dictionary again and sort it;
#   2. drop every word listed in ${TMP}a (the ones spell rejected in
#      lowercase);
#   3. merge what is left with ${TMP}b (unknown words, still lowercase)
#      and ${TMP}e (recapitalized words), sorting with case folded.
# The dictionary path is quoted both as input and as the -o target so a
# name containing blanks or glob characters is not split or expanded.
# Writing to the file being read relies on sort consuming all of its
# input before it opens the -o file.
#
sed -f "${EXPAND1}" "$1" | sed -f "${EXPAND2}" \
  | sed -f "${EXPAND3}" | sed -f "${EXPAND4}" \
  | tr '[A-Z]' '[a-z]' \
  | sort \
  | comm -23 - "${TMP}a" \
  | sort -f -o "$1" - "${TMP}b" "${TMP}e"
#
# Last step: print on standard output the words left in ${TMP}b.  Their
# capitalization could not be settled, and they remain lowercase in the
# dictionary.  Then discard all remaining scratch files.
#
cat "${TMP}b"
/bin/rm "${TMP}"*