home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Usenet 1994 October
/
usenetsourcesnewsgroupsinfomagicoctober1994disk2.iso
/
games
/
volume12
/
ag
/
part01
/
anagram.c
next >
Wrap
C/C++ Source or Header
|
1991-05-20
|
16KB
|
676 lines
/**********************************************************
* *
* *
* *
* A program to generate anagrams *
* Based on an idea in Byte, Nov 1987 *
* *
* Written by *
* *
* Morten Lerskau Ronseth *
* morten@cs.qmw.ac.uk *
* *
* 9|3|1988 *
* *
* *
**********************************************************/
/*
* Jan. 30, 1991
* rewrote the dictionary loader, now uses a pool for allocation.
*/
/*
* Sept. 28, 1990
* removed "islower" & "isupper" - they didn't work...
*/
/*
* Sept. 26, 1990
* wrote my own "toupper" & "tolower" so as to be compatible
* with any system.
*/
#include <stdio.h>
#include <signal.h>
extern char *xmalloc (), *xrealloc ();
extern void xfree ();
extern char *cat_strings ();
extern char *xindex ();
extern void fatal ();
extern void _abort ();
/* various definitions */
#define STD_DICTIONARY "/usr/dict/words"
#define MAX_LINELEN 70
#define MAX_WORDLEN 32
#define MAXMASKS 3
#define bitmask long
#define maskwidth (8 * sizeof (bitmask))
#define large_pool_space 8192
#define large_word_space 80
#define large_stack_space 200
#define tolower(c) hi2lo[c - 65]
#define toupper(c) lo2hi[c - 65]
/* some systems cannot handle lowercase in tolower
and vice versa...use my own table for fast lookup */
char hi2lo[] = {
'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o',
'p','q','r','s','t','u','v','w','x','y','z',0,0,0,0,0,0,
'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o',
'p','q','r','s','t','u','v','w','x','y','z'
};
char lo2hi[] = {
'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O',
'P','Q','R','S','T','U','V','W', 'X','Y','Z', 0,0,0,0,0,0,
'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O',
'P','Q','R','S','T','U','V','W', 'X','Y','Z'
};
char *program;
char *usage = "[-d `dictionary'] [-a] [-v] [-V] [-w] [-W] [-s `SIZE'] [-o `outfile'] [phrase(s)]";
/* 1 if only action is to print out the ellegible words [-W]*/
int words_only = 0;
/* 1 if the ellegible words are to be printed out [-w]*/
int words_out = 0;
/* size of smallest word in an angram.
i.e. -s 2 would prevent angrams with singlelettered words [-s]*/
int min_size = 2;
/* should we count `a' and `i' as complete words? [-a]*/
int do_ai = 0;
/* speak up! [-v] */
int verbose = 0;
/* 1 if write out version info [-V] */
int version = 0;
/* file to write anagrams to, if any */
char *outfile = NULL;
char *infile = STD_DICTIONARY;
char version_string[] = "Anagram Finder v. 1.0b3";
int no_of_phrases = 0;
/* phrase variables */
typedef bitmask bitsig[MAXMASKS];
int freqs[26], letmask[26], letbit[26], letwidth[26], lastmask;
bitsig uflosig;
char *phrase;
bitmask phrasesig[MAXMASKS];
/* pool information */
char *pool = NULL;
/* dictionary information */
char **wordlist = NULL;
int wordcount = 0;
bitmask *wordsigs;
/* for printing anagrams */
char **analist;
long anacount = 0;
long total = 0;
/*************************************************************************/
#define IS_NUM(X) ((X) >= '0' && (X) <= '9')
#define IS_LETTER(c) (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z'))
int
str_to_num (s)
char *s;
{
int result = 0;
while (*s) {
if (IS_NUM(*s))
result = (result * 10) + *s++ - '0';
}
return result;
}
int
candidate (word)
register char *word;
{
register char c;
register int count = 0;
if (strlen (word) < min_size && !do_ai) return 0;
while (c = word[count++]) {
if (!IS_LETTER (c) ||
(!xindex (phrase, tolower (c)) &&
!xindex (phrase, toupper (c))))
return 0;
}
if (do_ai && !word[1])
return (xindex ("aAiI", word[0]) != 0);
return 1;
}
/* prints out all the anagrams found so far, if any */
void
printlist (list, no)
char **list;
int no;
{
register int count;
register int linelen = 0;
for (count = 0; count < no; count++) {
linelen += strlen (list[count]) + 1;
if (linelen >= MAX_LINELEN) {
linelen = 0;
(void)fprintf (stdout, "\n");
}
(void)fprintf (stdout, "%s ", list[count]);
}
(void)fprintf (stdout, "\n");
(void)fflush (stdout);
}
/* old version using log didn't work too well...
use smalltalk's version of highBit instead */
int
fieldwidth (v)
int v;
{
int i = 1, bit = 1;
if (v < 0) _abort ("Fieldwidth: negative number");
if (!v) return 2;
while (v > bit) {
i++; bit += bit + 1;
}
return (i + 2);
}
void
makefreqs (str, freq)
register char *str;
register int freq[];
{
register char c;
register int f;
for (f = 0; f < 26; f++)
freq[f] = 0;
while (c = *(str++)) {
freq[tolower (c) - 97] += 1;
}
}
void
choosefields (frq)
int frq[];
{
int letter;
int curmask = 0,curbit = 0;
int width;
for (letter = 0; letter < 26; letter++)
if (frq[letter] != 0) {
width = fieldwidth (frq[letter]);
if ((curbit + width) > maskwidth) {
if (++curmask >= MAXMASKS)
_abort ("Phrase too long to handle.");
curbit = 0;
}
letmask[letter] = curmask;
letbit[letter] = curbit;
letwidth[letter] = width;
curbit += width;
}
lastmask = curmask;
}
void
makeonesig (str, sig)
register char *str;
register bitmask sig[];
{
register int i, j = 0;
int sfreqs[26];
register bitmask fr;
makefreqs (str, sfreqs);
for (i = 0; i <= lastmask; i++)
sig[i] = 0;
for (i = 0; i < 26; i++)
if (sfreqs[i]) {
fr = ((bitmask)sfreqs[i]) << letbit[i];
sig[letmask[i]] += fr;
j += fr;
}
}
void
makeuf (frq)
int frq[];
{
int i, bnum, bwidth;
for (i = 0; i < MAXMASKS; i++)
uflosig[i] = 0;
for (i = 0; i < 26; i++)
if (frq[i]) {
bnum = letbit[i];
bwidth = letwidth[i];
uflosig[letmask[i]] += (1L << (bnum + bwidth - 1));
}
}
#define DOMASK(MASK) { \
newmask = curnode[MASK] - cursig[MASK]; \
if (newmask & uflosig[MASK]) break; \
newsig[MASK] = newmask; \
bitsleft |= newmask; \
}
void
findanagrams (curword, curnode)
register int curword;
register bitmask *curnode;
{
bitsig newsig;
register bitmask newmask, *cursig;
register long bitsleft;
int bsize = large_stack_space;
cursig = &wordsigs[curword * (lastmask + 1)];
while (curword < wordcount) {
bitsleft = 0;
switch (lastmask) {
case 2:DOMASK(2)
case 1:DOMASK(1)
case 0:DOMASK(0)
if (anacount == bsize) {
bsize *= 2;
analist = (char **)xrealloc (analist, bsize * sizeof (char *));
}
analist[anacount++] = wordlist[curword];
if (!bitsleft) {
printlist (analist, anacount);
total++;
}
else findanagrams (curword, newsig);
--anacount;
}
curword++;
cursig += (lastmask + 1);
}
}
/* If realloc moves the list around in memory (not very likely, but still...)
* then update all pointers in the wordlist.
*/
void
adjust_list (list, offset, count)
char **list;
int offset, count;
{
register int i;
for (i = 0; i < count; i++)
list[i] += offset;
}
void
read_dict (name)
char *name;
{
FILE *dict;
int psize = large_pool_space;
int bsize = large_stack_space;
int msize = large_stack_space * MAXMASKS;
char *pend, *next_slot;
if (!(dict = fopen (name, "r"))) {
char *s = cat_strings ("Couldn't open <", name, ">");
_abort (s);
}
if (verbose) {
(void)fprintf (stderr, "\nLoading dictionary...");
(void)fflush (stderr);
}
/* analist has to be set up here, as "findanagrams" is recursive */
analist = (char **)xmalloc (bsize * sizeof (char *));
wordlist = (char **) xmalloc (bsize * sizeof (char *));
wordsigs = (bitmask *) xmalloc (msize * sizeof (bitmask));
pool = (char *)xmalloc (psize * sizeof (char));
pend = pool + psize;
next_slot = pool;
while (fscanf (dict, "%s", next_slot) != EOF) {
if (candidate (next_slot)) {
/* if this entry would overflow stack, expand it */
if (wordcount == bsize) {
bsize *= 2; msize *= 2;
wordlist = (char **)xrealloc (wordlist, bsize * sizeof (char *));
wordsigs = (bitmask *)xrealloc (wordsigs, msize * sizeof (bitmask));
}
wordlist[wordcount] = next_slot;
makeonesig (next_slot, &wordsigs[wordcount * (lastmask + 1)]);
next_slot += strlen (next_slot) + 1;
if ((next_slot + MAX_WORDLEN) >= pend) {
char *old_pool = pool;
psize *= 2;
pool = (char *)xrealloc (pool, psize * sizeof (char));
if (old_pool != pool)
adjust_list (wordlist, pool - old_pool, wordcount);
pend = pool + psize;
}
wordcount++;
}
}
if (verbose) {
(void)fprintf (stderr, "done\n");
(void)fflush (stderr);
}
(void)fclose (dict);
}
void
clean_up ()
{
/* first, free all strings allocated */
xfree (pool);
/* then, free the array of pointers to these strings */
xfree (wordlist);
/* then, free the array of pointers to the anagrams */
xfree (analist);
/* At last, free the array `wordsigs' */
xfree (wordsigs);
}
void
parse_flags (argc, argv)
int argc;
char *argv[];
{
int i;
for (i = 1; i < argc; i++) {
if (argv[i][0] == '-' && argv[i][1] != 0) {
register char *str = argv[i] + 1;
switch (str[0]) {
case 'd':
if (argv[i + 1] != NULL) {
infile = argv[i + 1];
argv[i + 1] = NULL;
}
break;
case 's':
if (argv[i + 1] != NULL) {
min_size = str_to_num (argv[i + 1]);
argv[i + 1] = NULL;
}
else
_abort ("No size specified.");
break;
case 'o':
if (outfile != NULL)
_abort ("Outfile specified twice");
if (argv[i + 1] != NULL) {
outfile = argv[i + 1];
if (!strncmp (outfile, "-", 1))
outfile = "";
else
argv[i + 1] = NULL;
}
else
_abort ("No outfile specified.");
break;
case 'a':
do_ai = 1;
break;
case 'v':
verbose = 1;
break;
case 'V':
version = 1;
break;
case 'w':
words_out = 1;
break;
case 'W':
words_only = 1;
break;
case '-':
case '\n':
if (outfile == NULL)
outfile = "";
argv[i] = NULL;
break;
default : {
char *s = cat_strings ("Unrecognized flag: \"", str, "\"");
_abort (s);
}
}
}
else if (argv[i] != NULL && ++no_of_phrases != i) {
argv[no_of_phrases] = argv[i];
argv[i] = NULL;
}
}
}
void
do_all (argv)
char *argv[];
{
int i, j;
for (i = 1; i <= no_of_phrases; i++) {
phrase = argv[i];
wordcount = 0;
anacount = 0;
total = 0;
makefreqs (phrase, freqs);
choosefields (freqs);
makeonesig (phrase, phrasesig);
makeuf (freqs);
read_dict (infile);
if (words_out || words_only) {
(void)fprintf (stdout, "number of words read : %d\n",
wordcount);
(void)fflush (stdout);
printlist (wordlist, wordcount);
}
if (words_only)
continue;
if (verbose) {
(void)fprintf (stderr, "\nStarting generator...");
(void)fprintf (stdout, "\n Anagrams generated from \"%s\" :", phrase);
(void)fprintf (stdout, "\n--------------------------");
for (j = 0; j < strlen (phrase); j++)
(void)fprintf (stdout, "-");
(void)fprintf (stdout, "---\n");
(void)fflush (stdout);
}
/* Find all anagrams asociated with the specified pattern */
findanagrams (0, phrasesig);
if (verbose) {
(void)fprintf (stdout, "\nNo. of anagrams : %ld\n", total);
(void)fprintf (stderr, "done\n");
}
clean_up ();
}
}
main (argc, argv)
int argc;
char **argv;
{
/* set up interrupt handler */
if (signal (SIGINT, SIG_IGN) != SIG_IGN)
signal (SIGINT, fatal);
if (signal (SIGKILL, SIG_IGN) != SIG_IGN)
signal (SIGKILL, fatal);
program = argv[0];
if (argc == 1)
_abort (usage);
parse_flags (argc, argv);
if (version) {
(void)fprintf (stderr, "%s\n", version_string);
if (no_of_phrases == 0)
exit (0);
}
if (no_of_phrases == 0)
_abort ("No pattern(s) specified");
if (!outfile || !strcmp (outfile, ""))
outfile = "stdout";
else if (!freopen (outfile, "w", stdout)) {
char *s = cat_strings ("Couldn't open ", outfile, "");
_abort (s);
}
do_all (argv);
return (0);
}
char *
xrealloc (ptr, size)
char *ptr;
int size;
{
register char *result = (char *)realloc (ptr, size);
if (!result)
_abort ("Virtual memory exhausted.");
return result;
}
char *
xmalloc (size)
int size;
{
register char *result = (char *)calloc (1, size);
if (!result)
_abort ("Virtual memory exhausted.");
return result;
}
void
xfree (ptr)
char *ptr;
{
if (ptr) free (ptr);
}
void
fatal (signum)
int signum;
{
signal (signum, SIG_DFL);
}
char *
cat_strings (s1, s2, s3)
char *s1, *s2, *s3;
{
int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
char *s = (char *) xmalloc (len1 + len2 + len3 + 1);
(void)strcpy (s, s1);
(void)strcpy (s + len1, s2);
(void)strcpy (s + len1 + len2, s3);
*(s + len1 + len2 + len3) = 0;
return s;
}
/* BSD and SYSV use different names, so use my own */
char *
xindex (s, c)
register char *s, c;
{
while (*s) {
if (*s == c) return (s);
s++;
}
return 0;
}
void
_abort (arg)
char *arg;
{
(void)fprintf (stderr, "%s: ", program);
(void)fprintf (stderr, arg);
(void)fprintf (stderr, "\n");
(void)fflush (stderr);
exit (-1);
}