home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Usenet 1994 October
/
usenetsourcesnewsgroupsinfomagicoctober1994disk2.iso
/
games
/
volume2
/
autopun
/
autopun.c
< prev
next >
Wrap
C/C++ Source or Header
|
1987-11-25
|
14KB
|
597 lines
/*
* autopun - Phrase reparser.
*
* Usage:
* autopun [-e English_dict] [-p Phone_dict]
* or
* autopun [-e English_dict] -c -p Phone_dict
*
* Given an English phrase (as stdin), autopun prints (to stdout) a table
* that can be used to create phonemically-similar phrases. E.g.,
* "Happy Birthday" can be recast as "Hub pip Earth tee".
*/
#include <stdio.h>
#include "phoneme.h"
#define TRUE 1
#define FALSE 0
char *cmd; /* basename of argv[0]; prefixes error messages */
int phread = FALSE; /* TRUE when the phone dict is the input (-p given without -c) */
int phcreate = FALSE; /* TRUE when the phone dict is to be created (-c) */
char *engldict = "/usr/dict/words"; /* English dictionary file (-e overrides) */
char *phonedict; /* phonemic dictionary file (-p); null unless -p was given */
char *indictname; /* name of the dict being read: phonedict or engldict */
FILE *indict; /* the open dict being read */
FILE *outdict; /* the open dict being written; opened only when phcreate */
/* (its name is always phonedict) */
/*
* struct amatch - representation of the phonemic match of a word with
* some part of the phrase.
*
* Matches that start at the same phoneme position are chained through
* `next'; recmatch() keeps each chain ordered by descending nextpos.
*/
struct amatch {
struct amatch *next; /* next word matching at this position */
char *text; /* English text of the matching word (strsave()d by reparse()) */
short nextpos; /* position of phoneme after this word; */
/* a value >= targlen means the word ends the phrase */
};
/*
* struct posinfo - information about one phonemic position in the phrase.
*/
struct posinfo {
struct amatch *wlist; /* list of words that match at this point (null if none) */
unsigned num_parents; /* # of words leading directly to this point; */
/* recomputed from scratch by prune() */
};
/*
* posinfo[] - indexed by phoneme position in the phrase,
* i.e. by the same index that addresses target[].
*/
#define MAX_PHONES 300 /* Max # of phonemes in a phrase (including the P_end slot) */
struct posinfo posinfo[MAX_PHONES];
short target[MAX_PHONES]; /* phonemes of the phrase to reparse, already mapped */
/* through phmap[] and terminated by P_end */
int targlen; /* # of phonemes in target[] (less P_end); set by reparse() */
/*
* phmap[] - phoneme map. Maps a set of similar-sounding phonemes into one.
* Used in preprocessing strings before comparison.
*
* The table is indexed by a phoneme code (see mapphrase()); each entry is
* the representative phoneme that code is folded into. An entry of P_end
* makes mapphrase() drop the phoneme altogether.
*
* NOTE(review): the per-row comments name the phoneme each row stands for;
* this presumes the P_* codes in phoneme.h are numbered in exactly this
* order, starting at zero -- confirm against phoneme.h before reordering.
*/
short phmap[P_NUM] = {
P_end, /* P_end */
P_IY, /* P_IY */
P_IY, /* P_IH */
P_EY, /* P_EY */
P_EY, /* P_EH */
P_AE, /* P_AE */
P_AE, /* P_AA */
P_AE, /* P_AO */
P_AE, /* P_OW */
P_AE, /* P_UH */
P_UW, /* P_UW */
P_UW, /* P_ER */
P_AE, /* P_AX */
P_AE, /* P_AH */
P_AY, /* P_AY */
P_AE, /* P_AW */
P_OY, /* P_OY */
P_p, /* P_p */
P_p, /* P_b */
P_t, /* P_t */
P_t, /* P_d */
P_k, /* P_k */
P_g, /* P_g */
P_f, /* P_f */
P_f, /* P_v */
P_TH, /* P_TH */
P_f, /* P_DH */
P_s, /* P_s */
P_s, /* P_z */
P_s, /* P_SH */
P_s, /* P_ZH */
P_HH, /* P_HH */
P_m, /* P_m */
P_n, /* P_n */
P_n, /* P_NG */
P_l, /* P_l */
P_l, /* P_w */
P_y, /* P_y */
P_r, /* P_r */
P_CH, /* P_CH */
P_CH, /* P_j */
P_WH, /* P_WH */
P_end /* P_PAS (dropped by mapphrase()) */
};
/*
 * main() - parse the command line, open the dictionaries,
 * read one phrase from stdin and hand it to reparse().
 *
 * Switches:
 *   -c       create the phonemic dictionary (requires -p)
 *   -p file  name of the phonemic dictionary
 *   -e file  name of the English dictionary
 *
 * Exits with status 0 on success and 1 on a usage or I/O error.
 */
int
main(argc, argv)
int argc;
char **argv;
{
    char *cp;
    char line[300];
    int len;                    /* length of the phrase read from stdin */
    char *strrchr();

    /*
     * get the basename of the command name,
     * for use in error messages.
     */
    if ((cmd = strrchr(argv[0], '/'))) {
        ++cmd;
    } else {
        cmd = argv[0];
    }

    while (++argv, --argc > 0) {
        cp = *argv;
        if (*cp == '-' && *(cp + 1) != '\0') {
            switch(*++cp) {
            case 'c':   /* "create the phonemic dictionary" */
                        /* (requires -p flag) */
                phcreate = TRUE;
                break;
            case 'p':   /* "filename of phonemic dictionary" */
                if (++argv, --argc <= 0 ||
                  (**argv == '-' && *(*argv + 1) != '\0')) {
                    bomb("missing -p filename");
                }
                phonedict = *argv;
                break;
            case 'e':   /* "filename of English dictionary" */
                if (++argv, --argc <= 0) {
                    bomb("missing -e filename");
                }
                engldict = *argv;
                break;
            default:
                bomb("unknown switch `%c'", *cp);
            }
        } else {
            bomb("extra filename `%s'", *argv);
        }
    }
    if (phcreate && !phonedict) {
        bomb("missing -p flag");
    }
    phread = (phonedict && !phcreate);

    /*
     * open up the necessary files:
     * Input is either an English or phonemic dictionary;
     * Output is a phonemic dictionary (if requested).
     */
    if (phread) {
        indictname = phonedict;
    } else {
        indictname = engldict;
    }
    if (!(indict = fopen(indictname, "r"))) {
        fprintf(stderr, "%s: can't open \"%s\" -- ", cmd, indictname);
        perror("");
        exit(1);
    }
    if (phcreate) {
        /* refuse to truncate the very file we are about to read */
        if (strcmp(indictname, phonedict) == 0) {
            bomb("can't overwrite `%s'", phonedict);
        }
        if (!(outdict = fopen(phonedict, "w"))) {
            fprintf(stderr, "%s: can't create \"%s\" -- ", cmd, phonedict);
            perror("");
            exit(1);
        }
    }

    /*
     * Grab a phrase and process it.
     */
    if (isatty(fileno(stdin))) {
        fputs("Enter English text: ", stderr);
        fflush(stderr);
    }
    if (fgets(line, (int) sizeof(line), stdin)) {
        /*
         * Strip the terminating newline, but only if there is one:
         * input that ends at EOF or is longer than the buffer has
         * none, and chopping blindly would eat a real character
         * (or write before line[] when the string is empty).
         */
        len = strlen(line);
        if (len > 0 && line[len - 1] == '\n') {
            line[len - 1] = '\0';
        }
        reparse(line);
    }

    /*
     * Close up shop, making sure that any I/O errors are reported
     */
    if (ferror(indict)) {
        fprintf(stderr, "%s: problem reading \"%s\" -- ", cmd, indictname);
        perror("");
        exit(1);
    }
    (void) fclose(indict);
    if (phcreate) {
        /* fclose() flushes, so this is where a full disk shows up */
        if (fclose(outdict) != 0) {
            fprintf(stderr, "%s: problem writing \"%s\" -- ", cmd, phonedict);
            perror("");
            exit(1);
        }
    }
    exit(0);
}
/*
* reparse() - given a line of English text,
* Find and print the info necessary to reparse that line's phonemes
* into other English phrases.
*/
reparse(text)
char *text;
{
char dictword[MAX_PHONES]; /* an English word from the dictionary */
char *textcopy; /* dynamic copy of the text */
short phrase[MAX_PHONES]; /* the mapped, phonemic version of the phrase */
short testword[MAX_PHONES]; /* ditto for a word from the dictionary */
int twordlen; /* # of phonemes in testword[] (less P_end) */
register short *sp, *dp; /* temp source and dest phoneme pointers */
int idx; /* index where a match started */
short *xlate_line();
short *mapphrase();
char *strsave();
/*
* Translate the input phrase and copy it to a safe place.
*/
sp = xlate_line(text);
(void) mapphrase(sp);
dp = target;
while (*sp != P_end) {
*dp++ = *sp++;
}
*dp = P_end;
targlen = dp - &target[0];
/*
* For each word in the dictionary,
* Convert that word into phonemic codes;
* Write the converted codes to the phonemic dictionary (if necessary);
* Record where that word would fit into the input phrase.
*/
while (fgets(dictword, MAX_PHONES, indict)) {
dictword[strlen(dictword) - 1] = '\0';
if (phread) {
twordlen = encphones(dictword, testword);
} else {
sp = xlate_line(dictword);
twordlen = mapphrase(sp) - sp;
if (twordlen == 0) {
continue; /* (loop leap) */
}
dp = testword;
while (*sp != P_end) {
*dp++ = *sp++;
}
*dp = P_end;
if (phcreate) {
writephones(dictword, testword);
}
}
/*
* Search for and record matches until
* one can't possibly exist (too few phonemes left).
*/
sp = target;
dp = &target[targlen];
textcopy = (char *) 0;
while (dp - sp >= twordlen &&
(idx = wordidx(sp, testword)) != -1) {
sp += idx + 1;
if (!textcopy) {
textcopy = strsave(dictword);
}
recmatch((sp - 1) - target, twordlen, textcopy);
}
}
prune();
saymatches(text);
}
/*
 * prune() - discard matches that cannot take part in a complete reparse:
 *   a word whose end lands on a position where nothing matches is a dead
 *   end and is unlinked from its list;
 *   a position (other than the first) that no surviving word leads to is
 *   unreachable and loses its whole list.
 */
prune()
{
    int i;
    struct amatch **linkp;      /* the link that currently points at m */
    struct amatch *m;

    for (i = 0; i < MAX_PHONES; ++i) {
        posinfo[i].num_parents = 0;
    }

    /*
     * Backward pass: drop dead ends and count parents.
     * Working from the end means the list a word leads to has
     * already been pruned by the time the word is examined.
     */
    for (i = MAX_PHONES - 1; i >= 0; --i) {
        linkp = &posinfo[i].wlist;
        while ((m = *linkp) != (struct amatch *) 0) {
            if (m->nextpos < targlen) {
                if (!posinfo[m->nextpos].wlist) {
                    /* dead end: unlink; linkp stays on the same link */
                    /* (we should free m here if we are reclaiming space) */
                    *linkp = m->next;
                    continue;
                }
                ++posinfo[m->nextpos].num_parents;
            }
            /* a word that reaches the end of the phrase is always kept */
            linkp = &m->next;
        }
    }

    /*
     * Forward pass: empty every unreachable position except the first.
     * Emptying one takes away a parent from each position its words led
     * to, so the effect cascades forward.
     */
    for (i = 1; i < MAX_PHONES; ++i) {
        if (posinfo[i].num_parents == 0) {
            for (m = posinfo[i].wlist; m; m = m->next) {
                if (m->nextpos < targlen) {
                    --posinfo[m->nextpos].num_parents;
                }
            }
            /* (if we were reclaiming space, here's where we'd do it) */
            posinfo[i].wlist = (struct amatch *) 0;
        }
    }
}
/*
 * saymatches() - print the phrase match information table.
 *
 * The original text comes first. Then, for every position that still has
 * matches, a "NN:" header line followed by the matching words, each shown
 * as word:NN (NN = position following the word) or word:$ when the word
 * reaches the end of the phrase. Rows are wrapped before column 70.
 */
saymatches(text)
char *text; /* the original text */
{
    int i;
    int col;                    /* columns used so far on this row */
    int width;                  /* columns the next entry will need */
    struct amatch *m;

    printf("%s\n", text);
    for (i = 0; i < MAX_PHONES; ++i) {
        m = posinfo[i].wlist;
        if (m) {
            printf("%02d:\n", i);
            col = 0;
            do {
                /* leading blank + word + colon + two-character target */
                width = 1 + strlen(m->text) + 1 + 2;
                if (col + width >= 70) {
                    printf("\n");
                    col = 0;
                }
                if (col == 0) {
                    /* each row starts with an extra blank */
                    printf(" ");
                    col += 1;
                }
                printf(" %s:", m->text);
                if (m->nextpos < targlen) {
                    printf("%02d", m->nextpos);
                } else {
                    printf("$ ");
                }
                col += width;
                m = m->next;
            } while (m);
            printf("\n");
        }
    }
}
/*
 * mapphrase() - rewrite a P_end-terminated phoneme string in place, replacing
 * each phoneme by its phmap[] representative. Phonemes that map to P_end
 * are squeezed out, so the result may be shorter than the input.
 *
 * Returns a pointer to the new terminating P_end.
 */
short *
mapphrase(pp)
short *pp; /* a P_end-terminated word/phrase to map */
{
    short *out;                 /* where the next kept phoneme goes */
    short mapped;

    for (out = pp; *pp != P_end; ++pp) {
        mapped = phmap[*pp];
        *out = mapped;
        if (mapped != P_end) {
            ++out;              /* keep it; otherwise the slot is reused */
        }
    }
    *out = P_end;
    return (out);
}
/*
 * wordidx() - locate the first occurrence of a word inside a phrase.
 *
 * Both arguments are P_end-terminated phoneme strings that are assumed
 * to have been mapped by phmap[] already.
 *
 * Returns the index in the phrase where the word starts, or -1 if the
 * word does not occur.
 */
int
wordidx(phrase, word)
short *phrase; /* a P_end-terminated list of phonemes */
short *word; /* ditto */
{
    int off;                    /* candidate starting offset in phrase[] */
    int k;                      /* # of word phonemes matched so far */

    for (off = 0; phrase[off] != P_end; ++off) {
        /*
         * The phrase's own P_end never equals a word phoneme, so the
         * comparison stops by itself at the end of the phrase.
         */
        k = 0;
        while (word[k] != P_end && phrase[off + k] == word[k]) {
            ++k;
        }
        if (word[k] == P_end) {
            return (off);
        }
    }
    return (-1);
}
/*
 * recmatch() - record a match.
 *
 * A new match element is inserted into the list for position `pos'.
 * The list is kept ordered by descending nextpos; a new element goes in
 * front of the first existing element whose nextpos is not larger.
 */
recmatch(pos, phlen, text)
int pos; /* position of the match within the phrase */
int phlen; /* # of phonemes matched */
char *text; /* text that matched */
{
    struct amatch **linkp;      /* the link the new element will hang from */
    struct amatch *entry;
    struct amatch *matchalloc();

    entry = matchalloc();
    entry->text = text;
    entry->nextpos = pos + phlen;

    linkp = &posinfo[pos].wlist;
    while (*linkp && entry->nextpos < (*linkp)->nextpos) {
        linkp = &(*linkp)->next;
    }
    entry->next = *linkp;
    *linkp = entry;
}
/*
 * writephones() - append one entry to the phonemic dictionary (outdict).
 *
 * An entry is the English text, one blank, then one character per phoneme
 * (the phoneme code offset by '!'), then a newline. encphones() reads
 * this format back.
 *
 * Exits with status 1 if the stream reports a write error.
 */
writephones(ep, sp)
char *ep; /* english text */
short *sp; /* corresponding phonemes */
{
    short *ph;

    fputs(ep, outdict);
    putc(' ', outdict);
    for (ph = sp; *ph != P_end; ++ph) {
        putc((int) *ph + (int) '!', outdict);
    }
    putc('\n', outdict);

    if (ferror(outdict)) {
        fprintf(stderr, "Error: problem writing \"%s\" -- ", phonedict);
        perror("");
        exit(1);
    }
}
/*
 * encphones() - encode ascii from a phoneme file into phonetic codes.
 *
 * `text' is one phonemic-dictionary line as written by writephones():
 * the English word, one blank, then one character per phoneme. The blank
 * is overwritten with a NUL, so on return `text' holds just the English
 * word, and word[] holds the decoded phonemes terminated by P_end.
 *
 * A line without a blank is not a phonemic-dictionary entry at all (for
 * instance when -p names an English word list); that is reported and the
 * program exits with status 1 instead of dereferencing a null pointer.
 *
 * The caller must supply room in word[] for one phoneme per character
 * of `text' plus the terminator.
 */
int /* returns # of phonemes in word[] */
encphones(text, word)
register char *text; /* encoded phonemes (less the newline) */
short *word; /* where to put the phonemes */
{
    register short *sp;
    char *sep;                  /* the blank between text and phonemes */
    char *strchr();

    /*
     * separate the English text from its encoded form
     */
    sep = strchr(text, ' ');
    if (!sep) {
        fprintf(stderr,
          "Error: malformed phonemic dictionary entry \"%s\"\n", text);
        exit(1);
    }
    *sep = '\0';

    sp = word;
    for (text = sep + 1; *text; ++text) {
        *sp++ = (short) (*text - '!');
    }
    *sp = P_end;
    return (sp - word);
}
/*
 * matchalloc() - hand out the next unused match element.
 *
 * Elements come from a fixed static pool and are never returned to it.
 * When the pool is exhausted an error is printed and the program exits
 * with status 1.
 */
struct amatch *
matchalloc()
{
#define MAX_MATCHES 1000 /* max # of matches in a phrase */
    static struct amatch matchpool[MAX_MATCHES];
    static int used = 0;        /* # of pool elements handed out so far */

    if (used >= MAX_MATCHES) {
        fprintf(stderr, "Error: too many matches (over %d)\n", MAX_MATCHES);
        exit(1);
    }
    return (&matchpool[used++]);
}
/*
 * strsave() - duplicate a string into freshly malloc'ed storage.
 *
 * Returns the copy; the caller owns it. If memory cannot be obtained,
 * an error is printed and the program exits with status 1, so the
 * result is never null.
 */
char *
strsave(s)
char *s;
{
    char *copy;
    char *malloc();

    copy = malloc(strlen(s) + 1);       /* + 1 for the terminating NUL */
    if (copy == (char *) 0) {
        fprintf(stderr, "Error: out of memory saving \"%s\"\n", s);
        exit(1);
    }
    (void) strcpy(copy, s);
    return (copy);
}
/*
 * bomb() - report a fatal usage error and quit.
 *
 * Prints "<cmd>: <formatted message>", then the usage summary, on stderr
 * and exits with status 1. `str' is a printf-style format; up to three
 * arguments are forwarded to it.
 *
 * NOTE(review): the extra arguments are received as ints, yet some callers
 * pass a char * for a %s format. That only works where a pointer fits in
 * an int; a <stdarg.h> version would need a prototype ahead of the callers.
 */
/* VARARGS 1 */
bomb(str, a1, a2, a3)
char *str;
int a1, a2, a3;
{
    FILE *err = stderr;

    fprintf(err, "%s: ", cmd);
    fprintf(err, str, a1, a2, a3);
    fprintf(err, "\n");
    fprintf(err, "Usage:\n %s [-c] [-e English_dict] [-p Phone_dict]\n",
      cmd);
    exit(1);
}