home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Usenet 1994 October
/
usenetsourcesnewsgroupsinfomagicoctober1994disk2.iso
/
games
/
volume2
/
autopun
/
phoneme.c
< prev
next >
Wrap
C/C++ Source or Header
|
1987-11-25
|
9KB
|
453 lines
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#define DATA
#include "phoneme.h"
#undef DATA
/*
 * xlate_line() - translate an English phrase or word into a phoneme list.
 *
 * The text is split into words, where a word is a maximal run of letters
 * and apostrophes; every other character is a separator and is skipped.
 * Each word is upper-cased, wrapped in a leading and a trailing blank
 * (the form xlate_word() expects) and handed to xlate_word() in turn.
 *
 * xlate_line() returns a pointer to the *static* resultant phoneme list,
 * terminated by P_end.  The list is overwritten by the next call.
 *
 * A word longer than MAXPHONES characters is truncated to MAXPHONES
 * characters so that the local word buffer cannot be overrun.
 *
 * NOTE: the phoneme list itself is still not bounds-checked, because
 * xlate_word() takes no limit argument; text that translates to
 * MAXPHONES or more phonemes will overrun phonelist[].
 */
short *
xlate_line(text)
char *text;     /* English text to convert */
{
    static short phonelist[MAXPHONES];
    /* leading blank + up to MAXPHONES characters + trailing blank + NUL */
    char word[1 + MAXPHONES + 1 + 1];
    char *wordend = &word[1 + MAXPHONES];   /* one past the last letter slot */
    short *nxtphones;
    char *dp;
    unsigned char ch;   /* <ctype.h> functions need an unsigned char value */
    short *xlate_word();

    nxtphones = phonelist;
    word[0] = ' ';
    while (*text) {
        /* Skip everything that cannot be part of a word */
        while (*text && !(isalpha((unsigned char) *text) || *text == '\'')) {
            ++text;
        }
        if (!*text) break;      /* Loop Exit */

        /* Copy the word, upper-casing it; drop characters that don't fit */
        for (dp = &word[1]; ; ++text) {
            ch = (unsigned char) *text;
            if (!isalpha(ch) && ch != '\'') break;
            if (dp < wordend) {
                *dp++ = (char) (islower(ch) ? toupper(ch) : ch);
            }
        }
        *dp++ = ' ';
        *dp = '\0';

        nxtphones = xlate_word(word, nxtphones);
    }
    *nxtphones = P_end;
    return(phonelist);
}
/*
 * xlate_word() - append the phonemes for one English word to a phoneme
 *      stream.
 *
 * The word is expected in the form
 *      ' [-A-Z']* '
 * i.e. a leading blank, then upper-case letters, apostrophes and hyphens,
 * then a trailing blank.  The leading blank is skipped; everything after
 * it (including the trailing blank) is run through apply1rule() one rule
 * at a time, and the phonemes each rule yields are copied to the
 * destination.
 *
 * No P_end terminator is written and no bound is enforced on the
 * destination; the caller must supply enough room.
 */
short *         /* where to put any following phonemes (or a terminator) */
xlate_word(word, phonedst)
char *word;
short *phonedst;        /* where to put the new phonemes */
{
    char *apply1rule();
    char *rest;         /* part of the word not yet translated */
    short *src;         /* phonemes produced by the rule just applied */

    for (rest = word + 1; *rest != '\0'; ) {
        rest = apply1rule(rest, &src);
        for (; *src != P_end; ++src) {
            *phonedst = *src;
            ++phonedst;
        }
    }
    return(phonedst);
}
/*
 * apply1rule() - apply the appropriate translation rule to the start of
 *      the word provided, setting a pointer to the resultant phonemes and
 *      returning a pointer to the unconverted part of the word.
 *
 * The rule list is chosen by the first character: one list per letter
 * 'A'..'Z' (starting at RIDX_A), and the RIDX_PUNC list for anything else.
 * Each list ends with an entry whose match string is null.  A rule applies
 * when its match string equals the start of the word and both its prefix
 * (left context) and suffix (right context) patterns are satisfied.
 *
 * The caller must guarantee that word[-1] is readable: the left context
 * starts at the character just before the word.
 *
 * If no rule applies, an error is printed on stderr, *newphp is pointed
 * at an empty phoneme list and one character is skipped.
 */
char *          /* returns the place to convert next */
apply1rule(word, newphp)
char *word;     /* the part of the word to start in */
short **newphp; /* where to put a pointer to the set of translated phonemes */
{
    int rtype;                  /* rule type to use */
    struct rule *rule;          /* the current rule being tested */
    register char *cp;          /* temp pointer for matching */
    register char *rem;         /* points to the first char past the match */
    static short firstcall = TRUE;      /* "first call to this routine" */
    static short nosym = P_end;         /* an empty phoneme list */
    int leftmatch(), rightmatch(), ruleinit();

    /* The prefix/suffix lengths in the rule table are filled in lazily */
    if (firstcall) {
        firstcall = FALSE;
        ruleinit();
    }

    /*
     * An explicit range test is used instead of isupper(): it is defined
     * for every char value and, whatever the locale, can never produce an
     * index outside the 26 per-letter rule lists.
     */
    if (*word >= 'A' && *word <= 'Z') {
        rtype = (int) (*word - 'A') + RIDX_A;
    } else {
        rtype = RIDX_PUNC;
    }

    for (rule = Rules[rtype]; rule->match; ++rule) {
        /* Compare the rule's match string with the start of the word */
        for (cp = rule->match, rem = word; *cp; cp++, rem++) {
            if (*cp != *rem) break;
        }
        if (*cp) continue;      /* failed to match */

        if (!leftmatch(rule->prefix, rule->preflen, word - 1)) continue;
        if (!rightmatch(rule->suffix, rule->sufflen, rem)) continue;

        *newphp = rule->outsyms;
        return(rem);
    }

    fprintf(stderr, "Error: Can't find rule for: '%c' in \"%s\"\n",
        *word, word);
    *newphp = &nosym;
    return(word + 1);   /* Skip the annoyance */
}
/*
 * leftmatch() - test whether the text to the left of a rule's match
 *      satisfies the rule's prefix pattern.
 *
 * The pattern is scanned from its last character back to its first while
 * the text is scanned backwards starting at *context.  Pattern characters:
 *
 *      ' blank A-Z     that literal character
 *      #               one or more vowels
 *      :               zero or more consonants
 *      ^               exactly one consonant
 *      .               one of B, D, V, G, J, L, M, N, R, W, Z
 *      +               one of E, I, Y (front vowel)
 *
 * An empty pattern matches any context.  Any other pattern character is
 * reported on stderr and treated as a mismatch.
 *
 * Returns TRUE if the whole pattern matches, FALSE otherwise.
 */
int
leftmatch(pattern, patlen, context)
char *pattern;  /* first char of pattern to match in text */
short patlen;   /* strlen(pattern) */
char *context;  /* last char of text to be matched */
{
    char *pat;
    char *text;
    int count;

    if (!*pattern) return(TRUE);        /* null string matches any context */

    /* start at the last character of the pattern and walk backwards */
    text = context;
    pat = pattern + (patlen - 1);
    for (count = patlen; count > 0; pat--, count--) {
        switch (*pat) {
        case '\'': case ' ':
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
        case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
        case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
        case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
        case 'Y': case 'Z':
            /* literal: must be identical */
            if (*pat != *text) return(FALSE);
            text--;
            break;

        case '#':       /* One or more vowels */
            if (!isvowel(*text)) return(FALSE);
            text--;
            while (isvowel(*text)) text--;
            break;

        case ':':       /* Zero or more consonants */
            while (isconsonant(*text)) text--;
            break;

        case '^':       /* One consonant */
            if (!isconsonant(*text)) return(FALSE);
            text--;
            break;

        case '.':       /* B, D, V, G, J, L, M, N, R, W, Z */
            switch (*text) {
            case 'B': case 'D': case 'V': case 'G': case 'J': case 'L':
            case 'M': case 'N': case 'R': case 'W': case 'Z':
                text--;
                break;
            default:
                return(FALSE);
            }
            break;

        case '+':       /* E, I or Y (front vowel) */
            switch (*text) {
            case 'E': case 'I': case 'Y':
                text--;
                break;
            default:
                return(FALSE);
            }
            break;

        default:
            fprintf(stderr, "Bad char in left rule: '%c'\n", *pat);
            return(FALSE);
        }
    }
    return(TRUE);
}
/*
 * rightmatch() - test whether the text to the right of a rule's match
 *      satisfies the rule's suffix pattern.
 *
 * Pattern and text are both scanned left to right, the text starting at
 * *context.  Pattern characters:
 *
 *      ' blank A-Z     that literal character
 *      #               one or more vowels
 *      :               zero or more consonants
 *      ^               exactly one consonant
 *      .               one of B, D, V, G, J, L, M, N, R, W, Z
 *      +               one of E, I, Y (front vowel)
 *      %               one of the suffixes E, ER, ES, ED, ELY or ING
 *
 * An empty pattern matches any context.  Any other pattern character is
 * reported on stderr and treated as a mismatch.
 *
 * Returns TRUE if the whole pattern matches, FALSE otherwise.
 */
int
rightmatch(pattern, patlen, context)
char *pattern;  /* first char of pattern to match in text */
short patlen;   /* strlen(pattern) [ignored] */
char *context;  /* first char of text to be matched */
{
    char *pat;
    char *text;

    if (!*pattern) return(TRUE);        /* null string matches any context */

    text = context;
    for (pat = pattern; *pat != '\0'; pat++) {
        switch (*pat) {
        case '\'': case ' ':
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
        case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
        case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
        case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
        case 'Y': case 'Z':
            /* literal: must be identical */
            if (*pat != *text) return(FALSE);
            text++;
            break;

        case '#':       /* One or more vowels */
            if (!isvowel(*text)) return(FALSE);
            text++;
            while (isvowel(*text)) text++;
            break;

        case ':':       /* Zero or more consonants */
            while (isconsonant(*text)) text++;
            break;

        case '^':       /* One consonant */
            if (!isconsonant(*text)) return(FALSE);
            text++;
            break;

        case '.':       /* B, D, V, G, J, L, M, N, R, W, Z */
            switch (*text) {
            case 'B': case 'D': case 'V': case 'G': case 'J': case 'L':
            case 'M': case 'N': case 'R': case 'W': case 'Z':
                text++;
                break;
            default:
                return(FALSE);
            }
            break;

        case '+':       /* E, I or Y (front vowel) */
            switch (*text) {
            case 'E': case 'I': case 'Y':
                text++;
                break;
            default:
                return(FALSE);
            }
            break;

        case '%':       /* ER, E, ES, ED, ING, ELY (a suffix) */
            if (*text == 'E') {
                text++;
                if (*text == 'L') {
                    /* ELY is taken whole; otherwise only the E (keep the L) */
                    if (text[1] == 'Y') text += 2;
                } else if (*text == 'R' || *text == 'S' || *text == 'D') {
                    text++;
                }
                break;
            }
            /* && short-circuits, so nothing past a terminating NUL is read */
            if (text[0] == 'I' && text[1] == 'N' && text[2] == 'G') {
                text += 3;
                break;
            }
            return(FALSE);

        default:
            fprintf(stderr, "Bad char in right rule:'%c'\n", *pat);
            return(FALSE);
        }
    }
    return(TRUE);
}
/*
 * ruleinit() - initialize the remaining fields of the phoneme rule table.
 *
 * For every rule in each of the RULECNT rule lists (each list ends with an
 * entry whose match string is null), the lengths of the prefix and suffix
 * pattern strings are computed once and stored in the rule.
 *
 * strlen() requires <string.h>.  Always returns 0; the value carries no
 * information and exists only so that the function, which has always had
 * type int, returns something definite.
 */
int
ruleinit()
{
    struct rule *rule;
    int rtype;

    for (rtype = 0; rtype < RULECNT; ++rtype) {
        for (rule = Rules[rtype]; rule->match; ++rule) {
            rule->preflen = strlen(rule->prefix);
            rule->sufflen = strlen(rule->suffix);
        }
    }
    return(0);
}
/*
 * isvowel() - TRUE if chr is one of the upper-case vowels A, E, I, O, U;
 *      FALSE for every other character (Y and lower-case letters included).
 */
int
isvowel(chr)
char chr;
{
    if (chr == 'A' || chr == 'E' || chr == 'I' || chr == 'O' || chr == 'U') {
        return(TRUE);
    }
    return(FALSE);
}
/*
 * isconsonant() - non-zero if chr is an upper-case letter that isvowel()
 *      does not accept (so Y counts as a consonant), zero otherwise.
 *
 * The argument is converted to unsigned char before being handed to
 * isupper(): passing a negative plain-char value to a <ctype.h> function
 * is undefined behavior.
 */
int
isconsonant(chr)
char chr;
{
    return(isupper((unsigned char) chr) && !isvowel(chr));
}
/*
 * outphonemes() - output the given P_end-terminated array of phonemes.
 *
 * Each phoneme code is used as an index into phochars[] and the string
 * found there is passed to outstring().
 */
outphonemes(php)
short *php;     /* phoneme pointer */
{
    short *cur;

    for (cur = php; *cur != P_end; ++cur) {
        outstring(phochars[*cur]);
    }
}
/*
 * outstring() - write a NUL-terminated string to stderr, one character
 *      at a time.  Nothing is appended (no newline).
 */
outstring(string)
char *string;
{
    char *cp;

    for (cp = string; *cp != '\0'; ++cp) {
        fputc(*cp, stderr);
    }
}