home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Simtel MSDOS 1992 September
/
Simtel20_Sept92.cdr
/
msdos
/
c
/
lex.arc
/
HWORD.LXI
< prev
next >
Wrap
Text File
|
1980-01-01
|
3KB
|
110 lines
/*
* Word recognizer (with hyphenation)
*
* This program acts as a very simple filter for files of
* text that may have hyphenated words at the end of an input
* line. Output consists of each word on a separate line
* with hyphenated words rejoined. Note: a word is said to
* start with the first alphabetic character and end with the
* last alphabetic character. Embedded graphics will be removed.
*/
/*
* Basic elements: single-character classes from which the word
* patterns below are built.
*/
white = [\n\r\t ]; /* Newline, CR, tab or space: ends a word */
bol = [\n] white*; /* Newline plus any leading white space of the next line */
eol = [\0\n\r]; /* NUL, newline or CR: end of input line */
letter = [A-Za-z]; /* An ASCII letter */
graphic = [!-@\[-`{-~]; /* Printing char that is not a letter (digits, punctuation) */
text = [!-~]; /* Any printing char (space excluded) */
garbage = [\1-\377]; /* Any non-NUL byte: catch-all for whatever remains */
/*
* A word is optional leading graphics, a letter, any run of
* printing characters, another letter, then optional trailing
* graphics. It therefore contains at least two letters.
*
* Junk is a single letter followed by white space, or a (possibly
* empty) run of graphics followed by white space.
*
* A hyphenated word is a word-<NEWLINE> followed by a word
* on the next line.
*/
word = graphic* letter text* letter graphic*;
junk = (letter white) | (graphic* white);
%{
main()
{
	/*
	 * Drive the scanner: call yylex() repeatedly and stop as soon
	 * as it returns zero. All useful work happens in the rule
	 * actions; the return value is only used as the loop condition.
	 */
	for (;;) {
		if (!yylex())
			break;
	}
}
%}
%%
/*
* A hyphenated word: a word followed by "-" and an end-of-line
* character, accepted only when the text after the "/" (start of
* the next line, then two letters) follows. output(TRUE) prints
* the first half without a newline, so the word printed next
* completes it on the same output line.
*
* NOTE(review): eol consumes one line terminator and bol requires a
* further '\n', so this rule appears to need two terminator
* characters (e.g. CR LF) after the hyphen -- confirm against the
* input the scanner actually sees.
*
* LEXSKIP is not defined in this file; presumably it tells the lex
* runtime to discard the token and continue scanning -- confirm.
*/
word "-" eol / bol letter letter
{
output(TRUE);
return(LEXSKIP);
}
/*
* An ordinary word: printed on a line of its own.
*/
word {
output(FALSE);
return(LEXSKIP);
}
/*
* Junk (one letter words or all graphics): matched so that it is
* consumed, but nothing is printed.
*/
junk
{
return(LEXSKIP);
}
/*
* Other stuff: any single end-of-line, white space or other
* character not claimed by the rules above. Nothing is printed.
*/
eol | white | garbage
{
return(LEXSKIP);
}
%%
output(flag)
int flag;
/*
 * Output the current token with leading and trailing non-alphabetic
 * characters stripped.
 *
 * flag is TRUE if this is the start of a hyphenated word: the newline
 * is then withheld so that the next word printed completes the line.
 * Otherwise the word is followed by a newline.
 *
 * The characters are written directly from the token; nothing is
 * copied into a fixed-size buffer, so a token of any length is safe.
 */
{
	register char *tokptr;		/* First character to print */
	char *tokend;			/* One past the last character to print */
	char *token();

	/*
	 * token() returns the token start and stores the token end
	 * through its argument. The end is treated as exclusive (one
	 * past the last character) -- TODO confirm against the lex
	 * runtime documentation.
	 */
	tokptr = token(&tokend);
	/*
	 * Skip over leading and trailing non-alpha stuff. The bounds
	 * are tested before each dereference so that nothing outside
	 * the token is ever read, even for a token with no letters.
	 * The unsigned char cast keeps isalpha() defined for bytes
	 * with the high bit set.
	 */
	while (tokptr < tokend && !isalpha((unsigned char) *tokptr))
		tokptr++;
	while (tokend > tokptr && !isalpha((unsigned char) tokend[-1]))
		tokend--;
	while (tokptr < tokend) {
		putchar(*tokptr);
		tokptr++;
	}
	if (!flag)
		putchar('\n');
}