home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Fresh Fish 8
/
FreshFishVol8-CD2.bin
/
bbs
/
text
/
detex-1.0.lha
/
detex
/
detex.l
(
.txt
)
< prev
next >
Wrap
LaTeX Document
|
1994-05-20
|
17KB
|
480 lines
/* RCS identification string for this file; compiled in unless `lint' is defined. */
#ifndef lint
static char rcsid[] = "$Header: /usr/src/local/bin/detex/RCS/detex.l,v 2.16 1993/01/14 16:48:25 trinkle Exp $";
#endif
/*
 * detex [-e environment-list] [-c] [-l] [-n] [-s] [-t] [-w] [file[.tex]]
 *
 *	This program is used to remove TeX or LaTeX constructs from a text
 *	file.
 *
 * Written by:
 *	Daniel Trinkle
 *	Department of Computer Science
 *	Purdue University
 */
#ifdef _DCC
#include <string.h>
#define index strchr
#define rindex strrchr
#endif /* _DCC */
#include "detex.h"
#ifdef USG
#include <string.h>
#define index strchr
#define rindex strrchr
#else
#include <strings.h>
#endif
#ifndef MAXPATHLEN
#include <sys/param.h>
#endif
/*
 * Shorthand used in the rule actions.  Each macro expands to an unbraced
 * `if' statement, so a use must not be followed directly by an `else'.
 * LaBEGIN and CITEBEGIN end in a bare BEGIN: the start-condition name is
 * written after the macro at the point of use (e.g. `LaBEGIN LaMacro;').
 */
/* Switch to the named start condition, but only when fLatex is set. */
#define LaBEGIN if (fLatex) BEGIN
/* Like LaBEGIN, but additionally requires fCite to be clear. */
#define CITEBEGIN if (fLatex && !fCite) BEGIN
/* Print a blank in place of a dropped construct when fSpace is set and fWord is clear. */
#define IGNORE if (fSpace && !fWord) putchar(' ')
/* Print a blank unless fWord is set. */
#define SPACE if (!fWord) putchar(' ')
/* Print a newline unless fWord is set. */
#define NEWLINE if (!fWord) putchar('\n')
/* Old-style (K&R) declaration of malloc(); define NO_MALLOC_DECL to leave it out. */
#ifndef NO_MALLOC_DECL
char *malloc();
#endif
/*
 * Global state shared by the scanner rules and the support routines.
 * The array bounds (MAXENVS, MAXINCLIST, MAXINPUTPATHS, CCHMAXENV, NOFILE)
 * are not defined in this part of the file -- presumably they come from
 * "detex.h" or <sys/param.h>; confirm there.
 */
char *rgsbEnvIgnore[MAXENVS]; /* list of environments ignored */
char *rgsbIncList[MAXINCLIST]; /* list of includeonly files */
char *rgsbInputPaths[MAXINPUTPATHS]; /* list of input paths in order */
char sbCurrentEnv[CCHMAXENV]; /* current environment being ignored */
char *sbProgName; /* name we were invoked with */
FILE *rgfp[NOFILE+1]; /* stack of input/include files */
int cfp = 0; /* count of files in stack */
int cOpenBrace = 0; /* count of `{' in <LaMacro2> */
int csbEnvIgnore; /* count of environments ignored */
int csbIncList = 0; /* count of includeonly files */
int csbInputPaths; /* count of input paths */
int fLatex = 0; /* flag to indicate delatex (LaTeX mode); set by the \begin{document} rule */
int fWord = 0; /* flag for -w option (word only output) */
int fFollow = 1; /* flag to follow input/include */
int fCite = 0; /* flag to echo \cite and \ref args */
int fSpace = 0; /* flag to replace \cs with space */
int fForcetex = 0; /* flag to inhibit latex mode */
S [ \t\n]*
    /* S (above): a possibly empty run of blanks, tabs and newlines. */
    /* W (below): one or more ASCII letters. */
W [a-zA-Z]+
    /* Start conditions used by the rules; the La* ones are entered from rules guarded by LaBEGIN/CITEBEGIN or from other La* states. */
%Start Define Display IncludeOnly Input Math Normal Control
%Start LaBegin LaDisplay LaEnd LaEnv LaFormula LaInclude
%Start LaMacro LaMacro2 LaVerbatim
<Normal>"%".* /* ignore comments: drop from % up to (not including) the end of the line */ ;
    /* \begin{document}: turn LaTeX handling on unless fForcetex is set. */
<Normal>"\\begin"{S}"{"{S}"document"{S}"}" {fLatex = !fForcetex; IGNORE;}
    /* Any other \begin: when fLatex is set, go to LaBegin, whose rules read the environment name. */
<Normal>"\\begin" /* environment start */ {LaBEGIN LaBegin; IGNORE;}
<LaBegin>{S}"{"{S}"verbatim"{S}"}" { if (BeginEnv("verbatim"))
BEGIN LaEnv; /* BeginEnv() returned nonzero: the LaEnv rules discard the body */
else
BEGIN LaVerbatim; /* otherwise the LaVerbatim rules echo the body */
IGNORE;
}
<LaVerbatim>"\\end"{S}"{"{S}"verbatim"{S}"}" /* verbatim mode ends: back to Normal */
{BEGIN Normal; IGNORE;}
    /* Every other character (except newline, which `.' does not match) is copied to the output. */
<LaVerbatim>. ECHO;
    /*
     * Any other environment name.  BeginEnv() is defined elsewhere; presumably it
     * reports whether the name is on the ignore list -- confirm there.  Nonzero
     * selects LaEnv (body discarded); zero selects LaMacro, which only skips
     * up to the next `}'.
     */
<LaBegin>{W} { if (BeginEnv(yytext))
BEGIN LaEnv;
else
BEGIN LaMacro;
IGNORE;
}
    /* Until a name is matched: newlines are kept (unless fWord), every other character is dropped. */
<LaBegin>"\n" NEWLINE;
<LaBegin>. ;
<LaEnv>"\\end" /* absorb some environments: with fLatex set, \end moves to LaEnd to look at the name */ {LaBEGIN LaEnd; IGNORE;}
    /* Body of an absorbed environment: newlines are kept (unless fWord), every other character is dropped. */
<LaEnv>"\n" NEWLINE;
<LaEnv>. ;
<LaEnd>{W} /* end environment */ { if (EndEnv(yytext))
BEGIN Normal; /* EndEnv() (defined elsewhere) returned nonzero: stop absorbing */
IGNORE;
}
    /* A `}' seen while still in LaEnd (Normal was not entered) resumes absorbing in LaEnv. */
<LaEnd>"}" {BEGIN LaEnv; IGNORE;}
<LaEnd>"\n" NEWLINE;
<LaEnd>. ;
<Normal>"\\bibitem" /* ignore args */ {LaBEGIN LaMacro2; IGNORE;}
    /*
     * The rules of this group drop the argument of the named control sequence.
     * They change state only when fLatex is set; otherwise the scanner stays in
     * Normal.  LaMacro discards input up to the first `}'; LaMacro2 counts braces
     * in cOpenBrace and returns to Normal when the count reaches zero.
     * \cite, \pageref and \ref use CITEBEGIN, so their arguments are not
     * skipped when fCite is set.  \index emits SPACE instead of IGNORE.
     */
<Normal>"\\bibliography" /* of these \cs */ {LaBEGIN LaMacro; IGNORE;}
<Normal>"\\bibstyle" {LaBEGIN LaMacro; IGNORE;}
<Normal>"\\cite" {CITEBEGIN LaMacro2; IGNORE;}
<Normal>"\\documentstyle" {LaBEGIN LaMacro; IGNORE;}
<Normal>"\\end" {LaBEGIN LaMacro; IGNORE;}
<Normal>"\\index" {LaBEGIN LaMacro2; SPACE;}
<Normal>"\\label" {LaBEGIN LaMacro; IGNORE;}
<Normal>"\\pageref" {CITEBEGIN LaMacro; IGNORE;}
<Normal>"\\pagestyle" {LaBEGIN LaMacro; IGNORE;}
<Normal>"\\ref" {CITEBEGIN LaMacro; IGNORE;}
<Normal>"\\setcounter" {LaBEGIN LaMacro; IGNORE;}
<Normal>"\\verb" /* ignore \verb<char>...<char> */ {
    /*
     * In LaTeX mode, read the delimiter character that follows \verb and
     * discard input up to the matching delimiter, keeping newlines (unless
     * fWord).  input() reports end of input as 0 (lex) or EOF (flex); the
     * loop stops there, so an unterminated \verb cannot spin forever.
     * The characters are held in int so that EOF is distinguishable.
     */
    if (fLatex) {
        int verbchar, c;

        verbchar = input();
        while (verbchar != 0 && verbchar != EOF
               && (c = input()) != verbchar && c != 0 && c != EOF) {
            if (c == '\n') {
                NEWLINE;
            }
        }
    }
    IGNORE;
}
<LaMacro>"}" /* the first closing brace ends the skipped argument */ BEGIN Normal;
<LaMacro>"\n" NEWLINE;
<LaMacro>. ;
    /*
     * LaMacro2: like LaMacro, but nested groups are skipped too.  cOpenBrace is
     * raised on `{' and lowered on `}'; Normal is re-entered when it is back at 0.
     * NOTE(review): a `}' met before any `{' makes cOpenBrace negative and the
     * count is not reset on entry -- confirm whether callers can reach that.
     */
<LaMacro2>"{" { cOpenBrace++; }
<LaMacro2>"}" { cOpenBrace--;
if (cOpenBrace == 0)
BEGIN Normal;
}
<LaMacro2>"\n" NEWLINE;
<LaMacro2>. ;
<Normal>"\\def" /* ignore def begin */ {BEGIN Define; IGNORE;}
    /* Everything up to and including the first `{' is skipped (newlines kept unless fWord); what follows is scanned in Normal again. */
<Define>"{" BEGIN Normal;
<Define>"\n" NEWLINE;
<Define>. ;
<Normal>"\\(" /* formula mode */ {LaBEGIN LaFormula; IGNORE;}
    /*
     * Four math states, all with the same shape: the opening delimiter enters the
     * state, the closing delimiter returns to Normal, newlines are kept (unless
     * fWord) and every other character is dropped.
     *   \( ... \)   LaFormula  (entered only when fLatex is set)
     *   \[ ... \]   LaDisplay  (entered only when fLatex is set)
     *   $$ ... $$   Display
     *   $  ... $    Math
     */
<LaFormula>"\\)" BEGIN Normal;
<LaFormula>"\n" NEWLINE;
<LaFormula>. ;
<Normal>"\\[" /* display mode */ {LaBEGIN LaDisplay; IGNORE;}
<LaDisplay>"\\]" BEGIN Normal;
<LaDisplay>"\n" NEWLINE;
<LaDisplay>. ;
<Normal>"$$" /* display mode */ {BEGIN Display; IGNORE;}
<Display>"$$" BEGIN Normal;
<Display>"\n" NEWLINE;
<Display>. ;
<Normal>"$" /* math mode */ {BEGIN Math; IGNORE;}
<Math>"$" BEGIN Normal;
<Math>"\n" NEWLINE;
    /* An escaped dollar inside Math is swallowed as a unit, so it does not end math mode. */
<Math>"\\$" ;
<Math>. ;
<Normal>"\\include" /* process files: with fLatex set, go read the file name */ {LaBEGIN LaInclude; IGNORE;}
    /*
     * The first run of characters that are not braces, blanks, tabs or newlines
     * is passed to IncludeFile() (defined elsewhere) and the scanner returns to
     * Normal.  Until then newlines are kept (unless fWord) and other
     * characters, such as the opening brace, are dropped.
     */
<LaInclude>[^{ \t\n}]+ { IncludeFile(yytext);
BEGIN Normal;
}
<LaInclude>"\n" NEWLINE;
<LaInclude>. ;
<Normal>"\\includeonly" /* collect the \includeonly file list */ {BEGIN IncludeOnly; IGNORE;}
    /* Each run of characters other than braces, blanks, tabs, commas and newlines is handed to AddInclude() (defined elsewhere). */
<IncludeOnly>[^{ \t,\n}]+ AddInclude(yytext);
    /*
     * The closing brace ends the list.  If nothing was added, a single null
     * pointer entry is stored so that csbIncList becomes 1.  The entry is
     * written as an explicit null pointer rather than the character
     * constant '\0'.
     */
<IncludeOnly>"}" { if (csbIncList == 0)
        rgsbIncList[csbIncList++] = (char *) 0;
    BEGIN Normal;
}
<IncludeOnly>"\n" NEWLINE;
<IncludeOnly>. ;
<Normal>"\\input" /* read the file name; unlike \include this is not restricted to fLatex */ {BEGIN Input; IGNORE;}
    /*
     * The first run of characters that are not braces, blanks, tabs or newlines
     * is passed to InputFile() (defined elsewhere) and the scanner returns to
     * Normal.  Until then newlines are kept (unless fWord) and other
     * characters are dropped.
     */
<Input>[^{ \t\n}]+ { InputFile(yytext);
BEGIN Normal;
}
<Input>"\n" NEWLINE;
<Input>. ;
<Normal>\\(aa|AA|ae|AE|oe|OE|ss)[ \t]*[ \t\n}] /* handle ligatures: print the two letters after the backslash; the trailing blanks and terminator are consumed */
{printf("%.2s", yytext+1);}
    /* Single-letter forms \O \o \i \j \L \l: print just that letter; the trailing blanks and terminator are consumed. */
<Normal>\\[OoijLl][ \t]*[ \t\n}] {printf("%.1s", yytext+1);}
<Normal>\\[a-zA-Z@]+ /* ignore other \cs */ {BEGIN Control; IGNORE;}
    /* Backslash-blank becomes a blank (unless fWord); backslash plus any other single character is dropped. */
<Normal>"\\ " SPACE;
<Normal>\\. IGNORE;
    /*
     * Control: the text right after an unrecognised control sequence.
     *   - further control sequences are dropped and the state is kept;
     *   - a token containing one of - ' = ` is dropped up to the next blank,
     *     tab, newline or `{';
     *   - a newline is printed (unless fWord) and ends the state;
     *   - blanks, tabs and/or opening braces end the state;
     *   - any other character is pushed back with yyless(0) and rescanned
     *     in Normal.
     */
<Control>\\[a-zA-Z@]+ IGNORE;
<Control>[a-zA-Z@0-9]*[-'=`][^ \t\n{]* IGNORE;
<Control>"\n" {BEGIN Normal; NEWLINE;}
<Control>[ \t]*[{]* {BEGIN Normal; IGNORE;}
<Control>. {yyless(0);BEGIN Normal;}
<Normal>[{}\\|] /* special characters: braces, backslash and vertical bar are dropped */ IGNORE;
    /* The two-character sequences !` and ?` are dropped as a unit. */
<Normal>[!?]"`" IGNORE;
    /* A tilde becomes a blank (unless fWord). */
<Normal>~ SPACE;
    /*
     * Words: letters, optional apostrophes, letters -- so at least two letters.
     * With fWord set each word is printed on a line of its own; otherwise it
     * is echoed unchanged.
     */
<Normal>{W}[']*{W} { if (fWord)
printf("%s\n", yytext);
else
ECHO;
}
    /* Digit runs and any remaining single character (newline included) are echoed only when fWord is clear. */
<Normal>[0-9]+ if (!fWord) ECHO;
<Normal>(.|\n) if (!fWord) ECHO;
/******
** main --
** Set sbProgName to the base of arg 0.
** Set the input paths.
** Check for options
** -c echo LaTeX \cite, \ref, and \pageref values
** -e <env-list> list of LaTeX environments to ignore
** -l force latex mode
** -n do not follow \input and \include
** -s replace control sequences with space
** -t force tex mode
** -w word only output
** Set the list of LaTeX environments to ignore.
** Process each input file.
** If no input files are specified on the command line, process stdin.
******/
main(cArgs,rgsbArgs)
int cArgs;
char *rgsbArgs[];
char *pch, *sbEnvList = DEFAULTENV, sbBadOpt[2];
FILE *TexOpen();
int fSawFile = 0, iArgs = 1;
/* get base nam