home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Borland Programmer's Resource
/
Borland_Programmers_Resource_CD_1995.iso
/
utils
/
rtfprsr
/
rtf2text.c
< prev
next >
Wrap
C/C++ Source or Header
|
1995-05-18
|
5KB
|
265 lines
/*
rtf2text - read rtf input, write text of document (text extraction).
This installs callbacks for the ascii and control token classes.
The control class is necessary so that special characters such as
\par, \tab, \sect, etc. can be converted.
It's problematic what to do with text in headers and footers, and
what to do about tables.
This really is quite a stupid program; for instance, it could keep
track of the current leader character and dump that out when a tab
is encountered, but it does not.
04 Feb 91 Paul DuBois dubois@primate.wisc.edu
04 Feb 91 V1.0. Created.
27 Feb 91 V1.01. Updated for distribution 1.05.
*/
# include <stdio.h>
# include "rtf.h"
/*
	One entry of a character translation table.  The tables are
	consulted for character values >= 128, with a separate table
	for each character set.  A table is terminated by an entry
	whose charStr is NULL.
*/
typedef struct CharMap
{
	int	charVal;	/* character code to match */
	char	*charStr;	/* text written in its place; NULL ends the table */
} CharMap;
/*
	Translation tables, one per character set.  The definitions
	appear further down in this file.
*/
extern CharMap	ansiCharMap[], macCharMap[], pcCharMap[], pcaCharMap[];

/*
	Table currently in effect.  It starts out as the ANSI table,
	which is unfortunate if \ansi really does show up in the input,
	because that table has no entries.
*/
CharMap	*charMap = ansiCharMap;
/*
	Forward declarations for the token callbacks and their helpers.

	PutChar() is declared here as well because Text() and
	SpecialChar() call it before its definition; without a
	declaration in scope those calls rely on an implicit function
	declaration, which is no longer valid C.
*/
static void	Text ();
static void	Control ();
static void	CharSet ();
static void	Destination ();
static void	SpecialChar ();
int	PutChar (int c);
/*
	Program entry point.

	With no arguments the input is whatever stdin already refers to.
	With one or more arguments, the first names the input file, which
	is reopened as stdin (presumably because the RTF reader takes its
	input from stdin -- confirm against the reader library); any
	further arguments are ignored.

	Returns 0 once RTFRead() has returned, or 1 if the named file
	cannot be opened.  Returning from main is used instead of exit()
	so that no declaration from <stdlib.h> is needed.
*/
int main (int argc, char **argv)
{
	RTFInit ();

	/* not clever; only allows stdin or one named file to be read */
	if (argc > 1)
	{
		if (freopen (argv[1], "r", stdin) == NULL)
		{
			fprintf (stderr, "Can't open \"%s\"\n", argv[1]);
			return 1;
		}
	}

	/* install class callbacks and process the input stream */
	RTFSetClassCallback (rtfText, Text);
	RTFSetClassCallback (rtfControl, Control);
	RTFRead ();

	return 0;
}
/*
	Callback installed for the text token class: hands the current
	rtfMajor value to PutChar() to be written out.
*/
static void Text ()
{
	(void) PutChar (rtfMajor);
}
/*
	Callback installed for the control token class.  Dispatches on
	rtfMajor to the handler for character-set, destination and
	special-character controls; every other control is ignored.
*/
static void Control ()
{
	if (rtfMajor == rtfCharSet)
	{
		CharSet ();
	}
	else if (rtfMajor == rtfDestination)
	{
		Destination ();
	}
	else if (rtfMajor == rtfSpecialChar)
	{
		SpecialChar ();
	}
}
static void CharSet ()
{
switch (rtfMinor)
{
case rtfAnsiCharSet:
charMap = ansiCharMap;
break;
case rtfMacCharSet:
charMap = macCharMap;
break;
case rtfPcCharSet:
charMap = pcCharMap;
break;
case rtfPcaCharSet:
charMap = pcaCharMap;
break;
}
}
/*
	Skip over destinations whose contents must not end up in the
	output.  For the destinations listed below the whole group is
	skipped, so that (for example) picture data is never mistaken
	for plain text.  All other destinations are left alone.
*/
static void Destination ()
{
	switch (rtfMinor)
	{
	case rtfPict:
	case rtfFNContSep:
	case rtfFNContNotice:
	case rtfInfo:
	case rtfIndexRange:
	case rtfITitle:
	case rtfISubject:
	case rtfIAuthor:
	case rtfIOperator:
	case rtfIKeywords:
	case rtfIComment:
	case rtfIVersion:
	case rtfIDoccomm:
		break;		/* one of the ignored destinations */
	default:
		return;		/* anything else is processed normally */
	}

	RTFSkipGroup ();
}
/*
	Translate the special-character control named by rtfMinor into
	a plain character and write it with PutChar():

		page, section, row, line, paragraph	newline
		cell, non-breaking space		space
		tab					tab
		non-breaking hyphen			'-'

	Any other special character produces no output.
*/
static void SpecialChar ()
{
int	out;

	switch (rtfMinor)
	{
	case rtfPage:
	case rtfSect:
	case rtfRow:
	case rtfLine:
	case rtfPar:
		out = '\n';
		break;
	case rtfCell:		/* make sure cells are separated */
	case rtfNoBrkSpace:
		out = ' ';
		break;
	case rtfTab:
		out = '\t';
		break;
	case rtfNoBrkHyphen:
		out = '-';
		break;
	default:
		return;
	}

	PutChar (out);
}
/*
	Write character c to stdout as plain text.

	Values below 128 are written unchanged.  Values of 128 and above
	are looked up in the current translation table (charMap) and the
	matching replacement string is written; if the table has no entry
	for c, the string "X" is written instead.

	Returns 0 on success, or EOF if the write to stdout fails.  The
	return type is spelled out as int (rather than left implicit) so
	that the definition agrees with callers that use the function
	without a prior declaration.

	Eventually this should keep track of the destination of the
	current state and only write text when in the initial state.
*/
int PutChar (int c)
{
const CharMap	*entry;
const char	*text = "X";	/* used when no table entry matches */

	if (c < 128)
		return (putchar (c) == EOF ? EOF : 0);

	/* tables end with an entry whose charStr is NULL */
	for (entry = charMap; entry->charStr != NULL; entry++)
	{
		if (entry->charVal == c)
		{
			text = entry->charStr;
			break;
		}
	}

	return (fputs (text, stdout) == EOF ? EOF : 0);
}
/* ANSI character set: no translations defined, terminator only */
CharMap	ansiCharMap [] =
{
	{ 0, NULL }
};
/*
	Macintosh character set: plain-text stand-ins for character
	codes >= 128.  The entry with a NULL string ends the table.
*/
CharMap	macCharMap [] =
{
	{ 0xa0, "+" },		/* dagger */
	{ 0xa1, "deg." },	/* degree */
	{ 0xa2, "cents" },	/* cent */
	{ 0xa5, "o" },		/* bullet */
	{ 0xa7, "B" },		/* German sharp s (eszett) */
	{ 0xa8, "reg." },	/* registered */
	{ 0xa9, "(c)" },	/* copyright */
	{ 0xaa, "(TM)" },	/* trademark */
	{ 0xab, "'" },		/* acute accent */
	{ 0xad, "!=" },		/* not equal */
	{ 0xae, "AE" },		/* joined A-E */
	{ 0xb1, "+/-" },	/* plus or minus */
	{ 0xb2, "<=" },		/* less than or equal */
	{ 0xb3, ">=" },		/* greater than or equal */
	{ 0xb5, "u" },		/* micro */
	{ 0xb6, "d" },		/* delta (partial differential sign) */
	{ 0xbe, "ae" },		/* joined a-e */
	{ 0xc5, "~" },		/* approximately */
	{ 0xc7, "<<" },		/* alternate quote */
	{ 0xc8, ">>" },		/* alternate end-quote */
	{ 0xc9, "..." },	/* ellipsis */
	{ 0xca, " " },		/* unbreakable space */
	{ 0xd0, "-" },		/* short dash */
	{ 0xd1, "--" },		/* long dash */
	{ 0xd2, "\"" },		/* left curly double quote */
	{ 0xd3, "\"" },		/* right curly double quote */
	{ 0xd4, "`" },		/* left curly single quote */
	{ 0xd5, "'" },		/* right curly single quote */
	{ 0xd6, "/" },		/* divide */
	{ 0, NULL }
};
/* PC character set: no translations defined, terminator only */
CharMap	pcCharMap [] =
{
	{ 0, NULL }
};
/* PCA character set: no translations defined, terminator only */
CharMap	pcaCharMap [] =
{
	{ 0, NULL }
};