home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
GEMini Atari
/
GEMini_Atari_CD-ROM_Walnut_Creek_December_1993.iso
/
files
/
program
/
kr2ansi
/
kr2ansi.c
< prev
next >
Wrap
C/C++ Source or Header
|
1993-10-23
|
26KB
|
669 lines
/* ------------------------------------------------------------------------ *
* K R 2 A N S I . C *
* ------------------------------------------------------------------------ *
* 9/23/91
* Author: Harry Karayiannis
* ______________________E-MAIL:_____________________
* INTERnet: | BITnet:
* harryk@bucsf.bu.edu | cscrzcc@buacca.bu.edu
* --------------------------------------------------
*
* Purpose: Read a C-code source file written in Kernighan-Ritchie's
* programming style, and produce ANSI prototypes for all
* functions. (Hint: using output redirection you can create
* a file with prototypes of all the functions in your K&R C
* source-file. The program is automagically putting the word
* "extern" in front of the prototype-line, so you can #include
* the file in your current source code and compile it with an
* ANSI compiler).
*
* Usage: kr2ansi [-p] [ [-r file1] | [usr_def1 ... usr_defn] ] file2
* -p : show parameters
* -r file1 : read user-defined types from 'file1'
* usr_def1 ... usr_defn :
* user-defined types present in 'file2'
* file2 : the file to read C-code from
*
* Notes: This program is useful for people (like myself) who like
* K&R programming style, but want to take advantage of the
* parameter-checking, during compilation, typically done by
* ANSI compilers (e.g. gcc, Turbo-C, Prospero-C, etc).
* Others simply want their K&R code to compile under an ANSI
* compiler without watching all those "irritating" warnings
* saying: "Function call without prototyping".
* For either case, kr2ansi can prove very handy.....
*
* Caveats: The program is not bug-free. I tried to make it work with
* files that follow the programming style presented in
* "The C Programming Language (1st Edition)", by Kernighan
* and Ritchie.
* In other words, it expects K&R function declarations to
* be in the following form:
* (NOTE: I write comments with backslashes '\' cause
* MWC does not parse nested comments)
*
* FUNCTION-TYPE FUNCTION-NAME(P1,P2,...,PN) \* comments *\
* \* also you can have ... *\
* \* ...more comments here *\
* PARAMETER-TYPE P1,P2; \* comments *\
* \* ... or here *\
* PARAMETER-TYPE P3; \* more comments *\
* PARAMETER-TYPE P4,...,PN; \* yet more comments *\
* { \* ... or even here *\
* function-body
* }
*
* The program gets confused when it finds 1) comments inside the
* parameter list, 2) semicolons placed after comments, 3) comments
* that open in one line and close in a different one, 4) curly
* braces '{' that do not appear as the first char in a separate
* line, and perhaps in some more cases I've not figured out yet.
* But you shouldn't use such a bad programming style anyways 8*)
* (Hint: I think that unix-like preprocessors (like "cpp" in
* MWc, gcc, etc) take a flag that causes comments to be
* removed from the source-file...check it out)
*
* However, it is almost guaranteed to work with files that follow
* K & R, or Rochkind's programming style (except in one case...)
*
*
* Bugs: kr2ansi fails to find integer functions that do not include
* the data-type in the beginning of the function-declaration:
* e.g.
* main(argc,argv)
* int argc;
* char *argv[];
* {
* ...
* }
*
* The reason is that the program identifies a K&R function header
* by checking the first word of the line, which *must* be a valid
* data-type (standard or user-defined). If you come up with a bet-
* ter algorithm please let me know. (Please don't tell me to write
* a complete C parser, cause I won't)
*/
#include <stdio.h> /* don't tell me you don't know this one */
#include "kr2ansi.h" /* constants, macros & user-defined types */
char *progname; /* the name of the program (main() sets it to argv[0]) */
char *data_types[]={ DATA_TYPES }; /* array of valid data-types; the user-defined slots are filled in by parse() or set_dtypes() */
unsigned options = NONE; /* bit map with command-line options (parse() ORs in SHOW_PARA, RD_TYPES, ...) */
/* ======================================================================== *
* main
* ======================================================================== */
int main(argc, argv)
int argc;
char *argv[];
{
    /* Entry point: record the program's name, let parse() check the
     * command line and pick up the input file's name, then either
     * convert that file or print the usage line.
     * Returns 0 whenever control comes back from the call made.
     */
    char fname_in[MAXNAME];      /* receives the name of the input file */
    GLOBAL char *data_types[];   /* table of valid data-types           */
    void kr2ansi();
    BOOLEAN parse();             /* FALSE means: bad command line       */
    extern void usage();         /* in file: ERROR.C                    */

    progname = argv[0];          /* remember how we were invoked        */

    if ( parse(argc, argv, data_types, fname_in) )
        kr2ansi(fname_in);
    else
        usage("[-p] [ [-r file1] | [usr_def1 ... usr_def10] ] file2");

    return(0);
}
/* ======================================================================== *
* parse
* ======================================================================== */
BOOLEAN parse(argc, argv, data_types, fname_in)
int argc; /* number of args in command-line */
char *argv[]; /* the arguments themselves */
char *data_types[]; /* array of valid data-types */
char *fname_in; /* name of the input file */
{
/* Check the validity of the command line,
* make valid specified user-defined data-types,
* and assign the specified filename to : fname_in
*/
register int i,u; /* integer counters */
void set_dtypes(); /* read user-def. types from a file */
GLOBAL unsigned options; /* bit map with command-line options */
STD_CLIB char *strcpy(); /* part of the standard C-library */
if (argc < 2) /* too few arguments */
return(FALSE);
i=1; /* you can add your own options in this loop */
while ( argv[i][0] == '-' )
{
if ( !strcmp(argv[i],"-p") ) /* option -p: */
{ /* show parameters */
if (i == argc-1)
return(FALSE);
options |= SHOW_PARA;
}
else if ( !strcmp(argv[i],"-r") ) /* option -r: */
{ /* read user-defined types from file */
if (i != argc-3)
return(FALSE);
options |= RD_TYPES;
set_dtypes(argv[i+1], data_types);
}
else if ( !strcmp(argv[i],"") )
options |= UNUSED4; /* unused slot */
else if ( !strcmp(argv[i],"") )
options |= UNUSED5; /* unused slot */
else if ( !strcmp(argv[i],"") )
options |= UNUSED6; /* unused slot */
else if ( !strcmp(argv[i],"") )
options |= UNUSED7; /* unused slot */
else if ( !strcmp(argv[i],"") )
options |= UNUSED8; /* unused slot */
else /* invalid option */
return(FALSE); /* return FALSE */
i++;
}
if ( !(options & RD_TYPES) ) /* get user-def types from command-line */
for (u=i; (u<=N_DT_USR && u<argc-1); u++)
strcpy(data_types[DT_USR1+u-i], argv[u]);
strcpy(fname_in, argv[argc-1]); /* get name of the input-file */
return(TRUE);
}
/* ======================================================================== *
* kr2ansi
* ======================================================================== */
void kr2ansi(fname)
char *fname;                     /* name of the input file */
{
    /* Walk through the file 'fname' one line at a time.
     * Every line that is_KR_header() accepts is passed, together with
     * the open stream, to make_ansi(), and the rewritten line is then
     * printed on standard output. All other lines are skipped.
     */
    char line[MAXLINE];          /* the line currently examined */
    FILE *fp_in;                 /* stream opened on: fname */
    void make_ansi();            /* turns a K&R header into an ANSI one */
    BOOLEAN is_KR_header();      /* TRUE for a valid K&R function header */
    extern void fatal();         /* in file: ERROR.C */
    STD_CLIB char *fgets();      /* part of the standard C-library */

    fp_in = fopen(fname, "r");
    demand(fp_in != NULL, "file2 does not exist");

    for (;;)
    {
        if (fgets(line, MAXLINE, fp_in) == NULL)
            break;               /* end of file (or read error) */
        if ( !is_KR_header(line) )
            continue;            /* not a function header */
        make_ansi(fp_in, line);
        printf("%s",line);
    }

    fclose(fp_in);
}
/* ======================================================================== *
* set_dtypes()
* ======================================================================== */
void set_dtypes(fname, data_types)
char *fname;          /* name of the file to read user-def types from */
char *data_types[];   /* array with valid data-types */
{
    /*
     * set_dtypes() reads up to N_DT_USR blank-separated words from the
     * file 'fname' and stores them in data_types[DT_USR1] and the slots
     * that follow it. At most DT_MAXWORD-1 characters of a word are
     * kept (longer words are truncated).
     *
     * A file that cannot be opened is reported through demand(), and a
     * file with no words in it through fatal().
     *
     * The characters are written straight into the storage each slot
     * already points at.
     * NOTE(review): this presumes every user slot of DATA_TYPES refers
     * to at least DT_MAXWORD writable bytes -- confirm in kr2ansi.h.
     */
    FILE *fp;                    /* used for reading the file: fname */
    int c;
    BOOLEAN onword;              /* TRUE if we are on a word */
    register int ccount;         /* counter for word's letters (up to DT_MAXWORD) */
    register int wcount;         /* counter for words (up to N_DT_USR) */
    extern void fatal();         /* in file: ERROR.C */
    STD_CLIB int fgetc();        /* part of the standard C-library */

    fp = fopen(fname, "r");
    demand( fp != NULL, "file1 does not exist");

    while ((c=fgetc(fp)) != EOF && IS_BLANK(c))   /* skip leading blanks */
        ;
    if (c == EOF)                /* if file is empty, exit */
    {
        fclose(fp);
        fatal("file1 is empty");
        return;                  /* in case fatal() ever returns: do not */
                                 /* store EOF as a character below       */
    }

    /*
     * IMPORTANT:
     * at this point we know for a fact that 'c' is
     * the first letter of the first word in the file
     */
    wcount = DT_USR1;            /* the first slot for user-def types */
    ccount = 0;                  /* initialize char-counter */
    data_types[wcount][ccount++] = c;   /* set the first character */
    onword = TRUE;               /* we are on the first word */

    while ( wcount < DT_USR1+N_DT_USR && (c=fgetc(fp)) != EOF )
    {
        if ( IS_BLANK(c) )       /* we are on a blank character */
        {
            if (onword)          /* if it immediately follows a word */
            {                    /* we should take care of some stuff */
                onword = FALSE;                     /* we're not on a word anymore */
                data_types[wcount][ccount] = '\0';  /* terminate previous word */
                wcount++;                           /* increase word-counter */
                ccount = 0;                         /* reset char-counter */
            }
        }
        else                     /* we are on a letter... */
        {
            onword = TRUE;       /* ..thus we are on a word */
            if (ccount < DT_MAXWORD-1)   /* no more DT_MAXWORD chars allowed */
                data_types[wcount][ccount++] = c;   /* append 'c' in current data-type */
        }
    }

    /* The file may end right after the last word, with no blank or
     * newline behind it. The loop above terminates a word only when it
     * sees the blank that follows, so that last word must be closed here.
     */
    if (onword && wcount < DT_USR1+N_DT_USR)
        data_types[wcount][ccount] = '\0';

    fclose(fp);
}
/* ======================================================================== *
* is_KR_header
* ======================================================================== */
BOOLEAN is_KR_header(header)
char *header; /* potential K&R func. header */
{
/* This function recognises a "valid" K&R func. header line
* by testing three basic conditions (the order is significant):
*
* 1. the first word _must_ be a valid data-type.
*
* 2. the line _must not_ contain a semicolon.
* (BUG: we screw up if line contains ';' inside a comment)
*
* 3. a) after we remove potentially commented characters,
* b) the last non-blank character _must_ be a closing parenthesis: ')'
*
* If any of the above conditions fail then the function returns FALSE,
* otherwise the line is considered to be valid, it is modified a little
* (see below), and the function returns TRUE.
*
* If the function concludes that the line is a valid K&R func-header
* it modifies the line in order to bring it in the form expected by
* the function make_ansi(): a) removes any white spaces and potential
* comments after the closing parenthesis, and b) appends a semicolon
* and a newline character.
* (Actually potential comments are _always_ removed)
*/
char word[MAXWORD]; /* the first word of the line */
char *get_1st_word(); /* returns the 1st word in a string */
register int i; /* just a counter */
BOOLEAN valid(); /* TRUE if the 1st param. is a valid data-type */
STD_CLIB int strlen(); /* part of the standard C-library */
STD_CLIB char *strcpy(); /* part of the standard C-library */
strcpy(word,get_1st_word(header));/* C o n d i t i o n # 1: */
if ( !valid(word) ) /* 1st word must be a valid data-type */
return(FALSE);
i = strlen(header); /* C o n d i t i o n # 2: */
while (i > 0){ /* no ';' allowed in the header */
if ( header[i] == ';' )
return(FALSE);
i--;
}
/* C o n d i t i o n # 3: */
i = 0; /* a. remove potential comments */
while (header[i] != '\0' && header[i] != '/') i++;
header[i] = '\0';
i = strlen(header)-1; /* b. last non-blank char must be ')' */
while ( IS_BLANK(header[i]) && i>0 ) i--;
if ( header[i] != ')' )
return(FALSE);
/* M o d i f y L i n e: */
header[++i] = ';'; /* append a semicolon, and */
header[++i] = '\n'; /* a newline character */
header[++i] = '\0';
return(TRUE);
}
/* ======================================================================== *
* get_1st_word
* ======================================================================== */
char *get_1st_word(line)
char *line;
{
    /* Return the first word of 'line': leading blanks are skipped and
     * the word ends at the next blank or at the end of the string.
     * 'line' itself is not modified.
     *
     * If the word has MAXWORD characters or more, W_TOO_LONG is returned
     * instead. Every caller in this file copies the result into a
     * char[MAXWORD] buffer, so the longest word that can be handed back
     * safely has MAXWORD-1 characters (the old test, "> MAXWORD", let a
     * word of exactly MAXWORD characters overflow those buffers).
     * NOTE:
     *   W_TOO_LONG itself must fit in such a buffer, terminator included.
     *
     * The word is built in a static buffer, which the next call
     * overwrites: copy the result before calling again. (The buffer
     * used to be an automatic array, so the returned pointer referred
     * to storage that no longer existed once the function returned.)
     */
    static char word[MAXLINE];   /* room for a whole line (see below) */
    register int i;              /* just a counter */
    STD_CLIB char *strcpy();     /* part of the standard C-library */

    i=0;                         /* skip leading blanks and... */
    while (line[i] != '\0' && IS_BLANK(line[i]))
        i++;
    strcpy(word, &line[i]);      /* ...put result in: 'word' */
                                 /* NOTE: 'word' has room for MAXLINE chars */

    i=0;                         /* keep only the 1st word */
    while (word[i] != '\0' && !IS_BLANK(word[i]))
        i++;
    word[i] = '\0';              /* 'i' is now the length of the word */

    /* return 'word' (or W_TOO_LONG) */
    return( (i >= MAXWORD) ? W_TOO_LONG : word );
}
/* ======================================================================== *
* valid
* ======================================================================== */
BOOLEAN valid(word)
char *word;
{
    /* Look 'word' up in the global table 'data_types', examining the
     * slots DT_STD1 through LAST_DT_STD+N_DT_USR (both included).
     * Returns TRUE as soon as a slot holds the same string,
     * FALSE when none of them does.
     */
    register int slot;           /* index of the slot being compared */
    GLOBAL char *data_types[];   /* array with valid data-types */
    STD_CLIB int strcmp();       /* part of the standard C-library */

    slot = DT_STD1;
    while (slot <= (LAST_DT_STD+N_DT_USR))
    {
        if ( strcmp(word, data_types[slot]) == 0 )
            return(TRUE);
        slot++;
    }
    return(FALSE);
}
/* ======================================================================== *
* make_ansi
* ======================================================================== */
void make_ansi(fp, proto)
FILE *fp;                        /* pointer to file: fname_in */
char *proto;                     /* ANSI-prototyping to be produced */
{
    /* This function takes the string 'proto' (a function header in the
     * form left by is_KR_header()) and converts it, in place, to an
     * ANSI function prototype:
     *
     * First it inserts the word "extern" in front of the function's
     * data-type. Then has_params() checks whether the parameter-list is
     * empty and removes all the chars after the opening parenthesis
     * (i.e. 'proto' becomes: "extern fn_type fn_name(" ).
     * If the parameter list was empty, the string "void);\n" is
     * appended and the function returns. Otherwise the parameter
     * declarations are read from 'fp', one line at a time, until a line
     * whose first word starts with '{', the end of the file, or an
     * error reported by append_param(). For each such line everything
     * from the first '/' on is dropped (potential comments) and the
     * first word (parameter-type) is checked against the valid
     * data-types; when it is not one of them, UNDEFINED_DATA_TYPE is
     * used instead. The type and the parameters are then appended to
     * 'proto' by append_param().
     *
     * BUG: The above algorithm fails when a comment is opened in
     *      one line and is closed in a different line.
     *      The result is that in the output line commented words
     *      will appear as parameters of type UNDEFINED_DATA_TYPE.
     *
     * NOTE(review): prefixing "extern " is not length-checked; a header
     * within 7 characters of MAXLINE overruns 'par_decl' and 'proto'.
     */
    char par_decl[MAXLINE];      /* parameter-declaration line */
    char par_type[MAXWORD];      /* parameter's data-type */
    char *get_1st_word();
    int len;                     /* length of 'proto' before the tail is fixed */
    BOOLEAN no_err = TRUE;
    BOOLEAN append_param();      /* see below */
    BOOLEAN has_params();        /* see below */
    BOOLEAN valid();
    STD_CLIB int strlen();       /* part of the standard C-library */
    STD_CLIB char *strcpy();     /* part of the standard C-library */
    STD_CLIB char *strcat();     /* part of the standard C-library */
    STD_CLIB char *strchr();     /* part of the standard C-library */

    strcpy(par_decl, "extern "); /* put "extern" in the front */
    strcat(par_decl, proto);     /* (note: here we use 'par_decl' */
    strcpy(proto, par_decl);     /* as temporary string storage) */

    if ( !has_params(proto) )    /* check & remove parameter-list */
    {
        strcat(proto, "void);\n");
        return;
    }

    /* Get the next 'par_decl'. 'no_err' is tested first, so that no
     * further line is read (and thrown away) once append_param() has
     * reported an error.
     */
    while ( no_err && fgets(par_decl, MAXLINE, fp) != NULL )
    {
        char *ptr;

        if ( (ptr = strchr(par_decl,'/')) != NULL )  /* remove potential comments */
            *ptr = '\0';
        strcpy(par_type, get_1st_word(par_decl));    /* get the parameter-type */
        if (par_type[0] == '{')                      /* if we hit a '{' we stop */
            break;
        if ( !valid(par_type) )                      /* check for valid 'par_type' */
            strcpy(par_type, UNDEFINED_DATA_TYPE);
        /* append ANSI parameter-list */
        no_err = append_param(proto, par_type, par_decl);
    }

    /* append_param() leaves 'proto' in the following form:
     *   "extern fn_type fn_name(ptype p1, ptype p2, ..., ptype pn, "
     * so the trailing ", " is replaced by ");\n".
     * If nothing at all was appended (e.g. the parameters were never
     * declared, or the file ended), 'proto' still ends with '(' and
     * there is nothing to erase: going two chars back would eat the end
     * of the function's name. In that case the list is simply closed,
     * giving a declaration without parameter information.
     */
    len = strlen(proto);
    if (len >= 2 && proto[len-2] == ',' && proto[len-1] == ' ')
        strcpy(proto+len-2, ");\n");
    else
        strcat(proto, ");\n");
}
/* ======================================================================== *
* has_params
* ======================================================================== */
BOOLEAN has_params(header)
char *header;                    /* the function-header line */
{
    /* This function checks whether the parameter-list of 'header' is
     * empty, and removes all chars after the opening parenthesis.
     *
     * Returns TRUE if the list holds at least one parameter, FALSE if
     * it is empty. The parameter-list is considered empty when the
     * char ')' comes right after the char '(' or when only white
     * (BLANK) characters stand between the two.
     *
     * A header without any '(' is left untouched and reported as
     * having no parameters.
     */
    char *open;                  /* the opening parenthesis */
    char *scan;                  /* walks over the blanks that follow it */
    BOOLEAN param_yes;           /* what the function returns */
    STD_CLIB char *strchr();     /* part of the standard C-library */

    open = strchr(header,'(');
    if (open == NULL)            /* no parameter-list at all */
        return(FALSE);

    /* Start looking _after_ the '(': the old scan started on the '('
     * itself, which is not a blank, so a blank parameter-list was never
     * recognised.
     */
    scan = open+1;
    while ( *scan != '\0' && IS_BLANK(*scan) )
        scan++;
    param_yes = (*scan != ')') ? TRUE : FALSE;

    *(open+1) = '\0';            /* remove all chars coming after '(' */
    return(param_yes);
}
/* ======================================================================== *
* append_param
* ======================================================================== */
BOOLEAN append_param(proto, par_type, par_decl)
char *proto;                     /* the output ANSI-prototype */
char *par_type;                  /* the data-type of the parameter */
char *par_decl;                  /* the parameter-declaration line */
{
    /* Get a parameter-declaration line, construct the appropriate
     * ANSI-prototyped-declaration string for each parameter in it, and
     * append it to the ANSI prototype.
     *
     * 'par_type' is the 1st word of 'par_decl' (or the replacement
     * make_ansi() chose for it), so what comes after the 1st word
     * should be a list of parameters. BUT the 2nd word may itself be a
     * valid data-type:
     *   - if 'par_type' is "register", the 2nd word is the real type
     *     and replaces it ("register" alone is kept when the 2nd word
     *     is not a valid data-type);
     *   - otherwise (e.g. "unsigned", "short" or "long" followed by
     *     "int") the 2nd word is part of the type and is appended to
     *     it after a blank.
     * In both cases the 2nd word is not treated as a parameter.
     *
     * Returns TRUE when every parameter was appended. Returns FALSE,
     * after appending the marker "<...>, ", when the output line would
     * get too long for a MAXLINE-byte buffer.
     *
     * 'par_decl' is cut into pieces by strtok(). 'par_type' is no
     * longer modified: the complete type is assembled in a local buffer,
     * because appending to the caller's MAXWORD-byte array could
     * overrun it.
     */
    char *s, *param;
    char second[MAXWORD];        /* the 2nd word of 'par_decl' */
    char type[2*MAXWORD+2];      /* the complete type: room for two words */
                                 /* of up to MAXWORD-1 chars and a blank  */
    register int i;
    void construct_ANSI_declaration();
    BOOLEAN valid();
    STD_CLIB int strcmp();       /* (was wrongly declared as returning int *) */
    STD_CLIB int strlen();
    STD_CLIB char *strcpy(), *strcat(), *strtok();

    i = 0;                       /* skip the first word of 'par_decl' */
    while (par_decl[i] != '\0' && IS_BLANK(par_decl[i])) i++;
    while (par_decl[i] != '\0' && !IS_BLANK(par_decl[i])) i++;
    s = &par_decl[i];

    /* check for: register data-type,
     *       or   unsigned/short/long int
     */
    strcpy(type, par_type);
    strcpy(second, get_1st_word(s));
    if ( valid(second) )
    {
        if ( !strcmp(par_type,"register") )   /* handle "register" cases */
            strcpy(type, second);
        else                                  /* handle unsigned/long/short */
        {
            strcat(type, " ");                /* cat " int" in 'type' */
            strcat(type, second);
        }
        while( *s != '\0' && IS_BLANK(*s) ) s++;    /* skip the 2nd word: "int" */
        while( *s != '\0' && !IS_BLANK(*s) ) s++;
    }

    /* append ANSI param-declaration to 'proto' */
    while ( (param = strtok(s, ",; \t\n")) != NULL )
    {
        /* Error-check for "output line too long". Besides the new text
         * itself (type + ' ' + param + ", " = 3 extra chars) room is
         * kept for what may still follow: the 7-char marker "<...>, "
         * written below by a later call, the one extra char make_ansi()
         * needs to turn ", " into ");\n", and the terminator: 12 in all.
         */
        if ( strlen(proto)+strlen(type)+strlen(param)+12 > MAXLINE )
        {
            strcat(proto, "<...>, ");
            return(FALSE);
        }
        construct_ANSI_declaration(proto, type, param);
        s = (char *)NULL;        /* make strtok() continue where it stopped */
    }
    return(TRUE);
}
/* ======================================================================== *
* construct_ANSI_declaration
* ======================================================================== */
void construct_ANSI_declaration(proto, par_type, param)
char *proto;                     /* the function prototype */
char *par_type;                  /* parameter's data-type */
char *param;                     /* the parameter itself */
{
    /* Append one parameter to the parameter-list in 'proto'.
     * The data-type and a blank always go in first. When the SHOW_PARA
     * bit is set in 'options' the parameter is copied as it stands;
     * otherwise only one "*" is written for every '[' or '*' found in
     * it. A blank left at the very end is removed again, and finally
     * the separator ", " is added.
     */
    char *p;                     /* walks over 'param' */
    register int last;           /* index of the last char in 'proto' */
    GLOBAL unsigned options;     /* bit map with command-line options */
    STD_CLIB int strlen();       /* part of the standard C-library */
    STD_CLIB char *strcat();     /* part of the standard C-library */

    strcat(proto, par_type);     /* the parameter's data-type... */
    strcat(proto, " ");          /* ...and a blank */

    if ( !(options & SHOW_PARA) )     /* leave the parameter's name out */
    {
        for (p = param; *p != '\0'; p++)
        {
            switch (*p)
            {
            case '[':                 /* an array or a pointer: */
            case '*':                 /* shows up as one more "*" */
                strcat(proto, "*");
                break;
            default:
                break;
            }
        }
    }
    else                              /* put the parameter in as it is */
        strcat(proto, param);

    last = strlen(proto)-1;           /* separate parameters with ", " */
    if ( proto[last] == ' ' )
        proto[last] = '\0';
    strcat(proto, ", ");
}