home *** CD-ROM | disk | FTP | other *** search
- /*++
- /* NAME
- /* unproto 1
- /* SUMMARY
- /* ANSI C to old C converter
- /* PACKAGE
- /* unproto
- /* SYNOPSIS
- /* /lib/cpp ... | unproto
- /*
- /* /somewhere/cpp ...
- /* DESCRIPTION
- /* This document describes a filter that sits between the
- /* C preprocessor (usually \fI/lib/cpp\fP) and the next C compiler
- /* pass. It rewrites ANSI-C style function headers, function type
- /* declarations, function pointer types, and function pointer casts
- /* to old style. Other ANSI-isms are passed on without modification
- /* (token pasting, pragmas, etcetera).
- /*
- /* For maximal flexibility, the "cpp | unproto" pipeline can be
- /* packaged as an executable shell script named "/somewhere/cpp".
- /* This script should then be specified to the C compiler as a
- /* non-default preprocessor. It will not work if your C compiler
- /* specifies output file names to the preprocessor.
- /*
- /* The overhead of shell script interpretation can be avoided by
- /* having the unprototyper itself open the pipe to the preprocessor.
- /* In that case, the source should be compiled with the PIPE_THROUGH_CPP
- /* macro defined (usually as "/lib/cpp"), and the resulting binary
- /* should be installed as "/somewhere/cpp".
- /* SEE ALSO
- /* .ad
- /* .fi
- /* cc(1), how to specify a non-default C preprocessor.
- /*
- /* Some versions of the lint command are implemented as a shell
- /* script. It should require only minor modification for integration
- /* with the unprotoizer. Other versions of the lint command accept the same
- /* command syntax as the C compiler for the specification of a non-default
- /* preprocessor. Some research may be needed.
- /* DIAGNOSTICS
- /* The program will complain if it unexpectedly
- /* reaches the end of input.
- /* BUGS
- /* Should be run on preprocessed source only, i.e. after macro expansion.
- /*
- /* Declarations of (whatever) are misunderstood and will result in
- /* syntax errors.
- /*
- /* Does not generate explicit type casts for function argument
- /* expressions.
- /* AUTHOR(S)
- /* Wietse Venema (wietse@wzv.win.tue.nl)
- /* Eindhoven University of Technology
- /* Department of Mathematics and Computer Science
- /* Den Dolech 2, P.O. Box 513, 5600 MB Eindhoven, The Netherlands
- /* LAST MODIFICATION
- /* 91/09/22 21:21:35
- /* VERSION/RELEASE
- /* 1.2
- /*--*/
-
- static char unproto_sccsid[] = "@(#) unproto.c 1.3 91/11/30 21:10:30";
-
- /* C library */
-
- #include <stdio.h>
- #include <errno.h>
-
- extern void exit();
- extern int optind;
- extern char *optarg;
- extern int getopt();
-
- /* Application-specific stuff */
-
- #include "vstring.h"
- #include "stdarg.h"
- #include "token.h"
- #include "error.h"
- #include "symbol.h"
-
/*
 * Forward declarations for functions that are referenced before their
 * definition. These are old-style declarations: they name the return type
 * only and carry no parameter information.
 */

static struct token *dcl_flush(), *show_func_ptr_type(), *show_struct_type();
static void block_flush(), block_dcls();
static void show_arg_name(), show_type();
static void pair_flush(), check_cast();

#ifdef PIPE_THROUGH_CPP
static int pipe_stdin_through_cpp();
#endif

/* check_cast_flush - run check_cast() on a list, then tok_free() the list */

#define check_cast_flush(t) (check_cast(t), tok_free(t))
-
/*
 * DPRINTF - debugging printf. Without DEBUG it expands to a (void) cast, so
 * that DPRINTF(fmt, arg) turns into a discarded comma expression: nothing
 * is printed, but side effects in the arguments (such as ++count) are
 * still carried out.
 */

#ifdef DEBUG
#define DPRINTF printf
#else
#define DPRINTF (void)
#endif

/*
 * An attempt to make some complicated expressions a bit more readable.
 *
 * Every macro parameter is parenthesized so that any pointer expression can
 * be passed, not just a plain identifier. The parameters are evaluated more
 * than once: do not pass expressions with side effects.
 */

/* STREQ - string equality; the first characters are compared before strcmp */

#define STREQ(x,y) (*(x) == *(y) && !strcmp((x),(y)))

/*
 * LAST_ARG_AND_EQUAL - true when list element s has no successor, holds
 * exactly one token (head == tail), and the text of that token equals c.
 */

#define LAST_ARG_AND_EQUAL(s,c) ((s)->next == 0 && (s)->head \
    && ((s)->head == (s)->tail) \
    && (STREQ((s)->head->vstr->str, (c))))

/*
 * LIST_BEGINS_WITH_STAR - true when the first token of the first element
 * of list s is a '*'. An empty list (no head) yields false instead of a
 * null pointer dereference.
 */

#define LIST_BEGINS_WITH_STAR(s) ((s)->head && (s)->head->head \
    && (s)->head->head->tokno == '*')

/*
 * IS_FUNC_PTR_TYPE - true when s is a list that begins with a star and
 * that is immediately followed by another list: (*list1) (list2).
 */

#define IS_FUNC_PTR_TYPE(s) ((s)->tokno == TOK_LIST && (s)->next \
    && (s)->next->tokno == TOK_LIST \
    && LIST_BEGINS_WITH_STAR(s))
-
- /* main - driver */
-
- int main(argc, argv)
- int argc;
- char **argv;
- {
- register struct token *t;
- #ifdef PIPE_THROUGH_CPP /* pipe through /lib/cpp */
- int cpp_status;
- int wait_pid;
- int cpp_pid;
-
- cpp_pid = pipe_stdin_through_cpp(argv);
- #endif
-
- sym_init(); /* prime the symbol table */
-
- while (t = tok_class(DO_WSPACE)) {
- if (t = dcl_flush(t)) { /* try declaration */
- if (t->tokno == '{') { /* examine rejected token */
- block_flush(t); /* body */
- } else {
- tok_flush(t); /* other, recover */
- }
- }
- }
-
- #ifdef PIPE_THROUGH_CPP /* pipe through /lib/cpp */
- while ((wait_pid = wait(&cpp_status)) != -1 && wait_pid != cpp_pid)
- /* void */ ;
- return (wait_pid != cpp_pid || cpp_status != 0);
- #else
- return (0);
- #endif
- }
-
#ifdef PIPE_THROUGH_CPP                 /* pipe through /lib/cpp */

/*
 * pipe_stdin_through_cpp - avoid shell script overhead
 *
 * Starts PIPE_THROUGH_CPP with our own argument vector and connects its
 * standard output to our standard input. Returns the process id of the
 * preprocessor; terminates the program when the pipe, the fork or the
 * output redirection fails.
 */

static int pipe_stdin_through_cpp(argv)
char **argv;
{
    int fds[2];
    int child;
    char **ap;

    /*
     * With most UNIX implementations, the second non-option argument to
     * /lib/cpp specifies the output file. If an output file other than
     * stdout is specified, we must force /lib/cpp to write to stdout, and
     * we must redirect our own standard output to the specified output
     * file.
     */

#define IS_OPTION(cp) ((cp)[0] == '-' && (cp)[1] != 0)

    /* Step over argv[0] and over all leading options, if any. */

    ap = argv + 1;
    while (*ap != 0 && IS_OPTION(*ap))
        ap++;

    /*
     * Assume that the first non-option argument is the input file name.
     * What follows could be the output destination or an option (System V
     * Release 2 /lib/cpp gets the options *after* the file arguments).
     */

    if (*ap != 0) {
        ap++;
        if (*ap != 0 && (*ap)[0] != '-') {

            /*
             * The input file is followed by something that is neither an
             * option ("-stuff") nor a hyphen ("-"). Take over that file as
             * our own standard output, then overwrite the argument so that
             * /lib/cpp writes to its standard output instead.
             */

            if (freopen(*ap, "w", stdout) == 0) {
                perror(*ap);
                exit(1);
            }
            *ap = "-";
        }
    }

    /* Set up the pipe that connects /lib/cpp to our standard input. */

    if (pipe(fds)) {
        perror("pipe");
        exit(1);
    }

    child = fork();
    if (child == -1) {                  /* error */
        perror("fork");
        exit(1);
    }
    if (child == 0) {                   /* child: becomes /lib/cpp */
        close(fds[0]);                  /* reading end is not needed */
        close(1);                       /* make descriptor 1 available */
        if (dup(fds[1]) != 1)           /* stdout now is the pipe */
            error(1, "dup() problem");
        close(fds[1]);                  /* drop the redundant descriptor */
        execv(PIPE_THROUGH_CPP, argv);
        perror(PIPE_THROUGH_CPP);       /* only reached when execv fails */
        exit(1);
    }

    /* parent: read the preprocessor output from standard input */

    close(fds[1]);                      /* writing end is not needed */
    close(0);                           /* make descriptor 0 available */
    if (dup(fds[0]) != 0)               /* stdin now is the pipe */
        error(1, "dup() problem");
    close(fds[0]);                      /* drop the redundant descriptor */
    return (child);
}

#endif
-
- /* header_flush - rewrite new-style function header to old style */
-
- static void header_flush(t)
- register struct token *t;
- {
- register struct token *s;
-
- /* Do argument names, but suppress void and rewrite trailing ... */
-
- if (LAST_ARG_AND_EQUAL(t->head, "void")) {
- put_str("()\n"); /* no arguments */
- } else {
- for (s = t->head; s; s = s->next) { /* foreach argument... */
- if (LAST_ARG_AND_EQUAL(s, "...")) {
- #ifdef _VA_ALIST_ /* see ./stdarg.h */
- put_ch(s->tokno); /* ',' */
- put_str(_VA_ALIST_); /* varargs magic */
- #endif
- } else {
- put_ch(s->tokno); /* opening '(' or ',' */
- show_arg_name(s); /* extract argument name */
- }
- }
- put_str(")\n"); /* closing ')' */
- }
-
- /* Do argument types, but suppress void and trailing ... */
-
- if (!LAST_ARG_AND_EQUAL(t->head, "void")) {
- for (s = t->head; s; s = s->next) { /* foreach argument... */
- if (!LAST_ARG_AND_EQUAL(s, "...")) {
- if (s->head != s->tail) { /* really new-style argument? */
- show_line_control(); /* fix line number */
- show_type(s); /* rewrite type info */
- put_str(";\n");
- }
- }
- }
- }
- tok_free(t);
- show_line_control(); /* because '{' follows */
- }
-
- /* show_arg_name - extract argument name from argument type info */
-
- static void show_arg_name(s)
- register struct token *s;
- {
- if (s->head) {
- register struct token *p;
- register struct token *t = 0;
-
- /* Find the last interesting item. */
-
- for (p = s->head; p; p = p->next) {
- if (p->tokno == TOK_WORD) {
- t = p; /* remember last word */
- } else if (IS_FUNC_PTR_TYPE(p)) {
- t = p; /* or function pointer */
- p = p->next;
- }
- }
-
- /* Extract argument name from last interesting item. */
-
- if (t) {
- if (t->tokno == TOK_LIST)
- show_arg_name(t->head); /* function pointer, recurse */
- else
- tok_show(t); /* print last word */
- }
- }
- }
-
- /* show_type - rewrite type to old-style syntax */
-
- static void show_type(s)
- register struct token *s;
- {
- register struct token *p;
-
- for (p = s->head; p; p = p->next) {
- if (IS_FUNC_PTR_TYPE(p)) {
- p = show_func_ptr_type(p); /* function pointer type */
- } else {
- tok_show(p); /* other */
- }
- }
- }
-
- /* show_func_ptr_type - display function_pointer type using old-style syntax */
-
- static struct token *show_func_ptr_type(t)
- struct token *t;
- {
- register struct token *s;
-
- /*
- * Rewrite (list1) (list2) to (list1) (). Only (list1) is given to us;
- * the caller must have verified the presence of (list2). Account for the
- * rare case that (list1) is a comma-separated list. That should be an
- * error, but we do not want to waste any information.
- */
-
- for (s = t->head; s; s = s->next) {
- put_ch(s->tokno); /* opening paren or ',' */
- show_type(s); /* recurse */
- }
- put_str(")()"); /* closing paren */
- return (t->next);
- }
-
- /* show_struct_type - display structured type, rewrite function-pointer types */
-
- static struct token *show_struct_type(p)
- register struct token *p;
- {
- tok_show(p); /* opening brace */
-
- while (p->next) { /* XXX cannot return 0 */
- p = p->next;
- if (IS_FUNC_PTR_TYPE(p)) {
- p = show_func_ptr_type(p); /* function-pointer member */
- } else if (p->tokno == '{') {
- p = show_struct_type(p); /* recurse */
- } else {
- tok_show(p); /* other */
- if (p->tokno == '}') {
- return (p); /* done */
- }
- }
- }
- DPRINTF("/* missing '}' */");
- return (p);
- }
-
- /* is_func_ptr_cast - recognize function-pointer type cast */
-
- static int is_func_ptr_cast(t)
- register struct token *t;
- {
- register struct token *p;
-
- /*
- * Examine superficial structure. Require (list1) (list2). Require that
- * list1 begins with a star.
- */
-
- if (!IS_FUNC_PTR_TYPE(t))
- return (0);
-
- /*
- * Make sure that there is no name in (list1). Do not worry about
- * unexpected tokens, because the compiler will complain anyway.
- */
-
- for (p = t->head->head; p; p = p->next) {
- switch (p->tokno) {
- case TOK_LIST: /* recurse */
- return (is_func_ptr_cast(p));
- case TOK_WORD: /* name in list */
- return (0);
- }
- }
- return (1); /* no name found */
- }
-
- /* check_cast - display ()-delimited, comma-separated list */
-
- static void check_cast(t)
- struct token *t;
- {
- register struct token *s;
- register struct token *p;
-
- /*
- * Rewrite function-pointer types and function-pointer casts. Do not
- * blindly rewrite (*list1)(list2) to (*list1)(). Function argument lists
- * are about the only thing we can discard without provoking diagnostics
- * from the compiler.
- */
-
- for (s = t->head; s; s = s->next) {
- put_ch(s->tokno); /* opening paren or ',' */
- for (p = s->head; p; p = p->next) {
- switch (p->tokno) {
- case TOK_LIST:
- if (is_func_ptr_cast(p)) { /* not: IS_FUNC_PTR_TYPE(p) */
- p = show_func_ptr_type(p); /* or we might take away */
- } else { /* function-call arguments */
- check_cast(p); /* recurse */
- }
- break;
- case '{':
- p = show_struct_type(p); /* rewrite func. ptr. types */
- break;
- default:
- tok_show(p);
- break;
- }
- }
- }
- put_ch(')'); /* closing paren */
- }
-
- /* block_dcls - on the fly rewrite decls/initializers at start of block */
-
- static void block_dcls()
- {
- register struct token *t;
-
- /*
- * Away from the top level, a declaration should be preceded by type or
- * storage-class information. That is why inside blocks, structs and
- * unions we insist on reading one word before passing the _next_ token
- * to the dcl_flush() function.
- *
- * Struct and union declarations look the same everywhere: we make an
- * exception for these more regular constructs and pass the "struct" and
- * "union" tokens to the type_dcl() function.
- */
-
- while (t = tok_class(DO_WSPACE)) {
- switch (t->tokno) {
- case TOK_WSPACE: /* preserve white space */
- case '\n': /* preserve line count */
- tok_flush(t);
- break;
- case TOK_WORD: /* type declarations? */
- tok_flush(t); /* advance to next token */
- t = tok_class(DO_WSPACE); /* null return is ok */
- case TOK_COMPOSITE: /* struct or union */
- if ((t = dcl_flush(t)) == 0)
- break;
- /* FALLTRHOUGH */
- default: /* end of declarations */
- DPRINTF("/* end dcls */");
- /* FALLTRHOUGH */
- case '}': /* end of block */
- tok_unget(t);
- return;
- }
- }
- }
-
- /* block_flush - rewrite struct, union or statement block on the fly */
-
- static void block_flush(t)
- register struct token *t;
- {
- static int count = 0;
-
- tok_flush(t);
- DPRINTF("/*%d*/", ++count);
-
- /*
- * Rewrite function pointer types in declarations and function pointer
- * casts in initializers at start of block.
- */
-
- block_dcls();
-
- /* Remainder of block: only rewrite function pointer casts. */
-
- while (t = tok_class(DO_WSPACE)) {
- if (t->tokno == TOK_LIST) {
- check_cast_flush(t);
- } else if (t->tokno == '{') {
- block_flush(t);
- } else {
- tok_flush(t);
- if (t->tokno == '}') {
- DPRINTF("/*%d*/", count--);
- return;
- }
- }
- }
- DPRINTF("/* missing '}' */");
- }
-
- /* pair_flush - on the fly rewrite casts in grouped stuff */
-
- static void pair_flush(t, start, stop)
- register struct token *t;
- register int start;
- register int stop;
- {
- tok_flush(t);
-
- while (t = tok_class(DO_WSPACE)) {
- if (t->tokno == start) { /* recurse */
- pair_flush(t, start, stop);
- } else if (t->tokno == TOK_LIST) { /* expression or cast */
- check_cast_flush(t);
- } else { /* other, copy */
- tok_flush(t);
- if (t->tokno == stop) { /* done */
- return;
- }
- }
- }
- DPRINTF("/* missing '%c' */", stop);
- }
-
- /* initializer - on the fly rewrite casts in initializer */
-
- static void initializer()
- {
- register struct token *t;
-
- while (t = tok_class(DO_WSPACE)) {
- switch (t->tokno) {
- case ',': /* list separator */
- case ';': /* list terminator */
- tok_unget(t);
- return;
- case TOK_LIST: /* expression or cast */
- check_cast_flush(t);
- break;
- case '[': /* array substript, may nest */
- pair_flush(t, '[', ']');
- break;
- case '{': /* structured data, may nest */
- pair_flush(t, '{', '}');
- break;
- default: /* other, just copy */
- tok_flush(t);
- break;
- }
- }
- }
-
- /* func_ptr_dcl_flush - rewrite function pointer declaration */
-
- static struct token *func_ptr_dcl_flush(list)
- register struct token *list;
- {
- register struct token *t;
-
- /*
- * Ignore blanks because they would be output earlier than the list that
- * preceded them... Recover gracefully from syntax errors.
- */
-
- while (t = tok_class(NO_WSPACE)) {
- switch (t->tokno) {
- case '\n': /* preserve line count */
- tok_flush(t);
- break;
- case TOK_LIST:
- /* Function pointer type: (list1) (list2) -> (list1) () */
- (void) show_func_ptr_type(list); /* may be recursive */
- tok_free(list);
- tok_free(t);
- return (0);
- default: /* not a declaration */
- tok_unget(t);
- return (list);
- }
- }
-
- /* Hit EOF; must be mistake, but do not waste any information. */
-
- return (list);
- }
-
- /* function_dcl_flush - rewrite function { heading, type declaration } */
-
- static struct token *function_dcl_flush(list)
- register struct token *list;
- {
- register struct token *t;
-
- /*
- * Ignore blanks because they would be output earlier than the list that
- * preceded them...
- */
-
- while (t = tok_class(NO_WSPACE)) {
- switch (t->tokno) {
- case '\n':
- /* Preserve line count */
- tok_flush(t);
- break;
- case '{':
- /* Function heading: word (list) { -> old style heading */
- header_flush(list);
- tok_unget(t);
- return (0);
- case TOK_WORD:
- /* Old-style function heading: word (list) word...{ */
- tok_flush(list);
- tok_unget(t);
- return (0);
- case TOK_LIST:
- /* Function typedef? word (list1) (list) -> word (list1) () */
- tok_flush(list);
- put_str("()");
- tok_free(t);
- return (0);
- case ',':
- case ';':
- /* Function type declaration: word (list) -> word () */
- tok_free(list);
- put_str("()");
- tok_unget(t);
- return (0);
- default:
- /* Something else, reject the list. */
- tok_unget(t);
- return (list);
- }
- }
-
- /* Hit EOF; must be mistake, but do not waste any information. */
-
- return (list);
- }
-
- /* dcl_flush - parse declaration on the fly, return rejected token */
-
- static struct token *dcl_flush(t)
- register struct token *t;
- {
- register int got_word;
-
- /*
- * Away from the top level, type or storage-class information is required
- * for an (extern or forward) function type declaration or a variable
- * declaration.
- *
- * With our naive word-counting approach, this means that the caller should
- * read one word before passing the next token to us. This is how we
- * distinguish, for example, function declarations from function calls.
- *
- * An exception are structs and unions, because they look the same at any
- * level. The caller should give is the "struct" or "union" token.
- */
-
- for (got_word = 0; t; t = tok_class(DO_WSPACE)) {
- switch (t->tokno) {
- case TOK_WSPACE: /* advance past blanks */
- case '\n': /* advance past newline */
- case '*': /* indirection: keep trying */
- tok_flush(t);
- break;
- case TOK_WORD: /* word: keep trying */
- case TOK_COMPOSITE: /* struct or union */
- got_word = 1;
- tok_flush(t);
- break;
- default:
-
- /*
- * Function pointer types can be preceded by zero or more words
- * (at least one when not at the top level). Other stuff can be
- * accepted only after we have seen at least one word (two words
- * when not at the top level). See also the above comment on
- * structs and unions.
- */
-
- if (t->tokno == TOK_LIST && LIST_BEGINS_WITH_STAR(t)) {
- if (t = func_ptr_dcl_flush(t)) {
- return (t); /* reject token */
- } else {
- got_word = 1; /* for = and [ and , and ; */
- }
- } else if (got_word == 0) {
- return (t); /* reject token */
- } else {
- switch (t->tokno) {
- case TOK_LIST: /* function type */
- if (t = function_dcl_flush(t))
- return (t); /* reject token */
- break;
- case '[': /* dimension, does not nest */
- pair_flush(t, '[', ']');
- break;
- case '=': /* initializer follows */
- tok_flush(t);
- initializer(); /* rewrite casts */
- break;
- case '{': /* struct, union, may nest */
- block_flush(t); /* use code for stmt blocks */
- break;
- case ',': /* separator: keep trying */
- got_word = 0;
- tok_flush(t);
- break;
- case ';': /* terminator: succeed */
- tok_flush(t);
- return (0);
- default: /* reject token */
- return (t);
- }
- }
- }
- }
- return (0); /* hit EOF */
- }
-