Otherware

home *** CD-ROM | disk | FTP | other *** search

/ Otherware / Otherware_1_SB_Development.iso / mac / developm / source / suntar1.cpt / gc / par.c < prev

Wrap

C/C++ Source or Header | 1992-06-13 | 13.9 KB | 539 lines

/************************\ * * * Parser * * * \************************/ /* One pass optimizing compiler for typeless expressions written by Gabriele Speranza on a Sinclair QL (summer 1990), adapted to C language expressions and modified gen.c (november 1990) quickly ported to the Macintosh (jan-feb 1991) added comments & dynamic choice between the two calling protocols (june 92) Algorithms used: lexical analyzer: table lookup expression parser: left corner parsing optimizer: lazy evaluation & strength reduction */ /* the QL screen is small, and the QL keyboard is rather awkward to use, hence a rather strange graphical appearence of the listing and almost no original comments. Now that I'm writing on a Mac I tried to add more comments, but more than one year has passed since I wrote the program and these comments are necessarily imprecise */ #include <stdio.h> #include <ctype.h> #include <setjmp.h> #include "gc.h" /* #define hidden static*/ #define hidden /* i prossimi dovrebbero stare in ctype.h ma evidentemente qui non ci sono... */ #define iscsym(c) (isalnum(c)||(c)=='_') #define iscsymf(c) (isalpha(c)||(c)=='_') struct t token; FILE *out; localmem currentExpr; static char buffer[160], *textPointer; static jmp_buf mainProgram; #define sTsize 64 #define predefined 6 /* table of predefined symbols:the code generator supports constant values and dynamic variables, but all new symbols are considered absolute (the C static variables), hence to try the capabilities of the program I have 3 predefined constants and 3 predefined dynamic variables */ tabel *lastEntry,symbolTable[sTsize]={ {dyn,"x",0},{dyn,"y",0},{dyn,"z",0}, {const,"u",1},{const,"v",2}, {const,"w",0}}; /* the lexical analyzer is table driven, and here are the tables: */ static char tabBichar[][2]={ '-','>','<','<','>','>','+','+', '-','-','=','=','!','=','&','&', '|','|','<','=','>','='}; static byte valBichar[][2]={ 'P',255,0,4,1,4,0,255,1,255,EQ,1, NE,1,0,2,1,2,LE,1,GE,1}; static word prioBichar[]={ 0,0100135,0100135,060161,060161, 0115,0115,055,045,0125,0125}; static char tabMono[]={ '+','-','&','|','*','/','%','!', '~','<','>','^','?',',','=' }; static byte valMono[]={ 3,3,3,3,4,4,4,0,0,1,1,3,6,0,10}; static word prioMono[]={ 0100145,0140145,0140105,0100065, 0140155,0100155,0100155,0162,0162, 0125,0125,0100075,034,017,020027}; /* coding of the priority: least significant 3 bits (l.s. octal digit) the associativity: see the #define's for (unary) prefix, (unary) postfix, leftass, rightass...; next 9 bits (three more octal digits) are the priority: 0 is reserved, hence up to 511 levels of precedence are possible. Only about 20 are really used for C expressions. The remaining 4 bits will be shifted to the "features" bytes, containing some attributes of the operator (lval1op,lval2op,unaryToo,asgnOpToo) The input parameter of E_parser has this same coding, but the upper four bit have a different meaning (bits 12 to 14 are always cleared, bit 15 means "this sub-expression is not nested within a parenthesis" and is not used currently, but it was used in the summer 90 version which used a Pascal-like syntax rather than a C-like one) Since it's the lexical analyzer which determines priority and associativity of operators, it would be extremely easy to dynamically insert and delete user-defined operators (e.g. using identifiers or sequences of chars which, by some simple rule, are guaranteed not to create ambiguities to the lexical analyzer). Question: why do Ada and C++ allow to redefine existing operators, but not to define new ones ? Answer: Ichbiah and Stroustrup never heard about left-corner parsing (the one used here). Probably they knew that C-Prolog allows to redefine operators, but Prolog means AI and AI algorithms are horribly slow and memory hungry (the left corner expression parser should be slightly faster and should use less memory than any other expression parsing algorithm) */ #define UNARYPRIO 0162 #define ASSIGNPRIO 027 #define ASSIGNOPFEA 022 void lexanal(),E_parser(), gestioneSel(),Statement(); void main() { /*static word w2[4]={206,200,284,8}; static word w1[4]={260,200,22,8};*/ int width,heigth; FILE*fopen_win(); lastEntry= &symbolTable[predefined]; InitConsole(); screen_size(&width,&heigth); if(width>=640){ config_console(44,4,"\pinput",-1,25,40,0,0,0); printf("\f"); out=fopen_win(44,320,"\poutput",0,25,40,0,0); } else{ config_console(44,4,"\pinput",-1,25,40,0,0,0); printf("\f"); out=fopen_win(44,256,"\poutput",0,25,40,0,0); } /*printf("Programma di esempio\n"); printf("Parsing espressioni e\n"); printf("generazione di codice\n\n"); */ printf("\nDemo code generator\nby Gabriele Speranza November 1990\n"); printf("Type expressions in C, for example\"2+2\",\n\"t=(a[x]-b.u)^f(y,3+s); b= y*8+1\"\n"); printf("or \"a=b?(!!c||d):(foo+=j),g(h|r)\"\n\n"); if(setjmp(mainProgram)<0) { /*printf("Riprova !!\n");*/ printf("Try again !\n"); fflush(out); } for(;;){ printf("Expr: "); /*getline(&buffer,sizeof(buffer));*/ fgets(buffer,sizeof(buffer),stdin); if(buffer[1]=='\0'){ fclose(out); return; } fprintf(out,"\f"); textPointer= &buffer[0]; Statement(); fflush(out); printf("\n"); }} /*********************/ void Statement() { /* it does NOT parse C-like statements, only sequences of expressions separated by ';', but above the "expression" level it's typical to find something named "statement" */ static char *classes[]={ "immediate","*immediate","simplevar", "*simplevar","relocatable","based", "generated","flags"}; do{ lexanal(); if(token.asc!='Z'){ E_parser(Normal|externalCall); /* printf("class=%s\nlval=%d CExpr=%d\n", classes[currentExpr.class], currentExpr.lval,currentExpr.isCExpr); */ if(currentExpr.class==flags){ ForceJump(¤tExpr,0); Emit("..."); } else ForceA(¤tExpr); } } while(token.asc==';'); Emit("END"); if(token.asc!='Z')errore("extra chars"); } /**********************/ /* parameter parsing must be done in a different way according to how parameters are passed (on the Mac, the two methods are called C and Pascal), since the "parse tree" must be built in two different ways (this programs builds no parse tree, but conceptually it exists, simply it's destroyed during the same pass which should construct it). This parser has both methods, with a global variable choosing /* hidden */void FunParameters() {localmem first; lexanal(); first.op='(';first.opfea=0; InitSecOp(&first,8); while(token.asc!=')'){ E_parser(commaIsNotOp); BinaryOp(&first,7); if(token.asc==',')lexanal(); else if(token.asc!=')')errore(") expected"); } first.op=')';first.opfea=32; BinaryOp(&first,8); lexanal(); } /**********************/ hidden void restOfPars() {localmem first; first.op=')';first.opfea=0; E_parser(commaIsNotOp); InitSecOp(&first,11); if(token.asc==','){ lexanal(); restOfPars(); } else if(token.asc!=')') errore(") expected"); first.opfea=32; BinaryOp(&first,11); } /**********************/ hidden void FunParsInv() {localmem first; first.op='(';first.opfea=0; lexanal();InitSecOp(&first,12); if(token.asc!=')') restOfPars(); first.opfea=32; BinaryOp(&first,12);lexanal(); } /**********************/ hidden void RecordOffset() {localmem first; first.op='.';first.opfea=lval1op; InitSecOp(&first,9); lexanal(); if(token.asc!='I'|| token.symTable->name[0]=='\0'|| token.symTable->typ!=const) errore("field name expected"); primary(token.symTable); BinaryOp(&first,9); lexanal(); } /*****************/ hidden void ArrayIndex() {localmem first; static localmem aux={ '*',0,0,{0,immediate /* e il resto 0 */}}; first.op='[';first.opfea=32; /* for(;;){ solo per indici multipli uncomment for Pascal, which allows a[x,y] */ InitSecOp(&first,9); lexanal(); E_parser(Normal); /* solo per limite inf != 0 uncomment for Pascal, where the lower limit is not 0 aux.eval= - limite inferiore ; aux.op='+'; BinaryOp(&aux,3); aux.op='*'; */ aux.eval=4; /* size dell'entit puntata */ BinaryOp(&aux,4); /* solo per indici multipli uncomment for Pascal, which allows a[x,y] if(token.asc==',') BinaryOp(&first,9); else break; } */ match(']'); BinaryOp(&first,9); } /**********************/ hidden void gestioneSel() { extern word parsInverted; for(;;){ switch(token.asc){ case 'P' /*'->'*/: UnaryPre('*',0); /* RecordOffset(); break; */ case '.': RecordOffset(); break; case '[': ArrayIndex(); break; case '(': if(!parsInverted) FunParameters(); else FunParsInv(); break; default: return; }} } /*********************/ hidden void E_parser(input) word input; { /* left-corner expression parser */ if(token.priority!=0 && (token.features&unaryToo)){ token.priority=UNARYPRIO; if(token.asc=='&')token.features=lval1op; } if(token.asc=='I'){ primary(token.symTable); if(token.symTable->name[0]!='\0'){ /* well, I must use some arbitrary criteria to tell which functions have a C interface and which ones have a Pascal interface... */ parsInverted=((token.symTable->name[0])|0x20)<'m'; lexanal(); gestioneSel(); } else lexanal(); } else if(token.asc=='('){ lexanal(); E_parser(Normal); parsInverted=0; match(')');gestioneSel(); /* converting it to Pascal syntax, remember that somewhere there must be a this.lval=false */ } else if((token.priority&7)==prefix){ char c=token.asc,f=token.features; lexanal(); E_parser(UNARYPRIO-prefix); /* that's for C, where all unary prefix operators have the same precedence, the highest, and right-associate with other operators at the same priority (*p++ means *(p++) ). Converting to Pascal syntax, where the precedence is not the highest, one should remember that a<-b is OK but a*-b is illegal; and since they left-associate with other operators at the same priority (-a-b means (-a)-b...) you should do this: int i=token.priority; lexanal(); E_parser(i+8-prefix); */ UnaryPre(c,f); } else errore("missing token"); /* MAIN LOOP */ {word p,t,minprio=input&0xFFF; localmem first; /*OK, in theory I should mask out the lowest three bits (associativity) in both token.priority and minprio. However, not masking them and cleverly coding the associativity field allowed me to have some control about what happens when operators of different associativity are placed at the same priority, and what I obtained seems better than what happens by the "theoric" law obtained by masking those bits */ while(token.priority>=minprio){ t=token.priority&7; if(t==prefix) errore("missing operator"); if(t==postfix){ p=token.asc;lexanal(); UnaryPost(p,input); } else { int i=token.index; p=token.priority; first.op=token.asc; first.opfea=token.features; InitSecOp(&first,i); lexanal(); switch(t){ case leftass: E_parser(p+(8-leftass));break; case rightass: E_parser(p);break; case noass: /* OK, I know that C has no no-ass operators, but they are supported anyway. And <,>,<= and so on should have been no-ass, the C meaning of a<b<c is absolutely silly */ E_parser(p+(8-noass)); if(token.priority==p) errore("noass !"); break; case ternary: E_parser(p); if(token.asc!=':') errore("misused ternary"); BinaryOp(&first,5); lexanal(); E_parser(p); break; default: errore("???"); } BinaryOp(&first,i); } } } } /*********************/ hidden void lexanal() { /* a very very simple lexical analyzer, containing a very simple symbol table handling */ char *p=textPointer; static tabel lit={const,{'\0'},0}; token.priority=0; while(*p>0&&isspace(*p)) p++; if(*p=='\0') {token.asc='Z';token.index=255;} else if(*p<0) {token.asc= *p++;token.index=255;} else if(*p>0&&isdigit(*p)){ lit.val=0; do lit.val=lit.val*10+ *p++ -'0'; while(*p>0&&isdigit(*p)); token.asc='I';token.index=255; token.symTable=&lit; } else if(*p>0&&iscsymf(*p)){ /*car iniziale id */ char *p1=&(lastEntry->name[0]); while(*p>0&&iscsym(*p)) /*car non iniziale id*/ *p1++ = *p++; *p1='\0'; token.symTable= &symbolTable[0]; while(strcmp(token.symTable->name,lastEntry->name)) token.symTable++; if(token.symTable==lastEntry) {lastEntry->typ=ass; if(lastEntry==&symbolTable[sTsize-1]) errore("sym table full"); lastEntry++; } token.asc='I';token.index=255; } else{ char ricerca[2]; int i=0; ricerca[0]=*p++;ricerca[1]=*p; do if(*((word*)&ricerca[0])==*((word*)&tabBichar[i])) break; while(++i<sizeof(tabBichar)/2); if(i<sizeof(tabBichar)/2){ token.asc=valBichar[i][0]; token.index=valBichar[i][1]; token.priority=prioBichar[i]; p++; } else{ token.asc=ricerca[0]; i=0; do if(token.asc==tabMono[i]) break; while(++i<sizeof(tabMono)); if(i<sizeof(tabMono)){ token.index=valMono[i]; token.priority=prioMono[i]; if(token.index==1) token.asc=token.asc=='<'?LT:GT; } else token.index=255; } token.features=0; if(token.priority!=0){ /* operatore... */ token.features=(token.priority>>12)&0xF; token.priority&=0xFFF; if(*p=='='&&(token.features&asgnOpToo)){ token.features=ASSIGNOPFEA; token.priority=ASSIGNPRIO; p++; } } } textPointer=p; } /*******************/ match(c) char c; { static char p[]="missing "; if(token.asc==c) lexanal(); else{ p[sizeof(p)-2]=c; errore(p); } } /*******************/ errore(mesg) char *mesg; { printf("\n%s\n",mesg); printf("%s\n",textPointer); longjmp(mainProgram,-1); exit(0); /*inutile...*/ }