Simtel MSDOS 1992 September

home *** CD-ROM | disk | FTP | other *** search

/ Simtel MSDOS 1992 September / Simtel20_Sept92.cdr / msdos / ddjmag / ddj8905.arc / TAWK.ASC < prev next >

Wrap

Text File | 1989-05-12 | 22KB | 712 lines

_TAWK, A Simple Interpreter in C++_ by Bruce Eckel [LISTING ONE] // FIELD.HXX: used by csascii class to build a single field. // Fields are collected by csascii to create a record. // by Bruce Eckel, #include <stream.hxx> class field { // one field in a comma-separated ASCII record istream * input; // where to get the data char * data; int length, fsize; int end_of_file; // flag to indicate the end of file happened void getfield(); // recursive function to read in a field; // treats data, length & input as globals int infield; // flag used by getfield() to determine whether // it's inside a quoted field public: field(istream & instream); ~field(); friend ostream& operator<<(ostream &s, field & f) { s << f.data; return s; } int eof() { return end_of_file; } // to check for end int size() { return fsize;} int last_length() {return length; } char * string() { return data; } }; [LISTING TWO] // FIELD.CXX: definitions for class field // A "recursive descent" scanning scheme is used because field // length is always unknown. // by Bruce Eckel #include "field.hxx" field::field(istream & instream) { input = &instream; length = 0; end_of_file = 0; // set flag to say "we're not at the end" infield = 0; // set flag to say "we're not inside a field" data = (char *)0; // to show no memory has been allocated getfield(); // recursively get characters until end of field } field::~field() { delete data; // if no memory has been allocated, // data = (char *)0 so this will have no effect. } // A Comma-separated ASCII field is contained in quotes to allow // commas within the field; these quotes must be stripped out void field::getfield() { char c; // This happens when DEscending: if((input->get(c)).eof() ) { end_of_file++; // just say we reached the end... return; } else // watch out for the Unix vs. DOS LF/CR problem here: if (((c != ',') || infield) && (c != '\n')) { if ( (c != '"') && (c != '\r')) // watch for quotes or CR length++; // no quotes -- count this character else { if ( c == '"') infield = !infield; // if we weren't inside a field // and a quote was encountered, we are now inside // a field. If we were inside a field and a quote // was found, we're out of the field. c = 0; // a quote or CR; mark it so it isn't included } getfield(); // recursively get characters in field // after returning from function call, we jump past // the following "else" part to finish the recursion } else { // This happens once, when the terminator is found: fsize = length; // remember how long the string is data = new char[length + 1]; // space for null terminator data[length] = '\0'; // highest index is "length" // when you allocate an array of length + 1 length--; // notice we don't insert the delimiter // Now the first "if" statement evaluates to TRUE and // the function rises back up. return; } // This happens when Ascending: if ( c ) // if it wasn't a quote or CR, data[length--] = c; // put chars in as we rise back up... } [LISTING THREE] // CSASCII.HXX: class to manipulate comma-separated ASCII // database files. //by Bruce Eckel #include <stream.hxx> #include "field.hxx" class csascii { // manipulates comma-separated ascii files, // generated by most database management systems (generated and // used by the BASIC programming language). Each field // is separated by a comma; records are separated by newlines. int fieldcount; field ** data; // an array to hold the entire record istream * datafile; // file with comma separated ASCII input int readrecord(); // private function to read a record public: csascii( char * filename ); // Open file, get first record ~csascii(); // destructor int next(); // get next record, return 0 when EOF field & operator[](int index); // select a field int number_of_fields() { return fieldcount; } }; [LISTING FOUR] // CSASCII.CXX: function definitions for comma-separated // ascii database manipulation class // by Bruce Eckel, #include "csascii.hxx" int csascii::readrecord() { for (int fieldnum = 0; fieldnum < fieldcount; fieldnum++ ) { data[fieldnum] = new field(*datafile); if (data[fieldnum]->eof()) return 0; } return 1; } csascii::csascii( char * filename ) { char c; fieldcount = 0; int quote = 0; // first, determine the number of fields in a record: { // See text for dangers of opening files this way: istream infile(new filebuf->open(filename, input)); while(infile.get(c), c != '\n') { // keep track of being inside a quoted string: if (c == '"') quote = !quote; // fields are delimited by unquoted commas: if ( c == ',' && !quote) fieldcount++; } } // infile goes out of scope; file closed fieldcount++; // last field terminated by newline, not comma // an array of field pointers: data = new field * [ fieldcount ]; // re-open at start; dynamically allocate so it isn't scoped: datafile = new istream(new filebuf->open(filename, input)); readrecord(); } csascii::~csascii() { delete data; delete datafile; // calls istream destructor to close file } int csascii::next() { for (int i = 0; i < fieldcount; i++ ) delete data[i]; // free all the data storage return readrecord(); // 0 when end of file } field & csascii::operator[](int index) { if (index >= fieldcount) { cerr << "index too large for number of fields in record\n"; exit(1); } return *(data[index]); } [LISTING FIVE] // LOOKUP.CXX: simple use of csascii to find name in a database // by Bruce Eckel, #include "csascii.hxx" #include <string.h> main(int argc, char ** argv) { if (argc < 2) { cerr << "usage: lookup lastname\n"; exit(1); } // This puts the database file in the root directory: csascii file("\\ppquick.asc"); // create object & open file int found = 0; // indicates one record was found do { if (strcmp(file[0].string(),argv[1]) == 0) { found++; // found one. File is sorted, so if we stop // finding them, quit instead of wasting time. cout << chr(27) << "[2J"; // ANSI clear screen for (int i = 0; i < file.number_of_fields(); i++) cout << file[i] << "\n"; cout << chr(27) << "[7m" << "press any key" << chr(27) << "[0m"; if( getch() == 27) break; } else if (found) exit(0); // quit if that was the last } while (file.next()); } [LISTING SIX] // PARSE.HXX: class to parse a tawk script file. Creates // a structure which can be used at run-time to "execute" // the tawk script. // by Bruce Eckel, #include <stream.hxx> // types of tokens the scanner can find: enum tokentype { fieldnumber, string, if_, else_, endif_, phase_change }; // preamble and conclusion of the tawk script are only executed // once, while main is executed once for every data record enum phase { preamble, tmain, conclusion}; class token { tokentype ttype; union { // an "anonymous union" int fieldnum; // if type is a fieldnumber unsigned char * literal; // if type is a string }; int if_level; // if this is an if_, then_, or else_ // private functions: void get_token(); // recursive descent scanner // Functions to help in scanning: void getnext(char & c); // used by get_token(); unsigned char get_value(char delimiter, char * msg); void dumpline(); // for @! comments void error(char * msg = "", char * msg2 = ""); public: token(istream & input); ~token(); friend ostream & operator<<(ostream &s, token &t); int field_number() { return fieldnum; } int token_type() { return ttype; } int nesting_level() { return if_level;} }; // The following is called a "container class," since its sole // purpose is to hold a list of objects (tokens, in this case): class parse_array { token ** tokenarray; // an array of token pointers istream * parse_stream; int token_count; int end; // the size of the array phase p_section; // of the program (preamble, etc.) void build_array(); // another recursive function public: parse_array(istream & input); ~parse_array(); int size() { return end; } // how big is it? token & operator[](int index); // select a token phase section() { return p_section; } }; [LISTING SEVEN] // PARSE.CXX: class parse function definitions // by Bruce Eckel, #include "csascii.hxx" #include "parse.hxx" #include <ctype.h> #include <stdlib.h> // The following are "file static," which means no one outside // this file can know about them. This is the meaning when a // global variable is declared "static." static istream * tokenstream; static int length; // to remember size of string static int line_number = 1; // line counting for errors static int if_counter = 0; // monitors "if" statement nesting static phase program_section = preamble; // ... until @main static int end_of_file = 0; // zero means not end of file token::token(istream & input) { // initialize values and start the descent tokenstream = &input; length = 0; get_token(); // recursively get characters to end of token } token::~token() { // delete heap if any has been allocated: if (ttype == string) delete literal; } void token::error(char * msg, char * msg2) { cerr << "token error on line " << line_number << ": " << msg << " " << msg2 << "\n"; exit(1); } ostream & operator<<(ostream &s, token &t) { switch (t.ttype) { case string: s << (char *)t.literal; break; case fieldnumber: // only for testing s << " fieldnumber: " << t.fieldnum << "\n"; } return s; } // Get a character from the tokenstream, checking for // end-of-file and newlines void token::getnext(char & c) { if(end_of_file) error("attempt to read after @end statement\n", "missing @conclusion ?"); if((tokenstream->get(c)).eof() ) error("@end statement missing"); if (c == '\n') line_number++; // keep track of the line count } // See text for description of tokens void token::get_token() { char c; // This happens when DEscending: getnext(c); if ( c == '@') { if (length == 0) { // length 0 means start of token getnext(c); switch(c) { case '!': // comment line dumpline(); // dump the comment get_token(); // get a real token break; case 'p' : case 'P' : // preamble statement if ( program_section != preamble ) error("only one preamble allowed"); dumpline(); // just for looks, ignore it get_token(); // get a real token break; case 'm' : case 'M' : // start of main loop dumpline(); // toss rest of line program_section = tmain; ttype = phase_change; return; // very simple token case 'c' : case 'C' : // start conclusion dumpline(); program_section = conclusion; ttype = phase_change; return; // very simple token case 'e' : case 'E': // end statement end_of_file++; // set flag ttype = fieldnumber; // so destructor doesn't // delete free store for this token. if (if_counter) error("unclosed 'if' statement(s)"); return; case '(' : if ( program_section == preamble || program_section == conclusion ) error("@() not allowed in preamble or conclusion"); fieldnum = get_value(')',"@()"); ttype = fieldnumber; // This is a complete token, so quit return; case '<' : c = get_value('>',"@<>"); length++; get_token(); // get more... break; case '?' : // beginning of an "if" statement if ( program_section == preamble || program_section == conclusion ) error("@? not allowed in preamble or conclusion"); fieldnum = get_value('@',"@?@"); ttype = if_; getnext(c); // just eat the colon if(c != ':') error("@? must be followed by @: (then)"); if_level = ++if_counter; // for nesting return; case '~' : // the "else" part of an "if" statement ttype = else_; if_level = if_counter; return; case '.' : // "endif" terminator of an "if" statement ttype = endif_; if_level = if_counter--; if(if_counter < 0) error("incorrect nesting of if-then-else clauses"); return; case '@' : // two '@' in a row mean print an '@' length++; // just leave '@' as the value of c get_token(); break; default: error("'@' must be followed by:", "'(', '<', '?',':','~','.','p','m','c' or '@'"); } } else { // an '@' in the middle of a string; terminate // the string. Putback() is part of the stream class. // It is only safe to put one character back on the input tokenstream->putback(c); // to be used by the next token // allocate space, put the null in and return up the stack literal = new unsigned char[length + 1]; // space for '\0' literal[length--] = '\0'; // string delimiter ttype = string; // what kind of token this is return; // back up the stack } } else { // not an '@', must be plain text length++; get_token(); } // This occurs on the "tail" of the recursion: literal[length--] = c; // put chars in as we rise back up... } // This function is used by get_token when it encounters a @( // or a @< to get a number until it finds "delimiter." // If an error occurs, msg is used to notify the user what // kind of statement it is. unsigned char token::get_value(char delimiter, char * msg) { char c; char buf[5]; int i = 0; while(getnext(c), c != delimiter) { if (!isdigit(c)) error("must use only digits inside", msg); buf[i++] = c; } buf[i] = 0; return atoi(buf); } void token::dumpline() { // called when '@!' encountered char c; while(getnext(c), c != '\n') ; // just eat characters until newline } // Since there's no way to know how big a parse_array is // going to be until the entire tawkfile has been tokenized, // the recursive approach is again used: parse_array::parse_array(istream & input) { parse_stream = &input; token_count = 0; p_section = program_section; // so we know at run-time build_array(); } void parse_array::build_array() { token * tk = new token(*parse_stream); if( ! end_of_file && tk->token_type() != phase_change) { // normal token, not end of file or phase change: token_count++; // recursively get tokens until eof or phase change: build_array(); } else { // end of file or phase change // only done once per object: // allocate memory and return up the stack tokenarray = new token * [end = token_count]; if(token_count) token_count--; // only if non-zero return; } tokenarray[token_count--] = tk; // performed on the "tail" } parse_array::~parse_array() { for (int i = 0; i < end; i++) delete tokenarray[i]; delete tokenarray; } token & parse_array::operator[](int index) { if ( index >= end ) { cerr << "parse_array error: index " << index << " out of bounds\n"; exit(1); } return *tokenarray[index]; } [LISTING EIGHT] // TAWK.CXX: parses a tawk script and reads an ascii file; // generates results according to the tawk script. // by Bruce Eckel, #include "csascii.hxx" #include "parse.hxx" main (int argc, char * argv[]) { int screen = 0; // flag set true if screen output desired if (argc < 3) { cerr << "usage: tawk tawkfile datafile\n" << "trailing -s pages output to screen"; exit(1); } if (argc == 4) { if (argv[3][0] != '-') { cerr << "must use '-' before trailing flag\n"; exit(1); } else if (argv[3][1] != 's') { cerr << "'s' is only trailing flag allowed"; exit(1); } else screen++; // set screen output flag true } istream tawkfile(new filebuf->open(argv[1], input)); parse_array Apreamble(tawkfile); // the @preamble parse_array Amain(tawkfile); // the @main section parse_array Aconclusion(tawkfile); // the @conclusion csascii datafile(argv[2]); // make a comma-separated ASCII // object from the second arg // ------ @preamble ------ for (int i = 0; i < Apreamble.size(); i++) cout << Apreamble[i]; // preamble can only contain strings if(screen) { // ANSI reverse video sequence: cout << chr(27) << "[7m" << "press any key" << chr(27) << "[0m"; getch(); } // ------ The Central Loop (@main) ------- do { // for each record in the data file if(screen) cout << chr(27) << "[2J"; // ANSI clear screen for(int i = 0; i < Amain.size(); i++) { switch(Amain[i].token_type()) { case fieldnumber: cout << datafile[Amain[i].field_number()]; break; case string: cout << Amain[i]; break; case if_: int fn = Amain[i].field_number(); if (datafile[fn].size() == 0) { // conditional false int level = Amain[i].nesting_level(); // find the "else" statement on the same level: while ( !(Amain[i].token_type() == else_ && Amain[i].nesting_level() == level)) i++; } // conditional true -- just continue break; case else_: // an "if" conditional was true so skip // all the statements in the "else" clause int level = Amain[i].nesting_level(); // find the "endif" statement on the same level: while ( !(Amain[i].token_type() == endif_ && Amain[i].nesting_level() == level)) i++; break; case endif_: // after performing the "else" clause break; // ignore it; only used to find the end // of the conditional when "if" is true. default: // should never happen (caught in parsing) cerr << "unknown statement encountered at run-time\n"; exit(1); } } if(screen) { cout << chr(27) << "[7m" << "press a key (ESC quits)" << chr(27) << "[0m"; if( getch() == 27) break; } } while (datafile.next()); // matches do { ... // ------ @conclusion ------ for ( i = 0; i < Aconclusion.size(); i++) cout << Aconclusion[i]; //conclusion contains only strings } [LISTING NINE] # makefile for tawk.exe & lookup.exe # Zortech C++: CPP = ztc # Glockenspiel C++ w/ MSC 4: #CPP = ccxx !4 all: tawk.exe lookup.exe tawk.exe : tawk.obj parse.obj csascii.obj field.obj $(CPP) tawk.obj parse.obj csascii.obj field.obj lookup.exe : lookup.cxx csascii.obj field.obj $(CPP) lookup.cxx csascii.obj field.obj tawk.obj : tawk.cxx parse.hxx csascii.hxx field.hxx $(CPP) -c tawk.cxx parse.obj : parse.cxx parse.hxx $(CPP) -c parse.cxx csascii.obj : csascii.cxx csascii.hxx field.hxx $(CPP) -c csascii.cxx field.obj : field.cxx field.hxx $(CPP) -c field.cxx [LISTING TEN] @! REFORM.TWK @! A tawk script to reformat a comma-separated ASCII file @! with 6 fields. This creates a new CS-ASCII file with @! fields 4 and 5 combined. @main "@(0)","@(1)","@(2)","@(3)","@(4)@?4@: @~@.@(5)" @conclusion @end [LISTING ELEVEN] @! WALLET.TWK @! Tawkfile to create a tiny phone listing for a wallet @! on a Hewlett-Packard Laserjet-compatible printer @! From a comma-separated ASCII file generated by a DBMS @preamble @<27>&l5C@! approximately 10 lines per inch @<27>(s16.66H@! small typeface, built into Laserjet @main @! last, first, (area code) phone1 @(0),@(1)(@(2))@?3@:@(3) @ phone2, if it exists @?4@:@(4) @~@.@~@?4@:@(4) @~ @.@.@conclusion @<27>E @! Reset the Laserjet @end [EXAMPLE 1] class tiny { // private stuff here (this is a comment) int i; public: // public stuff here: print() { // an "in-line" function printf("i = %d\n",i); } tiny(int j); // constructors have the class name ~tiny() {} // destructors use a tilde }; // classes end with a brace and a semicolon tiny::tiny(int j) { // non inline definition i = j; } main() { tiny A(2); // implicit constructor call // A.i = 30; // error! private member A.print(); // calling a member function // implicit destructor call at end of scope } [EXAMPLE 2] #include <stream.hxx> // cout automatically defined main() { cout << "Hello, world!\n" << "I am " << 6 << "today!\n"; } [EXAMPLE 3] filebuf f1; if (f1.open(argv[1],input) == 0) { cout << "cannot open " << argv[1] << "\n"; exit(1); } istream infile(&f1); [EXAMPLE 4] "Ball","Mike","Oregon Software C++ Compiler" "Bright","Walter","Zortech C++ Compiler" "Carolan","John","Glockenspiel C++ Translator" "Stroustrup","Bjarne","AT&T, C++ Creator" "Tiemann","Michael","Free Software Foundation C++ Compiler"