Usenet 1994 January

home *** CD-ROM | disk | FTP | other *** search

/ Usenet 1994 January / usenetsourcesnewsgroupsinfomagicjanuary1994.iso / sources / x / volume21 / kdrill / part01 / readfile.c < prev next >

Wrap

C/C++ Source or Header | 1993-10-07 | 7.0 KB | 342 lines

/* * This file is for just setting up the structs, etc */ #include <stdlib.h> #include <stdio.h> #include <ctype.h> #include <Xfuncs.h> #include <Xlib.h> #include <Xatom.h> #include <Xutil.h> #include <Intrinsic.h> #include <StringDefs.h> #include <Xos.h> #include "defs.h" #include "externs.h" /* translations[] keeps track of which kanji it is okay to test the * user on. Likewise with numberofkanji, highest, and lowest. * YES, it is best to keep in a large array, otherwise * it would be difficult to switch between grade levels. */ struct translationstruct *translations[0x8000]; int numberofkanji,highest,lowest; /* random debugging util? */ void printline(s) unsigned char *s; { while(*s){ putchar(*s++); } putchar('\n'); } /* Since there is no standard util to convert hex ascii to int,... * have to supply our own.. * It isn't incredibly efficient.. let's hope the compiler is smart. * arrg.. */ int xtoi(s) char * s; { int out=0; sscanf(s,"%x",&out); return out; } /* getline: * reads a line (from dictionary). * returns true (1) if read aline, otherwise, * returns false (0); * * used in "readstructs", below. */ unsigned char instring[2][512]; unsigned char *inptr=NULL; int whichstring; int getline(fp,s) FILE *fp; unsigned char *s; { if(inptr==NULL){ inptr= &instring[0][100]; whichstring=0; } for(;;){ int i; if(inptr == &instring[whichstring][100]){ whichstring = 1-whichstring; inptr=instring[whichstring]; i=fread(instring[whichstring],1,100,fp); if(i<100){ instring[whichstring][i]='\0'; /* okay, we put the termination signal in. * But if there is a complete line in there, * it should be read with our buffering */ } } switch(*inptr){ case 0: *s = '\0'; return 0; case 10: case 13: *inptr='\0'; *s++ = *inptr++; return 1; default: *s++ = *inptr++; } } } /* nextword: * Goes to first whitespace, then sets pointer to * beginning of non-white-space. * * Returns 1 on success, 0 on fail */ int nextword(stringp) char ** stringp; { while(!isspace(**stringp)){ if(stringp == '\0') return 0; *stringp +=1; } /* now on space */ while(isspace(**stringp)){ if(stringp == '\0') return 0; *stringp +=1; } return 1; } /* nextchar: * returns pointer to next non-whitespace char */ unsigned char *nextchar(c) unsigned char *c; { while(isspace(*c)){ if(*c == '\0') break; c++; } return c; } /* StripBrackets: * Gets rid of those annoying {enlish}{english2} brackets. * PRESUMES first char of source is '{'!! * Well, actually, it nicely sets a null string if otherwise. */ void StripBrackets(dest,source) char *dest,*source; { char *parse = &source[1]; if(source[0] != '{'){ dest[0] = '\0'; return; } /* (*dest) is always assumed to be needing a write */ do { switch(*parse){ case '{': *dest++ = ':'; *dest++ = ' '; break; case '}': break; default: *dest++ = *parse; } parse++; } while((*parse != '\n') && (*parse != '\0')); *dest = '\0'; return; } /* Okay, it's not actually pronunciation we're reading. * We are reading the "no-yoni" and "kun-yoni" interpretation * in kanjidic */ void ReadPronunciation(Pstring,kanjinum) int kanjinum; char **Pstring; { XChar2b kanabuffer[256]; XChar2b *pronunciation; int pronun_len; unsigned char *parse = (unsigned char *) *Pstring; pronunciation = kanabuffer; /* "bad" job here.. this is just to get things working.. * this method will probably skip a few kana */ if(*parse == '{'){ /* only english exists, so set to null */ translations[kanjinum] == NULL; return; } while((*parse > 127) && (*parse != '\0')){ /* kanjidic seems to have high bit set on * Kana.. which we need to strip both of!! */ pronunciation->byte1= (*parse++ & 0x7f); pronunciation->byte2= (*parse++ & 0x7f); pronunciation++; /* skip space, and put in divider if * second reading is there */ while(*parse == ' ') { parse++; if(*parse == '\0') break; if(*parse >127){ pronunciation->byte1 = 0x21; pronunciation->byte2 = 0x27; pronunciation++; } } } pronunciation->byte1=pronunciation->byte2 = 0; pronun_len = strlen((char *) kanabuffer); translations[kanjinum]->pronunciation = (XChar2b *) malloc(pronun_len+4); if(translations[kanjinum]->pronunciation == NULL){ fprintf(stderr,"Not enough memory to read in dictionary\n"); exit(0); } strncpy(translations[kanjinum]->pronunciation, kanabuffer, pronun_len+1); /* now position parse pointer for next step: * reading english */ while(*parse != '{'){ if(*parse == '\0'){ return; } parse++; } *Pstring = (char *)parse; } /* readstructs: * the main dictionary reading routine. * Fills in the global translationstruct with * all that is available for each selected kanji, in * Grade, "pronunciation", english translation, and * frequency of use (by native speakers) */ void readstructs(){ unsigned char instring[256]; FILE *fp; lowest = highest = 0x3000; fp = fopen(dictname,"r"); if(fp == NULL){ perror("cannot open kanji translation file"); fprintf(stderr,"Looking for %s\n",dictname); exit(0); } while (getline(fp,instring) != 0) { int Kanji; int freq,grade; unsigned char *parse; int instrlen; /* length of pronunciation */ if(strlen(instring) <10) continue; /*try to get kanji Index right away */ Kanji = xtoi(&instring[2]); /* skip comments, and kanji not specified in * the usefile */ if( (Kanji < 0x3000) || (translations[Kanji] != NULL) ){ continue; } parse = &instring[2]; if(parse == NULL){ continue; } /* now parse for grade level, frequency, and english */ freq = grade = 0; nextword(&parse); /* check for high bit set, which means * start of kana definition of kana */ /* used to have (*parse != '{') */ while ( (*parse < 127) && (*parse != '{') ) { switch(*parse){ case 'G': grade = atoi(++parse); break; case 'F': freq = atoi(++parse); break; default: break; } nextword(&parse); } /********************************************** * Now we know that we have a useable/wanted * * dictionary definition * *********************************************/ if(Kanji<lowest) lowest = Kanji; if (Kanji >highest) highest = Kanji; translations[Kanji] = (struct translationstruct *) malloc(sizeof(struct translationstruct)); if (translations[Kanji] == NULL){ perror("Cannot allocate memory for translation table\n"); exit(errno); } translations[Kanji]->frequency = freq; translations[Kanji]->grade_level = grade; ReadPronunciation(&parse,Kanji); /* * translations[Kanji]->pronunciation[0].byte1=0; * translations[Kanji]->pronunciation[0].byte2=0; */ instrlen = strlen(parse)+1; translations[Kanji]->english = (char *) malloc(instrlen); if(translations[Kanji]->english == NULL){ perror("Cannot allocate memory for translation table\n"); exit(errno); } StripBrackets(translations[Kanji]->english,parse); } /* and repeat until end of file */ }