Source Code 1992 March

home *** CD-ROM | disk | FTP | other *** search

/ Source Code 1992 March / Source_Code_CD-ROM_Walnut_Creek_March_1992.iso / usenet / altsrcs / 2 / 2953 < prev next >

Wrap

Internet Message Format | 1991-03-03 | 53KB

From: lee@sq.sq.com (Liam R. E. Quin) Newsgroups: alt.sources Subject: lq-text Full Text Retrieval Database Part 05/13 Message-ID: <1991Mar4.020405.16387@sq.sq.com> Date: 4 Mar 91 02:04:05 GMT : cut here --- cut here -- : To unbundle, sh this file #! /bin/sh : part 05 echo x - lq-text/src/liblqtext/asciitrace.c 1>&2 sed 's/^X//' >lq-text/src/liblqtext/asciitrace.c <<'@@@End of lq-text/src/liblqtext/asciitrace.c' X/* asciitrace.c -- Copyright 1989 Liam R. Quin. All Rights Reserved. X * This code is NOT in the public domain. X * See the file COPYRIGHT for full details. X * This file simply declares AsciiTrace, which is used for verbose (-v) X * output (when set to 1), and for debugging/tracing at higher levels. X */ X Xint AsciiTrace = 0; X X/* $Id: asciitrace.c,v 1.3 90/10/06 00:21:12 lee Rel1-10 $ X * X */ @@@End of lq-text/src/liblqtext/asciitrace.c echo x - lq-text/src/liblqtext/bcopy.c 1>&2 sed 's/^X//' >lq-text/src/liblqtext/bcopy.c <<'@@@End of lq-text/src/liblqtext/bcopy.c' X/* bcopy.c -- Copyright 1989 Liam R. Quin. All Rights Reserved. X * This code is NOT in the public domain. X * See the file COPYRIGHT for full details. X */ X X#ifdef BCOPYTEST X# include <stdio.h> X#endif X X/* this is a simple replacement for bcopy() where the native bcopy() X * does not handle overlapping blocks. X * do X * cc -DBCOPYTEST -o bcopy bcopy.c X * and run "./bcopy" for a simple test. You should get three X * identical lines of output. X * X * $Id: bcopy.c,v 1.3 90/10/06 00:21:24 lee Rel1-10 $ X */ X Xbcopy(src, dest, nbytes) X char *dest; X char *src; X int nbytes; X{ X /* We have to be clever about this... X * If src < dest then we copy from the top down X * otherwise, copy from the bottom up... X */ X X register char *p, *q; X X if (src < dest) { X for (p = &src[nbytes - 1], q =&dest[nbytes - 1]; nbytes--; q--, p--) { X *q = *p; X } X } else { X for (p = src, q = dest; nbytes--; p++, q++) { X *q = *p; X } X } X} X X#ifdef BCOPYTEST Xmain() X{ X char buffer[4096]; X char *s = "The naked children hugged each other"; X X puts(s); /* first line */ X (void) sprintf(&buffer[12], "%s", s); X bcopy(&buffer[12], buffer, strlen(s) + 1); X printf("[%s]\n", buffer); /* 2nd line */ X bcopy(buffer, &buffer[12], strlen(s) + 1); X printf("[%s]\n", &buffer[12]); /* 3rd line */ X} X#endif X X/* $Log: bcopy.c,v $ X * Revision 1.3 90/10/06 00:21:24 lee X * Prepared for first beta release. X * X * X */ @@@End of lq-text/src/liblqtext/bcopy.c echo x - lq-text/src/liblqtext/cmdname.c 1>&2 sed 's/^X//' >lq-text/src/liblqtext/cmdname.c <<'@@@End of lq-text/src/liblqtext/cmdname.c' X/* cmdname.c -- Copyright 1989 Liam R. Quin. All Rights Reserved. X * This code is NOT in the public domain. X * See the file COPYRIGHT for full details. X */ X Xchar *cmdname = (char *) 0; X X/* $Id: cmdname.c,v 1.2 90/10/06 00:12:08 lee Rel1-10 $ X * X * $Log: cmdname.c,v $ X * Revision 1.2 90/10/06 00:12:08 lee X * Prepared for first beta release. X * X * Revision 1.1 90/03/24 17:08:19 lee X * Initial revision X * X * X */ @@@End of lq-text/src/liblqtext/cmdname.c echo x - lq-text/src/liblqtext/malloc.c 1>&2 sed 's/^X//' >lq-text/src/liblqtext/malloc.c <<'@@@End of lq-text/src/liblqtext/malloc.c' X/* malloc.c -- Copyright 1989 Liam R. Quin. All Rights Reserved. X * This code is NOT in the public domain. X * See the file COPYRIGHT for full details. X */ X X/* X * malloc.c -- wrapper routines for free/malloc, that do checking and X * print errors. X * X * $Id: malloc.c,v 1.6 91/03/03 00:14:27 lee Rel1-10 $ X * X * $Log: malloc.c,v $ X * Revision 1.6 91/03/03 00:14:27 lee X * Simpler interface if MALLOCTRACE undefined. X * X * Revision 1.5 90/10/06 00:12:11 lee X * Prepared for first beta release. X * X * Revision 1.4 90/09/29 23:51:31 lee X * Added MALLTRACE to detect memory leaks. X * X * Revision 1.3 90/08/29 21:46:55 lee X * Alpha release. X * X * Revision 1.2 90/08/09 19:16:44 lee X * BSD lint and fixes... X * X * Revision 2.2 89/10/08 20:46:10 lee X * Working version of nx-text engine. Addfile and wordinfo work OK. X * X * X */ X X#include "globals.h" X X#include <malloc.h> X#include <stdio.h> X#include <ctype.h> X Xextern char *progname; X Xextern void exit(); X Xint _LiamIsInCurses = 0; X /* This should be in error.c */ X XINLINE char * X_emalloc(nbytes X#ifdef MALLOCTRACE X, FileName, Line X#endif X) X unsigned nbytes; X#ifdef MALLOCTRACE X char *FileName; X int Line; X#endif X{ X char *Result; X X if ((Result = malloc(nbytes)) == (char *) 0) { X#ifdef MALLOCTRACE X fprintf(stderr, "%s: %s: %d: emalloc(%u) failed.\n", X progname, FileName, Line, nbytes); X#else X fprintf(stderr, "%s: emalloc(%u) failed.\n", progname, nbytes); X#endif X exit(1); X } X X#ifdef MALLOCTRACE X (void) fprintf(stderr, "malloc %u > 0x%x %s %d\n", nbytes, Result, FileName, Line); X#endif X return Result; X} X XINLINE char * X_ecalloc(Number, Size X#ifdef MALLOCTRACE X , FileName, Line X#endif X) X unsigned Number; X unsigned Size; X#ifdef MALLOCTRACE X char *FileName; X int Line; X#endif X{ X char *Result; X extern char *calloc(); X X if ((Result = calloc(Number, Size)) == (char *) 0) { X#ifdef MALLOCTRACE X fprintf(stderr, "%s: %s: %d: ecalloc(%u x %u) failed.\n", X progname, FileName, Line, Number, Size); X#else X fprintf(stderr, "%s: ecalloc(%u x %u) failed.\n", X progname, Number, Size); X#endif X exit(1); X } X X return Result; X} X XINLINE char * X_erealloc(OldString, NewSize X#ifdef MALLOCTRACE X , FileName, Line X#endif X) X char *OldString; X unsigned NewSize; X#ifdef MALLOCTRACE X char *FileName; X int Line; X#endif X{ X char *Result; X X if ((Result = realloc(OldString, NewSize)) == (char *) 0) { X#ifdef MALLOCTRACE X fprintf(stderr, "%s: %s: %d: erealloc(0x%x, %u) failed.\n", X progname, FileName, Line, OldString, NewSize); X#else X fprintf(stderr, "%s: erealloc(0x%x, %u) failed.\n", X progname, OldString, NewSize); X#endif X exit(1); X } X X#ifdef MALLOCTRACE X (void) fprintf(stderr, "realloc 0x%x %u > 0x%x %s %d\n", OldString, NewSize, Result, FileName, Line); X#endif X return Result; X} X XINLINE void X_efree(String X#ifdef MALLOCTRACE X , FileName, Line X#endif X) X char *String; X#ifdef MALLOCTRACE X char *FileName; X int Line; X#endif X{ X if (!String) { X#ifdef MALLOCTRACE X (void) fprintf(stderr, "%s: %s: %d: Warning: free(0) ignored\n", X progname, FileName, Line); X#else X (void) fprintf(stderr, "%s: Warning: free(0) ignored\n", progname); X#endif X return; X } X#ifdef MALLOCTRACE X (void) fprintf(stderr, "free 0x%x\n", String); X#endif X X (void) free(String); X} @@@End of lq-text/src/liblqtext/malloc.c echo x - lq-text/src/liblqtext/numbers.c 1>&2 sed 's/^X//' >lq-text/src/liblqtext/numbers.c <<'@@@End of lq-text/src/liblqtext/numbers.c' X/* numbers.c -- Copyright 1989 Liam R. Quin. All Rights Reserved. X * This code is NOT in the public domain. X * See the file COPYRIGHT for full details. X */ X X/* Routines to read and write numbers in a compressed format, preserving X * block boundaries. X * The actual compression seems to be about 50%, as most numbers turn X * out to fit in 16 bits. Interestingly, there is room for another one X * or two bits, I think, that could be used for something else, in the X * main pblock index. For example, it could mark whether words were X * plural/-"ing"/"un"-/ with 2 bits. X * X * $Id: numbers.c,v 1.4 90/10/06 00:12:16 lee Rel1-10 $ X * X * $Log: numbers.c,v $ X * Revision 1.4 90/10/06 00:12:16 lee X * Prepared for first beta release. X * X * Revision 1.3 90/08/09 19:16:49 lee X * BSD lint and fixes... X * X * Revision 1.2 90/04/18 19:47:13 lee X * More flexible (and slightly more compact) number format. X * X * Revision 2.2 89/10/08 20:46:36 lee X * Working version of nx-text engine. Addfile and wordinfo work OK. X * X * Revision 2.1 89/10/02 01:15:15 lee X * New index format, with Block/WordInBlock/Flags/BytesSkipped info. X * X * Revision 1.2 89/09/16 21:16:26 lee X * First demonstratable version. X * X * Revision 1.1 89/09/07 21:06:01 lee X * Initial revision X * X * X */ X X#include "globals.h" X X#ifdef SYSV X extern int _filbuf(), _flsbuf(); X#endif X#include <stdio.h> X#include "numbers.h" X X/* ReadNumber and WriteNumber take/return a long, using a compression X * algorithm to reduce the amount of data taken. X * The current algorithm is simply like internet addresses: X * a 0 in the top bit followed by a 0 means it's one byte X * a 0 followed by a 1 means it's 2 bytes X * a 1 followed by a 0 means it's 3 bytes, and X * a 1 followed by a 1 means it's 4 bytes. X * A better alternative might simply use a 1 in the top bit, hence fitting X * 7 bits into each bytes. The advantages of considering more than X * one number at a time and using compress-style LS packing are not clear. X * In particular, speed of recovery is an issue too. X * X * The routines use (char *) pointers instead of files prefixes with an s. X * see numbers.h for some related macros. X * X */ X X X#ifdef TESTNUMBERS Xchar *progname; X Xint Xmain(ac, av) X int ac; X char *av[]; X{ X extern long atol(); X FILE *f; X extern FILE *fopen(); X X progname = av[0]; X X while (--ac) { X unsigned long L = atol(*++av); X unsigned long L2; X X f = fopen("/tmp/boy", "w"); X printf("Write %u\n", L); X fWriteNumber(f, L); X fclose(f); X f = fopen("/tmp/boy", "r"); X L2 = fReadNumber(f); X printf("Read %u\n", L2); X if (L != L2) { X printf("**** ERROR **** %ld != %ld\n", L, L2); X } X fclose(f); X } X return 0; X} X#endif /*TESTNUMBERS*/ X XINLINE void XfWriteNumber(f, Number) X FILE *f; X unsigned long Number; X{ X /* Compressed numbers: X * 7 bit numbers --> single byte; X * 8...14 bits --> 2 bytes X * 15...21 bits --> 3 bytes X * 22..28 bits --> 4 bytes X * 29..32 bits --> 5 bytes X */ X while (Number > 0177) { X putc((Number & 0177) | 0200, f); X Number >>= 7; X } X putc(Number & 0177, f); X} X X#define PutC(ch, S) (*((*S)++) = (char) (ch)) X XINLINE void XsWriteNumber(s, Number) X char **s; X unsigned long Number; X{ X /* Compressed numbers: X * 7 bit numbers --> single byte; X * 8...14 bits --> 2 bytes X * 15...21 bits --> 3 bytes X * 22..28 bits --> 4 bytes X * 29..32 bits --> 5 bytes X */ X while (Number > 0177) { X PutC((Number & 0177) | 0200, s); X Number >>= 7; X } X PutC(Number & 0177, s); X} X XINLINE unsigned long XfReadNumber(f) X FILE *f; X{ X unsigned long Result = 0L; X int ThereIsMore; X int Shift = 0; X X /* Read a number, 7 bits at a time, lsb first, until there is X * a byte without the top bit set -- that's the most significant X * byte, and there is no more of this number. X */ X do { X Result |= ((ThereIsMore = getc(f)) & 0177) << Shift; X ThereIsMore &= 0200; X Shift += 7; X } while (ThereIsMore); X return Result; X} X X#define GetC(S) \ X ( (unsigned int) * (unsigned char *) ((* (unsigned char **)S)++) ) X XINLINE unsigned long XsReadNumber(s) X char **s; X{ X unsigned long Result = 0L; X int ThereIsMore; X int Shift = 0; X X /* Read a number, 7 bits at a time, lsb first, until there is X * a byte without the top bit set -- that's the most significant X * byte, and there is no more of this number. X */ X do { X Result |= ((ThereIsMore = GetC(s)) & 0177) << Shift; X ThereIsMore &= 0200; X Shift += 7; X } while (ThereIsMore); X return Result; X} @@@End of lq-text/src/liblqtext/numbers.c echo x - lq-text/src/liblqtext/pblock.c 1>&2 sed 's/^X//' >lq-text/src/liblqtext/pblock.c <<'@@@End of lq-text/src/liblqtext/pblock.c' X/* pblock.c -- Copyright 1989 Liam R. Quin. All Rights Reserved. X * This code is NOT in the public domain. X * See the file COPYRIGHT for full details. X */ X X/* The physical Word Database for NX-Text... X * X * Interface by Liam Quin, 1989 X * X * The main database is a mesh of linked lists, rather like tagliatelli. X * X * $Id: pblock.c,v 1.10 90/10/13 03:08:36 lee Rel1-10 $ X * X * $Log: pblock.c,v $ X * Revision 1.10 90/10/13 03:08:36 lee X * deleted an illegal dereference! X * X * Revision 1.9 90/10/06 00:12:17 lee X * Prepared for first beta release. X * X * Revision 1.8 90/09/29 23:49:05 lee X * commented out (with #if 0) some code that called WID2WordInfo needlessly. X * X * Revision 1.7 90/08/29 21:47:04 lee X * Alpha release. X * X * Revision 1.6 90/08/09 19:16:52 lee X * BSD lint and fixes... X * X * Revision 1.5 90/08/08 22:26:17 lee X * Many major lint and gcc -Wall fixes... X * X * Revision 1.4 90/03/22 14:26:54 lee X * Fixed the test for Sorting monotonicity (?)..., which had not been X * checking that the FIDS were the same before checking the blocks... X * X * Revision 1.3 90/03/21 19:42:22 lee X * new calls to efree(); X * removed WID from the data blocks. X * doesn't work yet, though, sorry. X * X * Revision 2.2 89/10/08 20:46:53 lee X * Working version of nx-text engine. Addfile and wordinfo work OK. X * X * Revision 2.1 89/10/02 01:15:24 lee X * New index format, with Block/WordInBlock/Flags/BytesSkipped info. X * X * Revision 1.3 89/09/17 23:03:12 lee X * Various fixes; NumberInBlock now a short... X * X * Revision 1.2 89/09/16 21:16:30 lee X * First demonstratable version. X * X * Revision 1.1 89/09/07 21:06:05 lee X * Initial revision X * X * X */ X X#include "globals.h" /* defines and declarations for database filenames */ X X#include <stdio.h> /* stderr, also for fileinfo.h */ X#include <fcntl.h> X#include <malloc.h> X#include <sys/types.h> X#include "fileinfo.h" /* for wordinfo.h */ X#include "wordinfo.h" X#include "pblock.h" X#include "numbers.h" X#include "wordrules.h" X#include "emalloc.h" X X#ifndef STREQ X# define STREQ(boy,girl) ((*(boy) == *(girl)) && (!strcmp((boy),(girl)))) X#endif X X#define new(type) ( ((type) *) emalloc(sizeof(type)) ) X Xextern t_WordInfo *WID2WordInfo(); Xextern t_WID Word2WID(); X X/** Unix system calls that need to be declared: **/ Xextern void exit(); Xextern int open(); Xextern int read(), write(); X X/** C library functions that need to be declared: **/ Xextern char *memcpy(); Xextern void perror(); Xextern void qsort(); Xextern int strcmp(); Xextern char *strcpy(); Xextern int strlen(); X X/** lqtext library functions that need to be declared: **/ Xextern int MkWIB(); Xextern void MkWIBH(); Xextern int PutWordInfoIntoIndex(); Xextern void SortWordPlaces(); Xextern t_WordInfo *MakeWordInfo(); Xextern void SlayWordInfo(); X X/** Functions within this file that need to be declared: **/ Xvoid DeleteWordPlaces(); Xstatic void FlushBlock(); X Xt_WordPlace *GetWordPlaces(); Xt_pblock *Getpblock(); Xunsigned short PutWordPlaces(); Xint _PutByte(), PutLong(); Xunsigned char _GetByte(); Xunsigned long GetLong(); Xunsigned long FindFreeBlock(); Xvoid FillWordPlaces(); X/** **/ X Xextern char *progname; X X/* If you find this macro confusing, see "The C Programming Language", X * Kernighan & Ritchie, 1st. Edition, for a good introduction to X * programming in C. X * :-( :-) X */ X#define PutByte(Byte, WID, sp, Blockp, BlockLength, LastStart, NextBlock) \ X ( (*(sp) - (unsigned char *) *(Blockp) >= *(BlockLength)) ? \ X _PutByte(Byte, WID, sp, Blockp, BlockLength, LastStart, NextBlock) : \ X ((*((*(sp))++) = (Byte)), 0)) X X#define GetByte(WID, sp, Blockp, BlockLength, NextBlock) \ X ( (*(sp) - (unsigned char *) *(Blockp) >= *(BlockLength)) ? \ X _GetByte(WID, sp, Blockp, BlockLength, NextBlock) : *((*(sp))++) ) X Xextern long lseek(); X Xstatic int DataFile = -1; X X#ifdef ASCIITRACE Xextern int AsciiTrace; X#endif X Xchar *ReadBlock(); Xvoid WriteBlock(); Xunsigned long Putpblock(); X X/* Layout of the physical index database (OUT OF DATE) X * ===================================== X * X * This file is the only interface to the database of FID/Offset pairs. X * X * The db is organised in blocks arranged in Tagliatelli format: a linked X * list of blocks for each WID; there is a list for each WID in the Word X * Index. The Word Index contains the block number of the start of the X * chain. X * Block 0 is the start of the free list (but is never itself free!) X * X * block 0... Free list header; otherwise currently unused. X * block 1... first data block: X * +--------------------------- X * | bytes 0...3: Offset of next block in this chain X * | 4...7: Number of valid pairs in this block (0-->unused block) X * | 8..11: WID to which this block refers (for checking) X * | 12..15: Total number of WIDS in the chain X * | (only the first block in each chain has this) X * | The (FID, Offset) pairs follow, in compressed (Internet-style) format. X * | X * block 2... next data block (either the start of a new chain, or a X * continuation of some other chain. Or maybe unused, especially if files X * have been deleted). X * X * The block header is described by t_BlockHeader. X * X */ X X#include "blkheader.h" X X/* Look up a word in the database... X * and return a list of all the WordPlaces where it's found X * BUG: should be called WordInfo2pblock(). X */ Xt_pblock * XGetpblock(WordInfo) X t_WordInfo *WordInfo; X{ X t_pblock *pblock = 0; X unsigned long HowManyToGet = 0L; X t_WordPlace *WordPlaces; X unsigned long CurrentPair = 0L; X X#if 0 X /* This code allows one to call GetPblock with a very minimal X * wordinfo entry: X */ X if (WordInfo->Offset == 0L) { X /* No database entry found yet... */ X if (!WordInfo->WID && WordInfo->Length) { X WordInfo->WID = Word2WID(WordInfo->Word, WordInfo->Length); X } X if (WordInfo->WID) { X WordInfo = WID2WordInfo(WordInfo->WID); X } X } X#endif X X if (!WordInfo->NumberOfWordPlaces) { X return (t_pblock *) 0; /* nothing to write */ X } X X if (!WordInfo->Offset && !WordInfo->NumberOfWordPlaces) { X /* no pblock offset, so no pblock! */ X return (t_pblock *) 0; X } X X HowManyToGet = WordInfo->NumberOfWordPlaces; X X pblock = (t_pblock *) emalloc( sizeof(t_pblock) + X (unsigned) (HowManyToGet + 1) * sizeof(t_WordPlace)); X X WordPlaces = pblock->WordPlaces; X pblock->WID = WordInfo->WID; X pblock->ChainStart = WordInfo->Offset; X pblock->NumberOfWordPlaces = WordInfo->NumberOfWordPlaces; X X /* First, the pairs in the WordInfo might suffice: */ X if (WordInfo->WordPlacesInHere >= HowManyToGet) { X for (; CurrentPair < WordInfo->WordPlacesInHere; CurrentPair++) { X WordPlaces[CurrentPair] = WordInfo->WordPlaces[CurrentPair]; X } X } X X /* If they all fitted in the WordInfo block, well, that was a big win! */ X if (CurrentPair >= HowManyToGet) { X pblock->ChainStart = 0L; X return pblock; X } X X /* So we need to read the entire list of WordPlaces from the database. X * Although we may have already done the first few, I'm going to do them X * all again because that ensures that the last few bytes in the X * WordInfo data block can get used! X */ X X WordPlaces = GetWordPlaces( X WordInfo->WID, X WordInfo->WordPlaceStart, X (unsigned) WIDBLOCKSIZE - X (WordInfo->WordPlaceStart - WordInfo->DataBlock), X WordInfo->Offset, X HowManyToGet); X X X X if (WordPlaces == (t_WordPlace *) 0) { X fprintf(stderr, "%s: SNAFU: no wordplaces for WID %ld, wanted %ld\n", X progname, WordInfo->WID, HowManyToGet); X exit(1); X } X X /* copy the result... */ X (void) memcpy((char *) pblock->WordPlaces, (char *) WordPlaces, X (int) (sizeof(t_WordPlace) * HowManyToGet)); X (void) efree((char *) WordPlaces); X return pblock; X} X X/* This is how many WordPlaces one could write into a block in WIDFILE: X * The Header takes up 1 for the word length, MinWordLength or more for X * the actual word, and another 1 for the number of places. X * (32 - 5) / 3 is 27/3 is 9, but this is rather rare in practice! X * See the comment just before PutPairs(). X * If this is not kept up to date, space may be wasted in the database, X * or you will slow down lqaddfile. X */ X#define MaxWordPlacesInAWordBlock ((WIDBLOCKSIZE - (MinWordLength + 2)/3)) X X/* This should in fact take a (t_WordPlaceList *) or something. X * It returns the number of words written, for checking in X * DumpCache (wordtable.c) X */ Xint XWriteWordChain(WordPlaceList) X t_WordPlaceList *WordPlaceList; X{ X extern t_WID GetNextWID(); X extern t_WordInfo *FindWordInfoFromIndex(); X X t_WID WID; X int Length; X t_pblock *pblock = (t_pblock *) 0; X int NumberOfWordPlacesToAdd = 0; X t_WordPlaceList *WP; X t_WordInfo *IndexEntry = 0; X char *FirstWord; X register int i; X int TotalWordsWritten = 0; X X if (!WordPlaceList) return 0; /* nothing to do */ X X /* Sanity check: */ X if (!(WordPlaceList->Word) || !*(WordPlaceList->Word)) { X fprintf(stderr, "Warning: WriteWordChain() asked to write null word\n"); X return 0; X } X X Length = strlen(WordPlaceList->Word); X X /* This is undoubtedly a bad way to do things: X * if IndexEntry is big, we don't want to fetch it when we could X * simply stuff things on the end! X */ X if ((WID = Word2WID(WordPlaceList->Word, Length)) != (t_WID) 0) { X if ((IndexEntry = WID2WordInfo(WID)) != (t_WordInfo *) 0) { X pblock = Getpblock(IndexEntry); X } X } X X if (!WID) { X WID = GetNextWID(); X } X X#ifdef ASCIITRACE X if (AsciiTrace >= 3) { X fprintf(stderr, "Entry %lu for \"%s\"", WID, WordPlaceList->Word); X if (IndexEntry) { X fprintf(stderr, ", had %d pairs\n", IndexEntry->NumberOfWordPlaces); X } else { X fprintf(stderr, " (new entry)\n"); X } X } X#endif X X /* Count the number of entries we are going to add. X * There may be several different words in the chain, but they are X * sorted in word order. X */ X FirstWord = WordPlaceList->Word; X X for (WP = WordPlaceList; WP; WP = WP->Next) { X if (!STREQ(WP->Word, FirstWord)) break; X ++NumberOfWordPlacesToAdd; X if (WP != WordPlaceList) { X WP->Word = (char *) 0; /* so we don't free it; see AddWord() */ X } X } X X if (pblock == (t_pblock *) 0) { X int oldnp = (IndexEntry) ? IndexEntry->NumberOfWordPlaces : 0; X X pblock = (t_pblock *) emalloc(sizeof(t_pblock) + X (NumberOfWordPlacesToAdd + oldnp) * sizeof(t_WordPlace)); X pblock->NumberOfWordPlaces = 0; X if (IndexEntry && IndexEntry->NumberOfWordPlaces) { X register t_WordPlace *W, *Q; X register unsigned long boy = 0L; X X if (IndexEntry->WordPlacesInHere < IndexEntry->NumberOfWordPlaces) { X FillWordPlaces(IndexEntry); X } X X for (W = IndexEntry->WordPlaces, Q = pblock->WordPlaces; X boy < IndexEntry->NumberOfWordPlaces; boy++) { X *Q++ = *W++; X } X pblock->NumberOfWordPlaces = IndexEntry->NumberOfWordPlaces; X } X } else { X /* Remove the old information from disk. X * This isn't as bad as it sounds, as it will be at the start X * of the freelist, so when we write it out again it will be X * in the buffer cache... X * Although, it would be faster simply to append. X */ X if (IndexEntry->Offset) { X DeleteWordPlaces(IndexEntry->Offset, IndexEntry->WID); X } X /*NOSTRICT*/ X pblock = (t_pblock *) erealloc((char *) pblock, sizeof(t_pblock) + X (unsigned) (NumberOfWordPlacesToAdd + X pblock->NumberOfWordPlaces) * sizeof(t_WordPlace)); X } X X pblock->WID = WID; X pblock->ChainStart = 0L; /* certainly it is now invalid! */ X i = pblock->NumberOfWordPlaces; X pblock->NumberOfWordPlaces += NumberOfWordPlacesToAdd; X X for (WP = WordPlaceList; WP && NumberOfWordPlacesToAdd--; WP = WP->Next) { X /* As we ignore the rest of these WordInfo entries, something X * has to change here for more efficiency! X * Probably we should get passed a linked list of WordPlaces X * instead of WordInfo structures. X */ X pblock->WordPlaces[i] = WP->WordPlace; X i++; X } X X if (WP && STREQ(WP->Word, FirstWord)) { X fprintf(stderr, "Internal error counting pairs to add\n"); X exit(1); X } X X if (NumberOfWordPlacesToAdd > 0) { X fprintf(stderr, "I've got some pairs left over, \"%s\" line %d\n", X __FILE__, __LINE__ - 1); X exit(1); X } X X /* We are going to need a WordInfo here... */ X if (IndexEntry == (t_WordInfo *) 0) { X IndexEntry = MakeWordInfo(WID, Length, WordPlaceList->Word); X } X IndexEntry->NumberOfWordPlaces = pblock->NumberOfWordPlaces; X X /* Now, see if they all fit into the WordInfo itself! */ X X pblock->ChainStart = IndexEntry->Offset = 0L; X X if (pblock->NumberOfWordPlaces <= MaxWordPlacesInAWordBlock) { X (void) MkWIB(IndexEntry, pblock); X } X X if (IndexEntry->WordPlacesInHere == pblock->NumberOfWordPlaces) { X /* no pblock needed! */ X if (PutWordInfoIntoIndex(IndexEntry, (unsigned long) 0L) < 0) { X extern int errno; X int e = errno; X fprintf(stderr, "%s: Couldn't put \"%s\" into the ", X progname, IndexEntry->Word); X errno = e; X perror("index"); X exit(1); X } X } else { X (void) Putpblock(IndexEntry, pblock); /* (this alters *WordInfo) */ X X if (PutWordInfoIntoIndex(IndexEntry, pblock->ChainStart) < 0) { X extern int errno; X int e = errno; X fprintf(stderr, "%s: Couldn't add word \"%s\" to the ", X progname, IndexEntry->Word); X errno = e; X perror("word index"); X exit(1); X } X } X X if (pblock) { X efree((char *) pblock); X pblock = (t_pblock *) 0; X } X X if (IndexEntry) { X SlayWordInfo(IndexEntry); X } X X TotalWordsWritten += NumberOfWordPlacesToAdd; X X /* Now see if there are any more words in the chain: */ X if (WP) { X TotalWordsWritten += WriteWordChain(WP); X } X X return TotalWordsWritten; X} X Xstatic unsigned char pblockBuffer[BLOCKSIZE + 5]; X X/* Write out an entire (presumably new) data entry, and X * return a disk pointer to the start of the chain X */ Xunsigned long XPutpblock(WordInfo, pblock) X t_WordInfo *WordInfo; X t_pblock *pblock; X{ X /* Assume that we can discard the PairBlock in WordInfo -- X * it was a pointer to a static buffer anyway. X */ X X if (WordInfo->DataBlock) { X WordInfo->DataBlock = (char *) 0; X } X X WordInfo->Offset = pblock->ChainStart = FindFreeBlock(WordInfo->WID); X (void) MkWIBH(WordInfo, pblock); X PutWordPlaces( X pblock->WordPlaces, X WordInfo->WID, X (unsigned char *) WordInfo->WordPlaceStart, X (unsigned) X WIDBLOCKSIZE - (WordInfo->WordPlaceStart - WordInfo->DataBlock), X WordInfo->Offset, X pblock->NumberOfWordPlaces); X X return WordInfo->Offset; X} X X/** WordPlaces are now stored as sequences, as follows: X ** FID*2 -- 1, 2, 3 (usually) or 4 bytes 1-4 X ** (very, very occasionaly a variable-size number may be 5 bytes.) X ** . the bottom bit in the stored number determines whether there X ** is more than one FID to follow X ** number of following places (only if prev. bit was 1) -- 1 byte 0-1 X ** For each following entry:- X ** . for each of the following places: X ** Block In File (long, 1, 2, 3 or 4 bytes, usually 1) 1-4 X ** Word In Block -- always 1 byte 1-1 X ** the bottom bit of this says if there are flags X ** Flags -- always 1 byte, if present 0-1 X ** Stuff Before -- 1 byte 0-1 X ** (if there are no flags, there's no Stuff Before byte, and X ** we use the default value of 1) X ** X ** Hence: each sub-place takes from 2 to 7 bytes [was: 0 to 4] X ** each Place sequence takes from 3 [min was 2 with old format] X ** to (4 + 1) + 255 * (2..7) bytes. X ** In most (I guess > 7/10) cases, flags will be 0, and X ** StuffBefore be the default of 1. X ** X ** I am hoping, of course, that the extra information stored is X ** worth while! X ** It might be possible to coalesce WordInBlock and BlockInFile using X ** delta modulation -- i.e., storing the increment from the previous. In X ** this case, a flag bit could mean that those two values each occupy a X ** nibble in a single byte. Or, I could use a single byte, like this: X ** [a b c d e f g h] X ** a == 1 --> (efgh) is word in block inc., (bcd is block in file inc) X ** but I need to do some real measurements to figure out how best to save X ** space. It really is worth while keeping the format as simple as I can, X ** as this speeds retrieval. X ** X **/ X Xunsigned short XPutWordPlaces(WordPlaces, WID, Block, BlockLength, NextOffset, NumberToWrite) X t_WordPlace *WordPlaces; X t_WID WID; X unsigned char *Block; X unsigned BlockLength; X unsigned long NextOffset; X unsigned long NumberToWrite; X{ X unsigned char *q = Block; X unsigned long L; X int CurrentPlace = 0; X unsigned long LastStart = 0L; X t_FID LastFID = 0; X unsigned long LastBlock = 0L; X X /** IMPORTANT NOTICE: keep the definition of MaxWordPlacesInAWordBlock X ** up to date (it's #defined above). X **/ X X /* sort the pblock to simplify subsequent accesses */ X if (NumberToWrite > 1) { X SortWordPlaces(NumberToWrite, WordPlaces); X } X X while (CurrentPlace < 0 || CurrentPlace < NumberToWrite) { X unsigned short NumberOfRepeats; X unsigned char U; X t_FID FID = WordPlaces[CurrentPlace].FID; X int LastPlace; X X /* Determine the number of Places in the same file; X * note that we can write at most 255 in the same place, so X * longer stretches are broken up into clumps of 255. X * This is a reasonable tradeoff, I think. The alternative would X * be to write NumberOfRepeats as a long, and lose if there were X * (say) between 64 (old, 127 new) and 255 of them. This case only X * occurs once in the New Testament anyway, and presumably is X * generally quite rare. X */ X NumberOfRepeats = 0; X LastPlace = CurrentPlace; X while (NumberOfRepeats < 255) { X if (LastPlace >= NumberToWrite) { X break; X } else if (WordPlaces[LastPlace].FID != FID) { X break; X } X ++NumberOfRepeats; X ++LastPlace; X } X X L = (FID - LastFID) << 1; X LastFID = FID; X if (NumberOfRepeats > 1) L |= 01L; X if (PutLong(L, WID, &q, &Block, &BlockLength, X &NextOffset, &LastStart) < 0) { X return CurrentPlace; X } X if (L & 01L) { X if (PutByte(NumberOfRepeats, WID, &q, &Block, &BlockLength, X &LastStart, &NextOffset) < 0) { X return CurrentPlace; X } X } X X LastBlock = 0; X X for (; NumberOfRepeats != 0; --NumberOfRepeats) { X if (CurrentPlace > NumberToWrite) { X fprintf(stderr, X "Entry for file %lu (WID %ld) has more matches than expected\n", X FID, WID); X exit(1); X /* This would represent a rather serious bug, I think! */ X } X /* block number */ X if (WordPlaces[CurrentPlace].BlockInFile < LastBlock) { X fprintf(stderr, "Sort WID %ld failed, non-monatonic blocks\n", X WID); X exit(1); /* can't cope with this one */ X } else if (CurrentPlace && X (WordPlaces[CurrentPlace].FID == X WordPlaces[CurrentPlace - 1].FID) && X (WordPlaces[CurrentPlace].BlockInFile == LastBlock) && X (WordPlaces[CurrentPlace].WordInBlock <= X WordPlaces[CurrentPlace - 1].WordInBlock)) { X fprintf(stderr, X "Sort WID %ld failed, FID %ld: Blk %d: WIB %d <= %d\n", X WID, FID, LastBlock, WordPlaces[CurrentPlace].WordInBlock, X WordPlaces[CurrentPlace - 1].WordInBlock X ); X } X L = WordPlaces[CurrentPlace].BlockInFile - LastBlock; X LastBlock += L; X X if (PutLong(L, WID, &q, &Block, &BlockLength, &NextOffset, X &LastStart) < 0) { X return CurrentPlace; X } X U = (WordPlaces[CurrentPlace].WordInBlock << 1); X if (WordPlaces[CurrentPlace].StuffBefore != 1) { X WordPlaces[CurrentPlace].Flags |= WPF_HASSTUFFBEFORE; X } X if (WordPlaces[CurrentPlace].Flags != WPF_HASSTUFFBEFORE) { X U |= 01; X } X if (U > 255) { X fprintf(stderr, X "WID %lu: WordInBlock (0%o) from FID %lu too big\n", X WID, U, FID); X exit(1); X } X X if (PutByte(U, WID, &q, &Block, &BlockLength, &LastStart, X &NextOffset) < 0) { X return CurrentPlace; X } X if (U & 01) { X if (PutByte(WordPlaces[CurrentPlace].Flags, WID, &q, &Block, X &BlockLength, &LastStart, &NextOffset) < 0) { X return CurrentPlace; X } X } X X /* Even if there are flags, there still might not be a separate X * entry for the number of preceding skipped bytes. X */ X if ((U & 01) && X (WordPlaces[CurrentPlace].Flags & WPF_HASSTUFFBEFORE)) { X if (PutByte(WordPlaces[CurrentPlace].StuffBefore, WID, &q, X &Block, &BlockLength, &LastStart, &NextOffset) < 0) { X return CurrentPlace; X } X } X ++CurrentPlace; X } X if (CurrentPlace > LastPlace) { X fprintf(stderr, "oops- I went wrong and wrote %ld > %ld\n", X CurrentPlace, LastPlace); X } X } X if (LastStart) { X /* NextStart had better not be non-zero, but FlushBlock will X * take care of it (we have wasted a block in that case!). X * LastStart is zero if we fitted it all inside the WordInfo X * block, although this is currently unlikely as we don't get X * called in that case! X * Oh, maybe we do. X */ X FlushBlock((unsigned char *) Block, &NextOffset, &LastStart, WID); X } X return NumberToWrite; X} X Xt_WordPlace * XGetWordPlaces(WID, Block, BlockLength, NextOffset, NumberExpected) X t_WID WID; X char *Block; X unsigned BlockLength; X unsigned long NextOffset; X unsigned long NumberExpected; X{ X unsigned char *q = (unsigned char *) Block; X unsigned long L; X t_WordPlace *Places = (t_WordPlace *) 0; X long CurrentPlace = 0; X t_FID LastFID = (t_FID) 0; X unsigned LastBlock = 0L; X X#ifdef ASCIITRACE X if (AsciiTrace > 10) { X fprintf(stderr, X "GetWordPlaces WID %ld Blk %ld len %d next %ld No. %ld\n", X WID, Block, BlockLength, NextOffset, NumberExpected); X X } X#endif X X if (Block == (char *) 0) { X fprintf(stderr, "GetWordPlaces Error %lu\n", WID); X } X X /*NOSTRICT*/ X Places = (t_WordPlace *) emalloc(sizeof(t_WordPlace) * NumberExpected); X X while (CurrentPlace < NumberExpected) { X unsigned short NumberOfRepeats; X unsigned char Uchar; X t_FID FID; X X /** First get the FID. The bottom bit of the number stored X ** actually determines whether there are multiple Places X ** stored here for the same FID. X **/ X L = GetLong(WID, &q, &Block, &BlockLength, &NextOffset); X FID = (L >> 1) + LastFID; /* Get rid of flag bit */ X LastFID = FID; X NumberOfRepeats = (L & 01L) ? X GetByte(WID, &q, &Block, &BlockLength, &NextOffset) : 1; X X /* Quick Sanity check */ X switch (NumberOfRepeats) { X case 0L: X fprintf(stderr, "Warning: no entries! for FID %lu\n", FID); X case 1L: X if (L & 01L) { X fprintf(stderr, "Warning: FID %lu repeated 1 times!\n", FID); X } X } X X LastBlock = 0L; X if (CurrentPlace + NumberOfRepeats > NumberExpected) { X fprintf(stderr, X "Entry for file %lu WID %ld has %lu matches, not %lu\n", X FID, WID, CurrentPlace + NumberOfRepeats + 1, NumberExpected); X NumberOfRepeats = NumberExpected - CurrentPlace; X } X for (; NumberOfRepeats != 0; --NumberOfRepeats) { X Places[CurrentPlace].FID = FID; X /* block number */ X L = GetLong(WID, &q, &Block, &BlockLength, &NextOffset); X LastBlock += L; X Places[CurrentPlace].BlockInFile = LastBlock; X Uchar = GetByte(WID, &q, &Block, &BlockLength, &NextOffset); X Places[CurrentPlace].WordInBlock = (Uchar >> 1); X X /* Sanity check: */ X if (CurrentPlace > 0 && Places[CurrentPlace].FID == X Places[CurrentPlace - 1].FID) { X if (Places[CurrentPlace - 1].BlockInFile == X Places[CurrentPlace].BlockInFile) { X if ( Places[CurrentPlace - 1].WordInBlock >= X Places[CurrentPlace].WordInBlock) { X fprintf(stderr, X "%s: warning: %dth match for word %ld (FID %ld) WIB goes backwards!\n", X progname, CurrentPlace, WID, FID); X } X } else if (Places[CurrentPlace - 1].BlockInFile > X Places[CurrentPlace].BlockInFile) { X fprintf(stderr, X "%s: warning: %dth match for word %ld (FID %ld) BIF goes backwards!\n", X progname, CurrentPlace, WID, FID); X } X } X /* end of sanity test */ X X if (Uchar & 01) { /* use if, not ?:, for profiler */ X Places[CurrentPlace].Flags = X GetByte(WID, &q, &Block, &BlockLength, &NextOffset); X } else { X Places[CurrentPlace].Flags = 0; X } X X /* If there are flags, there still might not be a separate X * entry for the number of preceding skipped bytes. X */ X if (Places[CurrentPlace].Flags & WPF_HASSTUFFBEFORE) { X Places[CurrentPlace].StuffBefore = X GetByte(WID, &q, &Block, &BlockLength, &NextOffset); X } else { X Places[CurrentPlace].StuffBefore = 1; X } X ++CurrentPlace; X } X } X return Places; X} X Xvoid XFillWordPlaces(WordInfo) X t_WordInfo *WordInfo; X{ X WordInfo->WordPlaces = GetWordPlaces( X WordInfo->WID, X WordInfo->WordPlaceStart, X WIDBLOCKSIZE - (WordInfo->WordPlaceStart - WordInfo->DataBlock), X WordInfo->Offset, X WordInfo->NumberOfWordPlaces X ); X} X Xstatic void XFlushBlock(Block, NextOffset, LastStart, WID) X char *Block; X unsigned long *NextOffset, *LastStart; X t_WID WID; X{ X if (*LastStart && Block) { X /*NOSTRICT*/ X t_BlockHeader *BH = (t_BlockHeader *) Block; X X BH->NextOffset = ((*NextOffset) / BLOCKSIZE) << 1; X WriteBlock(*LastStart, Block); X } X *LastStart = *NextOffset = 0L; X} X X/* This is simply to help keep the source lines getting too long! */ Xtypedef unsigned char *UCP; X Xint X_PutByte(Byte, WID, sp, Blockp, BlockLength, LastStart, NextBlock) X unsigned char Byte; X t_WID WID; X unsigned char **sp; X unsigned char **Blockp; X unsigned *BlockLength; X unsigned long *NextBlock; X unsigned long *LastStart; /* for writing the linked list */ X{ X t_BlockHeader *BH; X X if (*sp - (*Blockp) >= (*BlockLength)) { X if (!*NextBlock && !*LastStart) return -1; /* only do the 1st block */ X if (*NextBlock == (unsigned long) 0) { X *NextBlock = FindFreeBlock(WID); X } X /* Complete the information in the previous block, if required */ X if (*LastStart) { X BH = (t_BlockHeader *) (*Blockp); X BH->NextOffset = ((*NextBlock) / BLOCKSIZE) << 1; X /* Write the old block */ X WriteBlock(*LastStart, *Blockp); X *LastStart = 0L; X } X *LastStart = (*NextBlock); X *BlockLength = BLOCKSIZE; X (*NextBlock) = 0L; X *Blockp = pblockBuffer; /* Use static (to this file) data buffer */ X /*NOSTRICT*/ X BH = (t_BlockHeader *) (*Blockp); X (*sp) = (UCP) BH->Data; X } X **sp = Byte; X (*sp)++; X return 0; X} X Xunsigned char X_GetByte(WID, sp, Blockp, BlockLength, NextBlock) X t_WID WID; X unsigned char **sp; X unsigned char **Blockp; X unsigned long *BlockLength; X unsigned long *NextBlock; X{ X t_BlockHeader *BH; X X if (*sp - (*Blockp) >= (*BlockLength)) { X if (*NextBlock == (unsigned long) 0) { X (*Blockp) = (*sp) = (UCP) 0; X fprintf(stderr, "Database Corrupt, Next is zero\n"); X return 0; X } else { X (*sp) = (*Blockp) = (UCP) ReadBlock(*NextBlock); X } X /* Check the new block */ X if ((*Blockp) == (UCP) 0) { X fprintf(stderr, "Database corrupt, %lu, sigh.\n", *NextBlock); X exit(1); X } X /*NOSTRICT*/ X BH = (t_BlockHeader *) (*Blockp); X *NextBlock = (BH->NextOffset >> 1) * BLOCKSIZE; X *BlockLength = BLOCKSIZE; X (*sp) = (UCP) BH->Data; X } X return *((*sp)++); X} X X/* PutLong -- write a long number in compressed/abbreviated form into a X * string. If this moves the string pointer beyond the block, write out X * the block and start a new one. In that case, the number written may well X * span the gap between the blocks. We use an overflow buffer to copy X * the bytes (if any) that overflowed into it. X * Then we write them at the start of the next block. X * X * This routine returns -1 and writes a partial number (no allocated block) X * if *LastBlock and *NextBlock are zero. This allows PutwOrdPlaces to be X * called to put the WordPlaces into the WIDFILE block without writing out X * an entire chain. X */ Xint XPutLong(Long, WID, sp, Blockp, BlockLength, NextBlock, LastStart) X unsigned long Long; X t_WID WID; X unsigned char **sp; X unsigned char **Blockp; X unsigned *BlockLength; X unsigned long *NextBlock; X unsigned long *LastStart; /* for writing the linked list */ X{ X t_BlockHeader *BH; X unsigned char Buffer[sizeof(unsigned long) + 1]; X unsigned char *Bufp = Buffer; X unsigned char *p; X X sWriteNumber((char **) sp, Long); X X if ((*sp) - (*Blockp) > (*BlockLength)) { /* gone too far! */ X if (!*NextBlock && !*LastStart) return -1; X /* Save the overflow in Buffer: X * the 1st 1 or more characters will fitted into the old block, X * but we need them all in a lump for readnumber(). X * When we write the next block, we need to put the overflow X * characters into the start of the next block. X */ X for (p = &(*Blockp)[*BlockLength]; p < (*sp); p++) { X *Bufp++ = *p; X } X if (*NextBlock == (unsigned long) 0) { X *NextBlock = FindFreeBlock(WID); X } X /* Complete the information in the previous block, if required */ X if (*LastStart) { X BH = (t_BlockHeader *) (*Blockp); X BH->NextOffset = ((*NextBlock) / BLOCKSIZE) << 1; X X /* Write the old block */ X WriteBlock(*LastStart, *Blockp); X } X *LastStart = (*NextBlock); X (*NextBlock) = 0L; X *Blockp = pblockBuffer; X BH = (t_BlockHeader *) (*Blockp); X *BlockLength = BLOCKSIZE; X (*sp) = (UCP) BH->Data; X /* Now write the stuff from Buffer into the new block */ X if (Bufp > Buffer) { X for (p = Buffer; p < Bufp; p++) { X *((*sp)++) = (*p); X } X } X } X return 0; X} X X/* This is the reverse of PutLong. X * Things are slightly complicated by the need to provide sReadNumber X * with a contiguous copy of all of the bytes in a number that spanned X * a gap between data blocks. X */ Xunsigned long XGetLong(WID, sp, Blockp, BlockLength, NextBlock) X t_WID WID; X unsigned char **sp; X unsigned char **Blockp; X unsigned *BlockLength; X unsigned long *NextBlock; X{ X unsigned char Buffer[sizeof(unsigned long) + 1]; X long Result; X t_BlockHeader *BH; X unsigned char *NumberStart = (*sp); X unsigned char *p; X X Result = sReadNumber(sp); X X /* Now, have we fallen off the end of the block? */ X if ((*sp) - (*Blockp) > (*BlockLength)) { X unsigned char *bp = Buffer; X X if (*NextBlock == (unsigned long) 0) { X return 0L; X } X X /* Copy the first half of the number into the overflow buffer */ X for (p = NumberStart; p < &(*Blockp)[*BlockLength]; p++) { X *bp++ = *p; X } X X /** Now: X ** . sp is garbage, as is NumberStart, as they point at the old X ** data block X ** . Buffer contains the first few bytes of the number X ** . we need some more bytes, but don't yet know how many, as X ** this depends on the number representation X ** NOTE that we must have, however, that we know that there X ** are more bytes, so that we know if we need the next block. X ** . bp points 1 beyond the end of the 1st half of the number. X **/ X X (*sp) = *Blockp = (UCP) ReadBlock(*NextBlock); X /* Check the new block */ X if ((*Blockp) == (UCP) 0) { X fprintf(stderr, "Database corrupt, %lu, oh dear!\n", *NextBlock); X exit(1); X } X BH = (t_BlockHeader *) *Blockp; X *NextBlock = (BH->NextOffset >> 1) * BLOCKSIZE; X *BlockLength = BLOCKSIZE; X (*sp) = (UCP) BH->Data; X /* Fill up the buffer from the new block */ X for (p = bp; p - Buffer < sizeof(Buffer) - 1; p++) { X *p = *(*sp)++; X } X /* read the number from the buffer */ X (*sp) = Buffer; X /* Try that number again... */ X Result = sReadNumber(sp); X /* Now put sp where it should be. Part of the buffer was X * from the old block... X */ X (*sp) = (UCP) BH->Data + ((*sp) - bp); X } X return Result; X} X Xvoid XWriteBlock(Block, Data) X unsigned long Block; X char *Data; X{ X if (DataFile < 0) { X if ((DataFile = open(DataBase, O_RDWR|O_CREAT, 0666)) < 0) { X fprintf(stderr, "Can't open database"); X perror(DataBase); X exit(1); X } X } X X if (lseek(DataFile, (long) Block, 0) < 0) { X perror("lseek"); X exit(1); X } X X if (write(DataFile, Data, BLOCKSIZE) != BLOCKSIZE) { X extern int errno; X int e = errno; X fprintf(stderr, X "Warning: %s: E%d -- couldn't write %ld bytes at block %lu: ", X DataBase, e, BLOCKSIZE, Block); X errno = e; X perror("write"); X } X return; X} X Xunsigned long XFindFreeBlock(WID) X t_WID WID; X{ X char Data[BLOCKSIZE]; X t_BlockHeader *HP; X unsigned long Here = 0L; X unsigned long There = 0L; X X if (DataFile < 0) { X if ((DataFile = open(DataBase, O_RDWR|O_CREAT, 0666)) < 0) { X fprintf(stderr, "Can't open database"); X perror(DataBase); X exit(1); X } X } X X do { X (void) lseek(DataFile, (long) Here, 0); X X if (read(DataFile, Data, BLOCKSIZE) < BLOCKSIZE) { X HP = (t_BlockHeader *) 0; X There = 0L; X Here = BLOCKSIZE; X } else { X HP = (t_BlockHeader *) Data; X } X X /* It is free if it has no pairs (or if the block has X * never been written to, of course) X * Freed blocks are given a NextOffset or'd with 1. X */ X if ((!HP || (HP->NextOffset & 01L)) && Here != 0L) { X unsigned long NextFree = HP ? HP->NextOffset : 0L; X X if (NextFree) { X /* make it suitable for use with lseek() */ X NextFree = (NextFree >> 1) * BLOCKSIZE; X } X X /* Make the previous block in the chain point to the one X * after this one in the free list, instead of this one. X */ X X if (HP) { X (void) lseek(DataFile, (long) There, 0); X (void) read(DataFile, Data, BLOCKSIZE); X /* Well, we read it a second ago!*/ X } X HP = (t_BlockHeader *) Data; X HP->NextOffset = (NextFree / BLOCKSIZE) << 1; X (void) lseek(DataFile, (long) There, 0); X (void) write(DataFile, Data, BLOCKSIZE); X X goto GotOne; X } X X There = Here; X Here = (HP->NextOffset >> 1) * BLOCKSIZE; X X } while (Here != 0L); X X if ((Here = lseek(DataFile, 0L, 2)) == 0L) { X Here = BLOCKSIZE; X } X XGotOne: X /* Mark it in use */ X HP->NextOffset = 0L; X (void) lseek(DataFile, (long) Here, 0); X (void) write(DataFile, Data, BLOCKSIZE); X X#ifdef ASCIITRACE X if (AsciiTrace > 10) { X fprintf(stderr, "\tFindFree --> %lu\n", (Here == 0L) ? BLOCKSIZE : Here); X } X#endif X return (Here == 0L) ? BLOCKSIZE : Here; X} X X/* Get a single disk block from the database */ Xchar * XReadBlock(Offset) X unsigned long Offset; X{ X static char Buffer0[BLOCKSIZE]; X X if (DataFile < 0) { X if ((DataFile = open(DataBase, O_RDWR|O_CREAT, 0664)) < 0) { X return (char *) 0; X } X } X X#ifdef ASCIITRACE X if (AsciiTrace > 15) { X fprintf(stderr, "===ReadBlock(%ld)\n", Offset); X } X#endif X X if (lseek(DataFile, (long) Offset, 0) < 0) { X return (char *) 0; X } X X /* Now we are in the right place */ X X if (read(DataFile, Buffer0, BLOCKSIZE) != BLOCKSIZE) { X return (char *) 0; X } X X /* Now that we have the 1st block of the data.. */ X return Buffer0; X} X Xint XWordPlaceCompare(F1, F2) X t_WordPlace *F1, *F2; X{ X if (F1->FID != F2->FID) return F1->FID - F2->FID; X if (F1->BlockInFile != F2->BlockInFile) { X return F1->BlockInFile - F2->BlockInFile; X } X return F1->WordInBlock - F2->WordInBlock; X} X Xint XRemoveFIDFrompblock(FID, pblock) X t_FID FID; X t_pblock *pblock; X{ X unsigned int ThisPair; X int TotalWordPlacesToMove = 0; X X /* Mark unwanted pairs as deleted */ X for (ThisPair = 0; ThisPair < pblock->NumberOfWordPlaces; ThisPair++) { X if (pblock->WordPlaces[ThisPair].FID == FID) { X pblock->WordPlaces[ThisPair].FID = 0L; X ++TotalWordPlacesToMove; X } X } X if (!TotalWordPlacesToMove) return 0; X X if (pblock->NumberOfWordPlaces > 1) { X SortWordPlaces(pblock->NumberOfWordPlaces, pblock->WordPlaces); X } X X /* shuffle up any FID=0 pairs */ X { X register t_WordPlace *Src, *Dest; X register int n = TotalWordPlacesToMove; X X Src = pblock->WordPlaces; X Dest = &pblock->WordPlaces[TotalWordPlacesToMove]; X X while (n-- > 0) { X *Src++ = *Dest++; X } X } X X return TotalWordPlacesToMove; X} X Xvoid XSortWordPlaces(NumberOfWordPlaces, WordPlaces) X unsigned long NumberOfWordPlaces; X t_WordPlace *WordPlaces; X{ X /* sort the list */ X if (NumberOfWordPlaces > 2) { X qsort(WordPlaces, (int) NumberOfWordPlaces, X sizeof(t_WordPlace), WordPlaceCompare); X } else { X /* don't call qsort in the trivial cases... */ X if (NumberOfWordPlaces == 2) { X if (WordPlaceCompare(&WordPlaces[0], &WordPlaces[1]) > 0) { X t_WordPlace tmp; X tmp = WordPlaces[0]; /* structure copy! */ X WordPlaces[0] = WordPlaces[1]; X WordPlaces[1] = tmp; X } X } X } X} X Xvoid XDeleteWordPlaces(FirstBlock, WID) X unsigned long FirstBlock; X t_WID WID; X{ X char Data[BLOCKSIZE]; X char BlockZero[BLOCKSIZE]; X t_BlockHeader *H; X t_BlockHeader *HZERO; /* for the free list*/ X unsigned long ThisBlock; X unsigned long Here; X X if (!FirstBlock) { X return; X } X X if (DataFile < 0) { X if ((DataFile = open(DataBase, O_RDWR|O_CREAT, 0666)) < 0) { X fprintf(stderr, "Can't open database"); X perror(DataBase); X exit(1); X } X } X X (void) lseek(DataFile, 0L, 0); X X if (read(DataFile, BlockZero, BLOCKSIZE) <= 0) { X return; /* nothing to delete */ X } X X /* Block zero is always the head of the free list */ X HZERO = (t_BlockHeader *) BlockZero; X X Here = FirstBlock; X X do { X if (lseek(DataFile, (long) Here, 0) < 0) { X fprintf(stderr, "Aaaargh:"); X perror("lseek"); X exit(1); X } X X if (read(DataFile, Data, BLOCKSIZE) <= 0) { X return; /* it's not there to delete */ X } X X H = (t_BlockHeader *) Data; X X ThisBlock = Here; /* so we can write the changed block */ X Here = (H->NextOffset >> 1) * BLOCKSIZE; X if (Here == 0L) { /* next block... */ X H->NextOffset = HZERO->NextOffset; X } X H->NextOffset |= 01L; /* mark it as free */ X X /* Write out the newly freed block, having saved its Next pointer X */ X WriteBlock(ThisBlock, Data); X } while (Here); X X /* Now make the free list point to the start of the new chain; X * the end of the newly added chain points to the start of the X * original chain. This means that the new blocks are still in the X * Unix buffer cache, so this helps performance. I hope. X */ X HZERO->NextOffset = (FirstBlock / BLOCKSIZE) << 1; X (void) lseek(DataFile, 0L, 0); X (void) write(DataFile, BlockZero, BLOCKSIZE); X X return; X} X Xvoid XDeletepblock(pblock) X t_pblock *pblock; X{ X char Data[BLOCKSIZE]; X char BlockZero[BLOCKSIZE]; X t_BlockHeader *H; X t_BlockHeader *HZERO; /* for the free list*/ X unsigned long ThisBlock; X unsigned long Here; X X if (!pblock || !pblock->ChainStart) { X return; X } X X if (DataFile < 0) { X if ((DataFile = open(DataBase, O_RDWR|O_CREAT, 0666)) < 0) { X fprintf(stderr, "Can't open database"); X perror(DataBase); X exit(1); X } X } X X if (read(DataFile, BlockZero, BLOCKSIZE) <= 0) { X return; /* nothing to delete */ X } X X /* Block zero is always the head of the free list */ X HZERO = (t_BlockHeader *) BlockZero; X X Here = pblock->ChainStart; X X do { X if (lseek(DataFile, (long) Here, 0) < 0) { X fprintf(stderr, "Aaaargh:"); X perror("lseek"); X exit(1); X } X X if (read(DataFile, Data, BLOCKSIZE) <= 0) { X return; /* it's not there to delete */ X } X X H = (t_BlockHeader *) Data; X X ThisBlock = Here; /* so we can write the changed block later */ X if ((Here = ((H->NextOffset >> 1) * BLOCKSIZE)) == 0L) { X H->NextOffset = HZERO->NextOffset; X } X H->NextOffset |= 01L; /* mark it as free */ X X /* Write out the newly freed block, having saved its Next pointer X */ X WriteBlock(ThisBlock, Data); X } while (Here); X X /* Now make the free list point to the start of the new chain; X * the end of the newly added chain points to the start of the X * original chain. This means that the new blocks are still in the X * Unix buffer cache, so this helps performance. I hope. X */ X HZERO->NextOffset = (pblock->ChainStart / BLOCKSIZE) << 1; X (void) lseek(DataFile, 0L, 0); X write(DataFile, BlockZero, BLOCKSIZE); X X return; X} @@@End of lq-text/src/liblqtext/pblock.c echo end of part 05 -- Liam R. E. Quin, lee@sq.com, SoftQuad Inc., Toronto, +1 (416) 963-8337