Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members  

rawstr4.cpp

00001 /******************************************************************************
00002  *  rawstr4.cpp  - code for class 'RawStr4' - a module that reads raw text
00003  *                              files:  ot and nt using indexes ??.bks ??.cps ??.vss
00004  *                              and provides lookup and parsing functions based on
00005  *                              class StrKey
00006  */
00007 
00008 #include <stdio.h>
00009 #include <fcntl.h>
00010 #include <errno.h>
00011 
00012 #ifndef __GNUC__
00013 #include <io.h>
00014 #else
00015 #include <unistd.h>
00016 #endif
00017 
00018 #include <string.h>
00019 #include <stdlib.h>
00020 #include <utilfuns.h>
00021 #include <rawstr4.h>
00022 #include <sysdata.h>
00023 
00024 /******************************************************************************
00025  * RawStr4 Statics - 'instance' is incremented by the constructor and decremented by the destructor
00026  */
00027 
00028 int RawStr4::instance = 0;
00029 
00030 
00031 /******************************************************************************
00032  * RawStr Constructor - Initializes data for instance of RawStr
00033  *
00034  * ENT: ipath - path of the directory where data and index files are located.
00035  *              be sure to include the trailing separator (e.g. '/' or '\')
00036  *              (e.g. 'modules/texts/rawtext/webster/')
00037  */
00038 
00039 RawStr4::RawStr4(const char *ipath, int fileMode)
00040 {
00041         char buf[127];
00042 
00043         nl = '\n';
00044         lastoff = -1;
00045         path = 0;
00046         stdstr(&path, ipath);
00047 
00048 #ifndef O_BINARY                // O_BINARY is needed in Borland C++ 4.53
00049 #define O_BINARY 0              // If it hasn't been defined than we probably
00050 #endif                          // don't need it.
00051 
00052         if (fileMode == -1) { // try read/write if possible
00053                 fileMode = O_RDWR;
00054         }
00055                 
00056         sprintf(buf, "%s.idx", path);
00057         idxfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true);
00058 
00059         sprintf(buf, "%s.dat", path);
00060         datfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true);
00061 
00062         if (datfd < 0) {
00063                 sprintf(buf, "Error: %d", errno);
00064                 perror(buf);
00065         }
00066 
00067         instance++;
00068 }
00069 
00070 
00071 /******************************************************************************
00072  * RawStr Destructor - Cleans up instance of RawStr
00073  */
00074 
00075 RawStr4::~RawStr4()
00076 {
00077         if (path)
00078                 delete [] path;
00079 
00080         --instance;
00081 
00082         FileMgr::systemFileMgr.close(idxfd);
00083         FileMgr::systemFileMgr.close(datfd);
00084 }
00085 
00086 
00087 /******************************************************************************
00088  * RawStr4::getidxbufdat        - Gets the index string at the given idx offset
00089  *                                              NOTE: buf is allocated and must be freed by
00090  *                                                      calling function
00091  *
00092  * ENT: ioffset - offset in dat file to lookup
00093  *              buf             - address of pointer to allocate for storage of string
00094  */
00095 
00096 void RawStr4::getidxbufdat(long ioffset, char **buf)
00097 {
00098         int size;
00099         char ch;
00100         if (datfd > 0) {
00101                 lseek(datfd->getFd(), ioffset, SEEK_SET);
00102                 for (size = 0; read(datfd->getFd(), &ch, 1) == 1; size++) {
00103                         if ((ch == '\\') || (ch == 10) || (ch == 13))
00104                                 break;
00105                 }
00106                 *buf = (*buf) ? (char *)realloc(*buf, size + 1) : (char *)malloc(size + 1);
00107                 if (size) {
00108                         lseek(datfd->getFd(), ioffset, SEEK_SET);
00109                         read(datfd->getFd(), *buf, size);
00110                 }
00111                 (*buf)[size] = 0;
00112                 for (size--; size > 0; size--)
00113                         (*buf)[size] = SW_toupper((*buf)[size]);
00114         }
00115         else {
00116                 *buf = (*buf) ? (char *)realloc(*buf, 1) : (char *)malloc(1);
00117                 **buf = 0;
00118         }
00119 }
00120 
00121 
00122 /******************************************************************************
00123  * RawStr4::getidxbuf   - Gets the index string at the given idx offset
00124  *                                              NOTE: buf is allocated and must be freed by
00125  *                                                      calling function
00126  *
00127  * ENT: ioffset - offset in idx file to lookup
00128  *              buf             - address of pointer to allocate for storage of string
00129  */
00130 
00131 void RawStr4::getidxbuf(long ioffset, char **buf)
00132 {
00133         char *trybuf, *targetbuf;
00134         long offset;
00135         
00136         if (idxfd > 0) {
00137                 lseek(idxfd->getFd(), ioffset, SEEK_SET);
00138                 read(idxfd->getFd(), &offset, 4);
00139 
00140                 offset = swordtoarch32(offset);
00141 
00142                 getidxbufdat(offset, buf);
00143                 for (trybuf = targetbuf = *buf; *trybuf; trybuf++, targetbuf++) {
00144 /*
00145                         if (*trybuf == '-') {           // ignore '-' because alphabetized silly in file
00146                                 targetbuf--;
00147                                 continue;
00148                         }
00149 */
00150                         *targetbuf = SW_toupper(*trybuf);
00151                 }
00152                 *targetbuf = 0;
00153                 trybuf = 0;
00154         }
00155 }
00156 
00157 
00158 /******************************************************************************
00159  * RawStr4::findoffset  - Finds the offset of the key string from the indexes
00160  *
00161  * ENT: key             - key string to lookup
00162  *              start   - address to store the starting offset
00163  *              size            - address to store the size of the entry
00164  *              away            - number of entries before of after to jump
00165  *                                      (default = 0)
00166  *
00167  * RET: error status
00168  */
00169 
00170 signed char RawStr4::findoffset(const char *ikey, long *start, unsigned long *size, long away, long *idxoff)
00171 {
00172         char *trybuf, *targetbuf, *key, quitflag = 0;
00173         signed char retval = 0;
00174         long headoff, tailoff, tryoff = 0, maxoff = 0;
00175 
00176         if (idxfd->getFd() >=0) {
00177                 if (*ikey) {
00178                         headoff = 0;
00179                         tailoff = maxoff = lseek(idxfd->getFd(), 0, SEEK_END) - 8;
00180 
00181                         key = new char [ strlen(ikey) + 1 ];
00182                         strcpy(key, ikey);
00183 
00184                         for (trybuf = targetbuf = key; *trybuf; trybuf++, targetbuf++) {
00185         /*
00186                                 if (*trybuf == '-') {           // ignore '-' because alphabetized silly in file
00187                                         targetbuf--;
00188                                         continue;
00189                                 }
00190         */
00191                                 *targetbuf = SW_toupper(*trybuf);
00192                         }
00193                         *targetbuf = 0;
00194                         trybuf = 0;
00195 
00196                         while (headoff < tailoff) {
00197                                 tryoff = (lastoff == -1) ? headoff + ((((tailoff / 8) - (headoff / 8))) / 2) * 8 : lastoff; 
00198                                 lastoff = -1;
00199                                 getidxbuf(tryoff, &trybuf);
00200 
00201                                 if (!*trybuf) {         // In case of extra entry at end of idx
00202                                         tryoff += (tryoff > (maxoff / 2))?-8:8;
00203                                         retval = -1;
00204                                         break;
00205                                 }
00206                                         
00207                                 if (!strcmp(key, trybuf))
00208                                         break;
00209 
00210                                 int diff = strcmp(key, trybuf);
00211                                 if (diff < 0)
00212                                         tailoff = (tryoff == headoff) ? headoff : tryoff;
00213                                 else headoff = tryoff;
00214                                 if (tailoff == headoff + 8) {
00215                                         if (quitflag++)
00216                                                 headoff = tailoff;
00217                                 }
00218                         }
00219                         if (headoff >= tailoff)
00220                                 tryoff = headoff;
00221                         if (trybuf)
00222                                 free(trybuf);
00223                         delete [] key;
00224                 }
00225                 else    tryoff = 0;
00226 
00227                 lseek(idxfd->getFd(), tryoff, SEEK_SET);
00228 
00229                 *start = *size = 0;
00230                 read(idxfd->getFd(), start, 4);
00231                 read(idxfd->getFd(), size, 4);
00232                 if (idxoff)
00233                         *idxoff = tryoff;
00234 
00235                 *start = swordtoarch32(*start);
00236                 *size  = swordtoarch32(*size);
00237 
00238                 while (away) {
00239                         long laststart = *start;
00240                         unsigned long lastsize = *size;
00241                         long lasttry = tryoff;
00242                         tryoff += (away > 0) ? 8 : -8;
00243 
00244                         bool bad = false;
00245                         if (((tryoff + (away*8)) < -8) || (tryoff + (away*8) > (maxoff+8)))
00246                                 bad = true;
00247                         else if (lseek(idxfd->getFd(), tryoff, SEEK_SET) < 0)
00248                                 bad = true;
00249                         if (bad) {
00250                                 retval = -1;
00251                                 *start = laststart;
00252                                 *size = lastsize;
00253                                 tryoff = lasttry;
00254                                 if (idxoff)
00255                                         *idxoff = tryoff;
00256                                 break;
00257                         }
00258                         read(idxfd->getFd(), start, 4);
00259                         read(idxfd->getFd(), size, 4);
00260                         if (idxoff)
00261                                 *idxoff = tryoff;
00262 
00263                         *start = swordtoarch32(*start);
00264                         *size  = swordtoarch32(*size);
00265 
00266                         if (((laststart != *start) || (lastsize != *size)) && (*start >= 0) && (*size)) 
00267                                 away += (away < 0) ? 1 : -1;
00268                 }
00269         
00270                 lastoff = tryoff;
00271         }
00272         else {
00273                 *start = 0;
00274                 *size  = 0;
00275                 if (idxoff)
00276                         *idxoff = 0;
00277                 retval = -1;
00278         }
00279         return retval;
00280 }
00281 
00282 
00283 /******************************************************************************
00284  * RawStr4::preptext    - Prepares the text before returning it to external
00285  *                                      objects
00286  *
00287  * ENT: buf     - buffer where text is stored and where to store the prep'd
00288  *                              text.
00289  */
00290 
00291 void RawStr4::preptext(char *buf)
00292 {
00293         char *to, *from, space = 0, cr = 0, realdata = 0, nlcnt = 0;
00294 
00295         for (to = from = buf; *from; from++) {
00296                 switch (*from) {
00297                 case 10:
00298                         if (!realdata)
00299                                 continue;
00300                         space = (cr) ? 0 : 1;
00301                         cr = 0;
00302                         nlcnt++;
00303                         if (nlcnt > 1) {
00304 //                              *to++ = nl;
00305                                 *to++ = nl;
00306 //                              nlcnt = 0;
00307                         }
00308                         continue;
00309                 case 13:
00310                         if (!realdata)
00311                                 continue;
00312                         *to++ = nl;
00313                         space = 0;
00314                         cr = 1;
00315                         continue;
00316                 }
00317                 realdata = 1;
00318                 nlcnt = 0;
00319                 if (space) {
00320                         space = 0;
00321                         if (*from != ' ') {
00322                                 *to++ = ' ';
00323                                 from--;
00324                                 continue;
00325                         }
00326                 }
00327                 *to++ = *from;
00328         }
00329         *to = 0;
00330 
00331         while (to > (buf+1)) {                  // remove trailing excess
00332                 to--;
00333                 if ((*to == 10) || (*to == ' '))
00334                         *to = 0;
00335                 else break;
00336         }
00337 }
00338 
00339 
00340 /******************************************************************************
00341  * RawStr4::gettext     - gets text at a given offset
00342  *
00343  * ENT:
00344  *      start   - starting offset where the text is located in the file
00345  *      size            - size of text entry
00346  *      buf             - buffer to store text
00347  *
00348  */
00349 
00350 void RawStr4::gettext(long istart, unsigned long isize, char *idxbuf, char *buf)
00351 {
00352         char *ch;
00353         char *idxbuflocal = 0;
00354         getidxbufdat(istart, &idxbuflocal);
00355         long start = istart;
00356         unsigned long size = isize;
00357 
00358         do {
00359                 memset(buf, 0, size);
00360                 lseek(datfd->getFd(), start, SEEK_SET);
00361                 read(datfd->getFd(), buf, (int)(size - 1));
00362 
00363                 for (ch = buf; *ch; ch++) {             // skip over index string
00364                         if (*ch == 10) {
00365                                 ch++;
00366                                 break;
00367                         }
00368                 }
00369                 memmove(buf, ch, size - (unsigned long)(ch-buf));
00370 
00371                 // resolve link
00372                 if (!strncmp(buf, "@LINK", 5)) {
00373                         for (ch = buf; *ch; ch++) {             // null before nl
00374                                 if (*ch == 10) {
00375                                         *ch = 0;
00376                                         break;
00377                                 }
00378                         }
00379                         findoffset(buf + 8, &start, &size);
00380                 }
00381                 else break;
00382         }
00383         while (true);   // while we're resolving links
00384 
00385         if (idxbuflocal) {
00386                 int localsize = strlen(idxbuflocal);
00387                 localsize = (localsize < (size - 1)) ? localsize : (size - 1);
00388                 strncpy(idxbuf, idxbuflocal, localsize);
00389                 idxbuf[localsize] = 0;
00390                 free(idxbuflocal);
00391         }
00392 }
00393 
00394 
00395 /******************************************************************************
00396  * RawLD::settext       - Sets text for current offset
00397  *
00398  * ENT: key     - key for this entry
00399  *      buf     - buffer to store
00400  *      len     - length of buffer (0 - null terminated)
00401  */
00402 
00403 void RawStr4::settext(const char *ikey, const char *buf, long len)
00404 {
00405 
00406         long start, outstart;
00407         long idxoff;
00408         long endoff;
00409         long shiftSize;
00410         unsigned long size;
00411         unsigned long outsize;
00412         static const char nl[] = {13, 10};
00413         char *tmpbuf = 0;
00414         char *key = 0;
00415         char *dbKey = 0;
00416         char *idxBytes = 0;
00417         char *outbuf = 0;
00418         char *ch = 0;
00419 
00420         findoffset(ikey, &start, &size, 0, &idxoff);
00421         stdstr(&key, ikey);
00422         for (ch = key; *ch; ch++)
00423                 *ch = SW_toupper(*ch);
00424         ch = 0;
00425 
00426         getidxbufdat(start, &dbKey);
00427 
00428         if (strcmp(key, dbKey) < 0) {
00429         }
00430         else if (strcmp(key, dbKey) > 0) {
00431                 idxoff += 8;
00432         } else if ((!strcmp(key, dbKey)) && (len || strlen(buf) /*we're not deleting*/)) { // got absolute entry
00433                 do {
00434                         tmpbuf = new char [ size + 2 ];
00435                         memset(tmpbuf, 0, size + 2);
00436                         lseek(datfd->getFd(), start, SEEK_SET);
00437                         read(datfd->getFd(), tmpbuf, (int)(size - 1));
00438 
00439                         for (ch = tmpbuf; *ch; ch++) {          // skip over index string
00440                                 if (*ch == 10) {
00441                                         ch++;
00442                                         break;
00443                                 }
00444                         }
00445                         memmove(tmpbuf, ch, size - (unsigned long)(ch-tmpbuf));
00446 
00447                         // resolve link
00448                         if (!strncmp(tmpbuf, "@LINK", 5) && (len ? len : strlen(buf))) {
00449                                 for (ch = tmpbuf; *ch; ch++) {          // null before nl
00450                                         if (*ch == 10) {
00451                                                 *ch = 0;
00452                                                 break;
00453                                         }
00454                                 }
00455                                 findoffset(tmpbuf + 8, &start, &size, 0, &idxoff);
00456                         }
00457                         else break;
00458                 }
00459                 while (true);   // while we're resolving links
00460         }
00461 
00462         endoff = lseek(idxfd->getFd(), 0, SEEK_END);
00463 
00464         shiftSize = endoff - idxoff;
00465 
00466         if (shiftSize > 0) {
00467                 idxBytes = new char [ shiftSize ];
00468                 lseek(idxfd->getFd(), idxoff, SEEK_SET);
00469                 read(idxfd->getFd(), idxBytes, shiftSize);
00470         }
00471 
00472         outbuf = new char [ (len ? len : strlen(buf)) + strlen(key) + 5 ];
00473         sprintf(outbuf, "%s%c%c", key, 13, 10);
00474         size = strlen(outbuf);
00475         memcpy (outbuf + size, buf, len ? len : strlen(buf));
00476         size = outsize = size + (len ? len : strlen(buf));
00477 
00478         start = outstart = lseek(datfd->getFd(), 0, SEEK_END);
00479 
00480         outstart = archtosword32(start);
00481         outsize  = archtosword32(size);
00482 
00483         lseek(idxfd->getFd(), idxoff, SEEK_SET);
00484         if (len ? len : strlen(buf)) {
00485                 lseek(datfd->getFd(), start, SEEK_SET);
00486                 write(datfd->getFd(), outbuf, (long)size);
00487 
00488                 // add a new line to make data file easier to read in an editor
00489                 write(datfd->getFd(), &nl, 2);
00490                 
00491                 write(idxfd->getFd(), &outstart, 4);
00492                 write(idxfd->getFd(), &outsize, 4);
00493                 if (idxBytes) {
00494                         write(idxfd->getFd(), idxBytes, shiftSize);
00495                         delete [] idxBytes;
00496                 }
00497         }
00498         else {  // delete entry
00499                 if (idxBytes) {
00500                         write(idxfd->getFd(), idxBytes+8, shiftSize-8);
00501                         lseek(idxfd->getFd(), -1, SEEK_CUR);    // last valid byte
00502                         FileMgr::systemFileMgr.trunc(idxfd);    // truncate index
00503                         delete [] idxBytes;
00504                 }
00505         }
00506 
00507         delete [] key;
00508         delete [] outbuf;
00509         free(dbKey);
00510 }
00511 
00512 
00513 /******************************************************************************
00514  * RawLD::linkentry     - links one entry to another
00515  *
00516  * ENT: testmt  - testament to find (0 - Bible/module introduction)
00517  *      destidxoff      - dest offset into .vss
00518  *      srcidxoff               - source offset into .vss
00519  */
00520 
00521 void RawStr4::linkentry(const char *destkey, const char *srckey) {
00522         char *text = new char [ strlen(destkey) + 7 ];
00523         sprintf(text, "@LINK %s", destkey);
00524         settext(srckey, text);
00525         delete [] text;
00526 }
00527 
00528 
00529 /******************************************************************************
00530  * RawLD::CreateModule  - Creates new module files
00531  *
00532  * ENT: path    - directory to store module files
00533  * RET: error status
00534  */
00535 
00536 signed char RawStr4::createModule(const char *ipath)
00537 {
00538         char *path = 0;
00539         char *buf = new char [ strlen (ipath) + 20 ];
00540         FileDesc *fd, *fd2;
00541 
00542         stdstr(&path, ipath);
00543 
00544         if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\'))
00545                 path[strlen(path)-1] = 0;
00546 
00547         sprintf(buf, "%s.dat", path);
00548         unlink(buf);
00549         fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE);
00550         fd->getFd();
00551         FileMgr::systemFileMgr.close(fd);
00552 
00553         sprintf(buf, "%s.idx", path);
00554         unlink(buf);
00555         fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE);
00556         fd2->getFd();
00557         FileMgr::systemFileMgr.close(fd2);
00558 
00559         delete [] path;
00560         
00561         return 0;
00562 }

Generated on Thu Jun 20 22:13:00 2002 for The Sword Project by doxygen1.2.15