Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members  

gbfidx.cpp

00001 /*****************************************************************************
00002  *
00003  *      This code reeks but works (sometimes).  Good luck!
00004  * Modified for zText purposes
00005  */
00006 
00007 //#include <stdio.h>
00008 #include <iostream>
00009 #include <fstream>
00010 #include <stdlib.h>
00011 #include <string.h>
00012 #include <ctype.h>
00013 #include <assert.h>
00014 
00015 //#ifndef __GNUC__
00016 #include <io.h>
00017 //#else
00018 //#include <unistd.h>
00019 //#endif
00020 
00021 #include <fcntl.h>
00022 #include <versekey.h>
00023 
00024 
00025 void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size);  // writes index records for key1..key3
00026 char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, unsigned short *size);  // scans fp for the next verse tag; returns 1 at end of file
00027 void openfiles(char *fname);  // opens the input file, the .vss/.cps/.bks outputs and the log
00028 void checkparams(int argc, char **argv);  // validates argv[1] ("ot" or "nt") and seeds key1/key2/key3
00029 
00030 
00031 VerseKey key1, key2, key3;  // key1: current index position; key2/key3: min/max key the current offset is valid for (set in checkparams/main)
00032 int fp=0, vfp=0, cfp=0, bfp=0;  // descriptors opened by openfiles: input text, <fname>.vss, <fname>.cps, <fname>.bks
00033 long chapoffset=0;  // file offset of the most recent chapter tag seen by findbreak
00034 unsigned short chapsize=0;  // size findbreak computed for that chapter heading (tag up to next verse tag)
00035 long bookoffset=0;  // file offset of the most recent book tag seen by findbreak
00036 unsigned short booksize=0;  // size findbreak computed for that book heading
00037 long testoffset=0;  // file offset of the most recent testament tag seen by findbreak
00038 unsigned short testsize=0;  // size findbreak computed for that testament heading
00039 long verseoffset=0;  // offset of a verse whose record writeidx has deferred
00040 unsigned short versesize=0;  // size of that deferred verse (adjusted in writeidx before it is written)
00041 long nextoffset=0;  // offset writeidx expects the next written record to start at (checked with assert)
00042 char testmnt=0;  // testament number taken from key1.Testament() in main
00043 int deadcount = 0;  // only referenced from commented-out code in findbreak
00044 int chapmark=-4, bookmark=-1;  // incremented by findbreak per chapter/book tag scanned; reset to 0 by writeidx. NOTE(review): reason for the -4/-1 start values is not visible here
00045 ofstream cfile;  // debug log stream; openfiles opens it on "gbfidx.log"
00046 
00047 
00048 int main(int argc, char **argv)
00049 {
00050         long pos, offset;
00051         int num1, num2, rangemax;//, curbook = 0, curchap = 0, curverse = 0;
00052         //char buf[127],
00053         char startflag = 0;
00054         unsigned short size;//, tmp;
00055 
00056         checkparams(argc, argv);
00057 
00058         openfiles(argv[1]);
00059         //key1 = "Matthew 1:1";
00060         //key2 = "Matthew 1:1";
00061         //key3 = "Matthew 1:1";
00062 
00063         testmnt = key1.Testament();
00064         cfile << "testament" << (int) testmnt << "\n";
00065         num1 = key1.Chapter();
00066         num2 = key1.Verse();
00067         pos  = 0;
00068         write(bfp, &pos, 4);  /* Book    offset for testament intros */
00069         pos = 4;
00070         write(cfp, &pos, 4);  /* Chapter offset for testament intro */
00071 
00072 
00073 /*      Right now just zero out intros until parsing correctly */
00074         /*pos = 0;
00075         size = 0;
00076         write(vfp, &pos, 4);  // Module intro
00077         write(vfp, &size, 2);
00078         cfile << "modintro pos{" << pos << "} size{" << size << "}\n";
00079         write(vfp, &pos, 4);  // Testament intro
00080         write(vfp, &size, 2);
00081         cfile << "test intro pos{" << pos << "} size{" << size << "}\n";
00082         */
00083         cout << "GBFIDX Running\n";
00084         cout.flush();
00085         while(!findbreak(fp, &offset, &num1, &num2, &rangemax, &size)) {
00086                 if (!startflag) {
00087                         startflag = 1;
00088                 }
00089                 else {
00090                         if (num2 < key2.Verse()) {            // new chapter
00091                                 if (num1 <= key2.Chapter()) { // new book
00092                                         key2.Verse(1);
00093                                         key2.Chapter(1);
00094                                         key2.Book(key2.Book()+1);
00095                                 }
00096                                 cfile << "Found Chapter Break: " << num1 << " ('" << (const char *)key2 << "')\n";
00097                                 //chapoffset = offset;
00098                                 //chapsize = chapsize - size;
00099 //                              continue;
00100                         }
00101                 }
00102                 key2.Verse(1);
00103                 key2.Chapter(num1);
00104                 key2.Verse(num2);
00105 
00106                 key3 = key2;
00107 //              key3 += (rangemax - key3.Verse());
00108 
00109                 writeidx(key1, key2, key3, offset, size);
00110         }
00111         close(vfp);
00112         close(cfp);
00113         close(bfp);
00114         close(fp);
00115 
00116         return 1;
00117 }
00118 
00119 
00120 /**************************************************************************
00121  * writeidx:    key1    - current location of index (advanced by this call)
00122  *      key2    - minimum keyval for which this offset is valid
00123  *      key3    - maximum keyval for which this offset is valid
       *      offset  - file offset of the verse text found by findbreak
       *      size    - size of that verse text as computed by findbreak
       *
       * Steps key1 forward one key at a time while it is <= key3, is not out
       * of bounds and stays in testament testmnt.  For every key one record is
       * written to vfp; a record is a 4-byte value followed by a 2-byte value
       * (offset, size).  Keys below key2 get a blank (0, 0) record.  When key1
       * is the first verse of a chapter, heading records are written first and
       * the current vfp position is appended to cfp; for the first chapter of
       * a book the current cfp position is also appended to bfp.
       *
       * A verse found while chapmark or bookmark is non-zero is not written
       * immediately: it is parked in verseoffset/versesize and flushed by a
       * later iteration after the heading sizes have been subtracted from it.
       *
       * Exits the process if a size exceeds 10000; several consistency checks
       * are asserts against nextoffset.
       *
       * NOTE(review): size is a signed short here while main passes an unsigned
       * short, so values above 32767 presumably arrive negative and pass the
       * "> 10000" check -- confirm.
       * NOTE(review): every write(..., 4) emits the first 4 bytes of a long;
       * that is the intended 32-bit value only where long is 4 bytes or the
       * host is little-endian -- confirm.
00124  */
00125 
00126 void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size)
00127 {
00128         long pos;               // scratch 4-byte field for index positions
00129         unsigned short tmp;     // scratch 2-byte field for zero sizes
00130 
00131         for (; ((key1 <= key3) && (key1.Error() != KEYERR_OUTOFBOUNDS) && (key1.Testament() == testmnt)); key1+=1) {
00132                 if (chapmark>=2)        // a chapter tag has been scanned since the last flush
00133                 {
00134                         if (bookmark==2)        // ...and a book tag as well
00135                         {
00136                                 //booksize = booksize - chapsize + 7;
00137                                 cfile << "versesize " << versesize << " booksize " << booksize << " chapsize " << chapsize << " size " << size << "\n";
00138                                 //cfile.flush();
00139                                 //assert(chapsize < size);
00140                                 //if (chapsize > size)           // At start of Psalms gets chapsize rather than chapsize+size ???
00141                                 //{
00142                                 //      versesize = versesize - (booksize - (chapsize - size) + 7);
00143                                 //}
00144                                 //else
00145                                 //{
00146                                 versesize = versesize - (booksize - (chapsize) + 7);    // remove the book heading part from the parked verse
00147                                 //}
00148                                         cfile << "Last verse in book\n";
00149                         }
00150                         //chapsize = chapsize - size;
00151                         cfile << "versesize " << versesize << " chapsize " << chapsize << " size " << size<< "\n";
00152                         cfile.flush();
00153                         //assert(chapsize > size);
00154                         //if (chapsize > size)           // At start of Psalms gets chapsize rather than chapsize+size ???
00155                         //{
00156                         //      versesize = versesize - (chapsize - size);
00157                         //}
00158                         //else
00159                         //{
00160                         versesize = versesize - (chapsize);     // remove the chapter heading part from the parked verse
00161                         //}
00162                         cfile << "Last verse in chapter\n";
00163                 }
00164                 if (chapmark>=2 && bookmark!=1)
00165                 {
                              // flush the parked verse record, then clear both markers
00166                         cfile << "prev verse pos{" << verseoffset << "} size{" << versesize << "} nextoffset{" << nextoffset << "}\n";
00167                         cfile.flush();
00168                         assert(verseoffset==nextoffset);
00169                         write(vfp, &verseoffset, 4);
00170                         write(vfp, &versesize, 2);
00171                         nextoffset = verseoffset+versesize;
00172                         bookmark = 0;
00173                         chapmark = 0;
00174                 }
00175                 if (key1.Verse() == 1) {        // new chapter
00176                         cfile << "size??? " << size << "\n";
00177                         cfile.flush();
00178                         //assert(chapsize > size || key1.Chapter()==1);
00179                         //assert(chapsize > size);
00180                         //if (chapsize > size)           // At start of books gets chapsize rather than chapsize+size
00181                         //{
00182                         //      chapsize = chapsize - size;
00183                         //}
00184                         if (key1.Chapter() == 1) {      // new book
00185                                 booksize = booksize - chapsize + 7;     // NOTE(review): 7 presumably matches the 7-byte tag window used by findbreak -- confirm
00186                                 if (key1.Book() == 1)   // first book: also emit module and testament intro records
00187                                 {
00188                                         pos = 0;
00189                                         //tmp = testoffset;
00190                                         tmp = 0;                // better just remember that it goes up to the testament intro to avoid 64k limit
00191                                                                                                 // AV exceeds that anyway!
00192                                         write(vfp, &pos, 4);  /* Module intro */
00193                                         write(vfp, &tmp, 2);
00194                                         assert(nextoffset==0);
00195                                         cfile << "modintro pos{" << pos << "} size{" << tmp << "}\n";
00196                                         testsize = testsize - booksize - chapsize + 7;
00197                                         if (testsize > 10000)
00198                                         {
00199                                                 cerr << "Error: testament too big " << testsize << "\n";
00200                                                 exit(-1);
00201                                         }
00202                                         //assert(testoffset==nextoffset);
00203                                         write(vfp, &testoffset, 4);  /* Testament intro (vss)  */
00204                                         write(vfp, &testsize, 2);
00205                                         nextoffset = testoffset+testsize;
00206                                         cfile << "test intro pos{" << testoffset << "} size{" << testsize << "}\n";
00207                                 }
00208                                 pos = lseek(cfp, 0, SEEK_CUR);  // current end of the chapter index...
00209                                 write(bfp, &pos, 4);            // ...becomes this book's entry in the book index
00210                                 pos = lseek(vfp, 0, SEEK_CUR); /* Book intro (cps) */
00211                                 write(cfp, &pos, 4);
00212                                 if (booksize > 10000)
00213                                 {
00214                                         cerr << "Error: book too big " << booksize << "\n";
00215                                         exit(-1);
00216                                 }
00217                                 assert(bookoffset==nextoffset);
00218                                 write(vfp, &bookoffset, 4);  /* Book intro (vss)  */
00219                                 write(vfp, &booksize, 2);
00220                                 nextoffset = bookoffset+booksize;
00221                                 cfile << "book intro pos{" << bookoffset << "} size{" << booksize << "}\n";
00222                                 //offset += booksize;
00223                                 //bookmark = false;
00224                         }
00225                         pos = lseek(vfp, 0, SEEK_CUR);  // position of the chapter intro record about to be written
00226                         write(cfp, &pos, 4);
00227                         assert(chapsize < 10000);
00228                         write(vfp, &chapoffset, 4);  /* Chapter intro */
00229                         write(vfp, &chapsize, 2);
00230                         nextoffset = chapoffset+chapsize;
00231                         cfile << "chapter intro pos{" << chapoffset << "} size{" << chapsize << "}\n";
00232                         //offset += chapsize;
00233                         //size -= chapsize;
00234                         //chapmark = false;
00235                 }
00236                 if (key1 >= key2) {     // key1 has reached the verse that offset/size describe
00237                         if (size > 10000)
00238                         {
00239                                 cerr << "Error: verse too big " << size << "\n";
00240                                 exit(-1);
00241                         }
00242                         if (!chapmark && !bookmark)     // no heading pending: write the verse record now
00243                         {
00244                                 write(vfp, &offset, 4);
00245                                 write(vfp, &size, 2);
00246                                 cfile << "verse pos{" << offset << "} size{" << size << "}\n";
00247                                 cfile.flush();
00248                                 assert(offset==nextoffset);
00249                                 nextoffset = offset+size;
00250                                 //cfile << "bookmark " << bookmark << " chapmark " << chapmark << "\n";
00251                         }
00252                         else    // a heading was scanned: park the verse until a later iteration flushes it
00253                         {
00254                                 verseoffset = offset;
00255                                 versesize = size;
00256                                 cfile << "saving verse pos{" << offset << "} size{" << size << "}\n";
00257                                 cfile << "bookmark " << bookmark << " chapmark " << chapmark << "\n";
00258                         }
00259                 }
00260                 else    {       // key1 is still below key2: emit a blank (0, 0) record
00261                         pos = 0;
00262                         tmp = 0;
00263                         write(vfp, &pos, 4);
00264                         write(vfp, &tmp, 2);
00265                         cfile << "blank pos{" << pos << "} size{" << tmp << "}\n";
00266                 }
00267         }
00268 }
00269 
/**************************************************************************
 * startmod:    buf     - characters to test; only buf[0..2] are examined
 *                        and the buffer is never modified
 *
 * returns 1 when buf begins with "<H0", otherwise 0.  Comparison stops at
 * the first mismatch, so a shorter NUL-terminated string is safe to pass.
 * This function is not called anywhere in this file.
 */
char startmod(const char *buf)
{
        return (buf[0] == '<' && buf[1] == 'H' && buf[2] == '0') ? 1 : 0;
}
00292 
00293 
00294 char starttest(char *buf)
00295 {
00296         //char loop;
00297 
00298         if (buf[0] != '<')
00299                 return 0;
00300         if (buf[1] != 'B')
00301                 return 0;
00302         if (testmnt==2)
00303         {
00304                 if (buf[2] != 'N')
00305                         return 0;
00306         }
00307         else
00308         {
00309                 if (buf[2] != 'O')
00310                         return 0;
00311         }
00312         //if (buf[3] != '>')
00313         //      return 0;
00314 /*
00315         if (!isdigit(buf[2]))
00316                 return 0;
00317         for (loop = 3; loop < 7; loop++) {
00318                 if (buf[loop] == ' ')
00319                         break;
00320                 if ((!isdigit(buf[loop])) && (buf[loop] != ',') && (buf[loop] != '-'))
00321                         return 0;
00322         }
00323 */
00324         return 1;
00325 }
00326 
00327 
/**************************************************************************
 * startbook:   buf     - characters to test; only buf[0..2] are examined
 *                        and the buffer is never modified
 *
 * returns 1 when buf begins with the book tag "<SB", otherwise 0.
 * Comparison stops at the first mismatch, so a shorter NUL-terminated
 * string is safe to pass.
 */
char startbook(const char *buf)
{
        return (buf[0] == '<' && buf[1] == 'S' && buf[2] == 'B') ? 1 : 0;
}
00350 
00351 
/**************************************************************************
 * startchap:   buf     - characters to test; only buf[0..2] are examined
 *                        and the buffer is never modified
 *
 * returns 1 when buf begins with the chapter tag "<SC", otherwise 0.
 * Comparison stops at the first mismatch, so a shorter NUL-terminated
 * string is safe to pass.
 */
char startchap(const char *buf)
{
        return (buf[0] == '<' && buf[1] == 'S' && buf[2] == 'C') ? 1 : 0;
}
00374 
00375 
/**************************************************************************
 * startentry:  buf     - characters to test; only buf[0..2] are examined
 *                        and the buffer is never modified
 *
 * returns 1 when buf begins with the verse tag "<SV", otherwise 0.
 * Comparison stops at the first mismatch, so a shorter NUL-terminated
 * string is safe to pass.
 */
char startentry(const char *buf)
{
        return (buf[0] == '<' && buf[1] == 'S' && buf[2] == 'V') ? 1 : 0;
}
00408 
00409 
00410 char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, unsigned short *size)
00411 {
00412         char buf[8];
00413         //char buf2[20];
00414         //char ch;
00415         char loop=0;
00416         long offset2;
00417         int ch2, vs2, rm2;
00418         bool flag;
00419         long versestart = 0;
00420         long chapstart = 0;
00421         long bookstart = 0;
00422         long teststart = 0;
00423 
00424         memset(buf, ' ', 8);
00425 
00426         while (1) {
00427                 //cfile << "#" << buf << "#";
00428                 //if (lseek(fp, 0, SEEK_CUR) > 2000000)
00429                 //{
00430                 //      cfile << lseek(fp, 0, SEEK_CUR) << "\n";
00431                 //}
00432                 if (starttest(buf)) {
00433                         cfile << "\n{start of testament}\n";
00434                         //chapstart = lseek(fp, 0, SEEK_CUR) - 7;
00435                         teststart = lseek(fp, 0, SEEK_CUR) - 7;
00436                         testoffset = teststart;
00437                         memset(buf, ' ', 3);
00438                         flag = false;
00439                         for (loop = 3; loop < 6; loop++) {
00440                                 if (buf[loop]!='>')
00441                                         flag = true;
00442                                 else {
00443                                         buf[loop] = 0;
00444                                         break;
00445                                 }
00446                         }
00447                         ch2 = *num1;
00448                         vs2 = 1;
00449                         if (size) {
00450                                 if (findbreak(fp, &offset2, &ch2, &vs2, &rm2, 0)) {
00451                                         testsize = (unsigned short) (lseek(fp, 0, SEEK_END) - teststart-7);
00452                                 }
00453                                 else {
00454                                         if (vs2) {
00455                                                 testsize = (offset2 - teststart - 7);
00456                                         }
00457                                 }
00458                                 lseek(fp, teststart+7, SEEK_SET);
00459                                 cfile << "\nGot testsize " << testsize << "\n";
00460                         }
00461                 }
00462 
00463 
00464                 if (startbook(buf)) {
00465                         cfile << "\n{start of book}\n";
00466                         bookmark++;
00467                         //chapstart = lseek(fp, 0, SEEK_CUR) - 7;
00468                         bookstart = lseek(fp, 0, SEEK_CUR) - 7;
00469                         bookoffset = bookstart;
00470                         memset(buf, ' ', 3);
00471                         flag = false;
00472                         for (loop = 3; loop < 6; loop++) {
00473                                 if (buf[loop]!='>')
00474                                         flag = true;
00475                                 else {
00476                                         buf[loop] = 0;
00477                                         break;
00478                                 }
00479                         }
00480                         if (size) {
00481                                 ch2 = *num1;
00482                                 vs2 = 1;
00483                                 if (findbreak(fp, &offset2, &ch2, &vs2, &rm2, 0)) {
00484                                         booksize = (unsigned short) (lseek(fp, 0, SEEK_END) - bookstart - 7);
00485                                 }
00486                                 else {
00487                                         if (vs2) {
00488                                                 booksize = (offset2 - bookstart - 7);
00489                                         }
00490                                 }
00491                                 lseek(fp, bookstart+7, SEEK_SET);
00492                                 cfile << "\nGot booksize " << booksize << "\n";
00493                         }
00494                 }
00495 
00496                 if (startchap(buf)) {
00497                         cfile << "{start of chapter}";
00498                         chapmark++;
00499                         //chapstart = lseek(fp, 0, SEEK_CUR) - 7;
00500                         chapstart = lseek(fp, 0, SEEK_CUR) - 7;
00501                         chapoffset = chapstart;
00502                         memset(buf, ' ', 3);
00503                         flag = false;
00504                         for (loop = 3; loop < 6; loop++) {
00505                                 if (isdigit(buf[loop]))
00506                                         flag = true;
00507                                 else {
00508                                         buf[loop] = 0;
00509                                         break;
00510                                 }
00511                         }
00512                         if (flag)
00513                                 *num1 = atoi(buf);
00514                         else    (*num1)++;
00515 
00516                         if (size) {
00517                                 ch2 = *num1;
00518                                 vs2 = 1;
00519                                 lseek(fp, chapstart, SEEK_SET);
00520                                 if (findbreak(fp, &offset2, &ch2, &vs2, &rm2, 0)) {
00521                                         chapsize = (unsigned short) (lseek(fp, 0, SEEK_END) - chapstart);
00522                                         cfile << "getting chapsizeend{" << chapsize << "} = " << lseek(fp, 0, SEEK_END) << " - " << chapstart << "\n";
00523                                 }
00524                                 else {
00525                                         if (vs2) {
00526                                                 chapsize = (offset2 - chapstart);
00527                                                 cfile << "getting chapsize{" << chapsize << "} = " << offset2 << " - " << chapstart << "\n";
00528                                         }
00529                                 }
00530                                 lseek(fp, chapstart + 7, SEEK_SET);
00531                                 cfile << "\nGot chapsize " << chapsize << " loop{" << (int) loop << "}\n";
00532                         }
00533                         //return 0;
00534 
00535                 }
00536                 if (startentry(buf)) {
00537                         //cfile << "{start of verse}";
00538                         memset(buf, ' ', 3);
00539                         flag = false;
00540                         for (loop = 3; loop < 6; loop++) {
00541                                 if (isdigit(buf[loop]))
00542                                         flag = true;
00543                                 else {
00544                                         buf[loop] = 0;
00545                                         break;
00546                                 }
00547                         if (flag)
00548                                 *num2 = atoi(buf);
00549                         else    (*num2)++;
00550                         }
00551                         loop++;
00552                         /*
00553                         if (size)
00554                         {
00555                                 // *offset = lseek(fp, 0, SEEK_CUR) - (7 - loop);
00556                                  *offset = lseek(fp, 0, SEEK_CUR) - 7;
00557                         }
00558                         //else  *offset = (chapstart) ? chapstart : lseek(fp, 0, SEEK_CUR) - 7;
00559                         else    *offset = (chapstart) ? chapstart : lseek(fp, 0, SEEK_CUR) - 7;
00560                         */
00561                         /*if (chapstart)
00562                         {
00563                                 chapsize = *offset-chapstart;
00564                         }
00565                         else
00566                         {
00567                                 chapsize = 0;
00568                         }*/
00569                         *offset = lseek(fp, 0, SEEK_CUR) - 7;
00570                         versestart = *offset;
00571                         if (size) {
00572                                 ch2 = *num1;
00573                                 vs2 = *num2;
00574                                 if (findbreak(fp, &offset2, &ch2, &vs2, &rm2, 0)) {
00575                                         *size = (unsigned short) (lseek(fp, 0, SEEK_END) - versestart);
00576                                         cfile << "getting sizeend{" << *size << "} = " << lseek(fp, 0, SEEK_END) << " - " << versestart << "\n";
00577                                 }
00578                                 else {
00579                                         if (vs2) {
00580                                                 *size = (offset2 - versestart);
00581                                                 cfile << "getting size{" << *size << "} = " << offset2 << " - " << versestart << "\n";
00582                                         }
00583                                 }
00584                                 lseek(fp, *offset+1, SEEK_SET);
00585                         }
00586                         else
00587                         {
00588                                 cfile << "got offset{" << *offset << "}\n";
00589                         }
00590                         return 0;
00591                 }
00592                 //cfile << "{ng}";
00593                 //deadcount++;
00594                 //if (deadcount==1000) exit(-1);
00595                 //if (!size)
00596                 //{
00597                 //      cfile << "not bound offset{" << *offset << "}\n";
00598                 //}
00599                 memmove(buf, &buf[1], 6);
00600                 if (read(fp, &buf[6], 1) != 1)
00601                         return 1;
00602         }
00603 }
00604 
00605 
00606 void openfiles(char *fname)
00607 {
00608 #ifndef O_BINARY                // O_BINARY is needed in Borland C++ 4.53
00609 #define O_BINARY 0              // If it hasn't been defined than we probably
00610 #endif                          // don't need it.
00611         char buf[255];
00612 
00613         if ((fp = open(fname, O_RDONLY|O_BINARY)) == -1) {
00614                 fprintf(stderr, "Couldn't open file: %s\n", fname);
00615                 exit(1);
00616         }
00617 
00618         sprintf(buf, "%s.vss", fname);
00619         if ((vfp = open(buf, O_CREAT|O_WRONLY|O_BINARY|O_TRUNC)) == -1) {
00620                 fprintf(stderr, "Couldn't open file: %s\n", buf);
00621                 exit(1);
00622         }
00623 
00624         sprintf(buf, "%s.cps", fname);
00625         if ((cfp = open(buf, O_CREAT|O_WRONLY|O_BINARY|O_TRUNC)) == -1) {
00626                 fprintf(stderr, "Couldn't open file: %s\n", buf);
00627                 exit(1);
00628         }
00629 
00630         sprintf(buf, "%s.bks", fname);
00631         if ((bfp = open(buf, O_CREAT|O_WRONLY|O_BINARY|O_TRUNC)) == -1) {
00632                 fprintf(stderr, "Couldn't open file: %s\n", buf);
00633                 exit(1);
00634         }
00635         cfile.open("gbfidx.log", ios::out);
00636         if (!cfile.is_open())
00637         {
00638                 cerr << "Failed to open log file\n";
00639                 exit(-1);
00640         }
00641 }
00642 
00643 
00644 void checkparams(int argc, char **argv)
00645 {
00646         if (argc < 2) {
00647                 fprintf(stderr, "usage: %s <file to process> [nt - for new testmt file]\n", argv[0]);
00648                 exit(1);
00649         }
00650         if (!strcmp(argv[1], "nt"))
00651                 key1 = key2 = key3 = "Matthew 1:1";
00652         else if (!strcmp(argv[1], "ot"))
00653         {
00654                         key1 = key2 = key3 = "Genesis 1:1";
00655         }
00656         else
00657         {
00658                 cerr << "File must be ot or nt\n";
00659                 exit(-1);
00660         }
00661 }

Generated on Thu Jun 20 22:12:59 2002 for The Sword Project by doxygen1.2.15