Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members  

rawtxt2z.cpp

00001 // Compression on variable granularity
00002 
00003 #include <fcntl.h>
00004 #include <iostream>
00005 #include <fstream>
00006 #include <string>
00007 
00008 #ifndef __GNUC__
00009 #include <io.h>
00010 #else
00011 #include <unistd.h>
00012 #endif
00013 
00014 #include <zlib.h>
00015 #include <versekey.h>
00016 
// Compression settings and block bookkeeping.
int iBufSize;     // units (bytes, verses, chapters or books) per block; set from argv in main()
int ulBuffNum;    // number of the block currently being filled

// Log streams opened by main(): cfile receives the progress log,
// cfile2 receives a dump of every compressed block.
std::ofstream cfile;
std::ofstream cfile2;

// File descriptors; index 0 is used for the "ot" files, index 1 for the "nt" files.
int ofd[2];       // compressed text output
int oxfd[2];      // compressed block index output
int ovxfd[2];     // verse index output
int ifd[2];       // raw text input
int ixfd[2];      // raw verse index (.vss) input
int itestfd[2];   // .tst output: uncompressed copy of each block
int itestxfd[2];  // .tdx output: textual dump of the verse index entries read

unsigned long ulIOff = 0;   // offset of the next verse inside the current block
unsigned long ulCOff = 0;   // offset of the latest block inside the compressed file
unsigned long ulFOff = 0;   // offset value handed to the block-boundary test
unsigned long ulNone = 0;   // constant zero written for verses without text
std::string currbuff = "";  // uncompressed text collected for the current block
00026 
00027 
00028 int openreadfile(char *buffer, char *path, const char *filename)
00029 {
00030         int filenum;
00031         sprintf(buffer, "%s/%s", path, filename);
00032         cfile << buffer << "\n";
00033         filenum =  open(buffer, O_RDONLY|O_BINARY);
00034         if (filenum > 0)
00035         {
00036                 return filenum;
00037         }
00038         else
00039         {
00040                 cerr << "failed to open file to read\n";
00041                 exit(-1);
00042         }
00043 }
00044 
00045 int openwritefile(char *buffer, char *path, const char *filename)
00046 {
00047         int filenum;
00048         sprintf(buffer, "%s/%s", path, filename);
00049         cfile << buffer << "\n";
00050         filenum =  open(buffer, O_WRONLY|O_BINARY|O_CREAT|O_TRUNC);
00051         if (filenum > 0)
00052         {
00053                 return filenum;
00054         }
00055         else
00056         {
00057                 cerr << "failed to open file to read\n";
00058                 exit(-1);
00059         }
00060 }
00061 
00062 int bytebound(unsigned long offset, VerseKey &thekey)
00063 {
00064         unsigned long bufferoff;
00065         cfile << "byteboundtest " << thekey << "\n";
00066         bufferoff = iBufSize * (ulBuffNum+1);
00067         if (offset > bufferoff)
00068         {
00069                 return 1;
00070         }
00071         else
00072         {
00073                 return 0;
00074         }
00075 }
00076 
00077 int versebound(unsigned long offset, VerseKey &thekey)
00078 {
00079         cfile << "verseboundtest " << thekey << "\n";
00080         return 1;
00081 }
00082 
00083 int chapterbound(unsigned long offset, VerseKey &thekey)
00084 {
00085         VerseKey testkey;
00086         testkey = thekey;
00087         testkey++;
00088         //cfile << "chapterboundtest " << testkey;
00089         if (testkey.Verse()==1 || (!thekey.compare("Revelation of John 22:21")))
00090         {
00091                 //cfile << " 1\n";
00092                 return 1;
00093         }
00094         else
00095         {
00096                 //cfile << " 0\n";
00097                 return 0;
00098         }
00099 }
00100 
00101 int bookbound(unsigned long offset, VerseKey &thekey)
00102 {
00103         VerseKey testkey;
00104         testkey = thekey;
00105         cfile << "bookboundtest " << testkey << "\n";
00106         testkey++;
00107         if (testkey.Chapter()==1 || (!thekey.compare("Revelation of John 22:21")))
00108         {
00109                 return 1;
00110         }
00111         else
00112         {
00113                 return 0;
00114         }
00115 }
00116 
00117 
00118 typedef int (*boundfunc)(unsigned long offset, VerseKey &thekey);
00119 
00120 int writeblock(int i)
00121 {
00122         char *destbuff=NULL;
00123         unsigned long compsize = 0, buffsize=0;
00124 
00125         cfile << "compressing block\n";
00126         // compress current buffer
00127         buffsize = currbuff.length();
00128         write(itestfd[i], currbuff.c_str(), buffsize);
00129         compsize = (unsigned long) (buffsize*1.01)+20;  // at least 1% bigger than buffer + 12 bytes
00130         //cfile << "{" << compsize << "}";
00131         //destbuff = (char *) calloc(compsize + 1, 1);
00132         destbuff = new char[compsize];
00133         if (compress((Bytef*)destbuff, &compsize, (const Bytef*)currbuff.c_str(), buffsize)!=Z_OK)
00134         {
00135                 cerr << "Could not compress buffer: exiting\n";
00136                 delete[] destbuff;
00137                 exit(-1);
00138         }
00139         //cout << "Compressed buffer{" << compsize << "}\n" << destbuff << "\n";
00140         //cout.flush();
00141         // write to compressed file index
00142         ulCOff = lseek(ofd[i], 0, SEEK_END);
00143         write(oxfd[i], &ulCOff, 4);    // offset in compressed file
00144         write(oxfd[i], &compsize, 4);    // compressed size
00145         write(oxfd[i], &buffsize, 4);    // uncompressed size
00146         cfile << buffsize << " -> " << compsize << "\n";
00147         cfile2 << "Compressed{" << compsize << "}\n" << destbuff << "\n";
00148         cfile2.flush();
00149 
00150         //write compressed buffer to file
00151         write(ofd[i], destbuff, compsize);
00152 
00153         //free(destbuff);
00154         delete[] destbuff;
00155 
00156         currbuff = "";
00157         ulBuffNum++;
00158         ulIOff = 0;
00159         return 1;
00160 }
00161 
00162 
00163 
00164 int main(int argc, char **argv)
00165 {
00166         VerseKey key1, key2, key3;
00167         int i;
00168         char xbuff[64];
00169         unsigned long offset;
00170         unsigned short size=0;
00171         unsigned long ulsize=0;
00172         char *tmpbuf=NULL;
00173         int iType;
00174         boundfunc blockbound[4] = {bytebound, versebound, chapterbound, bookbound};
00175         bool newbook=true, newchapter=true, newtestament = true, newmodule = true, lasttodo=true;
00176 
00177         if ((argc < 2) || (argc > 4)) {
00178                 cerr << "usage: " << argv[0] << " datapath [compression type [buffer size]]\n";
00179                 exit(1);
00180         }
00181 
00182         if (argc>2)
00183         {
00184                 iType = atoi(argv[2]);
00185                 if (argc==4)
00186                 {
00187                         iBufSize = atoi(argv[3]);
00188                 }
00189                 else
00190                 {
00191                         iBufSize = 1;
00192                 }
00193         }
00194         else
00195         {
00196                 iType = 2;
00197                 iBufSize = 1;
00198         }
00199 
00200         cfile.open("raw2z.log", ios::out);
00201         if (!cfile.is_open())
00202         {
00203                 cerr << "Failed to open log file\n";
00204                 exit(-1);
00205         }
00206         cfile2.open("raw2z.lg2", ios::out);
00207         if (!cfile2.is_open())
00208         {
00209                 cerr << "Failed to open log file\n";
00210                 exit(-1);
00211         }
00212         cfile << iType << " " << iBufSize << "\n";
00213 
00214         if ((iType<=0) || (iType > 4) || !iBufSize || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help") || !strcmp(argv[1], "/?") || !strcmp(argv[1], "-help"))
00215         {
00216                 cfile << argv[0] << " - a tool to create compressed Sword modules\n";
00217                 cfile << "version 0.1\n\n";
00218                 cfile << "usage: "<< argv[0] << " datapath [compression type [buffer size]]\n\n";
00219                 cfile << "datapath: the directory in which to find the raw module\n";
00220                 cfile << "compression type: (default 2)\n" << "   1 - bytes\n" << "   2 - verses\n" << "   3 - chapters\n" << "   4 - books\n";
00221                 cfile << "buffer size (default 1): the number of the compression type in each block\n";
00222                 exit(1);
00223         }
00224 
00225         //zobj = new SWCompress();
00226         //rawdrv = new RawVerse(argv[1]);
00227 
00228 #ifndef O_BINARY
00229 #define O_BINARY 0
00230 #endif
00231         cfile << "opening files\n";
00232 
00233         tmpbuf = new char [ strlen(argv[1]) + 11 ];
00234 
00235         //original files
00236         ifd[0] = openreadfile(tmpbuf, argv[1], "ot");
00237         ixfd[0] = openreadfile(tmpbuf, argv[1], "ot.vss");
00238         ifd[1] = openreadfile(tmpbuf, argv[1], "nt");
00239         ixfd[1] = openreadfile(tmpbuf, argv[1], "nt.vss");
00240 
00241 switch ( iType) {
00242         case 1 :
00243                 ofd[0] = openwritefile(tmpbuf, argv[1], "ot.rzz");
00244                 oxfd[0] = openwritefile(tmpbuf, argv[1], "ot.rzs");
00245                 ovxfd[0] = openwritefile(tmpbuf, argv[1], "ot.rzv");
00246                 ofd[1] = openwritefile(tmpbuf, argv[1], "nt.rzz");
00247                 oxfd[1] = openwritefile(tmpbuf, argv[1], "nt.rzs");
00248                 ovxfd[1] = openwritefile(tmpbuf, argv[1], "nt.rzv");
00249                 //boundfunc = bytebound;
00250                 break;
00251         case 2 :
00252                 ofd[0] = openwritefile(tmpbuf, argv[1], "ot.vzz");
00253                 oxfd[0] = openwritefile(tmpbuf, argv[1], "ot.vzs");
00254                 ovxfd[0] = openwritefile(tmpbuf, argv[1], "ot.vzv");
00255                 ofd[1] = openwritefile(tmpbuf, argv[1], "nt.vzz");
00256                 oxfd[1] = openwritefile(tmpbuf, argv[1], "nt.vzs");
00257                 ovxfd[1] = openwritefile(tmpbuf, argv[1], "nt.vzv");
00258                 break;
00259         case 3 :
00260                 ofd[0] = openwritefile(tmpbuf, argv[1], "ot.czz");
00261                 oxfd[0] = openwritefile(tmpbuf, argv[1], "ot.czs");
00262                 ovxfd[0] = openwritefile(tmpbuf, argv[1], "ot.czv");
00263                 ofd[1] = openwritefile(tmpbuf, argv[1], "nt.czz");
00264                 oxfd[1] = openwritefile(tmpbuf, argv[1], "nt.czs");
00265                 ovxfd[1] = openwritefile(tmpbuf, argv[1], "nt.czv");
00266                 break;
00267         case 4 :
00268                 ofd[0] = openwritefile(tmpbuf, argv[1], "ot.bzz");
00269                 oxfd[0] = openwritefile(tmpbuf, argv[1], "ot.bzs");
00270                 ovxfd[0] = openwritefile(tmpbuf, argv[1], "ot.bzv");
00271                 ofd[1] = openwritefile(tmpbuf, argv[1], "nt.bzz");
00272                 oxfd[1] = openwritefile(tmpbuf, argv[1], "nt.bzs");
00273                 ovxfd[1] = openwritefile(tmpbuf, argv[1], "nt.bzv");
00274                 break;
00275         default:
00276                 cerr << "Unknown compression type\n";
00277                 exit(-1);
00278 }
00279         itestfd[0] = openwritefile(tmpbuf, argv[1], "ot.tst");
00280         itestfd[1] = openwritefile(tmpbuf, argv[1], "nt.tst");
00281         itestxfd[0] = openwritefile(tmpbuf, argv[1], "ot.tdx");
00282         itestxfd[1] = openwritefile(tmpbuf, argv[1], "nt.tdx");
00283 
00284 
00285         delete [] tmpbuf;
00286 
00287         //cfile << "about to start\n";
00288 
00289 for ( i=0; i<2; i++)
00290 {
00291         ulIOff=0, ulBuffNum=0;
00292         currbuff = "";
00293         key1 = (i == 1) ? "Matthew 1:1" : "Genesis 1:1";
00294         key2 = key3 = key1;
00295         newtestament = true;
00296 
00297         cfile << "key: " << key1 << " Testament {" << key1.Testament()-1 << "}\n";
00298         //cfile << "Chapter {" << key.Chapter() << "}\n";
00299         //cfile << "Verse {" << key.Verse() << "}\n";
00300         //cfile << key.compare("Revelation of John 22:21") << "\n";
00301         //cfile << key.compare("Genesis 1:1") << "\n";
00302         do
00303         {
00304                 //cfile << "ok";
00305                 // read current verse offset
00306                 if (read(ixfd[i], &offset, 4) != 4)
00307                 {
00308                         cfile << "Failed to read input verse offsets?\n";
00309                         break;
00310                 }
00311                 if (read(ixfd[i], &size, 2) != 2)
00312                 {
00313                         cfile << "Failed to read input verse sizes?\n";
00314                         break;
00315                 }
00316                 cfile << "key:" << key1 << " offset:" << offset << " size:" << size << "\n";
00317                 sprintf(xbuff, "key{%s} offset{%ld} size{%d}\n", (const char *)key1, offset, size);
00318                 write(itestxfd[i], &xbuff, strlen(xbuff));
00319                 ulsize = size;
00320                 if (!offset && !size)
00321                 {
00322                         //Check for module header
00323                         if (read(ixfd[i], &ulIOff, 4) != 4)
00324                         {
00325                                 cfile << "Failed to read input verse offsets?\n";
00326                                 break;
00327                         }
00328                         ulsize = ulIOff;
00329                         ulIOff = 0;
00330                         lseek(ixfd[i], 6, SEEK_SET);
00331                 }
00332 
00333                 if (ulsize)
00334                 {
00335                         // read current verse and add to current buffer
00336                         tmpbuf = (char *) calloc(ulsize + 1, 1);
00337                         lseek(ifd[i], offset, SEEK_SET);
00338                         read(ifd[i], tmpbuf, ulsize);
00339                         currbuff += tmpbuf;
00340                         //cfile << currbuff << "\n";
00341 
00342                         // write to verse index into compressed
00343                         write(ovxfd[i], &ulBuffNum, 4);    // current buffer number
00344                         write(ovxfd[i], &ulIOff, 4);    // offset within the buffer
00345                         write(ovxfd[i], &size, 2);    // verse size
00346 
00347                         ulFOff = lseek(ofd[i], 0, SEEK_CUR) + size;
00348                         if (key1.compare("Revelation of John 22:21")!=-1)
00349                         {
00350                                 lasttodo = false;
00351                         }
00352                         if (blockbound[iType-1](ulFOff, key1)/*at block boudary*/)
00353                         {
00354                                 writeblock(i);
00355                                 /*
00356                                 cfile << "compressing block\n";
00357                                 // compress current buffer
00358                                 buffsize = currbuff.length();
00359                                 write(itestfd[i], currbuff.c_str(), buffsize);
00360                                 compsize = (unsigned long) (buffsize*1.01)+20;  // at least 1% bigger than buffer + 12 bytes
00361                                 //cfile << "{" << compsize << "}";
00362                                 //destbuff = (char *) calloc(compsize + 1, 1);
00363                                 destbuff = new char[compsize];
00364                                 if (compress((Bytef*)destbuff, &compsize, (const Bytef*)currbuff.c_str(), buffsize)!=Z_OK)
00365                                 {
00366                                         cerr << "Could not compress buffer: exiting\n";
00367                                         delete[] destbuff;
00368                                         exit(-1);
00369                                 }
00370                                 //cout << "Compressed buffer{" << compsize << "}\n" << destbuff << "\n";
00371                                 //cout.flush();
00372                                 // write to compressed file index
00373                                 ulCOff = lseek(ofd[i], 0, SEEK_END);
00374                                 write(oxfd[i], &ulCOff, 4);    // offset in compressed file
00375                                 write(oxfd[i], &compsize, 4);    // compressed size
00376                                 write(oxfd[i], &buffsize, 4);    // uncompressed size
00377                                 cfile << buffsize << " -> " << compsize << "\n";
00378                                 cfile2 << "Compressed{" << compsize << "}\n" << destbuff << "\n";
00379                                 cfile2.flush();
00380 
00381                                 //write compressed buffer to file
00382                                 write(ofd[i], destbuff, compsize);
00383 
00384                                 //free(destbuff);
00385                                 delete[] destbuff;
00386 
00387                                 currbuff = "";
00388                                 ulBuffNum++;
00389                                 ulIOff = 0;
00390                                 */
00391                         }
00392                         else
00393                         {
00394                                 ulIOff += ulsize;
00395                         }
00396                         free(tmpbuf);
00397 
00398                         if (newmodule)
00399                         {
00400                                 newmodule = false;
00401                                 cfile << "had a new module " << (const char *) key1 << "{" << offset << "}\n";
00402                                 writeblock(i);
00403                         }
00404                         else if (newtestament)
00405                         {
00406                                 newtestament = false;
00407                                 cfile << "had a new testament " << (const char *) key1 << "{" << offset << "}\n";
00408                         }
00409                         else if (newbook)
00410                         {
00411                                 newbook = false;
00412                                 cfile << "had a new book " << (const char *) key1 << "{" << offset << "}\n";
00413                         }
00414                         else if (newchapter)
00415                         {
00416                                 newchapter = false;
00417                                 cfile << "had a new chapter " << (const char *) key1 << "{" << offset << "}\n";
00418                         }
00419                         else
00420                         {
00421                                 key1++;
00422                         }
00423 
00424                         if (key1.Chapter()!=key2.Chapter() || (key1.Book()!=key2.Book()))
00425                         {
00426                                 newchapter = true;
00427                                 cfile << "got a new chapter " << (const char *) key1 << "\n";
00428                         }
00429                         if (key1.Book()!=key2.Book())
00430                         {
00431                                 newbook = true;
00432                                 cfile << "got a new book " << (const char *) key1 << "\n";
00433                         }
00434                         key2 = key1;
00435 
00436                 }
00437                 else
00438                 {
00439                         cfile << "empty offset\n";
00440                         // write to verse index into compressed
00441                         write(ovxfd[i], &ulNone, 4);    // current buffer number
00442                         write(ovxfd[i], &size, 2);    // verse size
00443                         write(ovxfd[i], &ulNone, 4);    // offset within the buffer
00444                 }
00445         }
00446         while ( (key1.Testament()==i+1) && ((key1.compare("Revelation of John 22:21")==-1) || (lasttodo)));
00447 
00448         close(ifd[i]);
00449         close(ofd[i]);
00450         close(ixfd[i]);
00451         close(oxfd[i]);
00452         close(ovxfd[i]);
00453         close(itestfd[i]);
00454         close(itestxfd[i]);
00455 }
00456         return 1;
00457 }

Generated on Thu Jun 20 22:13:00 2002 for The Sword Project by doxygen 1.2.15