00001
00002
00003
00004
00005
00006
00007 #include <stdio.h>
00008 #include <fcntl.h>
00009
00010 #ifndef __GNUC__
00011 #include <io.h>
00012 #else
00013 #include <unistd.h>
00014 #endif
00015
00016 #include <string.h>
00017 #include <utilfuns.h>
00018 #include <rawverse.h>
00019 #include <rawtext.h>
00020
00021 #include <map>
00022 #include <list>
00023 #include <algorithm>
00024 #include <regex.h>
00025
00026 #ifndef O_BINARY
00027 #define O_BINARY 0
00028 #endif
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 RawText::RawText(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang)
00039 : SWText(iname, idesc, idisp, enc, dir, mark, ilang),
00040 RawVerse(ipath) {
00041
00042 string fname;
00043 fname = path;
00044 char ch = fname.c_str()[strlen(fname.c_str())-1];
00045 if ((ch != '/') && (ch != '\\'))
00046 fname += "/";
00047
00048 for (int loop = 0; loop < 2; loop++) {
00049 fastSearch[loop] = 0;
00050 string fastidxname =(fname + ((loop)?"ntwords.dat":"otwords.dat"));
00051 if (!access(fastidxname.c_str(), 04)) {
00052 fastidxname = (fname + ((loop)?"ntwords.idx":"otwords.idx"));
00053 if (!access(fastidxname.c_str(), 04))
00054 fastSearch[loop] = new RawStr((fname + ((loop)?"ntwords":"otwords")).c_str());
00055 }
00056 }
00057 }
00058
00059
00060
00061
00062
00063
00064 RawText::~RawText()
00065 {
00066 if (fastSearch[0])
00067 delete fastSearch[0];
00068
00069 if (fastSearch[1])
00070 delete fastSearch[1];
00071 }
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081 char *RawText::getRawEntry() {
00082 long start = 0;
00083 unsigned short size = 0;
00084 VerseKey *key = 0;
00085
00086
00087 #ifndef _WIN32_WCE
00088 try {
00089 #endif
00090 key = SWDYNAMIC_CAST(VerseKey, this->key);
00091 #ifndef _WIN32_WCE
00092 }
00093 catch ( ... ) { }
00094 #endif
00095
00096 if (!key)
00097 key = new VerseKey(this->key);
00098
00099 findoffset(key->Testament(), key->Index(), &start, &size);
00100 entrySize = size;
00101
00102 unsigned long newsize = (size + 2) * FILTERPAD;
00103 if (newsize > entrybufallocsize) {
00104 if (entrybuf)
00105 delete [] entrybuf;
00106 entrybuf = new char [ newsize ];
00107 entrybufallocsize = newsize;
00108 }
00109 *entrybuf = 0;
00110
00111 gettext(key->Testament(), start, (size + 2), entrybuf);
00112
00113 rawFilter(entrybuf, size, key);
00114
00115 if (!isUnicode())
00116 preptext(entrybuf);
00117
00118 if (this->key != key)
00119 delete key;
00120
00121 return entrybuf;
00122 }
00123
00124
00125 signed char RawText::createSearchFramework() {
00126 SWKey *savekey = 0;
00127 SWKey *searchkey = 0;
00128 SWKey textkey;
00129 char *word = 0;
00130 char *wordBuf = 0;
00131
00132
00133
00134
00135 map < string, list<long> > dictionary[2];
00136
00137
00138
00139
00140 if (!key->Persist()) {
00141 savekey = CreateKey();
00142 *savekey = *key;
00143 }
00144 else savekey = key;
00145
00146 searchkey = (key->Persist())?key->clone():0;
00147 if (searchkey) {
00148 searchkey->Persist(1);
00149 SetKey(*searchkey);
00150 }
00151
00152
00153 *this = TOP;
00154
00155 VerseKey *lkey = (VerseKey *)key;
00156
00157
00158 while (!Error()) {
00159 long index = lkey->Index();
00160 wordBuf = (char *)calloc(sizeof(char), strlen(StripText()) + 1);
00161 strcpy(wordBuf, StripText());
00162
00163
00164 word = strtok(wordBuf, " !.,?;:()-=+/\\|{}[]\"<>");
00165 while (word) {
00166
00167
00168 for (unsigned int i = 0; i < strlen(word); i++)
00169 word[i] = SW_toupper(word[i]);
00170
00171
00172
00173
00174 dictionary[lkey->Testament()-1][word].push_back(index);
00175 word = strtok(NULL, " !.,?;:()-=+/\\|{}[]\"<>");
00176 }
00177 free(wordBuf);
00178 (*this)++;
00179 }
00180
00181
00182 SetKey(*savekey);
00183
00184 if (!savekey->Persist())
00185 delete savekey;
00186
00187 if (searchkey)
00188 delete searchkey;
00189
00190
00191
00192 int datfd;
00193 int idxfd;
00194 map < string, list<long> >::iterator it;
00195 list<long>::iterator it2;
00196 unsigned long offset, entryoff;
00197 unsigned short size;
00198
00199 string fname;
00200 fname = path;
00201 char ch = fname.c_str()[strlen(fname.c_str())-1];
00202 if ((ch != '/') && (ch != '\\'))
00203 fname += "/";
00204
00205
00206 for (int loop = 0; loop < 2; loop++) {
00207 if ((datfd = open((fname + ((loop)?"ntwords.dat":"otwords.dat")).c_str(), O_CREAT|O_WRONLY|O_BINARY, 00644 )) == -1)
00208 return -1;
00209 if ((idxfd = open((fname + ((loop)?"ntwords.idx":"otwords.idx")).c_str(), O_CREAT|O_WRONLY|O_BINARY, 00644 )) == -1) {
00210 close(datfd);
00211 return -1;
00212 }
00213
00214
00215 for (it = dictionary[loop].begin(); it != dictionary[loop].end(); it++) {
00216 printf("%s: ", it->first.c_str());
00217
00218
00219
00220 offset = lseek(datfd, 0, SEEK_CUR);
00221 write(idxfd, &offset, 4);
00222
00223
00224 write(datfd, it->first.c_str(), strlen(it->first.c_str()));
00225 write(datfd, "\n", 1);
00226
00227
00228
00229
00230 it->second.unique();
00231
00232
00233
00234 unsigned short count = 0;
00235 for (it2 = it->second.begin(); it2 != it->second.end(); it2++) {
00236 entryoff= *it2;
00237 write(datfd, &entryoff, 4);
00238 count++;
00239 }
00240
00241
00242
00243 size = lseek(datfd, 0, SEEK_CUR) - offset;
00244
00245
00246 write(idxfd, &size, 2);
00247 printf("%d entries (size: %d)\n", count, size);
00248 }
00249 close(datfd);
00250 close(idxfd);
00251 }
00252 return 0;
00253 }
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263
00264
00265
00266
00267
00268
00269
00270
00271 ListKey &RawText::Search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData)
00272 {
00273 listkey.ClearList();
00274
00275 if ((fastSearch[0]) && (fastSearch[1])) {
00276
00277 switch (searchType) {
00278 case -2: {
00279
00280 if ((flags & REG_ICASE) != REG_ICASE)
00281
00282 break;
00283
00284
00285
00286 VerseKey *testKeyType = 0;
00287 #ifndef _WIN32_WCE
00288 try {
00289 #endif
00290 testKeyType = SWDYNAMIC_CAST(VerseKey, ((scope)?scope:key));
00291 #ifndef _WIN32_WCE
00292 }
00293 catch ( ... ) {}
00294 #endif
00295
00296
00297
00298 if (!testKeyType)
00299 break;
00300
00301
00302
00303
00304 if (justCheckIfSupported) {
00305 *justCheckIfSupported = true;
00306 return listkey;
00307 }
00308
00309 SWKey saveKey = *testKeyType;
00310
00311 char error = 0;
00312 char **words = 0;
00313 char *wordBuf = 0;
00314 int wordCount = 0;
00315 long start;
00316 unsigned short size;
00317 char *idxbuf = 0;
00318 char *datbuf = 0;
00319 list <long> indexes;
00320 list <long> indexes2;
00321 VerseKey vk;
00322 vk = TOP;
00323
00324 (*percent)(10, percentUserData);
00325
00326
00327 stdstr(&wordBuf, istr);
00328 for (unsigned int i = 0; i < strlen(wordBuf); i++)
00329 wordBuf[i] = SW_toupper(wordBuf[i]);
00330
00331
00332 words = (char **)calloc(sizeof(char *), 10);
00333 int allocWords = 10;
00334 words[wordCount] = strtok(wordBuf, " ");
00335 while (words[wordCount]) {
00336 wordCount++;
00337 if (wordCount == allocWords) {
00338 allocWords+=10;
00339 words = (char **)realloc(words, sizeof(char *)*allocWords);
00340 }
00341 words[wordCount] = strtok(NULL, " ");
00342 }
00343
00344 (*percent)(20, percentUserData);
00345
00346
00347 indexes.erase(indexes.begin(), indexes.end());
00348
00349
00350 for (int j = 0; j < 2; j++) {
00351
00352 for (int i = 0; i < wordCount; i++) {
00353
00354
00355 indexes2.erase(indexes2.begin(), indexes2.end());
00356 error = 0;
00357
00358
00359
00360 for (int away = 0; !error; away++) {
00361 idxbuf = 0;
00362
00363
00364 error = fastSearch[j]->findoffset(words[i], &start, &size, away);
00365
00366
00367 fastSearch[j]->getidxbufdat(start, &idxbuf);
00368
00369
00370 if (strlen(idxbuf) > strlen(words[i]))
00371 idxbuf[strlen(words[i])] = 0;
00372
00373 if (!strcmp(idxbuf, words[i])) {
00374
00375
00376 free(idxbuf);
00377 idxbuf = (char *)calloc(size+2, 1);
00378 datbuf = (char *)calloc(size+2, 1);
00379 fastSearch[j]->gettext(start, size + 2, idxbuf, datbuf);
00380
00381
00382
00383
00384
00385
00386 long *keyindex = (long *)datbuf;
00387 while (keyindex < (long *)(datbuf + size - (strlen(idxbuf) + 1))) {
00388 if (i) {
00389
00390
00391
00392 if (find(indexes.begin(), indexes.end(), *keyindex) != indexes.end())
00393
00394 indexes2.push_back(*keyindex);
00395 }
00396 else indexes2.push_back(*keyindex);
00397 keyindex++;
00398 }
00399 free(datbuf);
00400 }
00401 else error = 1;
00402 free(idxbuf);
00403 }
00404
00405
00406 indexes = indexes2;
00407
00408 percent((char)(20 + (float)((j*wordCount)+i)/(wordCount * 2) * 78), percentUserData);
00409 }
00410
00411
00412 indexes.sort();
00413
00414
00415 for (list <long>::iterator it = indexes.begin(); it != indexes.end(); it++) {
00416
00417
00418 vk.Testament(j+1);
00419 vk.Error();
00420 vk.Index(*it);
00421
00422
00423
00424 if (scope) {
00425 *testKeyType = vk;
00426
00427
00428 if (*testKeyType == vk)
00429 listkey << (const char *) vk;
00430 }
00431 else listkey << (const char*) vk;
00432 }
00433 }
00434 (*percent)(98, percentUserData);
00435
00436 free(words);
00437 free(wordBuf);
00438
00439 *testKeyType = saveKey;
00440
00441 listkey = TOP;
00442 (*percent)(100, percentUserData);
00443 return listkey;
00444 }
00445
00446 default:
00447 break;
00448 }
00449 }
00450
00451
00452 if (justCheckIfSupported) {
00453 *justCheckIfSupported = false;
00454 return listkey;
00455 }
00456
00457
00458 return SWModule::Search(istr, searchType, flags, scope, justCheckIfSupported, percent, percentUserData);
00459 }
00460
00461 #ifdef _MSC_VER
00462 SWModule &RawText::operator =(SW_POSITION p) {
00463 #else
00464 RawText &RawText::operator =(SW_POSITION p) {
00465 #endif
00466 SWModule::operator =(p);
00467 return *this;
00468 }
00469
00470 SWModule &RawText::setentry(const char *inbuf, long len) {
00471 VerseKey *key = 0;
00472
00473 #ifndef _WIN32_WCE
00474 try {
00475 #endif
00476 key = SWDYNAMIC_CAST(VerseKey, this->key);
00477 #ifndef _WIN32_WCE
00478 }
00479 catch ( ... ) {}
00480 #endif
00481
00482 if (!key)
00483 key = new VerseKey(this->key);
00484
00485 settext(key->Testament(), key->Index(), inbuf, len);
00486
00487 if (this->key != key)
00488 delete key;
00489
00490 return *this;
00491 }
00492
00493 SWModule &RawText::operator <<(const char *inbuf) {
00494 return setentry(inbuf, 0);
00495 }
00496
00497
00498 SWModule &RawText::operator <<(const SWKey *inkey) {
00499 VerseKey *destkey = 0;
00500 const VerseKey *srckey = 0;
00501
00502 #ifndef _WIN32_WCE
00503 try {
00504 #endif
00505 destkey = SWDYNAMIC_CAST(VerseKey, this->key);
00506 #ifndef _WIN32_WCE
00507 }
00508 catch ( ... ) {}
00509 #endif
00510
00511 if (!destkey)
00512 destkey = new VerseKey(this->key);
00513
00514
00515 #ifndef _WIN32_WCE
00516 try {
00517 #endif
00518 srckey = SWDYNAMIC_CAST(VerseKey, inkey);
00519 #ifndef _WIN32_WCE
00520 }
00521 catch ( ... ) {}
00522 #endif
00523
00524 if (!srckey)
00525 srckey = new VerseKey(inkey);
00526
00527 linkentry(destkey->Testament(), destkey->Index(), srckey->Index());
00528
00529 if (this->key != destkey)
00530 delete destkey;
00531
00532 if (inkey != srckey)
00533 delete srckey;
00534
00535 return *this;
00536 }
00537
00538
00539
00540
00541
00542
00543
00544
00545 void RawText::deleteEntry() {
00546
00547 VerseKey *key = 0;
00548
00549 #ifndef _WIN32_WCE
00550 try {
00551 #endif
00552 key = SWDYNAMIC_CAST(VerseKey, this->key);
00553 #ifndef _WIN32_WCE
00554 }
00555 catch ( ... ) {}
00556 #endif
00557 if (!key)
00558 key = new VerseKey(this->key);
00559
00560 settext(key->Testament(), key->Index(), "");
00561
00562 if (key != this->key)
00563 delete key;
00564 }
00565
00566
00567
00568
00569
00570
00571
00572
00573
00574 SWModule &RawText::operator +=(int increment)
00575 {
00576 long start;
00577 unsigned short size;
00578 VerseKey *tmpkey = 0;
00579
00580 #ifndef _WIN32_WCE
00581 try {
00582 #endif
00583 tmpkey = SWDYNAMIC_CAST(VerseKey, key);
00584 #ifndef _WIN32_WCE
00585 }
00586 catch ( ... ) {}
00587 #endif
00588 if (!tmpkey)
00589 tmpkey = new VerseKey(key);
00590
00591 findoffset(tmpkey->Testament(), tmpkey->Index(), &start, &size);
00592
00593 SWKey lastgood = *tmpkey;
00594 while (increment) {
00595 long laststart = start;
00596 unsigned short lastsize = size;
00597 SWKey lasttry = *tmpkey;
00598 (increment > 0) ? (*key)++ : (*key)--;
00599 if (tmpkey != key)
00600 delete tmpkey;
00601 tmpkey = 0;
00602 #ifndef _WIN32_WCE
00603 try {
00604 #endif
00605 tmpkey = SWDYNAMIC_CAST(VerseKey, key);
00606 #ifndef _WIN32_WCE
00607 }
00608 catch ( ... ) {}
00609 #endif
00610 if (!tmpkey)
00611 tmpkey = new VerseKey(key);
00612
00613 if ((error = key->Error())) {
00614 *key = lastgood;
00615 break;
00616 }
00617 long index = tmpkey->Index();
00618 findoffset(tmpkey->Testament(), index, &start, &size);
00619 if ((((laststart != start) || (lastsize != size))||(!skipConsecutiveLinks)) && (start >= 0) && (size)) {
00620 increment += (increment < 0) ? 1 : -1;
00621 lastgood = *tmpkey;
00622 }
00623 }
00624 error = (error) ? KEYERR_OUTOFBOUNDS : 0;
00625
00626 if (tmpkey != key)
00627 delete tmpkey;
00628
00629 return *this;
00630 }