/******************************************************************************
* rawtext.cpp - code for class 'RawText'- a module that reads raw text files:
* ot and nt using indexs ??.bks ??.cps ??.vss
*/
#include <stdio.h>
#include <fcntl.h>
#ifndef __GNUC__
#include <io.h>
#else
#include <unistd.h>
#endif
#include <string.h>
#include <utilfuns.h>
#include <rawverse.h>
#include <rawtext.h>
#include <map>
#include <list>
#include <algorithm>
#include <regex.h> // GNU
#ifndef O_BINARY
#define O_BINARY 0
#endif
/******************************************************************************
* RawText Constructor - Initializes data for instance of RawText
*
* ENT: iname - Internal name for module
* idesc - Name to display to user for module
* idisp - Display object to use for displaying
*/
RawText::RawText(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang)
: SWText(iname, idesc, idisp, enc, dir, mark, ilang),
RawVerse(ipath) {
string fname;
fname = path;
char ch = fname.c_str()[strlen(fname.c_str())-1];
if ((ch != '/') && (ch != '\\'))
fname += "/";
for (int loop = 0; loop < 2; loop++) {
fastSearch[loop] = 0;
string fastidxname =(fname + ((loop)?"ntwords.dat":"otwords.dat"));
if (!access(fastidxname.c_str(), 04)) {
fastidxname = (fname + ((loop)?"ntwords.idx":"otwords.idx"));
if (!access(fastidxname.c_str(), 04))
fastSearch[loop] = new RawStr((fname + ((loop)?"ntwords":"otwords")).c_str());
}
}
}
/******************************************************************************
* RawText Destructor - Cleans up instance of RawText
*/
RawText::~RawText()
{
if (fastSearch[0])
delete fastSearch[0];
if (fastSearch[1])
delete fastSearch[1];
}
/******************************************************************************
* RawText::getRawEntry - Returns the correct verse when char * cast
* is requested
*
* RET: string buffer with verse
*/
char *RawText::getRawEntry() {
long start = 0;
unsigned short size = 0;
VerseKey *key = 0;
// see if we have a VerseKey * or decendant
try {
key = SWDYNAMIC_CAST(VerseKey, this->key);
}
catch ( ... ) { }
// if we don't have a VerseKey * decendant, create our own
if (!key)
key = new VerseKey(this->key);
findoffset(key->Testament(), key->Index(), &start, &size);
entrySize = size; // support getEntrySize call
unsigned long newsize = (size + 2) * FILTERPAD;
if (newsize > entrybufallocsize) {
if (entrybuf)
delete [] entrybuf;
entrybuf = new char [ newsize ];
entrybufallocsize = newsize;
}
*entrybuf = 0;
readtext(key->Testament(), start, (size + 2), entrybuf);
entrybuf[size] = 0;
rawFilter(entrybuf, size, 0); // hack, decipher
rawFilter(entrybuf, size, key);
if (!isUnicode())
preptext(entrybuf);
if (this->key != key) // free our key if we created a VerseKey
delete key;
return entrybuf;
}
signed char RawText::createSearchFramework() {
SWKey *savekey = 0;
SWKey *searchkey = 0;
SWKey textkey;
char *word = 0;
char *wordBuf = 0;
// dictionary holds words associated with a list
// containing every module position that contains
// the word. [0] Old Testament; [1] NT
map < string, list<long> > dictionary[2];
// save key information so as not to disrupt original
// module position
if (!key->Persist()) {
savekey = CreateKey();
*savekey = *key;
}
else savekey = key;
searchkey = (key->Persist())?key->clone():0;
if (searchkey) {
searchkey->Persist(1);
setKey(*searchkey);
}
// position module at the beginning
*this = TOP;
VerseKey *lkey = (VerseKey *)key;
// iterate thru each entry in module
while (!Error()) {
long index = lkey->Index();
wordBuf = (char *)calloc(sizeof(char), strlen(StripText()) + 1);
strcpy(wordBuf, StripText());
// grab each word from the text
word = strtok(wordBuf, " !.,?;:()-=+/\\|{}[]\"<>");
while (word) {
// make word upper case
toupperstr(word);
// lookup word in dictionary (or make entry in dictionary
// for this word) and add this module position (index) to
// the word's associated list of module positions
dictionary[lkey->Testament()-1][word].push_back(index);
word = strtok(NULL, " !.,?;:()-=+/\\|{}[]\"<>");
}
free(wordBuf);
(*this)++;
}
// reposition module back to where it was before we were called
setKey(*savekey);
if (!savekey->Persist())
delete savekey;
if (searchkey)
delete searchkey;
// --------- Let's output an index from our dictionary -----------
int datfd;
int idxfd;
map < string, list<long> >::iterator it;
list<long>::iterator it2;
unsigned long offset, entryoff;
unsigned short size;
string fname;
fname = path;
char ch = fname.c_str()[strlen(fname.c_str())-1];
if ((ch != '/') && (ch != '\\'))
fname += "/";
// for old and new testament do...
for (int loop = 0; loop < 2; loop++) {
if ((datfd = open((fname + ((loop)?"ntwords.dat":"otwords.dat")).c_str(), O_CREAT|O_WRONLY|O_BINARY, 00644 )) == -1)
return -1;
if ((idxfd = open((fname + ((loop)?"ntwords.idx":"otwords.idx")).c_str(), O_CREAT|O_WRONLY|O_BINARY, 00644 )) == -1) {
close(datfd);
return -1;
}
// iterate thru each word in the dictionary
for (it = dictionary[loop].begin(); it != dictionary[loop].end(); it++) {
printf("%s: ", it->first.c_str());
// get our current offset in our word.dat file and write this as the start
// of the next entry in our database
offset = lseek(datfd, 0, SEEK_CUR);
write(idxfd, &offset, 4);
// write our word out to the word.dat file, delineating with a \n
write(datfd, it->first.c_str(), strlen(it->first.c_str()));
write(datfd, "\n", 1);
// force our mod position list for this word to be unique (remove
// duplicates that may exist if the word was found more than once
// in the verse
it->second.unique();
// iterate thru each mod position for this word and output it to
// our database
unsigned short count = 0;
for (it2 = it->second.begin(); it2 != it->second.end(); it2++) {
entryoff= *it2;
write(datfd, &entryoff, 4);
count++;
}
// now see what our new position is in our word.dat file and
// determine the size of this database entry
size = lseek(datfd, 0, SEEK_CUR) - offset;
// store the size of this database entry
write(idxfd, &size, 2);
printf("%d entries (size: %d)\n", count, size);
}
close(datfd);
close(idxfd);
}
return 0;
}
/******************************************************************************
* SWModule::Search - Searches a module for a string
*
* ENT: istr - string for which to search
* searchType - type of search to perform
* >=0 - regex
* -1 - phrase
* -2 - multiword
* flags - options flags for search
* justCheckIfSupported - if set, don't search, only tell if this
* function supports requested search.
*
* RET: listkey set to verses that contain istr
*/
ListKey &RawText::Search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData)
{
listkey.ClearList();
if ((fastSearch[0]) && (fastSearch[1])) {
switch (searchType) {
case -2: {
if ((flags & REG_ICASE) != REG_ICASE) // if haven't chosen to
// ignore case
break; // can't handle fast case sensitive searches
// test to see if our scope for this search is bounded by a
// VerseKey
VerseKey *testKeyType = 0;
try {
testKeyType = SWDYNAMIC_CAST(VerseKey, ((scope)?scope:key));
}
catch ( ... ) {}
// if we don't have a VerseKey * decendant we can't handle
// because of scope.
// In the future, add bool SWKey::isValid(const char *tryString);
if (!testKeyType)
break;
// check if we just want to see if search is supported.
// If we've gotten this far, then it is supported.
if (justCheckIfSupported) {
*justCheckIfSupported = true;
return listkey;
}
SWKey saveKey = *testKeyType; // save current place
char error = 0;
char **words = 0;
char *wordBuf = 0;
int wordCount = 0;
long start;
unsigned short size;
char *idxbuf = 0;
char *datbuf = 0;
list <long> indexes;
list <long> indexes2;
VerseKey vk;
vk = TOP;
(*percent)(10, percentUserData);
// toupper our copy of search string
stdstr(&wordBuf, istr);
toupperstr(wordBuf);
// get list of individual words
words = (char **)calloc(sizeof(char *), 10);
int allocWords = 10;
words[wordCount] = strtok(wordBuf, " ");
while (words[wordCount]) {
wordCount++;
if (wordCount == allocWords) {
allocWords+=10;
words = (char **)realloc(words, sizeof(char *)*allocWords);
}
words[wordCount] = strtok(NULL, " ");
}
(*percent)(20, percentUserData);
// clear our result set
indexes.erase(indexes.begin(), indexes.end());
// search both old and new testament indexes
for (int j = 0; j < 2; j++) {
// iterate thru each word the user passed to us.
for (int i = 0; i < wordCount; i++) {
// clear this word's result set
indexes2.erase(indexes2.begin(), indexes2.end());
error = 0;
// iterate thru every word in the database that starts
// with our search word
for (int away = 0; !error; away++) {
idxbuf = 0;
// find our word in the database and jump ahead _away_
error = fastSearch[j]->findoffset(words[i], &start, &size, away);
// get the word from the database
fastSearch[j]->getidxbufdat(start, &idxbuf);
// check to see if it starts with our target word
if (strlen(idxbuf) > strlen(words[i]))
idxbuf[strlen(words[i])] = 0;
// else words[i][strlen(idxbuf)] = 0;
if (!strcmp(idxbuf, words[i])) {
// get data for this word from database
free(idxbuf);
idxbuf = 0;
datbuf = 0;
fastSearch[j]->readtext(start, &size, &idxbuf, &datbuf);
// we know that the data consists of sizof(long)
// records each a valid module position that constains
// this word
//
// iterate thru each of these module positions
long *keyindex = (long *)datbuf;
while (keyindex < (long *)(datbuf + size - (strlen(idxbuf) + 1))) {
if (i) { // if we're not on our first word
// check to see if this word is already in the result set.
// This is our AND functionality
if (find(indexes.begin(), indexes.end(), *keyindex) != indexes.end())
// add to new result set
indexes2.push_back(*keyindex);
}
else indexes2.push_back(*keyindex);
keyindex++;
}
free(datbuf);
}
else error = 1; // no more matches
free(idxbuf);
}
// make new result set final result set
indexes = indexes2;
percent((char)(20 + (float)((j*wordCount)+i)/(wordCount * 2) * 78), percentUserData);
}
// indexes contains our good verses, lets return them in a listkey
indexes.sort();
// iterate thru each good module position that meets the search
for (list <long>::iterator it = indexes.begin(); it != indexes.end(); it++) {
// set a temporary verse key to this module position
vk.Testament(j+1);
vk.Error();
vk.Index(*it);
// check scope
// Try to set our scope key to this verse key
if (scope) {
*testKeyType = vk;
// check to see if it set ok and if so, add to our return list
if (*testKeyType == vk)
listkey << (const char *) vk;
}
else listkey << (const char*) vk;
}
}
(*percent)(98, percentUserData);
free(words);
free(wordBuf);
*testKeyType = saveKey; // set current place back to original
listkey = TOP;
(*percent)(100, percentUserData);
return listkey;
}
default:
break;
}
}
// check if we just want to see if search is supported
if (justCheckIfSupported) {
*justCheckIfSupported = false;
return listkey;
}
// if we don't support this search, fall back to base class
return SWModule::Search(istr, searchType, flags, scope, justCheckIfSupported, percent, percentUserData);
}
void RawText::setEntry(const char *inbuf, long len) {
VerseKey *key = 0;
// see if we have a VerseKey * or decendant
try {
key = SWDYNAMIC_CAST(VerseKey, this->key);
}
catch ( ... ) {}
// if we don't have a VerseKey * decendant, create our own
if (!key)
key = new VerseKey(this->key);
settext(key->Testament(), key->Index(), inbuf, len);
if (this->key != key) // free our key if we created a VerseKey
delete key;
}
void RawText::linkEntry(const SWKey *inkey) {
VerseKey *destkey = 0;
const VerseKey *srckey = 0;
// see if we have a VerseKey * or decendant
try {
destkey = SWDYNAMIC_CAST(VerseKey, this->key);
}
catch ( ... ) {}
// if we don't have a VerseKey * decendant, create our own
if (!destkey)
destkey = new VerseKey(this->key);
// see if we have a VerseKey * or decendant
try {
srckey = SWDYNAMIC_CAST(VerseKey, inkey);
}
catch ( ... ) {}
// if we don't have a VerseKey * decendant, create our own
if (!srckey)
srckey = new VerseKey(inkey);
linkentry(destkey->Testament(), destkey->Index(), srckey->Index());
if (this->key != destkey) // free our key if we created a VerseKey
delete destkey;
if (inkey != srckey) // free our key if we created a VerseKey
delete srckey;
}
/******************************************************************************
* RawText::deleteEntry - deletes this entry
*
* RET: *this
*/
void RawText::deleteEntry() {
VerseKey *key = 0;
try {
key = SWDYNAMIC_CAST(VerseKey, this->key);
}
catch ( ... ) {}
if (!key)
key = new VerseKey(this->key);
settext(key->Testament(), key->Index(), "");
if (key != this->key)
delete key;
}
/******************************************************************************
* RawText::increment - Increments module key a number of entries
*
* ENT: increment - Number of entries to jump forward
*
* RET: *this
*/
void RawText::increment(int steps) {
long start;
unsigned short size;
VerseKey *tmpkey = 0;
try {
tmpkey = SWDYNAMIC_CAST(VerseKey, key);
}
catch ( ... ) {}
if (!tmpkey)
tmpkey = new VerseKey(key);
findoffset(tmpkey->Testament(), tmpkey->Index(), &start, &size);
SWKey lastgood = *tmpkey;
while (steps) {
long laststart = start;
unsigned short lastsize = size;
SWKey lasttry = *tmpkey;
(steps > 0) ? (*key)++ : (*key)--;
if (tmpkey != key)
delete tmpkey;
tmpkey = 0;
try {
tmpkey = SWDYNAMIC_CAST(VerseKey, key);
}
catch ( ... ) {}
if (!tmpkey)
tmpkey = new VerseKey(key);
if ((error = key->Error())) {
*key = lastgood;
break;
}
long index = tmpkey->Index();
findoffset(tmpkey->Testament(), index, &start, &size);
if (
(((laststart != start) || (lastsize != size)) // we're a different entry
&& (start > 0) && (size)) // and we actually have a size
||(!skipConsecutiveLinks)) { // or we don't want to skip consecutive links
steps += (steps < 0) ? 1 : -1;
lastgood = *tmpkey;
}
}
error = (error) ? KEYERR_OUTOFBOUNDS : 0;
if (tmpkey != key)
delete tmpkey;
}