diff options
Diffstat (limited to 'src/modules')
75 files changed, 15823 insertions, 0 deletions
diff --git a/src/modules/comments/hrefcom/hrefcom.cpp b/src/modules/comments/hrefcom/hrefcom.cpp
new file mode 100644
index 0000000..200e21f
--- /dev/null
+++ b/src/modules/comments/hrefcom/hrefcom.cpp
@@ -0,0 +1,97 @@
+/******************************************************************************
+ *  hrefcom.cpp - code for class 'HREFCom'- a module that produces HTML HREFs
+ *			pointing to actual text desired.  Uses standard
+ *			files:	ot and nt using indexes ??.bks ??.cps ??.vss
+ */
+
+
+#include <ctype.h>
+#include <stdio.h>
+#include <fcntl.h>
+
+#ifndef __GNUC__
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+
+#include <string.h>
+#include <utilfuns.h>
+#include <rawverse.h>
+#include <hrefcom.h>
+
+
+ /******************************************************************************
+ * HREFCom Constructor - Initializes data for instance of HREFCom
+ *
+ * ENT:	ipath - path handed through to the RawVerse base
+ *	iprefix - string to prepend to each HREF (e.g. "file://mods/com/jfb/")
+ *	iname - Internal name for module
+ *	idesc - Name to display to user for module
+ *	idisp - Display object to use for displaying
+ */
+
+HREFCom::HREFCom(const char *ipath, const char *iprefix, const char *iname, const char *idesc, SWDisplay *idisp) : RawVerse(ipath), SWCom(iname, idesc, idisp)
+{
+	prefix = 0;
+	stdstr(&prefix, iprefix);	// keeps a private copy of iprefix in 'prefix'
+}
+
+
+/******************************************************************************
+ * HREFCom Destructor - Cleans up instance of HREFCom
+ */
+
+HREFCom::~HREFCom()
+{
+	if (prefix)
+		delete [] prefix;
+}
+
+
+/******************************************************************************
+ * HREFCom::getRawEntry	- Builds the HREF for the current key: the module
+ *				prefix followed by the text stored for the verse
+ *
+ * RET: entrybuf, holding prefix + stored entry text (after preptext)
+ */
+
+char *HREFCom::getRawEntry() {
+	// initialised like the sibling modules so a lookup that stores nothing
+	// cannot leave us with garbage offsets
+	long  start = 0;
+	unsigned short size = 0;
+	char *tmpbuf;
+	VerseKey *key = 0;
+	// the destructor treats 'prefix' as possibly null, so do the same here
+	// instead of handing a null pointer to strlen()/sprintf()
+	const char *pfx = (prefix) ? prefix : "";
+
+#ifndef _WIN32_WCE
+	try {
+#endif
+		key = SWDYNAMIC_CAST(VerseKey, this->key);
+#ifndef _WIN32_WCE
+	}
+	catch ( ... ) {}
+#endif
+	// module key is not a VerseKey: work on a temporary VerseKey copy
+	if (!key)
+		key = new VerseKey(this->key);
+
+	findoffset(key->Testament(), key->Index(), &start, &size);
+	entrySize = size;        // support getEntrySize call
+
+	// grow the shared entry buffer only when this entry needs more room
+	unsigned long newsize = ((size + 2) + strlen(pfx)) * FILTERPAD;
+	if (newsize > entrybufallocsize) {
+		if (entrybuf)
+			delete [] entrybuf;
+		entrybuf = new char [ newsize ];
+		entrybufallocsize = newsize;
+	}
+	tmpbuf = new char [ size + 10 ];
+	memset(tmpbuf, 0, size + 10);	// always terminated, whatever readtext stores
+
+	readtext(key->Testament(), start, size + 2, tmpbuf);
+	sprintf(entrybuf, "%s%s", pfx, tmpbuf);
+	preptext(entrybuf);
+
+	delete [] tmpbuf;
+
+	if (key != this->key)	// free the temporary VerseKey if we made one
+		delete key;
+
+	return entrybuf;
+}
diff --git a/src/modules/comments/rawcom/rawcom.cpp b/src/modules/comments/rawcom/rawcom.cpp
new file mode 100644
index 0000000..f71b6ec
--- /dev/null
+++ b/src/modules/comments/rawcom/rawcom.cpp
@@ -0,0 +1,221 @@
+/******************************************************************************
+ *  rawcom.cpp - code for class 'RawCom'- a module that reads raw commentary
+ *			files:	ot and nt using indexes ??.bks ??.cps ??.vss
+ */
+
+
+#include <ctype.h>
+#include <stdio.h>
+#include <fcntl.h>
+
+#ifndef __GNUC__
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+
+#include <string.h>
+#include <utilfuns.h>
+#include <rawverse.h>
+#include <rawcom.h>
+
+
+ /******************************************************************************
+ * RawCom Constructor - Initializes data for instance of RawCom
+ *
+ * ENT:	ipath - path handed through to the RawVerse base
+ *	iname - Internal name for module
+ *	idesc - Name to display to user for module
+ *	idisp - Display object to use for displaying
+ *	encoding, dir, markup, ilang - forwarded unchanged to SWCom
+ */
+
+RawCom::RawCom(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding encoding, SWTextDirection dir, SWTextMarkup markup, const char* ilang)
+		: RawVerse(ipath),
+            SWCom(iname, idesc, idisp, encoding, dir, markup, ilang){
+}
+
+
+/******************************************************************************
+ * RawCom Destructor - Cleans up instance of RawCom
+ */
+
+RawCom::~RawCom()
+{
+}
+
+
+/******************************************************************************
+ * RawCom::getRawEntry()	- Reads the entry for the current key into the
+ *				module's entry buffer
+ *
+ * RET: entrybuf, holding the entry text (raw-filtered; preptext is applied
+ *	only when the module is not Unicode)
+ */
+
+char *RawCom::getRawEntry() {
+	long  start = 0;
+	unsigned short size = 0;
+	VerseKey *key = 0;
+
+	try {
+		key = SWDYNAMIC_CAST(VerseKey, this->key);
+	}
+	catch ( ... ) {}
+	// module key is not a VerseKey: work on a temporary VerseKey copy
+	if (!key)
+		key = new VerseKey(this->key);
+
+
+	findoffset(key->Testament(), key->Index(), &start, &size);
+	entrySize = size;        // support getEntrySize call
+
+	// grow the shared entry buffer only when this entry needs more room
+	unsigned long newsize = (size + 2) * FILTERPAD;
+	if (newsize > entrybufallocsize) {
+		if (entrybuf)
+			delete [] entrybuf;
+		entrybuf = new char [ newsize ];
+		entrybufallocsize = newsize;
+	}
+	*entrybuf = 0;
+
+	readtext(key->Testament(), start, (size + 2), entrybuf);
+
+	rawFilter(entrybuf, size, key);
+
+	if (!isUnicode())
+		preptext(entrybuf);
+
+	if (key != this->key)	// free the temporary VerseKey if we made one
+		delete key;
+
+	return entrybuf;
+}
+
+
+/******************************************************************************
+ * RawCom::increment	- Increments module key a number of entries
+ *
+ * ENT:	steps	- Number of entries to jump forward (negative: backward)
+ *
+ * Sets 'error' to KEYERR_OUTOFBOUNDS when the key runs off either end, in
+ * which case the key is put back on the last good entry; otherwise 0.
+ */
+
+void RawCom::increment(int steps) {
+	long  start = 0;
+	unsigned short size = 0;
+	VerseKey *tmpkey = 0;
+
+	try {
+		tmpkey = SWDYNAMIC_CAST(VerseKey, key);
+	}
+	catch ( ... ) {}
+	if (!tmpkey)
+		tmpkey = new VerseKey(key);
+
+	findoffset(tmpkey->Testament(), tmpkey->Index(), &start, &size);
+
+	SWKey lastgood = *tmpkey;
+	while (steps) {
+		// remember where the previous entry lived so linked duplicates
+		// (same offset and size) can be recognised below
+		long laststart = start;
+		unsigned short lastsize = size;
+		(steps > 0) ? (*key)++ : (*key)--;
+		if (tmpkey != key)
+			delete tmpkey;
+		tmpkey = 0;
+		try {
+			tmpkey = SWDYNAMIC_CAST(VerseKey, key);
+		}
+		catch ( ... ) {}
+		if (!tmpkey)
+			tmpkey = new VerseKey(key);
+
+		if ((error = key->Error())) {
+			*key = lastgood;
+			break;
+		}
+		findoffset(tmpkey->Testament(), tmpkey->Index(), &start, &size);
+		if (
+			(((laststart != start) || (lastsize != size))	// we're a different entry
+				&& (start > 0) && (size))	// and we actually have a size
+				||(!skipConsecutiveLinks)) {	// or we don't want to skip consecutive links
+			steps += (steps < 0) ? 1 : -1;
+			lastgood = *tmpkey;
+		}
+	}
+	error = (error) ? KEYERR_OUTOFBOUNDS : 0;
+
+	if (tmpkey != key)
+		delete tmpkey;
+}
+
+
+/******************************************************************************
+ * RawCom::setEntry	- Stores text for the module's current key
+ *
+ * ENT:	inbuf	- text to store
+ *	len	- length handed through to settext
+ */
+
+void RawCom::setEntry(const char *inbuf, long len) {
+	VerseKey *key = 0;
+	// see if we have a VerseKey * or descendant
+	try {
+		key = SWDYNAMIC_CAST(VerseKey, this->key);
+	}
+	catch ( ... ) {}
+	// if we don't have a VerseKey * descendant, create our own
+	if (!key)
+		key = new VerseKey(this->key);
+
+	settext(key->Testament(), key->Index(), inbuf, len);
+
+	if (this->key != key) // free our key if we created a VerseKey
+		delete key;
+}
+
+
+/******************************************************************************
+ * RawCom::linkEntry	- Links the module's current key entry to the entry
+ *				at another key
+ *
+ * ENT:	inkey	- key of the entry to link to (source)
+ */
+
+void RawCom::linkEntry(const SWKey *inkey) {
+	VerseKey *destkey = 0;
+	const VerseKey *srckey = 0;
+	// see if we have a VerseKey * or descendant
+	try {
+		destkey = SWDYNAMIC_CAST(VerseKey, this->key);
+	}
+	catch ( ... ) {}
+	// if we don't have a VerseKey * descendant, create our own
+	if (!destkey)
+		destkey = new VerseKey(this->key);
+
+	// see if we have a VerseKey * or descendant
+	try {
+		srckey = SWDYNAMIC_CAST(VerseKey, inkey);
+	}
+	catch ( ... ) {}
+	// if we don't have a VerseKey * descendant, create our own
+	if (!srckey)
+		srckey = new VerseKey(inkey);
+
+	linkentry(destkey->Testament(), destkey->Index(), srckey->Index());
+
+	if (this->key != destkey) // free our key if we created a VerseKey
+		delete destkey;
+
+	if (inkey != srckey) // free our key if we created a VerseKey
+		delete srckey;
+}
+
+
+/******************************************************************************
+ * RawCom::deleteEntry	- deletes this entry by storing an empty text for the
+ *				current key
+ */
+
+void RawCom::deleteEntry() {
+
+	VerseKey *key = 0;
+
+	try {
+		key = SWDYNAMIC_CAST(VerseKey, this->key);
+	}
+	catch ( ... ) {}
+	if (!key)
+		key = new VerseKey(this->key);
+
+	settext(key->Testament(), key->Index(), "");
+
+	if (key != this->key)
+		delete key;
+}
diff --git a/src/modules/comments/rawfiles/rawfiles.cpp b/src/modules/comments/rawfiles/rawfiles.cpp
new file mode 100644
index 0000000..c073a73
--- /dev/null
+++ b/src/modules/comments/rawfiles/rawfiles.cpp
@@ -0,0 +1,274 @@
+/******************************************************************************
+ *  rawfiles.cpp - code for class 'RawFiles'- a module whose index entries
+ *			hold the name of a separate file containing the
+ *			entry text.  Uses standard
+ *			files:	ot and nt using indexes ??.bks ??.cps ??.vss
+ */
+
+
+#include <ctype.h>
+#include <stdio.h>
+#include <fcntl.h>
+
+#ifndef __GNUC__
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+
+#include <string.h>
+#include <utilfuns.h>
+#include <rawverse.h>
+#include <rawfiles.h>
+#include <filemgr.h>
+
+#ifndef O_BINARY		// O_BINARY is needed in Borland C++ 4.53
+#define O_BINARY 0		// If it hasn't been defined then we probably
+#endif				// don't need it.
+
+
+ /******************************************************************************
+ * RawFiles Constructor - Initializes data for instance of RawFiles
+ *
+ * ENT:	ipath - path handed through to the RawVerse base (opened O_RDWR)
+ *	iname - Internal name for module
+ *	idesc - Name to display to user for module
+ *	idisp - Display object to use for displaying
+ *	enc, dir, mark, ilang - forwarded unchanged to SWCom
+ */
+
+RawFiles::RawFiles(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : RawVerse(ipath, O_RDWR), SWCom(iname, idesc, idisp, enc, dir, mark, ilang)
+{
+}
+
+
+/******************************************************************************
+ * RawFiles Destructor - Cleans up instance of RawFiles
+ */
+
+RawFiles::~RawFiles()
+{
+}
+
+
+/******************************************************************************
+ * RawFiles::getRawEntry	- Loads the file named by the current key's
+ *				index entry into the entry buffer
+ *
+ * RET: entrybuf - the file's contents, or an empty string when the key has
+ *	no entry or the file cannot be opened
+ */
+
+char *RawFiles::getRawEntry() {
+	FileDesc *datafile;
+	long  start = 0;
+	unsigned short size = 0;
+	char *tmpbuf;
+	VerseKey *key = 0;
+
+#ifndef _WIN32_WCE
+	try {
+#endif
+		key = SWDYNAMIC_CAST(VerseKey, this->key);
+#ifndef _WIN32_WCE
+	}
+	catch ( ... ) {}
+#endif
+	if (!key)
+		key = new VerseKey(this->key);
+
+	findoffset(key->Testament(), key->Index(), &start, &size);
+
+	// this module rebuilds the buffer on every call
+	if (entrybuf)
+		delete [] entrybuf;
+	entrybuf = 0;
+
+	if (size) {
+		// the index entry holds a file name relative to 'path'
+		tmpbuf   = new char [ (size + 2) + strlen(path) + 5 ];
+		sprintf(tmpbuf,"%s/",path);
+		readtext(key->Testament(), start, (size + 2), tmpbuf+strlen(tmpbuf));
+		datafile = FileMgr::systemFileMgr.open(tmpbuf, O_RDONLY|O_BINARY);
+		delete [] tmpbuf;
+		if (datafile->getFd() > 0) {
+			// keep the length in a long: the 16-bit 'size' used for the
+			// index entry would silently truncate files over 64 KiB
+			long filesize = lseek(datafile->getFd(), 0, SEEK_END);
+			if (filesize < 0)
+				filesize = 0;
+			// + 2 so even an empty file yields a terminated buffer
+			unsigned long bufsize = ((unsigned long)filesize + 2) * FILTERPAD;
+			entrybuf = new char [ bufsize ];
+			memset(entrybuf, 0, bufsize);
+			entrybufallocsize = bufsize;
+			lseek(datafile->getFd(), 0, SEEK_SET);
+			read(datafile->getFd(), entrybuf, filesize);
+//			preptext(entrybuf);
+		}
+		FileMgr::systemFileMgr.close(datafile);
+	}
+
+	// no entry, or the data file could not be opened: return ""
+	if (!entrybuf) {
+		entrybuf = new char [2];
+		entrybuf[0] = 0;
+		entrybuf[1] = 0;
+		entrybufallocsize = 2;
+	}
+
+	if (key != this->key)
+		delete key;
+
+	return entrybuf;
+}
+
+
+/******************************************************************************
+ * RawFiles::setEntry(char *)- Update the modules current key entry with
+ *				provided text
+ *
+ * ENT:	inbuf	- text to write into the entry's data file
+ *	len	- number of bytes to write; negative means strlen(inbuf)
+ */
+
+void RawFiles::setEntry(const char *inbuf, long len) {
+	FileDesc *datafile;
+	long  start = 0;
+	unsigned short size = 0;
+	char *tmpbuf;
+	VerseKey *key = 0;
+
+	len = (len<0)?strlen(inbuf):len;
+	try {
+		key = SWDYNAMIC_CAST(VerseKey, this->key);
+	}
+	catch ( ... ) {}
+	if (!key)
+		key = new VerseKey(this->key);
+
+	findoffset(key->Testament(), key->Index(), &start, &size);
+
+	if (size) {
+		// entry already names a data file: overwrite that file
+		tmpbuf = new char [ (size + 3) + strlen(path) + 1 ];
+		sprintf(tmpbuf, "%s/", path);
+		readtext(key->Testament(), start, (size + 2), tmpbuf+strlen(tmpbuf));
+	}
+	else {
+		// first write for this key: allocate a new file name and record it
+		// (without the "path/" part) in the index
+		tmpbuf = new char [ 16 + strlen(path) + 1 ];
+		sprintf(tmpbuf, "%s/%s", path, getnextfilename());
+		settext(key->Testament(), key->Index(), tmpbuf+strlen(path)+1);
+	}
+	datafile = FileMgr::systemFileMgr.open(tmpbuf, O_CREAT|O_WRONLY|O_BINARY|O_TRUNC);
+	delete [] tmpbuf;
+	if (datafile->getFd() > 0) {
+		write(datafile->getFd(), inbuf, len);
+	}
+	FileMgr::systemFileMgr.close(datafile);
+
+	if (key != this->key)
+		delete key;
+}
+
+
+/******************************************************************************
+ * RawFiles::linkEntry(SWKey *)- Link the modules current key entry with
+ *				another module entry
+ *
+ * ENT:	inkey	- key of the entry to link to (source).  The file name stored
+ *		  for inkey is copied into the current key's entry, so both
+ *		  keys name the same data file.  Nothing happens when inkey
+ *		  has no entry.
+ */
+
+void RawFiles::linkEntry(const SWKey *inkey) {
+
+	long  start = 0;
+	unsigned short size = 0;
+	char *tmpbuf;
+	const VerseKey *srckey = 0;
+
+	// source: the key we were handed (a temporary copy of *inkey* when it
+	// is not a VerseKey, matching RawCom::linkEntry)
+	try {
+		srckey = SWDYNAMIC_CAST(VerseKey, inkey);
+	}
+	catch ( ... ) {}
+	if (!srckey)
+		srckey = new VerseKey(inkey);
+
+	findoffset(srckey->Testament(), srckey->Index(), &start, &size);
+
+	if (size) {
+		tmpbuf = new char [ size + 3 ];
+		memset(tmpbuf, 0, size + 3);	// always terminated
+		readtext(srckey->Testament(), start, size + 2, tmpbuf);
+
+		// destination: the module's own current key
+		VerseKey *destkey = 0;
+		try {
+			destkey = SWDYNAMIC_CAST(VerseKey, this->key);
+		}
+		catch ( ... ) {}
+		if (!destkey)
+			destkey = new VerseKey(this->key);
+
+		settext(destkey->Testament(), destkey->Index(), tmpbuf);
+
+		if (destkey != this->key)
+			delete destkey;
+		delete [] tmpbuf;
+	}
+
+	if (srckey != inkey)
+		delete srckey;
+}
+
+
+/******************************************************************************
+ * RawFiles::deleteEntry	- deletes this entry by storing an empty text
+ *				for the current key
+ */
+
+void RawFiles::deleteEntry() {
+
+	VerseKey *key = 0;
+
+#ifndef _WIN32_WCE
+	try {
+#endif
+		key = SWDYNAMIC_CAST(VerseKey, this->key);
+#ifndef _WIN32_WCE
+	}
+	catch ( ... ) {}
+#endif
+	if (!key)
+		key = new VerseKey(this->key);
+
+	settext(key->Testament(), key->Index(), "");
+
+	if (key != this->key)
+		delete key;
+}
+
+
+/******************************************************************************
+ * RawFiles::getnextfilename - generates a valid filename in which to store
+ *				an entry
+ *
+ * RET: filename - 7-digit, zero-padded counter value, held in a static
+ *	buffer that the next call overwrites
+ */
+
+char *RawFiles::getnextfilename() {
+	static char incfile[255];
+	// The counter file holds exactly 4 bytes.  Read them into a zeroed
+	// 4-byte int: reading into an uninitialised 'long' leaves the upper
+	// bytes undefined wherever long is 8 bytes wide.
+	// NOTE(review): assumes sizeof(int) == 4 -- confirm for all targets.
+	int number = 0;
+	FileDesc *datafile;
+
+	// NOTE(review): 'path' longer than ~245 chars would overflow incfile
+	sprintf(incfile, "%s/incfile", path);
+	datafile = FileMgr::systemFileMgr.open(incfile, O_RDONLY|O_BINARY);
+	if (read(datafile->getFd(), &number, 4) != 4)
+		number = 0;
+	number++;
+	FileMgr::systemFileMgr.close(datafile);
+
+	// write the incremented counter back before handing out the old value
+	datafile = FileMgr::systemFileMgr.open(incfile, O_CREAT|O_WRONLY|O_BINARY|O_TRUNC);
+	write(datafile->getFd(), &number, 4);
+	FileMgr::systemFileMgr.close(datafile);
+	sprintf(incfile, "%.7ld", (long)(number-1));
+	return incfile;
+}
+
+
+/******************************************************************************
+ * RawFiles::createModule - writes a zeroed 4-byte counter to <path>/incfile,
+ *				then defers to RawVerse::createModule
+ *
+ * ENT:	path	- directory in which to create the module
+ *
+ * RET: whatever RawVerse::createModule returns
+ */
+
+char RawFiles::createModule (const char *path) {
+	char *incfile = new char [ strlen (path) + 16 ];
+	static long zero = 0;
+	FileDesc *datafile;
+
+	sprintf(incfile, "%s/incfile", path);
+	datafile = FileMgr::systemFileMgr.open(incfile, O_CREAT|O_WRONLY|O_BINARY|O_TRUNC);
+	delete [] incfile;
+	write(datafile->getFd(), &zero, 4);
+	FileMgr::systemFileMgr.close(datafile);
+
+	return RawVerse::createModule (path);
+}
+
+
+
diff --git a/src/modules/comments/swcom.cpp b/src/modules/comments/swcom.cpp
new file mode 100644
index 0000000..1feb0cf
--- /dev/null
+++ b/src/modules/comments/swcom.cpp
@@ -0,0 +1,30 @@
+/******************************************************************************
+ *  swcom.cpp  - code for base class 'SWCom'- The basis for all commentary
+ *			modules
+ */
+
+#include <swcom.h>
+
+
+/******************************************************************************
+ * SWCom Constructor - Initializes data for instance of SWCom
+ *
+ * ENT:	imodname - Internal name for module
+ *	imoddesc - Name to display to user for module
+ *	idisp	 - Display object to use for displaying
+ *	enc, dir, mark, ilang - forwarded unchanged to SWModule
+ */
+
+SWCom::SWCom(const char *imodname, const char *imoddesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang): SWModule(imodname, imoddesc, idisp, "Commentaries", enc, dir, mark, ilang)
+{
+	// replace the key set up by SWModule with one from CreateKey()
+	delete key;
+	key = CreateKey();
+}
+
+
+/******************************************************************************
+ * SWCom Destructor - Cleans up instance of SWCom
+ */
+
+SWCom::~SWCom()
+{
+}
diff --git a/src/modules/comments/zcom/zcom.cpp b/src/modules/comments/zcom/zcom.cpp
new file mode 100644
index 0000000..a0b35c3
--- /dev/null
+++ b/src/modules/comments/zcom/zcom.cpp
@@ -0,0 +1,255 @@
+/******************************************************************************
+ *  zcom.cpp - code for class 'zCom'- a module that reads compressed
+ *			commentary files (via zVerse)
+ */
+
+
+#include <ctype.h>
+#include <stdio.h>
+#include <fcntl.h>
+
+#ifndef __GNUC__
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+
+#include <string.h>
+#include <utilfuns.h>
+#include <zverse.h>
+#include <zcom.h>
+
+
+/******************************************************************************
+ * zCom Constructor - Initializes data for instance of zCom
+ *
+ * ENT:	ipath - path to data files
+ *		iname - Internal name for module
+ *		idesc - Name to display to user for module
+ *		iblockType - verse, chapter, book, etc.
of index chunks
+ *		icomp - Compressor object
+ *		idisp - Display object to use for displaying
+ *		enc, dir, mark, ilang - forwarded unchanged to SWCom
+ */
+
+zCom::zCom(const char *ipath, const char *iname, const char *idesc, int iblockType, SWCompress *icomp, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : zVerse(ipath, -1, iblockType, icomp), SWCom(iname, idesc, idisp, enc, dir, mark, ilang)/*, SWCompress()*/
+{
+	blockType = iblockType;
+	lastWriteKey = 0;
+}
+
+/******************************************************************************
+ * zCom Destructor - Cleans up instance of zCom
+ */
+
+zCom::~zCom() {
+	flushCache();
+
+	if (lastWriteKey)
+		delete lastWriteKey;
+}
+
+/******************************************************************************
+ * zCom::getRawEntry	- Reads the entry for the current key into the
+ *				module's entry buffer
+ *
+ * RET: entrybuf, holding the entry text (raw-filtered; preptext is applied
+ *	only when the module is not Unicode)
+ */
+char *zCom::getRawEntry() {
+	long  start = 0;
+	unsigned short size = 0;
+	VerseKey *key = 0;
+
+	try {
+		key = SWDYNAMIC_CAST(VerseKey, this->key);
+	}
+	catch ( ... ) {}
+	// if we don't have a VerseKey * descendant, create our own
+	if (!key)
+		key = new VerseKey(this->key);
+
+	findoffset(key->Testament(), key->Index(), &start, &size);
+	entrySize = size;        // support getEntrySize call
+
+	// grow the shared entry buffer only when this entry needs more room
+	unsigned long newsize = (size + 2) * FILTERPAD;
+	if (newsize > entrybufallocsize) {
+		if (entrybuf)
+			delete [] entrybuf;
+		entrybuf = new char [ newsize ];
+		entrybufallocsize = newsize;
+	}
+	*entrybuf = 0;
+
+	zreadtext(key->Testament(), start, (size + 2), entrybuf);
+
+	rawFilter(entrybuf, size, key);
+
+	if (!isUnicode())
+		preptext(entrybuf);
+
+	if (this->key != key) // free our key if we created a VerseKey
+		delete key;
+
+	return entrybuf;
+}
+
+
+/******************************************************************************
+ * zCom::sameBlock	- Tells whether two keys fall in the same block for the
+ *				module's blockType
+ *
+ * RET: true when testament matches and every component from the block
+ *	granularity upward (verse -> chapter -> book) matches
+ */
+bool zCom::sameBlock(VerseKey *k1, VerseKey *k2) {
+	if (k1->Testament() != k2->Testament())
+		return false;
+
+	// the cases cascade on purpose: a finer block type must also agree on
+	// every coarser component
+	switch (blockType) {
+	case VERSEBLOCKS:
+		if (k1->Verse() != k2->Verse())
+			return false;
+		// fall through
+	case CHAPTERBLOCKS:
+		if (k1->Chapter() != k2->Chapter())
+			return false;
+		// fall through
+	case BOOKBLOCKS:
+		if (k1->Book() != k2->Book())
+			return false;
+	}
+	return true;
+}
+
+/******************************************************************************
+ * zCom::setEntry	- Stores text for the module's current key, flushing the
+ *				cache first when the write lands in a different
+ *				block than the previous write
+ *
+ * ENT:	inbuf	- text to store
+ *	len	- length handed through to settext
+ */
+void zCom::setEntry(const char *inbuf, long len) {
+	VerseKey *key = 0;
+	// see if we have a VerseKey * or descendant
+	try {
+		key = SWDYNAMIC_CAST(VerseKey, this->key);
+	}
+	catch ( ... ) {}
+	// if we don't have a VerseKey * descendant, create our own
+	if (!key)
+		key = new VerseKey(this->key);
+
+
+	// see if we've jumped across blocks since last write
+	if (lastWriteKey) {
+		if (!sameBlock(lastWriteKey, key)) {
+			flushCache();
+		}
+		delete lastWriteKey;
+		lastWriteKey = 0;	// never leave a dangling pointer for the destructor
+	}
+
+	settext(key->Testament(), key->Index(), inbuf, len);
+
+	lastWriteKey = (VerseKey *)key->clone();	// must delete
+
+	if (this->key != key) // free our key if we created a VerseKey
+		delete key;
+}
+
+
+/******************************************************************************
+ * zCom::linkEntry	- Links the module's current key entry to the entry at
+ *				another key
+ *
+ * ENT:	inkey	- key of the entry to link to (source)
+ */
+void zCom::linkEntry(const SWKey *inkey) {
+	VerseKey *destkey = 0;
+	const VerseKey *srckey = 0;
+	// see if we have a VerseKey * or descendant
+	try {
+		destkey = SWDYNAMIC_CAST(VerseKey, this->key);
+	}
+	catch ( ... ) {}
+	// if we don't have a VerseKey * descendant, create our own
+	if (!destkey)
+		destkey = new VerseKey(this->key);
+
+	// see if we have a VerseKey * or descendant
+	try {
+		srckey = (const VerseKey *) SWDYNAMIC_CAST(VerseKey, inkey);
+	}
+	catch ( ... ) {
+	}
+	// if we don't have a VerseKey * descendant, create our own
+	if (!srckey)
+		srckey = new VerseKey(inkey);
+
+	linkentry(destkey->Testament(), destkey->Index(), srckey->Index());
+
+	if (this->key != destkey) // free our key if we created a VerseKey
+		delete destkey;
+
+	if (inkey != srckey) // free our key if we created a VerseKey
+		delete srckey;
+}
+
+/******************************************************************************
+ * zCom::deleteEntry	- deletes this entry by storing an empty text for the
+ *				current key
+ */
+
+void zCom::deleteEntry() {
+
+	VerseKey *key = 0;
+
+	try {
+		key = SWDYNAMIC_CAST(VerseKey, this->key);
+	}
+	catch ( ... ) {}
+	if (!key)
+		key = new VerseKey(this->key);
+
+	settext(key->Testament(), key->Index(), "");
+
+	if (key != this->key)
+		delete key;
+}
+
+
+/******************************************************************************
+ * zCom::increment	- Increments module key a number of entries
+ *
+ * ENT:	steps	- Number of entries to jump forward (negative: backward)
+ *
+ * Sets 'error' to KEYERR_OUTOFBOUNDS when the key runs off either end, in
+ * which case the key is put back on the last good entry; otherwise 0.
+ */
+
+void zCom::increment(int steps) {
+	long  start = 0;
+	unsigned short size = 0;
+	VerseKey *tmpkey = 0;
+
+	try {
+		tmpkey = SWDYNAMIC_CAST(VerseKey, key);
+	}
+	catch ( ... ) {}
+	if (!tmpkey)
+		tmpkey = new VerseKey(key);
+
+	findoffset(tmpkey->Testament(), tmpkey->Index(), &start, &size);
+
+	SWKey lastgood = *tmpkey;
+	while (steps) {
+		// remember where the previous entry lived so linked duplicates
+		// (same offset and size) can be recognised below
+		long laststart = start;
+		unsigned short lastsize = size;
+		(steps > 0) ? (*key)++ : (*key)--;
+		if (tmpkey != key)
+			delete tmpkey;
+		tmpkey = 0;
+		try {
+			tmpkey = SWDYNAMIC_CAST(VerseKey, key);
+		}
+		catch ( ... ) {}
+		if (!tmpkey)
+			tmpkey = new VerseKey(key);
+
+		if ((error = key->Error())) {
+			*key = lastgood;
+			break;
+		}
+		findoffset(tmpkey->Testament(), tmpkey->Index(), &start, &size);
+		if (
+			(((laststart != start) || (lastsize != size))	// we're a different entry
+				&& (start > 0) && (size))	// and we actually have a size
+				||(!skipConsecutiveLinks)) {	// or we don't want to skip consecutive links
+			steps += (steps < 0) ? 1 : -1;
+			lastgood = *tmpkey;
+		}
+	}
+	error = (error) ? KEYERR_OUTOFBOUNDS : 0;
+
+	if (tmpkey != key)
+		delete tmpkey;
+}
+
diff --git a/src/modules/common/entriesblk.cpp b/src/modules/common/entriesblk.cpp
new file mode 100644
index 0000000..d38cf53
--- /dev/null
+++ b/src/modules/common/entriesblk.cpp
@@ -0,0 +1,166 @@
+#include <entriesblk.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Block layout, as read and written by the methods below:
+//   [count(4)] [offset(4) size(4)] * count  [entry data ...]
+// Offsets are measured from the start of the block; offset 0 marks a
+// deleted entry.
+
+const int EntriesBlock::METAHEADERSIZE = 4;
+	// count(4);
+const int EntriesBlock::METAENTRYSIZE = 8;
+	// offset(4); size(4);
+
+// Takes a private copy of an existing raw block of 'size' bytes.
+EntriesBlock::EntriesBlock(const char *iBlock, unsigned long size) {
+	// always keep room for the count header so getCount() never reads
+	// past the allocation, even for an undersized input
+	unsigned long allocSize = (size > sizeof(__u32)) ? size : sizeof(__u32);
+	block = (char *)calloc(1, allocSize);
+	if (iBlock && size)
+		memcpy(block, iBlock, size);
+}
+
+
+// Creates an empty block: just a zeroed count header.
+EntriesBlock::EntriesBlock() {
+	block = (char *)calloc(1, sizeof(__u32));
+}
+
+
+EntriesBlock::~EntriesBlock() {
+	free(block);
+}
+
+
+// Stores the entry count in the header (converted with archtosword32).
+void EntriesBlock::setCount(int count) {
+	__u32 rawCount = archtosword32(count);
+	memcpy(block, &rawCount, sizeof(__u32));
+}
+
+
+// Reads the entry count from the header (converted with swordtoarch32).
+int EntriesBlock::getCount() {
+	__u32 count = 0;
+	memcpy(&count, block, sizeof(__u32));
+	count = swordtoarch32(count);
+	return count;
+}
+
+
+// Fetches the offset and size recorded for entry 'index'.
+// Both come back as 0 when index is out of range.
+void EntriesBlock::getMetaEntry(int index, unsigned long *offset, unsigned long *size) {
+	__u32 rawOffset = 0;
+	__u32 rawSize = 0;
+	*offset = 0;
+	*size = 0;
+	if (index < 0 || index >= getCount())	// assert 0 <= index < count
+		return;
+
+	// first 4 bytes is count, each 8 bytes after is each meta entry
+	memcpy(&rawOffset, block + METAHEADERSIZE + (index * METAENTRYSIZE), sizeof(rawOffset));
+	memcpy(&rawSize, block + METAHEADERSIZE + (index * METAENTRYSIZE) + sizeof(rawOffset), sizeof(rawSize));
+
+	*offset = (unsigned long)swordtoarch32(rawOffset);
+	*size   = (unsigned long)swordtoarch32(rawSize);
+}
+
+
+// Records offset and size for entry 'index'; ignored when out of range.
+void EntriesBlock::setMetaEntry(int index, unsigned long offset, unsigned long size) {
+	__u32 rawOffset = archtosword32(offset);
+	__u32 rawSize = archtosword32(size);
+
+	if (index < 0 || index >= getCount())	// assert 0 <= index < count
+		return;
+
+	// first 4 bytes is count, each 8 bytes after is each meta entry
+	memcpy(block + METAHEADERSIZE + (index * METAENTRYSIZE), &rawOffset, sizeof(rawOffset));
+	memcpy(block + METAHEADERSIZE + (index * METAENTRYSIZE) + sizeof(rawOffset), &rawSize, sizeof(rawSize));
+}
+
+
+// Returns the raw block and, through retSize, the number of bytes in use.
+const char *EntriesBlock::getRawData(unsigned long *retSize) {
+	int count = getCount();
+	// The header and the whole meta table are always in use, even when
+	// every entry has been deleted (offset 0, size 0).  Starting from the
+	// bare 4-byte header made addEntry() compute dataSize - dataStart
+	// with unsigned underflow in that case.
+	unsigned long max = METAHEADERSIZE;
+	if (count > 0)
+		max += (unsigned long)count * METAENTRYSIZE;
+	unsigned long offset;
+	unsigned long size;
+	for (int loop = 0; loop < count; loop++) {
+		getMetaEntry(loop, &offset, &size);
+		max = ((offset + size) > max) ? (offset + size) : max;
+	}
+	*retSize = max;
+	return block;
+}
+
+
+// Appends a null-terminated entry and returns its index.
+int EntriesBlock::addEntry(const char *entry) {
+	unsigned long dataSize;
+	getRawData(&dataSize);
+	unsigned long  len = strlen(entry);
+	unsigned long offset;
+	unsigned long size;
+	int count = getCount();
+	unsigned long dataStart = METAHEADERSIZE + (count * METAENTRYSIZE);
+	// new meta entry + new data size + 1 because null
+	block = (char *)realloc(block, dataSize + METAENTRYSIZE + len + 1);
+	// shift right to make room for new meta entry
+	memmove(block + dataStart + METAENTRYSIZE, block + dataStart, dataSize - dataStart);
+
+	// every live entry moved right by one meta entry
+	for (int loop = 0; loop < count; loop++) {
+		getMetaEntry(loop, &offset, &size);
+		if (offset) {	// if not a deleted entry
+			offset += METAENTRYSIZE;
+			setMetaEntry(loop, offset, size);
+		}
+	}
+
+	offset = dataSize;	// original dataSize before realloc
+	size = len + 1;
+	// add our text to the end
+	memcpy(block + offset + METAENTRYSIZE, entry, size);
+	// increment count
+	setCount(count + 1);
+	// add our meta entry
+	setMetaEntry(count, offset + METAENTRYSIZE, size);
+	// return index of our new entry
+	return count;
+}
+
+
+// Returns the text of an entry, or "" for a deleted or out-of-range index.
+const char *EntriesBlock::getEntry(int entryIndex) {
+	unsigned long offset;
+	unsigned long size;
+	static const char *empty = "";
+
+	getMetaEntry(entryIndex, &offset, &size);
+	return (offset) ? block+offset : empty;
+}
+
+
+// Returns the stored size of an entry (including its terminating null, as
+// recorded by addEntry), or 0 for a deleted or out-of-range index.
+unsigned long EntriesBlock::getEntrySize(int entryIndex) {
+	unsigned long offset;
+	unsigned long size;
+
+	getMetaEntry(entryIndex, &offset, &size);
+	return (offset) ? size : 0;
+}
+
+
+// Removes an entry's data and marks its meta entry deleted.  The index
+// slot itself stays, so other entries keep their indexes.
+void EntriesBlock::removeEntry(int entryIndex) {
+	unsigned long offset;
+	unsigned long size;
+	unsigned long dataSize;
+	getRawData(&dataSize);
+	getMetaEntry(entryIndex, &offset, &size);
+	int count = getCount();
+
+	if (!offset)	// already deleted (or index out of range)
+		return;
+
+	// shift left to retrieve space used for old entry
+	memmove(block + offset, block + offset + size, dataSize - (offset + size));
+
+	// fix offset for all entries after our entry that were shifted left
+	for (int loop = entryIndex + 1; loop < count; loop++) {
+		unsigned long laterOffset;
+		unsigned long laterSize;
+		getMetaEntry(loop, &laterOffset, &laterSize);
+		if (laterOffset) {	// if not a deleted entry
+			laterOffset -= size;
+			setMetaEntry(loop, laterOffset, laterSize);
+		}
+	}
+
+	// zero out our meta entry
+	setMetaEntry(entryIndex, 0L, 0);
+}
+
+
diff --git a/src/modules/common/lzsscomprs.cpp b/src/modules/common/lzsscomprs.cpp
new file mode 100644
index 0000000..3606fbc
--- /dev/null
+++ b/src/modules/common/lzsscomprs.cpp
@@ -0,0 +1,665 @@
+/******************************************************************************
+ *  lzsscomprs.cpp   - code for class 'LZSSCompress'- a driver class that
+ *			provides LZSS compression
+ */
+
+#include <string.h>
+#include <stdlib.h>
+#include <lzsscomprs.h>
+
+
+/******************************************************************************
+ * LZSSCompress Statics
+ */
+
+// m_ring_buffer is a text buffer.
It contains "nodes" of
+// uncompressed text, each addressable by a position and a length
+// within the ring buffer.  While decoding, the compressed stream may
+// carry such a position together with a byte count, meaning "copy
+// that many bytes from the ring buffer into the uncompressed output".
+//
+// The ring buffer is never stored with the compressed text.  It
+// begins empty and is rebuilt on the fly as text is decompressed.
+//
+// It holds N bytes, plus F - 1 extra bytes that make string
+// comparison easier.
+
+unsigned char LZSSCompress::m_ring_buffer[N + F - 1];
+
+// InsertNode() reports its result through m_match_position and
+// m_match_length: where in the ring buffer the match was found, and
+// how many characters at that position match the given string.
+
+short int LZSSCompress::m_match_position;
+short int LZSSCompress::m_match_length;
+
+// m_lson, m_rson and m_dad together describe the search trees: "dad"
+// is a node's parent, and each node has a left and a right "son"
+// (child).
+//
+// For i = 0 to N-1, m_lson[i] and m_rson[i] are the left and right
+// children of node i, and m_dad[i] is its parent.
+//
+// For i = 0 to 255, m_rson[N + i + 1] is the root of the tree for
+// strings that begin with the character i.  This relies on one-byte
+// characters.
+//
+// All of these hold values in 0...(N-1), so 2-byte integers are
+// enough and memory use can be reduced compared with full 4-byte
+// integers (on 32-bit applications).  Hence the "short int" type.
+
+short int LZSSCompress::m_lson[N + 1];
+short int LZSSCompress::m_rson[N + 257];
+short int LZSSCompress::m_dad[N + 1];
+
+
+/******************************************************************************
+ * LZSSCompress Constructor - Sets up an instance of LZSSCompress; nothing
+ *				beyond constructing the SWCompress base
+ */
+
+LZSSCompress::LZSSCompress() : SWCompress() {
+}
+
+
+/******************************************************************************
+ * LZSSCompress Destructor - Tears down an instance of LZSSCompress
+ */
+
+LZSSCompress::~LZSSCompress() {
+}
+
+
+/******************************************************************************
+ * LZSSCompress::InitTree	- Puts every tree node into its "empty"
+ *				  state.
+ */
+
+void LZSSCompress::InitTree(void) {
+	// Nodes 0 .. N-1: parent and both children are marked unused.  The
+	// child links would not strictly need resetting, but a known value
+	// helps when debugging and the cost only applies to compression.
+	for (int node = 0; node < N; ++node) {
+		m_dad[node]  = NOT_USED;
+		m_rson[node] = NOT_USED;
+		m_lson[node] = NOT_USED;
+	}
+
+	// Roots: one per possible first byte, kept in m_rson[N+1 .. N+256].
+	// These 256 extra slots are why m_rson is larger than m_lson.
+	for (int firstByte = 0; firstByte < 256; ++firstByte) {
+		m_rson[N + 1 + firstByte] = NOT_USED;
+	}
+}
+
+
+/******************************************************************************
+ * LZSSCompress::InsertNode	- This function inserts a string from the ring
+ *				  buffer into one of the trees.
It loads the + * match position and length member variables + * for the longest match. + * + * The string to be inserted is identified by + * the parameter Pos, A full F bytes are + * inserted. So, + * m_ring_buffer[Pos ... Pos+F-1] + * are inserted. + * + * If the matched length is exactly F, then an + * old node is removed in favor of the new one + * (because the old one will be deleted + * sooner). + * + * Note that Pos plays a dual role. It is + * used as both a position in the ring buffer + * and also as a tree node. + * m_ring_buffer[Pos] defines a character that + * is used to identify a tree node. + * + * ENT: pos - position in the buffer + */ + +void LZSSCompress::InsertNode(short int Pos) +{ + short int i; + short int p; + int cmp; + unsigned char * key; + +/* + ASSERT(Pos >= 0); + ASSERT(Pos < N); +*/ + + cmp = 1; + key = &(m_ring_buffer[Pos]); + + // The last 256 entries in m_rson contain the root nodes for + // strings that begin with a letter. Get an index for the + // first letter in this string. + + p = (short int) (N + 1 + key[0]); + + // Set the left and right tree nodes for this position to "not + // used." + + m_lson[Pos] = NOT_USED; + m_rson[Pos] = NOT_USED; + + // Haven't matched anything yet. + + m_match_length = 0; + + for ( ; ; ) { + if (cmp >= 0) { + if (m_rson[p] != NOT_USED) { + p = m_rson[p]; + } + else { + m_rson[p] = Pos; + m_dad[Pos] = p; + return; + } + } + else { + if (m_lson[p] != NOT_USED) { + p = m_lson[p]; + } + else { + m_lson[p] = Pos; + m_dad[Pos] = p; + return; + } + } + + // Should we go to the right or the left to look for the + // next match? 
/******************************************************************************
 * LZSSCompress::DeleteNode - Removes node "Node" from its tree.  A node whose
 *				parent link is NOT_USED is treated as not being
 *				in any tree and is left alone.
 *
 * ENT:	Node	- node to be removed
 */

void LZSSCompress::DeleteNode(short int Node)
{
	short int q;

/*
	ASSERT(Node >= 0);
	ASSERT(Node < (N+1));
*/

	if (m_dad[Node] == NOT_USED) { // not in tree, nothing to do
		return;
	}

	// Pick the replacement q: the only child when one side is empty, otherwise
	// the right-most node of the left subtree.
	if (m_rson[Node] == NOT_USED) {
		q = m_lson[Node];
	}
	else if (m_lson[Node] == NOT_USED) {
		q = m_rson[Node];
	}
	else {
		q = m_lson[Node];
		if (m_rson[q] != NOT_USED) {
			do {
				q = m_rson[q];
			} while (m_rson[q] != NOT_USED);

			// Unhook q from its parent, then give it Node's left subtree.
			m_rson[ m_dad[q] ] = m_lson[q];
			m_dad[ m_lson[q] ] = m_dad[q];
			m_lson[q] = m_lson[Node];
			m_dad[ m_lson[Node] ] = q;
		}

		m_rson[q] = m_rson[Node];
		m_dad[ m_rson[Node] ] = q;
	}

	// Splice q into Node's place under Node's parent.
	m_dad[q] = m_dad[Node];

	if (m_rson[ m_dad[Node] ] == Node) {
		m_rson[ m_dad[Node] ] = q;
	}
	else {
		m_lson[ m_dad[Node] ] = q;
	}

	m_dad[Node] = NOT_USED;
}
/******************************************************************************
 * LZSSCompress::Encode - Compresses the input stream into the output stream.
 *				All i/o goes through GetChars() and SendChars().
 *			NOTE: sets zlen (from zpos) on the normal exit path so
 *				the parent class knows the compressed length.
 *
 * Output format: groups of one flag byte followed by up to eight items.
 * A set flag bit means the item is one literal byte; a clear bit means the
 * item is a two-byte <position,length> pair.
 */

void LZSSCompress::Encode(void)
{
	short int i;						// an iterator
	short int r;						// node number in the binary tree
	short int s;						// position in the ring buffer
	unsigned short int len;			 // len of initial string
	short int last_match_length;		// length of last match
	short int code_buf_pos;			 // position in the output buffer
	unsigned char code_buf[17];		 // the output buffer
	unsigned char mask;				 // bit mask for byte 0 of out buf
	unsigned char c;					// character read from string

	// Start with a clean tree.

	InitTree();
	direct = 0;	// set direction needed by parent [Get|Send]Chars()

	// code_buf[0] holds the eight flag bits, code_buf[1..16] the items:
	// at most eight two-byte pairs, hence 17 bytes in total.

	code_buf[0] = 0;
	code_buf_pos = 1;

	// mask walks over the flag bits, lowest bit first.

	mask = 1;

	s = 0;
	r = (short int) N - (short int) F;

	// Pre-fill the ring buffer with spaces, except for the last F bytes,
	// which are loaded from the input right away.

	memset(m_ring_buffer, ' ', N - F);

	// len = number of bytes actually loaded (may be less than F).

	len = GetChars((char *) &(m_ring_buffer[r]), F);

	// Make sure there is something to be compressed.
	// NOTE(review): this early return leaves zlen untouched - confirm callers cope.

	if (len == 0)
		return;

	// Insert the F strings that start in the space-filled area just before r.

	for (i = 1; i <= F; i++) {
		InsertNode((short int) (r - i));
	}

	// Finally insert the string just read; this sets m_match_length and
	// m_match_position for the first pass of the loop.

	InsertNode(r);

	// Now that we're preloaded, continue till done.

	do {

		// m_match_length may be spuriously long near the end of text.

		if (m_match_length > len) {
			m_match_length = len;
		}

		// Short matches are emitted as a single literal byte.

		if (m_match_length < THRESHOLD) {
			m_match_length = 1;
			code_buf[0] |= mask;
			code_buf[code_buf_pos++] = m_ring_buffer[r];
		}

		// Otherwise emit a pair: low 8 bits of the position, then the high
		// position bits in the upper nibble and (length - THRESHOLD) below.

		else {
			code_buf[code_buf_pos++] = (unsigned char) m_match_position;
			code_buf[code_buf_pos++] = (unsigned char) (
				((m_match_position >> 4) & 0xf0) |
				(m_match_length - THRESHOLD) );
		}

		// Advance to the next flag bit.

		mask = (unsigned char) (mask << 1);

		// mask wrapped to 0: eight items are buffered, flush the group.

		if (!mask) {
			// code_buf_pos is the number of bytes held in code_buf.

			SendChars((char *) code_buf, code_buf_pos);

			// Reset for next group...

			code_buf[0] = 0;
			code_buf_pos = 1;
			mask = 1;
		}

		last_match_length = m_match_length;

		// Replace the bytes just encoded with fresh input, one at a time.

		for (i = 0; i < last_match_length; i++) {
			// Get next character...

			if (GetChars((char *) &c, 1) != 1)
				break;

			// Delete "old strings"

			DeleteNode(s);

			// Store the byte.  Positions 0 .. F-2 are mirrored at s + N so
			// that string comparisons near the end of the buffer can read
			// past index N-1 without wrapping the index.

			m_ring_buffer[s] = c;

			if (s < F - 1) {
				m_ring_buffer[s + N] = c;
			}

			// Advance and wrap; relies on N being a power of 2.

			s = (short int) ( (s + 1) & (N - 1) );
			r = (short int) ( (r + 1) & (N - 1) );

			// Register the string found in m_ring_buffer[r..r+F-1].

			InsertNode(r);
		}

		// If the loop above stopped early, the input ran dry: keep
		// advancing without reading, shrinking len as we go.

		while (i++ < last_match_length) {
			DeleteNode(s);

			s = (short int) ( (s + 1) & (N - 1) );
			r = (short int) ( (r + 1) & (N - 1) );

			// This is the only place inside the loop where len changes;
			// len reaching 0 terminates the do...while().

			if (--len) {
				InsertNode(r);	   /* buffer may not be empty. */
			}
		}

		// Continue until there are no more characters to be compressed.
	} while (len > 0);

	// Flush a partially filled group, if any.

	if (code_buf_pos > 1) {
		// code_buf_pos is the number of bytes to send.

		SendChars((char *) code_buf, code_buf_pos);
	}


	// must set zlen for parent class to know length of compressed buffer
	zlen = zpos;
}
+ + if (--len) { + InsertNode(r); /* buffer may not be empty. */ + } + } + + // End of do...while() loop. Continue processing until there + // are no more characters to be compressed. The variable + // "len" is used to signal this condition. + } while (len > 0); + + // There could still be something in the output buffer. Send it + // now. + + if (code_buf_pos > 1) { + // code_buf is the encoded string to send. + // code_buf_ptr is the number of characters. + + SendChars((char *) code_buf, code_buf_pos); + } + + + // must set zlen for parent class to know length of compressed buffer + zlen = zpos; +} + + +/****************************************************************************** + * LZSSCompress::Decode - This function "decodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + */ + +void LZSSCompress::Decode(void) +{ + int k; + int r; // node number + unsigned char c[F]; // an array of chars + unsigned char flags; // 8 bits of flags + int flag_count; // which flag we're on + short int pos; // position in the ring buffer + short int len; // number of chars in ring buffer + unsigned long totalLen = 0; + + direct = 1; // set direction needed by parent [Get|Send]Chars() + + // Initialize the ring buffer with a common string. + // + // Note that the last F bytes of the ring buffer are not filled. + + memset(m_ring_buffer, ' ', N - F); + + r = N - F; + + flags = (char) 0; + flag_count = 0; + + for ( ; ; ) { + + // If there are more bits of interest in this flag, then + // shift that next interesting bit into the 1's position. + // + // If this flag has been exhausted, the next byte must + // be a flag. + + if (flag_count > 0) { + flags = (unsigned char) (flags >> 1); + flag_count--; + } + else { + // Next byte must be a flag. + + if (GetChars((char *) &flags, 1) != 1) + break; + + // Set the flag counter. 
While at first it might appear + // that this should be an 8 since there are 8 bits in the + // flag, it should really be a 7 because the shift must + // be performed 7 times in order to see all 8 bits. + + flag_count = 7; + } + + // If the low order bit of the flag is now set, then we know + // that the next byte is a single, unencoded character. + + if (flags & 1) { + if (GetChars((char *) c, 1) != 1) + break; + + if (SendChars((char *) c, 1) != 1) { + totalLen++; + break; + } + + // Add to buffer, and increment to next spot. Wrap at end. + + m_ring_buffer[r] = c[0]; + r = (short int) ( (r + 1) & (N - 1) ); + } + + // Otherwise, we know that the next two bytes are a + // <position,length> pair. The position is in 12 bits and + // the length is in 4 bits. + + else { + // Original code: + // if ((i = getc(infile)) == EOF) + // break; + // if ((j = getc(infile)) == EOF) + // break; + // i |= ((j & 0xf0) << 4); + // j = (j & 0x0f) + THRESHOLD; + // + // I've modified this to only make one input call, and + // have changed the variable names to something more + // obvious. + + if (GetChars((char *) c, 2) != 2) + break; + + // Convert these two characters into the position and + // length. Note that the length is always at least + // THRESHOLD, which is why we're able to get a length + // of 18 out of only 4 bits. + + pos = (short int) ( c[0] | ((c[1] & 0xf0) << 4) ); + + len = (short int) ( (c[1] & 0x0f) + THRESHOLD ); + + // There are now "len" characters at position "pos" in + // the ring buffer that can be pulled out. Note that + // len is never more than F. + + for (k = 0; k < len; k++) { + c[k] = m_ring_buffer[(pos + k) & (N - 1)]; + + // Add to buffer, and increment to next spot. Wrap at end. + + m_ring_buffer[r] = c[k]; + r = (short int) ( (r + 1) & (N - 1) ); + } + + // Add the "len" :characters to the output stream. 
+ + if (SendChars((char *) c, len) != (unsigned int)len) { + totalLen += len; + break; + } + } + } + slen = totalLen; +} diff --git a/src/modules/common/rawstr.cpp b/src/modules/common/rawstr.cpp new file mode 100644 index 0000000..c7363d9 --- /dev/null +++ b/src/modules/common/rawstr.cpp @@ -0,0 +1,551 @@ +/****************************************************************************** + * rawstr.cpp - code for class 'RawStr'- a module that reads raw text + * files: ot and nt using indexs ??.bks ??.cps ??.vss + * and provides lookup and parsing functions based on + * class StrKey + */ + + +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <stdlib.h> +#include <utilfuns.h> +#include <rawstr.h> +#include <sysdata.h> +/****************************************************************************** + * RawStr Statics + */ + +int RawStr::instance = 0; +char RawStr::nl = '\n'; + + +/****************************************************************************** + * RawStr Constructor - Initializes data for instance of RawStr + * + * ENT: ipath - path of the directory where data and index files are located. + * be sure to include the trailing separator (e.g. '/' or '\') + * (e.g. 'modules/texts/rawtext/webster/') + */ + +RawStr::RawStr(const char *ipath, int fileMode) +{ + char buf[127]; + + lastoff = -1; + path = 0; + stdstr(&path, ipath); + +#ifndef O_BINARY // O_BINARY is needed in Borland C++ 4.53 +#define O_BINARY 0 // If it hasn't been defined than we probably +#endif // don't need it. 
+ + if (fileMode == -1) { // try read/write if possible + fileMode = O_RDWR; + } + + sprintf(buf, "%s.idx", path); + idxfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s.dat", path); + datfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + if (datfd < 0) { + sprintf(buf, "Error: %d", errno); + perror(buf); + } + + instance++; +} + + +/****************************************************************************** + * RawStr Destructor - Cleans up instance of RawStr + */ + +RawStr::~RawStr() +{ + if (path) + delete [] path; + + --instance; + + FileMgr::systemFileMgr.close(idxfd); + FileMgr::systemFileMgr.close(datfd); +} + + +/****************************************************************************** + * RawStr::getidxbufdat - Gets the index string at the given idx offset + * NOTE: buf is allocated and must be freed by + * calling function + * + * ENT: ioffset - offset in dat file to lookup + * buf - address of pointer to allocate for storage of string + */ + +void RawStr::getidxbufdat(long ioffset, char **buf) +{ + int size; + char ch; + if (datfd > 0) { + lseek(datfd->getFd(), ioffset, SEEK_SET); + for (size = 0; read(datfd->getFd(), &ch, 1) == 1; size++) { + if ((ch == '\\') || (ch == 10) || (ch == 13)) + break; + } + *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1); + if (size) { + lseek(datfd->getFd(), ioffset, SEEK_SET); + read(datfd->getFd(), *buf, size); + } + (*buf)[size] = 0; + toupperstr_utf8(*buf); + } + else { + *buf = (*buf) ? 
(char *)realloc(*buf, 1) : (char *)malloc(1); + **buf = 0; + } +} + + +/****************************************************************************** + * RawStr::getidxbuf - Gets the index string at the given idx offset + * NOTE: buf is allocated and must be freed by + * calling function + * + * ENT: ioffset - offset in idx file to lookup + * buf - address of pointer to allocate for storage of string + */ + +void RawStr::getidxbuf(long ioffset, char **buf) +{ + char *trybuf, *targetbuf; + long offset; + + if (idxfd > 0) { + lseek(idxfd->getFd(), ioffset, SEEK_SET); + read(idxfd->getFd(), &offset, 4); + + offset = swordtoarch32(offset); + + getidxbufdat(offset, buf); + toupperstr_utf8(*buf); + } +} + + +/****************************************************************************** + * RawStr::findoffset - Finds the offset of the key string from the indexes + * + * ENT: key - key string to lookup + * start - address to store the starting offset + * size - address to store the size of the entry + * away - number of entries before of after to jump + * (default = 0) + * + * RET: error status -1 general error; -2 new file + */ + +signed char RawStr::findoffset(const char *ikey, long *start, unsigned short *size, long away, long *idxoff) +{ + char *trybuf, *targetbuf, *key, quitflag = 0; + signed char retval = -1; + long headoff, tailoff, tryoff = 0, maxoff = 0; + + if (idxfd->getFd() >=0) { + tailoff = maxoff = lseek(idxfd->getFd(), 0, SEEK_END) - 6; + retval = (tailoff >= 0) ? 0 : -2; // if NOT new file + if (*ikey) { + headoff = 0; + + key = new char [ strlen(ikey) + 1 ]; + strcpy(key, ikey); + toupperstr_utf8(key); + + trybuf = 0; + + while (headoff < tailoff) { + tryoff = (lastoff == -1) ? 
headoff + ((((tailoff / 6) - (headoff / 6))) / 2) * 6 : lastoff; + lastoff = -1; + getidxbuf(tryoff, &trybuf); + + if (!*trybuf && tryoff) { // In case of extra entry at end of idx (not first entry) + tryoff += (tryoff > (maxoff / 2))?-6:6; + retval = -1; + break; + } + + if (!strcmp(key, trybuf)) + break; + + int diff = strcmp(key, trybuf); + if (diff < 0) + tailoff = (tryoff == headoff) ? headoff : tryoff; + else headoff = tryoff; + if (tailoff == headoff + 6) { + if (quitflag++) + headoff = tailoff; + } + } + if (headoff >= tailoff) + tryoff = headoff; + if (trybuf) + free(trybuf); + delete [] key; + } + else tryoff = 0; + + lseek(idxfd->getFd(), tryoff, SEEK_SET); + + *start = *size = 0; + read(idxfd->getFd(), start, 4); + read(idxfd->getFd(), size, 2); + if (idxoff) + *idxoff = tryoff; + + *start = swordtoarch32(*start); + *size = swordtoarch16(*size); + + while (away) { + long laststart = *start; + unsigned short lastsize = *size; + long lasttry = tryoff; + tryoff += (away > 0) ? 6 : -6; + + bool bad = false; + if (((tryoff + (away*6)) < -6) || (tryoff + (away*6) > (maxoff+6))) + bad = true; + else if (lseek(idxfd->getFd(), tryoff, SEEK_SET) < 0) + bad = true; + if (bad) { + retval = -1; + *start = laststart; + *size = lastsize; + tryoff = lasttry; + if (idxoff) + *idxoff = tryoff; + break; + } + read(idxfd->getFd(), start, 4); + read(idxfd->getFd(), size, 2); + if (idxoff) + *idxoff = tryoff; + + *start = swordtoarch32(*start); + *size = swordtoarch16(*size); + + if (((laststart != *start) || (lastsize != *size)) && (*start >= 0) && (*size)) + away += (away < 0) ? 1 : -1; + } + + lastoff = tryoff; + } + else { + *start = 0; + *size = 0; + if (idxoff) + *idxoff = 0; + retval = -1; + } + return retval; +} + + +/****************************************************************************** + * RawStr::preptext - Prepares the text before returning it to external + * objects + * + * ENT: buf - buffer where text is stored and where to store the prep'd + * text. 
/******************************************************************************
 * RawStr::preptext	- Prepares the text before returning it to external
 *				objects.  Works in place: the result is never
 *				longer than the input.
 *
 * Visible rules: CR/LF bytes before the first other byte are dropped; a CR
 * becomes nl; a lone LF becomes a single space (unless the next byte already
 * is one); a second consecutive LF emits nl; trailing LF/space bytes are
 * cut off.
 *
 * ENT:	buf	- buffer where text is stored and where to store the prep'd
 *			text.
 */

void RawStr::preptext(char *buf) {
	char *to, *from, space = 0, cr = 0, realdata = 0, nlcnt = 0;

	for (to = from = buf; *from; from++) {
		switch (*from) {
		case 10:
			if (!realdata)
				continue;
			space = (cr) ? 0 : 1;	// LF right after CR adds no extra space
			cr = 0;
			nlcnt++;
			if (nlcnt > 1) {
//				*to++ = nl;
				*to++ = nl;
//				nlcnt = 0;
			}
			continue;
		case 13:
			if (!realdata)
				continue;
			*to++ = nl;
			space = 0;
			cr = 1;
			continue;
		}
		realdata = 1;
		nlcnt = 0;
		if (space) {
			space = 0;
			if (*from != ' ') {
				// emit the pending space, then revisit this byte
				*to++ = ' ';
				from--;
				continue;
			}
		}
		*to++ = *from;
	}
	*to = 0;

	while (to > (buf+1)) {			// remove trailing excess
		to--;
		if ((*to == 10) || (*to == ' '))
			*to = 0;
		else break;
	}
}


/******************************************************************************
 * RawStr::readtext	- gets text at a given offset, following "@LINK" entries
 *
 * ENT:
 *	istart	- starting offset where the text is located in the file
 *	isize	- in: size of text entry; incremented by one on every pass and
 *		  overwritten by findoffset() when a link is followed
 *	idxbuf	- receives a new[] buffer holding the key found at istart
 *	buf	- receives a new[] buffer holding the entry text (key line
 *		  stripped)
 *
 * Any buffers already held in *idxbuf / *buf are delete[]d first.
 */

void RawStr::readtext(long istart, unsigned short *isize, char **idxbuf, char **buf)
{
	char *ch;
	char *idxbuflocal = 0;
	getidxbufdat(istart, &idxbuflocal);	// key of the entry originally asked for
	long start = istart;

	do {
		if (*idxbuf)
			delete [] *idxbuf;
		if (*buf)
			delete [] *buf;
		*buf = new char [ ++(*isize) * FILTERPAD ];
		*idxbuf = new char [ (*isize) * FILTERPAD ];

		memset(*buf, 0, *isize);
		lseek(datfd->getFd(), start, SEEK_SET);
		read(datfd->getFd(), *buf, (int)((*isize) - 1));

		for (ch = *buf; *ch; ch++) {		// skip over index string
			if (*ch == 10) {
				ch++;
				break;
			}
		}
		memmove(*buf, ch, *isize - (unsigned long)(ch-*buf));

		// resolve link
		if (!strncmp(*buf, "@LINK", 5)) {
			for (ch = *buf; *ch; ch++) {		// null before nl
				if (*ch == 10) {
					*ch = 0;
					break;
				}
			}
			// target key starts after "@LINK " (6 characters)
			findoffset(*buf + 6, &start, isize);
		}
		else break;
	}
	while (true);	// while we're resolving links

	if (idxbuflocal) {
		// copy the key, clipped to the size of the entry finally read
		int localsize = strlen(idxbuflocal);
		localsize = (localsize < (*isize - 1)) ? localsize : (*isize - 1);
		strncpy(*idxbuf, idxbuflocal, localsize);
		(*idxbuf)[localsize] = 0;
		free(idxbuflocal);
	}
}
localsize : (*isize - 1); + strncpy(*idxbuf, idxbuflocal, localsize); + (*idxbuf)[localsize] = 0; + free(idxbuflocal); + } +} + + +/****************************************************************************** + * RawLD::settext - Sets text for current offset + * + * ENT: key - key for this entry + * buf - buffer to store + * len - length of buffer (0 - null terminated) + */ + +void RawStr::settext(const char *ikey, const char *buf, long len) +{ + + long start, outstart; + long idxoff; + long endoff; + long shiftSize; + unsigned short size; + unsigned short outsize; + static const char nl[] = {13, 10}; + char *tmpbuf = 0; + char *key = 0; + char *dbKey = 0; + char *idxBytes = 0; + char *outbuf = 0; + char *ch = 0; + + char errorStatus = findoffset(ikey, &start, &size, 0, &idxoff); + stdstr(&key, ikey); + toupperstr_utf8(key); + + len = (len < 0) ? strlen(buf) : len; + + getidxbufdat(start, &dbKey); + + if (strcmp(key, dbKey) < 0) { + } + else if (strcmp(key, dbKey) > 0) { + if (errorStatus != -2) // not a new file + idxoff += 6; + else idxoff = 0; + } + else if ((!strcmp(key, dbKey)) && (len>0 /*we're not deleting*/)) { // got absolute entry + do { + tmpbuf = new char [ size + 2 ]; + memset(tmpbuf, 0, size + 2); + lseek(datfd->getFd(), start, SEEK_SET); + read(datfd->getFd(), tmpbuf, (int)(size - 1)); + + for (ch = tmpbuf; *ch; ch++) { // skip over index string + if (*ch == 10) { + ch++; + break; + } + } + memmove(tmpbuf, ch, size - (unsigned short)(ch-tmpbuf)); + + // resolve link + if (!strncmp(tmpbuf, "@LINK", 5) && (len)) { + for (ch = tmpbuf; *ch; ch++) { // null before nl + if (*ch == 10) { + *ch = 0; + break; + } + } + findoffset(tmpbuf + 6, &start, &size, 0, &idxoff); + } + else break; + } + while (true); // while we're resolving links + } + + endoff = lseek(idxfd->getFd(), 0, SEEK_END); + + shiftSize = endoff - idxoff; + + if (shiftSize > 0) { + idxBytes = new char [ shiftSize ]; + lseek(idxfd->getFd(), idxoff, SEEK_SET); + read(idxfd->getFd(), idxBytes, 
shiftSize); + } + + outbuf = new char [ len + strlen(key) + 5 ]; + sprintf(outbuf, "%s%c%c", key, 13, 10); + size = strlen(outbuf); + memcpy(outbuf + size, buf, len); + size = outsize = size + (len); + + start = outstart = lseek(datfd->getFd(), 0, SEEK_END); + + outstart = archtosword32(start); + outsize = archtosword16(size); + + lseek(idxfd->getFd(), idxoff, SEEK_SET); + if (len > 0) { + lseek(datfd->getFd(), start, SEEK_SET); + write(datfd->getFd(), outbuf, (int)size); + + // add a new line to make data file easier to read in an editor + write(datfd->getFd(), &nl, 2); + + write(idxfd->getFd(), &outstart, 4); + write(idxfd->getFd(), &outsize, 2); + if (idxBytes) { + write(idxfd->getFd(), idxBytes, shiftSize); + delete [] idxBytes; + } + } + else { // delete entry + if (idxBytes) { + write(idxfd->getFd(), idxBytes+6, shiftSize-6); + lseek(idxfd->getFd(), -1, SEEK_CUR); // last valid byte + FileMgr::systemFileMgr.trunc(idxfd); // truncate index + delete [] idxBytes; + } + } + + delete [] key; + delete [] outbuf; + free(dbKey); +} + + +/****************************************************************************** + * RawLD::linkentry - links one entry to another + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * destidxoff - dest offset into .vss + * srcidxoff - source offset into .vss + */ + +void RawStr::linkentry(const char *destkey, const char *srckey) { + char *text = new char [ strlen(destkey) + 7 ]; + sprintf(text, "@LINK %s", destkey); + settext(srckey, text); + delete [] text; +} + + +/****************************************************************************** + * RawLD::CreateModule - Creates new module files + * + * ENT: path - directory to store module files + * RET: error status + */ + +signed char RawStr::createModule(const char *ipath) +{ + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd, *fd2; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) 
+ path[strlen(path)-1] = 0; + + sprintf(buf, "%s.dat", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s.idx", path); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + FileMgr::systemFileMgr.close(fd2); + + delete [] path; + + return 0; +} diff --git a/src/modules/common/rawstr4.cpp b/src/modules/common/rawstr4.cpp new file mode 100644 index 0000000..da0789b --- /dev/null +++ b/src/modules/common/rawstr4.cpp @@ -0,0 +1,555 @@ +/****************************************************************************** + * rawstr.cpp - code for class 'RawStr'- a module that reads raw text + * files: ot and nt using indexs ??.bks ??.cps ??.vss + * and provides lookup and parsing functions based on + * class StrKey + */ + +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <stdlib.h> +#include <utilfuns.h> +#include <rawstr4.h> +#include <sysdata.h> + +/****************************************************************************** + * RawStr Statics + */ + +int RawStr4::instance = 0; + + +/****************************************************************************** + * RawStr Constructor - Initializes data for instance of RawStr + * + * ENT: ipath - path of the directory where data and index files are located. + * be sure to include the trailing separator (e.g. '/' or '\') + * (e.g. 'modules/texts/rawtext/webster/') + */ + +RawStr4::RawStr4(const char *ipath, int fileMode) +{ + char buf[127]; + + nl = '\n'; + lastoff = -1; + path = 0; + stdstr(&path, ipath); + +#ifndef O_BINARY // O_BINARY is needed in Borland C++ 4.53 +#define O_BINARY 0 // If it hasn't been defined than we probably +#endif // don't need it. 
+ + if (fileMode == -1) { // try read/write if possible + fileMode = O_RDWR; + } + + sprintf(buf, "%s.idx", path); + idxfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s.dat", path); + datfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + if (datfd < 0) { + sprintf(buf, "Error: %d", errno); + perror(buf); + } + + instance++; +} + + +/****************************************************************************** + * RawStr Destructor - Cleans up instance of RawStr + */ + +RawStr4::~RawStr4() +{ + if (path) + delete [] path; + + --instance; + + FileMgr::systemFileMgr.close(idxfd); + FileMgr::systemFileMgr.close(datfd); +} + + +/****************************************************************************** + * RawStr4::getidxbufdat - Gets the index string at the given idx offset + * NOTE: buf is allocated and must be freed by + * calling function + * + * ENT: ioffset - offset in dat file to lookup + * buf - address of pointer to allocate for storage of string + */ + +void RawStr4::getidxbufdat(long ioffset, char **buf) { + int size; + char ch; + if (datfd > 0) { + lseek(datfd->getFd(), ioffset, SEEK_SET); + for (size = 0; read(datfd->getFd(), &ch, 1) == 1; size++) { + if ((ch == '\\') || (ch == 10) || (ch == 13)) + break; + } + *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1); + if (size) { + lseek(datfd->getFd(), ioffset, SEEK_SET); + read(datfd->getFd(), *buf, size); + } + (*buf)[size] = 0; + toupperstr_utf8(*buf); + } + else { + *buf = (*buf) ? 
(char *)realloc(*buf, 1) : (char *)malloc(1); + **buf = 0; + } +} + + +/****************************************************************************** + * RawStr4::getidxbuf - Gets the index string at the given idx offset + * NOTE: buf is allocated and must be freed by + * calling function + * + * ENT: ioffset - offset in idx file to lookup + * buf - address of pointer to allocate for storage of string + */ + +void RawStr4::getidxbuf(long ioffset, char **buf) +{ + char *trybuf, *targetbuf; + long offset; + + if (idxfd > 0) { + lseek(idxfd->getFd(), ioffset, SEEK_SET); + read(idxfd->getFd(), &offset, 4); + + offset = swordtoarch32(offset); + + getidxbufdat(offset, buf); + for (trybuf = targetbuf = *buf; *trybuf; trybuf++, targetbuf++) { + *targetbuf = *trybuf; + } + *targetbuf = 0; + trybuf = 0; + toupperstr_utf8(targetbuf); + } +} + + +/****************************************************************************** + * RawStr4::findoffset - Finds the offset of the key string from the indexes + * + * ENT: key - key string to lookup + * start - address to store the starting offset + * size - address to store the size of the entry + * away - number of entries before of after to jump + * (default = 0) + * + * RET: error status -1 general error; -2 new file + */ + +signed char RawStr4::findoffset(const char *ikey, long *start, unsigned long *size, long away, long *idxoff) +{ + char *trybuf, *targetbuf, *key, quitflag = 0; + signed char retval = -1; + long headoff, tailoff, tryoff = 0, maxoff = 0; + + if (idxfd->getFd() >=0) { + tailoff = maxoff = lseek(idxfd->getFd(), 0, SEEK_END) - 8; + retval = (tailoff >= 0) ? 0 : -2; // if NOT new file + if (*ikey) { + headoff = 0; + + key = new char [ strlen(ikey) + 1 ]; + strcpy(key, ikey); + toupperstr_utf8(key); + + trybuf = 0; + + while (headoff < tailoff) { + tryoff = (lastoff == -1) ? 
headoff + ((((tailoff / 8) - (headoff / 8))) / 2) * 8 : lastoff; + lastoff = -1; + getidxbuf(tryoff, &trybuf); + + if (!*trybuf) { // In case of extra entry at end of idx + tryoff += (tryoff > (maxoff / 2))?-8:8; + retval = -1; + break; + } + + if (!strcmp(key, trybuf)) + break; + + int diff = strcmp(key, trybuf); + if (diff < 0) + tailoff = (tryoff == headoff) ? headoff : tryoff; + else headoff = tryoff; + if (tailoff == headoff + 8) { + if (quitflag++) + headoff = tailoff; + } + } + if (headoff >= tailoff) + tryoff = headoff; + if (trybuf) + free(trybuf); + delete [] key; + } + else tryoff = 0; + + lseek(idxfd->getFd(), tryoff, SEEK_SET); + + *start = *size = 0; + read(idxfd->getFd(), start, 4); + read(idxfd->getFd(), size, 4); + if (idxoff) + *idxoff = tryoff; + + *start = swordtoarch32(*start); + *size = swordtoarch32(*size); + + while (away) { + long laststart = *start; + unsigned long lastsize = *size; + long lasttry = tryoff; + tryoff += (away > 0) ? 8 : -8; + + bool bad = false; + if (((tryoff + (away*8)) < -8) || (tryoff + (away*8) > (maxoff+8))) + bad = true; + else if (lseek(idxfd->getFd(), tryoff, SEEK_SET) < 0) + bad = true; + if (bad) { + retval = -1; + *start = laststart; + *size = lastsize; + tryoff = lasttry; + if (idxoff) + *idxoff = tryoff; + break; + } + read(idxfd->getFd(), start, 4); + read(idxfd->getFd(), size, 4); + if (idxoff) + *idxoff = tryoff; + + *start = swordtoarch32(*start); + *size = swordtoarch32(*size); + + if (((laststart != *start) || (lastsize != *size)) && (*start >= 0) && (*size)) + away += (away < 0) ? 1 : -1; + } + + lastoff = tryoff; + } + else { + *start = 0; + *size = 0; + if (idxoff) + *idxoff = 0; + retval = -1; + } + return retval; +} + + +/****************************************************************************** + * RawStr4::preptext - Prepares the text before returning it to external + * objects + * + * ENT: buf - buffer where text is stored and where to store the prep'd + * text. 
/******************************************************************************
 * RawStr4::preptext	- Prepares the text before returning it to external
 *				objects.  Works in place: the result is never
 *				longer than the input.
 *
 * Visible rules: CR/LF bytes before the first other byte are dropped; a CR
 * becomes nl; a lone LF becomes a single space (unless the next byte already
 * is one); a second consecutive LF emits nl; trailing LF/space bytes are
 * cut off.
 *
 * ENT:	buf	- buffer where text is stored and where to store the prep'd
 *			text.
 */

void RawStr4::preptext(char *buf)
{
	char *to, *from, space = 0, cr = 0, realdata = 0, nlcnt = 0;

	for (to = from = buf; *from; from++) {
		switch (*from) {
		case 10:
			if (!realdata)
				continue;
			space = (cr) ? 0 : 1;	// LF right after CR adds no extra space
			cr = 0;
			nlcnt++;
			if (nlcnt > 1) {
//				*to++ = nl;
				*to++ = nl;
//				nlcnt = 0;
			}
			continue;
		case 13:
			if (!realdata)
				continue;
			*to++ = nl;
			space = 0;
			cr = 1;
			continue;
		}
		realdata = 1;
		nlcnt = 0;
		if (space) {
			space = 0;
			if (*from != ' ') {
				// emit the pending space, then revisit this byte
				*to++ = ' ';
				from--;
				continue;
			}
		}
		*to++ = *from;
	}
	*to = 0;

	while (to > (buf+1)) {			// remove trailing excess
		to--;
		if ((*to == 10) || (*to == ' '))
			*to = 0;
		else break;
	}
}


/******************************************************************************
 * RawStr4::readtext	- gets text at a given offset, following "@LINK" entries
 *
 * ENT:
 *	istart	- starting offset where the text is located in the file
 *	isize	- in: size of text entry; incremented by one on every pass and
 *		  overwritten by findoffset() when a link is followed
 *	idxbuf	- receives a new[] buffer holding the key found at istart
 *	buf	- receives a new[] buffer holding the entry text (key line
 *		  stripped)
 *
 * Any buffers already held in *idxbuf / *buf are delete[]d first.
 */

void RawStr4::readtext(long istart, unsigned long *isize, char **idxbuf, char **buf)
{
	char *ch;
	char *idxbuflocal = 0;
	getidxbufdat(istart, &idxbuflocal);	// key of the entry originally asked for
	long start = istart;

	do {
		if (*idxbuf)
			delete [] *idxbuf;
		if (*buf)
			delete [] *buf;
		*buf = new char [ ++(*isize) * FILTERPAD ];
		*idxbuf = new char [ (*isize) * FILTERPAD ];

		memset(*buf, 0, *isize);
		lseek(datfd->getFd(), start, SEEK_SET);
		read(datfd->getFd(), *buf, (int)((*isize) - 1));

		for (ch = *buf; *ch; ch++) {		// skip over index string
			if (*ch == 10) {
				ch++;
				break;
			}
		}
		memmove(*buf, ch, *isize - (unsigned long)(ch-*buf));

		// resolve link
		if (!strncmp(*buf, "@LINK", 5)) {
			for (ch = *buf; *ch; ch++) {		// null before nl
				if (*ch == 10) {
					*ch = 0;
					break;
				}
			}
			// target key starts after "@LINK " (6 characters)
			findoffset(*buf + 6, &start, isize);
		}
		else break;
	}
	while (true);	// while we're resolving links

	if (idxbuflocal) {
		// copy the key, clipped to the size of the entry finally read
		int localsize = strlen(idxbuflocal);
		localsize = (localsize < (*isize - 1)) ? localsize : (*isize - 1);
		strncpy(*idxbuf, idxbuflocal, localsize);
		(*idxbuf)[localsize] = 0;
		free(idxbuflocal);
	}
}
? localsize : (*isize - 1); + strncpy(*idxbuf, idxbuflocal, localsize); + (*idxbuf)[localsize] = 0; + free(idxbuflocal); + } +} + + +/****************************************************************************** + * RawLD::settext - Sets text for current offset + * + * ENT: key - key for this entry + * buf - buffer to store + * len - length of buffer (0 - null terminated) + */ + +void RawStr4::setText(const char *ikey, const char *buf, long len) { + + long start, outstart; + long idxoff; + long endoff; + long shiftSize; + unsigned long size; + unsigned long outsize; + static const char nl[] = {13, 10}; + char *tmpbuf = 0; + char *key = 0; + char *dbKey = 0; + char *idxBytes = 0; + char *outbuf = 0; + char *ch = 0; + + char errorStatus = findoffset(ikey, &start, &size, 0, &idxoff); + stdstr(&key, ikey); + toupperstr_utf8(key); + + len = (len < 0) ? strlen(buf) : len; + getidxbufdat(start, &dbKey); + + if (strcmp(key, dbKey) < 0) { + } + else if (strcmp(key, dbKey) > 0) { + if (errorStatus != -2) // not a new file + idxoff += 8; + else idxoff = 0; + } + else if ((!strcmp(key, dbKey)) && (len>0/*we're not deleting*/)) { // got absolute entry + do { + tmpbuf = new char [ size + 2 ]; + memset(tmpbuf, 0, size + 2); + lseek(datfd->getFd(), start, SEEK_SET); + read(datfd->getFd(), tmpbuf, (int)(size - 1)); + + for (ch = tmpbuf; *ch; ch++) { // skip over index string + if (*ch == 10) { + ch++; + break; + } + } + memmove(tmpbuf, ch, size - (unsigned long)(ch-tmpbuf)); + + // resolve link + if (!strncmp(tmpbuf, "@LINK", 5) && (len > 0)) { + for (ch = tmpbuf; *ch; ch++) { // null before nl + if (*ch == 10) { + *ch = 0; + break; + } + } + findoffset(tmpbuf + 8, &start, &size, 0, &idxoff); + ++size; + } + else break; + } + while (true); // while we're resolving links + } + + endoff = lseek(idxfd->getFd(), 0, SEEK_END); + + shiftSize = endoff - idxoff; + + if (shiftSize > 0) { + idxBytes = new char [ shiftSize ]; + lseek(idxfd->getFd(), idxoff, SEEK_SET); + read(idxfd->getFd(), 
idxBytes, shiftSize); + } + + outbuf = new char [ len + strlen(key) + 5 ]; + sprintf(outbuf, "%s%c%c", key, 13, 10); + size = strlen(outbuf); + memcpy(outbuf + size, buf, len); + size = outsize = size + len; + + start = outstart = lseek(datfd->getFd(), 0, SEEK_END); + + outstart = archtosword32(start); + outsize = archtosword32(size); + + lseek(idxfd->getFd(), idxoff, SEEK_SET); + if (len>0) { + lseek(datfd->getFd(), start, SEEK_SET); + write(datfd->getFd(), outbuf, (long)size); + + // add a new line to make data file easier to read in an editor + write(datfd->getFd(), &nl, 2); + + write(idxfd->getFd(), &outstart, 4); + write(idxfd->getFd(), &outsize, 4); + if (idxBytes) { + write(idxfd->getFd(), idxBytes, shiftSize); + delete [] idxBytes; + } + } + else { // delete entry + if (idxBytes) { + write(idxfd->getFd(), idxBytes+8, shiftSize-8); + lseek(idxfd->getFd(), -1, SEEK_CUR); // last valid byte + FileMgr::systemFileMgr.trunc(idxfd); // truncate index + delete [] idxBytes; + } + } + + delete [] key; + delete [] outbuf; + free(dbKey); +} + + +/****************************************************************************** + * RawLD::linkentry - links one entry to another + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * destidxoff - dest offset into .vss + * srcidxoff - source offset into .vss + */ + +void RawStr4::linkentry(const char *destkey, const char *srckey) { + char *text = new char [ strlen(destkey) + 7 ]; + sprintf(text, "@LINK %s", destkey); + setText(srckey, text); + delete [] text; +} + + +/****************************************************************************** + * RawLD::CreateModule - Creates new module files + * + * ENT: path - directory to store module files + * RET: error status + */ + +signed char RawStr4::createModule(const char *ipath) +{ + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd, *fd2; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] 
== '\\')) + path[strlen(path)-1] = 0; + + sprintf(buf, "%s.dat", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s.idx", path); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + FileMgr::systemFileMgr.close(fd2); + + delete [] path; + + return 0; +} diff --git a/src/modules/common/rawverse.cpp b/src/modules/common/rawverse.cpp new file mode 100644 index 0000000..f77fbe5 --- /dev/null +++ b/src/modules/common/rawverse.cpp @@ -0,0 +1,348 @@ +/****************************************************************************** + * rawverse.cpp - code for class 'RawVerse'- a module that reads raw text + * files: ot and nt using indexs ??.bks ??.cps ??.vss + * and provides lookup and parsing functions based on + * class VerseKey + */ + + +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> + +#ifndef __GNUC__ +#include <io.h> +#include <sys/stat.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> +#include <rawverse.h> +#include <versekey.h> +#include <sysdata.h> + +#ifndef O_BINARY // O_BINARY is needed in Borland C++ 4.53 +#define O_BINARY 0 // If it hasn't been defined than we probably +#endif // don't need it. + + +/****************************************************************************** + * RawVerse Statics + */ + +int RawVerse::instance = 0; +const char *RawVerse::nl = "\r\n"; + + +/****************************************************************************** + * RawVerse Constructor - Initializes data for instance of RawVerse + * + * ENT: ipath - path of the directory where data and index files are located. + * be sure to include the trailing separator (e.g. '/' or '\') + * (e.g. 
'modules/texts/rawtext/webster/') + */ + +RawVerse::RawVerse(const char *ipath, int fileMode) +{ + char *buf; + + path = 0; + stdstr(&path, ipath); + buf = new char [ strlen(path) + 80 ]; + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + if (fileMode == -1) { // try read/write if possible + fileMode = O_RDWR; + } + + sprintf(buf, "%s/ot.vss", path); + idxfp[0] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/nt.vss", path); + idxfp[1] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/ot", path); + textfp[0] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/nt", path); + textfp[1] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + delete [] buf; + instance++; +} + + +/****************************************************************************** + * RawVerse Destructor - Cleans up instance of RawVerse + */ + +RawVerse::~RawVerse() +{ + int loop1; + + if (path) + delete [] path; + + --instance; + + for (loop1 = 0; loop1 < 2; loop1++) { + FileMgr::systemFileMgr.close(idxfp[loop1]); + FileMgr::systemFileMgr.close(textfp[loop1]); + } +} + + +/****************************************************************************** + * RawVerse::findoffset - Finds the offset of the key verse from the indexes + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * idxoff - offset into .vss + * start - address to store the starting offset + * size - address to store the size of the entry + */ + +void RawVerse::findoffset(char testmt, long idxoff, long *start, unsigned short *size) { + idxoff *= 6; + if (!testmt) + testmt = ((idxfp[1]) ? 
1:2); + + if (idxfp[testmt-1]->getFd() >= 0) { + lseek(idxfp[testmt-1]->getFd(), idxoff, SEEK_SET); + read(idxfp[testmt-1]->getFd(), start, 4); + long len = read(idxfp[testmt-1]->getFd(), size, 2); // read size + + *start = swordtoarch32(*start); + *size = swordtoarch16(*size); + + if (len < 2) { + *size = (unsigned short)((*start) ? (lseek(textfp[testmt-1]->getFd(), 0, SEEK_END) - (long)*start) : 0); // if for some reason we get an error reading size, make size to end of file + } + } + else { + *start = 0; + *size = 0; + } +} + + +/****************************************************************************** + * RawVerse::preptext - Prepares the text before returning it to external + * objects + * + * ENT: buf - buffer where text is stored and where to store the prep'd + * text. + */ + +void RawVerse::preptext(char *buf) +{ + char *to, *from, space = 0, cr = 0, realdata = 0, nlcnt = 0; + + for (to = from = buf; *from; from++) { + switch (*from) { + case 10: + if (!realdata) + continue; + space = (cr) ? 
0 : 1; + cr = 0; + nlcnt++; + if (nlcnt > 1) { +// *to++ = nl; + *to++ = 10; +// *to++ = nl[1]; +// nlcnt = 0; + } + continue; + case 13: + if (!realdata) + continue; +// *to++ = nl[0]; + *to++ = 10; + space = 0; + cr = 1; + continue; + } + realdata = 1; + nlcnt = 0; + if (space) { + space = 0; + if (*from != ' ') { + *to++ = ' '; + from--; + continue; + } + } + *to++ = *from; + } + *to = 0; + + while (to > (buf+1)) { // remove trailing excess + to--; + if ((*to == 10) || (*to == ' ')) + *to = 0; + else break; + } +} + + +/****************************************************************************** + * RawVerse::readtext - gets text at a given offset + * + * ENT: testmt - testament file to search in (0 - Old; 1 - New) + * start - starting offset where the text is located in the file + * size - size of text entry + 2 (null)(null) + * buf - buffer to store text + * + */ + +void RawVerse::readtext(char testmt, long start, unsigned short size, char *buf) { + memset(buf, 0, size+1); + if (!testmt) + testmt = ((idxfp[1]) ? 1:2); + if (size) { + if (textfp[testmt-1]->getFd() >= 0) { + lseek(textfp[testmt-1]->getFd(), start, SEEK_SET); + read(textfp[testmt-1]->getFd(), buf, (int)size - 2); + } + } +} + + +/****************************************************************************** + * RawVerse::settext - Sets text for current offset + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * idxoff - offset into .vss + * buf - buffer to store + * len - length of buffer (0 - null terminated) + */ + +void RawVerse::settext(char testmt, long idxoff, const char *buf, long len) +{ + long start, outstart; + unsigned short size; + unsigned short outsize; + + idxoff *= 6; + if (!testmt) + testmt = ((idxfp[1]) ? 1:2); + + size = outsize = (len < 0) ? 
strlen(buf) : len; + + start = outstart = lseek(textfp[testmt-1]->getFd(), 0, SEEK_END); + lseek(idxfp[testmt-1]->getFd(), idxoff, SEEK_SET); + + if (size) { + lseek(textfp[testmt-1]->getFd(), start, SEEK_SET); + write(textfp[testmt-1]->getFd(), buf, (int)size); + + // add a new line to make data file easier to read in an editor + write(textfp[testmt-1]->getFd(), nl, 2); + } + else { + start = 0; + } + + outstart = archtosword32(start); + outsize = archtosword16(size); + + write(idxfp[testmt-1]->getFd(), &outstart, 4); + write(idxfp[testmt-1]->getFd(), &outsize, 2); + + +} + + +/****************************************************************************** + * RawVerse::linkentry - links one entry to another + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * destidxoff - dest offset into .vss + * srcidxoff - source offset into .vss + */ + +void RawVerse::linkentry(char testmt, long destidxoff, long srcidxoff) { + long start; + unsigned short size; + + destidxoff *= 6; + srcidxoff *= 6; + + if (!testmt) + testmt = ((idxfp[1]) ? 
1:2); + + // get source + lseek(idxfp[testmt-1]->getFd(), srcidxoff, SEEK_SET); + read(idxfp[testmt-1]->getFd(), &start, 4); + read(idxfp[testmt-1]->getFd(), &size, 2); + + // write dest + lseek(idxfp[testmt-1]->getFd(), destidxoff, SEEK_SET); + write(idxfp[testmt-1]->getFd(), &start, 4); + write(idxfp[testmt-1]->getFd(), &size, 2); +} + + +/****************************************************************************** + * RawVerse::CreateModule - Creates new module files + * + * ENT: path - directory to store module files + * RET: error status + */ + +char RawVerse::createModule(const char *ipath) +{ + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd, *fd2; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + sprintf(buf, "%s/ot", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s/nt", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s/ot.vss", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + + sprintf(buf, "%s/nt.vss", path); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + + VerseKey vk; + vk.Headings(1); + long offset = 0; + short size = 0; + for (vk = TOP; !vk.Error(); vk++) { + write((vk.Testament() == 1) ? fd->getFd() : fd2->getFd(), &offset, 4); + write((vk.Testament() == 1) ? 
fd->getFd() : fd2->getFd(), &size, 2); + } + + FileMgr::systemFileMgr.close(fd); + FileMgr::systemFileMgr.close(fd2); + + delete [] path; +/* + RawVerse rv(path); + VerseKey mykey("Rev 22:21"); +*/ + + return 0; +} diff --git a/src/modules/common/sapphire.cpp b/src/modules/common/sapphire.cpp new file mode 100644 index 0000000..686bccb --- /dev/null +++ b/src/modules/common/sapphire.cpp @@ -0,0 +1,228 @@ +/* sapphire.cpp -- the Saphire II stream cipher class. + Dedicated to the Public Domain the author and inventor: + (Michael Paul Johnson). This code comes with no warranty. + Use it at your own risk. + Ported from the Pascal implementation of the Sapphire Stream + Cipher 9 December 1994. + Added hash pre- and post-processing 27 December 1994. + Modified initialization to make index variables key dependent, + made the output function more resistant to cryptanalysis, + and renamed to Sapphire II 2 January 1995 +*/ + + +#ifdef WIN32 +#include <memory.h> +#endif + +#ifdef UNIX +#include <memory.h> +#include <unistd.h> +#else +#ifndef _MSC_VER +#include <mem.h> +#endif +#endif + +#ifdef _WIN32_WCE +#include <string.h> +#endif + +#include "sapphire.h" + +unsigned char sapphire::keyrand(int limit, + unsigned char *user_key, + unsigned char keysize, + unsigned char *rsum, + unsigned *keypos) + { + unsigned u, // Value from 0 to limit to return. + retry_limiter, // No infinite loops allowed. + mask; // Select just enough bits. + + if (!limit) return 0; // Avoid divide by zero error. + retry_limiter = 0; + mask = 1; // Fill mask with enough bits to cover + while (mask < (unsigned)limit) // the desired range. + mask = (mask << 1) + 1; + do + { + *rsum = cards[*rsum] + user_key[(*keypos)++]; + if (*keypos >= keysize) + { + *keypos = 0; // Recycle the user key. + *rsum += keysize; // key "aaaa" != key "aaaaaaaa" + } + u = mask & *rsum; + if (++retry_limiter > 11) + u %= limit; // Prevent very rare long loops. 
+ } + while (u > (unsigned)limit); + return u; + } + +void sapphire::initialize(unsigned char *key, unsigned char keysize) + { + // Key size may be up to 256 bytes. + // Pass phrases may be used directly, with longer length + // compensating for the low entropy expected in such keys. + // Alternatively, shorter keys hashed from a pass phrase or + // generated randomly may be used. For random keys, lengths + // of from 4 to 16 bytes are recommended, depending on how + // secure you want this to be. + + int i; + unsigned char toswap, swaptemp, rsum; + unsigned keypos; + + // If we have been given no key, assume the default hash setup. + + if (keysize < 1) + { + hash_init(); + return; + } + + // Start with cards all in order, one of each. + + for (i=0;i<256;i++) + cards[i] = i; + + // Swap the card at each position with some other card. + + toswap = 0; + keypos = 0; // Start with first byte of user key. + rsum = 0; + for (i=255;i>=0;i--) + { + toswap = keyrand(i, key, keysize, &rsum, &keypos); + swaptemp = cards[i]; + cards[i] = cards[toswap]; + cards[toswap] = swaptemp; + } + + // Initialize the indices and data dependencies. + // Indices are set to different values instead of all 0 + // to reduce what is known about the state of the cards + // when the first byte is emitted. + + rotor = cards[1]; + ratchet = cards[3]; + avalanche = cards[5]; + last_plain = cards[7]; + last_cipher = cards[rsum]; + + toswap = swaptemp = rsum = 0; + keypos = 0; + } + +void sapphire::hash_init(void) + { + // This function is used to initialize non-keyed hash + // computation. + + int i, j; + + // Initialize the indices and data dependencies. + + rotor = 1; + ratchet = 3; + avalanche = 5; + last_plain = 7; + last_cipher = 11; + + // Start with cards all in inverse order. 
+ + for (i=0, j=255;i<256;i++,j--) + cards[i] = (unsigned char) j; + } + +sapphire::sapphire(unsigned char *key, unsigned char keysize) + { + if (key && keysize) + initialize(key, keysize); + } + +void sapphire::burn(void) + { + // Destroy the key and state information in RAM. + memset(cards, 0, 256); + rotor = ratchet = avalanche = last_plain = last_cipher = 0; + } + +sapphire::~sapphire() + { + burn(); + } + +unsigned char sapphire::encrypt(unsigned char b) + { +#ifdef USBINARY + // Picture a single enigma rotor with 256 positions, rewired + // on the fly by card-shuffling. + + // This cipher is a variant of one invented and written + // by Michael Paul Johnson in November, 1993. + + unsigned char swaptemp; + + // Shuffle the deck a little more. + + ratchet += cards[rotor++]; + swaptemp = cards[last_cipher]; + cards[last_cipher] = cards[ratchet]; + cards[ratchet] = cards[last_plain]; + cards[last_plain] = cards[rotor]; + cards[rotor] = swaptemp; + avalanche += cards[swaptemp]; + + // Output one byte from the state in such a way as to make it + // very hard to figure out which one you are looking at. + + last_cipher = b^cards[(cards[ratchet] + cards[rotor]) & 0xFF] ^ + cards[cards[(cards[last_plain] + + cards[last_cipher] + + cards[avalanche])&0xFF]]; + last_plain = b; + return last_cipher; +#else + return b; +#endif + } + +unsigned char sapphire::decrypt(unsigned char b) + { + unsigned char swaptemp; + + // Shuffle the deck a little more. + + ratchet += cards[rotor++]; + swaptemp = cards[last_cipher]; + cards[last_cipher] = cards[ratchet]; + cards[ratchet] = cards[last_plain]; + cards[last_plain] = cards[rotor]; + cards[rotor] = swaptemp; + avalanche += cards[swaptemp]; + + // Output one byte from the state in such a way as to make it + // very hard to figure out which one you are looking at. 
+ + last_plain = b^cards[(cards[ratchet] + cards[rotor]) & 0xFF] ^ + cards[cards[(cards[last_plain] + + cards[last_cipher] + + cards[avalanche])&0xFF]]; + last_cipher = b; + return last_plain; + } + +void sapphire::hash_final(unsigned char *hash, // Destination + unsigned char hashlength) // Size of hash. + { + int i; + + for (i=255;i>=0;i--) + encrypt((unsigned char) i); + for (i=0;i<hashlength;i++) + hash[i] = encrypt(0); + } + diff --git a/src/modules/common/swcipher.cpp b/src/modules/common/swcipher.cpp new file mode 100644 index 0000000..d221b8b --- /dev/null +++ b/src/modules/common/swcipher.cpp @@ -0,0 +1,123 @@ +/****************************************************************************** + * swcipher.cpp - code for class 'SWCipher'- a driver class that provides + * cipher utilities. + */ + +#include <string.h> +#include <stdlib.h> +#include <swcipher.h> + + +/****************************************************************************** + * SWCipher Constructor - Initializes data for instance of SWCipher + * + */ + +SWCipher::SWCipher(unsigned char *key) { + master.initialize(key, strlen((char *)key)); + buf = 0; +} + + +/****************************************************************************** + * SWCipher Destructor - Cleans up instance of SWCipher + */ + +SWCipher::~SWCipher() +{ + if (buf) + free(buf); +} + + +char *SWCipher::Buf(const char *ibuf, unsigned int ilen) +{ + if (ibuf) { + + if (buf) + free(buf); + + if (!ilen) { + len = strlen(buf); + ilen = len + 1; + } + else len = ilen; + + buf = (char *) malloc(ilen); + memcpy(buf, ibuf, ilen); + cipher = false; + } + + Decode(); + + return buf; +} + + +char *SWCipher::cipherBuf(unsigned int *ilen, const char *ibuf) +{ + if (ibuf) { + + if (buf) + free(buf); + + buf = (char *) malloc(*ilen); + memcpy(buf, ibuf, *ilen); + len = *ilen; + cipher = true; + } + + Encode(); + + *ilen = (short)len; + return buf; +} + + +/****************************************************************************** + * 
SWCipher::Encode - This function "encodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + */ + +void SWCipher::Encode(void) +{ + if (!cipher) { + work = master; + for (int i = 0; i < len; i++) + buf[i] = work.encrypt(buf[i]); + cipher = true; + } +} + + +/****************************************************************************** + * SWCipher::Decode - This function "decodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + */ + +void SWCipher::Decode(void) +{ + if (cipher) { + work = master; + for (int i = 0; i < len; i++) + buf[i] = work.decrypt(buf[i]); + cipher = false; + } +} + + +/****************************************************************************** + * SWCipher::setCipherKey - setter for a new CipherKey + * + */ + +void SWCipher::setCipherKey(const char *ikey) { + unsigned char *key = (unsigned char *)ikey; + master.initialize(key, strlen((char *)key)); +} diff --git a/src/modules/common/swcomprs.cpp b/src/modules/common/swcomprs.cpp new file mode 100644 index 0000000..4bd2e5e --- /dev/null +++ b/src/modules/common/swcomprs.cpp @@ -0,0 +1,190 @@ +/****************************************************************************** + * swcomprs.cpp - code for class 'SWCompress'- a driver class that provides + * compression utilities. 
+ */ + +#include <string.h> +#include <stdlib.h> +#include <swcomprs.h> + + +/****************************************************************************** + * SWCompress Constructor - Initializes data for instance of SWCompress + * + */ + +SWCompress::SWCompress() +{ + buf = zbuf = 0; + Init(); +} + + +/****************************************************************************** + * SWCompress Destructor - Cleans up instance of SWCompress + */ + +SWCompress::~SWCompress() +{ + if (zbuf) + free(zbuf); + + if (buf) + free(buf); +} + + +void SWCompress::Init() +{ + if (buf) + free(buf); + + if (zbuf) + free(zbuf); + + buf = 0; + zbuf = 0; + direct = 0; + zlen = 0; + slen = 0; + zpos = 0; + pos = 0; +} + + +char *SWCompress::Buf(const char *ibuf, unsigned long *len) { + // setting an uncompressed buffer + if (ibuf) { + Init(); + slen = (len) ? *len : strlen(ibuf); + buf = (char *) calloc(slen + 1, 1); + memcpy(buf, ibuf, slen); + } + + // getting an uncompressed buffer + if (!buf) { + buf = (char *)calloc(1,1); // be sure we at least allocate an empty buf for return; + direct = 1; + Decode(); +// slen = strlen(buf); + if (len) + *len = slen; + } + return buf; +} + + +char *SWCompress::zBuf(unsigned long *len, char *ibuf) +{ + // setting a compressed buffer + if (ibuf) { + Init(); + zbuf = (char *) malloc(*len); + memcpy(zbuf, ibuf, *len); + zlen = *len; + } + + // getting a compressed buffer + if (!zbuf) { + direct = 0; + Encode(); + } + + *len = zlen; + return zbuf; +} + + +unsigned long SWCompress::GetChars(char *ibuf, unsigned long len) +{ + if (direct) { + len = (((zlen - zpos) > (unsigned)len) ? len : zlen - zpos); + if (len > 0) { + memmove(ibuf, &zbuf[zpos], len); + zpos += len; + } + } + else { +// slen = strlen(buf); + len = (((slen - pos) > (unsigned)len) ? 
len : slen - pos); + if (len > 0) { + memmove(ibuf, &buf[pos], len); + pos += len; + } + } + return len; +} + + +unsigned long SWCompress::SendChars(char *ibuf, unsigned long len) +{ + if (direct) { + if (buf) { +// slen = strlen(buf); + if ((pos + len) > (unsigned)slen) { + buf = (char *) realloc(buf, pos + len + 1024); + memset(&buf[pos], 0, len + 1024); + } + } + else buf = (char *)calloc(1, len + 1024); + memmove(&buf[pos], ibuf, len); + pos += len; + } + else { + if (zbuf) { + if ((zpos + len) > zlen) { + zbuf = (char *) realloc(zbuf, zpos + len + 1024); + zlen = zpos + len + 1024; + } + } + else { + zbuf = (char *)calloc(1, len + 1024); + zlen = len + 1024; + } + memmove(&zbuf[zpos], ibuf, len); + zpos += len; + } + return len; +} + + +/****************************************************************************** + * SWCompress::Encode - This function "encodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + */ + +void SWCompress::Encode(void) +{ + cycleStream(); +} + + +/****************************************************************************** + * SWCompress::Decode - This function "decodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. 
+ */ + +void SWCompress::Decode(void) +{ + cycleStream(); +} + + +void SWCompress::cycleStream() { + char buf[1024]; + unsigned long len, totlen = 0; + + do { + len = GetChars(buf, 1024); + if (len) + totlen += SendChars(buf, len); + } while (len == 1024); + + zlen = slen = totlen; +} diff --git a/src/modules/common/zipcomprs.cpp b/src/modules/common/zipcomprs.cpp new file mode 100644 index 0000000..01ba430 --- /dev/null +++ b/src/modules/common/zipcomprs.cpp @@ -0,0 +1,158 @@ +/****************************************************************************** + * swcomprs.cpp - code for class 'ZipCompress'- a driver class that provides + * compression utilities. - using zlib + */ + +#include <string.h> +#include <string> +#include <stdlib.h> +#include <stdio.h> +#include <zipcomprs.h> +#include <zlib.h> + +/****************************************************************************** + * ZipCompress Constructor - Initializes data for instance of ZipCompress + * + */ + +ZipCompress::ZipCompress() : SWCompress() +{ +// fprintf(stderr, "init compress\n"); +} + + +/****************************************************************************** + * ZipCompress Destructor - Cleans up instance of ZipCompress + */ + +ZipCompress::~ZipCompress() { +} + + +/****************************************************************************** + * ZipCompress::Encode - This function "encodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + * NOTE: must set zlen for parent class to know length of + * compressed buffer. + */ + +void ZipCompress::Encode(void) +{ +/* +ZEXTERN int ZEXPORT compress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); + Compresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. 
Upon entry, destLen is the total + size of the destination buffer, which must be at least 0.1% larger than + sourceLen plus 12 bytes. Upon exit, destLen is the actual size of the + compressed buffer. + This function can be used to compress a whole file at once if the + input file is mmap'ed. + compress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer. +*/ + direct = 0; // set direction needed by parent [Get|Send]Chars() + + // get buffer + char chunk[1024]; + char *buf = (char *)calloc(1, 1024); + char *chunkbuf = buf; + unsigned long chunklen; + unsigned long len = 0; + while((chunklen = GetChars(chunk, 1023))) { + memcpy(chunkbuf, chunk, chunklen); + len += chunklen; + if (chunklen < 1023) + break; + else buf = (char *)realloc(buf, len + 1024); + chunkbuf = buf+len; + } + + + zlen = (long) (len*1.001)+15; + char *zbuf = new char[zlen+1]; + if (len) + { + //printf("Doing compress\n"); + if (compress((Bytef*)zbuf, &zlen, (const Bytef*)buf, len)!=Z_OK) + { + printf("ERROR in compression\n"); + } + else { + SendChars(zbuf, zlen); + } + } + else + { + fprintf(stderr, "No buffer to compress\n"); + } + delete [] zbuf; + free (buf); +} + + +/****************************************************************************** + * ZipCompress::Decode - This function "decodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + */ + +void ZipCompress::Decode(void) +{ +/* +ZEXTERN int ZEXPORT uncompress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); + Decompresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total + size of the destination buffer, which must be large enough to hold the + entire uncompressed data. 
(The size of the uncompressed data must have + been saved previously by the compressor and transmitted to the decompressor + by some mechanism outside the scope of this compression library.) + Upon exit, destLen is the actual size of the compressed buffer. + This function can be used to decompress a whole file at once if the + input file is mmap'ed. + + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer, or Z_DATA_ERROR if the input data was corrupted. +*/ + + // get buffer + char chunk[1024]; + char *zbuf = (char *)calloc(1, 1024); + char *chunkbuf = zbuf; + int chunklen; + unsigned long zlen = 0; + while((chunklen = GetChars(chunk, 1023))) { + memcpy(chunkbuf, chunk, chunklen); + zlen += chunklen; + if (chunklen < 1023) + break; + else zbuf = (char *)realloc(zbuf, zlen + 1024); + chunkbuf = zbuf + zlen; + } + + //printf("Decoding complength{%ld} uncomp{%ld}\n", zlen, blen); + if (zlen) { + unsigned long blen = zlen*20; // trust compression is less than 1000% + char *buf = new char[blen]; + //printf("Doing decompress {%s}\n", zbuf); + if (uncompress((Bytef*)buf, &blen, (Bytef*)zbuf, zlen) != Z_OK) { + fprintf(stderr, "no room in outbuffer to during decompression. 
see zipcomp.cpp\n"); + } + SendChars(buf, blen); + delete [] buf; + slen = blen; + } + else { + fprintf(stderr, "No buffer to decompress!\n"); + } + //printf("Finished decoding\n"); + free (zbuf); +} diff --git a/src/modules/common/zstr.cpp b/src/modules/common/zstr.cpp new file mode 100644 index 0000000..cd1add5 --- /dev/null +++ b/src/modules/common/zstr.cpp @@ -0,0 +1,705 @@ +/****************************************************************************** + * zstr.cpp - code for class 'zStr'- a module that reads compressed text + * files and provides lookup and parsing functions based on + * class StrKey + */ + +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <stdlib.h> +#include <utilfuns.h> +#include <zstr.h> +#include <swcomprs.h> + +#include <sysdata.h> +#include <entriesblk.h> + +/****************************************************************************** + * zStr Statics + */ + +int zStr::instance = 0; +const int zStr::IDXENTRYSIZE = 8; +const int zStr::ZDXENTRYSIZE = 8; + + +/****************************************************************************** + * zStr Constructor - Initializes data for instance of zStr + * + * ENT: ipath - path of the directory where data and index files are located. + */ + +zStr::zStr(const char *ipath, int fileMode, long blockCount, SWCompress *icomp) { + char buf[127]; + + nl = '\n'; + lastoff = -1; + path = 0; + stdstr(&path, ipath); + + compressor = (icomp) ? icomp : new SWCompress(); + this->blockCount = blockCount; +#ifndef O_BINARY // O_BINARY is needed in Borland C++ 4.53 +#define O_BINARY 0 // If it hasn't been defined than we probably +#endif // don't need it. 
+ + if (fileMode == -1) { // try read/write if possible + fileMode = O_RDWR; + } + + sprintf(buf, "%s.idx", path); + idxfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s.dat", path); + datfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s.zdx", path); + zdxfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s.zdt", path); + zdtfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + if (datfd <= 0) { + sprintf(buf, "Error: %d", errno); + perror(buf); + } + + cacheBlock = 0; + cacheBlockIndex = -1; + cacheDirty = false; + + instance++; +} + + +/****************************************************************************** + * zStr Destructor - Cleans up instance of zStr + */ + +zStr::~zStr() { + + flushCache(); + + if (path) + delete [] path; + + --instance; + + FileMgr::systemFileMgr.close(idxfd); + FileMgr::systemFileMgr.close(datfd); + FileMgr::systemFileMgr.close(zdxfd); + FileMgr::systemFileMgr.close(zdtfd); + + + if (compressor) + delete compressor; + +} + + +/****************************************************************************** + * zStr::getidxbufdat - Gets the index string at the given dat offset + * NOTE: buf is calloc'd, or if not null, realloc'd and must + * be free'd by calling function + * + * ENT: ioffset - offset in dat file to lookup + * buf - address of pointer to allocate for storage of string + */ + +void zStr::getKeyFromDatOffset(long ioffset, char **buf) { + int size; + char ch; + if (datfd > 0) { + lseek(datfd->getFd(), ioffset, SEEK_SET); + for (size = 0; read(datfd->getFd(), &ch, 1) == 1; size++) { + if ((ch == '\\') || (ch == 10) || (ch == 13)) + break; + } + *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1); + if (size) { + lseek(datfd->getFd(), ioffset, SEEK_SET); + read(datfd->getFd(), *buf, size); + } + (*buf)[size] = 0; + toupperstr_utf8(*buf); + } + else { + *buf = (*buf) ? 
(char *)realloc(*buf, 1) : (char *)malloc(1); + **buf = 0; + } +} + + +/****************************************************************************** + * zStr::getidxbuf - Gets the index string at the given idx offset + * NOTE: buf is calloc'd, or if not null, realloc'd + * and must be freed by calling function + * + * ENT: ioffset - offset in idx file to lookup + * buf - address of pointer to allocate for storage of string + */ + +void zStr::getKeyFromIdxOffset(long ioffset, char **buf) { + __u32 offset; + + if (idxfd > 0) { + lseek(idxfd->getFd(), ioffset, SEEK_SET); + read(idxfd->getFd(), &offset, sizeof(__u32)); + offset = swordtoarch32(offset); + getKeyFromDatOffset(offset, buf); + } +} + + +/****************************************************************************** + * zStr::findoffset - Finds the offset of the key string from the indexes + * + * ENT: key - key string to lookup + * offset - address to store the starting offset + * size - address to store the size of the entry + * away - number of entries before of after to jump + * (default = 0) + * + * RET: error status + */ + +signed char zStr::findKeyIndex(const char *ikey, long *idxoff, long away) { + char *trybuf = 0, *key = 0, quitflag = 0; + signed char retval = 0; + __s32 headoff, tailoff, tryoff = 0, maxoff = 0; + __u32 start, size; + + if (idxfd->getFd() >= 0) { + tailoff = maxoff = lseek(idxfd->getFd(), 0, SEEK_END) - IDXENTRYSIZE; + if (*ikey) { + headoff = 0; + stdstr(&key, ikey); + toupperstr_utf8(key); + + while (headoff < tailoff) { + tryoff = (lastoff == -1) ? 
headoff + (((((tailoff / IDXENTRYSIZE) - (headoff / IDXENTRYSIZE))) / 2) * IDXENTRYSIZE) : lastoff; + lastoff = -1; + + getKeyFromIdxOffset(tryoff, &trybuf); + + if (!*trybuf && tryoff) { // In case of extra entry at end of idx (not first entry) + tryoff += (tryoff > (maxoff / 2))?-IDXENTRYSIZE:IDXENTRYSIZE; + retval = -1; + break; + } + + int diff = strcmp(key, trybuf); + if (!diff) + break; + + if (diff < 0) + tailoff = (tryoff == headoff) ? headoff : tryoff; + else headoff = tryoff; + if (tailoff == headoff + IDXENTRYSIZE) { + if (quitflag++) + headoff = tailoff; + } + } + if (headoff >= tailoff) + tryoff = headoff; + if (trybuf) + free(trybuf); + delete [] key; + } + else { tryoff = 0; } + + lseek(idxfd->getFd(), tryoff, SEEK_SET); + + start = size = 0; + retval = (read(idxfd->getFd(), &start, sizeof(__u32))==sizeof(__u32)) ? retval : -1; + retval = (read(idxfd->getFd(), &size, sizeof(__u32))==sizeof(__u32)) ? retval : -1; + start = swordtoarch32(start); + size = swordtoarch32(size); + + if (idxoff) + *idxoff = tryoff; + + while (away) { + __u32 laststart = start; + __u32 lastsize = size; + __s32 lasttry = tryoff; + tryoff += (away > 0) ? IDXENTRYSIZE : -IDXENTRYSIZE; + + bool bad = false; + if (((long)(tryoff + (away*IDXENTRYSIZE)) < -IDXENTRYSIZE) || (tryoff + (away*IDXENTRYSIZE) > (maxoff+IDXENTRYSIZE))) + bad = true; + else if (lseek(idxfd->getFd(), tryoff, SEEK_SET) < 0) + bad = true; + if (bad) { + retval = -1; + start = laststart; + size = lastsize; + tryoff = lasttry; + if (idxoff) + *idxoff = tryoff; + break; + } + read(idxfd->getFd(), &start, sizeof(__u32)); + read(idxfd->getFd(), &size, sizeof(__u32)); + start = swordtoarch32(start); + size = swordtoarch32(size); + + if (idxoff) + *idxoff = tryoff; + + + if (((laststart != start) || (lastsize != size)) && (start >= 0) && (size)) + away += (away < 0) ? 
1 : -1; + } + + lastoff = tryoff; + } + else { + if (idxoff) + *idxoff = 0; + retval = -1; + } + return retval; +} + + +/****************************************************************************** + * zStr::preptext - Prepares the text before returning it to external + * objects + * + * ENT: buf - buffer where text is stored and where to store the prep'd + * text. + */ + +void zStr::prepText(char *buf) { + char *to, *from, space = 0, cr = 0, realdata = 0, nlcnt = 0; + + for (to = from = buf; *from; from++) { + switch (*from) { + case 10: + if (!realdata) + continue; + space = (cr) ? 0 : 1; + cr = 0; + nlcnt++; + if (nlcnt > 1) { +// *to++ = nl; + *to++ = nl; +// nlcnt = 0; + } + continue; + case 13: + if (!realdata) + continue; + *to++ = nl; + space = 0; + cr = 1; + continue; + } + realdata = 1; + nlcnt = 0; + if (space) { + space = 0; + if (*from != ' ') { + *to++ = ' '; + from--; + continue; + } + } + *to++ = *from; + } + *to = 0; + + while (to > (buf+1)) { // remove trailing excess + to--; + if ((*to == 10) || (*to == ' ')) + *to = 0; + else break; + } +} + + +/****************************************************************************** + * zStr::getText - gets text at a given offset + * + * ENT: + * offset - idxoffset where the key is located. + * buf - buffer to store text + * idxbuf - buffer to store index key + * NOTE: buffer will be alloc'd / realloc'd and + * should be free'd by the client + * + */ + +void zStr::getText(long offset, char **idxbuf, char **buf) { + char *ch; + char *idxbuflocal = 0; + getKeyFromIdxOffset(offset, &idxbuflocal); + __u32 start; + __u32 size; + + do { + lseek(idxfd->getFd(), offset, SEEK_SET); + read(idxfd->getFd(), &start, sizeof(__u32)); + read(idxfd->getFd(), &size, sizeof(__u32)); + start = swordtoarch32(start); + size = swordtoarch32(size); + + *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1); + *idxbuf = (*idxbuf) ? 
(char *)realloc(*idxbuf, size*2 + 1) : (char *)malloc(size*2 + 1); + memset(*buf, 0, size + 1); + memset(*idxbuf, 0, size + 1); + lseek(datfd->getFd(), start, SEEK_SET); + read(datfd->getFd(), *buf, (int)(size)); + + for (ch = *buf; *ch; ch++) { // skip over index string + if (*ch == 10) { + ch++; + break; + } + } + memmove(*buf, ch, size - (unsigned long)(ch-*buf)); + + // resolve link + if (!strncmp(*buf, "@LINK", 5)) { + for (ch = *buf; *ch; ch++) { // null before nl + if (*ch == 10) { + *ch = 0; + break; + } + } + findKeyIndex(*buf + 6, &offset); + } + else break; + } + while (true); // while we're resolving links + + if (idxbuflocal) { + __u32 localsize = strlen(idxbuflocal); + localsize = (localsize < (size - 1)) ? localsize : (size - 1); + strncpy(*idxbuf, idxbuflocal, localsize); + (*idxbuf)[localsize] = 0; + free(idxbuflocal); + } + __u32 block = 0; + __u32 entry = 0; + memmove(&block, *buf, sizeof(__u32)); + memmove(&entry, *buf + sizeof(__u32), sizeof(__u32)); + block = swordtoarch32(block); + entry = swordtoarch32(entry); + getCompressedText(block, entry, buf); +} + + +/****************************************************************************** + * zStr::getCompressedText - Get text entry from a compressed index / zdata + * file. + */ + +void zStr::getCompressedText(long block, long entry, char **buf) { + + __u32 size = 0; + + if (cacheBlockIndex != block) { + __u32 start = 0; + + lseek(zdxfd->getFd(), block * ZDXENTRYSIZE, SEEK_SET); + read(zdxfd->getFd(), &start, sizeof(__u32)); + read(zdxfd->getFd(), &size, sizeof(__u32)); + start = swordtoarch32(start); + size = swordtoarch32(size); + + *buf = (*buf) ? 
(char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1); + + lseek(zdtfd->getFd(), start, SEEK_SET); + read(zdtfd->getFd(), *buf, size); + + flushCache(); + + unsigned long len = size; + compressor->zBuf(&len, *buf); + char * rawBuf = compressor->Buf(0, &len); + cacheBlock = new EntriesBlock(rawBuf, len); + cacheBlockIndex = block; + } + size = cacheBlock->getEntrySize(entry); + *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1); + strcpy(*buf, cacheBlock->getEntry(entry)); +} + + +/****************************************************************************** + * zLD::settext - Sets text for current offset + * + * ENT: key - key for this entry + * buf - buffer to store + * len - length of buffer (0 - null terminated) + */ + +void zStr::setText(const char *ikey, const char *buf, long len) { + + __u32 start, outstart; + __u32 size, outsize; + __s32 endoff; + long idxoff = 0; + __s32 shiftSize; + static const char nl[] = {13, 10}; + char *tmpbuf = 0; + char *key = 0; + char *dbKey = 0; + char *idxBytes = 0; + char *outbuf = 0; + char *ch = 0; + + len = (len < 0) ? 
strlen(buf) : len; + stdstr(&key, ikey); + toupperstr_utf8(key); + + char notFound = findKeyIndex(ikey, &idxoff, 0); + if (!notFound) { + getKeyFromIdxOffset(idxoff, &dbKey); + int diff = strcmp(key, dbKey); + if (diff < 0) { + } + else if (diff > 0) { + idxoff += IDXENTRYSIZE; + } + else if ((!diff) && (len > 0 /*we're not deleting*/)) { // got absolute entry + do { + lseek(idxfd->getFd(), idxoff, SEEK_SET); + read(idxfd->getFd(), &start, sizeof(__u32)); + read(idxfd->getFd(), &size, sizeof(__u32)); + start = swordtoarch32(start); + size = swordtoarch32(size); + + tmpbuf = new char [ size + 2 ]; + memset(tmpbuf, 0, size + 2); + lseek(datfd->getFd(), start, SEEK_SET); + read(datfd->getFd(), tmpbuf, size); + + for (ch = tmpbuf; *ch; ch++) { // skip over index string + if (*ch == 10) { + ch++; + break; + } + } + memmove(tmpbuf, ch, size - (unsigned long)(ch-tmpbuf)); + + // resolve link + if (!strncmp(tmpbuf, "@LINK", 5) && (len)) { + for (ch = tmpbuf; *ch; ch++) { // null before nl + if (*ch == 10) { + *ch = 0; + break; + } + } + findKeyIndex(tmpbuf + IDXENTRYSIZE, &idxoff); + delete [] tmpbuf; + } + else break; + } + while (true); // while we're resolving links + } + } + + endoff = lseek(idxfd->getFd(), 0, SEEK_END); + + shiftSize = endoff - idxoff; + + if (shiftSize > 0) { + idxBytes = new char [ shiftSize ]; + lseek(idxfd->getFd(), idxoff, SEEK_SET); + read(idxfd->getFd(), idxBytes, shiftSize); + } + + outbuf = new char [ len + strlen(key) + 5 ]; + sprintf(outbuf, "%s%c%c", key, 13, 10); + size = strlen(outbuf); + if (len > 0) { // NOT a link + if (!cacheBlock) { + flushCache(); + cacheBlock = new EntriesBlock(); + cacheBlockIndex = (lseek(zdxfd->getFd(), 0, SEEK_END) / ZDXENTRYSIZE); + } + else if (cacheBlock->getCount() >= blockCount) { + flushCache(); + cacheBlock = new EntriesBlock(); + cacheBlockIndex = (lseek(zdxfd->getFd(), 0, SEEK_END) / ZDXENTRYSIZE); + } + __u32 entry = cacheBlock->addEntry(buf); + cacheDirty = true; + outstart = 
archtosword32(cacheBlockIndex); + outsize = archtosword32(entry); + memcpy (outbuf + size, &outstart, sizeof(__u32)); + memcpy (outbuf + size + sizeof(__u32), &outsize, sizeof(__u32)); + size += (sizeof(__u32) * 2); + } + else { // link + memcpy(outbuf + size, buf, len); + size += len; + } + + start = lseek(datfd->getFd(), 0, SEEK_END); + + outstart = archtosword32(start); + outsize = archtosword32(size); + + lseek(idxfd->getFd(), idxoff, SEEK_SET); + if (len > 0) { + lseek(datfd->getFd(), start, SEEK_SET); + write(datfd->getFd(), outbuf, size); + + // add a new line to make data file easier to read in an editor + write(datfd->getFd(), &nl, 2); + + write(idxfd->getFd(), &outstart, sizeof(__u32)); + write(idxfd->getFd(), &outsize, sizeof(__u32)); + if (idxBytes) { + write(idxfd->getFd(), idxBytes, shiftSize); + } + } + else { // delete entry + if (idxBytes) { + write(idxfd->getFd(), idxBytes+IDXENTRYSIZE, shiftSize-IDXENTRYSIZE); + lseek(idxfd->getFd(), -1, SEEK_CUR); // last valid byte + FileMgr::systemFileMgr.trunc(idxfd); // truncate index + } + } + + if (idxBytes) + delete [] idxBytes; + delete [] key; + delete [] outbuf; + free(dbKey); +} + + +/****************************************************************************** + * zLD::linkentry - links one entry to another + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * destidxoff - dest offset into .vss + * srcidxoff - source offset into .vss + */ + +void zStr::linkEntry(const char *destkey, const char *srckey) { + char *text = new char [ strlen(destkey) + 7 ]; + sprintf(text, "@LINK %s", destkey); + setText(srckey, text); + delete [] text; +} + + +void zStr::flushCache() { + if (cacheBlock) { + if (cacheDirty) { + __u32 start = 0; + unsigned long size = 0; + __u32 outstart = 0, outsize = 0; + + const char *rawBuf = cacheBlock->getRawData(&size); + compressor->Buf(rawBuf, &size); + compressor->zBuf(&size); + + long zdxSize = lseek(zdxfd->getFd(), 0, SEEK_END); + long zdtSize = 
lseek(zdtfd->getFd(), 0, SEEK_END); + + if ((cacheBlockIndex * ZDXENTRYSIZE) > (zdxSize - ZDXENTRYSIZE)) { // New Block + start = zdtSize; + } + else { + lseek(zdxfd->getFd(), cacheBlockIndex * ZDXENTRYSIZE, SEEK_SET); + read(zdxfd->getFd(), &start, sizeof(__u32)); + read(zdxfd->getFd(), &outsize, sizeof(__u32)); + start = swordtoarch32(start); + outsize = swordtoarch32(outsize); + if (start + outsize >= zdtSize) { // last entry, just overwrite + // start is already set + } + else if (size < outsize) { // middle entry, but smaller, that's fine and let's preserve bigger size + size = outsize; + } + else { // middle and bigger-- we have serious problems, for now let's put it at the end = lots of wasted space + start = zdtSize; + } + } + + + + outstart = archtosword32(start); + outsize = archtosword32((__u32)size); + + lseek(zdxfd->getFd(), cacheBlockIndex * ZDXENTRYSIZE, SEEK_SET); + lseek(zdtfd->getFd(), start, SEEK_SET); + rawBuf = compressor->zBuf(&size); + write(zdtfd->getFd(), rawBuf, size); + + // add a new line to make data file easier to read in an editor + write(zdtfd->getFd(), &nl, 2); + + write(zdxfd->getFd(), &outstart, sizeof(__u32)); + write(zdxfd->getFd(), &outsize, sizeof(__u32)); + + delete cacheBlock; + } + } + cacheBlockIndex = -1; + cacheBlock = 0; + cacheDirty = false; +} + + +/****************************************************************************** + * zLD::CreateModule - Creates new module files + * + * ENT: path - directory to store module files + * RET: error status + */ + +signed char zStr::createModule(const char *ipath) { + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd, *fd2; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + sprintf(buf, "%s.dat", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, 
"%s.idx", path); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + FileMgr::systemFileMgr.close(fd2); + + sprintf(buf, "%s.zdt", path); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + FileMgr::systemFileMgr.close(fd2); + + sprintf(buf, "%s.zdx", path); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + FileMgr::systemFileMgr.close(fd2); + + delete [] path; + + return 0; +} diff --git a/src/modules/common/zverse.cpp b/src/modules/common/zverse.cpp new file mode 100644 index 0000000..6d76ddc --- /dev/null +++ b/src/modules/common/zverse.cpp @@ -0,0 +1,518 @@ +/****************************************************************************** + * zverse.h - code for class 'zVerse'- a module that reads raw text + * files: ot and nt using indexs ??.bks ??.cps ??.vss + * and provides lookup and parsing functions based on + * class VerseKey for compressed modules + */ + + +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> +#include <stdlib.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> +#include <versekey.h> +#include <zverse.h> +#include <sysdata.h> + + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + + +/****************************************************************************** + * zVerse Statics + */ + +int zVerse::instance = 0; + +const char zVerse::uniqueIndexID[] = {'X', 'r', 'v', 'c', 'b'}; + +/****************************************************************************** + * zVerse Constructor - Initializes data for instance of zVerse + * + * ENT: ipath - path of the directory where data and index files are located. + * be sure to include the trailing separator (e.g. '/' or '\') + * (e.g. 
'modules/texts/rawtext/webster/') + * fileMode - open mode for the files (O_RDONLY, etc.) + * blockType - verse, chapter, book, etc. + */ + +zVerse::zVerse(const char *ipath, int fileMode, int blockType, SWCompress *icomp) +{ + char buf[127]; + + nl = '\n'; + path = 0; + cacheBufIdx = -1; + cacheTestament = 0; + cacheBuf = 0; + dirtyCache = false; + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + compressor = (icomp) ? icomp : new SWCompress(); + + if (fileMode == -1) { // try read/write if possible + fileMode = O_RDWR; + } + + sprintf(buf, "%s/ot.%czs", path, uniqueIndexID[blockType]); + idxfp[0] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/nt.%czs", path, uniqueIndexID[blockType]); + idxfp[1] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/ot.%czz", path, uniqueIndexID[blockType]); + textfp[0] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/nt.%czz", path, uniqueIndexID[blockType]); + textfp[1] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/ot.%czv", path, uniqueIndexID[blockType]); + compfp[0] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/nt.%czv", path, uniqueIndexID[blockType]); + compfp[1] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + instance++; +} + + +/****************************************************************************** + * zVerse Destructor - Cleans up instance of zVerse + */ + +zVerse::~zVerse() +{ + int loop1; + + if (cacheBuf) { + flushCache(); + free(cacheBuf); + } + + if (path) + delete [] path; + + if (compressor) + delete compressor; + + --instance; + + for (loop1 = 0; loop1 < 2; loop1++) { + FileMgr::systemFileMgr.close(idxfp[loop1]); + FileMgr::systemFileMgr.close(textfp[loop1]); + FileMgr::systemFileMgr.close(compfp[loop1]); + } +} + + 
+/****************************************************************************** + * zVerse::findoffset - Finds the offset of the key verse from the indexes + * + * + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * book - book to find (0 - testament introduction) + * chapter - chapter to find (0 - book introduction) + * verse - verse to find (0 - chapter introduction) + * start - address to store the starting offset + * size - address to store the size of the entry + */ + +void zVerse::findoffset(char testmt, long idxoff, long *start, unsigned short *size) +{ + // set start to offset in + // set size to + // set + unsigned long ulBuffNum=0; // buffer number + unsigned long ulVerseStart=0; // verse offset within buffer + unsigned short usVerseSize=0; // verse size + unsigned long ulCompOffset=0; // compressed buffer start + unsigned long ulCompSize=0; // buffer size compressed + unsigned long ulUnCompSize=0; // buffer size uncompressed + char *pcCompText=NULL; // compressed text + + *start = *size = 0; + //printf ("Finding offset %ld\n", idxoff); + idxoff *= 10; + if (!testmt) { + testmt = ((idxfp[0]) ? 
1:2); + } + + // assert we have and valid file descriptor + if (compfp[testmt-1]->getFd() < 1) + return; + + long newOffset = lseek(compfp[testmt-1]->getFd(), idxoff, SEEK_SET); + if (newOffset == idxoff) { + if (read(compfp[testmt-1]->getFd(), &ulBuffNum, 4) != 4) { + printf ("Error reading ulBuffNum\n"); + return; + } + } + else return; + + ulBuffNum = swordtoarch32(ulBuffNum); + + if (read(compfp[testmt-1]->getFd(), &ulVerseStart, 4) < 2) + { + printf ("Error reading ulVerseStart\n"); + return; + } + if (read(compfp[testmt-1]->getFd(), &usVerseSize, 2) < 2) + { + printf ("Error reading usVerseSize\n"); + return; + } + + *start = swordtoarch32(ulVerseStart); + *size = swordtoarch16(usVerseSize); + + if (*size) { + if (((long) ulBuffNum == cacheBufIdx) && (testmt == cacheTestament) && (cacheBuf)) { + // have the text buffered + return; + } + + //printf ("Got buffer number{%ld} versestart{%ld} versesize{%d}\n", ulBuffNum, ulVerseStart, usVerseSize); + + + if (lseek(idxfp[testmt-1]->getFd(), ulBuffNum*12, SEEK_SET)!=(long) ulBuffNum*12) + { + printf ("Error seeking compressed file index\n"); + return; + } + if (read(idxfp[testmt-1]->getFd(), &ulCompOffset, 4)<4) + { + printf ("Error reading ulCompOffset\n"); + return; + } + if (read(idxfp[testmt-1]->getFd(), &ulCompSize, 4)<4) + { + printf ("Error reading ulCompSize\n"); + return; + } + if (read(idxfp[testmt-1]->getFd(), &ulUnCompSize, 4)<4) + { + printf ("Error reading ulUnCompSize\n"); + return; + } + + ulCompOffset = swordtoarch32(ulCompOffset); + ulCompSize = swordtoarch32(ulCompSize); + ulUnCompSize = swordtoarch32(ulUnCompSize); + + if (lseek(textfp[testmt-1]->getFd(), ulCompOffset, SEEK_SET)!=(long)ulCompOffset) + { + printf ("Error: could not seek to right place in compressed text\n"); + return; + } + pcCompText = new char[ulCompSize]; + + if (read(textfp[testmt-1]->getFd(), pcCompText, ulCompSize)<(long)ulCompSize) + { + printf ("Error reading compressed text\n"); + return; + } + 
compressor->zBuf(&ulCompSize, pcCompText); + + if (cacheBuf) { + flushCache(); + free(cacheBuf); + } + + unsigned long len = 0; + compressor->Buf(0, &len); + cacheBuf = (char *)calloc(len + 1, 1); + memcpy(cacheBuf, compressor->Buf(), len); + + cacheTestament = testmt; + cacheBufIdx = ulBuffNum; + if (pcCompText) + delete [] pcCompText; + } +} + + +/****************************************************************************** + * zVerse::zreadtext - gets text at a given offset + * + * ENT: testmt - testament file to search in (0 - Old; 1 - New) + * start - starting offset where the text is located in the file + * size - size of text entry + 1 (null) + * buf - buffer to store text + * + */ + +void zVerse::zreadtext(char testmt, long start, unsigned short size, char *inbuf) +{ + memset(inbuf, 0, size); + if (size > 2) { + strncpy(inbuf, &(cacheBuf[start]), size-2); + } +} + + +/****************************************************************************** + * zVerse::settext - Sets text for current offset + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * idxoff - offset into .vss + * buf - buffer to store + * len - length of buffer (0 - null terminated) + */ + +void zVerse::settext(char testmt, long idxoff, const char *buf, long len) { + + len = (len < 0) ? strlen(buf) : len; + if (!testmt) + testmt = ((idxfp[0]) ? 
1:2); + if ((!dirtyCache) || (cacheBufIdx < 0)) { + cacheBufIdx = lseek(idxfp[testmt-1]->getFd(), 0, SEEK_END) / 12; + cacheTestament = testmt; + if (cacheBuf) + free(cacheBuf); + cacheBuf = (char *)calloc(len + 1, 1); + } + else cacheBuf = (char *)((cacheBuf)?realloc(cacheBuf, strlen(cacheBuf)+(len + 1)):calloc((len + 1), 1)); + + dirtyCache = true; + + unsigned long start, outstart; + unsigned long outBufIdx = cacheBufIdx; + unsigned short size; + unsigned short outsize; + + idxoff *= 10; + size = outsize = len; + + start = strlen(cacheBuf); + + if (!size) + start = outBufIdx = 0; + + outBufIdx = archtosword32(outBufIdx); + outstart = archtosword32(start); + outsize = archtosword16(size); + + lseek(compfp[testmt-1]->getFd(), idxoff, SEEK_SET); + write(compfp[testmt-1]->getFd(), &outBufIdx, 4); + write(compfp[testmt-1]->getFd(), &outstart, 4); + write(compfp[testmt-1]->getFd(), &outsize, 2); + strcat(cacheBuf, buf); +} + + +void zVerse::flushCache() { + if (dirtyCache) { + unsigned long idxoff; + unsigned long start, outstart; + unsigned long size, outsize; + unsigned long zsize, outzsize; + + idxoff = cacheBufIdx * 12; + size = outsize = zsize = outzsize = strlen(cacheBuf); + if (size) { +// if (compressor) { +// delete compressor; +// compressor = new LZSSCompress(); +// } + compressor->Buf(cacheBuf); + compressor->zBuf(&zsize); + outzsize = zsize; + + start = outstart = lseek(textfp[cacheTestament-1]->getFd(), 0, SEEK_END); + + outstart = archtosword32(start); + outsize = archtosword32(size); + outzsize = archtosword32(zsize); + + write(textfp[cacheTestament-1]->getFd(), compressor->zBuf(&zsize), zsize); + + lseek(idxfp[cacheTestament-1]->getFd(), idxoff, SEEK_SET); + write(idxfp[cacheTestament-1]->getFd(), &outstart, 4); + write(idxfp[cacheTestament-1]->getFd(), &outzsize, 4); + write(idxfp[cacheTestament-1]->getFd(), &outsize, 4); + } + dirtyCache = false; + } +} + +/****************************************************************************** + * 
RawVerse::linkentry - links one entry to another + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * destidxoff - dest offset into .vss + * srcidxoff - source offset into .vss + */ + +void zVerse::linkentry(char testmt, long destidxoff, long srcidxoff) { + long bufidx; + long start; + unsigned short size; + + destidxoff *= 10; + srcidxoff *= 10; + + if (!testmt) + testmt = ((idxfp[1]) ? 1:2); + + // get source + lseek(compfp[testmt-1]->getFd(), srcidxoff, SEEK_SET); + read(compfp[testmt-1]->getFd(), &bufidx, 4); + read(compfp[testmt-1]->getFd(), &start, 4); + read(compfp[testmt-1]->getFd(), &size, 2); + + // write dest + lseek(compfp[testmt-1]->getFd(), destidxoff, SEEK_SET); + write(compfp[testmt-1]->getFd(), &bufidx, 4); + write(compfp[testmt-1]->getFd(), &start, 4); + write(compfp[testmt-1]->getFd(), &size, 2); +} + + +/****************************************************************************** + * RawVerse::CreateModule - Creates new module files + * + * ENT: path - directory to store module files + * RET: error status + */ + +char zVerse::createModule(const char *ipath, int blockBound) +{ + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd, *fd2; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + sprintf(buf, "%s/ot.%czs", path, uniqueIndexID[blockBound]); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s/nt.%czs", path, uniqueIndexID[blockBound]); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s/ot.%czz", path, uniqueIndexID[blockBound]); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + 
FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s/nt.%czz", path, uniqueIndexID[blockBound]); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s/ot.%czv", path, uniqueIndexID[blockBound]); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + + sprintf(buf, "%s/nt.%czv", path, uniqueIndexID[blockBound]); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + + VerseKey vk; + vk.Headings(1); + long offset = 0; + short size = 0; + for (vk = TOP; !vk.Error(); vk++) { + write((vk.Testament() == 1) ? fd->getFd() : fd2->getFd(), &offset, 4); //compBufIdxOffset + write((vk.Testament() == 1) ? fd->getFd() : fd2->getFd(), &offset, 4); + write((vk.Testament() == 1) ? fd->getFd() : fd2->getFd(), &size, 2); + } + + FileMgr::systemFileMgr.close(fd); + FileMgr::systemFileMgr.close(fd2); + + delete [] path; +/* + RawVerse rv(path); + VerseKey mykey("Rev 22:21"); +*/ + + return 0; +} + + +/****************************************************************************** + * zVerse::preptext - Prepares the text before returning it to external + * objects + * + * ENT: buf - buffer where text is stored and where to store the prep'd + * text. + */ + +void zVerse::preptext(char *buf) +{ + char *to, *from, space = 0, cr = 0, realdata = 0, nlcnt = 0; + + for (to = from = buf; *from; from++) { + switch (*from) { + case 10: + if (!realdata) + continue; + space = (cr) ? 
0 : 1; + cr = 0; + nlcnt++; + if (nlcnt > 1) { +// *to++ = nl; + *to++ = nl; +// nlcnt = 0; + } + continue; + case 13: + if (!realdata) + continue; + *to++ = nl; + space = 0; + cr = 1; + continue; + } + realdata = 1; + nlcnt = 0; + if (space) { + space = 0; + if (*from != ' ') { + *to++ = ' '; + from--; + continue; + } + } + *to++ = *from; + } + *to = 0; + + if (to > buf) { + for (to--; to > buf; to--) { // remove trailing excess + if ((*to == 10) || (*to == ' ')) + *to = 0; + else break; + } + } +} diff --git a/src/modules/filters/cipherfil.cpp b/src/modules/filters/cipherfil.cpp new file mode 100644 index 0000000..ad55396 --- /dev/null +++ b/src/modules/filters/cipherfil.cpp @@ -0,0 +1,38 @@ +/****************************************************************************** + * + * cipherfil - SWFilter decendant to decipher a module + */ + + +#include <stdlib.h> +#include <string.h> +#include <cipherfil.h> + + +CipherFilter::CipherFilter(const char *key) { + cipher = new SWCipher((unsigned char *)key); +} + + +CipherFilter::~CipherFilter() { + delete cipher; +} + + +SWCipher *CipherFilter::getCipher() { + return cipher; +} + + +char CipherFilter::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) { + unsigned int len; +// len = strlen(text); + len = maxlen; + if (len > 0) { + cipher->cipherBuf(&len, text); + strncpy(text, cipher->Buf(), (len < (unsigned int)maxlen) ? len : maxlen); + } + text[maxlen] = 0; + text[maxlen+1] = 0; + return 0; +} diff --git a/src/modules/filters/gbffootnotes.cpp b/src/modules/filters/gbffootnotes.cpp new file mode 100644 index 0000000..c5b7b90 --- /dev/null +++ b/src/modules/filters/gbffootnotes.cpp @@ -0,0 +1,118 @@ +/****************************************************************************** + * + * gbffootnotes - SWFilter decendant to hide or show footnotes + * in a GBF module. 
 */


#include <stdlib.h>
#include <string.h>
#include <gbffootnotes.h>
#ifndef __GNUC__
#else
#include <unixstr.h>
#endif


const char GBFFootnotes::on[] = "On";
const char GBFFootnotes::off[] = "Off";
const char GBFFootnotes::optName[] = "Footnotes";
const char GBFFootnotes::optTip[] = "Toggles Footnotes On and Off if they exist";


// Starts with the option off and registers the two selectable values.
GBFFootnotes::GBFFootnotes() {
	option = false;
	options.push_back(on);
	options.push_back(off);
}


GBFFootnotes::~GBFFootnotes() {
}

// Turns the option on when ival equals "On" (case-insensitive), off otherwise.
void GBFFootnotes::setOptionValue(const char *ival)
{
	option = (!stricmp(ival, on));
}

// Returns "On" or "Off" according to the current option state.
const char *GBFFootnotes::getOptionValue()
{
	return (option) ? on:off;
}

// When the option is off, rewrites text in place: every <R...> token is
// dropped, text between <RF> and <Rf> is hidden, and <WTP>, <WTS>, <WTA>
// tokens are dropped; everything else is kept.  When the option is on the
// text is left untouched.  Always returns 0.
//
// The text is first moved to the right end of the maxlen-byte buffer so the
// filtered output can be written from the start without overtaking the input.
// NOTE(review): a '>' seen before any '<' inspects token[] before it has been
// initialised.
char GBFFootnotes::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
{
	if (!option) {	// if we don't want footnotes
		char *to, *from, token[4096]; // cheese.  Fix.
		int tokpos = 0;
		bool intoken = false;
		int len;
		bool hide = false;

		len = strlen(text) + 1;	// shift string to right of buffer
		if (len < maxlen) {
			memmove(&text[maxlen - len], text, len);
			from = &text[maxlen - len];
		}
		else	from = text;	// -------------------------------

		for (to = text; *from; from++) {
			if (*from == '<') {
				intoken = true;
				tokpos = 0;
//				memset(token, 0, 4096);
				token[0] = 0;
				token[1] = 0;
				token[2] = 0;
				continue;
			}
			if (*from == '>') {	// process tokens
				intoken = false;
				switch (*token) {
				case 'R':				// Reference
					switch(token[1]) {
					case 'F':               // Begin footnote
						hide = true;
						break;
					case 'f':               // end footnote
						hide = false;
						break;
					}
					continue;	// skip token
				case 'W':
					if (token[1] == 'T') {
						switch (token[2]) {
						case 'P':
						case 'S':
						case 'A':
							continue; // remove this token
						default:
							break;
						}
					}
				}
				// if not a footnote token, keep token in text
				if (!hide) {
					*to++ = '<';
					for (char *tok = token; *tok; tok++)
						*to++ = *tok;
					*to++ = '>';
				}
				continue;
			}
			if (intoken) {
				// characters beyond the token buffer's capacity are dropped
				if (tokpos < 4090)
					token[tokpos++] = *from;
					token[tokpos+2] = 0;	// +2 cuz we init token with 2 extra '0' because of switch statement
			}
			else	{
				if (!hide) {
					*to++ = *from;
				}
			}
		}
		// two terminators are written
		*to++ = 0;
		*to = 0;
	}
	return 0;
}
diff --git a/src/modules/filters/gbfheadings.cpp b/src/modules/filters/gbfheadings.cpp
new file mode 100644
index 0000000..590e2fa
--- /dev/null
+++ b/src/modules/filters/gbfheadings.cpp
@@ -0,0 +1,107 @@
/******************************************************************************
 *
 * gbfheadings -	SWFilter decendant to hide or show headings
 *			in a GBF module.
 */


#include <stdlib.h>
#include <string.h>
#include <gbfheadings.h>
#ifndef __GNUC__
#else
#include <unixstr.h>
#endif


const char GBFHeadings::on[] = "On";
const char GBFHeadings::off[] = "Off";
const char GBFHeadings::optName[] = "Headings";
const char GBFHeadings::optTip[] = "Toggles Headings On and Off if they exist";


// Starts with the option off and registers the two selectable values.
GBFHeadings::GBFHeadings() {
	option = false;
	options.push_back(on);
	options.push_back(off);
}


GBFHeadings::~GBFHeadings() {
}

// Turns the option on when ival equals "On" (case-insensitive), off otherwise.
void GBFHeadings::setOptionValue(const char *ival)
{
	option = (!stricmp(ival, on));
}

// Returns "On" or "Off" according to the current option state.
const char *GBFHeadings::getOptionValue()
{
	return (option) ? on:off;
}

// When the option is off, rewrites text in place: every <T...> token is
// dropped and text between <TS> and <Ts> is hidden; everything else is kept.
// When the option is on the text is left untouched.  Always returns 0.
//
// The text is first moved to the right end of the maxlen-byte buffer so the
// filtered output can be written from the start without overtaking the input.
// NOTE(review): a '>' seen before any '<' inspects token[] before it has been
// initialised.
char GBFHeadings::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
{
	if (!option) {	// if we don't want headings
		char *to, *from, token[2048]; // cheese.  Fix.
		int tokpos = 0;
		bool intoken = false;
		int len;
		bool hide = false;

		len = strlen(text) + 1;	// shift string to right of buffer
		if (len < maxlen) {
			memmove(&text[maxlen - len], text, len);
			from = &text[maxlen - len];
		}
		else	from = text;	// -------------------------------

		for (to = text; *from; from++) {
			if (*from == '<') {
				intoken = true;
				tokpos = 0;
//				memset(token, 0, 2048);
				token[0] = 0;
				token[1] = 0;
				token[2] = 0;
				continue;
			}
			if (*from == '>') {	// process tokens
				intoken = false;
				switch (*token) {
				case 'T':				// Reference
					switch(token[1]) {
					case 'S':               // Begin heading
						hide = true;
						break;
					case 's':               // end heading
						hide = false;
						break;
					}
					continue;	// skip token
				}
				// if not a heading token, keep token in text
				if (!hide) {
					*to++ = '<';
					for (char *tok = token; *tok; tok++)
						*to++ = *tok;
					*to++ = '>';
				}
				continue;
			}
			if (intoken) {
				// characters beyond the token buffer's capacity are dropped
				if (tokpos < 2045)
					token[tokpos++] = *from;
					token[tokpos+2] = 0;
			}
			else	{
				if (!hide) {
					*to++ = *from;
				}
			}
		}
		// two terminators are written
		*to++ = 0;
		*to = 0;
	}
	return 0;
}
diff --git a/src/modules/filters/gbfhtml.cpp b/src/modules/filters/gbfhtml.cpp
new file mode 100644
index 0000000..73d445a
--- /dev/null
+++ b/src/modules/filters/gbfhtml.cpp
@@ -0,0 +1,536 @@
/***************************************************************************
                          gbfhtml.cpp  -  description
                             -------------------
    begin                : Thu Jun 24 1999
    copyright            : (C) 1999 by Torsten Uhlmann
    email                : TUhlmann@gmx.de
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <gbfhtml.h> + + +GBFHTML::GBFHTML() +{ +} + + +char GBFHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char *to, *from, token[2048]; + int tokpos = 0; + bool intoken = false; + bool hasFootnotePreTag = false; + bool isRightJustified = false; + bool isCentered = false; + int len; + const char *tok; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else + from = text; // ------------------------------- + + for (to = text; *from; from++) + { + if (*from == '\n') { + *from = ' '; + } + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { + intoken = false; + // process desired tokens + switch (*token) { + case 'W': // Strongs + switch(token[1]) + { + case 'G': // Greek + case 'H': // Hebrew + case 'T': // Tense + *to++ = ' '; + *to++ = '<'; + *to++ = 's'; + *to++ = 'm'; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'l'; + *to++ = '>'; + *to++ = '<'; + *to++ = 'e'; + *to++ = 'm'; + *to++ = '>'; + for (tok = token+2; *tok; tok++) + *to++ = *tok; + *to++ = '<'; + *to++ = '/'; + *to++ = 'e'; + *to++ = 'm'; + *to++ = '>'; + *to++ = '<'; + *to++ = '/'; + *to++ = 's'; + *to++ = 'm'; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'l'; + *to++ = '>'; + *to++ = ' '; + continue; + } + break; + case 'R': + switch(token[1]) + { + case 'X': + *to++ = '<'; + *to++ = 'a'; + *to++ = ' '; + *to++ = 'h'; + *to++ = 'r'; + *to++ = 'e'; + *to++ = 'f'; + *to++ = '='; + *to++ = '\"'; + for (tok = token + 3; *tok; tok++) { + if(*tok != '<' && *tok+1 != 'R' && *tok+2 != 'x') { + *to++ = *tok; + } + else { + break; + } + } + *to++ = '\"'; + *to++ = '>'; + continue; + case 'x': + *to++ = '<'; + *to++ = '/'; + *to++ = 'a'; + *to++ = 
'>'; + continue; + case 'B': //word(s) explained in footnote + *to++ = '<'; + *to++ = 'i'; + *to++ = '>'; + hasFootnotePreTag = true; //we have the RB tag + continue; + case 'F': // footnote begin + if (hasFootnotePreTag) { + *to++ = '<'; + *to++ = '/'; + *to++ = 'i'; + *to++ = '>'; + *to++ = ' '; + } + *to++ = '<'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = ' '; + *to++ = 'c'; + *to++ = 'o'; + *to++ = 'l'; + *to++ = 'o'; + *to++ = 'r'; + *to++ = '='; + *to++ = '\"'; + *to++ = '#'; + *to++ = '8'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '\"'; + *to++ = '>'; + + *to++ = ' '; + *to++ = '<'; + *to++ = 's'; + *to++ = 'm'; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'l'; + *to++ = '>'; + *to++ = '('; + + continue; + case 'f': // footnote end + *to++ = ')'; + *to++ = '<'; + *to++ = '/'; + *to++ = 's'; + *to++ = 'm'; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'l'; + *to++ = '>'; + *to++ = ' '; + *to++ = '<'; + *to++ = '/'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = '>'; + hasFootnotePreTag = false; + continue; + } + break; + + case 'F': // font tags + switch(token[1]) + { + case 'I': // italic start + *to++ = '<'; + *to++ = 'i'; + *to++ = '>'; + continue; + case 'i': // italic end + *to++ = '<'; + *to++ = '/'; + *to++ = 'i'; + *to++ = '>'; + continue; + case 'B': // bold start + *to++ = '<'; + *to++ = 'b'; + *to++ = '>'; + continue; + case 'b': // bold end + *to++ = '<'; + *to++ = '/'; + *to++ = 'b'; + *to++ = '>'; + continue; + case 'R': // words of Jesus begin + *to++ = '<'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = ' '; + *to++ = 'c'; + *to++ = 'o'; + *to++ = 'l'; + *to++ = 'o'; + *to++ = 'r'; + *to++ = '='; + *to++ = '#'; + *to++ = 'F'; + *to++ = 'F'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '>'; + continue; + case 'r': // words of Jesus end + *to++ = '<'; + *to++ = '/'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ 
= 't'; + *to++ = '>'; + continue; + case 'U': // Underline start + *to++ = '<'; + *to++ = 'u'; + *to++ = '>'; + continue; + case 'u': // Underline end + *to++ = '<'; + *to++ = '/'; + *to++ = 'u'; + *to++ = '>'; + continue; + case 'O': // Old Testament quote begin + *to++ = '<'; + *to++ = 'c'; + *to++ = 'i'; + *to++ = 't'; + *to++ = 'e'; + *to++ = '>'; + continue; + case 'o': // Old Testament quote end + *to++ = '<'; + *to++ = '/'; + *to++ = 'c'; + *to++ = 'i'; + *to++ = 't'; + *to++ = 'e'; + *to++ = '>'; + continue; + case 'S': // Superscript begin + *to++ = '<'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'p'; + *to++ = '>'; + continue; + case 's': // Superscript end + *to++ = '<'; + *to++ = '/'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'p'; + *to++ = '>'; + continue; + case 'V': // Subscript begin + *to++ = '<'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'b'; + *to++ = '>'; + continue; + case 'v': // Subscript end + *to++ = '<'; + *to++ = '/'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'b'; + *to++ = '>'; + continue; + case 'N': + *to++ = '<'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = ' '; + *to++ = 'f'; + *to++ = 'a'; + *to++ = 'c'; + *to++ = 'e'; + *to++ = '='; + *to++ = '"'; + for (tok = token + 2; *tok; tok++) + *to++ = *tok; + *to++ = '"'; + *to++ = '>'; + continue; + case 'n': + *to++ = '<'; + *to++ = '/'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = '>'; + continue; + } + break; + case 'C': // special character tags + switch(token[1]) + { + case 'A': // ASCII value + *to++ = (char)atoi(&token[2]); + continue; + case 'G': + //*to++ = ' '; + continue; + case 'L': // line break + *to++ = '<'; + *to++ = 'b'; + *to++ = 'r'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + *to++ = ' '; + continue; + case 'M': // new paragraph + *to++ = '<'; + *to++ = 'b'; + *to++ = 'r'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + continue; + case 'T': + //*to++ = ' '; + continue; + } + break; + case 'J': //Justification + 
switch(token[1]) + { + case 'R': //right + *to++ = '<'; + *to++ = 'd'; + *to++ = 'i'; + *to++ = 'v'; + *to++ = ' '; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = 'n'; + *to++ = '='; + *to++ = '\"'; + *to++ = 'r'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = 'h'; + *to++ = 't'; + *to++ = '\"'; + *to++ = '>'; + isRightJustified = true; + continue; + + case 'C': //center + *to++ = '<'; + *to++ = 'd'; + *to++ = 'i'; + *to++ = 'v'; + *to++ = ' '; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = 'n'; + *to++ = '='; + *to++ = '\"'; + *to++ = 'c'; + *to++ = 'e'; + *to++ = 'n'; + *to++ = 't'; + *to++ = 'e'; + *to++ = 'r'; + *to++ = '\"'; + *to++ = '>'; + isCentered = true; + continue; + + case 'L': //left, reset right and center + if (isCentered) { + *to++ = '<'; + *to++ = '/'; + *to++ = 'c'; + *to++ = 'e'; + *to++ = 'n'; + *to++ = 't'; + *to++ = 'e'; + *to++ = 'r'; + *to++ = '>'; + isCentered = false; + } + if (isRightJustified) { + *to++ = '<'; + *to++ = '/'; + *to++ = 'd'; + *to++ = 'i'; + *to++ = 'v'; + *to++ = '>'; + isRightJustified = false; + } + continue; + } + break; + case 'T': // title formatting + switch(token[1]) + { + case 'T': // Book title begin + *to++ = '<'; + *to++ = 'b'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = '>'; + continue; + case 't': + *to++ = '<'; + *to++ = '/'; + *to++ = 'b'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = '>'; + continue;/* + case 'S': + *to++ = '<'; + *to++ = 'b'; + *to++ = 'r'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + *to++ = '<'; + *to++ = 'b'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = '>'; + continue; + case 's': + *to++ = '<'; + *to++ = '/'; + *to++ = 'b'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = '>'; + *to++ = '<'; + *to++ = 'b'; + *to++ = 'r'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + continue;*/ + } + break; + + case 'P': // special formatting + switch(token[1]) + { + case 'P': // Poetry begin + *to++ = '<'; + *to++ = 'c'; + *to++ = 'i'; + *to++ = 't'; + *to++ = 'e'; + *to++ = '>'; 
+ continue; + case 'p': + *to++ = '<'; + *to++ = '/'; + *to++ = 'c'; + *to++ = 'i'; + *to++ = 't'; + *to++ = 'e'; + *to++ = '>'; + continue; + } + break; + } + continue; + } + if (intoken) { + if (tokpos < 2045) { + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + } + else + *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/gbfhtmlhref.cpp b/src/modules/filters/gbfhtmlhref.cpp new file mode 100644 index 0000000..30b27ba --- /dev/null +++ b/src/modules/filters/gbfhtmlhref.cpp @@ -0,0 +1,148 @@ +/*************************************************************************** + gbfhtmlhref.cpp - GBF to HTML filter with hrefs + for strongs and morph tags + ------------------- + begin : 2001-09-03 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <gbfhtmlhref.h> + +GBFHTMLHREF::GBFHTMLHREF() { + setTokenStart("<"); + setTokenEnd(">"); + + setTokenCaseSensitive(true); + + addTokenSubstitute("Rf", ")</small></font>"); + addTokenSubstitute("Rx", "</a>"); + addTokenSubstitute("FI", "<i>"); // italics begin + addTokenSubstitute("Fi", "</i>"); + addTokenSubstitute("FB", "<n>"); // bold begin + addTokenSubstitute("Fb", "</n>"); + addTokenSubstitute("FR", "<font color=\"#FF0000\">"); // words of Jesus begin + addTokenSubstitute("Fr", "</font>"); + addTokenSubstitute("FU", "<u>"); // underline begin + addTokenSubstitute("Fu", "</u>"); + addTokenSubstitute("FO", "<cite>"); // Old Testament quote begin + addTokenSubstitute("Fo", "</cite>"); + addTokenSubstitute("FS", "<sup>"); // Superscript begin// Subscript begin + addTokenSubstitute("Fs", "</sup>"); + addTokenSubstitute("FV", "<sub>"); // Subscript begin + addTokenSubstitute("Fv", "</sub>"); + addTokenSubstitute("TT", "<big>"); // Book title begin + addTokenSubstitute("Tt", "</big>"); + addTokenSubstitute("PP", "<cite>"); // poetry begin + addTokenSubstitute("Pp", "</cite>"); + addTokenSubstitute("Fn", "</font>"); // font end + addTokenSubstitute("CL", "<br />"); // new line + addTokenSubstitute("CM", "<!P><br />"); // paragraph <!P> is a non showing comment that can be changed in the front end to <P> if desired + addTokenSubstitute("CG", ""); // ??? + addTokenSubstitute("CT", ""); // ??? 
+ addTokenSubstitute("JR", "<div align=\"right\">"); // right align begin + addTokenSubstitute("JC", "<div align=\"center\">"); // center align begin + addTokenSubstitute("JL", "</div>"); // align end + +} + + +bool GBFHTMLHREF::handleToken(char **buf, const char *token, DualStringMap &userData) { + const char *tok; + + if (!substituteToken(buf, token)) { + if (!strncmp(token, "WG", 2) || !strncmp(token, "WH", 2)) { // strong's numbers + pushString(buf, " <small><em><<a href=\"#"); + for (tok = token+1; *tok; tok++) + //if(token[i] != '\"') + *(*buf)++ = *tok; + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + for (tok = token + 2; *tok; tok++) + //if(token[i] != '\"') + *(*buf)++ = *tok; + pushString(buf, "</a>></em></small>"); + } + + else if (!strncmp(token, "WTG", 3) || !strncmp(token, "WTH", 3)) { // strong's numbers tense + pushString(buf, " <small><em>(<A HREF=\"#"); + for (tok = token + 2; *tok; tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + for (tok = token + 3; *tok; tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + pushString(buf, "</a>)</em></small>"); + } + + else if (!strncmp(token, "WT", 2) && strncmp(token, "WTH", 3) && strncmp(token, "WTG", 3)) { // morph tags + pushString(buf, " <small><em>(<a href=\"M"); + for (tok = token + 2; *tok; tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + for (tok = token + 2; *tok; tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + pushString(buf, "</a>)</em></small>"); + } + + else if (!strncmp(token, "RX", 2)) { + pushString(buf, "<a href=\""); + for (tok = token + 3; *tok; tok++) { + if(*tok != '<' && *tok+1 != 'R' && *tok+2 != 'x') { + *(*buf)++ = *tok; + } + else { + break; + } + } + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + } + + else if (!strncmp(token, "RB", 2)) { + pushString(buf, "<i>"); + userData["hasFootnotePreTag"] = "true"; + } + + else if (!strncmp(token, "RF", 2)) { + if(userData["hasFootnotePreTag"] == "true") { + 
userData["hasFootnotePreTag"] = "false"; + pushString(buf, "</i> "); + } + pushString(buf, "<font color=\"#800000\"><small> ("); + } + + else if (!strncmp(token, "FN", 2)) { + pushString(buf, "<font face=\""); + for (tok = token + 2; *tok; tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + } + + else if (!strncmp(token, "CA", 2)) { // ASCII value + *(*buf)++ = (char)atoi(&token[2]); + } + + else { + return false; + } + } + return true; +} diff --git a/src/modules/filters/gbfmorph.cpp b/src/modules/filters/gbfmorph.cpp new file mode 100644 index 0000000..f8d336e --- /dev/null +++ b/src/modules/filters/gbfmorph.cpp @@ -0,0 +1,98 @@ +/****************************************************************************** + * + * gbfmorph - SWFilter decendant to hide or show morph tags + * in a GBF module. + */ + + +#include <stdlib.h> +#include <string.h> +#include <gbfmorph.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char GBFMorph::on[] = "On"; +const char GBFMorph::off[] = "Off"; +const char GBFMorph::optName[] = "Morphological Tags"; +const char GBFMorph::optTip[] = "Toggles Morphological Tags On and Off if they exist"; + + +GBFMorph::GBFMorph() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +GBFMorph::~GBFMorph() { +} + +void GBFMorph::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *GBFMorph::getOptionValue() +{ + return (option) ? on:off; +} + +char GBFMorph::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want morph tags + char *to, *from, token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (*token == 'W' && token[1] == 'T') { // Morph + if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { + if (lastspace) + to--; + } + continue; + } + // if not a morph tag token, keep token in text + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + *to++ = *from; + lastspace = (*from == ' '); + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/gbfosis.cpp b/src/modules/filters/gbfosis.cpp new file mode 100644 index 0000000..43161d4 --- /dev/null +++ b/src/modules/filters/gbfosis.cpp @@ -0,0 +1,313 @@ +/****************************************************************************** + * + * gbfstrongs - SWFilter decendant to hide or show strongs number + * in a GBF module. + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <stdarg.h> +#include <gbfosis.h> +#include <swmodule.h> +#include <versekey.h> +#include <stdarg.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +GBFOSIS::GBFOSIS() { +} + + +GBFOSIS::~GBFOSIS() { +} + + +char GBFOSIS::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) { + + char *to, *from, token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + int word = 1; + char val[128]; + char buf[128]; + char wordstr[5]; + char *valto; + char *ch; + char *textStart, *textEnd; + char *wordStart, *wordEnd; + bool newText = false; + bool newWord = false; + string tmp; + bool suspendTextPassThru = false; + bool keepToken = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; + + textStart = from; + wordStart = text; + + // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + textEnd = from-1; + wordEnd = to; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + keepToken = false; + suspendTextPassThru = false; + newWord = true; + + + while (wordStart < (text+maxlen)) { +// if (strchr(" ,;.?!()'\"", *wordStart)) + if (strchr(";, .:?!()'\"", *wordStart)) + wordStart++; + else break; + } + while (wordEnd > wordStart) { + if (strchr(" ,;.:?!()'\"", *wordEnd)) + wordEnd--; + else break; + } + + // Scripture Reference + if (!strncmp(token, "scripRef", 8)) { + // pushString(buf, "<reference work=\"Bible.KJV\" reference=\""); + suspendTextPassThru = true; + newText = true; + } + else if (!strncmp(token, "/scripRef", 9)) { + tmp = ""; + tmp.append(textStart, (int)(textEnd - textStart)+1); + pushString(&to, convertToOSIS(tmp.c_str(), key)); + suspendTextPassThru = false; + } + + // Footnote + if (!strcmp(token, "RF")) { + // pushString(buf, "<reference work=\"Bible.KJV\" reference=\""); + suspendTextPassThru = true; + newText = true; + } + else if (!strcmp(token, "Rf")) { + tmp = "<note type=\"x-StudyNote\"><notePart type=\"x-MainText\">"; + tmp.append(textStart, (int)(textEnd - textStart)+1); + tmp += "</notePart></note>"; + pushString(&to, tmp.c_str()); + suspendTextPassThru 
= false; + } + + // Figure + else if (!strncmp(token, "img ", 4)) { + const char *src = strstr(token, "src"); + if (!src) // assert we have a src attribute + return false; + + pushString(&to, "<figure src=\""); + const char *c; + for (c = src;((*c) && (*c != '"')); c++); + + /* uncomment for SWORD absolute path logic + if (*(c+1) == '/') { + pushString(buf, "file:"); + pushString(buf, module->getConfigEntry("AbsoluteDataPath")); + if (*((*buf)-1) == '/') + c++; // skip '/' + } + end of uncomment for asolute path logic */ + + for (c++;((*c) && (*c != '"')); c++) + *to++ = *c; + + pushString(&to, "\" />"); + return true; + } + + // Strongs numbers + else if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) { // Strongs + if (module->isProcessEntryAttributes()) { + valto = val; + for (unsigned int i = 1; ((token[i]) && (i < 150)); i++) + *valto++ = token[i]; + *valto = 0; + // normal strongs number + strstrip(val); + if (!strncmp(wordStart, "<w ", 3)) { + sprintf(buf, "lemma=\"x-Strong:%s\" ", val); + memmove(wordStart+3+strlen(buf), wordStart+3, (to-wordStart)+1); + memcpy(wordStart+3, buf, strlen(buf)); + to+=strlen(buf); + } + else { + sprintf(buf, "<w lemma=\"x-Strong:%s\">", val); + memmove(wordStart+strlen(buf), wordStart, (to-wordStart)+1); + memcpy(wordStart, buf, strlen(buf)); + to+=strlen(buf); + pushString(&to, "</w>"); + module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val; + } + } + } + + // Morphology + else if (*token == 'W' && token[1] == 'T' && (token[2] == 'G' || token[2] == 'H')) { // Strongs + valto = val; + for (unsigned int i = 1; ((token[i]) && (i < 150)); i++) + *valto++ = token[i]; + *valto = 0; + strstrip(val); + if (!strncmp(wordStart, "<w ", 3)) { + sprintf(buf, "morph=\"x-%s:%s\" ", "StrongsMorph", val); + memmove(wordStart+3+strlen(buf), wordStart+3, (to-wordStart)+1); + memcpy(wordStart+3, buf, strlen(buf)); + to+=strlen(buf); + } + else { + sprintf(buf, "<w morph=\"x-%s:%s\">", "StrongsMorph", val); + 
memmove(wordStart+strlen(buf), wordStart, (to-wordStart)+1); + memcpy(wordStart, buf, strlen(buf)); + to+=strlen(buf); + pushString(&to, "</w>"); + } + } + + if (!keepToken) { // if we don't want strongs + if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { + if (lastspace) + to--; + } + if (newText) {textStart = from+1; newText = false; } +// if (newWord) {wordStart = to; newWord = false; } + continue; + } + // if not a strongs token, keep token in text + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + if (newText) {textStart = to; newWord = false; } +// if (newWord) {wordStart = to; newWord = false; } + continue; + } + if (intoken) { + if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + } + else { + if (newWord && (*from != ' ')) {wordStart = to; newWord = false; memset(to, 0, 10); } + if (!suspendTextPassThru) { + *to++ = *from; + lastspace = (*from == ' '); + } + } + } + + VerseKey *vkey = SWDYNAMIC_CAST(VerseKey, key); + if (vkey) { + char ref[254]; + if (vkey->Verse()) + sprintf(ref, "<verseStart ref=\"%s\" />", vkey->getOSISRef()); + else if (vkey->Chapter()) + sprintf(ref, "<chapterStart ref=\"%s\" />", vkey->getOSISRef()); + else if (vkey->Book()) + sprintf(ref, "<bookStart ref=\"%s\" />", vkey->getOSISRef()); + else *ref = 0; + if (*ref) { + memmove(text+strlen(ref), text, maxlen-strlen(ref)-1); + memcpy(text, ref, strlen(ref)); + to+=strlen(ref); + if (vkey->Verse()) { + VerseKey tmp; + tmp = *vkey; + tmp.AutoNormalize(0); + tmp.Headings(1); + sprintf(ref, "<verseEnd ref=\"%s\" />", vkey->getOSISRef()); + pushString(&to, ref); + tmp = MAXVERSE; + if (*vkey == tmp) { + tmp.Verse(0); + sprintf(ref, "<chapterEnd ref=\"%s\" />", tmp.getOSISRef()); + pushString(&to, ref); + tmp = MAXCHAPTER; + tmp = MAXVERSE; + if (*vkey == tmp) { + tmp.Chapter(0); + tmp.Verse(0); + sprintf(ref, "<bookEnd ref=\"%s\" />", tmp.getOSISRef()); + pushString(&to, ref); + } + } + } + + 
else if (vkey->Chapter()) + sprintf(ref, "<chapterStart ref=\"%s\" />", vkey->getOSISRef()); + else sprintf(ref, "<bookStart ref=\"%s\" />", vkey->getOSISRef()); + } + } + *to++ = 0; + *to = 0; + return 0; +} + + +void GBFOSIS::pushString(char **buf, const char *format, ...) { + va_list argptr; + + va_start(argptr, format); + *buf += vsprintf(*buf, format, argptr); + va_end(argptr); + +// *buf += strlen(*buf); +} + + +const char *GBFOSIS::convertToOSIS(const char *inRef, const SWKey *key) { + static string outRef; + + outRef = ""; + + VerseKey defLanguage; + ListKey verses = defLanguage.ParseVerseList(inRef, (*key), true); + const char *startFrag = inRef; + for (int i = 0; i < verses.Count(); i++) { + VerseKey *element = SWDYNAMIC_CAST(VerseKey, verses.GetElement(i)); + char buf[5120]; + char frag[800]; + if (element) { + memmove(frag, startFrag, ((const char *)element->userData - startFrag) + 1); + frag[((const char *)element->userData - startFrag) + 1] = 0; + startFrag = (const char *)element->userData + 1; + sprintf(buf, "<reference refStart=\"KJV:%s\" refEnd=\"%s\">%s</reference>", element->LowerBound().getOSISRef(), element->UpperBound().getOSISRef(), frag); + } + else { + memmove(frag, startFrag, ((const char *)verses.GetElement(i)->userData - startFrag) + 1); + frag[((const char *)verses.GetElement(i)->userData - startFrag) + 1] = 0; + startFrag = (const char *)verses.GetElement(i)->userData + 1; + sprintf(buf, "<reference refStart=\"KJV:%s\">%s</reference>", VerseKey(*verses.GetElement(i)).getOSISRef(), frag); + } + outRef+=buf; + } + return outRef.c_str(); +} diff --git a/src/modules/filters/gbfplain.cpp b/src/modules/filters/gbfplain.cpp new file mode 100644 index 0000000..65766d3 --- /dev/null +++ b/src/modules/filters/gbfplain.cpp @@ -0,0 +1,106 @@ +/****************************************************************************** + * + * gbfplain - SWFilter decendant to strip out all GBF tags or convert to + * ASCII rendered symbols. 
+ */ + + +#include <stdlib.h> +#include <string.h> +#include <gbfplain.h> + + +GBFPlain::GBFPlain() { +} + + +char GBFPlain::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char *to, *from, token[2048]; + int tokpos = 0; + bool intoken = false; + int len; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { + intoken = false; + // process desired tokens + switch (*token) { + case 'W': // Strongs + switch(token[1]) { + case 'G': // Greek + case 'H': // Hebrew + case 'T': // Tense + *to++ = ' '; + *to++ = '<'; + for (char *tok = token + 2; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + *to++ = ' '; + continue; + } + break; + case 'R': + switch(token[1]) { + case 'F': // footnote begin + *to++ = ' '; + *to++ = '['; + continue; + case 'f': // footnote end + *to++ = ']'; + *to++ = ' '; + continue; + } + break; + case 'C': + switch(token[1]) { + case 'A': // ASCII value + *to++ = (char)atoi(&token[2]); + continue; + case 'G': + *to++ = '>'; + continue; +/* Bug in WEB + case 'L': + *to++ = '<'; + continue; +*/ + case 'L': // Bug in WEB. 
Use above entry when fixed + case 'N': // new line + *to++ = '\n'; + continue; + case 'M': // new paragraph + *to++ = '\n'; + *to++ = '\n'; + continue; + } + break; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/gbfrtf.cpp b/src/modules/filters/gbfrtf.cpp new file mode 100644 index 0000000..40e5752 --- /dev/null +++ b/src/modules/filters/gbfrtf.cpp @@ -0,0 +1,298 @@ +/****************************************************************************** + * + * gbfrtf - SWFilter decendant to convert all GBF tags to RTF tags + */ + + +#include <stdlib.h> +#include <string.h> +#include <gbfrtf.h> +#include <ctype.h> + +GBFRTF::GBFRTF() { +} + + +char GBFRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned char *to, *from; + char token[2048]; + int tokpos = 0; + bool intoken = false; + int len; + const char *tok; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned char *)&text[maxlen - len]; + } + else from = (unsigned char *)text; // ------------------------------- + for (to = (unsigned char *)text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { + intoken = false; + // process desired tokens + switch (*token) { + case 'W': // Strongs + switch(token[1]) { + case 'G': // Greek + case 'H': // Hebrew + *to++ = '{'; + *to++ = '\\'; + *to++ = 'c'; + *to++ = 'f'; + *to++ = '3'; + *to++ = ' '; + *to++ = '\\'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'b'; + *to++ = ' '; + *to++ = '<'; + for (tok = token + 2; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + *to++ = '}'; + continue; + + case 'T': // Tense + *to++ = '{'; + *to++ = '\\'; + *to++ = 'c'; + *to++ = 'f'; + *to++ = '4'; + 
*to++ = ' '; + *to++ = '\\'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'b'; + *to++ = ' '; + *to++ = '('; + bool separate = false; + for (tok = token + 2; *tok; tok++) { + if (separate) { + *to++ = ';'; + *to++ = ' '; + separate = false; + } + switch (*tok) { + case 'G': + case 'H': + for (tok++; *tok; tok++) { + if (isdigit(*tok)) { + *to++ = *tok; + separate = true; + } + else { + tok--; + break; + } + } + break; + default: + for (; *tok; tok++) { + *to++ = *tok; + } + } + } + *to++ = ')'; + *to++ = '}'; + continue; + } + break; + case 'R': + switch(token[1]) { + case 'X': + *to++ = '<'; + *to++ = 'a'; + *to++ = ' '; + *to++ = 'h'; + *to++ = 'r'; + *to++ = 'e'; + *to++ = 'f'; + *to++ = '='; + *to++ = '"'; + *to++ = '"'; + *to++ = '>'; + continue; + case 'x': + *to++ = '<'; + *to++ = '/'; + *to++ = 'a'; + *to++ = '>'; + continue; + case 'F': // footnote begin + *to++ = '{'; + *to++ = '\\'; + *to++ = 'i'; + *to++ = '1'; + *to++ = ' '; + *to++ = '\\'; + *to++ = 'f'; + *to++ = 's'; + *to++ = '1'; + *to++ = '7'; + *to++ = ' '; + *to++ = '('; + continue; + case 'f': // footnote end + *to++ = ')'; + *to++ = ' '; + *to++ = '}'; + continue; + } + break; + case 'F': // font tags + switch(token[1]) { + case 'I': // italic start + *to++ = '\\'; + *to++ = 'i'; + *to++ = '1'; + *to++ = ' '; + continue; + case 'i': // italic end + *to++ = '\\'; + *to++ = 'i'; + *to++ = '0'; + *to++ = ' '; + continue; + case 'B': // bold start + *to++ = '\\'; + *to++ = 'b'; + *to++ = '1'; + *to++ = ' '; + continue; + case 'b': // bold end + *to++ = '\\'; + *to++ = 'b'; + *to++ = '0'; + *to++ = ' '; + continue; + case 'N': + *to++ = '{'; + if (!strnicmp(token+2, "Symbol", 6)) { + *to++ = '\\'; + *to++ = 'f'; + *to++ = '7'; + *to++ = ' '; + } + continue; + case 'n': + *to++ = '}'; + continue; + case 'S': + *to++ = '{'; + *to++ = '\\'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'p'; + *to++ = 'e'; + *to++ = 'r'; + *to++ = ' '; + continue; + case 's': + *to++ = '}'; + continue; + case 'R': + *to++ = '{'; 
+ *to++ = '\\'; + *to++ = 'c'; + *to++ = 'f'; + *to++ = '6'; + *to++ = ' '; + continue; + case 'r': + *to++ = '}'; + continue; + } + break; + case 'C': // special character tags + switch(token[1]) { + case 'A': // ASCII value + *to++ = (char)atoi(&token[2]); + continue; + case 'G': + *to++ = '>'; + continue; + case 'L': // line break + *to++ = '\\'; + *to++ = 'l'; + *to++ = 'i'; + *to++ = 'n'; + *to++ = 'e'; + *to++ = ' '; + continue; + case 'M': // new paragraph + *to++ = '\\'; + *to++ = 'p'; + *to++ = 'a'; + *to++ = 'r'; + *to++ = ' '; + continue; + case 'T': + *to++ = '<'; + } + break; + case 'T': // title formatting + switch(token[1]) + { + case 'T': // Book title begin + *to++ = '{'; + *to++ = '\\'; + *to++ = 'f'; + *to++ = 's'; + *to++ = '2'; + *to++ = '2'; + *to++ = ' '; + continue; + case 't': + *to++ = '}'; + continue; + case 'S': + *to++ = '\\'; + *to++ = 'p'; + *to++ = 'a'; + *to++ = 'r'; + *to++ = ' '; + *to++ = '{'; + *to++ = '\\'; + *to++ = 'i'; + *to++ = '1'; + *to++ = '\\'; + *to++ = 'b'; + *to++ = '1'; + *to++ = ' '; + continue; + case 's': + *to++ = '}'; + *to++ = '\\'; + *to++ = 'p'; + *to++ = 'a'; + *to++ = 'r'; + *to++ = ' '; + continue; + } + break; + + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/gbfstrongs.cpp b/src/modules/filters/gbfstrongs.cpp new file mode 100644 index 0000000..cb722bd --- /dev/null +++ b/src/modules/filters/gbfstrongs.cpp @@ -0,0 +1,130 @@ +/****************************************************************************** + * + * gbfstrongs - SWFilter decendant to hide or show strongs number + * in a GBF module. 
+ */ + + +#include <stdlib.h> +#include <string.h> +#include <gbfstrongs.h> +#include <swmodule.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif +#include <ctype.h> + +const char GBFStrongs::on[] = "On"; +const char GBFStrongs::off[] = "Off"; +const char GBFStrongs::optName[] = "Strong's Numbers"; +const char GBFStrongs::optTip[] = "Toggles Strong's Numbers On and Off if they exist"; + + +GBFStrongs::GBFStrongs() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +GBFStrongs::~GBFStrongs() { +} + +void GBFStrongs::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *GBFStrongs::getOptionValue() +{ + return (option) ? on:off; +} + +char GBFStrongs::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char *to, *from, token[2048]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + int word = 1; + char val[128]; + char wordstr[5]; + char *valto; + char *ch; + char *textStart = text, *textEnd = 0; + bool newText = false; + string tmp; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + textEnd = to; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) { // Strongs + if (module->isProcessEntryAttributes()) { + valto = val; + for (unsigned int i = 2; ((token[i]) && (i < 150)); i++) + *valto++ = token[i]; + *valto = 0; + if (atoi((!isdigit(*val))?val+1:val) < 5627) { + // normal strongs number + sprintf(wordstr, "%03d", word++); + module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val; + tmp = ""; + tmp.append(textStart, (int)(textEnd - 
textStart)); + module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp; + newText = true; + } + else { + // verb morph + sprintf(wordstr, "%03d", word-1); + module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; + } + } + if (!option) { + if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { + if (lastspace) + to--; + } + if (newText) {textStart = to; newText = false; } + continue; + } + } + // if not a strongs token, keep token in text + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + if (newText) {textStart = to; newText = false; } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + *to++ = *from; + lastspace = (*from == ' '); + } + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/gbfthml.cpp b/src/modules/filters/gbfthml.cpp new file mode 100644 index 0000000..ca03e71 --- /dev/null +++ b/src/modules/filters/gbfthml.cpp @@ -0,0 +1,463 @@ +/*************************************************************************** + gbfthml.cpp - GBF to ThML filter + ------------------- + begin : 1999-10-27 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <gbfthml.h> + + +GBFThML::GBFThML() +{ +} + + +char GBFThML::ProcessText(char *text, int maxlen) +{ + char *to, *from, token[2048]; + int tokpos = 0; + bool intoken = false; + int len; + const char *tok; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) + { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') + { + intoken = false; + // process desired tokens + switch (*token) { + case 'W': // Strongs + switch(token[1]) { + case 'G': + case 'H': + *to++ = '<'; + *to++ = 's'; + *to++ = 'y'; + *to++ = 'n'; + *to++ = 'c'; + *to++ = ' '; + *to++ = 't'; + *to++ = 'y'; + *to++ = 'p'; + *to++ = 'e'; + *to++ = '='; + *to++ = '"'; + *to++ = 'S'; + *to++ = 't'; + *to++ = 'r'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 'g'; + *to++ = 's'; + *to++ = '"'; + *to++ = ' '; + *to++ = 'v'; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'u'; + *to++ = 'e'; + *to++ = '='; + *to++ = '"'; + for (tok = token + 1; *tok; tok++) + *to++ = *tok; + *to++ = '"'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + continue; + + case 'T': // Tense + *to++ = '<'; + *to++ = 's'; + *to++ = 'y'; + *to++ = 'n'; + *to++ = 'c'; + *to++ = ' '; + *to++ = 't'; + *to++ = 'y'; + *to++ = 'p'; + *to++ = 'e'; + *to++ = '='; + *to++ = '"'; + *to++ = 'M'; + *to++ = 'o'; + *to++ = 'r'; + *to++ = 'p'; + *to++ = 'h'; + *to++ = '"'; + *to++ = ' '; + *to++ = 'v'; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'u'; + *to++ = 'e'; + *to++ = '='; + *to++ = '"'; + for (tok = token + 2; *tok; tok++) + *to++ = *tok; + *to++ = '"'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + continue; + } + break; + case 'R': + 
switch(token[1]) + { + case 'X': + *to++ = '<'; + *to++ = 'a'; + *to++ = ' '; + *to++ = 'h'; + *to++ = 'r'; + *to++ = 'e'; + *to++ = 'f'; + *to++ = '='; + *to++ = '\"'; + for (tok = token + 3; *tok; tok++) { + if(*tok != '<' && *tok+1 != 'R' && *tok+2 != 'x') { + *to++ = *tok; + } + else { + break; + } + } + *to++ = '\"'; + *to++ = '>'; + continue; + case 'x': + *to++ = '<'; + *to++ = '/'; + *to++ = 'a'; + *to++ = '>'; + continue; + case 'F': // footnote begin + *to++ = '<'; + *to++ = 'n'; + *to++ = 'o'; + *to++ = 't'; + *to++ = 'e'; + *to++ = ' '; + *to++ = 'p'; + *to++ = 'l'; + *to++ = 'a'; + *to++ = 'c'; + *to++ = 'e'; + *to++ = '='; + *to++ = '"'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'o'; + *to++ = 't'; + *to++ = '"'; + *to++ = '>'; + continue; + case 'f': // footnote end + *to++ = '<'; + *to++ = '/'; + *to++ = 'n'; + *to++ = 'o'; + *to++ = 't'; + *to++ = 'e'; + *to++ = '>'; + continue; + } + break; + case 'F': // font tags + switch(token[1]) + { + case 'N': + *to++ = '<'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = ' '; + *to++ = 'f'; + *to++ = 'a'; + *to++ = 'c'; + *to++ = 'e'; + *to++ = '='; + *to++ = '"'; + for (tok = token + 2; *tok; tok++) + *to++ = *tok; + *to++ = '"'; + *to++ = '>'; + continue; + case 'n': + *to++ = '<'; + *to++ = '/'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = '>'; + continue; + case 'I': // italic start + *to++ = '<'; + *to++ = 'i'; + *to++ = '>'; + continue; + case 'i': // italic end + *to++ = '<'; + *to++ = '/'; + *to++ = 'i'; + *to++ = '>'; + continue; + case 'B': // bold start + *to++ = '<'; + *to++ = 'b'; + *to++ = '>'; + continue; + case 'b': // bold end + *to++ = '<'; + *to++ = '/'; + *to++ = 'b'; + *to++ = '>'; + continue; + + case 'R': // words of Jesus begin + *to++ = '<'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = ' '; + *to++ = 'c'; + *to++ = 'o'; + *to++ = 'l'; + *to++ = 'o'; + *to++ = 'r'; + *to++ = '='; + *to++ = '\"'; + *to++ = '#'; + 
*to++ = 'f'; + *to++ = 'f'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '\"'; + *to++ = '>'; + continue; + case 'r': // words of Jesus end + *to++ = '<'; + *to++ = '/'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = '>'; + continue; + case 'U': // Underline start + *to++ = '<'; + *to++ = 'u'; + *to++ = '>'; + continue; + case 'u': // Underline end + *to++ = '<'; + *to++ = '/'; + *to++ = 'u'; + *to++ = '>'; + continue; + case 'O': // Old Testament quote begin + *to++ = '<'; + *to++ = 'c'; + *to++ = 'i'; + *to++ = 't'; + *to++ = 'e'; + *to++ = '>'; + continue; + case 'o': // Old Testament quote end + *to++ = '<'; + *to++ = '/'; + *to++ = 'c'; + *to++ = 'i'; + *to++ = 't'; + *to++ = 'e'; + *to++ = '>'; + continue; + case 'S': // Superscript begin + *to++ = '<'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'p'; + *to++ = '>'; + continue; + case 's': // Superscript end + *to++ = '<'; + *to++ = '/'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'p'; + *to++ = '>'; + continue; + case 'V': // Subscript begin + *to++ = '<'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'b'; + *to++ = '>'; + continue; + case 'v': // Subscript end + *to++ = '<'; + *to++ = '/'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'b'; + *to++ = '>'; + continue; + } + break; + case 'C': // special character tags + switch(token[1]) + { + case 'A': // ASCII value + *to++ = (char)atoi(&token[2]); + continue; + case 'G': + //*to++ = ' '; + continue; + case 'L': // line break + *to++ = '<'; + *to++ = 'b'; + *to++ = 'r'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + *to++ = ' '; + continue; + case 'M': // new paragraph + *to++ = '<'; + *to++ = 'p'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + continue; + case 'T': + //*to++ = ' '; + continue; + } + break; + case 'T': // title formatting + switch(token[1]) + { + case 'T': // Book title begin + *to++ = '<'; + *to++ = 'b'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = '>'; + continue; + case 't': + *to++ = '<'; + *to++ = '/'; + *to++ = 
'b'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = '>'; + continue; + case 'S': + *to++ = '<'; + *to++ = 'd'; + *to++ = 'i'; + *to++ = 'v'; + *to++ = ' '; + *to++ = 'c'; + *to++ = 'l'; + *to++ = 'a'; + *to++ = 's'; + *to++ = 's'; + *to++ = '='; + *to++ = '\"'; + *to++ = 's'; + *to++ = 'e'; + *to++ = 'c'; + *to++ = 'h'; + *to++ = 'e'; + *to++ = 'a'; + *to++ = 'd'; + *to++ = '\"'; + *to++ = '>'; + continue; + case 's': + *to++ = '<'; + *to++ = '/'; + *to++ = 'd'; + *to++ = 'i'; + *to++ = 'v'; + *to++ = '>'; + continue; + } + break; + + case 'P': // special formatting + switch(token[1]) + { + case 'P': // Poetry begin + *to++ = '<'; + *to++ = 'v'; + *to++ = 'e'; + *to++ = 'r'; + *to++ = 's'; + *to++ = 'e'; + *to++ = '>'; + continue; + case 'p': + *to++ = '<'; + *to++ = '/'; + *to++ = 'v'; + *to++ = 'e'; + *to++ = 'r'; + *to++ = 's'; + *to++ = 'e'; + *to++ = '>'; + continue; + } + break; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} + + + diff --git a/src/modules/filters/greeklexattribs.cpp b/src/modules/filters/greeklexattribs.cpp new file mode 100644 index 0000000..0f85c6c --- /dev/null +++ b/src/modules/filters/greeklexattribs.cpp @@ -0,0 +1,96 @@ +/****************************************************************************** + * + * greeklexattribs - SWFilter decendant to set entry attributes for greek + * lexicons + */ + + +#include <stdlib.h> +#include <ctype.h> +#include <string.h> +#include <greeklexattribs.h> +#include <swmodule.h> + + +GreekLexAttribs::GreekLexAttribs() { +} + + +char GreekLexAttribs::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) { + + if (module->isProcessEntryAttributes()) { + char *from; + bool inAV = false; + string phrase; + string freq; + char val[128], *valto; + char wordstr[7]; + char *currentPhrase = 0, *ch = 0; + char *currentPhraseEnd = 0; + int number = 0; + + + for (from = 
text; *from; from++) { + if (inAV) { + if (currentPhrase == 0) { + if (isalpha(*from)) + currentPhrase = from; + } + else { + if ((!isalpha(*from)) && (*from != ' ') && (*from != '+') && (*from !='(') && (*from != ')') && (*from != '\'')) { + if (*from == '<') { + if (!currentPhraseEnd) + currentPhraseEnd = from - 1; + for (; *from && *from != '>'; from++) { + if (!strncmp(from, "value=\"", 7)) { + valto = val; + from += 7; + for (unsigned int i = 0; from[i] != '\"' && i < 127; i++) + *valto++ = from[i]; + *valto = 0; + sprintf(wordstr, "%03d", number+1); + module->getEntryAttributes()["AVPhrase"][wordstr]["CompoundedWith"] = val; + from += strlen(val); + } + } + continue; + } + + phrase = ""; + phrase.append(currentPhrase, (int)(((currentPhraseEnd)?currentPhraseEnd:from) - currentPhrase)-1); + currentPhrase = from; + while (*from && isdigit(*from)) from++; + freq = ""; + freq.append(currentPhrase, (int)(from - currentPhrase)); + if ((freq.length() > 0) && (phrase.length() > 0)) { + sprintf(wordstr, "%03d", ++number); + if ((strchr(phrase.c_str(), '(') > phrase.c_str()) && (strchr(phrase.c_str(), ')') > phrase.c_str() + 1)) { + string tmp = phrase.substr(0, phrase.find_first_of("(")); + phrase.erase(phrase.find_first_of("("), 1); + phrase.erase(phrase.find_first_of(")"), 1); + phrase.erase(0,phrase.find_first_not_of("\r\n\v\t ")); phrase.erase(phrase.find_last_not_of("\r\n\v\t ")+1); + module->getEntryAttributes()["AVPhrase"][wordstr]["Alt"] = phrase; + phrase = tmp; + } + phrase.erase(0,phrase.find_first_not_of("\r\n\v\t ")); phrase.erase(phrase.find_last_not_of("\r\n\v\t ")+1); + freq.erase(0,freq.find_first_not_of("\r\n\v\t ")); freq.erase(freq.find_last_not_of("\r\n\v\t ")+1); + module->getEntryAttributes()["AVPhrase"][wordstr]["Phrase"] = phrase; + module->getEntryAttributes()["AVPhrase"][wordstr]["Frequency"] = freq; + currentPhrase = 0; + currentPhraseEnd = 0; + } + } + } + if (*from == ';') inAV = false; + + } + else if (!strncmp(from, "AV-", 3)) { + inAV = 
true; + from+=2; + } + } + } + return 0; +} + + diff --git a/src/modules/filters/latin1utf16.cpp b/src/modules/filters/latin1utf16.cpp new file mode 100644 index 0000000..75ee998 --- /dev/null +++ b/src/modules/filters/latin1utf16.cpp @@ -0,0 +1,120 @@ +/****************************************************************************** + * + * Latin1UTF16 - SWFilter decendant to convert a Latin-1 character to UTF-16 + * + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <latin1utf16.h> + +Latin1UTF16::Latin1UTF16() { +} + + +char Latin1UTF16::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned char *from; + unsigned short *to; + int len; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned char*)&text[maxlen - len]; + } + else + from = (unsigned char*)text; + // ------------------------------- + + for (to = (unsigned short*)text; *from; from++) { + switch (*from) { + case 0x80: // '€' + *to++ = 0x20AC; + break; + case 0x82: // '‚' + *to++ = 0x201A; + break; + case 0x83: // 'ƒ' + *to++ = 0x0192; + break; + case 0x84: // '„' + *to++ = 0x201E; + break; + case 0x85: // '…' + *to++ = 0x2026; + break; + case 0x86: // '†' + *to++ = 0x2020; + break; + case 0x87: // '‡' + *to++ = 0x2021; + break; + case 0x88: // 'ˆ' + *to++ = 0x02C6; + break; + case 0x89: // '‰' + *to++ = 0x2030; + break; + case 0x8A: // 'Š' + *to++ = 0x0160; + break; + case 0x8B: // '‹' + *to++ = 0x2039; + break; + case 0x8C: // 'Œ' + *to++ = 0x0152; + break; + case 0x8E: // 'Ž' + *to++ = 0x017D; + break; + case 0x91: // '‘' + *to++ = 0x2018; + break; + case 0x92: // '’' + *to++ = 0x2019; + break; + case 0x93: // '“' + *to++ = 0x201C; + break; + case 0x94: // '”' + *to++ = 0x201D; + break; + case 0x95: // '•' + *to++ = 0x2022; + break; + case 0x96: // '–' + *to++ = 0x2013; + break; + case 0x97: // '—' + *to++ = 0x2014; + break; + case 0x98: // '˜' + *to++ = 0x02DC; + 
break; + case 0x99: // '™' + *to++ = 0x2122; + break; + case 0x9A: // 'š' + *to++ = 0x0161; + break; + case 0x9B: // '›' + *to++ = 0x203A; + break; + case 0x9C: // 'œ' + *to++ = 0x0153; + break; + case 0x9E: // 'ž' + *to++ = 0x017E; + break; + case 0x9F: // 'Ÿ' + *to++ = 0x0178; + break; + default: + *to++ = (unsigned short)*from; + } + } + *to = 0; + return 0; +} diff --git a/src/modules/filters/latin1utf8.cpp b/src/modules/filters/latin1utf8.cpp new file mode 100644 index 0000000..91af8dc --- /dev/null +++ b/src/modules/filters/latin1utf8.cpp @@ -0,0 +1,179 @@ +/****************************************************************************** + * + * Latin1UTF8 - SWFilter decendant to convert a Latin-1 character to UTF-8 + * + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <latin1utf8.h> +#include <swmodule.h> + +Latin1UTF8::Latin1UTF8() { +} + + +char Latin1UTF8::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned char *to, *from; + int len; + + len = strlen(text) + 1; + if (len == maxlen + 1) + maxlen = (maxlen + 1) * FILTERPAD; + // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned char*)&text[maxlen - len]; + } + else + from = (unsigned char*)text; // ------------------------------- + + + + for (to = (unsigned char*)text; *from; from++) { + if (*from < 0x80) { + *to++ = *from; + } + else if (*from < 0xc0) { + switch(*from) { + case 0x80: // '€' + *to++ = 0xe2; // 'â' + *to++ = 0x82; // '‚' + *to++ = 0xac; // '¬' + break; + case 0x82: // '‚' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x9a; // 'š' + break; + case 0x83: // 'ƒ' + *to++ = 0xc6; // 'Æ' + *to++ = 0x92; // '’' + break; + case 0x84: // '„' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x9e; // 'ž' + break; + case 0x85: // '…' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0xa6; // '¦' + break; + case 0x86: // '†' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' 
+ *to++ = 0xa0; // ' ' + break; + case 0x87: // '‡' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0xa1; // '¡' + break; + case 0x88: // 'ˆ' + *to++ = 0xcb; // 'Ë' + *to++ = 0x86; // '†' + break; + case 0x89: // '‰' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0xb0; // '°' + break; + case 0x8A: // 'Š' + *to++ = 0xc5; // 'Å' + *to++ = 0xa0; // ' ' + break; + case 0x8B: // '‹' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0xb9; // '¹' + break; + case 0x8C: // 'Œ' + *to++ = 0xc5; // 'Å' + *to++ = 0x92; // '’' + break; + case 0x8E: // 'Ž' + *to++ = 0xc5; // 'Å' + *to++ = 0xbd; // '½' + break; + case 0x91: // '‘' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x98; // '˜' + break; + case 0x92: // '’' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x99; // '™' + break; + case 0x93: // '“' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x9c; // 'œ' + break; + case 0x94: // '”' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x9d; // '' + break; + case 0x95: // '•' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0xa2; // '¢' + break; + case 0x96: // '–' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x93; // '“' + break; + case 0x97: // '—' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x94; // '”' + break; + case 0x98: // '˜' + *to++ = 0xcb; // 'Ë' + *to++ = 0x9c; // 'œ' + break; + case 0x99: // '™' + *to++ = 0xe2; // 'â' + *to++ = 0x84; // '„' + *to++ = 0xa2; // '¢' + break; + case 0x9A: // 'š' + *to++ = 0xc5; // 'Å' + *to++ = 0xa1; // '¡' + break; + case 0x9B: // '›' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0xba; // 'º' + break; + case 0x9C: // 'œ' + *to++ = 0xc5; // 'Å' + *to++ = 0x93; // '“' + break; + case 0x9E: // 'ž' + *to++ = 0xc5; // 'Å' + *to++ = 0xbe; // '¾' + break; + case 0x9F: // 'Ÿ' + *to++ = 0xc5; // 'Å' + *to++ = 0xb8; // '¸' + break; + default: + *to++ = 0xC2; + *to++ = *from; + } + } + else { + *to++ = 0xC3; + *to++ = (*from - 0x40); + } + } + 
*to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/plainfootnotes.cpp b/src/modules/filters/plainfootnotes.cpp new file mode 100644 index 0000000..96fc4d8 --- /dev/null +++ b/src/modules/filters/plainfootnotes.cpp @@ -0,0 +1,102 @@ +/*************************************************************************** + plainfootnotes.cpp - description + ------------------- + begin : Wed Oct 13 1999 + copyright : (C) 1999 by The team of BibleTime + email : info@bibletime.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <plainfootnotes.h> +#include <swkey.h> + +#include <stdlib.h> +#include <string.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + +const char PLAINFootnotes::on[] = "On"; +const char PLAINFootnotes::off[] = "Off"; +const char PLAINFootnotes::optName[] = "Footnotes"; +const char PLAINFootnotes::optTip[] = "Toggles Footnotes On and Off In Bible Texts If They Exist"; + +PLAINFootnotes::PLAINFootnotes(){ + option = false; + options.push_back(on); + options.push_back(off); +} + +PLAINFootnotes::~PLAINFootnotes(){ +} + + +void PLAINFootnotes::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *PLAINFootnotes::getOptionValue() +{ + return (option) ? 
on:off; +} + + +char PLAINFootnotes::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char token[2048]; + int tokpos = 0; + bool intoken = false; + bool lastspace = false; + + if (!option) { // if we don't want footnotes + char *to, *from; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) + { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '{') // Footnote start + { + hide = true; + continue; + } + if (*from == '}') // Footnote end + { + hide=false; + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + *to++ = *from; + lastspace = (*from == ' '); + } + } + } + *to++ = 0; + *to = 0; + } + return 0; +} + diff --git a/src/modules/filters/plainhtml.cpp b/src/modules/filters/plainhtml.cpp new file mode 100644 index 0000000..fefb029 --- /dev/null +++ b/src/modules/filters/plainhtml.cpp @@ -0,0 +1,134 @@ +/*************************************************************************** + rwphtml.cpp - description + ------------------- + begin : Thu Jun 24 1999 + copyright : (C) 1999 by Torsten Uhlmann + email : TUhlmann@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <plainhtml.h> + + +PLAINHTML::PLAINHTML() +{ +} + + +char PLAINHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char *to, *from; + int len; + int count = 0; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + for (to = text; *from; from++) + { + if ((*from == '\n') && (from[1] == '\n')) // paragraph + { + *to++ = '<'; + *to++ = 'P'; + *to++ = '>'; + from++; + continue; + } else { + if ((*from == '\n')) // && (from[1] != '\n')) // new line + { + *to++ = '<'; + *to++ = 'B'; + *to++ = 'R'; + *to++ = '>'; + continue; + } + } + + if (*from == '{') { + *to++ = '<'; + *to++ = 'F'; + *to++ = 'O'; + *to++ = 'N'; + *to++ = 'T'; + *to++ = ' '; + *to++ = 'C'; + *to++ = 'O'; + *to++ = 'L'; + *to++ = 'O'; + *to++ = 'R'; + *to++ = '='; + *to++ = '#'; + *to++ = '8'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '>'; + + *to++ = '<'; + *to++ = 'S'; + *to++ = 'M'; + *to++ = 'A'; + *to++ = 'L'; + *to++ = 'L'; + *to++ = '>'; + *to++ = ' '; + *to++ = '('; + continue; + } + + if (*from == '}') + { + *to++ = ')'; + *to++ = ' '; + *to++ = '<'; + *to++ = '/'; + *to++ = 'S'; + *to++ = 'M'; + *to++ = 'A'; + *to++ = 'L'; + *to++ = 'L'; + *to++ = '>'; + + *to++ = '<'; + *to++ = '/'; + *to++ = 'F'; + *to++ = 'O'; + *to++ = 'N'; + *to++ = 'T'; + *to++ = '>'; + continue; + } + + if ((*from == ' ') && (count > 5000)) + { + *to++ = '<'; + *to++ = 'W'; + *to++ = 'B'; + *to++ = 'R'; + *to++ = '>'; + count = 0; + continue; + } + + *to++ = *from; + count++; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/rtfhtml.cpp b/src/modules/filters/rtfhtml.cpp new file mode 100644 index 0000000..f0b842b 
--- /dev/null +++ b/src/modules/filters/rtfhtml.cpp @@ -0,0 +1,99 @@ +/*************************************************************************** + rtfhtml.cpp - description + ------------------- + begin : Wed Oct 13 1999 + copyright : (C) 1999 by The team of BibleTime + email : info@bibletime.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <rtfhtml.h> + + +RTFHTML::RTFHTML() { + +} + + +char RTFHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char *to, *from; + int len; + bool center = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + for (to = text; *from; from++) { + if (*from == '\\') // a RTF command + { + if ((from[1] == 'p') && (from[2] == 'a') && (from[3] == 'r') && (from[4] == 'd')) + { // switch all modifier off + if (center) + { + *to++ = '<'; + *to++ = '/'; + *to++ = 'C'; + *to++ = 'E'; + *to++ = 'N'; + *to++ = 'T'; + *to++ = 'E'; + *to++ = 'R'; + *to++ = '>'; + center = false; + } + from += 4; + continue; + } + if ((from[1] == 'p') && (from[2] == 'a') && (from[3] == 'r')) + { + *to++ = '<'; + *to++ = 'P'; + *to++ = '>'; + *to++ = '\n'; + from += 3; + continue; + } + if (from[1] == ' ') + { + from += 1; + continue; + } + if ((from[1] == 'q') && (from[2] == 'c')) // center on + { + if (!center) + { + *to++ = '<'; + *to++ = 'C'; 
+ *to++ = 'E'; + *to++ = 'N'; + *to++ = 'T'; + *to++ = 'E'; + *to++ = 'R'; + *to++ = '>'; + center = true; + } + from += 2; + continue; + } + } + + *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/rwphtml.cpp b/src/modules/filters/rwphtml.cpp new file mode 100644 index 0000000..6f8ae4f --- /dev/null +++ b/src/modules/filters/rwphtml.cpp @@ -0,0 +1,187 @@ +/*************************************************************************** + rwphtml.cpp - description + ------------------- + begin : Thu Jun 24 1999 + copyright : (C) 1999 by Torsten Uhlmann + email : TUhlmann@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <rwphtml.h> + +RWPHTML::RWPHTML() +{ +} + + +char RWPHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char *to, *from; + signed char greek_str[500]; + bool inverse = false; + bool first_letter = false; + int len; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } else + from = text; + for (to = text; *from; from++) { + if (*from == '\\') { + ++from; + int i=0; + first_letter = true; + greek_str[0] = '\0'; + while (*from != '\\') { /* get the greek word or phrase */ + greek_str[i++] = *from; + greek_str[i + 1] = '\0'; + from++; + } /* convert to symbol font as best we can */ + strcpy(to,"<I> </I><FONT FACE=\"symbol\">"); + to += strlen(to); + for (int j = 0; j < i; j++) { + if ((first_letter) + && (greek_str[j] == 'h')) { + if (greek_str[j + 1] == 'o') { + *to++ = 'o'; + first_letter = false; + ++j; + continue; + } else if (greek_str[j + 1] == 'a') { + *to++ = 'a'; + first_letter = false; + ++j; + continue; + } else if (greek_str[j + 1] == 'w') { + *to++ = 'w'; + first_letter = false; + ++j; + continue; + } else if (greek_str[j + 1] == 'u') { + *to++ = 'u'; + first_letter = false; + ++j; + continue; + } else if (greek_str[j + 1] == + -109) { + *to++ = 'w'; + first_letter = false; + ++j; + continue; + } else if (greek_str[j + 1] == + -120) { + *to++ = 'h'; + first_letter = false; + ++j; + continue; + } else if (greek_str[j + 1] == 'i') { + *to++ = 'i'; + first_letter = false; + ++j; + continue; + }else if (greek_str[j + 1] == 'e') { + *to++ = 'e'; + first_letter = false; + ++j; + continue; + } + first_letter = false; + } + if ((greek_str[j] == 't') + && (greek_str[j + 1] == 'h')) { + *to++ = 'q'; + ++j; + continue; + } + if ((greek_str[j] == 'c') + && 
(greek_str[j + 1] == 'h')) { + *to++ = 'c'; + ++j; + continue; + } + if ((greek_str[j] == 'p') + && (greek_str[j + 1] == 'h')) { + ++j; + *to++ = 'f'; + continue; + } + if (greek_str[j] == -120) { + *to++ = 'h'; + continue; + } + if (greek_str[j] == -125) { + *to++ = 'a'; + continue; + } + if (greek_str[j] == -109) { + if(greek_str[j+1] == 'i') ++j; + *to++ = 'w'; + continue; + } + if (greek_str[j] == ' ') + first_letter = true; + if (greek_str[j] == 's') { + if(isalpha(greek_str[j + 1])) *to++ = 's'; + else if(!isprint(greek_str[j] )) *to++ = 's'; + else *to++ = 'V'; + continue; + } + if (greek_str[j] == '\'') { + continue; + } + *to++ = greek_str[j]; + } + strcpy(to,"</FONT><I> </I>"); + to += strlen(to); + continue; + } + if (*from == '#') { // verse markings (e.g. "#Mark 1:1|") + inverse = true; + strcpy(to,"<FONT COLOR=#0000FF>"); + to += strlen(to); + continue; + } + if ((*from == '|') && (inverse)) { + inverse = false; + strcpy(to,"</FONT>"); + to += strlen(to); + continue; + } + if (*from == '{') { + strcpy(to,"<BR><STRONG>"); + to += strlen(to); + if ((from - &text[maxlen - len]) > 10) { // not the beginning of the entry + strcpy(to,"<P>"); + to += strlen(to); + } + continue; + } + if (*from == '}') { + strcpy(to," </STRONG>"); + to += strlen(to); + continue; + } + if ((*from == '\n') && (from[1] == '\n')) { + strcpy(to,"<P>"); + to += strlen(to); + continue; + } + *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/rwprtf.cpp b/src/modules/filters/rwprtf.cpp new file mode 100644 index 0000000..8f7b074 --- /dev/null +++ b/src/modules/filters/rwprtf.cpp @@ -0,0 +1,107 @@ +/****************************************************************************** + * + * rwprtf - SWFilter decendant to convert all GBF tags to RTF tags + */ + + +#include <stdlib.h> +#include <string.h> +#include <rwprtf.h> + + +RWPRTF::RWPRTF() { + +} + + +char RWPRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + 
char *to, *from; + bool ingreek = false; + bool inverse = false; + int len; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + for (to = text; *from; from++) { + if (*from == '\\') { + if(!ingreek) { + ingreek = true; + *to++ = '['; + *to++ = '{'; + *to++ = '\\'; + *to++ = 'f'; + *to++ = '8'; + *to++ = ' '; + continue; + } + else { + ingreek = false; + *to++ = '}'; + *to++ = ']'; + continue; + } + } + + if ((ingreek) && ((*from == 'h') || (*from == 'H'))) + continue; // 'h's are mostly useless in RWP translitterations. The greek is more correct without them. + + if (*from == '#') { // verse markings (e.g. "#Mark 1:1|") + inverse = true; + *to++ = '{'; + *to++ = '\\'; + *to++ = 'c'; + *to++ = 'f'; + *to++ = '2'; + *to++ = ' '; + *to++ = '#'; + continue; + } + if ((*from == '|') && (inverse)) { + inverse = false; + *to++ = '|'; + *to++ = '}'; + continue; + } + + if (*from == '{') { + *to++ = '{'; + *to++ = '\\'; + *to++ = 'b'; + *to++ = ' '; + if ((from - &text[maxlen - len]) > 10) { // not the beginning of the entry + *to++ = '\\'; + *to++ = 'p'; + *to++ = 'a'; + *to++ = 'r'; + *to++ = ' '; + } + continue; + } + + if (*from == '}') { + // this is kinda neat... 
DO NOTHING + } + if ((*from == '\n') && (from[1] == '\n')) { + *to++ = '\\'; + *to++ = 'p'; + *to++ = 'a'; + *to++ = 'r'; + *to++ = '\\'; + *to++ = 'p'; + *to++ = 'a'; + *to++ = 'r'; + *to++ = ' '; + continue; + } + + *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/scsuutf8.cpp b/src/modules/filters/scsuutf8.cpp new file mode 100644 index 0000000..d0d5ceb --- /dev/null +++ b/src/modules/filters/scsuutf8.cpp @@ -0,0 +1,220 @@ +/****************************************************************************** + * + * SCSUUTF8 - SWFilter decendant to convert a SCSU character to UTF-8 + * + */ + + +/* This class is based on: + * http://czyborra.com/scsu/scsu.c written by Roman Czyborra@dds.nl + * on Andrea's balcony in North Amsterdam on 1998-08-04 + * Thanks to Richard Verhoeven <rcb5@win.tue.nl> for his suggestion + * to correct the haphazard "if" after UQU to "else if" on 1998-10-01 + * + * This is a deflator to UTF-8 output for input compressed in SCSU, + * the (Reuters) Standard Compression Scheme for Unicode as described + * in http://www.unicode.org/unicode/reports/tr6.html + */ + +#include <stdlib.h> +#include <stdio.h> +#include <swmodule.h> + +#include <scsuutf8.h> + +SCSUUTF8::SCSUUTF8() { +} + + +unsigned char* SCSUUTF8::UTF8Output(unsigned long uchar, unsigned char* text) +{ + /* join UTF-16 surrogates without any pairing sanity checks */ + + static int d; + + if (uchar >= 0xd800 && uchar <= 0xdbff) { d = uchar & 0x3f; return text; } + if (uchar >= 0xdc00 && uchar <= 0xdfff) { uchar = uchar + 0x2400 + d * 0x400; } + + /* output one character as UTF-8 multibyte sequence */ + + if (uchar < 0x80) { + *text++ = c; + } + else if (uchar < 0x800) { + *text++ = 0xc0 | uchar >> 6; + *text++ = 0x80 | uchar & 0x3f; + } + else if (uchar < 0x10000) { + *text++ = 0xe0 | uchar >> 12; + *text++ = 0x80 | uchar >> 6 & 0x3f; + *text++ = 0x80 | uchar & 0x3f; + } + else if (uchar < 0x200000) { + *text++ = 0xf0 | uchar >> 18; + *text++ = 
0x80 | uchar >> 12 & 0x3f; + *text++ = 0x80 | uchar >> 6 & 0x3f; + *text++ = 0x80 | uchar & 0x3f; + } + + return text; +} + +char SCSUUTF8::ProcessText(char *text, int len, const SWKey *key, const SWModule *module) +{ + unsigned char *to, *from; + unsigned long buflen = len * FILTERPAD; + char active = 0, mode = 0; + + static unsigned short start[8] = {0x0000,0x0080,0x0100,0x0300,0x2000,0x2080,0x2100,0x3000}; + static unsigned short slide[8] = {0x0080,0x00C0,0x0400,0x0600,0x0900,0x3040,0x30A0,0xFF00}; + static unsigned short win[256] = { + 0x0000, 0x0080, 0x0100, 0x0180, 0x0200, 0x0280, 0x0300, 0x0380, + 0x0400, 0x0480, 0x0500, 0x0580, 0x0600, 0x0680, 0x0700, 0x0780, + 0x0800, 0x0880, 0x0900, 0x0980, 0x0A00, 0x0A80, 0x0B00, 0x0B80, + 0x0C00, 0x0C80, 0x0D00, 0x0D80, 0x0E00, 0x0E80, 0x0F00, 0x0F80, + 0x1000, 0x1080, 0x1100, 0x1180, 0x1200, 0x1280, 0x1300, 0x1380, + 0x1400, 0x1480, 0x1500, 0x1580, 0x1600, 0x1680, 0x1700, 0x1780, + 0x1800, 0x1880, 0x1900, 0x1980, 0x1A00, 0x1A80, 0x1B00, 0x1B80, + 0x1C00, 0x1C80, 0x1D00, 0x1D80, 0x1E00, 0x1E80, 0x1F00, 0x1F80, + 0x2000, 0x2080, 0x2100, 0x2180, 0x2200, 0x2280, 0x2300, 0x2380, + 0x2400, 0x2480, 0x2500, 0x2580, 0x2600, 0x2680, 0x2700, 0x2780, + 0x2800, 0x2880, 0x2900, 0x2980, 0x2A00, 0x2A80, 0x2B00, 0x2B80, + 0x2C00, 0x2C80, 0x2D00, 0x2D80, 0x2E00, 0x2E80, 0x2F00, 0x2F80, + 0x3000, 0x3080, 0x3100, 0x3180, 0x3200, 0x3280, 0x3300, 0x3800, + 0xE000, 0xE080, 0xE100, 0xE180, 0xE200, 0xE280, 0xE300, 0xE380, + 0xE400, 0xE480, 0xE500, 0xE580, 0xE600, 0xE680, 0xE700, 0xE780, + 0xE800, 0xE880, 0xE900, 0xE980, 0xEA00, 0xEA80, 0xEB00, 0xEB80, + 0xEC00, 0xEC80, 0xED00, 0xED80, 0xEE00, 0xEE80, 0xEF00, 0xEF80, + 0xF000, 0xF080, 0xF100, 0xF180, 0xF200, 0xF280, 0xF300, 0xF380, + 0xF400, 0xF480, 0xF500, 0xF580, 0xF600, 0xF680, 0xF700, 0xF780, + 0xF800, 0xF880, 0xF900, 0xF980, 0xFA00, 0xFA80, 0xFB00, 0xFB80, + 0xFC00, 0xFC80, 0xFD00, 0xFD80, 0xFE00, 0xFE80, 0xFF00, 0xFF80, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x00C0, 0x0250, 0x0370, 0x0530, 0x3040, 0x30A0, 0xFF60 + }; + + if (!len) + return 0; + + memmove(&text[buflen - len], text, len); + from = (unsigned char*)&text[buflen - len]; + to = (unsigned char *)text; + + // ------------------------------- + + for (int i = 0; i < len;) { + + + if (i >= len) break; + c = from[i++]; + + if (c >= 0x80) + { + to = UTF8Output (c - 0x80 + slide[active], to); + } + else if (c >= 0x20 && c <= 0x7F) + { + to = UTF8Output (c, to); + } + else if (c == 0x0 || c == 0x9 || c == 0xA || c == 0xC || c == 0xD) + { + to = UTF8Output (c, to); + } + else if (c >= 0x1 && c <= 0x8) /* SQn */ + { + if (i >= len) break; + /* single quote */ d = from[i++]; + + to = UTF8Output (d < 0x80 ? 
d + start [c - 0x1] : + d - 0x80 + slide [c - 0x1], to); + } + else if (c >= 0x10 && c <= 0x17) /* SCn */ + { + /* change window */ active = c - 0x10; + } + else if (c >= 0x18 && c <= 0x1F) /* SDn */ + { + /* define window */ active = c - 0x18; + if (i >= len) break; + slide [active] = win [from[i++]]; + } + else if (c == 0xB) /* SDX */ + { + if (i >= len) break; + c = from[i++]; + + if (i >= len) break; + d = from[i++]; + + slide [active = c>>5] = 0x10000 + (((c & 0x1F) << 8 | d) << 7); + } + else if (c == 0xE) /* SQU */ + { + if (i >= len) break; + /* SQU */ c = from[i++]; + + if (i >= len) break; + to = UTF8Output (c << 8 | from[i++], to); + } + else if (c == 0xF) /* SCU */ + { + /* change to Unicode mode */ mode = 1; + + while (mode) + { + if (i >= len) break; + c = from[i++]; + + if (c <= 0xDF || c >= 0xF3) + { + if (i >= len) break; + to = UTF8Output (c << 8 | from[i++], to); + } + else if (c == 0xF0) /* UQU */ + { + if (i >= len) break; + c = from[i++]; + + if (i >= len) break; + to = UTF8Output (c << 8 | from[i++], to); + } + else if (c >= 0xE0 && c <= 0xE7) /* UCn */ + { + active = c - 0xE0; mode = 0; + } + else if (c >= 0xE8 && c <= 0xEF) /* UDn */ + { + if (i >= len) break; + slide [active=c-0xE8] = win [from[i++]]; mode = 0; + } + else if (c == 0xF1) /* UDX */ + { + if (i >= len) break; + c = from[i++]; + + if (i >= len) break; + d = from[i++]; + + slide [active = c>>5] = + 0x10000 + (((c & 0x1F) << 8 | d) << 7); mode = 0; + } + } + } + + + } + + *to++ = 0; + *to = 0; + return 0; +} + diff --git a/src/modules/filters/swbasicfilter.cpp b/src/modules/filters/swbasicfilter.cpp new file mode 100644 index 0000000..dd5fe81 --- /dev/null +++ b/src/modules/filters/swbasicfilter.cpp @@ -0,0 +1,300 @@ +/****************************************************************************** + * swbasicfilter.h - definition of class SWBasicFilter. An SWFilter + * impl that provides some basic methods that + * many filters will need and can use as a starting + * point. 
+ * + * $Id: swbasicfilter.cpp,v 1.18 2002/06/06 21:08:47 scribe Exp $ + * + * Copyright 2001 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +#include <stdlib.h> +#include <string.h> +#include <swbasicfilter.h> +#include <stdio.h> +#include <stdarg.h> + +SWBasicFilter::SWBasicFilter() { + tokenStart = 0; + tokenEnd = 0; + escStart = 0; + escEnd = 0; + + setTokenStart("<"); + setTokenEnd(">"); + setEscapeStart("&"); + setEscapeEnd(";"); + + escStringCaseSensitive = false; + tokenCaseSensitive = false; + passThruUnknownToken = false; + passThruUnknownEsc = false; +} + + +void SWBasicFilter::setPassThruUnknownToken(bool val) { + passThruUnknownToken = val; +} + + +void SWBasicFilter::setPassThruUnknownEscapeString(bool val) { + passThruUnknownEsc = val; +} + + +void SWBasicFilter::setTokenCaseSensitive(bool val) { + tokenCaseSensitive = val; +} + + +void SWBasicFilter::setEscapeStringCaseSensitive(bool val) { + escStringCaseSensitive = val; +} + + +SWBasicFilter::~SWBasicFilter() { + if (tokenStart) + delete [] tokenStart; + + if (tokenEnd) + delete [] tokenEnd; + + if (escStart) + delete [] escStart; + + if (escEnd) + delete [] escEnd; +} + + +void SWBasicFilter::addTokenSubstitute(const char *findString, const char *replaceString) { + char *buf = 0; + + if (!tokenCaseSensitive) { + stdstr(&buf, findString); + toupperstr(buf); + tokenSubMap.insert(DualStringMap::value_type(buf, replaceString)); + delete [] buf; + } + else 
tokenSubMap.insert(DualStringMap::value_type(findString, replaceString)); +} + + +void SWBasicFilter::addEscapeStringSubstitute(const char *findString, const char *replaceString) { + char *buf = 0; + + if (!escStringCaseSensitive) { + stdstr(&buf, findString); + toupperstr(buf); + escSubMap.insert(DualStringMap::value_type(buf, replaceString)); + delete [] buf; + } + else escSubMap.insert(DualStringMap::value_type(findString, replaceString)); +} + + +void SWBasicFilter::pushString(char **buf, const char *format, ...) { + va_list argptr; + + va_start(argptr, format); + *buf += vsprintf(*buf, format, argptr); + va_end(argptr); + +// *buf += strlen(*buf); +} + + +bool SWBasicFilter::substituteToken(char **buf, const char *token) { + DualStringMap::iterator it; + + if (!tokenCaseSensitive) { + char *tmp = 0; + stdstr(&tmp, token); + toupperstr(tmp); + it = tokenSubMap.find(tmp); + delete [] tmp; + } else + it = tokenSubMap.find(token); + + if (it != tokenSubMap.end()) { + pushString(buf, it->second.c_str()); + return true; + } + return false; +} + + +bool SWBasicFilter::substituteEscapeString(char **buf, const char *escString) { + DualStringMap::iterator it; + + if (!escStringCaseSensitive) { + char *tmp = 0; + stdstr(&tmp, escString); + toupperstr(tmp); + it = escSubMap.find(tmp); + delete [] tmp; + } else + it = escSubMap.find(escString); + + if (it != escSubMap.end()) { + pushString(buf, it->second.c_str()); + return true; + } + return false; +} + + +bool SWBasicFilter::handleToken(char **buf, const char *token, DualStringMap &userData) { + return substituteToken(buf, token); +} + + +bool SWBasicFilter::handleEscapeString(char **buf, const char *escString, DualStringMap &userData) { + return substituteEscapeString(buf, escString); +} + + +void SWBasicFilter::setEscapeStart(const char *escStart) { + stdstr(&(this->escStart), escStart); +} + + +void SWBasicFilter::setEscapeEnd(const char *escEnd) { + stdstr(&(this->escEnd), escEnd); +} + + +void 
SWBasicFilter::setTokenStart(const char *tokenStart) { + stdstr(&(this->tokenStart), tokenStart); +} + + +void SWBasicFilter::setTokenEnd(const char *tokenEnd) { + stdstr(&(this->tokenEnd), tokenEnd); +} + + +char SWBasicFilter::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) { + this->key = key; + this->module = module; + char *to, *from, token[4096]; + int tokpos = 0; + bool intoken = false; + int len; + bool inEsc = false; + char escStartLen = strlen(escStart); + char escEndLen = strlen(escEnd); + char escStartPos = 0, escEndPos = 0; + char tokenStartLen = strlen(tokenStart); + char tokenEndLen = strlen(tokenEnd); + char tokenStartPos = 0, tokenEndPos = 0; + DualStringMap userData; + string lastTextNode; + + bool suspendTextPassThru = false; + userData["suspendTextPassThru"] = "false"; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + resultBuffer = text; + + for (to = text; *from; from++) { + if (*from == tokenStart[tokenStartPos]) { + if (tokenStartPos == (tokenStartLen - 1)) { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + inEsc = false; + } + else tokenStartPos++; + continue; + } + + if (*from == escStart[escStartPos]) { + if (escStartPos == (escStartLen - 1)) { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + inEsc = true; + } + else escStartPos++; + continue; + } + + if (inEsc) { + if (*from == escEnd[escEndPos]) { + if (escEndPos == (escEndLen - 1)) { + intoken = false; + userData["lastTextNode"] = lastTextNode; + if ((!handleEscapeString(&to, token, userData)) && (passThruUnknownEsc)) { + pushString(&to, escStart); + pushString(&to, token); + pushString(&to, escEnd); + } + escEndPos = escStartPos = tokenEndPos = tokenStartPos = 0; + lastTextNode = ""; + suspendTextPassThru = 
(!userData["suspendTextPassThru"].compare("true")); + continue; + } + } + } + + if (!inEsc) { + if (*from == tokenEnd[tokenEndPos]) { + if (tokenEndPos == (tokenEndLen - 1)) { + intoken = false; + userData["lastTextNode"] = lastTextNode; + if ((!handleToken(&to, token, userData)) && (passThruUnknownToken)) { + pushString(&to, tokenStart); + pushString(&to, token); + pushString(&to, tokenEnd); + } + escEndPos = escStartPos = tokenEndPos = tokenStartPos = 0; + lastTextNode = ""; + suspendTextPassThru = (!userData["suspendTextPassThru"].compare("true")); + continue; + } + } + } + + if (intoken) { + if (tokpos < 4090) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!suspendTextPassThru) + *to++ = *from; + lastTextNode += *from; + } + } + *to++ = 0; + *to = 0; + return 0; +} + + + diff --git a/src/modules/filters/thmlfootnotes.cpp b/src/modules/filters/thmlfootnotes.cpp new file mode 100644 index 0000000..d9b1f0e --- /dev/null +++ b/src/modules/filters/thmlfootnotes.cpp @@ -0,0 +1,103 @@ +/****************************************************************************** + * + * thmlfootnotes - SWFilter decendant to hide or show footnotes + * in a ThML module. + */ + + +#include <stdlib.h> +#include <string.h> +#include <thmlfootnotes.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char ThMLFootnotes::on[] = "On"; +const char ThMLFootnotes::off[] = "Off"; +const char ThMLFootnotes::optName[] = "Footnotes"; +const char ThMLFootnotes::optTip[] = "Toggles Footnotes On and Off if they exist"; + + +ThMLFootnotes::ThMLFootnotes() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +ThMLFootnotes::~ThMLFootnotes() { +} + +void ThMLFootnotes::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *ThMLFootnotes::getOptionValue() +{ + return (option) ? 
on:off; +} + +char ThMLFootnotes::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want footnotes + char *to, *from, token[2048]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strncmp(token, "note", 4)) { + hide = true; + continue; + } + else if (!strncmp(token, "/note", 5)) { + hide = false; + continue; + } + + // if not a footnote token, keep token in text + if (!hide) { + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + *to++ = *from; + } + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/thmlgbf.cpp b/src/modules/filters/thmlgbf.cpp new file mode 100644 index 0000000..66d9a20 --- /dev/null +++ b/src/modules/filters/thmlgbf.cpp @@ -0,0 +1,330 @@ +/*************************************************************************** + thmlgbf.cpp - ThML to GBF filter + ------------------- + begin : 1999-10-28 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + 
* (at your option) any later version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <thmlgbf.h> + + +ThMLGBF::ThMLGBF() +{ +} + + +char ThMLGBF::ProcessText(char *text, int maxlen) +{ + char *to, *from, token[2048]; + int tokpos = 0; + bool intoken = false; + int len; + bool ampersand = false; + bool sechead = false; + bool title = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + ampersand = false; + continue; + } + else if (*from == '&') { + intoken = true; + tokpos = 0; + memset(token, 0, 2048); + ampersand = true; + continue; + } + if (*from == ';' && ampersand) { + intoken = false; + + if (!strncmp("nbsp", token, 4)) *to++ = ' '; + else if (!strncmp("quot", token, 4)) *to++ = '"'; + else if (!strncmp("amp", token, 3)) *to++ = '&'; + else if (!strncmp("lt", token, 2)) *to++ = '<'; + else if (!strncmp("gt", token, 2)) *to++ = '>'; + else if (!strncmp("brvbar", token, 6)) *to++ = '|'; + else if (!strncmp("sect", token, 4)) *to++ = '§'; + else if (!strncmp("copy", token, 4)) *to++ = '©'; + else if (!strncmp("laquo", token, 5)) *to++ = '«'; + else if (!strncmp("reg", token, 3)) *to++ = '®'; + else if (!strncmp("acute", token, 5)) *to++ = '´'; + else if (!strncmp("para", token, 4)) *to++ = '¶'; + else if (!strncmp("raquo", token, 5)) *to++ = '»'; + + else if (!strncmp("Aacute", token, 6)) *to++ = 'Á'; + else if (!strncmp("Agrave", token, 6)) *to++ = 'À'; + else if (!strncmp("Acirc", token, 5)) *to++ = 'Â'; + else if (!strncmp("Auml", token, 4)) *to++ = 'Ä'; + else if (!strncmp("Atilde", token, 6)) *to++ = 'Ã'; + else if (!strncmp("Aring", token, 5)) *to++ = 'Å'; + 
else if (!strncmp("aacute", token, 6)) *to++ = 'á'; + else if (!strncmp("agrave", token, 6)) *to++ = 'à'; + else if (!strncmp("acirc", token, 5)) *to++ = 'â'; + else if (!strncmp("auml", token, 4)) *to++ = 'ä'; + else if (!strncmp("atilde", token, 6)) *to++ = 'ã'; + else if (!strncmp("aring", token, 5)) *to++ = 'å'; + else if (!strncmp("Eacute", token, 6)) *to++ = 'É'; + else if (!strncmp("Egrave", token, 6)) *to++ = 'È'; + else if (!strncmp("Ecirc", token, 5)) *to++ = 'Ê'; + else if (!strncmp("Euml", token, 4)) *to++ = 'Ë'; + else if (!strncmp("eacute", token, 6)) *to++ = 'é'; + else if (!strncmp("egrave", token, 6)) *to++ = 'è'; + else if (!strncmp("ecirc", token, 5)) *to++ = 'ê'; + else if (!strncmp("euml", token, 4)) *to++ = 'ë'; + else if (!strncmp("Iacute", token, 6)) *to++ = 'Í'; + else if (!strncmp("Igrave", token, 6)) *to++ = 'Ì'; + else if (!strncmp("Icirc", token, 5)) *to++ = 'Î'; + else if (!strncmp("Iuml", token, 4)) *to++ = 'Ï'; + else if (!strncmp("iacute", token, 6)) *to++ = 'í'; + else if (!strncmp("igrave", token, 6)) *to++ = 'ì'; + else if (!strncmp("icirc", token, 5)) *to++ = 'î'; + else if (!strncmp("iuml", token, 4)) *to++ = 'ï'; + else if (!strncmp("Oacute", token, 6)) *to++ = 'Ó'; + else if (!strncmp("Ograve", token, 6)) *to++ = 'Ò'; + else if (!strncmp("Ocirc", token, 5)) *to++ = 'Ô'; + else if (!strncmp("Ouml", token, 4)) *to++ = 'Ö'; + else if (!strncmp("Otilde", token, 6)) *to++ = 'Õ'; + else if (!strncmp("oacute", token, 6)) *to++ = 'ó'; + else if (!strncmp("ograve", token, 6)) *to++ = 'ò'; + else if (!strncmp("ocirc", token, 5)) *to++ = 'ô'; + else if (!strncmp("ouml", token, 4)) *to++ = 'ö'; + else if (!strncmp("otilde", token, 6)) *to++ = 'õ'; + else if (!strncmp("Uacute", token, 6)) *to++ = 'Ú'; + else if (!strncmp("Ugrave", token, 6)) *to++ = 'Ù'; + else if (!strncmp("Ucirc", token, 5)) *to++ = 'Û'; + else if (!strncmp("Uuml", token, 4)) *to++ = 'Ü'; + else if (!strncmp("uacute", token, 6)) *to++ = 'ú'; + else if 
(!strncmp("ugrave", token, 6)) *to++ = 'ù'; + else if (!strncmp("ucirc", token, 5)) *to++ = 'û'; + else if (!strncmp("uuml", token, 4)) *to++ = 'ü'; + else if (!strncmp("Yacute", token, 6)) *to++ = 'Ý'; + else if (!strncmp("yacute", token, 6)) *to++ = 'ý'; + else if (!strncmp("yuml", token, 4)) *to++ = 'ÿ'; + + else if (!strncmp("deg", token, 3)) *to++ = '°'; + else if (!strncmp("plusmn", token, 6)) *to++ = '±'; + else if (!strncmp("sup2", token, 4)) *to++ = '²'; + else if (!strncmp("sup3", token, 4)) *to++ = '³'; + else if (!strncmp("sup1", token, 4)) *to++ = '¹'; + else if (!strncmp("nbsp", token, 4)) *to++ = 'º'; + else if (!strncmp("pound", token, 5)) *to++ = '£'; + else if (!strncmp("cent", token, 4)) *to++ = '¢'; + else if (!strncmp("frac14", token, 6)) *to++ = '¼'; + else if (!strncmp("frac12", token, 6)) *to++ = '½'; + else if (!strncmp("frac34", token, 6)) *to++ = '¾'; + else if (!strncmp("iquest", token, 6)) *to++ = '¿'; + else if (!strncmp("iexcl", token, 5)) *to++ = '¡'; + else if (!strncmp("ETH", token, 3)) *to++ = 'Ð'; + else if (!strncmp("eth", token, 3)) *to++ = 'ð'; + else if (!strncmp("THORN", token, 5)) *to++ = 'Þ'; + else if (!strncmp("thorn", token, 5)) *to++ = 'þ'; + else if (!strncmp("AElig", token, 5)) *to++ = 'Æ'; + else if (!strncmp("aelig", token, 5)) *to++ = 'æ'; + else if (!strncmp("Oslash", token, 6)) *to++ = 'Ø'; + else if (!strncmp("curren", token, 6)) *to++ = '¤'; + else if (!strncmp("Ccedil", token, 6)) *to++ = 'Ç'; + else if (!strncmp("ccedil", token, 6)) *to++ = 'ç'; + else if (!strncmp("szlig", token, 5)) *to++ = 'ß'; + else if (!strncmp("Ntilde", token, 6)) *to++ = 'Ñ'; + else if (!strncmp("ntilde", token, 6)) *to++ = 'ñ'; + else if (!strncmp("yen", token, 3)) *to++ = '¥'; + else if (!strncmp("not", token, 3)) *to++ = '¬'; + else if (!strncmp("ordf", token, 4)) *to++ = 'ª'; + else if (!strncmp("uml", token, 3)) *to++ = '¨'; + else if (!strncmp("shy", token, 3)) *to++ = ''; + else if (!strncmp("macr", token, 4)) *to++ = '¯'; + 
continue; + + } + else if (*from == '>' && !ampersand) { + intoken = false; + // process desired tokens + if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) { + *to++ = '<'; + *to++ = 'W'; + for (unsigned int i = 27; token[i] != '\"'; i++) + *to++ = token[i]; + *to++ = '>'; + continue; + } + if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) { + *to++ = '<'; + *to++ = 'W'; + *to++ = 'T'; + for (unsigned int i = 25; token[i] != '\"'; i++) + *to++ = token[i]; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "scripRef", 8)) { + *to++ = '<'; + *to++ = 'R'; + *to++ = 'X'; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "/scripRef", 9)) { + *to++ = '<'; + *to++ = 'R'; + *to++ = 'x'; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "note", 4)) { + *to++ = '<'; + *to++ = 'R'; + *to++ = 'F'; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "/note", 5)) { + *to++ = '<'; + *to++ = 'R'; + *to++ = 'f'; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "sup", 3)) { + *to++ = '<'; + *to++ = 'F'; + *to++ = 'S'; + *to++ = '>'; + } + else if (!strncmp(token, "/sup", 4)) { + *to++ = '<'; + *to++ = 'F'; + *to++ = 's'; + *to++ = '>'; + } + else if (!strnicmp(token, "font color=#ff0000", 18)) { + *to++ = '<'; + *to++ = 'F'; + *to++ = 'R'; + *to++ = '>'; + continue; + } + else if (!strnicmp(token, "/font", 5)) { + *to++ = '<'; + *to++ = 'F'; + *to++ = 'r'; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "div class=\"sechead\"", 19)) { + *to++ = '<'; + *to++ = 'T'; + *to++ = 'S'; + *to++ = '>'; + sechead = true; + continue; + } + else if (sechead && !strncmp(token, "/div", 19)) { + *to++ = '<'; + *to++ = 'T'; + *to++ = 's'; + *to++ = '>'; + sechead = false; + continue; + } + else if (!strncmp(token, "div class=\"title\"", 19)) { + *to++ = '<'; + *to++ = 'T'; + *to++ = 'T'; + *to++ = '>'; + title = true; + continue; + } + else if (title && !strncmp(token, "/div", 19)) { + *to++ = '<'; + *to++ = 'T'; + *to++ = 't'; + *to++ 
= '>'; + title = false; + continue; + } + else if (!strnicmp(token, "br", 2)) { + *to++ = '<'; + *to++ = 'C'; + *to++ = 'L'; + *to++ = '>'; + continue; + } + else switch(*token) { + case 'I': // font tags + case 'i': + *to++ = '<'; + *to++ = 'F'; + *to++ = 'I'; + *to++ = '>'; + continue; + case 'B': // bold start + case 'b': + *to++ = '<'; + *to++ = 'F'; + *to++ = 'B'; + *to++ = '>'; + continue; + case '/': + switch(token[1]) { + case 'P': + case 'p': + *to++ = '<'; + *to++ = 'C'; + *to++ = 'M'; + *to++ = '>'; + continue; + case 'I': + case 'i': // italic end + *to++ = '<'; + *to++ = 'F'; + *to++ = 'i'; + *to++ = '>'; + continue; + case 'B': // bold start + case 'b': + *to++ = '<'; + *to++ = 'F'; + *to++ = 'b'; + *to++ = '>'; + continue; + } + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} + + + diff --git a/src/modules/filters/thmlheadings.cpp b/src/modules/filters/thmlheadings.cpp new file mode 100644 index 0000000..00b8a23 --- /dev/null +++ b/src/modules/filters/thmlheadings.cpp @@ -0,0 +1,107 @@ +/****************************************************************************** + * + * thmlheadings - SWFilter decendant to hide or show headings + * in a ThML module. + */ + + +#include <stdlib.h> +#include <string.h> +#include <thmlheadings.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char ThMLHeadings::on[] = "On"; +const char ThMLHeadings::off[] = "Off"; +const char ThMLHeadings::optName[] = "Headings"; +const char ThMLHeadings::optTip[] = "Toggles Headings On and Off if they exist"; + + +ThMLHeadings::ThMLHeadings() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +ThMLHeadings::~ThMLHeadings() { +} + +void ThMLHeadings::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *ThMLHeadings::getOptionValue() +{ + return (option) ? 
on:off; +} + +char ThMLHeadings::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want headings + char *to, *from, token[2048]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strnicmp(token, "div class=\"sechead\"", 19)) { + hide = true; + continue; + } + if (!strnicmp(token, "div class=\"title\"", 17)) { + hide = true; + continue; + } + else if (hide && !strnicmp(token, "/div", 4)) { + hide = false; + continue; + } + + // if not a heading token, keep token in text + if (!hide) { + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + *to++ = *from; + } + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/thmlhtml.cpp b/src/modules/filters/thmlhtml.cpp new file mode 100644 index 0000000..9cb8679 --- /dev/null +++ b/src/modules/filters/thmlhtml.cpp @@ -0,0 +1,211 @@ +/*************************************************************************** + thmlhtml.cpp - ThML to HTML filter + ------------------- + begin : 1999-10-27 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU 
General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <thmlhtml.h> +#include <swmodule.h> + + +ThMLHTML::ThMLHTML() { + setTokenStart("<"); + setTokenEnd(">"); +/* + setEscapeStart("&"); + setEscapeEnd(";"); + + setEscapeStringCaseSensitive(true); + + addEscapeStringSubstitute("nbsp", " "); + addEscapeStringSubstitute("quot", "\""); + addEscapeStringSubstitute("amp", "&"); + addEscapeStringSubstitute("lt", "<"); + addEscapeStringSubstitute("gt", ">"); + addEscapeStringSubstitute("brvbar", "|"); + addEscapeStringSubstitute("sect", "§"); + addEscapeStringSubstitute("copy", "©"); + addEscapeStringSubstitute("laquo", "«"); + addEscapeStringSubstitute("reg", "®"); + addEscapeStringSubstitute("acute", "´"); + addEscapeStringSubstitute("para", "¶"); + addEscapeStringSubstitute("raquo", "»"); + + addEscapeStringSubstitute("Aacute", "Á"); + addEscapeStringSubstitute("Agrave", "À"); + addEscapeStringSubstitute("Acirc", "Â"); + addEscapeStringSubstitute("Auml", "Ä"); + addEscapeStringSubstitute("Atilde", "Ã"); + addEscapeStringSubstitute("Aring", "Å"); + addEscapeStringSubstitute("aacute", "á"); + addEscapeStringSubstitute("agrave", "à"); + addEscapeStringSubstitute("acirc", "â"); + addEscapeStringSubstitute("auml", "ä"); + addEscapeStringSubstitute("atilde", "ã"); + addEscapeStringSubstitute("aring", "å"); + addEscapeStringSubstitute("Eacute", "É"); + addEscapeStringSubstitute("Egrave", "È"); + addEscapeStringSubstitute("Ecirc", "Ê"); + addEscapeStringSubstitute("Euml", "Ë"); + addEscapeStringSubstitute("eacute", "é"); + addEscapeStringSubstitute("egrave", "è"); + addEscapeStringSubstitute("ecirc", "ê"); + addEscapeStringSubstitute("euml", "ë"); + addEscapeStringSubstitute("Iacute", "Í"); + addEscapeStringSubstitute("Igrave", "Ì"); + 
addEscapeStringSubstitute("Icirc", "Î"); + addEscapeStringSubstitute("Iuml", "Ï"); + addEscapeStringSubstitute("iacute", "í"); + addEscapeStringSubstitute("igrave", "ì"); + addEscapeStringSubstitute("icirc", "î"); + addEscapeStringSubstitute("iuml", "ï"); + addEscapeStringSubstitute("Oacute", "Ó"); + addEscapeStringSubstitute("Ograve", "Ò"); + addEscapeStringSubstitute("Ocirc", "Ô"); + addEscapeStringSubstitute("Ouml", "Ö"); + addEscapeStringSubstitute("Otilde", "Õ"); + addEscapeStringSubstitute("oacute", "ó"); + addEscapeStringSubstitute("ograve", "ò"); + addEscapeStringSubstitute("ocirc", "ô"); + addEscapeStringSubstitute("ouml", "ö"); + addEscapeStringSubstitute("otilde", "õ"); + addEscapeStringSubstitute("Uacute", "Ú"); + addEscapeStringSubstitute("Ugrave", "Ù"); + addEscapeStringSubstitute("Ucirc", "Û"); + addEscapeStringSubstitute("Uuml", "Ü"); + addEscapeStringSubstitute("uacute", "ú"); + addEscapeStringSubstitute("ugrave", "ù"); + addEscapeStringSubstitute("ucirc", "û"); + addEscapeStringSubstitute("uuml", "ü"); + addEscapeStringSubstitute("Yacute", "Ý"); + addEscapeStringSubstitute("yacute", "ý"); + addEscapeStringSubstitute("yuml", "ÿ"); + + addEscapeStringSubstitute("deg", "°"); + addEscapeStringSubstitute("plusmn", "±"); + addEscapeStringSubstitute("sup2", "²"); + addEscapeStringSubstitute("sup3", "³"); + addEscapeStringSubstitute("sup1", "¹"); + addEscapeStringSubstitute("nbsp", "º"); + addEscapeStringSubstitute("pound", "£"); + addEscapeStringSubstitute("cent", "¢"); + addEscapeStringSubstitute("frac14", "¼"); + addEscapeStringSubstitute("frac12", "½"); + addEscapeStringSubstitute("frac34", "¾"); + addEscapeStringSubstitute("iquest", "¿"); + addEscapeStringSubstitute("iexcl", "¡"); + addEscapeStringSubstitute("ETH", "Ð"); + addEscapeStringSubstitute("eth", "ð"); + addEscapeStringSubstitute("THORN", "Þ"); + addEscapeStringSubstitute("thorn", "þ"); + addEscapeStringSubstitute("AElig", "Æ"); + addEscapeStringSubstitute("aelig", "æ"); + 
addEscapeStringSubstitute("Oslash", "Ø"); + addEscapeStringSubstitute("curren", "¤"); + addEscapeStringSubstitute("Ccedil", "Ç"); + addEscapeStringSubstitute("ccedil", "ç"); + addEscapeStringSubstitute("szlig", "ß"); + addEscapeStringSubstitute("Ntilde", "Ñ"); + addEscapeStringSubstitute("ntilde", "ñ"); + addEscapeStringSubstitute("yen", "¥"); + addEscapeStringSubstitute("not", "¬"); + addEscapeStringSubstitute("ordf", "ª"); + addEscapeStringSubstitute("uml", "¨"); + addEscapeStringSubstitute("shy", ""); + addEscapeStringSubstitute("macr", "¯"); +*/ + setTokenCaseSensitive(true); + + addTokenSubstitute("/scripRef", " </a>"); + addTokenSubstitute("note", " <font color=\"#800000\"><small>("); + addTokenSubstitute("/note", ")</small></font> "); +} + + +bool ThMLHTML::handleToken(char **buf, const char *token, DualStringMap &userData) { + if (!substituteToken(buf, token)) { + // manually process if it wasn't a simple substitution + if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) { + if (token[27] == 'H' || token[27] == 'G' || token[27] == 'A') { + pushString(buf, "<small><em>"); + for (const char *tok = token + 5; *tok; tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + pushString(buf, "</em></small>"); + } + else if (token[27] == 'T') { + pushString(buf, "<small><i>"); + for (unsigned int i = 29; token[i] != '\"'; i++) + *(*buf)++ = token[i]; + pushString(buf, "</i></small>"); + } + } + else if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) { + pushString(buf, "<small><em>"); + for (unsigned int i = 25; token[i] != '\"'; i++) + *(*buf)++ = token[i]; + pushString(buf, "</em></small>"); + } + else if (!strncmp(token, "sync type=\"lemma\" value=\"", 25)) { + pushString(buf, "<small><em>("); + for (unsigned int i = 25; token[i] != '\"'; i++) + *(*buf)++ = token[i]; + pushString(buf, ")</em></small>"); + } + else if (!strncmp(token, "scripRef", 8)) { + pushString(buf, "<a href=\""); + for (const char *tok = token + 9; *tok; tok++) + if(*tok != '\"') + 
*(*buf)++ = *tok; + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + } + else if (!strncmp(token, "img ", 4)) { + const char *src = strstr(token, "src"); + if (!src) // assert we have a src attribute + return false; + + *(*buf)++ = '<'; + for (const char *c = token; *c; c++) { + if (c == src) { + for (;((*c) && (*c != '"')); c++) + *(*buf)++ = *c; + + if (!*c) { c--; continue; } + + *(*buf)++ = '"'; + if (*(c+1) == '/') { + pushString(buf, "file:"); + pushString(buf, module->getConfigEntry("AbsoluteDataPath")); + if (*((*buf)-1) == '/') + c++; // skip '/' + } + continue; + } + *(*buf)++ = *c; + } + *(*buf)++ = '>'; + } + else if(!strncmp(token, "note", 4)) { + pushString(buf, " <font color=\"#800000\"><small>("); + } + + else { + return false; // we still didn't handle token + } + } + return true; +} + diff --git a/src/modules/filters/thmlhtmlhref.cpp b/src/modules/filters/thmlhtmlhref.cpp new file mode 100644 index 0000000..ce7e3fd --- /dev/null +++ b/src/modules/filters/thmlhtmlhref.cpp @@ -0,0 +1,269 @@ +/*************************************************************************** + thmlhtmlhref.cpp - ThML to HTML filter with hrefs + ------------------- + begin : 2001-09-03 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <thmlhtmlhref.h> +#include <swmodule.h> + + +ThMLHTMLHREF::ThMLHTMLHREF() { + setTokenStart("<"); + setTokenEnd(">"); +/* + setEscapeStart("&"); + setEscapeEnd(";"); + + setEscapeStringCaseSensitive(true); + + addEscapeStringSubstitute("nbsp", " "); + addEscapeStringSubstitute("quot", "\""); + addEscapeStringSubstitute("amp", "&"); + addEscapeStringSubstitute("lt", "<"); + addEscapeStringSubstitute("gt", ">"); + addEscapeStringSubstitute("brvbar", "|"); + addEscapeStringSubstitute("sect", "§"); + addEscapeStringSubstitute("copy", "©"); + addEscapeStringSubstitute("laquo", "«"); + addEscapeStringSubstitute("reg", "®"); + addEscapeStringSubstitute("acute", "´"); + addEscapeStringSubstitute("para", "¶"); + addEscapeStringSubstitute("raquo", "»"); + + addEscapeStringSubstitute("Aacute", "Á"); + addEscapeStringSubstitute("Agrave", "À"); + addEscapeStringSubstitute("Acirc", "Â"); + addEscapeStringSubstitute("Auml", "Ä"); + addEscapeStringSubstitute("Atilde", "Ã"); + addEscapeStringSubstitute("Aring", "Å"); + addEscapeStringSubstitute("aacute", "á"); + addEscapeStringSubstitute("agrave", "à"); + addEscapeStringSubstitute("acirc", "â"); + addEscapeStringSubstitute("auml", "ä"); + addEscapeStringSubstitute("atilde", "ã"); + addEscapeStringSubstitute("aring", "å"); + addEscapeStringSubstitute("Eacute", "É"); + addEscapeStringSubstitute("Egrave", "È"); + addEscapeStringSubstitute("Ecirc", "Ê"); + addEscapeStringSubstitute("Euml", "Ë"); + addEscapeStringSubstitute("eacute", "é"); + addEscapeStringSubstitute("egrave", "è"); + addEscapeStringSubstitute("ecirc", "ê"); + addEscapeStringSubstitute("euml", "ë"); + addEscapeStringSubstitute("Iacute", "Í"); + addEscapeStringSubstitute("Igrave", "Ì"); + addEscapeStringSubstitute("Icirc", "Î"); + addEscapeStringSubstitute("Iuml", "Ï"); + addEscapeStringSubstitute("iacute", "í"); + 
addEscapeStringSubstitute("igrave", "ì"); + addEscapeStringSubstitute("icirc", "î"); + addEscapeStringSubstitute("iuml", "ï"); + addEscapeStringSubstitute("Oacute", "Ó"); + addEscapeStringSubstitute("Ograve", "Ò"); + addEscapeStringSubstitute("Ocirc", "Ô"); + addEscapeStringSubstitute("Ouml", "Ö"); + addEscapeStringSubstitute("Otilde", "Õ"); + addEscapeStringSubstitute("oacute", "ó"); + addEscapeStringSubstitute("ograve", "ò"); + addEscapeStringSubstitute("ocirc", "ô"); + addEscapeStringSubstitute("ouml", "ö"); + addEscapeStringSubstitute("otilde", "õ"); + addEscapeStringSubstitute("Uacute", "Ú"); + addEscapeStringSubstitute("Ugrave", "Ù"); + addEscapeStringSubstitute("Ucirc", "Û"); + addEscapeStringSubstitute("Uuml", "Ü"); + addEscapeStringSubstitute("uacute", "ú"); + addEscapeStringSubstitute("ugrave", "ù"); + addEscapeStringSubstitute("ucirc", "û"); + addEscapeStringSubstitute("uuml", "ü"); + addEscapeStringSubstitute("Yacute", "Ý"); + addEscapeStringSubstitute("yacute", "ý"); + addEscapeStringSubstitute("yuml", "ÿ"); + + addEscapeStringSubstitute("deg", "°"); + addEscapeStringSubstitute("plusmn", "±"); + addEscapeStringSubstitute("sup2", "²"); + addEscapeStringSubstitute("sup3", "³"); + addEscapeStringSubstitute("sup1", "¹"); + addEscapeStringSubstitute("nbsp", "º"); + addEscapeStringSubstitute("pound", "£"); + addEscapeStringSubstitute("cent", "¢"); + addEscapeStringSubstitute("frac14", "¼"); + addEscapeStringSubstitute("frac12", "½"); + addEscapeStringSubstitute("frac34", "¾"); + addEscapeStringSubstitute("iquest", "¿"); + addEscapeStringSubstitute("iexcl", "¡"); + addEscapeStringSubstitute("ETH", "Ð"); + addEscapeStringSubstitute("eth", "ð"); + addEscapeStringSubstitute("THORN", "Þ"); + addEscapeStringSubstitute("thorn", "þ"); + addEscapeStringSubstitute("AElig", "Æ"); + addEscapeStringSubstitute("aelig", "æ"); + addEscapeStringSubstitute("Oslash", "Ø"); + addEscapeStringSubstitute("curren", "¤"); + addEscapeStringSubstitute("Ccedil", "Ç"); + 
addEscapeStringSubstitute("ccedil", "ç"); + addEscapeStringSubstitute("szlig", "ß"); + addEscapeStringSubstitute("Ntilde", "Ñ"); + addEscapeStringSubstitute("ntilde", "ñ"); + addEscapeStringSubstitute("yen", "¥"); + addEscapeStringSubstitute("not", "¬"); + addEscapeStringSubstitute("ordf", "ª"); + addEscapeStringSubstitute("uml", "¨"); + addEscapeStringSubstitute("shy", ""); + addEscapeStringSubstitute("macr", "¯"); +*/ + setTokenCaseSensitive(true); + + addTokenSubstitute("note", " <font color=\"#800000\"><small>("); + addTokenSubstitute("/note", ")</small></font> "); + addTokenSubstitute("/scripture", "</i> "); +} + + +bool ThMLHTMLHREF::handleToken(char **buf, const char *token, DualStringMap &userData) { + const char *tok; + if (!substituteToken(buf, token)) { + // manually process if it wasn't a simple substitution + if (!strncmp(token, "sync ", 5)) { + pushString(buf, "<a href=\""); + for (tok = token + 5; *(tok+1); tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + + //scan for value and add it to the buffer + for (tok = token + 5; *tok; tok++) { + if (!strncmp(tok, "value=\"", 7)) { + tok += 7; + for (;*tok != '\"'; tok++) + *(*buf)++ = *tok; + break; + } + } + pushString(buf, "</a>"); + } + + else if (!strncmp(token, "scripture ", 10)) { + userData["inscriptRef"] = "true"; + pushString(buf, "<i>"); + } + + else if (!strncmp(token, "scripRef p", 10) || !strncmp(token, "scripRef v", 10)) { + userData["inscriptRef"] = "true"; + pushString(buf, "<a href=\""); + for (const char *tok = token + 9; *(tok+1); tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + } + + // we're starting a scripRef like "<scripRef>John 3:16</scripRef>" + else if (!strcmp(token, "scripRef")) { + userData["inscriptRef"] = "false"; + // let's stop text from going to output + userData["suspendTextPassThru"] = "true"; + } + + // we've ended a scripRef + else if (!strcmp(token, "/scripRef")) { + if 
(userData["inscriptRef"] == "true") { // like "<scripRef passage="John 3:16">John 3:16</scripRef>" + userData["inscriptRef"] = "false"; + pushString(buf, "</a>"); + } + + else { // like "<scripRef>John 3:16</scripRef>" + pushString(buf, "<a href=\"passage="); + //char *strbuf = (char *)userData["lastTextNode"].c_str(); + pushString(buf, userData["lastTextNode"].c_str()); + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + pushString(buf, userData["lastTextNode"].c_str()); + // let's let text resume to output again + userData["suspendTextPassThru"] = "false"; + pushString(buf, "</a>"); + } + } + + else if (!strncmp(token, "div class=\"sechead\"", 19)) { + userData["SecHead"] = "true"; + pushString(buf, "<br /><b><i>"); + } + else if (!strncmp(token, "div class=\"title\"", 19)) { + userData["SecHead"] = "true"; + pushString(buf, "<br /><b><i>"); + } + else if (!strncmp(token, "/div", 4)) { + if (userData["SecHead"] == "true") { + pushString(buf, "</i></b><br />"); + userData["SecHead"] = "false"; + } + } + + else if (!strncmp(token, "sync type=\"Strongs\" value=\"T", 28)) { + pushString(buf, "<a href=\""); + for (tok = token + 5; *(tok+1); tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + for (tok = token + 29; *(tok+2); tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + pushString(buf, "</a>"); + } + else if (!strncmp(token, "img ", 4)) { + const char *src = strstr(token, "src"); + if (!src) // assert we have a src attribute + return false; + + *(*buf)++ = '<'; + for (const char *c = token; *c; c++) { + if (c == src) { + for (;((*c) && (*c != '"')); c++) + *(*buf)++ = *c; + + if (!*c) { c--; continue; } + + *(*buf)++ = '"'; + if (*(c+1) == '/') { + pushString(buf, "file:"); + pushString(buf, module->getConfigEntry("AbsoluteDataPath")); + if (*((*buf)-1) == '/') + c++; // skip '/' + } + continue; + } + *(*buf)++ = *c; + } + *(*buf)++ = '>'; + } + else if (!strncmp(token, "note", 4)) { + pushString(buf, " <small><font color=\"#800000\">("); + } 
+ else { + *(*buf)++ = '<'; + for (const char *tok = token; *tok; tok++) + *(*buf)++ = *tok; + *(*buf)++ = '>'; + //return false; // we still didn't handle token + } + } + return true; +} + diff --git a/src/modules/filters/thmllemma.cpp b/src/modules/filters/thmllemma.cpp new file mode 100644 index 0000000..33856db --- /dev/null +++ b/src/modules/filters/thmllemma.cpp @@ -0,0 +1,97 @@ +/****************************************************************************** + * + * thmllemma - SWFilter decendant to hide or show lemmas + * in a ThML module. + */ + + +#include <stdlib.h> +#include <string.h> +#include <thmllemma.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char ThMLLemma::on[] = "On"; +const char ThMLLemma::off[] = "Off"; +const char ThMLLemma::optName[] = "Lemmas"; +const char ThMLLemma::optTip[] = "Toggles Lemmas On and Off if they exist"; + +ThMLLemma::ThMLLemma() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +ThMLLemma::~ThMLLemma() { +} + +void ThMLLemma::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *ThMLLemma::getOptionValue() +{ + return (option) ? on:off; +} + +char ThMLLemma::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want lemmas + char *to, *from, token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strnicmp(token, "sync type=\"lemma\" ", 18)) { // Lemma + if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { + if (lastspace) + to--; + } + continue; + } + // if not a lemma token, keep token in text + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + *to++ = *from; + lastspace = (*from == ' '); + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/thmlmorph.cpp b/src/modules/filters/thmlmorph.cpp new file mode 100644 index 0000000..f95bede --- /dev/null +++ b/src/modules/filters/thmlmorph.cpp @@ -0,0 +1,98 @@ +/****************************************************************************** + * + * thmlmorph - SWFilter decendant to hide or show morph tags + * in a ThML module. 
+ */ + + +#include <stdlib.h> +#include <string.h> +#include <thmlmorph.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char ThMLMorph::on[] = "On"; +const char ThMLMorph::off[] = "Off"; +const char ThMLMorph::optName[] = "Morphological Tags"; +const char ThMLMorph::optTip[] = "Toggles Morphological Tags On and Off if they exist"; + + +ThMLMorph::ThMLMorph() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +ThMLMorph::~ThMLMorph() { +} + +void ThMLMorph::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *ThMLMorph::getOptionValue() +{ + return (option) ? on:off; +} + +char ThMLMorph::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want morph tags + char *to, *from, token[2048]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strnicmp(token, "sync type=\"morph\" ", 18)) { // Morph + if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { + if (lastspace) + to--; + } + continue; + } + // if not a morph tag token, keep token in text + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + *to++ = *from; + lastspace = (*from == ' '); + } + } + *to++ = 0; + *to = 0; + } + 
return 0; +} diff --git a/src/modules/filters/thmlolb.cpp b/src/modules/filters/thmlolb.cpp new file mode 100644 index 0000000..2b31fab --- /dev/null +++ b/src/modules/filters/thmlolb.cpp @@ -0,0 +1,243 @@ +/*************************************************************************** + thmlolb.cpp - ThML to OLB filter + ------------------- + begin : 2001-05-10 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <thmlolb.h> + + +ThMLOLB::ThMLOLB() +{ +} + + +char ThMLOLB::ProcessText(char *text, int maxlen) +{ + char *to, *from, token[2048]; + int tokpos = 0; + bool intoken = false; + int len; + bool ampersand = false; + int i; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + for (to = text; *from; from++) + { + if (*from == '<') { + intoken = true; + tokpos = 0; + memset(token, 0, 2048); + ampersand = false; + continue; + } + else if (*from == '&') { + intoken = true; + tokpos = 0; + memset(token, 0, 2048); + ampersand = true; + continue; + } + if (*from == ';' && ampersand) { + intoken = false; + + if (!strncmp("nbsp", token, 4)) *to++ = ' '; + else if (!strncmp("quot", token, 4)) *to++ = '"'; + else if (!strncmp("amp", token, 3)) *to++ = '&'; + else if (!strncmp("lt", token, 2)) *to++ = '<'; + else if (!strncmp("gt", token, 2)) *to++ = '>'; + 
else if (!strncmp("brvbar", token, 6)) *to++ = '|'; + else if (!strncmp("sect", token, 4)) *to++ = '§'; + else if (!strncmp("copy", token, 4)) *to++ = '©'; + else if (!strncmp("laquo", token, 5)) *to++ = '«'; + else if (!strncmp("reg", token, 3)) *to++ = '®'; + else if (!strncmp("acute", token, 5)) *to++ = '´'; + else if (!strncmp("para", token, 4)) *to++ = '¶'; + else if (!strncmp("raquo", token, 5)) *to++ = '»'; + + else if (!strncmp("Aacute", token, 6)) *to++ = 'Á'; + else if (!strncmp("Agrave", token, 6)) *to++ = 'À'; + else if (!strncmp("Acirc", token, 5)) *to++ = 'Â'; + else if (!strncmp("Auml", token, 4)) *to++ = 'Ä'; + else if (!strncmp("Atilde", token, 6)) *to++ = 'Ã'; + else if (!strncmp("Aring", token, 5)) *to++ = 'Å'; + else if (!strncmp("aacute", token, 6)) *to++ = 'á'; + else if (!strncmp("agrave", token, 6)) *to++ = 'à'; + else if (!strncmp("acirc", token, 5)) *to++ = 'â'; + else if (!strncmp("auml", token, 4)) *to++ = 'ä'; + else if (!strncmp("atilde", token, 6)) *to++ = 'ã'; + else if (!strncmp("aring", token, 5)) *to++ = 'å'; + else if (!strncmp("Eacute", token, 6)) *to++ = 'É'; + else if (!strncmp("Egrave", token, 6)) *to++ = 'È'; + else if (!strncmp("Ecirc", token, 5)) *to++ = 'Ê'; + else if (!strncmp("Euml", token, 4)) *to++ = 'Ë'; + else if (!strncmp("eacute", token, 6)) *to++ = 'é'; + else if (!strncmp("egrave", token, 6)) *to++ = 'è'; + else if (!strncmp("ecirc", token, 5)) *to++ = 'ê'; + else if (!strncmp("euml", token, 4)) *to++ = 'ë'; + else if (!strncmp("Iacute", token, 6)) *to++ = 'Í'; + else if (!strncmp("Igrave", token, 6)) *to++ = 'Ì'; + else if (!strncmp("Icirc", token, 5)) *to++ = 'Î'; + else if (!strncmp("Iuml", token, 4)) *to++ = 'Ï'; + else if (!strncmp("iacute", token, 6)) *to++ = 'í'; + else if (!strncmp("igrave", token, 6)) *to++ = 'ì'; + else if (!strncmp("icirc", token, 5)) *to++ = 'î'; + else if (!strncmp("iuml", token, 4)) *to++ = 'ï'; + else if (!strncmp("Oacute", token, 6)) *to++ = 'Ó'; + else if (!strncmp("Ograve", 
token, 6)) *to++ = 'Ò'; + else if (!strncmp("Ocirc", token, 5)) *to++ = 'Ô'; + else if (!strncmp("Ouml", token, 4)) *to++ = 'Ö'; + else if (!strncmp("Otilde", token, 6)) *to++ = 'Õ'; + else if (!strncmp("oacute", token, 6)) *to++ = 'ó'; + else if (!strncmp("ograve", token, 6)) *to++ = 'ò'; + else if (!strncmp("ocirc", token, 5)) *to++ = 'ô'; + else if (!strncmp("ouml", token, 4)) *to++ = 'ö'; + else if (!strncmp("otilde", token, 6)) *to++ = 'õ'; + else if (!strncmp("Uacute", token, 6)) *to++ = 'Ú'; + else if (!strncmp("Ugrave", token, 6)) *to++ = 'Ù'; + else if (!strncmp("Ucirc", token, 5)) *to++ = 'Û'; + else if (!strncmp("Uuml", token, 4)) *to++ = 'Ü'; + else if (!strncmp("uacute", token, 6)) *to++ = 'ú'; + else if (!strncmp("ugrave", token, 6)) *to++ = 'ù'; + else if (!strncmp("ucirc", token, 5)) *to++ = 'û'; + else if (!strncmp("uuml", token, 4)) *to++ = 'ü'; + else if (!strncmp("Yacute", token, 6)) *to++ = 'Ý'; + else if (!strncmp("yacute", token, 6)) *to++ = 'ý'; + else if (!strncmp("yuml", token, 4)) *to++ = 'ÿ'; + + else if (!strncmp("deg", token, 3)) *to++ = '°'; + else if (!strncmp("plusmn", token, 6)) *to++ = '±'; + else if (!strncmp("sup2", token, 4)) *to++ = '²'; + else if (!strncmp("sup3", token, 4)) *to++ = '³'; + else if (!strncmp("sup1", token, 4)) *to++ = '¹'; + else if (!strncmp("nbsp", token, 4)) *to++ = 'º'; + else if (!strncmp("pound", token, 5)) *to++ = '£'; + else if (!strncmp("cent", token, 4)) *to++ = '¢'; + else if (!strncmp("frac14", token, 6)) *to++ = '¼'; + else if (!strncmp("frac12", token, 6)) *to++ = '½'; + else if (!strncmp("frac34", token, 6)) *to++ = '¾'; + else if (!strncmp("iquest", token, 6)) *to++ = '¿'; + else if (!strncmp("iexcl", token, 5)) *to++ = '¡'; + else if (!strncmp("ETH", token, 3)) *to++ = 'Ð'; + else if (!strncmp("eth", token, 3)) *to++ = 'ð'; + else if (!strncmp("THORN", token, 5)) *to++ = 'Þ'; + else if (!strncmp("thorn", token, 5)) *to++ = 'þ'; + else if (!strncmp("AElig", token, 5)) *to++ = 'Æ'; + else if 
(!strncmp("aelig", token, 5)) *to++ = 'æ'; + else if (!strncmp("Oslash", token, 6)) *to++ = 'Ø'; + else if (!strncmp("curren", token, 6)) *to++ = '¤'; + else if (!strncmp("Ccedil", token, 6)) *to++ = 'Ç'; + else if (!strncmp("ccedil", token, 6)) *to++ = 'ç'; + else if (!strncmp("szlig", token, 5)) *to++ = 'ß'; + else if (!strncmp("Ntilde", token, 6)) *to++ = 'Ñ'; + else if (!strncmp("ntilde", token, 6)) *to++ = 'ñ'; + else if (!strncmp("yen", token, 3)) *to++ = '¥'; + else if (!strncmp("not", token, 3)) *to++ = '¬'; + else if (!strncmp("ordf", token, 4)) *to++ = 'ª'; + else if (!strncmp("uml", token, 3)) *to++ = '¨'; + else if (!strncmp("shy", token, 3)) *to++ = ''; + else if (!strncmp("macr", token, 4)) *to++ = '¯'; + continue; + + } + else if (*from == '>' && !ampersand) + { + intoken = false; + // process desired tokens + if (!strncmp(token, "sync type=\"Strongs\" value=\"G", 27)) { + *to++ = '<'; + for (i = 28; token[i] != '\"'; i++) + *to++ = token[i]; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "sync type=\"Strongs\" value=\"H", 27)) { + *to++ = '<'; + for (i = 28; token[i] != '\"'; i++) + *to++ = token[i]; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "scripRef", 8)) { + *to++ = '#'; + continue; + } + else if (!strncmp(token, "/scripRef", 9)) { + *to++ = ' '; + continue; + } + else if (!strncmp(token, "note ", 5)) { + *to++ = '{'; + continue; + } + else if (!strncmp(token, "/note", 5)) { + *to++ = '}'; + continue; + } + else if (!strnicmp(token, "font", 4)) { + *to++ = '\\'; + *to++ = '\\'; + continue; + } + else if (!strnicmp(token, "/font", 5)) { + *to++ = '\\'; + *to++ = '\\'; + continue; + } + else switch(*token) { + case 'I': // font tags + case 'i': + *to++ = '\\'; + *to++ = '@'; + continue; + case 'B': // bold start + case 'b': + *to++ = '\\'; + *to++ = '$'; + continue; + case '/': + switch(token[1]) { + case 'I': + case 'i': // italic end + *to++ = '\\'; + *to++ = '@'; + continue; + case 'B': // bold start + case 'b': + 
*to++ = '\\'; + *to++ = '$'; + continue; + } + } + continue; + } + if (intoken) { + if (tokpos < 2047) + token[tokpos++] = *from; + } + else *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} + + + diff --git a/src/modules/filters/thmlosis.cpp b/src/modules/filters/thmlosis.cpp new file mode 100644 index 0000000..54cd002 --- /dev/null +++ b/src/modules/filters/thmlosis.cpp @@ -0,0 +1,332 @@ +/****************************************************************************** + * + * thmlstrongs - SWFilter decendant to hide or show strongs number + * in a ThML module. + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <stdarg.h> +#include <thmlosis.h> +#include <swmodule.h> +#include <versekey.h> +#include <stdarg.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +ThMLOSIS::ThMLOSIS() { +} + + +ThMLOSIS::~ThMLOSIS() { +} + + +char ThMLOSIS::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) { + + char *to, *from, token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + int word = 1; + char val[128]; + char buf[128]; + char wordstr[5]; + char *valto; + char *ch; + char *textStart, *textEnd; + char *wordStart, *wordEnd; + bool newText = false; + bool newWord = false; + string tmp; + bool suspendTextPassThru = false; + bool keepToken = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; + + textStart = from; + wordStart = text; + + // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + textEnd = from-1; + wordEnd = to; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + keepToken = false; + suspendTextPassThru = false; + newWord = true; + + + while (wordStart < (text+maxlen)) { +// if (strchr(" ,;.?!()'\"", *wordStart)) + if (strchr(";,: .?!()'\"", *wordStart)) + wordStart++; + else break; + } + while (wordEnd > wordStart) { + if (strchr(" ,;:.?!()'\"", *wordEnd)) + wordEnd--; + else break; + } + + // Scripture Reference + if (!strncmp(token, "scripRef", 8)) { + // pushString(buf, "<reference work=\"Bible.KJV\" reference=\""); + suspendTextPassThru = true; + newText = true; + } + else if (!strncmp(token, "/scripRef", 9)) { + tmp = ""; + tmp.append(textStart, (int)(textEnd - textStart)+1); + pushString(&to, convertToOSIS(tmp.c_str(), key)); + suspendTextPassThru = false; + } + + // Footnote + if (!strcmp(token, "note")) { + // pushString(buf, "<reference work=\"Bible.KJV\" reference=\""); + suspendTextPassThru = true; + newText = true; + } + else if (!strcmp(token, "/note")) { + tmp = "<note type=\"x-StudyNote\"><notePart type=\"x-MainText\">"; + tmp.append(textStart, (int)(textEnd - textStart)+1); + tmp += "</notePart></note>"; + pushString(&to, tmp.c_str()); + 
suspendTextPassThru = false; + } + + // Figure + else if (!strncmp(token, "img ", 4)) { + const char *src = strstr(token, "src"); + if (!src) // assert we have a src attribute + return false; + + pushString(&to, "<figure src=\""); + const char *c; + for (c = src;((*c) && (*c != '"')); c++); + + /* uncomment for SWORD absolute path logic + if (*(c+1) == '/') { + pushString(buf, "file:"); + pushString(buf, module->getConfigEntry("AbsoluteDataPath")); + if (*((*buf)-1) == '/') + c++; // skip '/' + } + end of uncomment for asolute path logic */ + + for (c++;((*c) && (*c != '"')); c++) + *to++ = *c; + + pushString(&to, "\" />"); + return true; + } + + // Strongs numbers + else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs + if (module->isProcessEntryAttributes()) { + valto = val; + for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) + *valto++ = token[i]; + *valto = 0; + if (atoi((!isdigit(*val))?val+1:val) < 5627) { + // normal strongs number + strstrip(val); + sprintf(buf, "<w lemma=\"x-Strong:%s\">", val); + memmove(wordStart+strlen(buf), wordStart, (to-wordStart)+1); + memcpy(wordStart, buf, strlen(buf)); + to+=strlen(buf); + pushString(&to, "</w>"); + module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val; +// tmp = ""; +// tmp.append(textStart, (int)(wordEnd - wordStart)); +// module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp; + } + else { + // verb morph + sprintf(wordstr, "%03d", word-1); + module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; + } + } + } + + // Morphology + else if (!strncmp(token, "sync type=\"morph\"", 17)) { + for (ch = token+17; *ch; ch++) { + if (!strncmp(ch, "class=\"", 7)) { + valto = val; + for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) + *valto++ = ch[i]; + *valto = 0; + sprintf(wordstr, "%03d", word-1); + strstrip(val); + module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = val; + } + if (!strncmp(ch, "value=\"", 7)) { + valto = val; + for (unsigned int i = 7; 
ch[i] != '\"' && i < 127; i++) + *valto++ = ch[i]; + *valto = 0; + sprintf(wordstr, "%03d", word-1); + strstrip(val); + module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; + } + } + if (!strncmp(wordStart, "<w ", 3)) { + + const char *cls = "Unknown", *morph; + + if (module->getEntryAttributes()["Word"][wordstr]["Morph"].size() > 0) { + if (module->getEntryAttributes()["Word"][wordstr]["MorphClass"].size() > 0) + cls = module->getEntryAttributes()["Word"][wordstr]["MorphClass"].c_str(); + morph = module->getEntryAttributes()["Word"][wordstr]["Morph"].c_str(); + + sprintf(buf, "morph=\"x-%s:%s\" ", cls, morph); + memmove(wordStart+3+strlen(buf), wordStart+3, (to-wordStart)+1); + memcpy(wordStart+3, buf, strlen(buf)); + to+=strlen(buf); + } + } + } + + if (!keepToken) { // if we don't want strongs + if (strchr(" ,:;.?!()'\"", from[1])) { + if (lastspace) + to--; + } + if (newText) {textStart = from+1; newText = false; } +// if (newWord) {wordStart = to; newWord = false; } + continue; + } + // if not a strongs token, keep token in text + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + if (newText) {textStart = to; newWord = false; } +// if (newWord) {wordStart = to; newWord = false; } + continue; + } + if (intoken) { + if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + } + else { + if (newWord && (*from != ' ')) {wordStart = to; newWord = false; memset(to, 0, 10); } + if (!suspendTextPassThru) { + *to++ = *from; + lastspace = (*from == ' '); + } + } + } + + VerseKey *vkey = SWDYNAMIC_CAST(VerseKey, key); + if (vkey) { + char ref[254]; + if (vkey->Verse()) + sprintf(ref, "<verseStart ref=\"%s\" />", vkey->getOSISRef()); + else if (vkey->Chapter()) + sprintf(ref, "<chapterStart ref=\"%s\" />", vkey->getOSISRef()); + else if (vkey->Book()) + sprintf(ref, "<bookStart ref=\"%s\" />", vkey->getOSISRef()); + else *ref = 0; + if (*ref) { + memmove(text+strlen(ref), 
text, maxlen-strlen(ref)-1); + memcpy(text, ref, strlen(ref)); + to+=strlen(ref); + if (vkey->Verse()) { + VerseKey tmp; + tmp = *vkey; + tmp.AutoNormalize(0); + tmp.Headings(1); + sprintf(ref, "<verseEnd ref=\"%s\" />", vkey->getOSISRef()); + pushString(&to, ref); + tmp = MAXVERSE; + if (*vkey == tmp) { + tmp.Verse(0); + sprintf(ref, "<chapterEnd ref=\"%s\" />", tmp.getOSISRef()); + pushString(&to, ref); + tmp = MAXCHAPTER; + tmp = MAXVERSE; + if (*vkey == tmp) { + tmp.Chapter(0); + tmp.Verse(0); + sprintf(ref, "<bookEnd ref=\"%s\" />", tmp.getOSISRef()); + pushString(&to, ref); + } + } + } + + else if (vkey->Chapter()) + sprintf(ref, "<chapterStart ref=\"%s\" />", vkey->getOSISRef()); + else sprintf(ref, "<bookStart ref=\"%s\" />", vkey->getOSISRef()); + } + } + *to++ = 0; + *to = 0; + return 0; +} + + +void ThMLOSIS::pushString(char **buf, const char *format, ...) { + va_list argptr; + + va_start(argptr, format); + *buf += vsprintf(*buf, format, argptr); + va_end(argptr); + +// *buf += strlen(*buf); +} + + +const char *ThMLOSIS::convertToOSIS(const char *inRef, const SWKey *key) { + static string outRef; + + outRef = ""; + + VerseKey defLanguage; + ListKey verses = defLanguage.ParseVerseList(inRef, (*key), true); + const char *startFrag = inRef; + for (int i = 0; i < verses.Count(); i++) { + VerseKey *element = SWDYNAMIC_CAST(VerseKey, verses.GetElement(i)); + char buf[5120]; + char frag[800]; + if (element) { + memmove(frag, startFrag, ((const char *)element->userData - startFrag) + 1); + frag[((const char *)element->userData - startFrag) + 1] = 0; + startFrag = (const char *)element->userData + 1; + sprintf(buf, "<reference refStart=\"KJV:%s\" refEnd=\"%s\">%s</reference>", element->LowerBound().getOSISRef(), element->UpperBound().getOSISRef(), frag); + } + else { + memmove(frag, startFrag, ((const char *)verses.GetElement(i)->userData - startFrag) + 1); + frag[((const char *)verses.GetElement(i)->userData - startFrag) + 1] = 0; + startFrag = (const char 
*)verses.GetElement(i)->userData + 1; + sprintf(buf, "<reference refStart=\"KJV:%s\">%s</reference>", VerseKey(*verses.GetElement(i)).getOSISRef(), frag); + } + outRef+=buf; + } + return outRef.c_str(); +} diff --git a/src/modules/filters/thmlplain.cpp b/src/modules/filters/thmlplain.cpp new file mode 100644 index 0000000..5609f16 --- /dev/null +++ b/src/modules/filters/thmlplain.cpp @@ -0,0 +1,201 @@ +/****************************************************************************** + * + * thmlplain - SWFilter decendant to strip out all ThML tags or convert to + * ASCII rendered symbols. + */ + + +#include <stdlib.h> +#include <string.h> +#include <thmlplain.h> + + +ThMLPlain::ThMLPlain() { +} + + +char ThMLPlain::ProcessText(char *text, int maxlen) +{ + char *to, *from, token[2048]; + int tokpos = 0; + bool intoken = false; + int len; + bool ampersand = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == 10 || *from == 13) + from++; + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + ampersand = false; + continue; + } + else if (*from == '&') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + ampersand = true; + continue; + } + if (*from == ';' && ampersand) { + intoken = false; + + if (!strncmp("nbsp", token, 4)) *to++ = ' '; + else if (!strncmp("quot", token, 4)) *to++ = '"'; + else if (!strncmp("amp", token, 3)) *to++ = '&'; + else if (!strncmp("lt", token, 2)) *to++ = '<'; + else if (!strncmp("gt", token, 2)) *to++ = '>'; + else if (!strncmp("brvbar", token, 6)) *to++ = '|'; + else if (!strncmp("sect", token, 4)) *to++ = '§'; + else if (!strncmp("copy", token, 4)) *to++ = '©'; + else if (!strncmp("laquo", token, 5)) *to++ = '«'; + else if (!strncmp("reg", 
token, 3)) *to++ = '®'; + else if (!strncmp("acute", token, 5)) *to++ = '´'; + else if (!strncmp("para", token, 4)) *to++ = '¶'; + else if (!strncmp("raquo", token, 5)) *to++ = '»'; + + else if (!strncmp("Aacute", token, 6)) *to++ = 'Á'; + else if (!strncmp("Agrave", token, 6)) *to++ = 'À'; + else if (!strncmp("Acirc", token, 5)) *to++ = 'Â'; + else if (!strncmp("Auml", token, 4)) *to++ = 'Ä'; + else if (!strncmp("Atilde", token, 6)) *to++ = 'Ã'; + else if (!strncmp("Aring", token, 5)) *to++ = 'Å'; + else if (!strncmp("aacute", token, 6)) *to++ = 'á'; + else if (!strncmp("agrave", token, 6)) *to++ = 'à'; + else if (!strncmp("acirc", token, 5)) *to++ = 'â'; + else if (!strncmp("auml", token, 4)) *to++ = 'ä'; + else if (!strncmp("atilde", token, 6)) *to++ = 'ã'; + else if (!strncmp("aring", token, 5)) *to++ = 'å'; + else if (!strncmp("Eacute", token, 6)) *to++ = 'É'; + else if (!strncmp("Egrave", token, 6)) *to++ = 'È'; + else if (!strncmp("Ecirc", token, 5)) *to++ = 'Ê'; + else if (!strncmp("Euml", token, 4)) *to++ = 'Ë'; + else if (!strncmp("eacute", token, 6)) *to++ = 'é'; + else if (!strncmp("egrave", token, 6)) *to++ = 'è'; + else if (!strncmp("ecirc", token, 5)) *to++ = 'ê'; + else if (!strncmp("euml", token, 4)) *to++ = 'ë'; + else if (!strncmp("Iacute", token, 6)) *to++ = 'Í'; + else if (!strncmp("Igrave", token, 6)) *to++ = 'Ì'; + else if (!strncmp("Icirc", token, 5)) *to++ = 'Î'; + else if (!strncmp("Iuml", token, 4)) *to++ = 'Ï'; + else if (!strncmp("iacute", token, 6)) *to++ = 'í'; + else if (!strncmp("igrave", token, 6)) *to++ = 'ì'; + else if (!strncmp("icirc", token, 5)) *to++ = 'î'; + else if (!strncmp("iuml", token, 4)) *to++ = 'ï'; + else if (!strncmp("Oacute", token, 6)) *to++ = 'Ó'; + else if (!strncmp("Ograve", token, 6)) *to++ = 'Ò'; + else if (!strncmp("Ocirc", token, 5)) *to++ = 'Ô'; + else if (!strncmp("Ouml", token, 4)) *to++ = 'Ö'; + else if (!strncmp("Otilde", token, 6)) *to++ = 'Õ'; + else if (!strncmp("oacute", token, 6)) *to++ = 'ó'; + 
else if (!strncmp("ograve", token, 6)) *to++ = 'ò'; + else if (!strncmp("ocirc", token, 5)) *to++ = 'ô'; + else if (!strncmp("ouml", token, 4)) *to++ = 'ö'; + else if (!strncmp("otilde", token, 6)) *to++ = 'õ'; + else if (!strncmp("Uacute", token, 6)) *to++ = 'Ú'; + else if (!strncmp("Ugrave", token, 6)) *to++ = 'Ù'; + else if (!strncmp("Ucirc", token, 5)) *to++ = 'Û'; + else if (!strncmp("Uuml", token, 4)) *to++ = 'Ü'; + else if (!strncmp("uacute", token, 6)) *to++ = 'ú'; + else if (!strncmp("ugrave", token, 6)) *to++ = 'ù'; + else if (!strncmp("ucirc", token, 5)) *to++ = 'û'; + else if (!strncmp("uuml", token, 4)) *to++ = 'ü'; + else if (!strncmp("Yacute", token, 6)) *to++ = 'Ý'; + else if (!strncmp("yacute", token, 6)) *to++ = 'ý'; + else if (!strncmp("yuml", token, 4)) *to++ = 'ÿ'; + + else if (!strncmp("deg", token, 3)) *to++ = '°'; + else if (!strncmp("plusmn", token, 6)) *to++ = '±'; + else if (!strncmp("sup2", token, 4)) *to++ = '²'; + else if (!strncmp("sup3", token, 4)) *to++ = '³'; + else if (!strncmp("sup1", token, 4)) *to++ = '¹'; + else if (!strncmp("nbsp", token, 4)) *to++ = 'º'; + else if (!strncmp("pound", token, 5)) *to++ = '£'; + else if (!strncmp("cent", token, 4)) *to++ = '¢'; + else if (!strncmp("frac14", token, 6)) *to++ = '¼'; + else if (!strncmp("frac12", token, 6)) *to++ = '½'; + else if (!strncmp("frac34", token, 6)) *to++ = '¾'; + else if (!strncmp("iquest", token, 6)) *to++ = '¿'; + else if (!strncmp("iexcl", token, 5)) *to++ = '¡'; + else if (!strncmp("ETH", token, 3)) *to++ = 'Ð'; + else if (!strncmp("eth", token, 3)) *to++ = 'ð'; + else if (!strncmp("THORN", token, 5)) *to++ = 'Þ'; + else if (!strncmp("thorn", token, 5)) *to++ = 'þ'; + else if (!strncmp("AElig", token, 5)) *to++ = 'Æ'; + else if (!strncmp("aelig", token, 5)) *to++ = 'æ'; + else if (!strncmp("Oslash", token, 6)) *to++ = 'Ø'; + else if (!strncmp("curren", token, 6)) *to++ = '¤'; + else if (!strncmp("Ccedil", token, 6)) *to++ = 'Ç'; + else if (!strncmp("ccedil", token, 
6)) *to++ = 'ç'; + else if (!strncmp("szlig", token, 5)) *to++ = 'ß'; + else if (!strncmp("Ntilde", token, 6)) *to++ = 'Ñ'; + else if (!strncmp("ntilde", token, 6)) *to++ = 'ñ'; + else if (!strncmp("yen", token, 3)) *to++ = '¥'; + else if (!strncmp("not", token, 3)) *to++ = '¬'; + else if (!strncmp("ordf", token, 4)) *to++ = 'ª'; + else if (!strncmp("uml", token, 3)) *to++ = '¨'; + else if (!strncmp("shy", token, 3)) *to++ = ''; + else if (!strncmp("macr", token, 4)) *to++ = '¯'; + continue; + + } + else if (*from == '>' && !ampersand) { + intoken = false; + // process desired tokens + if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) { + *to++ = ' '; + *to++ = '<'; + for (unsigned int i = 27; token[i] != '\"'; i++) + *to++ = token[i]; + *to++ = '>'; + continue; + } + if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) { + *to++ = ' '; + *to++ = '('; + for (unsigned int i = 25; token[i] != '\"'; i++) + *to++ = token[i]; + *to++ = ')'; + continue; + } + if (!strncmp("note", token, 4)) { + *to++ = ' '; + *to++ = '('; + } + else if (!strncmp("br", token, 2)) + *to++ = '\n'; + else if (!strncmp("/p", token, 2)) + *to++ = '\n'; + else if (!strncmp("/note", token, 5)) { + *to++ = ')'; + *to++ = ' '; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else *to++ = *from; + } + *to++ = 0; + *to = 0; + + return 0; +} + + diff --git a/src/modules/filters/thmlrtf.cpp b/src/modules/filters/thmlrtf.cpp new file mode 100644 index 0000000..8b603b0 --- /dev/null +++ b/src/modules/filters/thmlrtf.cpp @@ -0,0 +1,222 @@ +/*************************************************************************** + thmlrtf.cpp - ThML to RTF filter + ------------------- + begin : 1999-10-27 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free 
software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <thmlrtf.h> + + +ThMLRTF::ThMLRTF() +{ + setTokenStart("<"); + setTokenEnd(">"); + + setEscapeStart("&"); + setEscapeEnd(";"); + + setEscapeStringCaseSensitive(true); + + addEscapeStringSubstitute("nbsp", " "); + addEscapeStringSubstitute("quot", "\""); + addEscapeStringSubstitute("amp", "&"); + addEscapeStringSubstitute("lt", "<"); + addEscapeStringSubstitute("gt", ">"); + addEscapeStringSubstitute("brvbar", "|"); + addEscapeStringSubstitute("sect", "§"); + addEscapeStringSubstitute("copy", "©"); + addEscapeStringSubstitute("laquo", "«"); + addEscapeStringSubstitute("reg", "®"); + addEscapeStringSubstitute("acute", "´"); + addEscapeStringSubstitute("para", "¶"); + addEscapeStringSubstitute("raquo", "»"); + + addEscapeStringSubstitute("Aacute", "Á"); + addEscapeStringSubstitute("Agrave", "À"); + addEscapeStringSubstitute("Acirc", "Â"); + addEscapeStringSubstitute("Auml", "Ä"); + addEscapeStringSubstitute("Atilde", "Ã"); + addEscapeStringSubstitute("Aring", "Å"); + addEscapeStringSubstitute("aacute", "á"); + addEscapeStringSubstitute("agrave", "à"); + addEscapeStringSubstitute("acirc", "â"); + addEscapeStringSubstitute("auml", "ä"); + addEscapeStringSubstitute("atilde", "ã"); + addEscapeStringSubstitute("aring", "å"); + addEscapeStringSubstitute("Eacute", "É"); + addEscapeStringSubstitute("Egrave", "È"); + addEscapeStringSubstitute("Ecirc", "Ê"); + addEscapeStringSubstitute("Euml", "Ë"); + addEscapeStringSubstitute("eacute", "é"); + addEscapeStringSubstitute("egrave", "è"); + addEscapeStringSubstitute("ecirc", "ê"); + addEscapeStringSubstitute("euml", "ë"); + addEscapeStringSubstitute("Iacute", 
"Í"); + addEscapeStringSubstitute("Igrave", "Ì"); + addEscapeStringSubstitute("Icirc", "Î"); + addEscapeStringSubstitute("Iuml", "Ï"); + addEscapeStringSubstitute("iacute", "í"); + addEscapeStringSubstitute("igrave", "ì"); + addEscapeStringSubstitute("icirc", "î"); + addEscapeStringSubstitute("iuml", "ï"); + addEscapeStringSubstitute("Oacute", "Ó"); + addEscapeStringSubstitute("Ograve", "Ò"); + addEscapeStringSubstitute("Ocirc", "Ô"); + addEscapeStringSubstitute("Ouml", "Ö"); + addEscapeStringSubstitute("Otilde", "Õ"); + addEscapeStringSubstitute("oacute", "ó"); + addEscapeStringSubstitute("ograve", "ò"); + addEscapeStringSubstitute("ocirc", "ô"); + addEscapeStringSubstitute("ouml", "ö"); + addEscapeStringSubstitute("otilde", "õ"); + addEscapeStringSubstitute("Uacute", "Ú"); + addEscapeStringSubstitute("Ugrave", "Ù"); + addEscapeStringSubstitute("Ucirc", "Û"); + addEscapeStringSubstitute("Uuml", "Ü"); + addEscapeStringSubstitute("uacute", "ú"); + addEscapeStringSubstitute("ugrave", "ù"); + addEscapeStringSubstitute("ucirc", "û"); + addEscapeStringSubstitute("uuml", "ü"); + addEscapeStringSubstitute("Yacute", "Ý"); + addEscapeStringSubstitute("yacute", "ý"); + addEscapeStringSubstitute("yuml", "ÿ"); + + addEscapeStringSubstitute("deg", "°"); + addEscapeStringSubstitute("plusmn", "±"); + addEscapeStringSubstitute("sup2", "²"); + addEscapeStringSubstitute("sup3", "³"); + addEscapeStringSubstitute("sup1", "¹"); + addEscapeStringSubstitute("nbsp", "º"); + addEscapeStringSubstitute("pound", "£"); + addEscapeStringSubstitute("cent", "¢"); + addEscapeStringSubstitute("frac14", "¼"); + addEscapeStringSubstitute("frac12", "½"); + addEscapeStringSubstitute("frac34", "¾"); + addEscapeStringSubstitute("iquest", "¿"); + addEscapeStringSubstitute("iexcl", "¡"); + addEscapeStringSubstitute("ETH", "Ð"); + addEscapeStringSubstitute("eth", "ð"); + addEscapeStringSubstitute("THORN", "Þ"); + addEscapeStringSubstitute("thorn", "þ"); + addEscapeStringSubstitute("AElig", "Æ"); + 
addEscapeStringSubstitute("aelig", "æ"); + addEscapeStringSubstitute("Oslash", "Ø"); + addEscapeStringSubstitute("curren", "¤"); + addEscapeStringSubstitute("Ccedil", "Ç"); + addEscapeStringSubstitute("ccedil", "ç"); + addEscapeStringSubstitute("szlig", "ß"); + addEscapeStringSubstitute("Ntilde", "Ñ"); + addEscapeStringSubstitute("ntilde", "ñ"); + addEscapeStringSubstitute("yen", "¥"); + addEscapeStringSubstitute("not", "¬"); + addEscapeStringSubstitute("ordf", "ª"); + addEscapeStringSubstitute("uml", "¨"); + addEscapeStringSubstitute("shy", ""); + addEscapeStringSubstitute("macr", "¯"); + + setTokenCaseSensitive(true); + + addTokenSubstitute("/note", ") }"); + + addTokenSubstitute("br", "\\line "); + addTokenSubstitute("br /", "\\line "); + addTokenSubstitute("i", "{\\i1 "); + addTokenSubstitute("/i", "}"); + addTokenSubstitute("b", "{\\b1 "); + addTokenSubstitute("/b", "}"); + addTokenSubstitute("p", "\\par "); + + //we need uppercase forms for the moment to support a few early ThML modules that aren't XHTML compliant + addTokenSubstitute("BR", "\\line "); + addTokenSubstitute("I", "{\\i1 "); + addTokenSubstitute("/I", "}"); + addTokenSubstitute("B", "{\\b1 "); + addTokenSubstitute("/B", "}"); + addTokenSubstitute("P", "\\par "); +} + +bool ThMLRTF::handleToken(char **buf, const char *token, DualStringMap &userData) { + if (!substituteToken(buf, token)) { + // manually process if it wasn't a simple substitution + if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) { + if (token[27] == 'H' || token[27] == 'G' || token[27] == 'A') { + pushString(buf, " {\\fs15 <"); + for (unsigned int i = 28; token[i] != '\"'; i++) + *(*buf)++ = token[i]; + pushString(buf, ">}"); + } + else if (token[27] == 'T') { + pushString(buf, " {\\fs15 ("); + for (unsigned int i = 28; token[i] != '\"'; i++) + *(*buf)++ = token[i]; + pushString(buf, ")}"); + } + } + else if (!strncmp(token, "sync type=\"morph\" ", 18)) { + pushString(buf, " {\\fs15 ("); + for (const char *tok = token + 
5; *tok; tok++) { + if (!strncmp(tok, "value=\"", 7)) { + tok += 7; + for (;*tok != '\"'; tok++) + *(*buf)++ = *tok; + break; + } + } + + pushString(buf, ")}"); + } + else if (!strncmp(token, "sync type=\"lemma\" value=\"", 25)) { + pushString(buf, "{\\fs15 ("); + for (unsigned int i = 25; token[i] != '\"'; i++) + *(*buf)++ = token[i]; + pushString(buf, ")}"); + } + else if (!strncmp(token, "scripRef", 8)) { +// pushString(buf, "{\\cf2 #"); + pushString(buf, "<a href=\"\">"); + } + else if (!strncmp(token, "/scripRef", 9)) { + pushString(buf, "</a>"); + } + else if (!strncmp(token, "div", 3)) { + *(*buf)++ = '{'; + if (!strncmp(token, "div class=\"title\"", 17)) { + pushString(buf, "\\par\\i1\\b1 "); + userData["sechead"] = "true"; + } + else if (!strncmp(token, "div class=\"sechead\"", 19)) { + pushString(buf, "\\par\\i1\\b1 "); + userData["sechead"] = "true"; + } + } + else if (!strncmp(token, "/div", 4)) { + *(*buf)++ = '}'; + if (userData["sechead"] == "true") { + pushString(buf, "\\par "); + userData["sechead"] == "false"; + } + } + else if (!strncmp(token, "note", 4)) { + pushString(buf, " {\\i1\\fs15 ("); + } + + else { + return false; // we still didn't handle token + } + } + return true; +} + + diff --git a/src/modules/filters/thmlscripref.cpp b/src/modules/filters/thmlscripref.cpp new file mode 100644 index 0000000..23edd6d --- /dev/null +++ b/src/modules/filters/thmlscripref.cpp @@ -0,0 +1,103 @@ +/****************************************************************************** + * + * thmlscripref - SWFilter decendant to hide or show scripture references + * in a ThML module. 
+ */ + + +#include <stdlib.h> +#include <string.h> +#include <thmlscripref.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char ThMLScripref::on[] = "On"; +const char ThMLScripref::off[] = "Off"; +const char ThMLScripref::optName[] = "Scripture Cross-references"; +const char ThMLScripref::optTip[] = "Toggles Scripture Cross-references On and Off if they exist"; + + +ThMLScripref::ThMLScripref() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +ThMLScripref::~ThMLScripref() { +} + +void ThMLScripref::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *ThMLScripref::getOptionValue() +{ + return (option) ? on:off; +} + +char ThMLScripref::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want scriprefs + char *to, *from, token[2048]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strnicmp(token, "scripRef", 8)) { + hide = true; + continue; + } + else if (!strnicmp(token, "/scripRef", 9)) { + hide = false; + continue; + } + + // if not a scripref token, keep token in text + if (!hide) { + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + *to++ = *from; + } + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/thmlstrongs.cpp 
b/src/modules/filters/thmlstrongs.cpp new file mode 100644 index 0000000..ab5a3eb --- /dev/null +++ b/src/modules/filters/thmlstrongs.cpp @@ -0,0 +1,156 @@ +/****************************************************************************** + * + * thmlstrongs - SWFilter decendant to hide or show strongs number + * in a ThML module. + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <thmlstrongs.h> +#include <swmodule.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif +#include <ctype.h> + +const char ThMLStrongs::on[] = "On"; +const char ThMLStrongs::off[] = "Off"; +const char ThMLStrongs::optName[] = "Strong's Numbers"; +const char ThMLStrongs::optTip[] = "Toggles Strong's Numbers On and Off if they exist"; + + +ThMLStrongs::ThMLStrongs() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +ThMLStrongs::~ThMLStrongs() { +} + +void ThMLStrongs::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *ThMLStrongs::getOptionValue() +{ + return (option) ? on:off; +} + +char ThMLStrongs::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char *to, *from, token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + int word = 1; + char val[128]; + char wordstr[5]; + char *valto; + char *ch; + char *textStart = text, *textEnd = 0; + string tmp; + bool newText = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; + + // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + textEnd = to; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs + if (module->isProcessEntryAttributes()) { + valto = val; + for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) + *valto++ = token[i]; + *valto = 0; + if (atoi((!isdigit(*val))?val+1:val) < 5627) { + // normal strongs number + sprintf(wordstr, "%03d", word++); + module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val; + tmp = ""; + tmp.append(textStart, (int)(textEnd - textStart)); + module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp; + newText = true; + } + else { + // verb morph + sprintf(wordstr, "%03d", word-1); + module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; + } + } + + if (!option) { // if we don't want strongs + if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { + if (lastspace) + to--; + } + if (newText) {textStart = to; newText = false; } + continue; + } + } + if (module->isProcessEntryAttributes()) { + if (!strncmp(token, "sync type=\"morph\"", 17)) { + for (ch = token+17; *ch; ch++) { + if (!strncmp(ch, "class=\"", 7)) { + valto = val; + for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) + *valto++ = ch[i]; + *valto = 0; + 
sprintf(wordstr, "%03d", word-1); + module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = val; + } + if (!strncmp(ch, "value=\"", 7)) { + valto = val; + for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) + *valto++ = ch[i]; + *valto = 0; + sprintf(wordstr, "%03d", word-1); + module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; + } + } + } + } + // if not a strongs token, keep token in text + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + if (newText) {textStart = to; newText = false; } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + *to++ = *from; + lastspace = (*from == ' '); + } + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/thmlvariants.cpp b/src/modules/filters/thmlvariants.cpp new file mode 100644 index 0000000..a6a52cf --- /dev/null +++ b/src/modules/filters/thmlvariants.cpp @@ -0,0 +1,185 @@ +/****************************************************************************** + * + * thmlvariants - SWFilter decendant to hide or show textual variants + * in a ThML module. 
+ */ + + +#include <stdlib.h> +#include <string.h> +#include <thmlvariants.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char ThMLVariants::primary[] = "Primary Reading"; +const char ThMLVariants::secondary[] = "Secondary Reading"; +const char ThMLVariants::all[] = "All Readings"; + +const char ThMLVariants::optName[] = "Textual Variants"; +const char ThMLVariants::optTip[] = "Switch between Textual Variants modes"; + + +ThMLVariants::ThMLVariants() { + option = false; + options.push_back(primary); + options.push_back(secondary); + options.push_back(all); +} + + +ThMLVariants::~ThMLVariants() { +} + +void ThMLVariants::setOptionValue(const char *ival) +{ + if (!stricmp(ival, primary)) option = 0; + else if (!stricmp(ival, secondary)) option = 1; + else option = 2; +} + +const char *ThMLVariants::getOptionValue() +{ + if (option == 0) { + return primary; + } + else if (option == 1) { + return secondary; + } + else { + return all; + } +} + +char ThMLVariants::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (option == 0) { //we want primary only + char *to, *from, token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; + + // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strncmp(token, "div type=\"variant\" class=\"2\"", 28)) { + hide = true; + continue; + } + else if (!strncmp(token, "/div", 4)) { + hide = false; + continue; + } + + // if not a footnote token, keep token in text + if (!hide) { + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + *to++ = *from; + } + } + } + *to++ = 0; + *to = 0; + + } + else if (option == 1) { //we want variant only + char *to, *from, token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; + + // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strncmp(token, "div type=\"variant\" class=\"1\"", 28)) { + hide = true; + continue; + } + else if (!strncmp(token, "/div", 4)) { + hide = false; + continue; + } + + // if not a footnote token, keep token in text + if (!hide) { + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + *to++ = *from; + } + } + } + *to++ = 0; + *to = 0; + + } + return 0; +} + + + + + + diff --git a/src/modules/filters/unicodertf.cpp b/src/modules/filters/unicodertf.cpp new file mode 100644 index 0000000..6313792 --- /dev/null +++ b/src/modules/filters/unicodertf.cpp @@ -0,0 +1,75 @@ +/****************************************************************************** + * + * unicodertf - SWFilter decendant to convert a double byte unicode file + * to RTF tags + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <unicodertf.h> + +UnicodeRTF::UnicodeRTF() { +} + + +char UnicodeRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned char *to, *from, *maxto; + int len; + char digit[10]; + short ch; // must be signed per unicode spec (negative is ok for big numbers > 32768) + + len = strlenw(text) + 2; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned char*)&text[maxlen - len]; + } + else from = (unsigned 
char*)text; + maxto =(unsigned char*)text + maxlen; + + // ------------------------------- + bool lastUni = false; + for (to = (unsigned char*)text; *from && (to <= maxto); from++) { + ch = 0; + if ((*from & 128) != 128) { +// if ((*from == ' ') && (lastUni)) +// *to++ = ' '; + *to++ = *from; + lastUni = false; + continue; + } + if ((*from & 128) && ((*from & 64) != 64)) { + // error + *from = 'x'; + continue; + } + *from <<= 1; + int subsequent; + for (subsequent = 1; (*from & 128); subsequent++) { + *from <<= 1; + from[subsequent] &= 63; + ch <<= 6; + ch |= from[subsequent]; + } + subsequent--; + *from <<=1; + char significantFirstBits = 8 - (2+subsequent); + + ch |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8)); + from += subsequent; + *to++ = '\\'; + *to++ = 'u'; + sprintf(digit, "%d", ch); + for (char *dig = digit; *dig; dig++) + *to++ = *dig; + *to++ = '?'; + lastUni = true; + } + + if (to != maxto) { + *to++ = 0; + } + *to = 0; + return 0; +} diff --git a/src/modules/filters/utf16utf8.cpp b/src/modules/filters/utf16utf8.cpp new file mode 100644 index 0000000..5a7719f --- /dev/null +++ b/src/modules/filters/utf16utf8.cpp @@ -0,0 +1,95 @@ +/****************************************************************************** + * + * UTF16UTF8 - SWFilter decendant to convert UTF-16 to UTF-8 + * + */ + +#include <stdlib.h> +#include <stdio.h> + +#include <utf16utf8.h> + +UTF16UTF8::UTF16UTF8() { +} + + +char UTF16UTF8::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned short *from; + unsigned char *to; + + int len; + unsigned long uchar; + unsigned short schar; + + len = 0; + from = (unsigned short*) text; + while (*from) { + len += 2; + from++; + } + + // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned short*)&text[maxlen - len]; + } + else + from = (unsigned short*)text; + + + // ------------------------------- + + for (to = (unsigned 
char*)text; *from; from++) { + uchar = 0; + + if (*from < 0xD800 || *from > 0xDFFF) { + uchar = *from; + } + else if (*from >= 0xD800 && *from <= 0xDBFF) { + uchar = *from; + schar = *(from+1); + if (uchar < 0xDC00 || uchar > 0xDFFF) { + //error, do nothing + continue; + } + uchar &= 0x03ff; + schar &= 0x03ff; + uchar <<= 10; + uchar |= schar; + uchar += 0x10000; + from++; + } + else { + //error, do nothing + continue; + } + + if (uchar < 0x80) { + *to++ = uchar; + } + else if (uchar < 0x800) { + *to++ = 0xc0 | (uchar >> 6); + *to++ = 0x80 | (uchar & 0x3f); + } + else if (uchar < 0x10000) { + *to++ = 0xe0 | (uchar >> 12); + *to++ = 0x80 | (uchar >> 6) & 0x3f; + *to++ = 0x80 | uchar & 0x3f; + } + else if (uchar < 0x200000) { + *to++ = 0xF0 | (uchar >> 18); + *to++ = 0x80 | (uchar >> 12) & 0x3F; + *to++ = 0x80 | (uchar >> 6) & 0x3F; + *to++ = 0x80 | uchar & 0x3F; + } + } + *to++ = 0; + *to = 0; + + return 0; +} + + + + diff --git a/src/modules/filters/utf8arshaping.cpp b/src/modules/filters/utf8arshaping.cpp new file mode 100644 index 0000000..5121f48 --- /dev/null +++ b/src/modules/filters/utf8arshaping.cpp @@ -0,0 +1,48 @@ +/****************************************************************************** +* +* utf8arshaping - SWFilter decendant to perform Arabic shaping on +* UTF-8 text +*/ + +#ifdef _ICU_ + +#include <stdlib.h> +#include <string.h> + +#ifdef __GNUC__ +#include <unixstr.h> +#endif + +#include <utf8arshaping.h> + +UTF8arShaping::UTF8arShaping() { + + conv = ucnv_open("UTF-8", &err); + +} + +UTF8arShaping::~UTF8arShaping() { + ucnv_close(conv); +} + +char UTF8arShaping::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + UChar *ustr, *ustr2; + + int32_t len = strlen(text); + ustr = new UChar[len]; + ustr2 = new UChar[len]; + + // Convert UTF-8 string to UTF-16 (UChars) + len = ucnv_toUChars(conv, ustr, len, text, -1, &err); + + len = u_shapeArabic(ustr, len, ustr2, len, U_SHAPE_LETTERS_SHAPE | U_SHAPE_DIGITS_EN2AN, &err); 
+ + ucnv_fromUChars(conv, text, maxlen, ustr2, len, &err); + + delete [] ustr2; + delete [] ustr; + return 0; +} + +#endif diff --git a/src/modules/filters/utf8bidireorder.cpp b/src/modules/filters/utf8bidireorder.cpp new file mode 100644 index 0000000..8fa7280 --- /dev/null +++ b/src/modules/filters/utf8bidireorder.cpp @@ -0,0 +1,55 @@ +/****************************************************************************** +* +* utf8bidireorder - SWFilter descendant to perform reordering of UTF-8 +* text to visual order according to Unicode BiDi +*/ + +#ifdef _ICU_ + +#include <stdlib.h> +#include <string.h> + +#ifdef __GNUC__ +#include <unixstr.h> +#endif + +#include <utf8bidireorder.h> + +UTF8BiDiReorder::UTF8BiDiReorder() { + + conv = ucnv_open("UTF-8", &err); + +} + +UTF8BiDiReorder::~UTF8BiDiReorder() { + ucnv_close(conv); +} + +char UTF8BiDiReorder::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + UChar *ustr, *ustr2; + + int32_t len = strlen(text); + ustr = new UChar[len]; //each char could become a surrogate pair + + // Convert UTF-8 string to UTF-16 (UChars) + len = ucnv_toUChars(conv, ustr, len, text, -1, &err); + ustr2 = new UChar[len]; + + UBiDi* bidi = ubidi_openSized(len + 1, 0, &err); + ubidi_setPara(bidi, ustr, len, UBIDI_DEFAULT_RTL, NULL, &err); + len = ubidi_writeReordered(bidi, ustr2, len, + UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, &err); + ubidi_close(bidi); + +// len = ubidi_writeReverse(ustr, len, ustr2, len, +// UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, &err); + + ucnv_fromUChars(conv, text, maxlen, ustr2, len, &err); + + delete [] ustr2; + delete [] ustr; + return 0; +} + +#endif diff --git a/src/modules/filters/utf8cantillation.cpp b/src/modules/filters/utf8cantillation.cpp new file mode 100644 index 0000000..84cb513 --- /dev/null +++ b/src/modules/filters/utf8cantillation.cpp @@ -0,0 +1,64 @@ +/****************************************************************************** + * + * UTF8Cantillation - 
SWFilter descendant to remove UTF-8 Hebrew cantillation
+ *
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <utf8cantillation.h>
+
+
+const char UTF8Cantillation::on[] = "On";
+const char UTF8Cantillation::off[] = "Off";
+const char UTF8Cantillation::optName[] = "Hebrew Cantillation";
+const char UTF8Cantillation::optTip[] = "Toggles Hebrew Cantillation Marks";
+
+UTF8Cantillation::UTF8Cantillation() {
+	option = false;
+	options.push_back(on);
+	options.push_back(off);
+}
+
+UTF8Cantillation::~UTF8Cantillation(){};
+
+void UTF8Cantillation::setOptionValue(const char *ival)
+{
+	option = (!stricmp(ival, on));
+}
+
+const char *UTF8Cantillation::getOptionValue()
+{
+	return (option) ? on:off;
+}
+
+/******************************************************************************
+ * UTF8Cantillation::ProcessText - when the option is off, compacts text in
+ *	place, dropping the two-byte sequences 0xD6 0x90..0xAF and
+ *	0xD7 0x84; every other byte is copied through unchanged.
+ *
+ * ENT:	text	- NUL-terminated UTF-8 buffer, rewritten in place
+ *	maxlen, key, module - not used by this filter
+ *
+ * RET:	always 0
+ */
+char UTF8Cantillation::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+	if (!option) {
+		unsigned char *to, *from;
+		to = (unsigned char*)text;
+		//The UTF-8 range 0xD6 0x90 to 0xD6 0xAF and 0xD7 0x84 consist of Hebrew cantillation marks so block those out.
+		for (from = (unsigned char*)text; *from; from++) {
+			if (*from != 0xD6) {
+				if (*from == 0xD7 && *(from + 1) == 0x84) {
+					from++;
+				}
+				else {
+					*to++ = *from;
+				}
+			}
+			else if (*(from + 1) < 0x90 || *(from + 1) > 0xAF) {
+				*to++ = *from;
+				// a lone 0xD6 at the very end of the text: stop here, otherwise the
+				// loop increment would step over the terminator and keep scanning
+				if (!*(from + 1))
+					break;
+				from++;
+				*to++ = *from;
+			}
+			else {
+				from++;
+			}
+		}
+		*to++ = 0;
+		*to = 0;
+	}
+	return 0;
+}
diff --git a/src/modules/filters/utf8greekaccents.cpp b/src/modules/filters/utf8greekaccents.cpp
new file mode 100644
index 0000000..b0e5dc8
--- /dev/null
+++ b/src/modules/filters/utf8greekaccents.cpp
@@ -0,0 +1,252 @@
+/******************************************************************************
+ *
+ * UTF8GreekAccents - SWFilter decendant to remove UTF-8 Greek accents
+ *
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <utf8greekaccents.h>
+
+
+const char UTF8GreekAccents::on[] = "On";
+const char UTF8GreekAccents::off[] = "Off";
+const char UTF8GreekAccents::optName[] = "Greek Accents";
+const char UTF8GreekAccents::optTip[] = "Toggles Greek Accents";
+
+UTF8GreekAccents::UTF8GreekAccents() {
+	option = true;
+	options.push_back(on);
+	options.push_back(off);
+}
+
+UTF8GreekAccents::~UTF8GreekAccents(){};
+
+void UTF8GreekAccents::setOptionValue(const char *ival)
+{
+	option = (!stricmp(ival, on));
+}
+
+const char *UTF8GreekAccents::getOptionValue()
+{
+	return (option) ? 
on:off;
+}
+
+/******************************************************************************
+ * UTF8GreekAccents::ProcessText - when the option is off, rewrites text in
+ *	place without Greek accents: selected combining marks are
+ *	dropped and precomposed accented letters (Greek and Greek
+ *	Extended blocks) are replaced by their plain base letter.
+ *	Output is never longer than input, so 'to' never overtakes
+ *	'from'.
+ *
+ * ENT:	text	- NUL-terminated UTF-8 buffer, rewritten in place
+ *	maxlen, key, module - not used by this filter
+ *
+ * RET:	always 0
+ */
+char UTF8GreekAccents::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+	if (!option) {
+		unsigned char *to, *from;
+
+		to = (unsigned char*)text;
+		for (from = (unsigned char*)text; *from; from++) {
+			//first just remove combining characters
+			if (*from == 0xE2 && *(from + 1) == 0x80 && *(from + 2) == 0x99)
+				from += 2;
+			// only the listed combining marks are stripped; any other 0xCC xx
+			// sequence falls through to the final else and is copied intact
+			// (previously its lead byte was dropped and the tail byte kept)
+			else if (*from == 0xCC && (*(from + 1) == 0x80 || *(from + 1) == 0x81 || *(from + 1) == 0x82 || *(from + 1) == 0x88 || *(from + 1) == 0x93 || *(from + 1) == 0x94))
+				from++;
+			else if (*from == 0xCD && *(from + 1) == 0xBA)
+				from++;
+			//now convert pre-composed characters to their alphabetic bases, discarding the accents
+
+			//Greek
+			//capital alpha
+			else if ((*from == 0xCE && *(from + 1) == 0x86)) {
+				*to++ = 0xCE;
+				*to++ = 0x91;
+				from++;
+			}
+			//capital epsilon
+			else if ((*from == 0xCE && *(from + 1) == 0x88)) {
+				*to++ = 0xCE;
+				*to++ = 0x95;
+				from++;
+			}
+			//capital eta
+			else if ((*from == 0xCE && *(from + 1) == 0x89)) {
+				*to++ = 0xCE;
+				*to++ = 0x97;
+				from++;
+			}
+			//capital iota
+			else if ((*from == 0xCE && (*(from + 1) == 0x8A || *(from + 1) == 0xAA))) {
+				*to++ = 0xCE;
+				*to++ = 0x99;
+				from++;
+			}
+			//capital omicron
+			else if ((*from == 0xCE && *(from + 1) == 0x8C)) {
+				*to++ = 0xCE;
+				*to++ = 0x9F;
+				from++;
+			}
+			//capital upsilon
+			else if ((*from == 0xCE && (*(from + 1) == 0x8E || *(from + 1) == 0xAB))) {
+				*to++ = 0xCE;
+				*to++ = 0xA5;
+				from++;
+			}
+			//capital omega
+			else if ((*from == 0xCE && *(from + 1) == 0x8F)) {
+				*to++ = 0xCE;
+				*to++ = 0xA9;
+				from++;
+			}
+
+			//alpha
+			else if ((*from == 0xCE && *(from + 1) == 0xAC)) {
+				*to++ = 0xCE;
+				*to++ = 0xB1;
+				from++;
+			}
+			//epsilon
+			else if ((*from == 0xCE && *(from + 1) == 0xAD)) {
+				*to++ = 0xCE;
+				*to++ = 0xB5;
+				from++;
+			}
+			//eta
+			else if ((*from == 0xCE && *(from + 1) == 0xAE)) {
+				*to++ = 0xCE;
+				*to++ = 0xB7;
+				from++;
+			}
+			//iota (0xCE 0x90 is iota with dialytika and tonos)
+			else if ((*from == 0xCE && (*(from + 1) == 0xAF || *(from + 1) == 0x90)) || (*from == 0xCF && *(from + 1) == 0x8A)) {
+				*to++ = 0xCE;
+				*to++ = 0xB9;
+				from++;
+			}
+			//omicron
+			else if ((*from == 0xCF && *(from + 1) == 0x8C)) {
+				*to++ = 0xCE;
+				*to++ = 0xBF;
+				from++;
+			}
+			//upsilon (0xCE 0xB0 is upsilon with dialytika and tonos; the old test
+			//for 0xCE 0x88 could never match here, capital epsilon takes it above)
+			else if ((*from == 0xCE && *(from + 1) == 0xB0) || (*from == 0xCF && (*(from + 1) == 0x8B || *(from + 1) == 0x8D))) {
+				*to++ = 0xCF;
+				*to++ = 0x85;
+				from++;
+			}
+			//omega
+			else if ((*from == 0xCF && *(from + 1) == 0x8E)) {
+				*to++ = 0xCF;
+				*to++ = 0x89;
+				from++;
+			}
+
+			//Extended Greek
+			//capital alpha (every alternative must sit under the 0xE1 lead-byte test)
+			else if (*from == 0xE1 && (((*(from + 1) == 0xBC || *(from + 1) == 0xBE) && *(from + 2) >= 0x88 && *(from + 2) <= 0x8F) || (*(from + 1) == 0xBE && *(from + 2) >= 0xB8 && *(from + 2) <= 0xBC))) {
+				*to++ = 0xCE;
+				*to++ = 0x91;
+				from+=2;
+			}
+			//capital epsilon
+			else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0x98 && *(from + 2) <= 0x9D) || (*(from + 1) == 0xBF && (*(from + 2) == 0x88 || *(from + 2) == 0x89)))) {
+				*to++ = 0xCE;
+				*to++ = 0x95;
+				from+=2;
+			}
+			//capital eta
+			else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAF) || (*(from + 1) == 0xBE && *(from + 2) >= 0x98 && *(from + 2) <= 0x9F) || (*(from + 1) == 0xBF && *(from + 2) >= 0x8A && *(from + 2) <= 0x8C))) {
+				*to++ = 0xCE;
+				*to++ = 0x97;
+				from+=2;
+			}
+			//capital iota
+			else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xB8 && *(from + 2) <= 0xBF) || (*(from + 1) == 0xBF && *(from + 2) >= 0x98 && *(from + 2) <= 0x9B))) {
+				*to++ = 0xCE;
+				*to++ = 0x99;
+				from+=2;
+			}
+			//capital omicron (the 0xB8/0xB9 pair belongs to the 0xBF second byte only)
+			else if (*from == 0xE1 && ((*(from + 1) == 0xBD && *(from + 2) >= 0x88 && *(from + 2) <= 0x8D) || (*(from + 1) == 0xBF && (*(from + 2) == 0xB8 || *(from + 2) == 0xB9)))) {
+				*to++ = 0xCE;
+				*to++ = 0x9F;
+				from+=2;
+			}
+			//capital upsilon
+			else if (*from == 0xE1 && ((*(from + 1) == 0xBD && *(from + 2) >= 0x99 && *(from + 2) <= 0x9F) || (*(from + 1) == 0xBF && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAB))) {
+				*to++ = 0xCE;
+				*to++ = 0xA5;
+				from+=2;
+			}
+			//capital omega
+			else if (*from == 0xE1 && (((*(from + 1) == 0xBD || *(from + 1) == 0xBE) && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAF) || (*(from + 1) == 0xBF && *(from + 2) >= 0xBA && *(from + 2) <= 0xBC))) {
+				*to++ = 0xCE;
+				*to++ = 0xA9;
+				from+=2;
+			}
+			//capital rho
+			else if (*from == 0xE1 && *(from + 1) == 0xBF && *(from + 2) == 0xAC) {
+				*to++ = 0xCE;
+				*to++ = 0xA1;
+				from+=2;
+			}
+
+			//alpha (every alternative must sit under the 0xE1 lead-byte test)
+			else if (*from == 0xE1 && (((*(from + 1) == 0xBC || *(from + 1) == 0xBE) && *(from + 2) >= 0x80 && *(from + 2) <= 0x87) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB0 || *(from + 2) == 0xB1)) || (*(from + 1) == 0xBE && *(from + 2) >= 0xB0 && *(from + 2) <= 0xB7))) {
+				*to++ = 0xCE;
+				*to++ = 0xB1;
+				from+=2;
+			}
+			//epsilon
+			else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0x90 && *(from + 2) <= 0x95) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB2 || *(from + 2) == 0xB3)))) {
+				*to++ = 0xCE;
+				*to++ = 0xB5;
+				from+=2;
+			}
+			//eta
+			else if (*from == 0xE1 && ((*(from + 1) == 0xBE && *(from + 2) >= 0x90 && *(from + 2) <= 0x97) || (*(from + 1) == 0xBC && *(from + 2) >= 0xA0 && *(from + 2) <= 0xA7) || (*(from + 1) == 0xBF && *(from + 2) >= 0x82 && *(from + 2) <= 0x87) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB4 || *(from + 2) == 0xB5)))) {
+				*to++ = 0xCE;
+				*to++ = 0xB7;
+				from+=2;
+			}
+			//iota
+			else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xB0 && *(from + 2) <= 0xB7) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB6 || *(from + 2) == 0xB7)) || (*(from + 1) == 0xBF && *(from + 2) >= 0x90 && *(from + 2) <= 0x97))) {
+				*to++ = 0xCE;
+				*to++ = 0xB9;
+				from+=2;
+			}
+			//omicron
+			else if (*from == 0xE1 && (*(from + 1) == 0xBD && ((*(from + 2) >= 0x80 && *(from + 2) <= 0x85) || (*(from + 2) == 0xB8 || *(from + 2) == 0xB9)))) {
+				*to++ = 0xCE;
+				*to++ = 0xBF;
+				from+=2;
+			}
+			//upsilon
+			else if (*from == 0xE1 && ((*(from + 1) == 0xBD && ((*(from + 2) >= 0x90 && *(from + 2) <= 0x97) || *(from + 2) == 0xBA || *(from + 2) == 0xBB)) || (*(from + 1) == 0xBF && ((*(from + 2) >= 0xA0 && *(from + 2) <= 0xA3) || *(from + 2) == 0xA6 || *(from + 2) == 0xA7)))) {
+				*to++ = 0xCF;
+				*to++ = 0x85;
+				from+=2;
+			}
+			//omega
+			else if (*from == 0xE1 && ((*(from + 1) == 0xBD && ((*(from + 2) >= 0xA0 && *(from + 2) <= 0xA7) || (*(from + 2) == 0xBC || *(from + 2) == 0xBD))) || (*(from + 1) == 0xBE && (*(from + 2) >= 0xA0 && *(from + 2) <= 0xA7)) || (*(from + 1) == 0xBF && *(from + 2) >= 0xB2 && *(from + 2) <= 0xB7))) {
+				*to++ = 0xCF;
+				*to++ = 0x89;
+				from+=2;
+			}
+			//rho (either breathing form; '&&' between two values could never be true)
+			else if (*from == 0xE1 && *(from + 1) == 0xBF && (*(from + 2) == 0xA4 || *(from + 2) == 0xA5)) {
+				*to++ = 0xCF;
+				*to++ = 0x81;
+				from+=2;
+			}
+			else
+				*to++ = *from;
+		}
+		*to++ = 0;
+		*to = 0;
+	}
+	return 0;
+}
+
+
+
+
+
+
diff --git a/src/modules/filters/utf8hebrewpoints.cpp b/src/modules/filters/utf8hebrewpoints.cpp
new file mode 100644
index 0000000..e5b50e1
--- /dev/null
+++ b/src/modules/filters/utf8hebrewpoints.cpp
@@ -0,0 +1,55 @@
+/******************************************************************************
+ *
+ * UTF8HebrewPoints - SWFilter decendant to remove UTF-8 Hebrew vowel points
+ *
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <utf8hebrewpoints.h>
+
+
+const char UTF8HebrewPoints::on[] = "On";
+const char UTF8HebrewPoints::off[] = "Off";
+const char UTF8HebrewPoints::optName[] = "Hebrew Vowel Points";
+const char UTF8HebrewPoints::optTip[] = "Toggles Hebrew Vowel Points";
+
+UTF8HebrewPoints::UTF8HebrewPoints() {
+	option = true;
+	options.push_back(on);
+	options.push_back(off);
+}
+
+UTF8HebrewPoints::~UTF8HebrewPoints(){};
+
+void UTF8HebrewPoints::setOptionValue(const char *ival)
+{
+	option = (!stricmp(ival, on));
+}
+
+const char *UTF8HebrewPoints::getOptionValue()
+{
+	return (option) ? 
on:off; +} + +char UTF8HebrewPoints::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { + unsigned char *to, *from; + + to = (unsigned char*)text; + //The UTF-8 range 0xD6 0xB0 to 0xD6 0xBF, excluding 0xD6 0xBE, consists of Hebrew vowel points so block those out. + for (from = (unsigned char*)text; *from; from++) { + if ((*from == 0xD6) && (*(from + 1) >= 0xB0 && *(from + 1) <= 0xBF) && (*(from + 1) != 0xBE)) { + from++; + } + else { + *to++ = *from; + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/utf8html.cpp b/src/modules/filters/utf8html.cpp new file mode 100644 index 0000000..7487815 --- /dev/null +++ b/src/modules/filters/utf8html.cpp @@ -0,0 +1,66 @@ +/****************************************************************************** + * + * utf8html - SWFilter descendant to convert a UTF-8 stream to HTML escapes + * + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <utf8html.h> + +UTF8HTML::UTF8HTML() { +} + + +char UTF8HTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned char *to, *from; + int len; + char digit[10]; + unsigned long ch; + + len = strlenw(text) + 2; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned char*)&text[maxlen - len]; + } + else from = (unsigned char*)text; + // ------------------------------- + for (to = (unsigned char*)text; *from; from++) { + ch = 0; + if ((*from & 128) != 128) { +// if (*from != ' ') + *to++ = *from; + continue; + } + if ((*from & 128) && ((*from & 64) != 64)) { + // error + *from = 'x'; + continue; + } + *from <<= 1; + int subsequent; + for (subsequent = 1; (*from & 128); subsequent++) { + *from <<= 1; + from[subsequent] &= 63; + ch <<= 6; + ch |= from[subsequent]; + } + subsequent--; + *from <<=1; + char significantFirstBits = 8 - (2+subsequent); + + ch |= (((short)*from) << 
(((6*subsequent)+significantFirstBits)-8));
+		from += subsequent;
+		*to++ = '&';
+		*to++ = '#';
+		sprintf(digit, "%lu", ch);	// ch is unsigned long; "%d" mismatched the argument type
+		for (char *dig = digit; *dig; dig++)
+			*to++ = *dig;
+		*to++ = ';';
+	}
+	*to++ = 0;
+	*to = 0;
+	return 0;
+}
diff --git a/src/modules/filters/utf8latin1.cpp b/src/modules/filters/utf8latin1.cpp
new file mode 100644
index 0000000..6cc1acd
--- /dev/null
+++ b/src/modules/filters/utf8latin1.cpp
@@ -0,0 +1,74 @@
+/******************************************************************************
+ *
+ * UTF8Latin1 - SWFilter descendant to convert UTF-8 to Latin-1
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <utf8latin1.h>
+
+UTF8Latin1::UTF8Latin1(char rchar) : replacementChar(rchar) {
+}
+
+
+/******************************************************************************
+ * UTF8Latin1::ProcessText - decodes the UTF-8 text and rewrites it in place
+ *	as one byte per character: code points up to U+00FF are
+ *	written as that byte, anything larger becomes
+ *	replacementChar.  Stray continuation bytes are skipped.
+ *
+ * ENT:	text	- NUL-terminated UTF-8 buffer, rewritten in place
+ *	maxlen	- compared against the text length to decide whether the
+ *		  input is first moved to the end of the buffer
+ *	key, module - not used by this filter
+ *
+ * RET:	always 0
+ */
+char UTF8Latin1::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+	unsigned char *from;
+	unsigned char *to;	// output is 8-bit; a 16-bit cursor interleaved extra bytes
+
+	int len;
+	unsigned long uchar;
+	unsigned char significantFirstBits, subsequent;
+
+	len = strlen(text) + 1;						// shift string to right of buffer
+	if (len < maxlen) {
+		memmove(&text[maxlen - len], text, len);
+		from = (unsigned char*)&text[maxlen - len];
+	}
+	else
+		from = (unsigned char*)text;
+
+
+	// -------------------------------
+
+	for (to = (unsigned char*)text; *from; from++) {
+		uchar = 0;
+		if ((*from & 128) != 128) {
+			//	  	if (*from != ' ')
+			uchar = *from;
+		}
+		else if ((*from & 128) && ((*from & 64) != 64)) {
+			// error, do nothing
+			continue;
+		}
+		else {
+			// the count of leading 1 bits in the lead byte gives the sequence
+			// length; shift them out while folding in the continuation bytes
+			*from <<= 1;
+			for (subsequent = 1; (*from & 128); subsequent++) {
+				*from <<= 1;
+				from[subsequent] &= 63;
+				uchar <<= 6;
+				uchar |= from[subsequent];
+			}
+			subsequent--;
+			*from <<=1;
+			significantFirstBits = 8 - (2+subsequent);
+
+			uchar |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8));
+			from += subsequent;
+		}
+
+		if (uchar <= 0xff) {	// U+00FF is still Latin-1
+			*to++ = (unsigned char)uchar;
+		}
+		else {
+			*to++ = replacementChar;
+		}
+	}
+	*to++ = 0;
+	*to = 0;
+
+	return 0;
+}
+
diff --git a/src/modules/filters/utf8nfc.cpp 
b/src/modules/filters/utf8nfc.cpp
new file mode 100644
index 0000000..df9e090
--- /dev/null
+++ b/src/modules/filters/utf8nfc.cpp
@@ -0,0 +1,46 @@
+/******************************************************************************
+*
+* utf8nfc - SWFilter descendant to perform NFC (canonical composition
+* normalization) on UTF-8 text
+*/
+
+#ifdef _ICU_
+
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef __GNUC__
+#include <unixstr.h>
+#endif
+
+#include <utf8nfc.h>
+
+UTF8NFC::UTF8NFC() {
+	conv = ucnv_open("UTF-8", &err);
+}
+
+UTF8NFC::~UTF8NFC() {
+	ucnv_close(conv);
+}
+
+/******************************************************************************
+ * UTF8NFC::ProcessText - converts text to UTF-16, applies NFC and converts
+ *	the result back into text.  text is left as it was when
+ *	conversion to UTF-16 or normalization fails.
+ *
+ * ENT:	text	- NUL-terminated UTF-8 buffer, rewritten in place
+ *	maxlen	- capacity handed to ucnv_fromUChars for the result
+ *	key, module - not used by this filter
+ *
+ * RET:	always 0
+ */
+char UTF8NFC::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+	// ICU functions return immediately when the status already holds a
+	// failure, so a leftover error must not carry over from an earlier call
+	err = U_ZERO_ERROR;
+
+	int32_t len = strlen(text) * 2;
+	source = new UChar[len + 1]; //each char could become a surrogate pair
+
+	// Convert UTF-8 string to UTF-16 (UChars)
+	len = ucnv_toUChars(conv, source, len, text, -1, &err);
+
+	int32_t cap = len + 1;
+	int32_t outlen = 0;
+	target = new UChar[cap];
+
+	if (U_SUCCESS(err)) {
+		//canonical composition
+		outlen = unorm_normalize(source, len, UNORM_NFC, 0, target, cap, &err);
+		if (err == U_BUFFER_OVERFLOW_ERROR) {
+			// outlen now holds the size actually needed: retry with that much room
+			delete [] target;
+			cap = outlen + 1;
+			target = new UChar[cap];
+			err = U_ZERO_ERROR;
+			outlen = unorm_normalize(source, len, UNORM_NFC, 0, target, cap, &err);
+		}
+	}
+
+	// pass the real length rather than -1 so nothing depends on target
+	// having been NUL-terminated
+	if (U_SUCCESS(err))
+		ucnv_fromUChars(conv, text, maxlen, target, outlen, &err);
+
+	delete [] source;
+	delete [] target;
+
+	return 0;
+}
+
+#endif
diff --git a/src/modules/filters/utf8nfkd.cpp b/src/modules/filters/utf8nfkd.cpp
new file mode 100644
index 0000000..450cbbf
--- /dev/null
+++ b/src/modules/filters/utf8nfkd.cpp
@@ -0,0 +1,46 @@
+/******************************************************************************
+*
+* utf8nfkd - SWFilter descendant to perform NFKD (compatibility decomposition
+* normalization) on UTF-8 text
+*/
+
+#ifdef _ICU_
+
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef __GNUC__
+#include <unixstr.h>
+#endif
+
+#include <utf8nfkd.h>
+
+UTF8NFKD::UTF8NFKD() {
+	conv = ucnv_open("UTF-8", &err);
+}
+
+UTF8NFKD::~UTF8NFKD() {
+	ucnv_close(conv);
+}
+
+/******************************************************************************
+ * UTF8NFKD::ProcessText - converts text to UTF-16, applies NFKD and converts
+ *	the result back into text.  Decomposition usually lengthens
+ *	the string, so the target buffer is re-sized on overflow.
+ *	text is left as it was when conversion to UTF-16 or
+ *	normalization fails.
+ *
+ * ENT:	text	- NUL-terminated UTF-8 buffer, rewritten in place
+ *	maxlen	- capacity handed to ucnv_fromUChars for the result
+ *	key, module - not used by this filter
+ *
+ * RET:	always 0
+ */
+char UTF8NFKD::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+	// ICU functions return immediately when the status already holds a
+	// failure, so a leftover error must not carry over from an earlier call
+	err = U_ZERO_ERROR;
+
+	int32_t len = strlen(text) * 2;
+	source = new UChar[len + 1]; //each char could become a surrogate pair
+
+	// Convert UTF-8 string to UTF-16 (UChars)
+	len = ucnv_toUChars(conv, source, len, text, -1, &err);
+
+	int32_t cap = len + 1;
+	int32_t outlen = 0;
+	target = new UChar[cap];
+
+	if (U_SUCCESS(err)) {
+		//compatibility decomposition
+		outlen = unorm_normalize(source, len, UNORM_NFKD, 0, target, cap, &err);
+		if (err == U_BUFFER_OVERFLOW_ERROR) {
+			// outlen now holds the size actually needed: retry with that much room
+			delete [] target;
+			cap = outlen + 1;
+			target = new UChar[cap];
+			err = U_ZERO_ERROR;
+			outlen = unorm_normalize(source, len, UNORM_NFKD, 0, target, cap, &err);
+		}
+	}
+
+	// pass the real length rather than -1 so nothing depends on target
+	// having been NUL-terminated
+	if (U_SUCCESS(err))
+		ucnv_fromUChars(conv, text, maxlen, target, outlen, &err);
+
+	delete [] source;
+	delete [] target;
+
+	return 0;
+}
+
+#endif
diff --git a/src/modules/filters/utf8transliterator.cpp b/src/modules/filters/utf8transliterator.cpp
new file mode 100644
index 0000000..3686717
--- /dev/null
+++ b/src/modules/filters/utf8transliterator.cpp
@@ -0,0 +1,718 @@
+/******************************************************************************
+*
+* utf8transliterators - SWFilter decendant to transliterate between
+* ICU-supported scripts.
+*/
+
+#ifdef _ICU_
+
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef __GNUC__
+#include <unixstr.h>
+#endif
+
+#include <utf8transliterator.h>
+
+#ifndef _ICUSWORD_
+#include "unicode/resbund.h"
+#endif
+#include <swlog.h>
+
+#ifdef _ICU_
+class UnicodeCaster {
+	const UnicodeString &ustr;
+public:
+	UnicodeCaster(const UnicodeString &ustr):ustr(ustr) {}; operator const char *() { return ""; };
+};
+
+#endif
+const char UTF8Transliterator::optionstring[NUMTARGETSCRIPTS][16] = {
+	"Off",
+	"Latin",
+	"Basic Latin",
+	"Beta",
+	"BGreek",
+/*
+	"Greek",
+	"Hebrew",
+	"Cyrillic",
+	"Arabic",
+	"Syriac",
+	"Katakana",
+	"Hiragana",
+	"Jamo",
+	"Hangul",
+	"Devanagari",
+	"Tamil",
+	"Bengali",
+	"Gurmukhi",
+	"Gujarati",
+	"Oriya",
+	"Telugu",
+	"Kannada",
+	"Malayalam",
+	"Thai",
+	"Georgian",
+	"Armenian",
+	"Ethiopic",
+	"Gothic",
+	"Ugaritic",
+	"Coptic"
+	*/
+};
+
+const char UTF8Transliterator::optName[] = "Transliteration";
+const char UTF8Transliterator::optTip[] = "Transliterates between scripts";
+
+SWTransMap UTF8Transliterator::transMap;
+
+#ifndef _ICUSWORD_
+
+const char UTF8Transliterator::SW_RB_RULE_BASED_IDS[] = "RuleBasedTransliteratorIDs";
+const char UTF8Transliterator::SW_RB_RULE[] = "Rule";
+#ifdef SWICU_DATA
+const char UTF8Transliterator::SW_RESDATA[] = SWICU_DATA;
+#else
+const char UTF8Transliterator::SW_RESDATA[] = "/usr/local/lib/sword/";
+#endif
+
+class SWCharString {
+ public:
+	inline SWCharString(const UnicodeString& str);
+	inline ~SWCharString();
+	inline operator const char*() { return ptr; }
+ private:
+	char buf[128];
+	char* ptr;
+};
+SWCharString::SWCharString(const UnicodeString& str) {
+	// TODO This isn't quite right -- we should probably do
+	// preflighting here to determine the real length.
+	if (str.length() >= (int32_t)sizeof(buf)) {
+		ptr = new char[str.length() + 8];
+	} else {
+		ptr = buf;
+	}
+	str.extract(0, 0x7FFFFFFF, ptr, "");
+}
+
+SWCharString::~SWCharString() {
+	if (ptr != buf) {
+		delete[] ptr;
+	}
+}
+
+#endif // _ICUSWORD_
+
+
+UTF8Transliterator::UTF8Transliterator() {
+	option = 0;
+	unsigned long i;
+	for (i = 0; i < NUMTARGETSCRIPTS; i++) {
+		options.push_back(optionstring[i]);
+	}
+#ifndef _ICUSWORD_
+	utf8status = U_ZERO_ERROR;
+	Load(utf8status);
+#endif
+}
+
+void UTF8Transliterator::Load(UErrorCode &status)
+{
+#ifndef _ICUSWORD_
+	static const char translit_swordindex[] = "translit_swordindex";
+
+	UResourceBundle *bundle, *transIDs, *colBund;
+	bundle = ures_openDirect(SW_RESDATA, translit_swordindex, &status);
+	if (U_FAILURE(status)) {
+		SWLog::systemlog->LogError("no resource index to load");
+		SWLog::systemlog->LogError("status %s", u_errorName(status));
+		return;
+	}
+
+	transIDs = ures_getByKey(bundle, SW_RB_RULE_BASED_IDS, 0, &status);
+	UParseError parseError;
+
+	int32_t row, maxRows;
+	if (U_SUCCESS(status)) {
+		maxRows = ures_getSize(transIDs);
+		for (row = 0; row < maxRows; row++) {
+			colBund = ures_getByIndex(transIDs, row, 0, &status);
+
+			if (U_SUCCESS(status) && ures_getSize(colBund) == 4) {
+				UnicodeString id = ures_getUnicodeStringByIndex(colBund, 0, &status);
+				UChar type = ures_getUnicodeStringByIndex(colBund, 1, &status).charAt(0);
+				UnicodeString resString = ures_getUnicodeStringByIndex(colBund, 2, &status);
+				SWLog::systemlog->LogInformation("ok so far");
+
+				if (U_SUCCESS(status)) {
+					switch (type) {
+					case 0x66: // 'f'
+					case 0x69: // 'i'
+						// 'file' or 'internal';
+						// row[2]=resource, row[3]=direction
+						{
+							UBool visible = (type == 0x0066 /*f*/);
+							UTransDirection dir =
+								(ures_getUnicodeStringByIndex(colBund, 3, &status).charAt(0) ==
+								 0x0046 /*F*/) ?
+								UTRANS_FORWARD : UTRANS_REVERSE;
+							//registry->put(id, resString, dir, visible);
+							SWLog::systemlog->LogInformation("instantiating %s ...", (const char *)(UnicodeCaster)resString);
+							registerTrans(id, resString, dir, status);
+							SWLog::systemlog->LogInformation("done.");
+						}
+						break;
+					case 0x61: // 'a'
+						// 'alias'; row[2]=createInstance argument
+						//registry->put(id, resString, TRUE);
+						break;
+					}
+				}
+				else SWLog::systemlog->LogError("Failed to get resString");
+			}
+			else SWLog::systemlog->LogError("Failed to get row");
+
+			ures_close(colBund);
+		}
+	}
+	else
+	{
+		SWLog::systemlog->LogError("no resource index to load");
+		SWLog::systemlog->LogError("status %s", u_errorName(status));
+	}
+
+	ures_close(transIDs);
+	ures_close(bundle);
+
+#endif // _ICUSWORD_
+}
+
+void UTF8Transliterator::registerTrans(const UnicodeString& ID, const UnicodeString& resource,
+		UTransDirection dir, UErrorCode &status )
+{
+#ifndef _ICUSWORD_
+	SWLog::systemlog->LogInformation("registering ID locally %s", (const char *)(UnicodeCaster)ID);
+	SWTransData swstuff;
+	swstuff.resource = resource;
+	swstuff.dir = dir;
+	SWTransPair swpair;
+	swpair.first = ID;
+	swpair.second = swstuff;
+	transMap.insert(swpair);
+#endif
+}
+
+/******************************************************************************
+ * UTF8Transliterator::checkTrans - makes sure a transliterator for ID can be
+ *	instantiated.  If ICU cannot create it directly, the ID is
+ *	looked up in transMap, its rules are read from the resource
+ *	bundle and the transliterator built from them is registered.
+ *
+ * ENT:	ID	- transliterator ID to check
+ *	status	- ICU status; reset after the first attempt, holds the
+ *		  failure code when false is returned from a later step
+ *
+ * RET:	true if the transliterator is available, false otherwise
+ */
+bool UTF8Transliterator::checkTrans(const UnicodeString& ID, UErrorCode &status )
+{
+#ifndef _ICUSWORD_
+	Transliterator *trans = Transliterator::createInstance(ID, UTRANS_FORWARD, status);
+	if (!U_FAILURE(status))
+	{
+		// already have it, clean up and return true
+		SWLog::systemlog->LogInformation("already have it %s", (const char *)(UnicodeCaster)ID);
+		delete trans;
+		return true;
+	}
+	status = U_ZERO_ERROR;
+
+	SWTransMap::iterator swelement;
+	if ((swelement = transMap.find(ID)) != transMap.end())
+	{
+		SWLog::systemlog->LogInformation("found element in map");
+		SWTransData swstuff = (*swelement).second;
+		UParseError parseError;
+		//UErrorCode status;
+		//std::cout << "unregistering " << ID << std::endl;
+		//Transliterator::unregister(ID);
+		SWLog::systemlog->LogInformation("resource is %s", (const char *)(UnicodeCaster)swstuff.resource);
+
+		// Get the rules
+		//std::cout << "importing: " << ID << ", " << resource << std::endl;
+		SWCharString ch(swstuff.resource);
+		UResourceBundle *bundle = ures_openDirect(SW_RESDATA, ch, &status);
+		const UnicodeString rules = ures_getUnicodeStringByKey(bundle, SW_RB_RULE, &status);
+		ures_close(bundle);
+		//parser.parse(rules, isReverse ? UTRANS_REVERSE : UTRANS_FORWARD,
+		//        parseError, status);
+		if (U_FAILURE(status)) {
+			SWLog::systemlog->LogError("Failed to get rules");
+			SWLog::systemlog->LogError("status %s", u_errorName(status));
+			return false;
+		}
+
+
+		Transliterator *trans = Transliterator::createFromRules(ID, rules, swstuff.dir,
+			parseError,status);
+		if (U_FAILURE(status)) {
+			SWLog::systemlog->LogError("Failed to create transliterator");
+			SWLog::systemlog->LogError("status %s", u_errorName(status));
+			// line and offset are integers; the contexts are UChar arrays and
+			// have to be converted before they can be printed with %s
+			UnicodeString preContext(parseError.preContext);
+			UnicodeString postContext(parseError.postContext);
+			SWCharString preChars(preContext);
+			SWCharString postChars(postContext);
+			SWLog::systemlog->LogError("Parse error: line %d", (int)parseError.line);
+			SWLog::systemlog->LogError("Parse error: offset %d", (int)parseError.offset);
+			SWLog::systemlog->LogError("Parse error: preContext %s", (const char *)preChars);
+			SWLog::systemlog->LogError("Parse error: postContext %s", (const char *)postChars);
+			SWLog::systemlog->LogError("rules were");
+//			SWLog::systemlog->LogError((const char *)rules);
+			return false;
+		}
+
+		Transliterator::registerInstance(trans);
+		return true;
+
+		//Transliterator *trans = instantiateTrans(ID, swstuff.resource, swstuff.dir, parseError, status);
+		//return trans;
+	}
+	else
+	{
+		return false;
+	}
+#else
+return true;
+#endif // _ICUSWORD_
+}
+
+Transliterator * UTF8Transliterator::createTrans(const UnicodeString& preID, const UnicodeString& ID,
+	const UnicodeString& postID, UTransDirection dir, UErrorCode &status )
+{
+	// extract id to check from ID xxx;id;xxx
+	if (checkTrans(ID, status)) {
+		UnicodeString fullID = preID;
+		fullID += ID;
+		fullID += postID;
+		Transliterator *trans = Transliterator::createInstance(fullID,UTRANS_FORWARD,status);
+		if (U_FAILURE(status)) {
+			delete trans;
+			return NULL;
+		}
+		else {
+			return trans;
+		}
+	}
+	else {
+		return NULL;
+	}
+}
+
+/******************************************************************************
+ * UTF8Transliterator::setOptionValue - selects the target script by name
+ *
+ * ENT:	ival	- option name, compared case-insensitively against the
+ *		  entries of optionstring
+ *
+ * Leaves option at the index of the matching entry, or at 0 when nothing
+ * matches.
+ */
+void UTF8Transliterator::setOptionValue(const char *ival)
+{
+	// walk down from the last valid index; the table has NUMTARGETSCRIPTS
+	// entries, so starting at NUMTARGETSCRIPTS itself read past its end
+	unsigned char i = option = NUMTARGETSCRIPTS - 1;
+	while (i && stricmp(ival, optionstring[i])) {
+		i--;
+		option = i;
+	}
+}
+
+const char *UTF8Transliterator::getOptionValue()
+{
+	return (NUMTARGETSCRIPTS > option) ? optionstring[option] : 0;
+}
+
+char UTF8Transliterator::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+	if (option) {	// if we want transliteration
+		unsigned long i, j;
+		UErrorCode err = U_ZERO_ERROR;
+		UConverter * conv = NULL;
+		conv = ucnv_open("UTF-8", &err);
+
+		bool compat = false;
+		bool noNFC = false;
+
+		if (option == SE_JAMO) {
+			noNFC = true;
+		}
+
+		// Convert UTF-8 string to UTF-16 (UChars)
+		j = strlen(text);
+		int32_t len = (j * 2) + 1;
+		UChar *source = new UChar[len];
+		err = U_ZERO_ERROR;
+		len = ucnv_toUChars(conv, source, len, text, j, &err);
+		source[len] = 0;
+
+		// Figure out which scripts are used in the string
+		unsigned char scripts[NUMSCRIPTS];
+
+		for (i = 0; i < NUMSCRIPTS; i++) {
+			scripts[i] = false;
+		}
+
+		for (i = 0; i < len; i++) {
+			j = ublock_getCode(source[i]);
+			switch (j) {
+			case UBLOCK_BASIC_LATIN: scripts[SE_LATIN] = true; break;
+			case UBLOCK_GREEK: scripts[SE_GREEK] = true; break;
+			case UBLOCK_HEBREW: scripts[SE_HEBREW] = true; break;
+			case UBLOCK_CYRILLIC: scripts[SE_CYRILLIC] = true; break;
+			case UBLOCK_ARABIC: scripts[SE_ARABIC] = true; break;
+
case UBLOCK_SYRIAC: scripts[SE_SYRIAC] = true; break; + case UBLOCK_KATAKANA: scripts[SE_KATAKANA] = true; break; + case UBLOCK_HIRAGANA: scripts[SE_HIRAGANA] = true; break; + case UBLOCK_HANGUL_SYLLABLES: scripts[SE_HANGUL] = true; break; + case UBLOCK_HANGUL_JAMO: scripts[SE_JAMO] = true; break; + case UBLOCK_DEVANAGARI: scripts[SE_DEVANAGARI] = true; break; + case UBLOCK_TAMIL: scripts[SE_TAMIL] = true; break; + case UBLOCK_BENGALI: scripts[SE_BENGALI] = true; break; + case UBLOCK_GURMUKHI: scripts[SE_GURMUKHI] = true; break; + case UBLOCK_GUJARATI: scripts[SE_GUJARATI] = true; break; + case UBLOCK_ORIYA: scripts[SE_ORIYA] = true; break; + case UBLOCK_TELUGU: scripts[SE_TELUGU] = true; break; + case UBLOCK_KANNADA: scripts[SE_KANNADA] = true; break; + case UBLOCK_MALAYALAM: scripts[SE_MALAYALAM] = true; break; + case UBLOCK_THAI: scripts[SE_THAI] = true; break; + case UBLOCK_GEORGIAN: scripts[SE_GEORGIAN] = true; break; + case UBLOCK_ARMENIAN: scripts[SE_ARMENIAN] = true; break; + case UBLOCK_ETHIOPIC: scripts[SE_ETHIOPIC] = true; break; + case UBLOCK_GOTHIC: scripts[SE_GOTHIC] = true; break; + // needs Unicode 3.2? or 4.0? 
support from ICU + //case UBLOCK_UGARITIC: scripts[SE_UGARITIC] = true; break; + case UBLOCK_CJK_RADICALS_SUPPLEMENT: + case UBLOCK_KANGXI_RADICALS: + case UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS: + case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION: + case UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A: + case UBLOCK_CJK_UNIFIED_IDEOGRAPHS: + scripts[SE_HAN] = true; + break; + case UBLOCK_CJK_COMPATIBILITY: + case UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS: + case UBLOCK_CJK_COMPATIBILITY_FORMS: + scripts[SE_HAN] = true; + compat = true; + break; + case UBLOCK_HANGUL_COMPATIBILITY_JAMO: + scripts[SE_HANGUL] = true; + compat = true; + break; + + default: scripts[SE_LATIN] = true; + } + } + scripts[option] = false; //turn off the reflexive transliteration + + //return if we have no transliteration to do for this text + j = 0; + for (i = 0; !j && i < NUMSCRIPTS; i++) { + if (scripts[i]) j++; + } + if (!j) { + ucnv_close(conv); + return 0; + } + + UnicodeString preid; + if (compat) { + preid = UnicodeString("NFKD;"); + } + else { + preid = UnicodeString("NFD;"); + } + + //Simple X to Latin transliterators + UnicodeString id; + if (scripts[SE_GREEK]) { + if (option == SE_BETA) + id = UnicodeString("Greek-Beta"); + else if (option == SE_BGREEK) + id = UnicodeString("Greek-BGreek"); + else { + if (!strnicmp (((SWModule*)module)->Lang(), "cop", 3)) { + id = UnicodeString("Coptic-Latin"); + } + else { + id = UnicodeString("Greek-Latin"); + } + scripts[SE_LATIN] = true; + } + } + if (scripts[SE_HEBREW]) { + if (option == SE_BETA) + id = UnicodeString("Hebrew-CCAT"); + else if (option == SE_SYRIAC) + id = UnicodeString("Hebrew-Syriac"); + else { + id = UnicodeString("Hebrew-Latin"); + scripts[SE_LATIN] = true; + } + } + if (scripts[SE_CYRILLIC]) { + id = UnicodeString("Cyrillic-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_ARABIC]) { + id = UnicodeString("Arabic-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_SYRIAC]) { + if (option == SE_BETA) + id = 
UnicodeString("Syriac-CCAT"); + else if (option == SE_HEBREW) + id = UnicodeString("Syriac-Hebrew"); + else { + id = UnicodeString("Syriac-Latin"); + scripts[SE_LATIN] = true; + } + } + if (scripts[SE_THAI]) { + id = UnicodeString("Thai-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_GEORGIAN]) { + id = UnicodeString("Georgian-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_ARMENIAN]) { + id = UnicodeString("Armenian-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_ETHIOPIC]) { + id = UnicodeString("Ethiopic-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_GOTHIC]) { + id = UnicodeString("Gothic-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_UGARITIC]) { + id = UnicodeString("Ugaritic-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_HAN]) { + if (!strnicmp (((SWModule*)module)->Lang(), "ja", 2)) { + id = UnicodeString("Kanji-OnRomaji"); + } + else { + id = UnicodeString("Han-Pinyin"); + } + scripts[SE_LATIN] = true; + } + + // Inter-Kana and Kana to Latin transliterators + if (option == SE_HIRAGANA && scripts[SE_KATAKANA]) { + id = UnicodeString("Katakana-Hiragana"); + scripts[SE_HIRAGANA] = true; + } + else if (option == SE_KATAKANA && scripts[SE_HIRAGANA]) { + id = UnicodeString("Hiragana-Katakana"); + scripts[SE_KATAKANA] = true; + } + else { + if (scripts[SE_KATAKANA]) { + id = UnicodeString("Katakana-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_HIRAGANA]) { + id = UnicodeString("Hiragana-Latin"); + scripts[SE_LATIN] = true; + } + } + + // Inter-Korean and Korean to Latin transliterators + if (option == SE_HANGUL && scripts[SE_JAMO]) { + noNFC = false; + scripts[SE_HANGUL] = true; + } + else if (option == SE_JAMO && scripts[SE_HANGUL]) { + noNFC = true; + scripts[SE_JAMO] = true; + } + else { + if (scripts[SE_HANGUL]) { + id = UnicodeString("Hangul-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_JAMO]) { + id = UnicodeString("Jamo-Latin"); + scripts[SE_LATIN] = true; + } + } + + // 
Indic-Latin + if (option < SE_DEVANAGARI || option > SE_MALAYALAM) { + // Indic to Latin + if (scripts[SE_TAMIL]) { + id = UnicodeString("Tamil-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_BENGALI]) { + id = UnicodeString("Bengali-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_GURMUKHI]) { + id = UnicodeString("Gurmukhi-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_GUJARATI]) { + id = UnicodeString("Gujarati-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_ORIYA]) { + id = UnicodeString("Oriya-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_TELUGU]) { + id = UnicodeString("Telugu-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_KANNADA]) { + id = UnicodeString("Kannada-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_MALAYALAM]) { + id = UnicodeString("Malayalam-Latin"); + scripts[SE_LATIN] = true; + } + } + else { + if (scripts[SE_LATIN]) { + id = UnicodeString("Latin-InterIndic"); + } + if (scripts[SE_DEVANAGARI]) { + id = UnicodeString("Devanagari-InterIndic"); + } + if (scripts[SE_TAMIL]) { + id = UnicodeString("Tamil-InterIndic"); + } + if (scripts[SE_BENGALI]) { + id = UnicodeString("Bengali-InterIndic"); + } + if (scripts[SE_GURMUKHI]) { + id = UnicodeString("Gurmurkhi-InterIndic"); + } + if (scripts[SE_GUJARATI]) { + id = UnicodeString("Gujarati-InterIndic"); + } + if (scripts[SE_ORIYA]) { + id = UnicodeString("Oriya-InterIndic"); + } + if (scripts[SE_TELUGU]) { + id = UnicodeString("Telugu-InterIndic"); + } + if (scripts[SE_KANNADA]) { + id = UnicodeString("Kannada-InterIndic"); + } + if (scripts[SE_MALAYALAM]) { + id = UnicodeString("Malayalam-InterIndic"); + } + + switch(option) { + case SE_DEVANAGARI: + id = UnicodeString("InterIndic-Devanagari"); + break; + case SE_TAMIL: + id = UnicodeString("InterIndic-Tamil"); + break; + case SE_BENGALI: + id = UnicodeString("InterIndic-Bengali"); + break; + case SE_GURMUKHI: + id = UnicodeString("InterIndic-Gurmukhi"); + break; + case 
SE_GUJARATI: + id = UnicodeString("InterIndic-Gujarati"); + break; + case SE_ORIYA: + id = UnicodeString("InterIndic-Oriya"); + break; + case SE_TELUGU: + id = UnicodeString("InterIndic-Telugu"); + break; + case SE_KANNADA: + id = UnicodeString("InterIndic-Kannada"); + break; + case SE_MALAYALAM: + id = UnicodeString("InterIndic-Malayalam"); + break; + default: + id = UnicodeString("InterIndic-Latin"); + scripts[SE_LATIN] = true; + break; + } + } + + if (scripts[SE_LATIN]) { + switch (option) { + case SE_GREEK: + id = UnicodeString("Latin-Greek"); + break; + case SE_HEBREW: + id = UnicodeString("Latin-Hebrew"); + break; + case SE_CYRILLIC: + id = UnicodeString("Latin-Cyrillic"); + break; + case SE_ARABIC: + id = UnicodeString("Latin-Arabic"); + break; + case SE_SYRIAC: + id = UnicodeString("Latin-Syriac"); + break; + case SE_THAI: + id = UnicodeString("Latin-Thai"); + break; + case SE_GEORGIAN: + id = UnicodeString("Latin-Georgian"); + break; + case SE_ARMENIAN: + id = UnicodeString("Latin-Armenian"); + break; + case SE_ETHIOPIC: + id = UnicodeString("Latin-Ethiopic"); + break; + case SE_GOTHIC: + id = UnicodeString("Latin-Gothic"); + break; + case SE_UGARITIC: + id = UnicodeString("Latin-Ugaritic"); + break; + case SE_COPTIC: + id = UnicodeString("Latin-Coptic"); + break; + case SE_KATAKANA: + id = UnicodeString("Latin-Katakana"); + break; + case SE_HIRAGANA: + id = UnicodeString("Latin-Hiragana"); + break; + case SE_JAMO: + id = UnicodeString("Latin-Jamo"); + break; + case SE_HANGUL: + id = UnicodeString("Latin-Hangul"); + break; + } + } + + if (option == SE_BASICLATIN) { + id = UnicodeString("Any-Latin1"); + } + UnicodeString postid; + if (noNFC) { + postid = UnicodeString(";NFD"); + } else { + postid = UnicodeString(";NFC"); + } + + //UParseError perr; + + err = U_ZERO_ERROR; + //Transliterator * trans = Transliterator::createInstance(id, UTRANS_FORWARD, perr, err); + Transliterator * trans = createTrans(preid, id, postid, UTRANS_FORWARD, err); + if (trans && 
!U_FAILURE(err)) { + UnicodeString target = UnicodeString(source); + trans->transliterate(target); + len = ucnv_fromUChars(conv, text, maxlen, target.getBuffer(), target.length(), &err); + if (len < maxlen) *(text + len) = 0; + else *(text + maxlen) = 0; + delete trans; + } + ucnv_close(conv); + } + return 0; +} +#endif + + + diff --git a/src/modules/filters/utf8utf16.cpp b/src/modules/filters/utf8utf16.cpp new file mode 100644 index 0000000..9aea6fe --- /dev/null +++ b/src/modules/filters/utf8utf16.cpp @@ -0,0 +1,79 @@ +/****************************************************************************** + * + * UTF8UTF16 - SWFilter decendant to convert UTF-8 to UTF-16 + * + */ + +#include <stdlib.h> +#include <stdio.h> + +#include <utf8utf16.h> + +UTF8UTF16::UTF8UTF16() { +} + + +char UTF8UTF16::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned char *from; + unsigned short *to; + + int len; + unsigned long uchar; + unsigned char significantFirstBits, subsequent; + unsigned short schar; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned char*)&text[maxlen - len]; + } + else + from = (unsigned char*)text; + + + // ------------------------------- + + for (to = (unsigned short*)text; *from; from++) { + uchar = 0; + if ((*from & 128) != 128) { + // if (*from != ' ') + uchar = *from; + } + else if ((*from & 128) && ((*from & 64) != 64)) { + // error, do nothing + continue; + } + else { + *from <<= 1; + for (subsequent = 1; (*from & 128); subsequent++) { + *from <<= 1; + from[subsequent] &= 63; + uchar <<= 6; + uchar |= from[subsequent]; + } + subsequent--; + *from <<=1; + significantFirstBits = 8 - (2+subsequent); + + uchar |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8)); + from += subsequent; + } + + if (uchar < 0x1ffff) { + *to++ = (unsigned short)uchar; + } + else { + uchar -= 0x10000; + schar = 0xD800 | (uchar & 
0x03ff); + uchar >>= 10; + uchar |= 0xDC00; + *to++ = (unsigned short)schar; + *to++ = (unsigned short)uchar; + } + } + *to = (unsigned short)0; + + return 0; +} + diff --git a/src/modules/genbook/rawgenbook/rawgenbook.cpp b/src/modules/genbook/rawgenbook/rawgenbook.cpp new file mode 100644 index 0000000..e22532a --- /dev/null +++ b/src/modules/genbook/rawgenbook/rawgenbook.cpp @@ -0,0 +1,216 @@ +/****************************************************************************** + * rawtext.cpp - code for class 'RawGenBook'- a module that reads raw text files: + * ot and nt using indexs ??.bks ??.cps ??.vss + */ + + +#include <stdio.h> +#include <fcntl.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> +#include <rawgenbook.h> +#include <rawstr.h> + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +/****************************************************************************** + * RawGenBook Constructor - Initializes data for instance of RawGenBook + * + * ENT: iname - Internal name for module + * idesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +RawGenBook::RawGenBook(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) + : SWGenBook(iname, idesc, idisp, enc, dir, mark, ilang) { + int fileMode = O_RDWR; + char *buf = new char [ strlen (ipath) + 20 ]; + + entryBuf = 0; + path = 0; + stdstr(&path, ipath); + + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + delete key; + key = CreateKey(); + + + sprintf(buf, "%s.bdt", path); + bdtfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + delete [] buf; + +} + + +/****************************************************************************** + * RawGenBook Destructor - Cleans up instance of RawGenBook + */ + +RawGenBook::~RawGenBook() { + + 
FileMgr::systemFileMgr.close(bdtfd); + + if (path) + delete [] path; + + if (entryBuf) + delete [] entryBuf; +} + + +/****************************************************************************** + * RawGenBook::getRawEntry - Returns the correct verse when char * cast + * is requested + * + * RET: string buffer with verse + */ + +char *RawGenBook::getRawEntry() { + + __u32 offset = 0; + __u32 size = 0; + + TreeKeyIdx *key = 0; + try { + key = SWDYNAMIC_CAST(TreeKeyIdx, (this->key)); + } + catch ( ... ) {} + + if (!key) { + key = (TreeKeyIdx *)CreateKey(); + (*key) = *(this->key); + } + + if (entryBuf) + delete [] entryBuf; + + int dsize; + key->getUserData(&dsize); + if (dsize > 7) { + memcpy(&offset, key->getUserData(), 4); + offset = swordtoarch32(offset); + + memcpy(&size, key->getUserData() + 4, 4); + size = swordtoarch32(size); + + entrySize = size; // support getEntrySize call + + entryBuf = new char [ (size + 2) * FILTERPAD ]; + *entryBuf = 0; + lseek(bdtfd->getFd(), offset, SEEK_SET); + read(bdtfd->getFd(), entryBuf, size); + + rawFilter(entryBuf, size, key); + + if (!isUnicode()) + RawStr::preptext(entryBuf); + } + else { + entryBuf = new char [2]; + entryBuf[0] = 0; + entryBuf[1] = 0; + entrySize = 0; + } + + if (key != this->key) // free our key if we created a VerseKey + delete key; + + return entryBuf; +} + + +void RawGenBook::setEntry(const char *inbuf, long len) { + + __u32 offset = archtosword32(lseek(bdtfd->getFd(), 0, SEEK_END)); + __u32 size = 0; + TreeKeyIdx *key = ((TreeKeyIdx *)this->key); + + char userData[8]; + + if (!len) + len = strlen(inbuf); + + write(bdtfd->getFd(), inbuf, len); + + size = archtosword32(len); + memcpy(userData, &offset, 4); + memcpy(userData+4, &size, 4); + key->setUserData(userData, 8); + key->save(); +} + + +void RawGenBook::linkEntry(const SWKey *inkey) { + TreeKeyIdx *srckey = 0; + TreeKeyIdx *key = ((TreeKeyIdx *)this->key); + // see if we have a VerseKey * or decendant + try { + srckey = 
SWDYNAMIC_CAST(TreeKeyIdx, inkey); + } + catch ( ... ) {} + // if we don't have a VerseKey * decendant, create our own + if (!srckey) { + srckey = (TreeKeyIdx *)CreateKey(); + (*srckey) = *inkey; + } + + key->setUserData(srckey->getUserData(), 8); + key->save(); + + if (inkey != srckey) // free our key if we created a VerseKey + delete srckey; +} + + +/****************************************************************************** + * RawGenBook::deleteEntry - deletes this entry + * + * RET: *this + */ + +void RawGenBook::deleteEntry() { + TreeKeyIdx *key = ((TreeKeyIdx *)this->key); + key->remove(); +} + + +char RawGenBook::createModule(const char *ipath) { + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd; + signed char retval; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + sprintf(buf, "%s.bdt", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + retval = TreeKeyIdx::create(path); + delete [] path; + return retval; +} + + +SWKey *RawGenBook::CreateKey() { + TreeKeyIdx *newKey = new TreeKeyIdx(path); + return newKey; +} diff --git a/src/modules/genbook/swgenbook.cpp b/src/modules/genbook/swgenbook.cpp new file mode 100644 index 0000000..589b0b9 --- /dev/null +++ b/src/modules/genbook/swgenbook.cpp @@ -0,0 +1,27 @@ +/****************************************************************************** + * swld.cpp - code for base class 'SWLD'. SWLD is the basis for all + * types of Lexicon and Dictionary modules (hence the 'LD'). 
+ */ + +#include <swgenbook.h> + + +/****************************************************************************** + * SWLD Constructor - Initializes data for instance of SWLD + * + * ENT: imodname - Internal name for module + * imoddesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +SWGenBook::SWGenBook(const char *imodname, const char *imoddesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : SWModule(imodname, imoddesc, idisp, "Generic Books", enc, dir, mark, ilang) { +} + + +/****************************************************************************** + * SWLD Destructor - Cleans up instance of SWLD + */ + +SWGenBook::~SWGenBook() { +} + diff --git a/src/modules/lexdict/rawld/rawld.cpp b/src/modules/lexdict/rawld/rawld.cpp new file mode 100644 index 0000000..058679a --- /dev/null +++ b/src/modules/lexdict/rawld/rawld.cpp @@ -0,0 +1,173 @@ +/****************************************************************************** + * rawld.cpp - code for class 'RawLD'- a module that reads raw lexicon and + * dictionary files: *.dat *.idx + */ + + +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> +#include <rawstr.h> +#include <rawld.h> + + + /****************************************************************************** + * RawLD Constructor - Initializes data for instance of RawLD + * + * ENT: ipath - path and filename of files (no extension) + * iname - Internal name for module + * idesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +RawLD::RawLD(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : RawStr(ipath), SWLD(iname, idesc, idisp, enc, dir, mark, ilang) +{ +} + + 
+/****************************************************************************** + * RawLD Destructor - Cleans up instance of RawLD + */ + +RawLD::~RawLD() +{ +} + + +/****************************************************************************** + * RawLD::strongsPad - Pads a key if it is 100% digits to 5 places + * + * ENT: buf - buffer to check and pad + */ + +void RawLD::strongsPad(char *buf) +{ + const char *check; + int size = 0; + int len = strlen(buf); + if ((len < 5) && (len > 0)) { + for (check = buf; *check; check++) { + if (!isdigit(*check)) + break; + else size++; + } + + if ((size == len) && size) + sprintf(buf, "%.5d", atoi(buf)); + } +} + + +/****************************************************************************** + * RawLD::getEntry - Looks up entry from data file. 'Snaps' to closest + * entry and sets 'entrybuf'. + * + * ENT: away - number of entries offset from key (default = 0) + * + * RET: error status + */ + +char RawLD::getEntry(long away) +{ + long start = 0; + unsigned short size = 0; + char *idxbuf = 0; + char retval = 0; + + char *buf = new char [ strlen(*key) + 6 ]; + strcpy(buf, *key); + + strongsPad(buf); + + if (!(retval = findoffset(buf, &start, &size, away))) { + readtext(start, &size, &idxbuf, &entrybuf); + entrySize = size; // support getEntrySize call + if (!key->Persist()) // If we have our own key + *key = idxbuf; // reset it to entry index buffer + + stdstr(&entkeytxt, idxbuf); // set entry key text that module 'snapped' to. 
+ delete [] idxbuf; + } + else { + if (entrybuf) + delete [] entrybuf; + entrybuf = new char [ 5 ]; + *entrybuf = 0; + } + + delete [] buf; + return retval; +} + + +/****************************************************************************** + * RawLD::getRawEntry - Returns the correct entry when char * cast + * is requested + * + * RET: string buffer with entry + */ + +char *RawLD::getRawEntry() { + + char ret = getEntry(); + if (!ret) { + if (!isUnicode()) + preptext(entrybuf); + } + else error = ret; + + return entrybuf; +} + + +/****************************************************************************** + * RawLD::increment - Increments module key a number of entries + * + * ENT: increment - Number of entries to jump forward + * + * RET: *this + */ + +void RawLD::increment(int steps) { + char tmperror; + + if (key->Traversable()) { + *key += steps; + error = key->Error(); + steps = 0; + } + + tmperror = (getEntry(steps)) ? KEYERR_OUTOFBOUNDS : 0; + error = (error)?error:tmperror; + *key = entkeytxt; +} + + +void RawLD::setEntry(const char *inbuf, long len) { + settext(*key, inbuf, len); +} + + +void RawLD::linkEntry(const SWKey *inkey) { + linkentry(*key, *inkey); +} + + +/****************************************************************************** + * RawFiles::deleteEntry - deletes this entry + * + * RET: *this + */ + +void RawLD::deleteEntry() { + settext(*key, ""); +} diff --git a/src/modules/lexdict/rawld4/rawld4.cpp b/src/modules/lexdict/rawld4/rawld4.cpp new file mode 100644 index 0000000..1bdf22f --- /dev/null +++ b/src/modules/lexdict/rawld4/rawld4.cpp @@ -0,0 +1,172 @@ +/****************************************************************************** + * rawld.cpp - code for class 'RawLD'- a module that reads raw lexicon and + * dictionary files: *.dat *.idx + */ + + +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> 
+#include <rawstr4.h> +#include <rawld4.h> + + + /****************************************************************************** + * RawLD Constructor - Initializes data for instance of RawLD + * + * ENT: ipath - path and filename of files (no extension) + * iname - Internal name for module + * idesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +RawLD4::RawLD4(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : RawStr4(ipath), SWLD(iname, idesc, idisp, enc, dir, mark, ilang) +{ +} + + +/****************************************************************************** + * RawLD Destructor - Cleans up instance of RawLD + */ + +RawLD4::~RawLD4() +{ +} + + +/****************************************************************************** + * RawLD4::strongsPad - Pads a key if it is 100% digits to 5 places + * + * ENT: buf - buffer to check and pad + */ + +void RawLD4::strongsPad(char *buf) +{ + const char *check; + long size = 0; + int len = strlen(buf); + if ((len < 5) && (len > 0)) { + for (check = buf; *check; check++) { + if (!isdigit(*check)) + break; + else size++; + } + + if ((size == len) && size) + sprintf(buf, "%.5d", atoi(buf)); + } +} + + +/****************************************************************************** + * RawLD4::getEntry - Looks up entry from data file. 'Snaps' to closest + * entry and sets 'entrybuf'. 
+ * + * ENT: away - number of entries offset from key (default = 0) + * + * RET: error status + */ + +char RawLD4::getEntry(long away) +{ + long start = 0; + unsigned long size = 0; + char *idxbuf = 0; + char retval = 0; + + char *buf = new char [ strlen(*key) + 6 ]; + strcpy(buf, *key); + + strongsPad(buf); + + *entrybuf = 0; + if (!(retval = findoffset(buf, &start, &size, away))) { + readtext(start, &size, &idxbuf, &entrybuf); + entrySize = size; // support getEntrySize call + if (!key->Persist()) // If we have our own key + *key = idxbuf; // reset it to entry index buffer + + stdstr(&entkeytxt, idxbuf); // set entry key text that module 'snapped' to. + delete [] idxbuf; + } + else { + entrybuf = new char [ 5 ]; + *entrybuf = 0; + } + + delete [] buf; + return retval; +} + + +/****************************************************************************** + * RawLD4::getRawEntry - Returns the correct entry when char * cast + * is requested + * + * RET: string buffer with entry + */ + +char *RawLD4::getRawEntry() { + + char ret = getEntry(); + if (!ret) { + if (!isUnicode()) + preptext(entrybuf); + } + else error = ret; + + return entrybuf; +} + + +/****************************************************************************** + * RawLD4::increment - Increments module key a number of entries + * + * ENT: increment - Number of entries to jump forward + * + * RET: *this + */ + +void RawLD4::increment(int steps) { + char tmperror; + + if (key->Traversable()) { + *key += steps; + error = key->Error(); + steps = 0; + } + + tmperror = (getEntry(steps)) ? 
KEYERR_OUTOFBOUNDS : 0; + error = (error)?error:tmperror; + *key = entkeytxt; +} + + +void RawLD4::setEntry(const char *inbuf, long len) { + setText(*key, inbuf, len); +} + + +void RawLD4::linkEntry(const SWKey *inkey) { + linkentry(*key, *inkey); +} + + +/****************************************************************************** + * RawFiles::deleteEntry - deletes this entry + * + * RET: *this + */ + +void RawLD4::deleteEntry() { + setText(*key, ""); +} diff --git a/src/modules/lexdict/swld.cpp b/src/modules/lexdict/swld.cpp new file mode 100644 index 0000000..d28a5b8 --- /dev/null +++ b/src/modules/lexdict/swld.cpp @@ -0,0 +1,76 @@ +/****************************************************************************** + * swld.cpp - code for base class 'SWLD'. SWLD is the basis for all + * types of Lexicon and Dictionary modules (hence the 'LD'). + */ + +#include <swld.h> + + +/****************************************************************************** + * SWLD Constructor - Initializes data for instance of SWLD + * + * ENT: imodname - Internal name for module + * imoddesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +SWLD::SWLD(const char *imodname, const char *imoddesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : SWModule(imodname, imoddesc, idisp, "Lexicons / Dictionaries", enc, dir, mark, ilang) +{ + delete key; + key = CreateKey(); + entkeytxt = new char [1]; + *entkeytxt = 0; +} + + +/****************************************************************************** + * SWLD Destructor - Cleans up instance of SWLD + */ + +SWLD::~SWLD() +{ + if (entkeytxt) + delete [] entkeytxt; +} + + +/****************************************************************************** + * SWLD::KeyText - Sets/gets module KeyText, getting from saved text if key is + * persistent + * + * ENT: ikeytext - value which to set keytext + * [0] - only get + * + * RET: pointer to keytext + 
*/ + +const char *SWLD::KeyText(const char *ikeytext) +{ + if (key->Persist() && !ikeytext) { + getRawEntry(); // force module key to snap to entry + return entkeytxt; + } + else return SWModule::KeyText(ikeytext); +} + + +/****************************************************************************** + * SWLD::setPosition(SW_POSITION) - Positions this key if applicable + */ + +void SWLD::setPosition(SW_POSITION p) { + if (!key->Traversable()) { + switch (p) { + case POS_TOP: + *key = ""; + break; + case POS_BOTTOM: + *key = "zzzzzzzzz"; + break; + } + } + else *key = p; + getRawEntry(); +} + + diff --git a/src/modules/lexdict/zld/zld.cpp b/src/modules/lexdict/zld/zld.cpp new file mode 100644 index 0000000..047effa --- /dev/null +++ b/src/modules/lexdict/zld/zld.cpp @@ -0,0 +1,172 @@ +/****************************************************************************** + * rawld.cpp - code for class 'RawLD'- a module that reads raw lexicon and + * dictionary files: *.dat *.idx + */ + + +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> +#include <zstr.h> +#include <zld.h> + + + /****************************************************************************** + * RawLD Constructor - Initializes data for instance of RawLD + * + * ENT: ipath - path and filename of files (no extension) + * iname - Internal name for module + * idesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +zLD::zLD(const char *ipath, const char *iname, const char *idesc, long blockCount, SWCompress *icomp, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : zStr(ipath, -1, blockCount, icomp), SWLD(iname, idesc, idisp, enc, dir, mark, ilang) { + +} + + +/****************************************************************************** + * RawLD Destructor - Cleans up instance of RawLD + */ + 
+zLD::~zLD() { + +} + + +/****************************************************************************** + * zLD::strongsPad - Pads a key if it is 100% digits to 5 places + * + * ENT: buf - buffer to check and pad + */ + +void zLD::strongsPad(char *buf) { + const char *check; + long size = 0; + int len = strlen(buf); + if ((len < 5) && (len > 0)) { + for (check = buf; *check; check++) { + if (!isdigit(*check)) + break; + else size++; + } + + if ((size == len) && size) + sprintf(buf, "%.5d", atoi(buf)); + } +} + + +/****************************************************************************** + * zLD::getEntry - Looks up entry from data file. 'Snaps' to closest + * entry and sets 'entrybuf'. + * + * ENT: away - number of entries offset from key (default = 0) + * + * RET: error status + */ + +char zLD::getEntry(long away) { + char *idxbuf = 0; + char *ebuf = 0; + char retval = 0; + long index; + unsigned long size; + char *buf = new char [ strlen(*key) + 6 ]; + strcpy(buf, *key); + + strongsPad(buf); + + *entrybuf = 0; + if (!(retval = findKeyIndex(buf, &index, away))) { + getText(index, &idxbuf, &ebuf); + size = strlen(ebuf) + 1; + entrybuf = new char [ size * FILTERPAD ]; + strcpy(entrybuf, ebuf); + + entrySize = size; // support getEntrySize call + if (!key->Persist()) // If we have our own key + *key = idxbuf; // reset it to entry index buffer + + stdstr(&entkeytxt, idxbuf); // set entry key text that module 'snapped' to. 
+ free(idxbuf); + free(ebuf); + } + else { + entrybuf = new char [ 5 ]; + entrybuf[0] = 0; + entrybuf[1] = 0; + } + + delete [] buf; + return retval; +} + + +/****************************************************************************** + * zLD::getRawEntry - Returns the correct entry when char * cast + * is requested + * + * RET: string buffer with entry + */ + +char *zLD::getRawEntry() { + if (!getEntry() && !isUnicode()) { + prepText(entrybuf); + } + + return entrybuf; +} + + +/****************************************************************************** + * zLD::increment - Increments module key a number of entries + * + * ENT: increment - Number of entries to jump forward + * + * RET: *this + */ + +void zLD::increment(int steps) { + char tmperror; + + if (key->Traversable()) { + *key += steps; + error = key->Error(); + steps = 0; + } + + tmperror = (getEntry(steps)) ? KEYERR_OUTOFBOUNDS : 0; + error = (error)?error:tmperror; + *key = entkeytxt; +} + + +void zLD::setEntry(const char *inbuf, long len) { + setText(*key, inbuf, len); +} + + +void zLD::linkEntry(const SWKey *inkey) { + zStr::linkEntry(*key, *inkey); +} + + +/****************************************************************************** + * RawFiles::deleteEntry - deletes this entry + * + * RET: *this + */ + +void zLD::deleteEntry() { + setText(*key, ""); +} diff --git a/src/modules/swmodule.cpp b/src/modules/swmodule.cpp new file mode 100644 index 0000000..f37df27 --- /dev/null +++ b/src/modules/swmodule.cpp @@ -0,0 +1,661 @@ +/****************************************************************************** + * swmodule.cpp -code for base class 'module'. Module is the basis for all + * types of modules (e.g. texts, commentaries, maps, lexicons, + * etc.) 
+ */ + +#include <string.h> +#include <swmodule.h> +#include <utilfuns.h> +#include <regex.h> // GNU +#include <swfilter.h> +#include <versekey.h> // KLUDGE for Search +#ifndef _MSC_VER +#include <iostream> +#endif + +SWDisplay SWModule::rawdisp; +void SWModule::nullPercent(char percent, void *percentUserData) {} + +/****************************************************************************** + * SWModule Constructor - Initializes data for instance of SWModule + * + * ENT: imodname - Internal name for module + * imoddesc - Name to display to user for module + * idisp - Display object to use for displaying + * imodtype - Type of Module (All modules will be displayed with + * others of same type under their modtype heading + * unicode - if this module is unicode + */ + +SWModule::SWModule(const char *imodname, const char *imoddesc, SWDisplay *idisp, char *imodtype, SWTextEncoding encoding, SWTextDirection direction, SWTextMarkup markup, const char* imodlang) { + key = CreateKey(); + entrybuf = new char [1]; + *entrybuf = 0; + config = &ownConfig; + entrybufallocsize = 0; + modname = 0; + error = 0; + moddesc = 0; + modtype = 0; + modlang = 0; + this->encoding = encoding; + this->direction = direction; + this->markup = markup; + entrySize= -1; + disp = (idisp) ? 
idisp : &rawdisp; + stdstr(&modname, imodname); + stdstr(&moddesc, imoddesc); + stdstr(&modtype, imodtype); + stdstr(&modlang, imodlang); + stripFilters = new FilterList(); + rawFilters = new FilterList(); + renderFilters = new FilterList(); + optionFilters = new FilterList(); + encodingFilters = new FilterList(); + skipConsecutiveLinks = true; + procEntAttr = true; +} + + +/****************************************************************************** + * SWModule Destructor - Cleans up instance of SWModule + */ + +SWModule::~SWModule() +{ + if (entrybuf) + delete [] entrybuf; + if (modname) + delete [] modname; + if (moddesc) + delete [] moddesc; + if (modtype) + delete [] modtype; + if (modlang) + delete [] modlang; + + if (key) { + if (!key->Persist()) + delete key; + } + + stripFilters->clear(); + rawFilters->clear(); + renderFilters->clear(); + optionFilters->clear(); + encodingFilters->clear(); + entryAttributes.clear(); + + delete stripFilters; + delete rawFilters; + delete renderFilters; + delete optionFilters; + delete encodingFilters; +} + + +/****************************************************************************** + * SWModule::CreateKey - Allocates a key of specific type for module + * + * RET: pointer to allocated key + */ + +SWKey *SWModule::CreateKey() +{ + return new SWKey(); +} + + +/****************************************************************************** + * SWModule::Error - Gets and clears error status + * + * RET: error status + */ + +char SWModule::Error() +{ + char retval = error; + + error = 0; + return retval; +} + + +/****************************************************************************** + * SWModule::Name - Sets/gets module name + * + * ENT: imodname - value which to set modname + * [0] - only get + * + * RET: pointer to modname + */ + +char *SWModule::Name(const char *imodname) +{ + return stdstr(&modname, imodname); +} + + +/****************************************************************************** + * 
SWModule::Description - Sets/gets module description + * + * ENT: imoddesc - value which to set moddesc + * [0] - only get + * + * RET: pointer to moddesc + */ + +char *SWModule::Description(const char *imoddesc) +{ + return stdstr(&moddesc, imoddesc); +} + + +/****************************************************************************** + * SWModule::Type - Sets/gets module type + * + * ENT: imodtype - value which to set modtype + * [0] - only get + * + * RET: pointer to modtype + */ + +char *SWModule::Type(const char *imodtype) +{ + return stdstr(&modtype, imodtype); +} + +/****************************************************************************** + * SWModule::Direction - Sets/gets module direction + * + * ENT: newdir - value which to set direction + * [-1] - only get + * + * RET: char direction + */ +char SWModule::Direction(signed char newdir) { + if (newdir != -1) + direction = newdir; + return direction; +} + +/****************************************************************************** + * SWModule::Encoding - Sets/gets module encoding + * + * ENT: newdir - value which to set direction + * [-1] - only get + * + * RET: char encoding + */ +char SWModule::Encoding(signed char newenc) { + if (newenc != -1) + encoding = newenc; + return encoding; +} + +/****************************************************************************** + * SWModule::Markup - Sets/gets module markup + * + * ENT: newdir - value which to set direction + * [-1] - only get + * + * RET: char markup + */ +char SWModule::Markup(signed char newmark) { + if (newmark != -1) + markup = newmark; + return markup; +} + + +/****************************************************************************** + * SWModule::Lang - Sets/gets module language + * + * ENT: imodlang - value which to set modlang + * [0] - only get + * + * RET: pointer to modname + */ + +char *SWModule::Lang(const char *imodlang) +{ + return stdstr(&modlang, imodlang); +} + + 
+/****************************************************************************** + * SWModule::Disp - Sets/gets display driver + * + * ENT: idisp - value which to set disp + * [0] - only get + * + * RET: pointer to disp + */ + +SWDisplay *SWModule::Disp(SWDisplay *idisp) +{ + if (idisp) + disp = idisp; + + return disp; +} + + +/****************************************************************************** + * SWModule::Display - Calls this modules display object and passes itself + * + * RET: error status + */ + +char SWModule::Display() +{ + disp->Display(*this); + return 0; +} + + +/****************************************************************************** + * SWModule::SetKey - Sets a key to this module for position to a particular + * record or set of records + * + * ENT: ikey - key with which to set this module + * + * RET: error status + */ + +char SWModule::SetKey(const SWKey &ikey) { + return SetKey(&ikey); +} + +char SWModule::SetKey(const SWKey *ikey) +{ + SWKey *oldKey = 0; + + if (key) { + if (!key->Persist()) // if we have our own copy + oldKey = key; + } + + if (!ikey->Persist()) { // if we are to keep our own copy + key = CreateKey(); + *key = *ikey; + } + else key = (SWKey *)ikey; // if we are to just point to an external key + + if (oldKey) + delete oldKey; + + return 0; +} + + +/****************************************************************************** + * SWModule::KeyText - Sets/gets module KeyText + * + * ENT: ikeytext - value which to set keytext + * [0] - only get + * + * RET: pointer to keytext + */ + +const char *SWModule::KeyText(const char *ikeytext) +{ + if (ikeytext) + SetKey(ikeytext); + + return *key; +} + + +/****************************************************************************** + * SWModule::setPosition(SW_POSITION) - Positions this modules to an entry + * + * ENT: p - position (e.g. 
TOP, BOTTOM) + * + * RET: *this + */ + +void SWModule::setPosition(SW_POSITION p) { + *key = p; + char saveError = key->Error(); + + switch (p) { + case POS_TOP: + (*this)++; + (*this)--; + break; + + case POS_BOTTOM: + (*this)--; + (*this)++; + break; + } + + error = saveError; +} + + +/****************************************************************************** + * SWModule::increment - Increments module key a number of entries + * + * ENT: increment - Number of entries to jump forward + * + * RET: *this + */ + +void SWModule::increment(int steps) { + (*key) += steps; + error = key->Error(); +} + + +/****************************************************************************** + * SWModule::decrement - Decrements module key a number of entries + * + * ENT: decrement - Number of entries to jump backward + * + * RET: *this + */ + +void SWModule::decrement(int steps) { + (*key) -= steps; + error = key->Error(); +} + + +/****************************************************************************** + * SWModule::Search - Searches a module for a string + * + * ENT: istr - string for which to search + * searchType - type of search to perform + * >=0 - regex + * -1 - phrase + * -2 - multiword + * flags - options flags for search + * justCheckIfSupported - if set, don't search, only tell if this + * function supports requested search. 
+ * + * RET: listkey set to verses that contain istr + */ + +ListKey &SWModule::Search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) +{ + SWKey *savekey = 0; + SWKey *searchkey = 0; + regex_t preg; + SWKey textkey; + char **words = 0; + char *wordBuf = 0; + int wordCount = 0; + const char *sres; + terminateSearch = false; + char perc = 1; + bool savePEA = isProcessEntryAttributes(); + + processEntryAttributes(false); + listkey.ClearList(); + + if (!key->Persist()) { + savekey = CreateKey(); + *savekey = *key; + } + else savekey = key; + + searchkey = (scope)?scope->clone():(key->Persist())?key->clone():0; + if (searchkey) { + searchkey->Persist(1); + SetKey(*searchkey); + } + + (*percent)(perc, percentUserData); + // MAJOR KLUDGE: VerseKey::Index still return index within testament. + // VerseKey::NewIndex should be moved to Index and Index should be some + // VerseKey specific name + VerseKey *vkcheck = 0; +#ifndef _WIN32_WCE + try { +#endif + vkcheck = SWDYNAMIC_CAST(VerseKey, key); +#ifndef _WIN32_WCE + } + catch (...) {} +#endif + // end MAJOR KLUDGE + + *this = BOTTOM; + // fix below when we find out the bug + long highIndex = (vkcheck)?32300/*vkcheck->NewIndex()*/:key->Index(); + if (!highIndex) + highIndex = 1; // avoid division by zero errors. 
+ *this = TOP; + if (searchType >= 0) { + flags |=searchType|REG_NOSUB|REG_EXTENDED; + regcomp(&preg, istr, flags); + } + + (*percent)(++perc, percentUserData); + if (searchType == -2) { + wordBuf = (char *)calloc(sizeof(char), strlen(istr) + 1); + strcpy(wordBuf, istr); + words = (char **)calloc(sizeof(char *), 10); + int allocWords = 10; + words[wordCount] = strtok(wordBuf, " "); + while (words[wordCount]) { + wordCount++; + if (wordCount == allocWords) { + allocWords+=10; + words = (char **)realloc(words, sizeof(char *)*allocWords); + } + words[wordCount] = strtok(NULL, " "); + } + } + + perc = 5; + (*percent)(perc, percentUserData); + + while (!Error() && !terminateSearch) { + + + long mindex = 0; + if (vkcheck) + mindex = vkcheck->NewIndex(); + else mindex = key->Index(); + float per = (float)mindex / highIndex; + per *= 93; + per += 5; + char newperc = (char)per; +// char newperc = (char)(5+(93*(((float)((vkcheck)?vkcheck->NewIndex():key->Index()))/highIndex))); + if (newperc > perc) { + perc = newperc; + (*percent)(perc, percentUserData); + } + else if (newperc < perc) { +#ifndef _MSC_VER + std::cerr << "Serious error: new percentage complete is less than previous value\n"; + std::cerr << "using vk? " << ((vkcheck)?"yes":"no") << "\n"; + std::cerr << "index: " << ((vkcheck)?vkcheck->NewIndex():key->Index()) << "\n"; + std::cerr << "highIndex: " << highIndex << "\n"; + std::cerr << "newperc ==" << (int)newperc << "%" << "is smaller than\n"; + std::cerr << "perc == " << (int )perc << "% \n"; +#endif + } + if (searchType >= 0) { + if (!regexec(&preg, StripText(), 0, 0, 0)) { + textkey = KeyText(); + listkey << textkey; + } + } + else { + if (searchType == -1) { + sres = ((flags & REG_ICASE) == REG_ICASE) ? 
stristr(StripText(), istr) : strstr(StripText(), istr); + if (sres) { + textkey = KeyText(); + listkey << textkey; + } + } + if (searchType == -2) { + int i; + const char *stripBuf = StripText(); + for (i = 0; i < wordCount; i++) { + sres = ((flags & REG_ICASE) == REG_ICASE) ? stristr(stripBuf, words[i]) : strstr(stripBuf, words[i]); + if (!sres) + break; + } + if (i == wordCount) { + textkey = KeyText(); + listkey << textkey; + } + + } + } + (*this)++; + } + if (searchType >= 0) + regfree(&preg); + + if (searchType == -2) { + free(words); + free(wordBuf); + } + + SetKey(*savekey); + + if (!savekey->Persist()) + delete savekey; + + if (searchkey) + delete searchkey; + + listkey = TOP; + processEntryAttributes(savePEA); + (*percent)(100, percentUserData); + + return listkey; +} + + +/****************************************************************************** + * SWModule::StripText() - calls all stripfilters on current text + * + * ENT: buf - buf to massage instead of this modules current text + * len - max len of buf + * + * RET: this module's text at specified key location massaged by Strip filters + */ + +const char *SWModule::StripText(char *buf, int len) +{ + return RenderText(buf, len, false); +} + + +/****************************************************************************** + * SWModule::RenderText - calls all renderfilters on current text + * + * ENT: buf - buffer to Render instead of current module position + * + * RET: listkey set to verses that contain istr + */ + + const char *SWModule::RenderText(char *buf, int len, bool render) { + entryAttributes.clear(); + char *tmpbuf = (buf) ? buf : getRawEntry(); + SWKey *key = 0; + static char *null = ""; + + if (tmpbuf) { + unsigned long size = (len < 0) ? ((getEntrySize()<0) ? 
strlen(tmpbuf) : getEntrySize()) * FILTERPAD : len; + if (size > 0) { + key = (SWKey *)*this; + + optionFilter(tmpbuf, size, key); + + if (render) { + renderFilter(tmpbuf, size, key); + encodingFilter(tmpbuf, size, key); + } + else stripFilter(tmpbuf, size, key); + } + } + else { + tmpbuf = null; + } + + return tmpbuf; +} + + +/****************************************************************************** + * SWModule::RenderText - calls all renderfilters on current text + * + * ENT: tmpKey - key to use to grab text + * + * RET: this module's text at specified key location massaged by RenderFilers + */ + + const char *SWModule::RenderText(SWKey *tmpKey) +{ + SWKey *savekey; + const char *retVal; + + if (!key->Persist()) { + savekey = CreateKey(); + *savekey = *key; + } + else savekey = key; + + SetKey(*tmpKey); + + retVal = RenderText(); + + SetKey(*savekey); + + if (!savekey->Persist()) + delete savekey; + + return retVal; +} + + +/****************************************************************************** + * SWModule::StripText - calls all StripTextFilters on current text + * + * ENT: tmpKey - key to use to grab text + * + * RET: this module's text at specified key location massaged by Strip filters + */ + +const char *SWModule::StripText(SWKey *tmpKey) +{ + SWKey *savekey; + const char *retVal; + + if (!key->Persist()) { + savekey = CreateKey(); + *savekey = *key; + } + else savekey = key; + + SetKey(*tmpKey); + + retVal = StripText(); + + SetKey(*savekey); + + if (!savekey->Persist()) + delete savekey; + + return retVal; +} + + +const char *SWModule::getConfigEntry(const char *key) const { + ConfigEntMap::iterator it = config->find(key); + return (it != config->end()) ? 
it->second.c_str() : 0; +} + + +void SWModule::setConfig(ConfigEntMap *config) { + this->config = config; +} diff --git a/src/modules/texts/rawgbf/rawgbf.cpp b/src/modules/texts/rawgbf/rawgbf.cpp new file mode 100644 index 0000000..dd2fd47 --- /dev/null +++ b/src/modules/texts/rawgbf/rawgbf.cpp @@ -0,0 +1,84 @@ +/****************************************************************************** + * rawgbf.cpp - code for class 'RawGBF'- a module that reads raw text files: + * ot and nt using indexs ??.bks ??.cps ??.vss + */ + + +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> +#include <rawverse.h> +#include <rawgbf.h> + + +/****************************************************************************** + * RawGBF Constructor - Initializes data for instance of RawGBF + * + * ENT: iname - Internal name for module + * idesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +RawGBF::RawGBF(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp) : SWText(iname, idesc, idisp), RawVerse(ipath) +{ +} + + +/****************************************************************************** + * RawGBF Destructor - Cleans up instance of RawGBF + */ + +RawGBF::~RawGBF() +{ +} + + +/****************************************************************************** + * RawGBF::operator char * - Returns the correct verse when char * cast + * is requested + * + * RET: string buffer with verse + */ + +RawGBF::operator char*() +{ + long start; + unsigned short size; + VerseKey *key = 0; + +#ifndef _WIN32_WCE + try { +#endif + key = SWDYNAMIC_CAST(VerseKey, this->key); +#ifndef _WIN32_WCE + } + catch ( ... 
) {} +#endif + if (!key) + key = new VerseKey(this->key); + + + findoffset(key->Testament(), key->Index(), &start, &size); + + if (entrybuf) + delete [] entrybuf; + entrybuf = new char [ size * 3 ]; // extra for conversion to RTF or other. + + readtext(key->Testament(), start, size + 1, entrybuf); + preptext(entrybuf); + RenderText(entrybuf, size * 3); + + if (key != this->key) + delete key; + + return entrybuf; +} diff --git a/src/modules/texts/rawtext/rawtext.cpp b/src/modules/texts/rawtext/rawtext.cpp new file mode 100644 index 0000000..c2214f8 --- /dev/null +++ b/src/modules/texts/rawtext/rawtext.cpp @@ -0,0 +1,580 @@ +/****************************************************************************** + * rawtext.cpp - code for class 'RawText'- a module that reads raw text files: + * ot and nt using indexs ??.bks ??.cps ??.vss + */ + + +#include <stdio.h> +#include <fcntl.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> +#include <rawverse.h> +#include <rawtext.h> + +#include <map> +#include <list> +#include <algorithm> +#include <regex.h> // GNU + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +/****************************************************************************** + * RawText Constructor - Initializes data for instance of RawText + * + * ENT: iname - Internal name for module + * idesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +RawText::RawText(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) + : SWText(iname, idesc, idisp, enc, dir, mark, ilang), + RawVerse(ipath) { + + string fname; + fname = path; + char ch = fname.c_str()[strlen(fname.c_str())-1]; + if ((ch != '/') && (ch != '\\')) + fname += "/"; + + for (int loop = 0; loop < 2; loop++) { + fastSearch[loop] = 0; + string fastidxname =(fname + 
((loop)?"ntwords.dat":"otwords.dat")); + if (!access(fastidxname.c_str(), 04)) { + fastidxname = (fname + ((loop)?"ntwords.idx":"otwords.idx")); + if (!access(fastidxname.c_str(), 04)) + fastSearch[loop] = new RawStr((fname + ((loop)?"ntwords":"otwords")).c_str()); + } + } +} + + +/****************************************************************************** + * RawText Destructor - Cleans up instance of RawText + */ + +RawText::~RawText() +{ + if (fastSearch[0]) + delete fastSearch[0]; + + if (fastSearch[1]) + delete fastSearch[1]; +} + + +/****************************************************************************** + * RawText::getRawEntry - Returns the correct verse when char * cast + * is requested + * + * RET: string buffer with verse + */ + +char *RawText::getRawEntry() { + long start = 0; + unsigned short size = 0; + VerseKey *key = 0; + + // see if we have a VerseKey * or decendant + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... ) { } + // if we don't have a VerseKey * decendant, create our own + if (!key) + key = new VerseKey(this->key); + + findoffset(key->Testament(), key->Index(), &start, &size); + entrySize = size; // support getEntrySize call + + unsigned long newsize = (size + 2) * FILTERPAD; + if (newsize > entrybufallocsize) { + if (entrybuf) + delete [] entrybuf; + entrybuf = new char [ newsize ]; + entrybufallocsize = newsize; + } + *entrybuf = 0; + + readtext(key->Testament(), start, (size + 2), entrybuf); + + rawFilter(entrybuf, size, key); + + if (!isUnicode()) + preptext(entrybuf); + + if (this->key != key) // free our key if we created a VerseKey + delete key; + + return entrybuf; +} + + +signed char RawText::createSearchFramework() { + SWKey *savekey = 0; + SWKey *searchkey = 0; + SWKey textkey; + char *word = 0; + char *wordBuf = 0; + + // dictionary holds words associated with a list + // containing every module position that contains + // the word. 
[0] Old Testament; [1] NT + map < string, list<long> > dictionary[2]; + + + // save key information so as not to disrupt original + // module position + if (!key->Persist()) { + savekey = CreateKey(); + *savekey = *key; + } + else savekey = key; + + searchkey = (key->Persist())?key->clone():0; + if (searchkey) { + searchkey->Persist(1); + SetKey(*searchkey); + } + + // position module at the beginning + *this = TOP; + + VerseKey *lkey = (VerseKey *)key; + + // iterate thru each entry in module + while (!Error()) { + long index = lkey->Index(); + wordBuf = (char *)calloc(sizeof(char), strlen(StripText()) + 1); + strcpy(wordBuf, StripText()); + + // grab each word from the text + word = strtok(wordBuf, " !.,?;:()-=+/\\|{}[]\"<>"); + while (word) { + + // make word upper case + toupperstr(word); + + // lookup word in dictionary (or make entry in dictionary + // for this word) and add this module position (index) to + // the word's associated list of module positions + dictionary[lkey->Testament()-1][word].push_back(index); + word = strtok(NULL, " !.,?;:()-=+/\\|{}[]\"<>"); + } + free(wordBuf); + (*this)++; + } + + // reposition module back to where it was before we were called + SetKey(*savekey); + + if (!savekey->Persist()) + delete savekey; + + if (searchkey) + delete searchkey; + + + // --------- Let's output an index from our dictionary ----------- + int datfd; + int idxfd; + map < string, list<long> >::iterator it; + list<long>::iterator it2; + unsigned long offset, entryoff; + unsigned short size; + + string fname; + fname = path; + char ch = fname.c_str()[strlen(fname.c_str())-1]; + if ((ch != '/') && (ch != '\\')) + fname += "/"; + + // for old and new testament do... 
+ for (int loop = 0; loop < 2; loop++) { + if ((datfd = open((fname + ((loop)?"ntwords.dat":"otwords.dat")).c_str(), O_CREAT|O_WRONLY|O_BINARY, 00644 )) == -1) + return -1; + if ((idxfd = open((fname + ((loop)?"ntwords.idx":"otwords.idx")).c_str(), O_CREAT|O_WRONLY|O_BINARY, 00644 )) == -1) { + close(datfd); + return -1; + } + + // iterate thru each word in the dictionary + for (it = dictionary[loop].begin(); it != dictionary[loop].end(); it++) { + printf("%s: ", it->first.c_str()); + + // get our current offset in our word.dat file and write this as the start + // of the next entry in our database + offset = lseek(datfd, 0, SEEK_CUR); + write(idxfd, &offset, 4); + + // write our word out to the word.dat file, delineating with a \n + write(datfd, it->first.c_str(), strlen(it->first.c_str())); + write(datfd, "\n", 1); + + // force our mod position list for this word to be unique (remove + // duplicates that may exist if the word was found more than once + // in the verse + it->second.unique(); + + // iterate thru each mod position for this word and output it to + // our database + unsigned short count = 0; + for (it2 = it->second.begin(); it2 != it->second.end(); it2++) { + entryoff= *it2; + write(datfd, &entryoff, 4); + count++; + } + + // now see what our new position is in our word.dat file and + // determine the size of this database entry + size = lseek(datfd, 0, SEEK_CUR) - offset; + + // store the size of this database entry + write(idxfd, &size, 2); + printf("%d entries (size: %d)\n", count, size); + } + close(datfd); + close(idxfd); + } + return 0; +} + + +/****************************************************************************** + * SWModule::Search - Searches a module for a string + * + * ENT: istr - string for which to search + * searchType - type of search to perform + * >=0 - regex + * -1 - phrase + * -2 - multiword + * flags - options flags for search + * justCheckIfSupported - if set, don't search, only tell if this + * function supports 
requested search. + * + * RET: listkey set to verses that contain istr + */ + +ListKey &RawText::Search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) +{ + listkey.ClearList(); + + if ((fastSearch[0]) && (fastSearch[1])) { + + switch (searchType) { + case -2: { + + if ((flags & REG_ICASE) != REG_ICASE) // if haven't chosen to + // ignore case + break; // can't handle fast case sensitive searches + + // test to see if our scope for this search is bounded by a + // VerseKey + VerseKey *testKeyType = 0; + try { + testKeyType = SWDYNAMIC_CAST(VerseKey, ((scope)?scope:key)); + } + catch ( ... ) {} + // if we don't have a VerseKey * decendant we can't handle + // because of scope. + // In the future, add bool SWKey::isValid(const char *tryString); + if (!testKeyType) + break; + + + // check if we just want to see if search is supported. + // If we've gotten this far, then it is supported. + if (justCheckIfSupported) { + *justCheckIfSupported = true; + return listkey; + } + + SWKey saveKey = *testKeyType; // save current place + + char error = 0; + char **words = 0; + char *wordBuf = 0; + int wordCount = 0; + long start; + unsigned short size; + char *idxbuf = 0; + char *datbuf = 0; + list <long> indexes; + list <long> indexes2; + VerseKey vk; + vk = TOP; + + (*percent)(10, percentUserData); + + // toupper our copy of search string + stdstr(&wordBuf, istr); + toupperstr(wordBuf); + + // get list of individual words + words = (char **)calloc(sizeof(char *), 10); + int allocWords = 10; + words[wordCount] = strtok(wordBuf, " "); + while (words[wordCount]) { + wordCount++; + if (wordCount == allocWords) { + allocWords+=10; + words = (char **)realloc(words, sizeof(char *)*allocWords); + } + words[wordCount] = strtok(NULL, " "); + } + + (*percent)(20, percentUserData); + + // clear our result set + indexes.erase(indexes.begin(), indexes.end()); + + // search both old and new testament 
indexes + for (int j = 0; j < 2; j++) { + // iterate thru each word the user passed to us. + for (int i = 0; i < wordCount; i++) { + + // clear this word's result set + indexes2.erase(indexes2.begin(), indexes2.end()); + error = 0; + + // iterate thru every word in the database that starts + // with our search word + for (int away = 0; !error; away++) { + idxbuf = 0; + + // find our word in the database and jump ahead _away_ + error = fastSearch[j]->findoffset(words[i], &start, &size, away); + + // get the word from the database + fastSearch[j]->getidxbufdat(start, &idxbuf); + + // check to see if it starts with our target word + if (strlen(idxbuf) > strlen(words[i])) + idxbuf[strlen(words[i])] = 0; +// else words[i][strlen(idxbuf)] = 0; + if (!strcmp(idxbuf, words[i])) { + + // get data for this word from database + free(idxbuf); + idxbuf = 0; + datbuf = 0; + fastSearch[j]->readtext(start, &size, &idxbuf, &datbuf); + + // we know that the data consists of sizof(long) + // records each a valid module position that constains + // this word + // + // iterate thru each of these module positions + long *keyindex = (long *)datbuf; + while (keyindex < (long *)(datbuf + size - (strlen(idxbuf) + 1))) { + if (i) { // if we're not on our first word + + // check to see if this word is already in the result set. 
+ // This is our AND functionality + if (find(indexes.begin(), indexes.end(), *keyindex) != indexes.end()) + // add to new result set + indexes2.push_back(*keyindex); + } + else indexes2.push_back(*keyindex); + keyindex++; + } + free(datbuf); + } + else error = 1; // no more matches + free(idxbuf); + } + + // make new result set final result set + indexes = indexes2; + + percent((char)(20 + (float)((j*wordCount)+i)/(wordCount * 2) * 78), percentUserData); + } + + // indexes contains our good verses, lets return them in a listkey + indexes.sort(); + + // iterate thru each good module position that meets the search + for (list <long>::iterator it = indexes.begin(); it != indexes.end(); it++) { + + // set a temporary verse key to this module position + vk.Testament(j+1); + vk.Error(); + vk.Index(*it); + + // check scope + // Try to set our scope key to this verse key + if (scope) { + *testKeyType = vk; + + // check to see if it set ok and if so, add to our return list + if (*testKeyType == vk) + listkey << (const char *) vk; + } + else listkey << (const char*) vk; + } + } + (*percent)(98, percentUserData); + + free(words); + free(wordBuf); + + *testKeyType = saveKey; // set current place back to original + + listkey = TOP; + (*percent)(100, percentUserData); + return listkey; + } + + default: + break; + } + } + + // check if we just want to see if search is supported + if (justCheckIfSupported) { + *justCheckIfSupported = false; + return listkey; + } + + // if we don't support this search, fall back to base class + return SWModule::Search(istr, searchType, flags, scope, justCheckIfSupported, percent, percentUserData); +} + + +void RawText::setEntry(const char *inbuf, long len) { + VerseKey *key = 0; + // see if we have a VerseKey * or decendant + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... 
) {} + // if we don't have a VerseKey * decendant, create our own + if (!key) + key = new VerseKey(this->key); + + settext(key->Testament(), key->Index(), inbuf, len); + + if (this->key != key) // free our key if we created a VerseKey + delete key; +} + + +void RawText::linkEntry(const SWKey *inkey) { + VerseKey *destkey = 0; + const VerseKey *srckey = 0; + // see if we have a VerseKey * or decendant + try { + destkey = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... ) {} + // if we don't have a VerseKey * decendant, create our own + if (!destkey) + destkey = new VerseKey(this->key); + + // see if we have a VerseKey * or decendant + try { + srckey = SWDYNAMIC_CAST(VerseKey, inkey); + } + catch ( ... ) {} + // if we don't have a VerseKey * decendant, create our own + if (!srckey) + srckey = new VerseKey(inkey); + + linkentry(destkey->Testament(), destkey->Index(), srckey->Index()); + + if (this->key != destkey) // free our key if we created a VerseKey + delete destkey; + + if (inkey != srckey) // free our key if we created a VerseKey + delete srckey; +} + + +/****************************************************************************** + * RawText::deleteEntry - deletes this entry + * + * RET: *this + */ + +void RawText::deleteEntry() { + + VerseKey *key = 0; + + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... ) {} + if (!key) + key = new VerseKey(this->key); + + settext(key->Testament(), key->Index(), ""); + + if (key != this->key) + delete key; +} + +/****************************************************************************** + * RawText::increment - Increments module key a number of entries + * + * ENT: increment - Number of entries to jump forward + * + * RET: *this + */ + +void RawText::increment(int steps) { + long start; + unsigned short size; + VerseKey *tmpkey = 0; + + try { + tmpkey = SWDYNAMIC_CAST(VerseKey, key); + } + catch ( ... 
) {} + if (!tmpkey) + tmpkey = new VerseKey(key); + + findoffset(tmpkey->Testament(), tmpkey->Index(), &start, &size); + + SWKey lastgood = *tmpkey; + while (steps) { + long laststart = start; + unsigned short lastsize = size; + SWKey lasttry = *tmpkey; + (steps > 0) ? (*key)++ : (*key)--; + if (tmpkey != key) + delete tmpkey; + tmpkey = 0; + try { + tmpkey = SWDYNAMIC_CAST(VerseKey, key); + } + catch ( ... ) {} + if (!tmpkey) + tmpkey = new VerseKey(key); + + if ((error = key->Error())) { + *key = lastgood; + break; + } + long index = tmpkey->Index(); + findoffset(tmpkey->Testament(), index, &start, &size); + if ( + (((laststart != start) || (lastsize != size)) // we're a different entry + && (start > 0) && (size)) // and we actually have a size + ||(!skipConsecutiveLinks)) { // or we don't want to skip consecutive links + steps += (steps < 0) ? 1 : -1; + lastgood = *tmpkey; + } + } + error = (error) ? KEYERR_OUTOFBOUNDS : 0; + + if (tmpkey != key) + delete tmpkey; +} diff --git a/src/modules/texts/swtext.cpp b/src/modules/texts/swtext.cpp new file mode 100644 index 0000000..5f6b424 --- /dev/null +++ b/src/modules/texts/swtext.cpp @@ -0,0 +1,40 @@ +/****************************************************************************** + * swtext.cpp - code for base class 'SWText'- The basis for all text modules + */ + +#include <swtext.h> +#include <listkey.h> + + +/****************************************************************************** + * SWText Constructor - Initializes data for instance of SWText + * + * ENT: imodname - Internal name for module + * imoddesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +SWText::SWText(const char *imodname, const char *imoddesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang): SWModule(imodname, imoddesc, idisp, "Biblical Texts", enc, dir, mark, ilang) +{ + delete key; + key = CreateKey(); + skipConsecutiveLinks = false; +} + + 
+/****************************************************************************** + * SWText Destructor - Cleans up instance of SWText + */ + +SWText::~SWText() { +} + + +/****************************************************************************** + * SWText CreateKey - Create the correct key (VerseKey) for use with SWText + */ + +SWKey *SWText::CreateKey() +{ + return new VerseKey(); +} diff --git a/src/modules/texts/ztext/ztext.cpp b/src/modules/texts/ztext/ztext.cpp new file mode 100644 index 0000000..c774693 --- /dev/null +++ b/src/modules/texts/ztext/ztext.cpp @@ -0,0 +1,309 @@ +/****************************************************************************** + * ztext.cpp - code for class 'zText'- a module that reads compressed text + * files: ot and nt using indexs ??.vss + */ + + +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> +//#include <rawverse.h> +#include <ztext.h> +//#include <zlib.h> + + +/****************************************************************************** + * zText Constructor - Initializes data for instance of zText + * + * ENT: ipath - path to data files + * iname - Internal name for module + * idesc - Name to display to user for module + * iblockType - verse, chapter, book, etc. 
of index chunks + * icomp - Compressor object + * idisp - Display object to use for displaying + */ + +zText::zText(const char *ipath, const char *iname, const char *idesc, int iblockType, SWCompress *icomp, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : zVerse(ipath, -1, iblockType, icomp), SWText(iname, idesc, idisp, enc, dir, mark, ilang)/*, SWCompress()*/ +{ + blockType = iblockType; + lastWriteKey = 0; +} + + +/****************************************************************************** + * zText Destructor - Cleans up instance of zText + */ + +zText::~zText() +{ + flushCache(); + + if (lastWriteKey) + delete lastWriteKey; +} + + +/****************************************************************************** + * zText::getRawEntry - Returns the current verse buffer + * + * RET: buffer with verse + */ + +char *zText::getRawEntry() +{ +/* + long start; + unsigned long size; + unsigned long destsize; + char *tmpbuf; + char *dest; + VerseKey *lkey = (VerseKey *) SWModule::key; + char sizebuf[3]; + + lkey->Verse(0); + if (chapcache != lkey->Index()) { + findoffset(lkey->Testament(), lkey->Index(), &start, &((unsigned short) size)); + readtext(lkey->Testament(), start, 3, sizebuf); + memcpy(&size, sizebuf, 2); + tmpbuf = new char [ size + 1 ]; + readtext(lkey->Testament(), start + 2, size + 1 , tmpbuf); + //zBuf(&size, tmpbuf); + dest = new char [ (size*4) + 1 ]; + uncompress((Bytef *)dest, &destsize, (Bytef *) tmpbuf, size); + chapcache = lkey->Index(); + delete [] tmpbuf; + } + + //findoffset(key->Testament(), key->Index(), &start, &size); + findoffset(lkey->Testament(), lkey->Index(), &start, &((unsigned short) size)); + + if (versebuf) + delete [] versebuf; + versebuf = new char [ size + 1 ]; + //memcpy(versebuf, Buf(), size); + memcpy(versebuf, dest, destsize); + delete [] dest; + + preptext(versebuf); + + return versebuf; +*/ + + long start = 0; + unsigned short size = 0; + VerseKey *key = 0; + + //printf 
("zText char *\n"); + + // see if we have a VerseKey * or decendant + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... ) {} + // if we don't have a VerseKey * decendant, create our own + if (!key) + key = new VerseKey(this->key); + + //printf ("checking cache\n"); + //printf ("finding offset\n"); + findoffset(key->Testament(), key->Index(), &start, &size); + entrySize = size; // support getEntrySize call + + //printf ("deleting previous buffer\n"); + unsigned long newsize = (size + 2) * FILTERPAD; + if (newsize > entrybufallocsize) { + if (entrybuf) + delete [] entrybuf; + entrybuf = new char [ newsize ]; + entrybufallocsize = newsize; + } + *entrybuf = 0; + + //printf ("getting text\n"); + zreadtext(key->Testament(), start, (size + 2), entrybuf); + //printf ("got text\n"); + + rawFilter(entrybuf, size, key); + + //printf ("preparing text\n"); + if (!isUnicode()) + preptext(entrybuf); + + if (this->key != key) // free our key if we created a VerseKey + delete key; + + //printf ("returning text\n"); + return entrybuf; + +} + + +bool zText::sameBlock(VerseKey *k1, VerseKey *k2) { + if (k1->Testament() != k2->Testament()) + return false; + + switch (blockType) { + case VERSEBLOCKS: + if (k1->Verse() != k2->Verse()) + return false; + case CHAPTERBLOCKS: + if (k1->Chapter() != k2->Chapter()) + return false; + case BOOKBLOCKS: + if (k1->Book() != k2->Book()) + return false; + } + return true; +} + + +void zText::setEntry(const char *inbuf, long len) { + VerseKey *key = 0; + // see if we have a VerseKey * or decendant + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... 
) {} + // if we don't have a VerseKey * decendant, create our own + if (!key) + key = new VerseKey(this->key); + + + // see if we've jumped across blocks since last write + if (lastWriteKey) { + if (!sameBlock(lastWriteKey, key)) { + flushCache(); + } + delete lastWriteKey; + } + + settext(key->Testament(), key->Index(), inbuf, len); + + lastWriteKey = (VerseKey *)key->clone(); // must delete + + if (this->key != key) // free our key if we created a VerseKey + delete key; +} + + +void zText::linkEntry(const SWKey *inkey) { + VerseKey *destkey = 0; + const VerseKey *srckey = 0; + // see if we have a VerseKey * or decendant + try { + destkey = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... ) {} + // if we don't have a VerseKey * decendant, create our own + if (!destkey) + destkey = new VerseKey(this->key); + + // see if we have a VerseKey * or decendant + try { + srckey = (const VerseKey *) SWDYNAMIC_CAST(VerseKey, inkey); + } + catch ( ... ) { + } + // if we don't have a VerseKey * decendant, create our own + if (!srckey) + srckey = new VerseKey(inkey); + + linkentry(destkey->Testament(), destkey->Index(), srckey->Index()); + + if (this->key != destkey) // free our key if we created a VerseKey + delete destkey; + + if (inkey != srckey) // free our key if we created a VerseKey + delete srckey; +} + + +/****************************************************************************** + * zFiles::deleteEntry - deletes this entry + * + */ + +void zText::deleteEntry() { + + VerseKey *key = 0; + + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... 
) {} + if (!key) + key = new VerseKey(this->key); + + settext(key->Testament(), key->Index(), ""); + + if (key != this->key) + delete key; +} + + +/****************************************************************************** + * zText::increment - Increments module key a number of entries + * + * ENT: increment - Number of entries to jump forward + * + */ + +void zText::increment(int steps) { + long start; + unsigned short size; + VerseKey *tmpkey = 0; + + try { + tmpkey = SWDYNAMIC_CAST(VerseKey, key); + } + catch ( ... ) {} + if (!tmpkey) + tmpkey = new VerseKey(key); + + findoffset(tmpkey->Testament(), tmpkey->Index(), &start, &size); + + SWKey lastgood = *tmpkey; + while (steps) { + long laststart = start; + unsigned short lastsize = size; + SWKey lasttry = *tmpkey; + (steps > 0) ? (*key)++ : (*key)--; + if (tmpkey != key) + delete tmpkey; + tmpkey = 0; + try { + tmpkey = SWDYNAMIC_CAST(VerseKey, key); + } + catch ( ... ) {} + if (!tmpkey) + tmpkey = new VerseKey(key); + + if ((error = key->Error())) { + *key = lastgood; + break; + } + long index = tmpkey->Index(); + findoffset(tmpkey->Testament(), index, &start, &size); + + if ( + (((laststart != start) || (lastsize != size)) // we're a different entry + && (start > 0) && (size)) // and we actually have a size + ||(!skipConsecutiveLinks)) { // or we don't want to skip consecutive links + steps += (steps < 0) ? 1 : -1; + lastgood = *tmpkey; + } + } + error = (error) ? KEYERR_OUTOFBOUNDS : 0; + + if (tmpkey != key) + delete tmpkey; +} |