diff options
Diffstat (limited to 'src')
123 files changed, 28930 insertions, 0 deletions
diff --git a/src/frontend/swdisp.cpp b/src/frontend/swdisp.cpp new file mode 100644 index 0000000..412ce2e --- /dev/null +++ b/src/frontend/swdisp.cpp @@ -0,0 +1,27 @@ +/****************************************************************************** + * swdisp.cpp - code for base class 'swdisp'. swdisp is the basis for all + * types of displays (e.g. raw textout, curses, xwindow, etc.) + */ + +#include <iostream> +#include <swmodule.h> +#include <swdisp.h> + +static const char *classes[] = {"SWDisplay", "SWObject", 0}; +SWClass SWDisplay::classdef(classes); + +/****************************************************************************** + * SWDisplay::Display - casts a module to a character pointer and displays it to + * raw output (overriden for different display types and + * module types if necessary) + * + * ENT: imodule - module to display + * + * RET: error status + */ + +char SWDisplay::Display(SWModule &imodule) +{ + std::cout << (const char *)imodule; + return 0; +} diff --git a/src/frontend/swlog.cpp b/src/frontend/swlog.cpp new file mode 100644 index 0000000..277a70d --- /dev/null +++ b/src/frontend/swlog.cpp @@ -0,0 +1,95 @@ +//--------------------------------------------------------------------------- + +#include <stdarg.h> +#include <stdio.h> +#ifndef _MSC_VER +#include <iostream> +#endif +#include "swlog.h" +//--------------------------------------------------------------------------- + + +SWLog *SWLog::systemlog = 0; + + +class __staticsystemlog { +public: + __staticsystemlog() { + SWLog::systemlog = new SWLog(); + } + ~__staticsystemlog() { + delete SWLog::systemlog; + } +} _staticsystemlog; + + +void SWLog::LogWarning(char *fmt, ...) +{ + char msg[2048]; + va_list argptr; + + if (logLevel >= 2) { + va_start(argptr, fmt); + vsprintf(msg, fmt, argptr); + va_end(argptr); + +#ifndef _MSC_VER + std::cerr << msg; + std::cerr << std::endl; +#endif + } +} + + +void SWLog::LogError(char *fmt, ...) 
+{ + char msg[2048]; + va_list argptr; + + if (logLevel) { + va_start(argptr, fmt); + vsprintf(msg, fmt, argptr); + va_end(argptr); + +#ifndef _MSC_VER + std::cerr << msg; + std::cerr << std::endl; +#endif + } +} + + +void SWLog::LogTimedInformation(char *fmt, ...) +{ + char msg[2048]; + va_list argptr; + + if (logLevel >= 4) { + va_start(argptr, fmt); + vsprintf(msg, fmt, argptr); + va_end(argptr); + +#ifndef _MSC_VER + std::cout << msg; + std::cout << std::endl; +#endif + } +} + + +void SWLog::LogInformation(char *fmt, ...) +{ + char msg[2048]; + va_list argptr; + + if (logLevel >= 3) { + va_start(argptr, fmt); + vsprintf(msg, fmt, argptr); + va_end(argptr); + +#ifndef _MSC_VER + std::cout << msg; + std::cout << std::endl; +#endif + } +} diff --git a/src/keys/listkey.cpp b/src/keys/listkey.cpp new file mode 100644 index 0000000..0d1ff33 --- /dev/null +++ b/src/keys/listkey.cpp @@ -0,0 +1,257 @@ +/****************************************************************************** + * listkey.cpp - code for base class 'ListKey'. ListKey is the basis for all + * types of keys that have lists of specified indexes + * (e.g. a list of verses, place, etc.) 
+ */ + +#include <utilfuns.h> +#include <string.h> +#include <stdlib.h> +#include <swkey.h> +#include <listkey.h> + +static const char *classes[] = {"ListKey", "SWKey", "SWObject", 0}; +SWClass ListKey::classdef(classes); + +/****************************************************************************** + * ListKey Constructor - initializes instance of ListKey + * + * ENT: ikey - text key + */ + +ListKey::ListKey(const char *ikey): SWKey(ikey) { + arraymax = 0; + ClearList(); + init(); +} + + +ListKey::ListKey(ListKey const &k) : SWKey(k.keytext) { + arraymax = k.arraymax; + arraypos = k.arraypos; + arraycnt = k.arraycnt; + array = (arraymax)?(SWKey **)malloc(k.arraymax * sizeof(SWKey *)):0; + for (int i = 0; i < arraycnt; i++) + array[i] = k.array[i]->clone(); + init(); +} + + +void ListKey::init() { + myclass = &classdef; +} + + +SWKey *ListKey::clone() const +{ + return new ListKey(*this); +} + +/****************************************************************************** + * ListKey Destructor - cleans up instance of ListKey + */ + +ListKey::~ListKey() +{ + ClearList(); +} + + +/****************************************************************************** + * ListKey::ClearList - Clears out elements of list + */ + +void ListKey::ClearList() +{ + int loop; + + if (arraymax) { + for (loop = 0; loop < arraycnt; loop++) + delete array[loop]; + + free(array); + arraymax = 0; + } + arraycnt = 0; + arraypos = 0; + array = 0; +} + + +/****************************************************************************** + * ListKey::copyFrom Equates this ListKey to another ListKey object + * + * ENT: ikey - other ListKey object + */ + +void ListKey::copyFrom(const ListKey &ikey) { + ClearList(); + + arraymax = ikey.arraymax; + arraypos = ikey.arraypos; + arraycnt = ikey.arraycnt; + array = (arraymax)?(SWKey **)malloc(ikey.arraymax * sizeof(SWKey *)):0; + for (int i = 0; i < arraycnt; i++) + array[i] = ikey.array[i]->clone(); + + SetToElement(0); +} + + 
+/****************************************************************************** + * ListKey::add - Adds an element to the list + */ + +void ListKey::add(const SWKey &ikey) { + if (++arraycnt > arraymax) { + array = (SWKey **) ((array) ? realloc(array, (arraycnt + 32) * sizeof(SWKey *)) : calloc(arraycnt + 32, sizeof(SWKey *))); + arraymax = arraycnt + 32; + } + array[arraycnt-1] = ikey.clone(); + SetToElement(arraycnt-1); +} + + + +/****************************************************************************** + * ListKey::setPosition(SW_POSITION) - Positions this key + * + * ENT: p - position + * + * RET: *this + */ + +void ListKey::setPosition(SW_POSITION p) { + switch (p) { + case 1: // GCC won't compile P_TOP + SetToElement(0); + break; + case 2: // GCC won't compile P_BOTTOM + SetToElement(arraycnt-1); + break; + } +} + + +/****************************************************************************** + * ListKey::increment - Increments a number of elements + */ + +void ListKey::increment(int step) { + if (step < 0) { + decrement(step*-1); + return; + } + Error(); // clear error + for(; step && !Error(); step--) { + if (arraypos < arraycnt) { + (*(array[arraypos]))++; + if (array[arraypos]->Error()) { + SetToElement(arraypos+1); + } + else *this = (const char *)(*array[arraypos]); + } + else error = KEYERR_OUTOFBOUNDS; + } +} + + +/****************************************************************************** + * ListKey::decrement - Decrements a number of elements + */ + +void ListKey::decrement(int step) { + if (step < 0) { + increment(step*-1); + return; + } + Error(); // clear error + for(; step && !Error(); step--) { + if (arraypos > -1) { + (*(array[arraypos]))--; + if (array[arraypos]->Error()) { + SetToElement(arraypos-1, BOTTOM); + } + else *this = (const char *)(*array[arraypos]); + } + else error = KEYERR_OUTOFBOUNDS; + } +} + + +/****************************************************************************** + * ListKey::Count - Returns number of 
elements in list + */ + +int ListKey::Count() { + return arraycnt; +} + + +/****************************************************************************** + * ListKey::SetToElement - Sets key to element number + * + * ENT: ielement - element number to set to + * + * RET: error status + */ + +char ListKey::SetToElement(int ielement, SW_POSITION pos) { + arraypos = ielement; + if (arraypos >= arraycnt) { + arraypos = (arraycnt>0)?arraycnt - 1:0; + error = KEYERR_OUTOFBOUNDS; + } + else { + if (arraypos < 0) { + arraypos = 0; + error = KEYERR_OUTOFBOUNDS; + } + else { + error = 0; + } + } + + if (arraycnt) { + (*array[arraypos]) = pos; + *this = (const char *)(*array[arraypos]); + } + else *this = ""; + + return error; +} + + +/****************************************************************************** + * ListKey::GetElement - Gets a key element number + * + * ENT: pos - element number to get (or default current) + * + * RET: Key or null on error + */ + +SWKey *ListKey::GetElement(int pos) { + if (pos < 0) + pos = arraypos; + + if (pos >=arraycnt) + error = KEYERR_OUTOFBOUNDS; + + return (error) ? 0:array[pos]; +} + + +/****************************************************************************** + * ListKey::Remove - Removes current element from list + */ + +void ListKey::Remove() { + if ((arraypos > -1) && (arraypos < arraycnt)) { + delete array[arraypos]; + if (arraypos < arraycnt - 1) + memmove(&array[arraypos], &array[arraypos+1], (arraycnt - arraypos - 1) * sizeof(SWKey *)); + arraycnt--; + + SetToElement((arraypos)?arraypos-1:0); + } +} diff --git a/src/keys/strkey.cpp b/src/keys/strkey.cpp new file mode 100644 index 0000000..7e2d539 --- /dev/null +++ b/src/keys/strkey.cpp @@ -0,0 +1,41 @@ +/****************************************************************************** + * StrKey.cpp - code for class 'StrKey'- a standard string key class (used + * for modules that index on single strings (eg. cities, + * names, words, etc.) 
+ */ + +#include <swmacs.h> +#include <utilfuns.h> +#include <strkey.h> +#include <string.h> +#include <stdio.h> + + +static const char *classes[] = {"StrKey", "SWKey", "SWObject", 0}; +SWClass StrKey::classdef(classes); + +/****************************************************************************** + * StrKey Constructor - initializes instance of StrKey + * + * ENT: ikey - text key (word, city, name, etc.) + */ + +StrKey::StrKey(const char *ikey) : SWKey(ikey) +{ + init(); +} + + +void StrKey::init() { + myclass = &classdef; +} + + +/****************************************************************************** + * StrKey Destructor - cleans up instance of StrKey + * + * ENT: ikey - text key + */ + +StrKey::~StrKey() { +} diff --git a/src/keys/swkey.cpp b/src/keys/swkey.cpp new file mode 100644 index 0000000..e633369 --- /dev/null +++ b/src/keys/swkey.cpp @@ -0,0 +1,196 @@ +/****************************************************************************** + * swkey.cpp - code for base class 'SWKey'. SWKey is the basis for all + * types of keys for indexing into modules (e.g. verse, word, + * place, etc.) 
+ */ + +#include <swkey.h> +#include <utilfuns.h> +#include <string.h> + +static const char *classes[] = {"SWKey", "SWObject", 0}; +SWClass SWKey::classdef(classes); + +/****************************************************************************** + * SWKey Constructor - initializes instance of SWKey + * + * ENT: ikey - text key + */ + +SWKey::SWKey(const char *ikey) +{ + index = 0; + persist = 0; + keytext = 0; + error = 0; + stdstr(&keytext, ikey); + init(); +} + +SWKey::SWKey(SWKey const &k) +{ + index = k.index; + persist = k.persist; + userData = k.userData; + keytext = 0; + error = k.error; + stdstr(&keytext, k.keytext); + init(); +} + +void SWKey::init() { + myclass = &classdef; +} + +SWKey *SWKey::clone() const +{ + return new SWKey(*this); +} + +/****************************************************************************** + * SWKey Destructor - cleans up instance of SWKey + */ + +SWKey::~SWKey() { + if (keytext) + delete [] keytext; +} + + +/****************************************************************************** + * SWKey::Persist - Gets whether this object itself persists within a + * module that it was used to SetKey or just a copy. + * (1 - persists in module; 0 - a copy is attempted + * + * RET: value of persist + */ + +char SWKey::Persist() const +{ + return persist; +} + + +/****************************************************************************** + * SWKey::Persist - Set/gets whether this object itself persists within a + * module that it was used to SetKey or just a copy. 
+ * (1 - persists in module; 0 - a copy is attempted + * + * ENT: ipersist - value which to set persist + * [-1] - only get + * + * RET: value of persist + */ + +char SWKey::Persist(signed char ipersist) +{ + if (ipersist != -1) + persist = ipersist; + + return persist; +} + + +/****************************************************************************** + * SWKey::Error - Gets and clears error status + * + * RET: error status + */ + +char SWKey::Error() +{ + char retval = error; + + error = 0; + return retval; +} + + +/****************************************************************************** + * SWKey::setText Equates this SWKey to a character string + * + * ENT: ikey - other swkey object + */ + +void SWKey::setText(const char *ikey) { + stdstr(&keytext, ikey); +} + + +/****************************************************************************** + * SWKey::copyFrom Equates this SWKey to another SWKey object + * + * ENT: ikey - other swkey object + */ + +void SWKey::copyFrom(const SWKey &ikey) { +// not desirable Persist(ikey.Persist()); + setText((const char *)ikey); +} + + +/****************************************************************************** + * SWKey::getText - returns text key if (char *) cast is requested + */ + +const char *SWKey::getText() const { + return keytext; +} + + +/****************************************************************************** + * SWKey::compare - Compares another VerseKey object + * + * ENT: ikey - key to compare with this one + * + * RET: > 0 if this key is greater than compare key + * < 0 + * 0 + */ + +int SWKey::compare(const SWKey &ikey) +{ + return strcmp((const char *)*this, (const char *)ikey); +} + + +/****************************************************************************** + * SWKey::setPosition(SW_POSITION) - Positions this key if applicable + */ + +void SWKey::setPosition(SW_POSITION p) { + switch (p) { + case POS_TOP: +// *this = ""; + break; + case POS_BOTTOM: +// *this = "zzzzzzzzz"; + break; 
+ } +} + + +/****************************************************************************** + * SWKey::increment - Increments key a number of entries + * + * ENT: increment - Number of entries to jump forward + * + * RET: *this + */ + +void SWKey::increment(int) { + error = KEYERR_OUTOFBOUNDS; +} + + +/****************************************************************************** + * SWKey::decrement - Decrements key a number of entries + * + * ENT: decrement - Number of entries to jump backward + * + * RET: *this + */ + +void SWKey::decrement(int) { + error = KEYERR_OUTOFBOUNDS; +} diff --git a/src/keys/treekey.cpp b/src/keys/treekey.cpp new file mode 100644 index 0000000..d92b7a4 --- /dev/null +++ b/src/keys/treekey.cpp @@ -0,0 +1,30 @@ +/****************************************************************************** + * versekey.h - code for class 'versekey'- a standard Biblical verse key + * + * $Id: treekey.cpp,v 1.2 2002/04/15 21:26:44 scribe Exp $ + * + * Copyright 1998 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + */ + + +#include <treekey.h> + +static const char *classes[] = {"TreeKey", "SWKey", "SWObject", 0}; +SWClass TreeKey::classdef(classes); + +void TreeKey::init() { + myclass = &classdef; +} diff --git a/src/keys/treekeyidx.cpp b/src/keys/treekeyidx.cpp new file mode 100644 index 0000000..acd9b5a --- /dev/null +++ b/src/keys/treekeyidx.cpp @@ -0,0 +1,590 @@ +/****************************************************************************** + * versekey.h - code for class 'versekey'- a standard Biblical verse key + * + * $Id: treekeyidx.cpp,v 1.7 2002/04/15 21:26:44 scribe Exp $ + * + * Copyright 1998 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + + +#include <treekeyidx.h> +#include <fcntl.h> +#include <stdio.h> +#include <errno.h> +#include <string> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +using namespace std; +static const char nl = '\n'; +static const char *classes[] = {"TreeKeyIdx", "TreeKey", "SWKey", "SWObject", 0}; +SWClass TreeKeyIdx::classdef(classes); + + +TreeKeyIdx::TreeKeyIdx(const TreeKeyIdx &ikey) : currentNode() { + init(); + path = 0; + idxfd = 0; + datfd = 0; + copyFrom(ikey); +} + +TreeKeyIdx::TreeKeyIdx(const char *idxPath, int fileMode) : currentNode() { + char buf[127]; + + init(); + path = 0; + stdstr(&path, idxPath); + +#ifndef O_BINARY // O_BINARY is needed in Borland C++ 4.53 +#define O_BINARY 0 // If it hasn't been defined than we probably +#endif // don't need it. 
+ + if (fileMode == -1) { // try read/write if possible + fileMode = O_RDWR; + } + + sprintf(buf, "%s.idx", path); + idxfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + sprintf(buf, "%s.dat", path); + datfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + if (datfd <= 0) { + sprintf(buf, "Error: %d", errno); + perror(buf); + error = errno; + } + else { + root(); + } +} + + +void TreeKeyIdx::init() { + myclass = &classdef; +} + + +TreeKeyIdx::~TreeKeyIdx () { + if (path) + delete [] path; + + FileMgr::systemFileMgr.close(idxfd); + FileMgr::systemFileMgr.close(datfd); +} + + +const char *TreeKeyIdx::getLocalName() { + return currentNode.name; +} + + +const char *TreeKeyIdx::getUserData(int *size) { + if (size) + *size = (int)currentNode.dsize; + return currentNode.userData; +} + + +void TreeKeyIdx::setUserData(const char *userData, int size) { + if (currentNode.userData) + delete currentNode.userData; + + if (!size) + size = strlen(userData) + 1; + + currentNode.userData = new char [ size ]; + memcpy(currentNode.userData, userData, size); + currentNode.dsize = size; +} + +const char *TreeKeyIdx::setLocalName(const char *newName) { + stdstr(&(currentNode.name), newName); + return currentNode.name; +} + + +void TreeKeyIdx::save() { + saveTreeNode(¤tNode); +} + + +const char *TreeKeyIdx::getFullName() const { + TreeNode parent; + static string fullPath; + fullPath = currentNode.name; + parent.parent = currentNode.parent; + while (parent.parent > -1) { + getTreeNodeFromIdxOffset(parent.parent, &parent); + fullPath = ((string)parent.name) + (string) "/" + fullPath; + } + return fullPath.c_str(); +} + + +void TreeKeyIdx::root() { + error = getTreeNodeFromIdxOffset(0, ¤tNode); +} + + +bool TreeKeyIdx::parent() { + if (currentNode.parent > -1) { + error = getTreeNodeFromIdxOffset(currentNode.parent, ¤tNode); + return true; + } + return false; +} + + +bool TreeKeyIdx::firstChild() { + if (currentNode.firstChild > -1) { + error = 
getTreeNodeFromIdxOffset(currentNode.firstChild, ¤tNode); + return true; + } + return false; +} + + +bool TreeKeyIdx::nextSibling() { + if (currentNode.next > -1) { + error = getTreeNodeFromIdxOffset(currentNode.next, ¤tNode); + return true; + } + return false; +} + + +bool TreeKeyIdx::previousSibling() { + TreeNode iterator; + __u32 target = currentNode.offset; + if (currentNode.parent > -1) { + getTreeNodeFromIdxOffset(currentNode.parent, &iterator); + getTreeNodeFromIdxOffset(iterator.firstChild, &iterator); + if (iterator.offset != target) { + while ((iterator.next != target) && (iterator.next > -1)) + getTreeNodeFromIdxOffset(iterator.next, &iterator); + if (iterator.next > -1) { + error = getTreeNodeFromIdxOffset(iterator.offset, ¤tNode); + return true; + } + } + } + return false; +} + + +bool TreeKeyIdx::hasChildren() { + return (currentNode.firstChild > -1); +} + + +void TreeKeyIdx::append() { + TreeNode lastSib; + if (currentNode.offset) { + getTreeNodeFromIdxOffset(currentNode.offset, &lastSib); + while (lastSib.next > -1) { + getTreeNodeFromIdxOffset(lastSib.next, &lastSib); + } + __u32 idxOffset = lseek(idxfd->getFd(), 0, SEEK_END); + lastSib.next = idxOffset; + saveTreeNodeOffsets(&lastSib); + __u32 parent = currentNode.parent; + currentNode.clear(); + currentNode.offset = idxOffset; + currentNode.parent = parent; + } +} + + +void TreeKeyIdx::appendChild() { + if (firstChild()) { + append(); + } + else { + __u32 idxOffset = lseek(idxfd->getFd(), 0, SEEK_END); + currentNode.firstChild = idxOffset; + saveTreeNodeOffsets(¤tNode); + __u32 parent = currentNode.offset; + currentNode.clear(); + currentNode.offset = idxOffset; + currentNode.parent = parent; + } +} + + +void TreeKeyIdx::insertBefore() { +} + + +void TreeKeyIdx::remove() { +} + + +/****************************************************************************** + * TreeKeyIdx::Create - Creates new key idx/dat files + * + * ENT: path - directory to store module files + * RET: error status + */ + 
+signed char TreeKeyIdx::create(const char *ipath) { + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd, *fd2; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + sprintf(buf, "%s.dat", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s.idx", path); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + FileMgr::systemFileMgr.close(fd2); + + TreeKeyIdx newTree(path); + TreeKeyIdx::TreeNode root; + stdstr(&(root.name), ""); + newTree.saveTreeNode(&root); + + delete [] path; + + return 0; +} + + +/****************************************************************************** + * zStr::getidxbufdat - Gets the index string at the given dat offset + * NOTE: buf is calloc'd, or if not null, realloc'd and must + * be free'd by calling function + * + * ENT: ioffset - offset in dat file to lookup + * node - address of pointer to allocate for storage of string + */ + +void TreeKeyIdx::getTreeNodeFromDatOffset(long ioffset, TreeNode *node) const { + char ch; + __s32 tmp; + __u16 tmp2; + + if (datfd > 0) { + + lseek(datfd->getFd(), ioffset, SEEK_SET); + + read(datfd->getFd(), &tmp, 4); + node->parent = swordtoarch32(tmp); + + read(datfd->getFd(), &tmp, 4); + node->next = swordtoarch32(tmp); + + read(datfd->getFd(), &tmp, 4); + node->firstChild = swordtoarch32(tmp); + + string name; + do { + read(datfd->getFd(), &ch, 1); + name += ch; + } while (ch); + + stdstr(&(node->name), name.c_str()); + + read(datfd->getFd(), &tmp2, 2); + node->dsize = swordtoarch16(tmp2); + + if (node->dsize) { + if (node->userData) + delete [] node->userData; + node->userData = new char [node->dsize]; + read(datfd->getFd(), node->userData, node->dsize); + } + } +} + + 
+/****************************************************************************** + * zStr::getidxbuf - Gets the index string at the given idx offset + * NOTE: buf is calloc'd, or if not null, realloc'd + * and must be freed by calling function + * + * ENT: ioffset - offset in idx file to lookup + * buf - address of pointer to allocate for storage of string + */ + +char TreeKeyIdx::getTreeNodeFromIdxOffset(long ioffset, TreeNode *node) const { + __u32 offset; + char error = 0; + + if (ioffset < 0) { + ioffset = 0; + error = KEYERR_OUTOFBOUNDS; + } + + node->offset = ioffset; + if (idxfd > 0) { + lseek(idxfd->getFd(), ioffset, SEEK_SET); + if (read(idxfd->getFd(), &offset, 4) == 4) { + offset = swordtoarch32(offset); + getTreeNodeFromDatOffset(offset, node); + } + else { + lseek(idxfd->getFd(), -4, SEEK_END); + if (read(idxfd->getFd(), &offset, 4) == 4) { + offset = swordtoarch32(offset); + getTreeNodeFromDatOffset(offset, node); + } + error = KEYERR_OUTOFBOUNDS; + } + } + return error; +} + + +unsigned long TreeKeyIdx::getOffset() const { + return currentNode.offset; +} + +void TreeKeyIdx::setOffset(unsigned long offset) { + error = getTreeNodeFromIdxOffset(offset, ¤tNode); +} + + +void TreeKeyIdx::saveTreeNodeOffsets(TreeNode *node) { + long datOffset = 0; + __s32 tmp; + + if (idxfd > 0) { + lseek(idxfd->getFd(), node->offset, SEEK_SET); + if (read(idxfd->getFd(), &tmp, 4) != 4) { + datOffset = lseek(datfd->getFd(), 0, SEEK_END); + tmp = archtosword32(datOffset); + write(idxfd->getFd(), &tmp, 4); + } + else { + datOffset = swordtoarch32(tmp); + lseek(datfd->getFd(), datOffset, SEEK_SET); + } + + tmp = archtosword32(node->parent); + write(datfd->getFd(), &tmp, 4); + + tmp = archtosword32(node->next); + write(datfd->getFd(), &tmp, 4); + + tmp = archtosword32(node->firstChild); + write(datfd->getFd(), &tmp, 4); + } +} + + +void TreeKeyIdx::copyFrom(const TreeKeyIdx &ikey) { + + SWKey::copyFrom(ikey); + + currentNode.offset = ikey.currentNode.offset; + 
currentNode.parent = ikey.currentNode.parent; + currentNode.next = ikey.currentNode.next; + currentNode.firstChild = ikey.currentNode.firstChild; + stdstr(&(currentNode.name), ikey.currentNode.name); + currentNode.dsize = ikey.currentNode.dsize; + + if (currentNode.userData) + delete [] currentNode.userData; + if (currentNode.dsize) { + currentNode.userData = new char [ currentNode.dsize ]; + memcpy(currentNode.userData, ikey.currentNode.userData, currentNode.dsize); + } + else currentNode.userData = 0; + + bool newFiles = true; + + if (path && ikey.path) + newFiles = strcmp(path, ikey.path); + + if (newFiles) { + stdstr(&path, ikey.path); + + if (idxfd) { + FileMgr::systemFileMgr.close(idxfd); + FileMgr::systemFileMgr.close(datfd); + } + idxfd = FileMgr::systemFileMgr.open(ikey.idxfd->path, ikey.idxfd->mode, ikey.idxfd->perms); + datfd = FileMgr::systemFileMgr.open(ikey.datfd->path, ikey.datfd->mode, ikey.datfd->perms); + } +} + + +void TreeKeyIdx::saveTreeNode(TreeNode *node) { + long datOffset = 0; + __s32 tmp; + if (idxfd > 0) { + + lseek(idxfd->getFd(), node->offset, SEEK_SET); + datOffset = lseek(datfd->getFd(), 0, SEEK_END); + tmp = archtosword32(datOffset); + write(idxfd->getFd(), &tmp, 4); + + saveTreeNodeOffsets(node); + + write(datfd->getFd(), node->name, strlen(node->name)); + char null = 0; + write(datfd->getFd(), &null, 1); + + __u16 tmp2 = archtosword16(node->dsize); + write(datfd->getFd(), &tmp2, 2); + + if (node->dsize) { + write(datfd->getFd(), node->userData, node->dsize); + } + } +} + + +void TreeKeyIdx::setText(const char *ikey) { + char *buf = 0; + stdstr(&buf, ikey); + char *leaf = strtok(buf, "/"); + root(); + while ((leaf) && (!Error())) { + bool ok, inChild = false; + for (ok = firstChild(); ok; ok = nextSibling()) { + inChild = true; + if (!stricmp(leaf, getLocalName())) + break; + } + leaf = strtok(0, "/"); + if (!ok) { + if (inChild) { // if we didn't find a matching child node, default to first child + parent(); + firstChild(); + } + 
if (leaf) + error = KEYERR_OUTOFBOUNDS; + break; + } + } + delete [] buf; +} + + + +void TreeKeyIdx::copyFrom(const SWKey &ikey) { + SWKey::copyFrom(ikey); +} + +void TreeKeyIdx::setPosition(SW_POSITION p) { + switch (p) { + case POS_TOP: + root(); + break; + case POS_BOTTOM: + error = getTreeNodeFromIdxOffset(lseek(idxfd->getFd(), -4, SEEK_END), ¤tNode); + break; + } + Error(); // clear error from normalize +} + +const char *TreeKeyIdx::getText() const { + return getFullName(); +} + + +int TreeKeyIdx::_compare (const TreeKeyIdx & ikey) { + return (getOffset() - ikey.getOffset()); +} + + +int TreeKeyIdx::compare(const SWKey &ikey) { + TreeKeyIdx *treeKey = SWDYNAMIC_CAST(TreeKeyIdx, (&ikey)); + if (treeKey) + return _compare(*treeKey); + return SWKey::compare(ikey); +} + + +void TreeKeyIdx::decrement(int steps) { + error = getTreeNodeFromIdxOffset(currentNode.offset - (4*steps), ¤tNode); +} + +void TreeKeyIdx::increment(int steps) { + error = getTreeNodeFromIdxOffset(currentNode.offset + (4*steps), ¤tNode); + +/* + // assert positive + if (steps < 0) { + decrement(steps * -1); + return; + } + + while (steps > 0) { + if (!firstChild()) { + if (!nextSibbling() { + error = KEYERR_OUTOFBOUNDS; + return; + } + } + steps--; + } +*/ +} + + + +TreeKeyIdx::TreeNode::TreeNode() { + + name = 0; + stdstr(&name, ""); + userData = 0; + + clear(); +} + + +void TreeKeyIdx::TreeNode::clear() { + offset = 0; + parent = -1; + next = -1; + firstChild = -1; + dsize = 0; + + if (name) + delete [] name; + name = 0; + stdstr(&name, ""); + + if (userData) + delete [] userData; + userData = 0; +} + + +TreeKeyIdx::TreeNode::~TreeNode() { + if (name) + delete [] name; + + if (userData) + delete [] userData; +} diff --git a/src/keys/versekey.cpp b/src/keys/versekey.cpp new file mode 100644 index 0000000..05f1b8b --- /dev/null +++ b/src/keys/versekey.cpp @@ -0,0 +1,1450 @@ +/****************************************************************************** + * VerseKey.cpp - code for class 
'VerseKey'- a standard Biblical verse key + */ + +#include <swmacs.h> +#include <utilfuns.h> +#include <string.h> +#include <stdio.h> +#include <fcntl.h> +#include <stdlib.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <utilstr.h> +#include <swkey.h> +#include <swlog.h> +#include <versekey.h> +#include <localemgr.h> +extern "C" { +#include <roman.h> +} + + +static const char *classes[] = {"VerseKey", "SWKey", "SWObject", 0}; +SWClass VerseKey::classdef(classes); + +/****************************************************************************** + * Initialize static members of VerseKey + */ + +#include <canon.h> // Initialize static members of canonical books structure + +struct sbook *VerseKey::builtin_books[2] = {0,0}; +const char VerseKey::builtin_BMAX[2] = {39, 27}; +long *VerseKey::offsets[2][2] = {{VerseKey::otbks, VerseKey::otcps}, {VerseKey::ntbks, VerseKey::ntcps}}; +int VerseKey::instance = 0; +VerseKey::LocaleCache VerseKey::localeCache; + + +/****************************************************************************** + * VerseKey::init - initializes instance of VerseKey + */ + +void VerseKey::init() { + myclass = &classdef; + if (!instance) + initstatics(); + + instance++; + autonorm = 1; // default auto normalization to true + headings = 0; // default display headings option is false + upperBound = 0; + lowerBound = 0; + testament = 0; + book = 0; + chapter = 0; + verse = 0; + locale = 0; + + setLocale(LocaleMgr::systemLocaleMgr.getDefaultLocaleName()); +} + +/****************************************************************************** + * VerseKey Constructor - initializes instance of VerseKey + * + * ENT: ikey - base key (will take various forms of 'BOOK CH:VS'. 
See + * VerseKey::parse for more detailed information) + */ + +VerseKey::VerseKey(const SWKey *ikey) : SWKey(*ikey) +{ + init(); + if (ikey) + parse(); +} + + +/****************************************************************************** + * VerseKey Constructor - initializes instance of VerseKey + * + * ENT: ikey - text key (will take various forms of 'BOOK CH:VS'. See + * VerseKey::parse for more detailed information) + */ + +VerseKey::VerseKey(const char *ikey) : SWKey(ikey) +{ + init(); + if (ikey) + parse(); +} + + +VerseKey::VerseKey(VerseKey const &k) : SWKey(k) +{ + init(); + autonorm = k.autonorm; + headings = k.headings; + testament = k.Testament(); + book = k.Book(); + chapter = k.Chapter(); + verse = k.Verse(); + LowerBound(k.LowerBound()); + UpperBound(k.UpperBound()); +} + + +VerseKey::VerseKey(const char *min, const char *max) : SWKey() +{ + init(); + LowerBound(min); + UpperBound(max); + setPosition(TOP); +} + + +SWKey *VerseKey::clone() const +{ + return new VerseKey(*this); +} + + +/****************************************************************************** + * VerseKey Destructor - cleans up instance of VerseKey + * + * ENT: ikey - text key + */ + +VerseKey::~VerseKey() { + if (upperBound) + delete upperBound; + if (lowerBound) + delete lowerBound; + if (locale) + delete [] locale; + + --instance; +} + + +void VerseKey::setLocale(const char *name) { + char *BMAX; + struct sbook **books; + bool useCache = false; + + if (localeCache.name) + useCache = (!strcmp(localeCache.name, name)); + + if (!useCache) { // if we're setting params for a new locale + stdstr(&(localeCache.name), name); + localeCache.abbrevsCnt = 0; + } + + SWLocale *locale = (useCache) ? 
localeCache.locale : LocaleMgr::systemLocaleMgr.getLocale(name); + localeCache.locale = locale; + + if (locale) { + locale->getBooks(&BMAX, &books); + setBooks(BMAX, books); + setBookAbbrevs(locale->getBookAbbrevs(), localeCache.abbrevsCnt); + localeCache.abbrevsCnt = abbrevsCnt; + } + else { + setBooks(builtin_BMAX, builtin_books); + setBookAbbrevs(builtin_abbrevs, localeCache.abbrevsCnt); + localeCache.abbrevsCnt = abbrevsCnt; + } + stdstr(&(this->locale), localeCache.name); +} + + +void VerseKey::setBooks(const char *iBMAX, struct sbook **ibooks) { + BMAX = iBMAX; + books = ibooks; +} + + +void VerseKey::setBookAbbrevs(const struct abbrev *bookAbbrevs, unsigned int size) { + abbrevs = bookAbbrevs; + if (!size) { + for (abbrevsCnt = 0; *abbrevs[abbrevsCnt].ab; abbrevsCnt++) { + /* + if (strcmp(abbrevs[abbrevsCnt-1].ab, abbrevs[abbrevsCnt].ab) > 0) { + fprintf(stderr, "ERROR: book abbreviation (canon.h or locale) misordered at entry: %s\n", abbrevs[abbrevsCnt].ab); + exit(-1); + } + */ + } + for (int t = 0; t < 2; t++) { + for (int i = 0; i < BMAX[t]; i++) { + int bn = getBookAbbrev(books[t][i].name); + if ((bn-1)%39 != i) { + SWLog::systemlog->LogError("Book: %s does not have a matching toupper abbrevs entry! book number returned was: %d", books[t][i].name, bn); + } + } + } + } + else abbrevsCnt = size; +} + + +/****************************************************************************** + * VerseKey::initstatics - initializes statics. Performed only when first + * instance on VerseKey (or descendent) is created. 
+ */ + +void VerseKey::initstatics() { + int l1, l2, chaptmp = 0; + + builtin_books[0] = otbooks; + builtin_books[1] = ntbooks; + + for (l1 = 0; l1 < 2; l1++) { + for (l2 = 0; l2 < builtin_BMAX[l1]; l2++) { + builtin_books[l1][l2].versemax = &vm[chaptmp]; + chaptmp += builtin_books[l1][l2].chapmax; + } + } +} + + +/****************************************************************************** + * VerseKey::parse - parses keytext into testament|book|chapter|verse + * + * RET: error status + */ + +char VerseKey::parse() +{ + + + testament = 1; + book = 1; + chapter = 1; + verse = 1; + + int error = 0; + + if (keytext) { + ListKey tmpListKey = VerseKey::ParseVerseList(keytext); + if (tmpListKey.Count()) { + SWKey::setText((const char *)tmpListKey); + for (testament = 1; testament < 3; testament++) { + for (book = 1; book <= BMAX[testament-1]; book++) { + if (!strncmp(keytext, books[testament-1][book-1].name, strlen(books[testament-1][book-1].name))) + break; + } + if (book <= BMAX[testament-1]) + break; + } + + if (testament < 3) { + sscanf(&keytext[strlen(books[testament-1][book-1].name)], "%d:%d", &chapter, &verse); + } + else error = 1; + } else error = 1; + } + Normalize(1); + freshtext(); + + return (this->error) ? 
this->error : (this->error = error); +} + + +/****************************************************************************** + * VerseKey::freshtext - refreshes keytext based on + * testament|book|chapter|verse + */ + +void VerseKey::freshtext() const +{ + char buf[2024]; + int realtest = testament; + int realbook = book; + + if (book < 1) { + if (testament < 1) + sprintf(buf, "[ Module Heading ]"); + else sprintf(buf, "[ Testament %d Heading ]", (int)testament); + } + else { + if (realbook > BMAX[realtest-1]) { + realbook -= BMAX[realtest-1]; + if (realtest < 2) + realtest++; + if (realbook > BMAX[realtest-1]) + realbook = BMAX[realtest-1]; + } + sprintf(buf, "%s %d:%d", books[realtest-1][realbook-1].name, chapter, verse); + } + + stdstr((char **)&keytext, buf); +} + + + +/****************************************************************************** + * VerseKey::getBookAbbrev - Attempts to find a book abbreviation for a buffer + * + * ENT: abbr - key for which to search; + * RET: book number or < 0 = not valid + */ + +int VerseKey::getBookAbbrev(const char *iabbr) +{ + int loop, diff, abLen, min, max, target, retVal = -1; + + char *abbr = 0; + + stdstr(&abbr, iabbr); + strstrip(abbr); + toupperstr(abbr); + abLen = strlen(abbr); + + if (abLen) { + min = 0; +// max = abbrevsCnt - 1; + max = abbrevsCnt; + while(1) { + target = min + ((max - min) / 2); + diff = strncmp(abbr, abbrevs[target].ab, abLen); + if ((!diff)||(target >= max)||(target <= min)) + break; + if (diff > 0) + min = target; + else max = target; + } + for (; target > 0; target--) { + if (strncmp(abbr, abbrevs[target-1].ab, abLen)) + break; + } + + retVal = (!diff) ? 
abbrevs[target].book : -1; + } + delete [] abbr; + return retVal; +} + +/****************************************************************************** + * VerseKey::ParseVerseList - Attempts to parse a buffer into separate + * verse entries returned in a ListKey + * + * ENT: buf - buffer to parse; + * defaultKey - if verse, chap, book, or testament is left off, + * pull info from this key (ie. Gen 2:3; 4:5; + * Gen would be used when parsing the 4:5 section) + * expandRange - whether or not to expand eg. John 1:10-12 or just + * save John 1:10 + * + * RET: ListKey reference filled with verse entries contained in buf + * + * COMMENT: This code works but wreaks. Rewrite to make more maintainable. + */ + +ListKey VerseKey::ParseVerseList(const char *buf, const char *defaultKey, bool expandRange) { + SWKey textkey; + + char book[255]; + char number[255]; + int tobook = 0; + int tonumber = 0; + int chap = -1, verse = -1; + int bookno = 0; + VerseKey curkey, lBound; + curkey.setLocale(getLocale()); + lBound.setLocale(getLocale()); + int loop; + char comma = 0; + char dash = 0; + const char *orig = buf; + ListKey tmpListKey; + ListKey internalListKey; + SWKey tmpDefaultKey = defaultKey; + char lastPartial = 0; + + curkey.AutoNormalize(0); + tmpListKey << tmpDefaultKey; + tmpListKey.GetElement()->userData = (void *)buf; + + while (*buf) { + switch (*buf) { + case ':': + number[tonumber] = 0; + tonumber = 0; + if (*number) + chap = atoi(number); + *number = 0; + break; + + case '-': + case ',': // on number new verse + case ';': // on number new chapter + number[tonumber] = 0; + tonumber = 0; + if (*number) { + if (chap >= 0) + verse = atoi(number); + else chap = atoi(number); + } + *number = 0; + book[tobook] = 0; + tobook = 0; + bookno = -1; + if (*book) { + for (loop = strlen(book) - 1; loop+1; loop--) { + if ((isdigit(book[loop])) || (book[loop] == ' ')) { + book[loop] = 0; + continue; + } + else { + if ((SW_toupper(book[loop])=='F')&&(loop)) { + if 
((isdigit(book[loop-1])) || (book[loop-1] == ' ') || (SW_toupper(book[loop-1]) == 'F')) { + book[loop] = 0; + continue; + } + } + } + break; + } + + for (loop = strlen(book) - 1; loop+1; loop--) { + if (book[loop] == ' ') { + if (isroman(&book[loop+1])) { + if (verse == -1) { + verse = chap; + chap = from_rom(&book[loop+1]); + book[loop] = 0; + } + } + break; + } + } + + if ((!stricmp(book, "V")) || (!stricmp(book, "VER"))) { // Verse abbrev + if (verse == -1) { + verse = chap; + chap = VerseKey(tmpListKey).Chapter(); + *book = 0; + } + } + + bookno = getBookAbbrev(book); + } + if (((bookno > -1) || (!*book)) && ((*book) || (chap >= 0) || (verse >= 0))) { + char partial = 0; + curkey.Verse(1); + curkey.Chapter(1); + curkey.Book(1); + + if (bookno < 0) { + curkey.Testament(VerseKey(tmpListKey).Testament()); + curkey.Book(VerseKey(tmpListKey).Book()); + } + else { + curkey.Testament(1); + curkey.Book(bookno); + } + + if (((comma)||((verse < 0)&&(bookno < 0)))&&(!lastPartial)) { +// if (comma) { + curkey.Chapter(VerseKey(tmpListKey).Chapter()); + curkey.Verse(chap); // chap because this is the first number captured + } + else { + if (chap >= 0) { + curkey.Chapter(chap); + } + else { + partial++; + curkey.Chapter(1); + } + if (verse >= 0) { + curkey.Verse(verse); + } + else { + partial++; + curkey.Verse(1); + } + } + + if ((*buf == '-') && (expandRange)) { // if this is a dash save lowerBound and wait for upper + VerseKey newElement; + newElement.LowerBound(curkey); + newElement.setPosition(TOP); + tmpListKey << newElement; + tmpListKey.GetElement()->userData = (void *)buf; + } + else { + if (!dash) { // if last separator was not a dash just add + if (expandRange && partial) { + VerseKey newElement; + newElement.LowerBound(curkey); + if (partial > 1) + curkey.setPosition(MAXCHAPTER); + if (partial > 0) + curkey = MAXVERSE; + newElement.UpperBound(curkey); + newElement = TOP; + tmpListKey << newElement; + tmpListKey.GetElement()->userData = (void *)buf; + } + else { + 
tmpListKey << (const SWKey &)(const SWKey)(const char *)curkey; + tmpListKey.GetElement()->userData = (void *)buf; + } + } + else if (expandRange) { + VerseKey *newElement = SWDYNAMIC_CAST(VerseKey, tmpListKey.GetElement()); + if (newElement) { + if (partial > 1) + curkey = MAXCHAPTER; + if (partial > 0) + curkey = MAXVERSE; + newElement->UpperBound(curkey); + *newElement = TOP; + tmpListKey.GetElement()->userData = (void *)buf; + } + } + } + lastPartial = partial; + } + *book = 0; + chap = -1; + verse = -1; + if (*buf == ',') + comma = 1; + else comma = 0; + if (*buf == '-') + dash = 1; + else dash = 0; + break; + case 10: // ignore these + case 13: + break; + case '.': + if (buf > orig) // ignore (break) if preceeding char is not a digit + if (!isdigit(*(buf-1))) + break; + + default: + if (isdigit(*buf)) { + number[tonumber++] = *buf; + } + else { + switch (*buf) { + case ' ': // ignore these and don't reset number + case 'f': + case 'F': + break; + default: + number[tonumber] = 0; + tonumber = 0; + break; + } + } + if (chap == -1) + book[tobook++] = *buf; + } + buf++; + } + number[tonumber] = 0; + tonumber = 0; + if (*number) { + if (chap >= 0) + verse = atoi(number); + else chap = atoi(number); + } + *number = 0; + book[tobook] = 0; + tobook = 0; + if (*book) { + for (loop = strlen(book) - 1; loop+1; loop--) { + if ((isdigit(book[loop])) || (book[loop] == ' ')) { + book[loop] = 0; + continue; + } + else { + if ((SW_toupper(book[loop])=='F')&&(loop)) { + if ((isdigit(book[loop-1])) || (book[loop-1] == ' ') || (SW_toupper(book[loop-1]) == 'F')) { + book[loop] = 0; + continue; + } + } + } + break; + } + + for (loop = strlen(book) - 1; loop+1; loop--) { + if (book[loop] == ' ') { + if (isroman(&book[loop+1])) { + if (verse == -1) { + verse = chap; + chap = from_rom(&book[loop+1]); + book[loop] = 0; + } + } + break; + } + } + + if ((!stricmp(book, "V")) || (!stricmp(book, "VER"))) { // Verse abbrev. 
+ if (verse == -1) { + verse = chap; + chap = VerseKey(tmpListKey).Chapter(); + *book = 0; + } + } + + bookno = getBookAbbrev(book); + } + if (((bookno > -1) || (!*book)) && ((*book) || (chap >= 0) || (verse >= 0))) { + char partial = 0; + curkey.Verse(1); + curkey.Chapter(1); + curkey.Book(1); + + if (bookno < 0) { + curkey.Testament(VerseKey(tmpListKey).Testament()); + curkey.Book(VerseKey(tmpListKey).Book()); + } + else { + curkey.Testament(1); + curkey.Book(bookno); + } + + if (((comma)||((verse < 0)&&(bookno < 0)))&&(!lastPartial)) { +// if (comma) { + curkey.Chapter(VerseKey(tmpListKey).Chapter()); + curkey.Verse(chap); // chap because this is the first number captured + } + else { + if (chap >= 0) { + curkey.Chapter(chap); + } + else { + partial++; + curkey.Chapter(1); + } + if (verse >= 0) { + curkey.Verse(verse); + } + else { + partial++; + curkey.Verse(1); + } + } + + if ((*buf == '-') && (expandRange)) { // if this is a dash save lowerBound and wait for upper + VerseKey newElement; + newElement.LowerBound(curkey); + newElement = TOP; + tmpListKey << newElement; + tmpListKey.GetElement()->userData = (void *)buf; + } + else { + if (!dash) { // if last separator was not a dash just add + if (expandRange && partial) { + VerseKey newElement; + newElement.LowerBound(curkey); + if (partial > 1) + curkey = MAXCHAPTER; + if (partial > 0) + curkey = MAXVERSE; + newElement.UpperBound(curkey); + newElement = TOP; + tmpListKey << newElement; + tmpListKey.GetElement()->userData = (void *)buf; + } + else { + tmpListKey << (const SWKey &)(const SWKey)(const char *)curkey; + tmpListKey.GetElement()->userData = (void *)buf; + } + } + else if (expandRange) { + VerseKey *newElement = SWDYNAMIC_CAST(VerseKey, tmpListKey.GetElement()); + if (newElement) { + if (partial > 1) + curkey = MAXCHAPTER; + if (partial > 0) + curkey = MAXVERSE; + newElement->UpperBound(curkey); + *newElement = TOP; + tmpListKey.GetElement()->userData = (void *)buf; + } + } + } + } + *book = 0; + 
tmpListKey = TOP; + tmpListKey.Remove(); // remove defaultKey + internalListKey = tmpListKey; + internalListKey = TOP; // Align internalListKey to first element before passing back; + + return internalListKey; +} + + +/****************************************************************************** + * VerseKey::LowerBound - sets / gets the lower boundary for this key + */ + +VerseKey &VerseKey::LowerBound(const char *lb) +{ + if (!lowerBound) + initBounds(); + + (*lowerBound) = lb; + lowerBound->Normalize(); + + return (*lowerBound); +} + + +/****************************************************************************** + * VerseKey::UpperBound - sets / gets the upper boundary for this key + */ + +VerseKey &VerseKey::UpperBound(const char *ub) +{ + if (!upperBound) + initBounds(); + +// need to set upperbound parsing to resolve to max verse/chap if not specified + (*upperBound) = ub; + if (*upperBound < *lowerBound) + *upperBound = *lowerBound; + upperBound->Normalize(); + +// until we have a proper method to resolve max verse/chap use this kludge + int len = strlen(ub); + bool alpha = false; + bool versespec = false; + bool chapspec = false; + for (int i = 0; i < len; i++) { + if (isalpha(ub[i])) + alpha = true; + if (ub[i] == ':') // if we have a : we assume verse spec + versespec = true; + if ((isdigit(ub[i])) && (alpha)) // if digit after alpha assume chap spec + chapspec = true; + } + if (!chapspec) + *upperBound = MAXCHAPTER; + if (!versespec) + *upperBound = MAXVERSE; + + +// -- end kludge + + return (*upperBound); +} + + +/****************************************************************************** + * VerseKey::LowerBound - sets / gets the lower boundary for this key + */ + +VerseKey &VerseKey::LowerBound() const +{ + if (!lowerBound) + initBounds(); + + return (*lowerBound); +} + + +/****************************************************************************** + * VerseKey::UpperBound - sets / gets the upper boundary for this key + */ + +VerseKey 
&VerseKey::UpperBound() const +{ + if (!upperBound) + initBounds(); + + return (*upperBound); +} + + +/****************************************************************************** + * VerseKey::ClearBounds - clears bounds for this VerseKey + */ + +void VerseKey::ClearBounds() +{ + initBounds(); +} + + +void VerseKey::initBounds() const +{ + if (!upperBound) { + upperBound = new VerseKey(); + upperBound->AutoNormalize(0); + upperBound->Headings(1); + } + if (!lowerBound) { + lowerBound = new VerseKey(); + lowerBound->AutoNormalize(0); + lowerBound->Headings(1); + } + + lowerBound->Testament(0); + lowerBound->Book(0); + lowerBound->Chapter(0); + lowerBound->Verse(0); + + upperBound->Testament(2); + upperBound->Book(BMAX[1]); + upperBound->Chapter(books[1][BMAX[1]-1].chapmax); + upperBound->Verse(books[1][BMAX[1]-1].versemax[upperBound->Chapter()-1]); +} + + +/****************************************************************************** + * VerseKey::copyFrom - Equates this VerseKey to another VerseKey + */ + +void VerseKey::copyFrom(const VerseKey &ikey) { + SWKey::copyFrom(ikey); + + parse(); +} + + +/****************************************************************************** + * VerseKey::copyFrom - Equates this VerseKey to another SWKey + */ + +void VerseKey::copyFrom(const SWKey &ikey) { + SWKey::copyFrom(ikey); + + parse(); +} + + +/****************************************************************************** + * VerseKey::getText - refreshes keytext before returning if cast to + * a (char *) is requested + */ + +const char *VerseKey::getText() const { + freshtext(); + return keytext; +} + + +const char *VerseKey::getShortText() const { + static char *stext = 0; + char buf[2047]; + freshtext(); + if (book < 1) { + if (testament < 1) + sprintf(buf, "[ Module Heading ]"); + else sprintf(buf, "[ Testament %d Heading ]", (int)testament); + } + else { + sprintf(buf, "%s %d:%d", books[testament-1][book-1].prefAbbrev, chapter, verse); + } + stdstr(&stext, buf); 
+ return stext; +} + + +const char *VerseKey::getBookName() const { + return books[testament-1][book-1].name; +} + + +const char *VerseKey::getBookAbbrev() const { + return books[testament-1][book-1].prefAbbrev; +} +/****************************************************************************** + * VerseKey::setPosition(SW_POSITION) - Positions this key + * + * ENT: p - position + * + * RET: *this + */ + +void VerseKey::setPosition(SW_POSITION p) { + switch (p) { + case POS_TOP: + testament = LowerBound().Testament(); + book = LowerBound().Book(); + chapter = LowerBound().Chapter(); + verse = LowerBound().Verse(); + break; + case POS_BOTTOM: + testament = UpperBound().Testament(); + book = UpperBound().Book(); + chapter = UpperBound().Chapter(); + verse = UpperBound().Verse(); + break; + case POS_MAXVERSE: + Normalize(); + verse = books[testament-1][book-1].versemax[chapter-1]; + break; + case POS_MAXCHAPTER: + verse = 1; + Normalize(); + chapter = books[testament-1][book-1].chapmax; + break; + } + Normalize(1); + Error(); // clear error from normalize +} + + +/****************************************************************************** + * VerseKey::increment - Increments key a number of verses + * + * ENT: step - Number of verses to jump forward + * + * RET: *this + */ + +void VerseKey::increment(int step) { + char ierror = 0; + Index(Index() + step); + while ((!verse) && (!headings) && (!ierror)) { + Index(Index() + 1); + ierror = Error(); + } + + error = (ierror) ? ierror : error; +} + + +/****************************************************************************** + * VerseKey::decrement - Decrements key a number of verses + * + * ENT: step - Number of verses to jump backward + * + * RET: *this + */ + +void VerseKey::decrement(int step) { + char ierror = 0; + + Index(Index() - step); + while ((!verse) && (!headings) && (!ierror)) { + Index(Index() - 1); + ierror = Error(); + } + if ((ierror) && (!headings)) + (*this)++; + + error = (ierror) ? 
ierror : error; +} + + +/****************************************************************************** + * VerseKey::Normalize - checks limits and normalizes if necessary (e.g. + * Matthew 29:47 = Mark 2:2). If last verse is + * exceeded, key is set to last Book CH:VS + * RET: *this + */ + +void VerseKey::Normalize(char autocheck) +{ + error = 0; + + if ((autocheck) && (!autonorm)) // only normalize if we were explicitely called or if autonorm is turned on + return; + + if ((headings) && (!verse)) // this is cheeze and temporary until deciding what actions should be taken. + return; // so headings should only be turned on when positioning with Index() or incrementors + + while ((testament < 3) && (testament > 0)) { + + if (book > BMAX[testament-1]) { + book -= BMAX[testament-1]; + testament++; + continue; + } + + if (book < 1) { + if (--testament > 0) { + book += BMAX[testament-1]; + } + continue; + } + + if (chapter > books[testament-1][book-1].chapmax) { + chapter -= books[testament-1][book-1].chapmax; + book++; + continue; + } + + if (chapter < 1) { + if (--book > 0) { + chapter += books[testament-1][book-1].chapmax; + } + else { + if (testament > 1) { + chapter += books[0][BMAX[0]-1].chapmax; + } + } + continue; + } + + if (verse > books[testament-1][book-1].versemax[chapter-1]) { // -1 because e.g chapter 1 of Matthew is books[1][0].versemax[0] + verse -= books[testament-1][book-1].versemax[chapter++ - 1]; + continue; + } + + if (verse < 1) { + if (--chapter > 0) { + verse += books[testament-1][book-1].versemax[chapter-1]; + } + else { + if (book > 1) { + verse += books[testament-1][book-2].versemax[books[testament-1][book-2].chapmax-1]; + } + else { + if (testament > 1) { + verse += books[0][BMAX[0]-1].versemax[books[0][BMAX[0]-1].chapmax-1]; + } + } + } + continue; + } + + break; // If we've made it this far (all failure checks continue) we're ok + } + + if (testament > 2) { + testament = 2; + book = BMAX[testament-1]; + chapter = 
books[testament-1][book-1].chapmax; + verse = books[testament-1][book-1].versemax[chapter-1]; + error = KEYERR_OUTOFBOUNDS; + } + + if (testament < 1) { + error = ((!headings) || (testament < 0) || (book < 0)) ? KEYERR_OUTOFBOUNDS : 0; + testament = ((headings) ? 0 : 1); + book = ((headings) ? 0 : 1); + chapter = ((headings) ? 0 : 1); + verse = ((headings) ? 0 : 1); + } + if (_compare(UpperBound()) > 0) { + *this = UpperBound(); + error = KEYERR_OUTOFBOUNDS; + } + if (_compare(LowerBound()) < 0) { + *this = LowerBound(); + error = KEYERR_OUTOFBOUNDS; + } +} + + +/****************************************************************************** + * VerseKey::Testament - Gets testament + * + * RET: value of testament + */ + +char VerseKey::Testament() const +{ + return testament; +} + + +/****************************************************************************** + * VerseKey::Book - Gets book + * + * RET: value of book + */ + +char VerseKey::Book() const +{ + return book; +} + + +/****************************************************************************** + * VerseKey::Chapter - Gets chapter + * + * RET: value of chapter + */ + +int VerseKey::Chapter() const +{ + return chapter; +} + + +/****************************************************************************** + * VerseKey::Verse - Gets verse + * + * RET: value of verse + */ + +int VerseKey::Verse() const +{ + return verse; +} + + +/****************************************************************************** + * VerseKey::Testament - Sets/gets testament + * + * ENT: itestament - value which to set testament + * [MAXPOS(char)] - only get + * + * RET: if unchanged -> value of testament + * if changed -> previous value of testament + */ + +char VerseKey::Testament(char itestament) +{ + char retval = testament; + + if (itestament != MAXPOS(char)) { + testament = itestament; + Normalize(1); + } + return retval; +} + + +/****************************************************************************** + * 
VerseKey::Book - Sets/gets book + * + * ENT: ibook - value which to set book + * [MAXPOS(char)] - only get + * + * RET: if unchanged -> value of book + * if changed -> previous value of book + */ + +char VerseKey::Book(char ibook) +{ + char retval = book; + + Chapter(1); + book = ibook; + Normalize(1); + + return retval; +} + + +/****************************************************************************** + * VerseKey::Chapter - Sets/gets chapter + * + * ENT: ichapter - value which to set chapter + * [MAXPOS(int)] - only get + * + * RET: if unchanged -> value of chapter + * if changed -> previous value of chapter + */ + +int VerseKey::Chapter(int ichapter) +{ + int retval = chapter; + + Verse(1); + chapter = ichapter; + Normalize(1); + + return retval; +} + + +/****************************************************************************** + * VerseKey::Verse - Sets/gets verse + * + * ENT: iverse - value which to set verse + * [MAXPOS(int)] - only get + * + * RET: if unchanged -> value of verse + * if changed -> previous value of verse + */ + +int VerseKey::Verse(int iverse) +{ + int retval = verse; + + verse = iverse; + Normalize(1); + + return retval; +} + + +/****************************************************************************** + * VerseKey::AutoNormalize - Sets/gets flag that tells VerseKey to auto- + * matically normalize itself when modified + * + * ENT: iautonorm - value which to set autonorm + * [MAXPOS(char)] - only get + * + * RET: if unchanged -> value of autonorm + * if changed -> previous value of autonorm + */ + +char VerseKey::AutoNormalize(char iautonorm) +{ + char retval = autonorm; + + if (iautonorm != MAXPOS(char)) { + autonorm = iautonorm; + Normalize(1); + } + return retval; +} + + +/****************************************************************************** + * VerseKey::Headings - Sets/gets flag that tells VerseKey to include + * chap/book/testmnt/module headings + * + * ENT: iheadings - value which to set headings + * 
[MAXPOS(char)] - only get + * + * RET: if unchanged -> value of headings + * if changed -> previous value of headings + */ + +char VerseKey::Headings(char iheadings) +{ + char retval = headings; + + if (iheadings != MAXPOS(char)) { + headings = iheadings; + Normalize(1); + } + return retval; +} + + +/****************************************************************************** + * VerseKey::findindex - binary search to find the index closest, but less + * than the given value. + * + * ENT: array - long * to array to search + * size - number of elements in the array + * value - value to find + * + * RET: the index into the array that is less than but closest to value + */ + +int VerseKey::findindex(long *array, int size, long value) +{ + int lbound, ubound, tval; + + lbound = 0; + ubound = size - 1; + while ((ubound - lbound) > 1) { + tval = lbound + (ubound-lbound)/2; + if (array[tval] <= value) + lbound = tval; + else ubound = tval; + } + return (array[ubound] <= value) ? ubound : lbound; +} + + +/****************************************************************************** + * VerseKey::Index - Gets index based upon current verse + * + * RET: offset + */ + +long VerseKey::Index() const +{ + long offset; + + if (!testament) { // if we want module heading + offset = 0; + verse = 0; + } + else { + if (!book) + chapter = 0; + if (!chapter) + verse = 0; + + offset = offsets[testament-1][0][book]; + offset = offsets[testament-1][1][(int)offset + chapter]; + if (!(offset|verse)) // if we have a testament but nothing else. 
+ offset = 1; + } + return (offset + verse); +} + + +/****************************************************************************** + * VerseKey::Index - Gets index based upon current verse + * + * RET: offset + */ + +long VerseKey::NewIndex() const +{ + static long otMaxIndex = 32300 - 8245; // total positions - new testament positions +// static long otMaxIndex = offsets[0][1][(int)offsets[0][0][BMAX[0]] + books[0][BMAX[0]].chapmax]; + return ((testament-1) * otMaxIndex) + Index(); +} + + +/****************************************************************************** + * VerseKey::Index - Sets index based upon current verse + * + * ENT: iindex - value to set index to + * + * RET: offset + */ + +long VerseKey::Index(long iindex) +{ + long offset; + +// This is the dirty stuff -------------------------------------------- + + if (!testament) + testament = 1; + + if (iindex < 1) { // if (-) or module heading + if (testament < 2) { + if (iindex < 0) { + testament = 0; // previously we changed 0 -> 1 + error = KEYERR_OUTOFBOUNDS; + } + else testament = 0; // we want module heading + } + else { + testament--; + iindex = (offsets[testament-1][1][offsize[testament-1][1]-1] + books[testament-1][BMAX[testament-1]-1].versemax[books[testament-1][BMAX[testament-1]-1].chapmax-1]) + iindex; // What a doozy! ((offset of last chapter + number of verses in the last chapter) + iindex) + } + } + +// -------------------------------------------------------------------- + + + if (testament) { + if ((!error) && (iindex)) { + offset = findindex(offsets[testament-1][1], offsize[testament-1][1], iindex); + verse = iindex - offsets[testament-1][1][offset]; + book = findindex(offsets[testament-1][0], offsize[testament-1][0], offset); + chapter = offset - offsets[testament-1][0][VerseKey::book]; + verse = (chapter) ? verse : 0; // funny check. if we are index=1 (testmt header) all gets set to 0 exept verse. Don't know why. Fix if you figure out. Think its in the offsets table. 
+ if (verse) { // only check if -1 won't give negative + if (verse > books[testament-1][book-1].versemax[chapter-1]) { + if (testament > 1) { + verse = books[testament-1][book-1].versemax[chapter-1]; + error = KEYERR_OUTOFBOUNDS; + } + else { + testament++; + Index(verse - books[testament-2][book-1].versemax[chapter-1]); + } + } + } + } + } + if (_compare(UpperBound()) > 0) { + *this = UpperBound(); + error = KEYERR_OUTOFBOUNDS; + } + if (_compare(LowerBound()) < 0) { + *this = LowerBound(); + error = KEYERR_OUTOFBOUNDS; + } + return Index(); +} + + +/****************************************************************************** + * VerseKey::compare - Compares another SWKey object + * + * ENT: ikey - key to compare with this one + * + * RET: >0 if this versekey is greater than compare versekey + * <0 < + * 0 = + */ + +int VerseKey::compare(const SWKey &ikey) +{ + VerseKey ivkey = (const char *)ikey; + return _compare(ivkey); +} + + +/****************************************************************************** + * VerseKey::_compare - Compares another VerseKey object + * + * ENT: ikey - key to compare with this one + * + * RET: >0 if this versekey is greater than compare versekey + * <0 < + * 0 = + */ + +int VerseKey::_compare(const VerseKey &ivkey) +{ + long keyval1 = 0; + long keyval2 = 0; + + keyval1 += Testament() * 1000000000; + keyval2 += ivkey.Testament() * 1000000000; + keyval1 += Book() * 1000000; + keyval2 += ivkey.Book() * 1000000; + keyval1 += Chapter() * 1000; + keyval2 += ivkey.Chapter() * 1000; + keyval1 += Verse(); + keyval2 += ivkey.Verse(); + keyval1 -= keyval2; + keyval1 = (keyval1) ? ((keyval1 > 0) ? 
1 : -1) /*keyval1/labs(keyval1)*/:0; // -1 | 0 | 1 + return keyval1; +} + + +const char *VerseKey::getOSISRef() const { + static char buf[5][254]; + static char loop = 0; + + if (loop > 4) + loop = 0; + + static char *osisotbooks[] = { + "Gen","Exod","Lev","Num","Deut","Josh","Judg","Ruth","_1Sam","_2Sam", + "_1Kgs","_2Kgs","_1Chr","_2Chr","Ezra","Neh","Esth","Job","Ps", + "Prov", // added this. Was not in OSIS spec + "Eccl", + "Song","Isa","Jer","Lam","Ezek","Dan","Hos","Joel","Amos","Obad", + "Jonah","Mic","Nah","Hab","Zeph","Hag","Zech","Mal","Bar","PrAzar", + "Bel","Sus","_1Esd","_2Esd","AddEsth","EpJer","Jdt","_1Macc","_2Macc","_3Macc", + "_4Macc","PrMan","Ps151","Sir","Tob","Wis"}; + static char *osisntbooks[] = { + "Matt","Mark","Luke","John","Acts","Rom","_1Cor","_2Cor","Gal","Eph", + "Phil","Col","_1Thess","_2Thess","_1Tim","_2Tim","Titus","Phlm","Heb","Jas", + "_1Pet","_2Pet","_1John","_2John","_3John","Jude","Rev"}; + static char **osisbooks[] = { osisotbooks, osisntbooks }; + if (Verse()) + sprintf(buf[loop], "%s.%d.%d", osisbooks[Testament()-1][Book()-1], (int)Chapter(), (int)Verse()); + else if (Chapter()) + sprintf(buf[loop], "%s.%d", osisbooks[Testament()-1][Book()-1], (int)Chapter()); + else if (Book()) + sprintf(buf[loop], "%s", osisbooks[Testament()-1][Book()-1]); + else sprintf(buf[loop], ""); + return buf[loop++]; +} diff --git a/src/mgr/Makefile.am b/src/mgr/Makefile.am new file mode 100644 index 0000000..c648032 --- /dev/null +++ b/src/mgr/Makefile.am @@ -0,0 +1,26 @@ +mgrdir = $(top_srcdir)/src/mgr + +if CONFDEF +globdef = -DGLOBCONFPATH=\"${globalconfdir}/sword.conf\" +else +globdef = +endif + +if ICU +icudatadir = -DICUDATA=\"${pkglibdir}\" +else +icudatadir = +endif + + +DEFS += $(globdef) $(icudatadir) + +libsword_la_SOURCES += $(mgrdir)/swconfig.cpp +libsword_la_SOURCES += $(mgrdir)/swmgr.cpp +libsword_la_SOURCES += $(mgrdir)/swfiltermgr.cpp +libsword_la_SOURCES += $(mgrdir)/encfiltmgr.cpp +libsword_la_SOURCES += 
$(mgrdir)/markupfiltmgr.cpp +libsword_la_SOURCES += $(mgrdir)/filemgr.cpp +libsword_la_SOURCES += $(mgrdir)/swlocale.cpp +libsword_la_SOURCES += $(mgrdir)/localemgr.cpp +libsword_la_SOURCES += $(mgrdir)/swcacher.cpp diff --git a/src/mgr/encfiltmgr.cpp b/src/mgr/encfiltmgr.cpp new file mode 100644 index 0000000..ab55de9 --- /dev/null +++ b/src/mgr/encfiltmgr.cpp @@ -0,0 +1,148 @@ +/****************************************************************************** + * swencodingmgr.cpp - implementation of class EncodingFilterMgr, subclass of + * SWFilterMgr, used to transcode all module text to a requested + * encoding. + * + * Copyright 1998 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + */ + +#include <encfiltmgr.h> + +#include <scsuutf8.h> +#include <latin1utf8.h> + +#include <unicodertf.h> +#include <utf8latin1.h> +#include <utf8utf16.h> +#include <utf8html.h> + +#include <swmgr.h> + +/****************************************************************************** + * EncodingFilterMgr Constructor - initializes instance of EncodingFilterMgr + * + * ENT: + * enc - Encoding format to emit + */ + +EncodingFilterMgr::EncodingFilterMgr (char enc) + : SWFilterMgr() { + + scsuutf8 = new SCSUUTF8(); + latin1utf8 = new Latin1UTF8(); + + encoding = enc; + + switch (encoding) { + case ENC_LATIN1: + targetenc = new UTF8Latin1(); + break; + case ENC_UTF16: + targetenc = new UTF8UTF16(); + break; + case ENC_RTF: + targetenc = new UnicodeRTF(); + break; + case ENC_HTML: + targetenc = new UTF8HTML(); + break; + default: // i.e. case ENC_UTF8 + targetenc = NULL; + } +} + +/****************************************************************************** + * EncodingFilterMgr Destructor - Cleans up instance of EncodingFilterMgr + */ +EncodingFilterMgr::~EncodingFilterMgr() { + if (scsuutf8) + delete scsuutf8; + if (latin1utf8) + delete latin1utf8; + if (targetenc) + delete targetenc; +} + +void EncodingFilterMgr::AddRawFilters(SWModule *module, ConfigEntMap §ion) { + + ConfigEntMap::iterator entry; + + string encoding = ((entry = section.find("Encoding")) != section.end()) ? 
(*entry).second : (string)""; + if (encoding.empty() || !stricmp(encoding.c_str(), "Latin-1")) { + module->AddRawFilter(latin1utf8); + } + else if (!stricmp(encoding.c_str(), "SCSU")) { + module->AddRawFilter(scsuutf8); + } +} + +void EncodingFilterMgr::AddEncodingFilters(SWModule *module, ConfigEntMap §ion) { + if (targetenc) + module->AddEncodingFilter(targetenc); +} + +/****************************************************************************** + * EncodingFilterMgr::Encoding - sets/gets encoding + * + * ENT: enc - new encoding or 0 to simply get the current encoding + * + * RET: encoding + */ +char EncodingFilterMgr::Encoding(char enc) { + if (enc && enc != encoding) { + encoding = enc; + SWFilter * oldfilter = targetenc; + + switch (encoding) { + case ENC_LATIN1: + targetenc = new UTF8Latin1(); + break; + case ENC_UTF16: + targetenc = new UTF8UTF16(); + break; + case ENC_RTF: + targetenc = new UnicodeRTF(); + break; + case ENC_HTML: + targetenc = new UTF8HTML(); + break; + default: // i.e. 
case ENC_UTF8 + targetenc = NULL; + } + + ModMap::const_iterator module; + + if (oldfilter != targetenc) { + if (oldfilter) { + if (!targetenc) { + for (module = getParentMgr()->Modules.begin(); module != getParentMgr()->Modules.end(); module++) + module->second->RemoveRenderFilter(oldfilter); + } + else { + for (module = getParentMgr()->Modules.begin(); module != getParentMgr()->Modules.end(); module++) + module->second->ReplaceRenderFilter(oldfilter, targetenc); + } + delete oldfilter; + } + else if (targetenc) { + for (module = getParentMgr()->Modules.begin(); module != getParentMgr()->Modules.end(); module++) + module->second->AddRenderFilter(targetenc); + } + } + + } + return encoding; +} diff --git a/src/mgr/filemgr.cpp b/src/mgr/filemgr.cpp new file mode 100644 index 0000000..0b31576 --- /dev/null +++ b/src/mgr/filemgr.cpp @@ -0,0 +1,266 @@ +/****************************************************************************** + * filemgr.cpp - implementation of class FileMgr used for pooling file + * handles + * + * $Id: filemgr.cpp,v 1.22 2002/07/31 20:26:38 scribe Exp $ + * + * Copyright 1998 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + */ + +#include <filemgr.h> +#include <utilstr.h> + +#include <dirent.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <stdio.h> +#include <string.h> +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +// ---------------- statics ----------------- +FileMgr FileMgr::systemFileMgr; + +// --------------- end statics -------------- + + +FileDesc::FileDesc(FileMgr *parent, char *path, int mode, int perms, bool tryDowngrade) { + this->parent = parent; + this->path = 0; + stdstr(&this->path, path); + this->mode = mode; + this->perms = perms; + this->tryDowngrade = tryDowngrade; + offset = 0; + fd = -77; +} + + +FileDesc::~FileDesc() { + if (fd > 0) + close(fd); + + if (path) + delete [] path; +} + + +int FileDesc::getFd() { + if (fd == -77) + fd = parent->sysOpen(this); + return fd; +} + + +FileMgr::FileMgr(int maxFiles) { + this->maxFiles = maxFiles; // must be at least 2 + files = 0; +} + + +FileMgr::~FileMgr() { + FileDesc *tmp; + + while(files) { + tmp = files->next; + delete files; + files = tmp; + } +} + + +FileDesc *FileMgr::open(char *path, int mode, bool tryDowngrade) { + return open(path, mode, S_IREAD | S_IWRITE, tryDowngrade); +} + +FileDesc *FileMgr::open(char *path, int mode, int perms, bool tryDowngrade) { + FileDesc **tmp, *tmp2; + + for (tmp = &files; *tmp; tmp = &((*tmp)->next)) { + if ((*tmp)->fd < 0) // insert as first non-system_open file + break; + } + + tmp2 = new FileDesc(this, path, mode, perms, tryDowngrade); + tmp2->next = *tmp; + *tmp = tmp2; + + return tmp2; +} + + +void FileMgr::close(FileDesc *file) { + FileDesc **loop; + + for (loop = &files; *loop; loop = &((*loop)->next)) { + if (*loop == file) { + *loop = (*loop)->next; + delete file; + break; + } + } +} + + +// to truncate a file at its current position +// leaving byte at current possition intact +// deleting everything afterward. 
+signed char FileMgr::trunc(FileDesc *file) { + + static const char *writeTest = "x"; + long size = lseek(file->getFd(), 1, SEEK_CUR); + if (size == 1) // was empty + size = 0; + char nibble [ 32767 ]; + bool writable = write(file->getFd(), writeTest, 1); + int bytes = 0; + + if (writable) { + // get tmpfilename + char *buf = new char [ strlen(file->path) + 10 ]; + int i; + for (i = 0; i < 9999; i++) { + sprintf(buf, "%stmp%.4d", file->path, i); + if (!existsFile(buf)) + break; + } + if (i == 9999) + return -2; + + int fd = ::open(buf, O_CREAT|O_RDWR, S_IREAD|S_IWRITE); + if (fd < 0) + return -3; + + lseek(file->getFd(), 0, SEEK_SET); + while (size > 0) { + bytes = read(file->getFd(), nibble, 32767); + write(fd, nibble, (bytes < size)?bytes:size); + size -= bytes; + } + // zero out the file + ::close(file->fd); + file->fd = ::open(file->path, O_TRUNC, S_IREAD|S_IWRITE); + ::close(file->fd); + file->fd = -77; // force file open by filemgr + // copy tmp file back (dumb, but must preserve file permissions) + lseek(fd, 0, SEEK_SET); + do { + bytes = read(fd, nibble, 32767); + write(file->getFd(), nibble, bytes); + } while (bytes == 32767); + + ::close(fd); + ::close(file->fd); + unlink(buf); // remove our tmp file + file->fd = -77; // causes file to be swapped out forcing open on next call to getFd() + } + else { // put offset back and return failure + lseek(file->getFd(), -1, SEEK_CUR); + return -1; + } + return 0; +} + + +int FileMgr::sysOpen(FileDesc *file) { + FileDesc **loop; + int openCount = 1; // because we are presently opening 1 file, and we need to be sure to close files to accomodate, if necessary + + for (loop = &files; *loop; loop = &((*loop)->next)) { + + if ((*loop)->fd > 0) { + if (++openCount > maxFiles) { + (*loop)->offset = lseek((*loop)->fd, 0, SEEK_CUR); + ::close((*loop)->fd); + (*loop)->fd = -77; + } + } + + if (*loop == file) { + if (*loop != files) { + *loop = (*loop)->next; + file->next = files; + files = file; + } + if ((!access(file->path, 
04)) || ((file->mode & O_CREAT) == O_CREAT)) { // check for at least file exists / read access before we try to open + char tries = (((file->mode & O_RDWR) == O_RDWR) && (file->tryDowngrade)) ? 2 : 1; // try read/write if possible + for (int i = 0; i < tries; i++) { + if (i > 0) { + file->mode = (file->mode & ~O_RDWR); // remove write access + file->mode = (file->mode | O_RDONLY);// add read access + } + file->fd = ::open(file->path, file->mode, file->perms); + + if (file->fd >= 0) + break; + } + + if (file->fd >= 0) + lseek(file->fd, file->offset, SEEK_SET); + } + else file->fd = -1; + if (!*loop) + break; + } + } + return file->fd; +} + + +signed char FileMgr::existsFile(const char *ipath, const char *ifileName) +{ + int len = strlen(ipath) + ((ifileName)?strlen(ifileName):0) + 3; + char *ch; + char *path = new char [ len ]; + strcpy(path, ipath); + + if ((path[strlen(path)-1] == '\\') || (path[strlen(path)-1] == '/')) + path[strlen(path)-1] = 0; + + if (ifileName) { + ch = path + strlen(path); + sprintf(ch, "/%s", ifileName); + } + signed char retVal = !access(path, 04); + delete [] path; + return retVal; +} + + +signed char FileMgr::existsDir(const char *ipath, const char *idirName) +{ + char *ch; + int len = strlen(ipath) + ((idirName)?strlen(idirName):0) + 1; + if (idirName) + len += strlen(idirName); + char *path = new char [ len ]; + strcpy(path, ipath); + + if ((path[strlen(path)-1] == '\\') || (path[strlen(path)-1] == '/')) + path[strlen(path)-1] = 0; + + if (idirName) { + ch = path + strlen(path); + sprintf(ch, "/%s", idirName); + } + signed char retVal = !access(path, 04); + delete [] path; + return retVal; +} diff --git a/src/mgr/localemgr.cpp b/src/mgr/localemgr.cpp new file mode 100644 index 0000000..bc12f4c --- /dev/null +++ b/src/mgr/localemgr.cpp @@ -0,0 +1,184 @@ +/****************************************************************************** + * localemgr.cpp - implementation of class LocaleMgr used to interact with + * registered locales for a 
sword installation + * + * $Id: localemgr.cpp,v 1.12 2002/06/19 09:24:44 scribe Exp $ + * + * Copyright 1998 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <fcntl.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#include <unixstr.h> +#endif +#include <sys/stat.h> +#include <dirent.h> + +#include <swmgr.h> +#include <utilfuns.h> + +#include <localemgr.h> +#include <filemgr.h> + + +LocaleMgr LocaleMgr::systemLocaleMgr; + + +LocaleMgr::LocaleMgr(const char *iConfigPath) { + char *prefixPath = 0; + char *configPath = 0; + char configType = 0; + string path; + + defaultLocaleName = 0; + + char *lang = getenv ("LANG"); + if (lang) { + if (strlen(lang) > 0) + setDefaultLocaleName(lang); + else setDefaultLocaleName("en_us"); + } + else setDefaultLocaleName("en_us"); + + if (!iConfigPath) + SWMgr::findConfig(&configType, &prefixPath, &configPath); + else configPath = (char *)iConfigPath; + + if (prefixPath) { + switch (configType) { + case 2: + int i; + for (i = strlen(configPath)-1; ((i) && (configPath[i] != '/') && (configPath[i] != '\\')); i--); + configPath[i] = 0; + path = configPath; + path += "/"; + break; + default: + path = prefixPath; + if ((prefixPath[strlen(prefixPath)-1] != '\\') && (prefixPath[strlen(prefixPath)-1] != '/')) + path += "/"; + + break; + } + if (FileMgr::existsDir(path.c_str(), "locales.d")) { + path += "locales.d"; + loadConfigDir(path.c_str()); + } + 
} + + if (prefixPath) + delete [] prefixPath; + + if (configPath) + delete [] configPath; +} + + +LocaleMgr::~LocaleMgr() { + if (defaultLocaleName) + delete [] defaultLocaleName; + deleteLocales(); +} + + +void LocaleMgr::loadConfigDir(const char *ipath) { + DIR *dir; + struct dirent *ent; + string newmodfile; + LocaleMap::iterator it; + + if ((dir = opendir(ipath))) { + rewinddir(dir); + while ((ent = readdir(dir))) { + if ((strcmp(ent->d_name, ".")) && (strcmp(ent->d_name, ".."))) { + newmodfile = ipath; + if ((ipath[strlen(ipath)-1] != '\\') && (ipath[strlen(ipath)-1] != '/')) + newmodfile += "/"; + newmodfile += ent->d_name; + SWLocale *locale = new SWLocale(newmodfile.c_str()); + if (locale->getName()) { + it = locales.find(locale->getName()); + if (it != locales.end()) { + *((*it).second) += *locale; + delete locale; + } + else locales.insert(LocaleMap::value_type(locale->getName(), locale)); + } + else delete locale; + } + } + closedir(dir); + } +} + + +void LocaleMgr::deleteLocales() { + + LocaleMap::iterator it; + + for (it = locales.begin(); it != locales.end(); it++) + delete (*it).second; + + locales.erase(locales.begin(), locales.end()); +} + + +SWLocale *LocaleMgr::getLocale(const char *name) { + LocaleMap::iterator it; + + it = locales.find(name); + if (it != locales.end()) + return (*it).second; + + return 0; +} + + +list <string> LocaleMgr::getAvailableLocales() { + list <string> retVal; + for (LocaleMap::iterator it = locales.begin(); it != locales.end(); it++) + retVal.push_back((*it).second->getName()); + + return retVal; +} + + +const char *LocaleMgr::translate(const char *text, const char *localeName) { + SWLocale *target; + if (!localeName) { + localeName = getDefaultLocaleName(); + } + target = getLocale(localeName); + if (target) + return target->translate(text); + return text; +} + + +const char *LocaleMgr::getDefaultLocaleName() { + return defaultLocaleName; +} + + +void LocaleMgr::setDefaultLocaleName(const char *name) { + 
stdstr(&defaultLocaleName, name); +} diff --git a/src/mgr/markupfiltmgr.cpp b/src/mgr/markupfiltmgr.cpp new file mode 100644 index 0000000..8dc68ea --- /dev/null +++ b/src/mgr/markupfiltmgr.cpp @@ -0,0 +1,236 @@ +/****************************************************************************** + * swmarkupmgr.cpp - implementaion of class MarkupFilterMgr, subclass of + * used to transcode all module text to a requested + * markup. + * + * Copyright 1998 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + */ + +#include <thmlplain.h> +#include <gbfplain.h> +#include <thmlgbf.h> +#include <gbfthml.h> +#include <thmlhtml.h> +#include <gbfhtml.h> +#include <plainhtml.h> +#include <thmlhtmlhref.h> +#include <gbfhtmlhref.h> +#include <thmlrtf.h> +#include <gbfrtf.h> +#include <gbfosis.h> +#include <thmlosis.h> + +#include <markupfiltmgr.h> + +#include <swmgr.h> + + +/****************************************************************************** + * MarkupFilterMgr Constructor - initializes instance of MarkupFilterMgr + * + * ENT: + * enc - Encoding format to emit + * mark - Markup format to emit + */ + +MarkupFilterMgr::MarkupFilterMgr (char mark, char enc) + : EncodingFilterMgr(enc) { + + markup = mark; + + CreateFilters(markup); +} + + +/****************************************************************************** + * MarkupFilterMgr Destructor - Cleans up instance of MarkupFilterMgr + */ + +MarkupFilterMgr::~MarkupFilterMgr() { + if (fromthml) + delete (fromthml); + if (fromgbf) + delete (fromgbf); + if (fromplain) + delete (fromplain); + if (fromosis) + delete (fromosis); +} + +/****************************************************************************** + * MarkupFilterMgr::Markup - sets/gets markup + * + * ENT: mark - new encoding or 0 to simply get the current markup + * + * RET: markup + */ +char MarkupFilterMgr::Markup(char mark) { + if (mark && mark != markup) { + markup = mark; + ModMap::const_iterator module; + + SWFilter * oldplain = fromplain; + SWFilter * oldthml = fromthml; + SWFilter * oldgbf = fromgbf; + SWFilter * oldosis = fromosis; + + CreateFilters(markup); + + for (module = getParentMgr()->Modules.begin(); module != getParentMgr()->Modules.end(); module++) + switch (module->second->Markup()) { + case FMT_THML: + if (oldthml != fromthml) { + if (oldthml) { + if (!fromthml) { + module->second->RemoveRenderFilter(oldthml); + } + else { + module->second->ReplaceRenderFilter(oldthml, fromthml); + } + } + else if (fromthml) { + 
module->second->AddRenderFilter(fromthml); + } + } + break; + case FMT_GBF: + if (oldgbf != fromgbf) { + if (oldgbf) { + if (!fromgbf) { + module->second->RemoveRenderFilter(oldgbf); + } + else { + module->second->ReplaceRenderFilter(oldgbf, fromgbf); + } + } + else if (fromgbf) { + module->second->AddRenderFilter(fromgbf); + } + break; + } + case FMT_PLAIN: + if (oldplain != fromplain) { + if (oldplain) { + if (!fromplain) { + module->second->RemoveRenderFilter(oldplain); + } + else { + module->second->ReplaceRenderFilter(oldplain, fromplain); + } + } + else if (fromplain) { + module->second->AddRenderFilter(fromplain); + } + break; + } + case FMT_OSIS: + if (oldosis != fromosis) { + if (oldosis) { + if (!fromosis) { + module->second->RemoveRenderFilter(oldosis); + } + else { + module->second->ReplaceRenderFilter(oldosis, fromosis); + } + } + else if (fromosis) { + module->second->AddRenderFilter(fromosis); + } + break; + } + } + + if (oldthml) + delete oldthml; + if (oldgbf) + delete oldgbf; + if (oldplain) + delete oldplain; + if (oldosis) + delete oldosis; + } + return markup; +} + +void MarkupFilterMgr::AddRenderFilters(SWModule *module, ConfigEntMap §ion) { + switch (module->Markup()) { + case FMT_THML: + if (fromthml) + module->AddRenderFilter(fromthml); + break; + case FMT_GBF: + if (fromgbf) + module->AddRenderFilter(fromgbf); + break; + case FMT_PLAIN: + if (fromplain) + module->AddRenderFilter(fromplain); + break; + case FMT_OSIS: + if (fromosis) + module->AddRenderFilter(fromosis); + break; + } +} + +void MarkupFilterMgr::CreateFilters(char markup) { + + switch (markup) { + case FMT_PLAIN: + fromplain = NULL; + fromthml = new ThMLPlain(); + fromgbf = new GBFPlain(); + fromosis = NULL; + break; + case FMT_THML: + fromplain = NULL; + fromthml = NULL; + fromgbf = new GBFThML(); + fromosis = NULL; + break; + case FMT_GBF: + fromplain = NULL; + fromthml = new ThMLGBF(); + fromgbf = NULL; + fromosis = NULL; + break; + case FMT_HTML: + fromplain = new 
PLAINHTML(); + fromthml = new ThMLHTML(); + fromgbf = new GBFHTML(); + fromosis = NULL; + break; + case FMT_HTMLHREF: + fromplain = NULL; + fromthml = new ThMLHTMLHREF(); + fromgbf = new GBFHTMLHREF(); + fromosis = NULL; + break; + case FMT_RTF: + fromplain = NULL; + fromthml = new ThMLRTF(); + fromgbf = new GBFRTF(); + fromosis = NULL; + break; + case FMT_OSIS: + fromplain = NULL; + fromthml = new ThMLOSIS(); + fromgbf = new GBFOSIS(); + fromosis = NULL; + break; + } + +} diff --git a/src/mgr/swcacher.cpp b/src/mgr/swcacher.cpp new file mode 100644 index 0000000..8128a70 --- /dev/null +++ b/src/mgr/swcacher.cpp @@ -0,0 +1,43 @@ +/****************************************************************************** + * swcacher.h - definition of class SWCacher used to provide an interface for + * objects that cache and want a standard interface for cleaning up. + * + * $Id: swcacher.cpp,v 1.1 2002/03/16 01:12:37 scribe Exp $ + * + * Copyright 1998 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + */ + +#include <swcacher.h> + + +SWCacher::SWCacher() { +} + + +SWCacher::~SWCacher() { +} + + +void SWCacher::flush() { +} + +long SWCacher::resourceConsumption() { + return 0; +} + +long SWCacher::lastAccess() { + return 0; +} diff --git a/src/mgr/swconfig.cpp b/src/mgr/swconfig.cpp new file mode 100644 index 0000000..d73d475 --- /dev/null +++ b/src/mgr/swconfig.cpp @@ -0,0 +1,163 @@ +/****************************************************************************** + * swconfig.cpp - implementation of Class SWConfig used for saving and + * retrieval of configuration information + * + * $Id: swconfig.cpp,v 1.9 2002/07/28 01:48:38 scribe Exp $ + * + * Copyright 1998 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + */ + +#include <swconfig.h> +#include <utilfuns.h> + + +SWConfig::SWConfig(const char * ifilename) { + filename = ifilename; + Load(); +} + + +SWConfig::~SWConfig() { +} + + +char SWConfig::getline(FILE *fp, string &line) +{ + char retval = 0; + char buf[255]; + + line = ""; + + while (fgets(buf, 254, fp)) { + while (buf[strlen(buf)-1] == '\n' || buf[strlen(buf)-1] == '\r') + buf[strlen(buf)-1] = 0; + + if (buf[strlen(buf)-1] == '\\') { + buf[strlen(buf)-1] = 0; + line += buf; + continue; + } + line += buf; + + if (strlen(buf) < 253) { + retval = 1; + break; + } + } + return retval; +} + + +void SWConfig::Load() { + FILE *cfile; + char *buf, *data; + string line; + ConfigEntMap cursect; + string sectname; + bool first = true; + + Sections.erase(Sections.begin(), Sections.end()); + + if ((cfile = fopen(filename.c_str(), "r"))) { + while (getline(cfile, line)) { + buf = new char [ line.length() + 1 ]; + strcpy(buf, line.c_str()); + if (*strstrip(buf) == '[') { + if (!first) + Sections.insert(SectionMap::value_type(sectname, cursect)); + else first = false; + + cursect.erase(cursect.begin(), cursect.end()); + + strtok(buf, "]"); + sectname = buf+1; + } + else { + strtok(buf, "="); + if ((*buf) && (*buf != '=')) { + if ((data = strtok(NULL, ""))) + cursect.insert(ConfigEntMap::value_type(buf, strstrip(data))); + else cursect.insert(ConfigEntMap::value_type(buf, "")); + } + } + delete [] buf; + } + if (!first) + Sections.insert(SectionMap::value_type(sectname, cursect)); + + fclose(cfile); + } +} + + +void SWConfig::Save() { + FILE *cfile; + string buf; + SectionMap::iterator sit; + ConfigEntMap::iterator entry; + string sectname; + + if ((cfile = fopen(filename.c_str(), "w"))) { + + for (sit = Sections.begin(); sit != Sections.end(); sit++) { + buf = "\n["; + buf += (*sit).first.c_str(); + buf += "]\n"; + fputs(buf.c_str(), cfile); + for (entry = (*sit).second.begin(); entry != (*sit).second.end(); entry++) { + buf = (*entry).first.c_str(); + buf += "="; + buf += 
(*entry).second.c_str(); + buf += "\n"; + fputs(buf.c_str(), cfile); + } + } + fputs("\n", cfile); // so getline will find last line + fclose(cfile); + } +} + + +void SWConfig::augment(SWConfig &addFrom) { + + SectionMap::iterator section; + ConfigEntMap::iterator entry, start, end; + + for (section = addFrom.Sections.begin(); section != addFrom.Sections.end(); section++) { + for (entry = (*section).second.begin(); entry != (*section).second.end(); entry++) { + start = Sections[section->first].lower_bound(entry->first); + end = Sections[section->first].upper_bound(entry->first); + if (start != end) { + if (((++start) != end) + || ((++(addFrom.Sections[section->first].lower_bound(entry->first))) != addFrom.Sections[section->first].upper_bound(entry->first))) { + for (--start; start != end; start++) { + if (!strcmp(start->second.c_str(), entry->second.c_str())) + break; + } + if (start == end) + Sections[(*section).first].insert(ConfigEntMap::value_type((*entry).first, (*entry).second)); + } + else Sections[section->first][entry->first.c_str()] = entry->second.c_str(); + } + else Sections[section->first][entry->first.c_str()] = entry->second.c_str(); + } + } +} + + +ConfigEntMap & SWConfig::operator [] (const char *section) { + return Sections[section]; +} diff --git a/src/mgr/swfiltermgr.cpp b/src/mgr/swfiltermgr.cpp new file mode 100644 index 0000000..264b5a6 --- /dev/null +++ b/src/mgr/swfiltermgr.cpp @@ -0,0 +1,90 @@ +/****************************************************************************** + * swfiltermgr.cpp - definition of class SWFilterMgr used as an interface to + * manage filters on a module + * + * $Id: swfiltermgr.cpp,v 1.2 2001/11/30 12:04:34 scribe Exp $ + * + * Copyright 1998 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. 
Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +#include <swfiltermgr.h> + + +SWFilterMgr::SWFilterMgr() { +} + + +SWFilterMgr::~SWFilterMgr() { +} + + +void SWFilterMgr::setParentMgr(SWMgr *parentMgr) { + this->parentMgr = parentMgr; +} + + +SWMgr *SWFilterMgr::getParentMgr() { + return parentMgr; +} + + +void SWFilterMgr::AddGlobalOptions(SWModule * module, ConfigEntMap & section, ConfigEntMap::iterator start, ConfigEntMap::iterator end) { +} + + +void SWFilterMgr::AddLocalOptions(SWModule * module, ConfigEntMap & section, ConfigEntMap::iterator start, ConfigEntMap::iterator end) { +} + + +/** +* Adds the encoding filters which are defined in "section" to the SWModule object "module". +* @param module To this module the encoding filter(s) are added +* @param section We use this section to get a list of filters we should apply to the module +*/ + +void SWFilterMgr::AddEncodingFilters(SWModule * module, ConfigEntMap & section) { +} + + +/** +* Adds the render filters which are defined in "section" to the SWModule object "module". +* @param module To this module the render filter(s) are added +* @param section We use this section to get a list of filters we should apply to the module +*/ + +void SWFilterMgr::AddRenderFilters(SWModule * module, ConfigEntMap & section) { +} + + +/** +* Adds the strip filters which are defined in "section" to the SWModule object "module". 
+* @param module To this module the strip filter(s) are added +* @param section We use this section to get a list of filters we should apply to the module +*/ + +void SWFilterMgr::AddStripFilters(SWModule * module, ConfigEntMap & section) { +} + + +/** +* Adds the raw filters which are defined in "section" to the SWModule object "module". +* @param module To this module the raw filter(s) are added +* @param section We use this section to get a list of filters we should apply to the module +*/ + +void SWFilterMgr::AddRawFilters(SWModule * module, ConfigEntMap & section) { +} + diff --git a/src/mgr/swlocale.cpp b/src/mgr/swlocale.cpp new file mode 100644 index 0000000..d85d1eb --- /dev/null +++ b/src/mgr/swlocale.cpp @@ -0,0 +1,140 @@ +/****************************************************************************** + * swlocale.cpp - implementation of Class SWLocale used for retrieval + * of locale lookups + * + * $Id: swlocale.cpp,v 1.4 2002/07/28 01:48:38 scribe Exp $ + * + * Copyright 2000 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + */ + +#include <swlocale.h> +#include <utilfuns.h> + + +SWLocale::SWLocale(const char * ifilename) { + ConfigEntMap::iterator confEntry; + + name = 0; + description = 0; + bookAbbrevs = 0; + BMAX = 0; + books = 0; + localeSource = new SWConfig(ifilename); + + confEntry = localeSource->Sections["Meta"].find("Name"); + if (confEntry != localeSource->Sections["Meta"].end()) + stdstr(&name, (*confEntry).second.c_str()); + + confEntry = localeSource->Sections["Meta"].find("Description"); + if (confEntry != localeSource->Sections["Meta"].end()) + stdstr(&description, (*confEntry).second.c_str()); +} + + +SWLocale::~SWLocale() { + + delete localeSource; + + if (description) + delete [] description; + + if (name) + delete [] name; + + if (bookAbbrevs) + delete [] bookAbbrevs; + + if (BMAX) { + for (int i = 0; i < 2; i++) + delete [] books[i]; + delete [] BMAX; + delete [] books; + } +} + + +const char *SWLocale::translate(const char *text) { + LookupMap::iterator entry; + + entry = lookupTable.find(text); + + if (entry == lookupTable.end()) { + ConfigEntMap::iterator confEntry; + confEntry = localeSource->Sections["Text"].find(text); + if (confEntry == localeSource->Sections["Text"].end()) + lookupTable.insert(LookupMap::value_type(text, text)); + else lookupTable.insert(LookupMap::value_type(text, (*confEntry).second.c_str())); + entry = lookupTable.find(text); + } + return (*entry).second.c_str(); +} + + +const char *SWLocale::getName() { + return name; +} + + +const char *SWLocale::getDescription() { + return description; +} + + +void SWLocale::augment(SWLocale &addFrom) { + *localeSource += *addFrom.localeSource; +} + + +const struct abbrev *SWLocale::getBookAbbrevs() { + static const char *nullstr = ""; + if (!bookAbbrevs) { + ConfigEntMap::iterator it; + int i; + int size = localeSource->Sections["Book Abbrevs"].size(); + bookAbbrevs = new struct abbrev[size + 1]; + for (i = 0, it = localeSource->Sections["Book Abbrevs"].begin(); it != 
localeSource->Sections["Book Abbrevs"].end(); it++, i++) { + bookAbbrevs[i].ab = (*it).first.c_str(); + bookAbbrevs[i].book = atoi((*it).second.c_str()); + } + bookAbbrevs[i].ab = nullstr; + bookAbbrevs[i].book = -1; + } + + return bookAbbrevs; +} + + +void SWLocale::getBooks(char **iBMAX, struct sbook ***ibooks) { + if (!BMAX) { + BMAX = new char [2]; + BMAX[0] = VerseKey::builtin_BMAX[0]; + BMAX[1] = VerseKey::builtin_BMAX[1]; + + books = new struct sbook *[2]; + books[0] = new struct sbook[BMAX[0]]; + books[1] = new struct sbook[BMAX[1]]; + + for (int i = 0; i < 2; i++) { + for (int j = 0; j < BMAX[i]; j++) { + books[i][j] = VerseKey::builtin_books[i][j]; + books[i][j].name = translate(VerseKey::builtin_books[i][j].name); + } + } + } + + *iBMAX = BMAX; + *ibooks = books; +} diff --git a/src/mgr/swmgr.cpp b/src/mgr/swmgr.cpp new file mode 100644 index 0000000..ff36acc --- /dev/null +++ b/src/mgr/swmgr.cpp @@ -0,0 +1,1084 @@ +/****************************************************************************** + * swmgr.cpp - implementaion of class SWMgr used to interact with an install + * base of sword modules. + * + * $Id: swmgr.cpp,v 1.79 2002/08/09 05:53:48 scribe Exp $ + * + * Copyright 1998 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <fcntl.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#include <unixstr.h> +#endif +#include <sys/stat.h> +#ifndef _MSC_VER +#include <iostream> +#endif +#include <dirent.h> + +#include <swmgr.h> +#include <rawtext.h> +#include <rawgenbook.h> +#include <rawcom.h> +#include <hrefcom.h> +#include <rawld.h> +#include <rawld4.h> +#include <utilfuns.h> +#include <gbfplain.h> +#include <thmlplain.h> +#include <gbfstrongs.h> +#include <gbffootnotes.h> +#include <gbfheadings.h> +#include <gbfmorph.h> +#include <thmlstrongs.h> +#include <thmlfootnotes.h> +#include <thmlheadings.h> +#include <thmlmorph.h> +#include <thmllemma.h> +#include <thmlscripref.h> +#include <cipherfil.h> +#include <rawfiles.h> +#include <ztext.h> +#include <zld.h> +#include <zcom.h> +#include <lzsscomprs.h> +#include <utf8greekaccents.h> +#include <utf8cantillation.h> +#include <utf8hebrewpoints.h> +#include <greeklexattribs.h> +#include <swfiltermgr.h> + + + +#ifdef _ICU_ +#include <utf8transliterator.h> +bool SWMgr::isICU = true; +#else +bool SWMgr::isICU = false; +#endif + +#ifndef EXCLUDEZLIB +#include <zipcomprs.h> +#endif + +bool SWMgr::debug = false; + +#ifdef GLOBCONFPATH +const char *SWMgr::globalConfPath = GLOBCONFPATH; +#else +const char *SWMgr::globalConfPath = "/etc/sword.conf:/usr/local/etc/sword.conf"; +#endif + +void SWMgr::init() { + SWFilter *tmpFilter = 0; + configPath = 0; + prefixPath = 0; + configType = 0; + myconfig = 0; + mysysconfig = 0; + homeConfig = 0; + + + cipherFilters.clear(); + optionFilters.clear(); + cleanupFilters.clear(); + + tmpFilter = new GBFStrongs(); + optionFilters.insert(FilterMap::value_type("GBFStrongs", tmpFilter)); + cleanupFilters.push_back(tmpFilter); + + tmpFilter = new GBFFootnotes(); + optionFilters.insert(FilterMap::value_type("GBFFootnotes", tmpFilter)); + cleanupFilters.push_back(tmpFilter); + + tmpFilter = new GBFMorph(); + 
optionFilters.insert(FilterMap::value_type("GBFMorph", tmpFilter)); + cleanupFilters.push_back(tmpFilter); + + tmpFilter = new GBFHeadings(); + optionFilters.insert(FilterMap::value_type("GBFHeadings", tmpFilter)); + cleanupFilters.push_back(tmpFilter); + + tmpFilter = new ThMLStrongs(); + optionFilters.insert(FilterMap::value_type("ThMLStrongs", tmpFilter)); + cleanupFilters.push_back(tmpFilter); + + tmpFilter = new ThMLFootnotes(); + optionFilters.insert(FilterMap::value_type("ThMLFootnotes", tmpFilter)); + cleanupFilters.push_back(tmpFilter); + + tmpFilter = new ThMLMorph(); + optionFilters.insert(FilterMap::value_type("ThMLMorph", tmpFilter)); + cleanupFilters.push_back(tmpFilter); + + tmpFilter = new ThMLHeadings(); + optionFilters.insert(FilterMap::value_type("ThMLHeadings", tmpFilter)); + cleanupFilters.push_back(tmpFilter); + + tmpFilter = new ThMLLemma(); + optionFilters.insert(FilterMap::value_type("ThMLLemma", tmpFilter)); + cleanupFilters.push_back(tmpFilter); + + tmpFilter = new ThMLScripref(); + optionFilters.insert(FilterMap::value_type("ThMLScripref", tmpFilter)); + cleanupFilters.push_back(tmpFilter); + + tmpFilter = new UTF8GreekAccents(); + optionFilters.insert(FilterMap::value_type("UTF8GreekAccents", tmpFilter)); + cleanupFilters.push_back(tmpFilter); + + tmpFilter = new UTF8HebrewPoints(); + optionFilters.insert(FilterMap::value_type("UTF8HebrewPoints", tmpFilter)); + cleanupFilters.push_back(tmpFilter); + + tmpFilter = new UTF8Cantillation(); + optionFilters.insert(FilterMap::value_type("UTF8Cantillation", tmpFilter)); + cleanupFilters.push_back(tmpFilter); + + tmpFilter = new GreekLexAttribs(); + optionFilters.insert(FilterMap::value_type("GreekLexAttribs", tmpFilter)); + cleanupFilters.push_back(tmpFilter); + +// UTF8Transliterator needs to be handled differently because it should always available as an option, for all modules +#ifdef _ICU_ + transliterator = new UTF8Transliterator(); + 
optionFilters.insert(FilterMap::value_type("UTF8Transliterator", transliterator)); + options.push_back(transliterator->getOptionName()); + cleanupFilters.push_back(transliterator); +#endif + + gbfplain = new GBFPlain(); + cleanupFilters.push_back(gbfplain); + + thmlplain = new ThMLPlain(); + cleanupFilters.push_back(thmlplain); +} + + +SWMgr::SWMgr(SWFilterMgr *filterMgr) { + commonInit(0, 0, true, filterMgr); +} + + +SWMgr::SWMgr(SWConfig *iconfig, SWConfig *isysconfig, bool autoload, SWFilterMgr *filterMgr) { + commonInit(iconfig, isysconfig, autoload, filterMgr); +} + + +void SWMgr::commonInit(SWConfig * iconfig, SWConfig * isysconfig, bool autoload, SWFilterMgr *filterMgr) { + this->filterMgr = filterMgr; + if (filterMgr) + filterMgr->setParentMgr(this); + + init(); + + if (iconfig) { + config = iconfig; + myconfig = 0; + } + else config = 0; + if (isysconfig) { + sysconfig = isysconfig; + mysysconfig = 0; + } + else sysconfig = 0; + + if (autoload) + Load(); +} + + +SWMgr::SWMgr(const char *iConfigPath, bool autoload, SWFilterMgr *filterMgr) { + + string path; + + this->filterMgr = filterMgr; + if (filterMgr) + filterMgr->setParentMgr(this); + + init(); + + path = iConfigPath; + if ((iConfigPath[strlen(iConfigPath)-1] != '\\') && (iConfigPath[strlen(iConfigPath)-1] != '/')) + path += "/"; + if (FileMgr::existsFile(path.c_str(), "mods.conf")) { + stdstr(&prefixPath, path.c_str()); + path += "mods.conf"; + stdstr(&configPath, path.c_str()); + } + else { + if (FileMgr::existsDir(path.c_str(), "mods.d")) { + stdstr(&prefixPath, path.c_str()); + path += "mods.d"; + stdstr(&configPath, path.c_str()); + configType = 1; + } + } + + config = 0; + sysconfig = 0; + + if (autoload && configPath) + Load(); +} + + +SWMgr::~SWMgr() { + + DeleteMods(); + + for (FilterList::iterator it = cleanupFilters.begin(); it != cleanupFilters.end(); it++) + delete (*it); + + if (homeConfig) + delete homeConfig; + + if (myconfig) + delete myconfig; + + if (prefixPath) + delete [] 
prefixPath; + + if (configPath) + delete [] configPath; + + if (filterMgr) + delete filterMgr; +} + + +void SWMgr::findConfig(char *configType, char **prefixPath, char **configPath, list<string> *augPaths) { + string path; + ConfigEntMap::iterator entry; + ConfigEntMap::iterator lastEntry; + + char *envsworddir = getenv ("SWORD_PATH"); + char *envhomedir = getenv ("HOME"); + + *configType = 0; + +#ifndef _MSC_VER + // check working directory +if (debug) + std::cerr << "Checking working directory for mods.conf..."; +#endif + + if (FileMgr::existsFile(".", "mods.conf")) { + +#ifndef _MSC_VER +if (debug) + std::cerr << "found\n"; +#endif + + stdstr(prefixPath, "./"); + stdstr(configPath, "./mods.conf"); + return; + } + +#ifndef _MSC_VER +if (debug) + std::cerr << "\nChecking working directory for mods.d..."; +#endif + + if (FileMgr::existsDir(".", "mods.d")) { + +#ifndef _MSC_VER +if (debug) + std::cerr << "found\n"; +#endif + + stdstr(prefixPath, "./"); + stdstr(configPath, "./mods.d"); + *configType = 1; + return; + } + + + // check environment variable SWORD_PATH +#ifndef _MSC_VER +if (debug) + std::cerr << "\nChecking SWORD_PATH..."; +#endif + + if (envsworddir != NULL) { + +#ifndef _MSC_VER +if (debug) + std::cerr << "found (" << envsworddir << ")\n"; +#endif + + path = envsworddir; + if ((envsworddir[strlen(envsworddir)-1] != '\\') && (envsworddir[strlen(envsworddir)-1] != '/')) + path += "/"; + +#ifndef _MSC_VER +if (debug) + std::cerr << "\nChecking $SWORD_PATH for mods.conf..."; +#endif + + if (FileMgr::existsFile(path.c_str(), "mods.conf")) { + +#ifndef _MSC_VER +if (debug) + std::cerr << "found\n"; +#endif + + stdstr(prefixPath, path.c_str()); + path += "mods.conf"; + stdstr(configPath, path.c_str()); + return; + } + +#ifndef _MSC_VER +if (debug) + std::cerr << "\nChecking $SWORD_PATH for mods.d..."; +#endif + + if (FileMgr::existsDir(path.c_str(), "mods.d")) { + +#ifndef _MSC_VER +if (debug) + std::cerr << "found\n"; +#endif + + stdstr(prefixPath, 
path.c_str()); + path += "mods.d"; + stdstr(configPath, path.c_str()); + *configType = 1; + return; + } + } + + + // check for systemwide globalConfPath + +#ifndef _MSC_VER +if (debug) + std::cerr << "\nParsing " << globalConfPath << "..."; +#endif + + char *globPaths = 0; + char *gfp; + stdstr(&globPaths, globalConfPath); + for (gfp = strtok(globPaths, ":"); gfp; gfp = strtok(0, ":")) { + + #ifndef _MSC_VER +if (debug) + std::cerr << "\nChecking for " << gfp << "..."; +#endif + + if (FileMgr::existsFile(gfp)) + break; + } + + if (gfp) { + +#ifndef _MSC_VER +if (debug) + std::cerr << "found\n"; +#endif + + SWConfig etcconf(gfp); + if ((entry = etcconf.Sections["Install"].find("DataPath")) != etcconf.Sections["Install"].end()) { + path = (*entry).second; + if (((*entry).second.c_str()[strlen((*entry).second.c_str())-1] != '\\') && ((*entry).second.c_str()[strlen((*entry).second.c_str())-1] != '/')) + path += "/"; + +#ifndef _MSC_VER +if (debug) + std::cerr << "DataPath in " << gfp << " is set to: " << path; +#endif + +#ifndef _MSC_VER +if (debug) + std::cerr << "\nChecking for mods.conf in DataPath "; +#endif + if (FileMgr::existsFile(path.c_str(), "mods.conf")) { + +#ifndef _MSC_VER +if (debug) + std::cerr << "found\n"; +#endif + + stdstr(prefixPath, path.c_str()); + path += "mods.conf"; + stdstr(configPath, path.c_str()); + *configType = 1; + } + +#ifndef _MSC_VER +if (debug) + std::cerr << "\nChecking for mods.d in DataPath "; +#endif + + if (FileMgr::existsDir(path.c_str(), "mods.d")) { + +#ifndef _MSC_VER +if (debug) + std::cerr << "found\n"; +#endif + + stdstr(prefixPath, path.c_str()); + path += "mods.d"; + stdstr(configPath, path.c_str()); + *configType = 1; + } + } + if (augPaths) { + augPaths->clear(); + entry = etcconf.Sections["Install"].lower_bound("AugmentPath"); + lastEntry = etcconf.Sections["Install"].upper_bound("AugmentPath"); + for (;entry != lastEntry; entry++) { + path = entry->second; + if 
((entry->second.c_str()[strlen(entry->second.c_str())-1] != '\\') && (entry->second.c_str()[strlen(entry->second.c_str())-1] != '/')) + path += "/"; + augPaths->push_back(path); + } + } + } + + delete [] globPaths; + if (*configType) + return; + + // check ~/.sword/ + +#ifndef _MSC_VER +if (debug) + std::cerr << "\nChecking home directory for ~/.sword/mods.conf" << path; +#endif + + if (envhomedir != NULL) { + path = envhomedir; + if ((envhomedir[strlen(envhomedir)-1] != '\\') && (envhomedir[strlen(envhomedir)-1] != '/')) + path += "/"; + path += ".sword/"; + if (FileMgr::existsFile(path.c_str(), "mods.conf")) { + +#ifndef _MSC_VER +if (debug) + std::cerr << " found\n"; +#endif + + stdstr(prefixPath, path.c_str()); + path += "mods.conf"; + stdstr(configPath, path.c_str()); + return; + } + +#ifndef _MSC_VER +if (debug) + std::cerr << "\nChecking home directory for ~/.sword/mods.d" << path; +#endif + + if (FileMgr::existsDir(path.c_str(), "mods.d")) { + +#ifndef _MSC_VER +if (debug) + std::cerr << "found\n"; +#endif + + stdstr(prefixPath, path.c_str()); + path += "mods.d"; + stdstr(configPath, path.c_str()); + *configType = 2; + return; + } + } +} + + +void SWMgr::loadConfigDir(const char *ipath) +{ + DIR *dir; + struct dirent *ent; + string newmodfile; + + if ((dir = opendir(ipath))) { + rewinddir(dir); + while ((ent = readdir(dir))) { + if ((strcmp(ent->d_name, ".")) && (strcmp(ent->d_name, ".."))) { + newmodfile = ipath; + if ((ipath[strlen(ipath)-1] != '\\') && (ipath[strlen(ipath)-1] != '/')) + newmodfile += "/"; + newmodfile += ent->d_name; + if (config) { + SWConfig tmpConfig(newmodfile.c_str()); + *config += tmpConfig; + } + else config = myconfig = new SWConfig(newmodfile.c_str()); + } + } + closedir(dir); + if (!config) { // if no .conf file exist yet, create a default + newmodfile = ipath; + if ((ipath[strlen(ipath)-1] != '\\') && (ipath[strlen(ipath)-1] != '/')) + newmodfile += "/"; + newmodfile += "globals.conf"; + config = myconfig = new 
SWConfig(newmodfile.c_str()); + } + } +} + + +void SWMgr::augmentModules(const char *ipath) { + string path = ipath; + if ((ipath[strlen(ipath)-1] != '\\') && (ipath[strlen(ipath)-1] != '/')) + path += "/"; + if (FileMgr::existsDir(path.c_str(), "mods.d")) { + char *savePrefixPath = 0; + char *saveConfigPath = 0; + SWConfig *saveConfig = 0; + stdstr(&savePrefixPath, prefixPath); + stdstr(&prefixPath, path.c_str()); + path += "mods.d"; + stdstr(&saveConfigPath, configPath); + stdstr(&configPath, path.c_str()); + saveConfig = config; + config = myconfig = 0; + loadConfigDir(configPath); + + CreateMods(); + + stdstr(&prefixPath, savePrefixPath); + delete []savePrefixPath; + stdstr(&configPath, saveConfigPath); + delete []saveConfigPath; + (*saveConfig) += *config; + homeConfig = myconfig; + config = myconfig = saveConfig; + } +} + + +/*********************************************************************** + * SWMgr::Load - loads actual modules + * + * RET: status - 0 = ok; -1 no config found; 1 = no modules installed + * + */ + +signed char SWMgr::Load() { + signed char ret = 0; + + if (!config) { // If we weren't passed a config object at construction, find a config file + if (!configPath) // If we weren't passed a config path at construction... 
+ findConfig(&configType, &prefixPath, &configPath, &augPaths); + if (configPath) { + if (configType) + loadConfigDir(configPath); + else config = myconfig = new SWConfig(configPath); + } + } + + if (config) { + SectionMap::iterator Sectloop, Sectend; + ConfigEntMap::iterator Entryloop, Entryend; + + DeleteMods(); + + for (Sectloop = config->Sections.lower_bound("Globals"), Sectend = config->Sections.upper_bound("Globals"); Sectloop != Sectend; Sectloop++) { // scan thru all 'Globals' sections + for (Entryloop = (*Sectloop).second.lower_bound("AutoInstall"), Entryend = (*Sectloop).second.upper_bound("AutoInstall"); Entryloop != Entryend; Entryloop++) // scan thru all AutoInstall entries + InstallScan((*Entryloop).second.c_str()); // Scan AutoInstall entry directory for new modules and install + } + if (configType) { // force reload on config object because we may have installed new modules + delete myconfig; + config = myconfig = 0; + loadConfigDir(configPath); + } + else config->Load(); + + CreateMods(); + + for (list<string>::iterator pathIt = augPaths.begin(); pathIt != augPaths.end(); pathIt++) { + augmentModules(pathIt->c_str()); + } +// augment config with ~/.sword/mods.d if it exists --------------------- + char *envhomedir = getenv ("HOME"); + if (envhomedir != NULL && configType != 2) { // 2 = user only + string path = envhomedir; + if ((envhomedir[strlen(envhomedir)-1] != '\\') && (envhomedir[strlen(envhomedir)-1] != '/')) + path += "/"; + path += ".sword/"; + augmentModules(path.c_str()); + } +// ------------------------------------------------------------------------- + if ( !Modules.size() ) // config exists, but no modules + ret = 1; + + } + else { + SWLog::systemlog->LogError("SWMgr: Can't find 'mods.conf' or 'mods.d'. Try setting:\n\tSWORD_PATH=<directory containing mods.conf>\n\tOr see the README file for a full description of setup options (%s)", (configPath) ? 
configPath : "<configPath is null>"); + ret = -1; + } + + return ret; +} + +SWModule *SWMgr::CreateMod(string name, string driver, ConfigEntMap §ion) +{ + string description, datapath, misc1; + ConfigEntMap::iterator entry; + SWModule *newmod = 0; + string lang, sourceformat, encoding; + signed char direction, enc, markup; + + description = ((entry = section.find("Description")) != section.end()) ? (*entry).second : (string)""; + lang = ((entry = section.find("Lang")) != section.end()) ? (*entry).second : (string)"en"; + sourceformat = ((entry = section.find("SourceType")) != section.end()) ? (*entry).second : (string)""; + encoding = ((entry = section.find("Encoding")) != section.end()) ? (*entry).second : (string)""; + datapath = prefixPath; + if ((prefixPath[strlen(prefixPath)-1] != '\\') && (prefixPath[strlen(prefixPath)-1] != '/')) + datapath += "/"; + misc1 += ((entry = section.find("DataPath")) != section.end()) ? (*entry).second : (string)""; + char *buf = new char [ strlen(misc1.c_str()) + 1 ]; + char *buf2 = buf; + strcpy(buf, misc1.c_str()); +// for (; ((*buf2) && ((*buf2 == '.') || (*buf2 == '/') || (*buf2 == '\\'))); buf2++); + for (; ((*buf2) && ((*buf2 == '/') || (*buf2 == '\\'))); buf2++); + if (*buf2) + datapath += buf2; + delete [] buf; + + section["AbsoluteDataPath"] = datapath; + + if (!stricmp(sourceformat.c_str(), "GBF")) + markup = FMT_GBF; + else if (!stricmp(sourceformat.c_str(), "ThML")) + markup = FMT_THML; + else if (!stricmp(sourceformat.c_str(), "OSIS")) + markup = FMT_OSIS; + else + markup = FMT_PLAIN; + + if (!stricmp(encoding.c_str(), "SCSU")) + enc = ENC_SCSU; + else if (!stricmp(encoding.c_str(), "UTF-8")) { + enc = ENC_UTF8; + } + else enc = ENC_LATIN1; + + if ((entry = section.find("Direction")) == section.end()) { + direction = DIRECTION_LTR; + } + else if (!stricmp((*entry).second.c_str(), "rtol")) { + direction = DIRECTION_RTL; + } + else if (!stricmp((*entry).second.c_str(), "bidi")) { + direction = DIRECTION_BIDI; + } + 
else { + direction = DIRECTION_LTR; + } + + if ((!stricmp(driver.c_str(), "zText")) || (!stricmp(driver.c_str(), "zCom"))) { + SWCompress *compress = 0; + int blockType = CHAPTERBLOCKS; + misc1 = ((entry = section.find("BlockType")) != section.end()) ? (*entry).second : (string)"CHAPTER"; + if (!stricmp(misc1.c_str(), "VERSE")) + blockType = VERSEBLOCKS; + else if (!stricmp(misc1.c_str(), "CHAPTER")) + blockType = CHAPTERBLOCKS; + else if (!stricmp(misc1.c_str(), "BOOK")) + blockType = BOOKBLOCKS; + + misc1 = ((entry = section.find("CompressType")) != section.end()) ? (*entry).second : (string)"LZSS"; +#ifndef EXCLUDEZLIB + if (!stricmp(misc1.c_str(), "ZIP")) + compress = new ZipCompress(); + else +#endif + if (!stricmp(misc1.c_str(), "LZSS")) + compress = new LZSSCompress(); + + if (compress) { + if (!stricmp(driver.c_str(), "zText")) + newmod = new zText(datapath.c_str(), name.c_str(), description.c_str(), blockType, compress, 0, enc, direction, markup, lang.c_str()); + else newmod = new zCom(datapath.c_str(), name.c_str(), description.c_str(), blockType, compress, 0, enc, direction, markup, lang.c_str()); + } + } + + if (!stricmp(driver.c_str(), "RawText")) { + newmod = new RawText(datapath.c_str(), name.c_str(), description.c_str(), 0, enc, direction, markup, lang.c_str()); + } + + // backward support old drivers + if (!stricmp(driver.c_str(), "RawGBF")) { + newmod = new RawText(datapath.c_str(), name.c_str(), description.c_str(), 0, enc, direction, markup, lang.c_str()); + } + + if (!stricmp(driver.c_str(), "RawCom")) { + newmod = new RawCom(datapath.c_str(), name.c_str(), description.c_str(), 0, enc, direction, markup, lang.c_str()); + } + + if (!stricmp(driver.c_str(), "RawFiles")) { + newmod = new RawFiles(datapath.c_str(), name.c_str(), description.c_str(), 0, enc, direction, markup, lang.c_str()); + } + + if (!stricmp(driver.c_str(), "HREFCom")) { + misc1 = ((entry = section.find("Prefix")) != section.end()) ? 
(*entry).second : (string)""; + newmod = new HREFCom(datapath.c_str(), misc1.c_str(), name.c_str(), description.c_str()); + } + + if (!stricmp(driver.c_str(), "RawLD")) + newmod = new RawLD(datapath.c_str(), name.c_str(), description.c_str(), 0, enc, direction, markup, lang.c_str()); + + if (!stricmp(driver.c_str(), "RawLD4")) + newmod = new RawLD4(datapath.c_str(), name.c_str(), description.c_str(), 0, enc, direction, markup, lang.c_str()); + + if (!stricmp(driver.c_str(), "zLD")) { + SWCompress *compress = 0; + int blockCount; + misc1 = ((entry = section.find("BlockCount")) != section.end()) ? (*entry).second : (string)"200"; + blockCount = atoi(misc1.c_str()); + blockCount = (blockCount) ? blockCount : 200; + + misc1 = ((entry = section.find("CompressType")) != section.end()) ? (*entry).second : (string)"LZSS"; +#ifndef EXCLUDEZLIB + if (!stricmp(misc1.c_str(), "ZIP")) + compress = new ZipCompress(); + else +#endif + if (!stricmp(misc1.c_str(), "LZSS")) + compress = new LZSSCompress(); + + if (compress) { + newmod = new zLD(datapath.c_str(), name.c_str(), description.c_str(), blockCount, compress, 0, enc, direction, markup, lang.c_str()); + } + } + + if (!stricmp(driver.c_str(), "RawGenBook")) { + newmod = new RawGenBook(datapath.c_str(), name.c_str(), description.c_str(), 0, enc, direction, markup, lang.c_str()); + } + // if a specific module type is set in the config, use this + if ((entry = section.find("Type")) != section.end()) + newmod->Type(entry->second.c_str()); + + newmod->setConfig(§ion); + return newmod; +} + + +void SWMgr::AddGlobalOptions(SWModule *module, ConfigEntMap §ion, ConfigEntMap::iterator start, ConfigEntMap::iterator end) { + for (;start != end; start++) { + FilterMap::iterator it; + it = optionFilters.find((*start).second); + if (it != optionFilters.end()) { + module->AddOptionFilter((*it).second); // add filter to module and option as a valid option + OptionsList::iterator loop; + for (loop = options.begin(); loop != options.end(); 
loop++) { + if (!strcmp((*loop).c_str(), (*it).second->getOptionName())) + break; + } + if (loop == options.end()) // if we have not yet included the option + options.push_back((*it).second->getOptionName()); + } + } + if (filterMgr) + filterMgr->AddGlobalOptions(module, section, start, end); +#ifdef _ICU_ + module->AddOptionFilter(transliterator); +#endif +} + + +void SWMgr::AddLocalOptions(SWModule *module, ConfigEntMap §ion, ConfigEntMap::iterator start, ConfigEntMap::iterator end) +{ + for (;start != end; start++) { + FilterMap::iterator it; + it = optionFilters.find((*start).second); + if (it != optionFilters.end()) { + module->AddOptionFilter((*it).second); // add filter to module + } + } + + if (filterMgr) + filterMgr->AddLocalOptions(module, section, start, end); +} + + +void SWMgr::AddRawFilters(SWModule *module, ConfigEntMap §ion) { + string sourceformat, cipherKey; + ConfigEntMap::iterator entry; + + cipherKey = ((entry = section.find("CipherKey")) != section.end()) ? (*entry).second : (string)""; + if (!cipherKey.empty()) { + SWFilter *cipherFilter = new CipherFilter(cipherKey.c_str()); + cipherFilters.insert(FilterMap::value_type(module->Name(), cipherFilter)); + cleanupFilters.push_back(cipherFilter); + module->AddRawFilter(cipherFilter); + } + + if (filterMgr) + filterMgr->AddRawFilters(module, section); +} + + +void SWMgr::AddEncodingFilters(SWModule *module, ConfigEntMap §ion) { + + if (filterMgr) + filterMgr->AddEncodingFilters(module, section); +} + + +void SWMgr::AddRenderFilters(SWModule *module, ConfigEntMap §ion) { + string sourceformat; + ConfigEntMap::iterator entry; + + sourceformat = ((entry = section.find("SourceType")) != section.end()) ? (*entry).second : (string)""; + + // Temporary: To support old module types + // TODO: Remove at 1.6.0 release? + if (sourceformat.empty()) { + sourceformat = ((entry = section.find("ModDrv")) != section.end()) ? 
(*entry).second : (string)""; + if (!stricmp(sourceformat.c_str(), "RawGBF")) + sourceformat = "GBF"; + else sourceformat = ""; + } + +// process module - eg. follows +// if (!stricmp(sourceformat.c_str(), "GBF")) { +// module->AddRenderFilter(gbftortf); +// } + + if (filterMgr) + filterMgr->AddRenderFilters(module, section); + +} + + +void SWMgr::AddStripFilters(SWModule *module, ConfigEntMap §ion) +{ + string sourceformat; + ConfigEntMap::iterator entry; + + sourceformat = ((entry = section.find("SourceType")) != section.end()) ? (*entry).second : (string)""; + // Temporary: To support old module types + if (sourceformat.empty()) { + sourceformat = ((entry = section.find("ModDrv")) != section.end()) ? (*entry).second : (string)""; + if (!stricmp(sourceformat.c_str(), "RawGBF")) + sourceformat = "GBF"; + else sourceformat = ""; + } + + if (!stricmp(sourceformat.c_str(), "GBF")) { + module->AddStripFilter(gbfplain); + } + else if (!stricmp(sourceformat.c_str(), "ThML")) { + module->AddStripFilter(thmlplain); + } + + if (filterMgr) + filterMgr->AddStripFilters(module, section); + +} + + +void SWMgr::CreateMods() { + SectionMap::iterator it; + ConfigEntMap::iterator start; + ConfigEntMap::iterator end; + ConfigEntMap::iterator entry; + SWModule *newmod; + string driver, misc1; + for (it = config->Sections.begin(); it != config->Sections.end(); it++) { + ConfigEntMap §ion = (*it).second; + newmod = 0; + + driver = ((entry = section.find("ModDrv")) != section.end()) ? 
(*entry).second : (string)""; + if (!driver.empty()) { + newmod = CreateMod((*it).first, driver, section); + if (newmod) { + start = (*it).second.lower_bound("GlobalOptionFilter"); + end = (*it).second.upper_bound("GlobalOptionFilter"); + AddGlobalOptions(newmod, section, start, end); + + start = (*it).second.lower_bound("LocalOptionFilter"); + end = (*it).second.upper_bound("LocalOptionFilter"); + AddLocalOptions(newmod, section, start, end); + + AddRawFilters(newmod, section); + AddStripFilters(newmod, section); + AddRenderFilters(newmod, section); + AddEncodingFilters(newmod, section); + + Modules.insert(ModMap::value_type(newmod->Name(), newmod)); + } + } + } +} + + +void SWMgr::DeleteMods() { + + ModMap::iterator it; + + for (it = Modules.begin(); it != Modules.end(); it++) + delete (*it).second; + + Modules.clear(); +} + + +void SWMgr::InstallScan(const char *dirname) +{ + DIR *dir; + struct dirent *ent; + int conffd = 0; + string newmodfile; + string targetName; + + if (!access(dirname, 04)) { + if ((dir = opendir(dirname))) { + rewinddir(dir); + while ((ent = readdir(dir))) { + if ((strcmp(ent->d_name, ".")) && (strcmp(ent->d_name, ".."))) { + newmodfile = dirname; + if ((dirname[strlen(dirname)-1] != '\\') && (dirname[strlen(dirname)-1] != '/')) + newmodfile += "/"; + newmodfile += ent->d_name; + if (configType) { + if (config > 0) + close(conffd); + targetName = configPath; + if ((configPath[strlen(configPath)-1] != '\\') && (configPath[strlen(configPath)-1] != '/')) + targetName += "/"; + targetName += ent->d_name; + conffd = open(targetName.c_str(), O_WRONLY|O_CREAT, S_IREAD|S_IWRITE); + } + else { + if (conffd < 1) { + conffd = open(config->filename.c_str(), O_WRONLY|O_APPEND); + if (conffd > 0) + lseek(conffd, 0L, SEEK_END); + } + } + AddModToConfig(conffd, newmodfile.c_str()); + unlink(newmodfile.c_str()); + } + } + if (conffd > 0) + close(conffd); + closedir(dir); + } + } +} + + +char SWMgr::AddModToConfig(int conffd, const char *fname) +{ + int 
modfd; + char ch; + + SWLog::systemlog->LogTimedInformation("Found new module [%s]. Installing...", fname); + modfd = open(fname, O_RDONLY); + ch = '\n'; + write(conffd, &ch, 1); + while (read(modfd, &ch, 1) == 1) + write(conffd, &ch, 1); + ch = '\n'; + write(conffd, &ch, 1); + close(modfd); + return 0; +} + + +void SWMgr::setGlobalOption(const char *option, const char *value) +{ + for (FilterMap::iterator it = optionFilters.begin(); it != optionFilters.end(); it++) { + if ((*it).second->getOptionName()) { + if (!stricmp(option, (*it).second->getOptionName())) + (*it).second->setOptionValue(value); + } + } +} + + +const char *SWMgr::getGlobalOption(const char *option) +{ + for (FilterMap::iterator it = optionFilters.begin(); it != optionFilters.end(); it++) { + if ((*it).second->getOptionName()) { + if (!stricmp(option, (*it).second->getOptionName())) + return (*it).second->getOptionValue(); + } + } + return 0; +} + + +const char *SWMgr::getGlobalOptionTip(const char *option) +{ + for (FilterMap::iterator it = optionFilters.begin(); it != optionFilters.end(); it++) { + if ((*it).second->getOptionName()) { + if (!stricmp(option, (*it).second->getOptionName())) + return (*it).second->getOptionTip(); + } + } + return 0; +} + + +OptionsList SWMgr::getGlobalOptions() +{ + return options; +} + + +OptionsList SWMgr::getGlobalOptionValues(const char *option) +{ + OptionsList options; + for (FilterMap::iterator it = optionFilters.begin(); it != optionFilters.end(); it++) { + if ((*it).second->getOptionName()) { + if (!stricmp(option, (*it).second->getOptionName())) { + options = (*it).second->getOptionValues(); + break; // just find the first one. 
All option filters with the same option name should expect the same values + } + } + } + return options; +} + + +signed char SWMgr::setCipherKey(const char *modName, const char *key) { + FilterMap::iterator it; + ModMap::iterator it2; + + // check for filter that already exists + it = cipherFilters.find(modName); + if (it != cipherFilters.end()) { + ((CipherFilter *)(*it).second)->getCipher()->setCipherKey(key); + return 0; + } + // check if module exists + else { + it2 = Modules.find(modName); + if (it2 != Modules.end()) { + SWFilter *cipherFilter = new CipherFilter(key); + cipherFilters.insert(FilterMap::value_type(modName, cipherFilter)); + cleanupFilters.push_back(cipherFilter); + (*it2).second->AddRawFilter(cipherFilter); + return 0; + } + } + return -1; +} diff --git a/src/modules/comments/hrefcom/hrefcom.cpp b/src/modules/comments/hrefcom/hrefcom.cpp new file mode 100644 index 0000000..200e21f --- /dev/null +++ b/src/modules/comments/hrefcom/hrefcom.cpp @@ -0,0 +1,97 @@ +/****************************************************************************** + * hrefcom.cpp - code for class 'HREFCom'- a module that produces HTML HREFs + * pointing to actual text desired. Uses standard + * files: ot and nt using indexs ??.bks ??.cps ??.vss + */ + + +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> +#include <rawverse.h> +#include <hrefcom.h> + + + /****************************************************************************** + * HREFCom Constructor - Initializes data for instance of HREFCom + * + * ENT: iname - Internal name for module + * iprefix - string to prepend to each HREF (e.g. 
"file://mods/com/jfb/") + * idesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +HREFCom::HREFCom(const char *ipath, const char *iprefix, const char *iname, const char *idesc, SWDisplay *idisp) : RawVerse(ipath), SWCom(iname, idesc, idisp) +{ + prefix = 0; + stdstr(&prefix, iprefix); +} + + +/****************************************************************************** + * HREFCom Destructor - Cleans up instance of HREFCom + */ + +HREFCom::~HREFCom() +{ + if (prefix) + delete [] prefix; +} + + +/****************************************************************************** + * HREFCom::operator char * - Returns the correct verse when char * cast + * is requested + * + * RET: string buffer with verse + */ + +char *HREFCom::getRawEntry() { + long start; + unsigned short size; + char *tmpbuf; + VerseKey *key = 0; + +#ifndef _WIN32_WCE + try { +#endif + key = SWDYNAMIC_CAST(VerseKey, this->key); +#ifndef _WIN32_WCE + } + catch ( ... ) {} +#endif + if (!key) + key = new VerseKey(this->key); + + findoffset(key->Testament(), key->Index(), &start, &size); + entrySize = size; // support getEntrySize call + + unsigned long newsize = ((size + 2) + strlen(prefix)) * FILTERPAD; + if (newsize > entrybufallocsize) { + if (entrybuf) + delete [] entrybuf; + entrybuf = new char [ newsize ]; + entrybufallocsize = newsize; + } + tmpbuf = new char [ size + 10 ]; + + readtext(key->Testament(), start, size + 2, tmpbuf); + sprintf(entrybuf, "%s%s", prefix, tmpbuf); + preptext(entrybuf); + + delete [] tmpbuf; + + if (key != this->key) + delete key; + + return entrybuf; +} diff --git a/src/modules/comments/rawcom/rawcom.cpp b/src/modules/comments/rawcom/rawcom.cpp new file mode 100644 index 0000000..f71b6ec --- /dev/null +++ b/src/modules/comments/rawcom/rawcom.cpp @@ -0,0 +1,221 @@ +/****************************************************************************** + * rawcom.cpp - code for class 'RawCom'- a module that reads raw commentary + * 
files: ot and nt using indexs ??.bks ??.cps ??.vss + */ + + +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> +#include <rawverse.h> +#include <rawcom.h> + + + /****************************************************************************** + * RawCom Constructor - Initializes data for instance of RawCom + * + * ENT: iname - Internal name for module + * idesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +RawCom::RawCom(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding encoding, SWTextDirection dir, SWTextMarkup markup, const char* ilang) + : RawVerse(ipath), + SWCom(iname, idesc, idisp, encoding, dir, markup, ilang){ +} + + +/****************************************************************************** + * RawCom Destructor - Cleans up instance of RawCom + */ + +RawCom::~RawCom() +{ +} + + +/****************************************************************************** + * RawCom::getRawEntry() - Returns the correct verse when char * cast + * is requested + * + * RET: string buffer with verse + */ + +char *RawCom::getRawEntry() { + long start = 0; + unsigned short size = 0; + VerseKey *key = 0; + + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... 
) {} + if (!key) + key = new VerseKey(this->key); + + + findoffset(key->Testament(), key->Index(), &start, &size); + entrySize = size; // support getEntrySize call + + unsigned long newsize = (size + 2) * FILTERPAD; + if (newsize > entrybufallocsize) { + if (entrybuf) + delete [] entrybuf; + entrybuf = new char [ newsize ]; + entrybufallocsize = newsize; + } + *entrybuf = 0; + + readtext(key->Testament(), start, (size + 2), entrybuf); + + rawFilter(entrybuf, size, key); + + if (!isUnicode()) + preptext(entrybuf); + + if (key != this->key) + delete key; + + return entrybuf; +} + + +/****************************************************************************** + * RawCom::increment - Increments module key a number of entries + * + * ENT: steps - Number of entries to jump forward + * + * RET: *this + */ + +void RawCom::increment(int steps) { + long start; + unsigned short size; + VerseKey *tmpkey = 0; + + try { + tmpkey = SWDYNAMIC_CAST(VerseKey, key); + } + catch ( ... ) {} + if (!tmpkey) + tmpkey = new VerseKey(key); + + findoffset(tmpkey->Testament(), tmpkey->Index(), &start, &size); + + SWKey lastgood = *tmpkey; + while (steps) { + long laststart = start; + unsigned short lastsize = size; + SWKey lasttry = *tmpkey; + (steps > 0) ? (*key)++ : (*key)--; + if (tmpkey != key) + delete tmpkey; + tmpkey = 0; + try { + tmpkey = SWDYNAMIC_CAST(VerseKey, key); + } + catch ( ... ) {} + if (!tmpkey) + tmpkey = new VerseKey(key); + + if ((error = key->Error())) { + *key = lastgood; + break; + } + long index = tmpkey->Index(); + findoffset(tmpkey->Testament(), index, &start, &size); + if ( + (((laststart != start) || (lastsize != size)) // we're a different entry + && (start > 0) && (size)) // and we actually have a size + ||(!skipConsecutiveLinks)) { // or we don't want to skip consecutive links + steps += (steps < 0) ? 1 : -1; + lastgood = *tmpkey; + } + } + error = (error) ? 
KEYERR_OUTOFBOUNDS : 0; + + if (tmpkey != key) + delete tmpkey; +} + + +void RawCom::setEntry(const char *inbuf, long len) { + VerseKey *key = 0; + // see if we have a VerseKey * or decendant + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... ) {} + // if we don't have a VerseKey * decendant, create our own + if (!key) + key = new VerseKey(this->key); + + settext(key->Testament(), key->Index(), inbuf, len); + + if (this->key != key) // free our key if we created a VerseKey + delete key; +} + + +void RawCom::linkEntry(const SWKey *inkey) { + VerseKey *destkey = 0; + const VerseKey *srckey = 0; + // see if we have a VerseKey * or decendant + try { + destkey = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... ) {} + // if we don't have a VerseKey * decendant, create our own + if (!destkey) + destkey = new VerseKey(this->key); + + // see if we have a VerseKey * or decendant + try { + srckey = SWDYNAMIC_CAST(VerseKey, inkey); + } + catch ( ... ) {} + // if we don't have a VerseKey * decendant, create our own + if (!srckey) + srckey = new VerseKey(inkey); + + linkentry(destkey->Testament(), destkey->Index(), srckey->Index()); + + if (this->key != destkey) // free our key if we created a VerseKey + delete destkey; + + if (inkey != srckey) // free our key if we created a VerseKey + delete srckey; +} + + +/****************************************************************************** + * RawCom::deleteEntry - deletes this entry + * + * RET: *this + */ + +void RawCom::deleteEntry() { + + VerseKey *key = 0; + + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... 
) {} + if (!key) + key = new VerseKey(this->key); + + settext(key->Testament(), key->Index(), ""); + + if (key != this->key) + delete key; +} diff --git a/src/modules/comments/rawfiles/rawfiles.cpp b/src/modules/comments/rawfiles/rawfiles.cpp new file mode 100644 index 0000000..c073a73 --- /dev/null +++ b/src/modules/comments/rawfiles/rawfiles.cpp @@ -0,0 +1,274 @@ +/****************************************************************************** + * rawfiles.cpp - code for class 'RawFiles'- a module that produces HTML HREFs + * pointing to actual text desired. Uses standard + * files: ot and nt using indexs ??.bks ??.cps ??.vss + */ + + +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> +#include <rawverse.h> +#include <rawfiles.h> +#include <filemgr.h> + +#ifndef O_BINARY // O_BINARY is needed in Borland C++ 4.53 +#define O_BINARY 0 // If it hasn't been defined than we probably +#endif // don't need it. 
+ + + /****************************************************************************** + * RawFiles Constructor - Initializes data for instance of RawFiles + * + * ENT: iname - Internal name for module + * idesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +RawFiles::RawFiles(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : RawVerse(ipath, O_RDWR), SWCom(iname, idesc, idisp, enc, dir, mark, ilang) +{ +} + + +/****************************************************************************** + * RawFiles Destructor - Cleans up instance of RawFiles + */ + +RawFiles::~RawFiles() +{ +} + + +/****************************************************************************** + * RawFiles::getRawEntry - Returns the correct verse when char * cast + * is requested + * + * RET: string buffer with verse + */ + +char *RawFiles::getRawEntry() { + FileDesc *datafile; + long start = 0; + unsigned short size = 0; + char *tmpbuf; + VerseKey *key = 0; + +#ifndef _WIN32_WCE + try { +#endif + key = SWDYNAMIC_CAST(VerseKey, this->key); +#ifndef _WIN32_WCE + } + catch ( ... 
) {} +#endif + if (!key) + key = new VerseKey(this->key); + + findoffset(key->Testament(), key->Index(), &start, &size); + + if (entrybuf) + delete [] entrybuf; + + if (size) { + tmpbuf = new char [ (size + 2) + strlen(path) + 5 ]; + sprintf(tmpbuf,"%s/",path); + readtext(key->Testament(), start, (size + 2), tmpbuf+strlen(tmpbuf)); + datafile = FileMgr::systemFileMgr.open(tmpbuf, O_RDONLY|O_BINARY); + delete [] tmpbuf; + if (datafile->getFd() > 0) { + size = lseek(datafile->getFd(), 0, SEEK_END); + entrybuf = new char [ size * FILTERPAD ]; + memset(entrybuf, 0, size * FILTERPAD); + lseek(datafile->getFd(), 0, SEEK_SET); + read(datafile->getFd(), entrybuf, size); +// preptext(entrybuf); + } + else { + entrybuf = new char [2]; + entrybuf[0] = 0; + entrybuf[1] = 0; + } + FileMgr::systemFileMgr.close(datafile); + } + else { + entrybuf = new char [2]; + entrybuf[0] = 0; + entrybuf[1] = 0; + } + + if (key != this->key) + delete key; + + return entrybuf; +} + + +/****************************************************************************** + * RawFiles::setEntry(char *)- Update the modules current key entry with + * provided text + */ + +void RawFiles::setEntry(const char *inbuf, long len) { + FileDesc *datafile; + long start; + unsigned short size; + char *tmpbuf; + VerseKey *key = 0; + + len = (len<0)?strlen(inbuf):len; + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... 
) {} + if (!key) + key = new VerseKey(this->key); + + findoffset(key->Testament(), key->Index(), &start, &size); + + if (size) { + tmpbuf = new char [ (size + 3) + strlen(path) + 1 ]; + sprintf(tmpbuf, "%s/", path); + readtext(key->Testament(), start, (size + 2), tmpbuf+strlen(tmpbuf)); + } + else { + tmpbuf = new char [ 16 + strlen(path) + 1 ]; + sprintf(tmpbuf, "%s/%s", path, getnextfilename()); + settext(key->Testament(), key->Index(), tmpbuf+strlen(path)+1); + } + datafile = FileMgr::systemFileMgr.open(tmpbuf, O_CREAT|O_WRONLY|O_BINARY|O_TRUNC); + delete [] tmpbuf; + if (datafile->getFd() > 0) { + write(datafile->getFd(), inbuf, len); + } + FileMgr::systemFileMgr.close(datafile); + + if (key != this->key) + delete key; +} + + +/****************************************************************************** + * RawFiles::linkEntry(SWKey *)- Link the modules current key entry with + * another module entry + * + * RET: *this + */ + +void RawFiles::linkEntry(const SWKey *inkey) { + + long start; + unsigned short size; + char *tmpbuf; + const VerseKey *key = 0; + + try { + key = SWDYNAMIC_CAST(VerseKey, inkey); + } + catch ( ... ) {} + if (!key) + key = new VerseKey(this->key); + + findoffset(key->Testament(), key->Index(), &start, &size); + + if (size) { + tmpbuf = new char [ size + 2]; + readtext(key->Testament(), start, size + 2, tmpbuf); + + if (key != inkey) + delete key; + key = 0; + + try { + key = SWDYNAMIC_CAST(VerseKey, inkey); + } + catch ( ... ) {} + if (!key) + key = new VerseKey(this->key); + settext(key->Testament(), key->Index(), tmpbuf); + } + + if (key != inkey) + delete key; +} + + +/****************************************************************************** + * RawFiles::deleteEntry - deletes this entry + * + * RET: *this + */ + +void RawFiles::deleteEntry() { + + VerseKey *key = 0; + +#ifndef _WIN32_WCE + try { +#endif + key = SWDYNAMIC_CAST(VerseKey, this->key); +#ifndef _WIN32_WCE + } + catch ( ... 
) {} +#endif + if (!key) + key = new VerseKey(this->key); + + settext(key->Testament(), key->Index(), ""); + + if (key != this->key) + delete key; +} + + +/****************************************************************************** + * RawFiles::getnextfilename - generates a valid filename in which to store + * an entry + * + * RET: filename + */ + +char *RawFiles::getnextfilename() { + static char incfile[255]; + long number; + FileDesc *datafile; + + sprintf(incfile, "%s/incfile", path); + datafile = FileMgr::systemFileMgr.open(incfile, O_RDONLY|O_BINARY); + if (read(datafile->getFd(), &number, 4) != 4) + number = 0; + number++; + FileMgr::systemFileMgr.close(datafile); + + datafile = FileMgr::systemFileMgr.open(incfile, O_CREAT|O_WRONLY|O_BINARY|O_TRUNC); + write(datafile->getFd(), &number, 4); + FileMgr::systemFileMgr.close(datafile); + sprintf(incfile, "%.7ld", number-1); + return incfile; +} + + +char RawFiles::createModule (const char *path) { + char *incfile = new char [ strlen (path) + 16 ]; + static long zero = 0; + FileDesc *datafile; + + sprintf(incfile, "%s/incfile", path); + datafile = FileMgr::systemFileMgr.open(incfile, O_CREAT|O_WRONLY|O_BINARY|O_TRUNC); + delete [] incfile; + write(datafile->getFd(), &zero, 4); + FileMgr::systemFileMgr.close(datafile); + + return RawVerse::createModule (path); +} + + + diff --git a/src/modules/comments/swcom.cpp b/src/modules/comments/swcom.cpp new file mode 100644 index 0000000..1feb0cf --- /dev/null +++ b/src/modules/comments/swcom.cpp @@ -0,0 +1,30 @@ +/****************************************************************************** + * swcom.cpp - code for base class 'SWCom'- The basis for all commentary + * modules + */ + +#include <swcom.h> + + +/****************************************************************************** + * SWCom Constructor - Initializes data for instance of SWCom + * + * ENT: imodname - Internal name for module + * imoddesc - Name to display to user for module + * idisp - Display 
object to use for displaying
 *	enc, dir, mark, ilang - forwarded unchanged to SWModule
 */

SWCom::SWCom(const char *imodname, const char *imoddesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang): SWModule(imodname, imoddesc, idisp, "Commentaries", enc, dir, mark, ilang)
{
	// discard whatever key is in place after SWModule's constructor and
	// replace it with the one CreateKey() produces
	delete key;
	key = CreateKey();
}


/******************************************************************************
 * SWCom Destructor - Cleans up instance of SWCom
 */

SWCom::~SWCom()
{
}
diff --git a/src/modules/comments/zcom/zcom.cpp b/src/modules/comments/zcom/zcom.cpp
new file mode 100644
index 0000000..a0b35c3
--- /dev/null
+++ b/src/modules/comments/zcom/zcom.cpp
@@ -0,0 +1,255 @@
/******************************************************************************
 *  zcom.cpp - code for class 'zCom'- a commentary module built on zVerse
 *				and an SWCompress object
 */


#include <ctype.h>
#include <stdio.h>
#include <fcntl.h>

#ifndef __GNUC__
#include <io.h>
#else
#include <unistd.h>
#endif

#include <string.h>
#include <utilfuns.h>
#include <zverse.h>
#include <zcom.h>


/******************************************************************************
 * zCom Constructor - Initializes data for instance of zCom
 *
 * ENT:	ipath - path to data files
 *		iname - Internal name for module
 *		idesc - Name to display to user for module
 *		iblockType - verse, chapter, book, etc.
of index chunks + * icomp - Compressor object + * idisp - Display object to use for displaying + */ + +zCom::zCom(const char *ipath, const char *iname, const char *idesc, int iblockType, SWCompress *icomp, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : zVerse(ipath, -1, iblockType, icomp), SWCom(iname, idesc, idisp, enc, dir, mark, ilang)/*, SWCompress()*/ +{ + blockType = iblockType; + lastWriteKey = 0; +} + +/****************************************************************************** + * zCom Destructor - Cleans up instance of zCom + */ + +zCom::~zCom() { + flushCache(); + + if (lastWriteKey) + delete lastWriteKey; +} + +/****************************************************************************** + * zCom::getRawEntry - Returns the correct verse when char * cast + * is requested + * + * RET: string buffer with verse + */ +char *zCom::getRawEntry() { + long start = 0; + unsigned short size = 0; + VerseKey *key = 0; + + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... 
) {} + // if we don't have a VerseKey * decendant, create our own + if (!key) + key = new VerseKey(this->key); + + findoffset(key->Testament(), key->Index(), &start, &size); + entrySize = size; // support getEntrySize call + + unsigned long newsize = (size + 2) * FILTERPAD; + if (newsize > entrybufallocsize) { + if (entrybuf) + delete [] entrybuf; + entrybuf = new char [ newsize ]; + entrybufallocsize = newsize; + } + *entrybuf = 0; + + zreadtext(key->Testament(), start, (size + 2), entrybuf); + + rawFilter(entrybuf, size, key); + + if (!isUnicode()) + preptext(entrybuf); + + if (this->key != key) // free our key if we created a VerseKey + delete key; + + return entrybuf; +} + + +bool zCom::sameBlock(VerseKey *k1, VerseKey *k2) { + if (k1->Testament() != k2->Testament()) + return false; + + switch (blockType) { + case VERSEBLOCKS: + if (k1->Verse() != k2->Verse()) + return false; + case CHAPTERBLOCKS: + if (k1->Chapter() != k2->Chapter()) + return false; + case BOOKBLOCKS: + if (k1->Book() != k2->Book()) + return false; + } + return true; +} + +void zCom::setEntry(const char *inbuf, long len) { + VerseKey *key = 0; + // see if we have a VerseKey * or decendant + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... ) {} + // if we don't have a VerseKey * decendant, create our own + if (!key) + key = new VerseKey(this->key); + + + // see if we've jumped across blocks since last write + if (lastWriteKey) { + if (!sameBlock(lastWriteKey, key)) { + flushCache(); + } + delete lastWriteKey; + } + + settext(key->Testament(), key->Index(), inbuf, len); + + lastWriteKey = (VerseKey *)key->clone(); // must delete + + if (this->key != key) // free our key if we created a VerseKey + delete key; +} + + +void zCom::linkEntry(const SWKey *inkey) { + VerseKey *destkey = 0; + const VerseKey *srckey = 0; + // see if we have a VerseKey * or decendant + try { + destkey = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... 
) {} + // if we don't have a VerseKey * decendant, create our own + if (!destkey) + destkey = new VerseKey(this->key); + + // see if we have a VerseKey * or decendant + try { + srckey = (const VerseKey *) SWDYNAMIC_CAST(VerseKey, inkey); + } + catch ( ... ) { + } + // if we don't have a VerseKey * decendant, create our own + if (!srckey) + srckey = new VerseKey(inkey); + + linkentry(destkey->Testament(), destkey->Index(), srckey->Index()); + + if (this->key != destkey) // free our key if we created a VerseKey + delete destkey; + + if (inkey != srckey) // free our key if we created a VerseKey + delete srckey; +} + +/****************************************************************************** + * zCom::deleteEntry - deletes this entry + * + * RET: *this + */ + +void zCom::deleteEntry() { + + VerseKey *key = 0; + + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... ) {} + if (!key) + key = new VerseKey(this->key); + + settext(key->Testament(), key->Index(), ""); + + if (key != this->key) + delete key; +} + + +/****************************************************************************** + * zCom::increment - Increments module key a number of entries + * + * ENT: increment - Number of entries to jump forward + * + * RET: *this + */ + +void zCom::increment(int steps) { + long start; + unsigned short size; + VerseKey *tmpkey = 0; + + try { + tmpkey = SWDYNAMIC_CAST(VerseKey, key); + } + catch ( ... ) {} + if (!tmpkey) + tmpkey = new VerseKey(key); + + findoffset(tmpkey->Testament(), tmpkey->Index(), &start, &size); + + SWKey lastgood = *tmpkey; + while (steps) { + long laststart = start; + unsigned short lastsize = size; + SWKey lasttry = *tmpkey; + (steps > 0) ? (*key)++ : (*key)--; + if (tmpkey != key) + delete tmpkey; + tmpkey = 0; + try { + tmpkey = SWDYNAMIC_CAST(VerseKey, key); + } + catch ( ... 
) {} + if (!tmpkey) + tmpkey = new VerseKey(key); + + if ((error = key->Error())) { + *key = lastgood; + break; + } + long index = tmpkey->Index(); + findoffset(tmpkey->Testament(), index, &start, &size); + if ( + (((laststart != start) || (lastsize != size)) // we're a different entry + && (start > 0) && (size)) // and we actually have a size + ||(!skipConsecutiveLinks)) { // or we don't want to skip consecutive links + steps += (steps < 0) ? 1 : -1; + lastgood = *tmpkey; + } + } + error = (error) ? KEYERR_OUTOFBOUNDS : 0; + + if (tmpkey != key) + delete tmpkey; +} + diff --git a/src/modules/common/entriesblk.cpp b/src/modules/common/entriesblk.cpp new file mode 100644 index 0000000..d38cf53 --- /dev/null +++ b/src/modules/common/entriesblk.cpp @@ -0,0 +1,166 @@ +#include <entriesblk.h> +#include <stdlib.h> +#include <string.h> + +const int EntriesBlock::METAHEADERSIZE = 4; + // count(4); +const int EntriesBlock::METAENTRYSIZE = 8; + // offset(4); size(4); + +EntriesBlock::EntriesBlock(const char *iBlock, unsigned long size) { + block = (char *)calloc(1, size); + memcpy(block, iBlock, size); +} + + +EntriesBlock::EntriesBlock() { + block = (char *)calloc(1, sizeof(__u32)); +} + + +EntriesBlock::~EntriesBlock() { + free(block); +} + + +void EntriesBlock::setCount(int count) { + __u32 rawCount = archtosword32(count); + memcpy(block, &rawCount, sizeof(__u32)); +} + + +int EntriesBlock::getCount() { + __u32 count = 0; + memcpy(&count, block, sizeof(__u32)); + count = swordtoarch32(count); + return count; +} + + +void EntriesBlock::getMetaEntry(int index, unsigned long *offset, unsigned long *size) { + __u32 rawOffset = 0; + __u32 rawSize = 0; + *offset = 0; + *size = 0; + if (index >= getCount()) // assert index < count + return; + + // first 4 bytes is count, each 6 bytes after is each meta entry + memcpy(&rawOffset, block + METAHEADERSIZE + (index * METAENTRYSIZE), sizeof(rawOffset)); + memcpy(&rawSize, block + METAHEADERSIZE + (index * METAENTRYSIZE) + 
sizeof(rawOffset), sizeof(rawSize)); + + *offset = (unsigned long)swordtoarch32(rawOffset); + *size = (unsigned long)swordtoarch32(rawSize); +} + + +void EntriesBlock::setMetaEntry(int index, unsigned long offset, unsigned long size) { + __u32 rawOffset = archtosword32(offset); + __u32 rawSize = archtosword32(size); + + if (index >= getCount()) // assert index < count + return; + + // first 4 bytes is count, each 6 bytes after is each meta entry + memcpy(block + METAHEADERSIZE + (index * METAENTRYSIZE), &rawOffset, sizeof(rawOffset)); + memcpy(block + METAHEADERSIZE + (index * METAENTRYSIZE) + sizeof(rawOffset), &rawSize, sizeof(rawSize)); +} + + +const char *EntriesBlock::getRawData(unsigned long *retSize) { + unsigned long max = 4; + int loop; + unsigned long offset; + unsigned long size; + for (loop = 0; loop < getCount(); loop++) { + getMetaEntry(loop, &offset, &size); + max = ((offset + size) > max) ? (offset + size) : max; + } + *retSize = max; + return block; +} + + +int EntriesBlock::addEntry(const char *entry) { + unsigned long dataSize; + getRawData(&dataSize); + unsigned long len = strlen(entry); + unsigned long offset; + unsigned long size; + int count = getCount(); + unsigned long dataStart = METAHEADERSIZE + (count * METAENTRYSIZE); + // new meta entry + new data size + 1 because null + block = (char *)realloc(block, dataSize + METAENTRYSIZE + len + 1); + // shift right to make room for new meta entry + memmove(block + dataStart + METAENTRYSIZE, block + dataStart, dataSize - dataStart); + + for (int loop = 0; loop < count; loop++) { + getMetaEntry(loop, &offset, &size); + if (offset) { // if not a deleted entry + offset += METAENTRYSIZE; + setMetaEntry(loop, offset, size); + } + } + + offset = dataSize; // original dataSize before realloc + size = len + 1; + // add our text to the end + memcpy(block + offset + METAENTRYSIZE, entry, size); + // increment count + setCount(count + 1); + // add our meta entry + setMetaEntry(count, offset + METAENTRYSIZE, 
size); + // return index of our new entry + return count; +} + + +const char *EntriesBlock::getEntry(int entryIndex) { + unsigned long offset; + unsigned long size; + static char *empty = ""; + + getMetaEntry(entryIndex, &offset, &size); + return (offset) ? block+offset : empty; +} + + +unsigned long EntriesBlock::getEntrySize(int entryIndex) { + unsigned long offset; + unsigned long size; + + getMetaEntry(entryIndex, &offset, &size); + return (offset) ? size : 0; +} + + +void EntriesBlock::removeEntry(int entryIndex) { + unsigned long offset; + unsigned long size, size2; + unsigned long dataSize; + getRawData(&dataSize); + getMetaEntry(entryIndex, &offset, &size); + unsigned long len = size - 1; + int count = getCount(); + unsigned long dataStart = METAHEADERSIZE + (count * METAENTRYSIZE); + + if (!offset) // already deleted + return; + + // shift left to retrieve space used for old entry + memmove(block + offset, block + offset + size, dataSize - (offset + size)); + + // fix offset for all entries after our entry that were shifted left + for (int loop = entryIndex + 1; loop < count; loop++) { + getMetaEntry(loop, &offset, &size2); + if (offset) { // if not a deleted entry + offset -= size; + setMetaEntry(loop, offset, size2); + } + } + + // zero out our meta entry + setMetaEntry(entryIndex, 0L, 0); +} + + diff --git a/src/modules/common/lzsscomprs.cpp b/src/modules/common/lzsscomprs.cpp new file mode 100644 index 0000000..3606fbc --- /dev/null +++ b/src/modules/common/lzsscomprs.cpp @@ -0,0 +1,665 @@ +/****************************************************************************** + * lzsscomprs.cpp - code for class 'LZSSCompress'- a driver class that + * provides LZSS compression + */ + +#include <string.h> +#include <stdlib.h> +#include <lzsscomprs.h> + + +/****************************************************************************** + * LZSSCompress Statics + */ + +// m_ring_buffer is a text buffer. 
// It contains "nodes" of
// uncompressed text that can be indexed by position.  That is,
// a substring of the ring buffer can be indexed by a position
// and a length.  When decoding, the compressed text may contain
// a position in the ring buffer and a count of the number of
// bytes from the ring buffer that are to be moved into the
// uncompressed buffer.
//
// This ring buffer is not maintained as part of the compressed
// text.  Instead, it is reconstructed dynamically.  That is,
// it starts out empty and gets built as the text is decompressed.
//
// The ring buffer contains N bytes, with an additional F - 1 bytes
// to facilitate string comparison.

unsigned char LZSSCompress::m_ring_buffer[N + F - 1];

// m_match_position and m_match_length are set by InsertNode().
//
// These variables indicate the position in the ring buffer
// and the number of characters at that position that match
// a given string.

short int LZSSCompress::m_match_position;
short int LZSSCompress::m_match_length;

// m_lson, m_rson, and m_dad are the Japanese way of referring to
// a tree structure.  The dad is the parent and it has a right and
// left son (child).
//
// For i = 0 to N-1, m_rson[i] and m_lson[i] will be the right
// and left children of node i.
//
// For i = 0 to N-1, m_dad[i] is the parent of node i.
//
// For i = 0 to 255, m_rson[N + i + 1] is the root of the tree for
// strings that begin with the character i.  Note that this requires
// one byte characters.
//
// These nodes store values of 0...(N-1).  Memory requirements
// can be reduced by using 2-byte integers instead of full 4-byte
// integers (for 32-bit applications).  Therefore, these are
// defined as "short ints."
+ +short int LZSSCompress::m_lson[N + 1]; +short int LZSSCompress::m_rson[N + 257]; +short int LZSSCompress::m_dad[N + 1]; + + +/****************************************************************************** + * LZSSCompress Constructor - Initializes data for instance of LZSSCompress + * + */ + +LZSSCompress::LZSSCompress() : SWCompress() { +} + + +/****************************************************************************** + * LZSSCompress Destructor - Cleans up instance of LZSSCompress + */ + +LZSSCompress::~LZSSCompress() { +} + + +/****************************************************************************** + * LZSSCompress::InitTree - This function initializes the tree nodes to + * "empty" states. + */ + +void LZSSCompress::InitTree(void) { + int i; + + // For i = 0 to N - 1, m_rson[i] and m_lson[i] will be the right + // and left children of node i. These nodes need not be + // initialized. However, for debugging purposes, it is nice to + // have them initialized. Since this is only used for compression + // (not decompression), I don't mind spending the time to do it. + // + // For the same range of i, m_dad[i] is the parent of node i. + // These are initialized to a known value that can represent + // a "not used" state. + + for (i = 0; i < N; i++) { + m_lson[i] = NOT_USED; + m_rson[i] = NOT_USED; + m_dad[i] = NOT_USED; + } + + // For i = 0 to 255, m_rson[N + i + 1] is the root of the tree + // for strings that begin with the character i. This is why + // the right child array is larger than the left child array. + // These are also initialzied to a "not used" state. + // + // Note that there are 256 of these, one for each of the possible + // 256 characters. + + for (i = N + 1; i <= (N + 256); i++) { + m_rson[i] = NOT_USED; + } +} + + +/****************************************************************************** + * LZSSCompress::InsertNode - This function inserts a string from the ring + * buffer into one of the trees. 
It loads the
 *				match position and length member variables
 *				for the longest match.
 *
 *				The string to be inserted is identified by
 *				the parameter Pos.  A full F bytes are
 *				inserted.  So,
 *				m_ring_buffer[Pos ... Pos+F-1]
 *				are inserted.
 *
 *				If the matched length is exactly F, then an
 *				old node is removed in favor of the new one
 *				(because the old one will be deleted
 *				sooner).
 *
 *				Note that Pos plays a dual role.  It is
 *				used as both a position in the ring buffer
 *				and also as a tree node.
 *				m_ring_buffer[Pos] defines a character that
 *				is used to identify a tree node.
 *
 * ENT:	Pos	- position in the buffer
 *
 * Sets m_match_length (0 when nothing was compared) and, whenever a longer
 * match is found, m_match_position.
 */

void LZSSCompress::InsertNode(short int Pos)
{
	short int i;
	short int p;
	int cmp;
	unsigned char * key;

/*
	ASSERT(Pos >= 0);
	ASSERT(Pos < N);
*/

	// cmp starts positive so the first step below follows m_rson[p],
	// which is where the per-character roots are stored.
	cmp = 1;
	key = &(m_ring_buffer[Pos]);

	// The last 256 entries in m_rson contain the root nodes for
	// strings that begin with a letter.  Get an index for the
	// first letter in this string.

	p = (short int) (N + 1 + key[0]);

	// Set the left and right tree nodes for this position to "not
	// used."

	m_lson[Pos] = NOT_USED;
	m_rson[Pos] = NOT_USED;

	// Haven't matched anything yet.

	m_match_length = 0;

	for ( ; ; ) {
		// Step to the child on the side chosen by the last comparison;
		// if that slot is empty, hang Pos there and we are done.
		if (cmp >= 0) {
			if (m_rson[p] != NOT_USED) {
				p = m_rson[p];
			}
			else {
				m_rson[p] = Pos;
				m_dad[Pos] = p;
				return;
			}
		}
		else {
			if (m_lson[p] != NOT_USED) {
				p = m_lson[p];
			}
			else {
				m_lson[p] = Pos;
				m_dad[Pos] = p;
				return;
			}
		}

		// Should we go to the right or the left to look for the
		// next match?
		// (byte 0 already agrees -- it selected the tree -- so the
		// comparison starts at 1; i ends up as the number of equal bytes)

		for (i = 1; i < F; i++) {
			cmp = key[i] - m_ring_buffer[p + i];
			if (cmp != 0)
				break;
		}

		// Remember the longest match seen so far; a full F-byte match
		// ends the search.
		if (i > m_match_length) {
			m_match_position = p;
			m_match_length = i;

			if (i >= F)
				break;
		}
	}

	// Only reached on a full-length match: Pos takes over p's place in the
	// tree (parent and both children), and p is dropped.

	m_dad[Pos] = m_dad[p];
	m_lson[Pos] = m_lson[p];
	m_rson[Pos] = m_rson[p];

	// NOTE(review): these stores also run when a child link is NOT_USED;
	// presumably NOT_USED indexes a spare m_dad slot -- confirm in the header.
	m_dad[ m_lson[p] ] = Pos;
	m_dad[ m_rson[p] ] = Pos;

	if (m_rson[ m_dad[p] ] == p) {
		m_rson[ m_dad[p] ] = Pos;
	}
	else {
		m_lson[ m_dad[p] ] = Pos;
	}

	// Remove "p"

	m_dad[p] = NOT_USED;
}


/******************************************************************************
 * LZSSCompress::DeleteNode	- This function removes the node "Node" from the
 *				tree.
 *
 * ENT:	Node	- node to be removed
 */

void LZSSCompress::DeleteNode(short int Node)
{
	short int  q;

/*
	ASSERT(Node >= 0);
	ASSERT(Node < (N+1));
*/

	if (m_dad[Node] == NOT_USED) { // not in tree, nothing to do
		return;
	}

	// Pick q, the node that takes Node's place.
	if (m_rson[Node] == NOT_USED) {
		// no right child: the left child (possibly NOT_USED) moves up
		q = m_lson[Node];
	}
	else if (m_lson[Node] == NOT_USED) {
		// no left child: the right child moves up
		q = m_rson[Node];
	}
	else {
		// two children: use the rightmost node of the left subtree
		q = m_lson[Node];
		if (m_rson[q] != NOT_USED) {
			do {
				q = m_rson[q];
			} while (m_rson[q] != NOT_USED);

			// unhook q from its old parent, handing over q's left
			// child, then give q Node's left subtree
			m_rson[ m_dad[q] ] = m_lson[q];
			m_dad[ m_lson[q] ] = m_dad[q];
			m_lson[q] = m_lson[Node];
			m_dad[ m_lson[Node] ] = q;
		}

		// in either case q inherits Node's right subtree
		m_rson[q] = m_rson[Node];
		m_dad[ m_rson[Node] ] = q;
	}

	// Attach q to Node's parent on the side Node used to occupy.
	m_dad[q] = m_dad[Node];

	if (m_rson[ m_dad[Node] ] == Node) {
		m_rson[ m_dad[Node] ] = q;
	}
	else {
		m_lson[ m_dad[Node] ] = q;
	}

	// Node is no longer part of any tree.
	m_dad[Node] = NOT_USED;
}


/******************************************************************************
 * LZSSCompress::Encode	- This function "encodes" the input stream into the
 *			output stream.
 *			The GetChars() and SendChars() functions are
 *			used to separate this method from the actual
 *			i/o.
 *		NOTE:	must set zlen for parent class to know length of
 *			compressed buffer.
+ */ + +void LZSSCompress::Encode(void) +{ + short int i; // an iterator + short int r; // node number in the binary tree + short int s; // position in the ring buffer + unsigned short int len; // len of initial string + short int last_match_length; // length of last match + short int code_buf_pos; // position in the output buffer + unsigned char code_buf[17]; // the output buffer + unsigned char mask; // bit mask for byte 0 of out buf + unsigned char c; // character read from string + + // Start with a clean tree. + + InitTree(); + direct = 0; // set direction needed by parent [Get|Send]Chars() + + // code_buf[0] works as eight flags. A "1" represents that the + // unit is an unencoded letter (1 byte), and a "0" represents + // that the next unit is a <position,length> pair (2 bytes). + // + // code_buf[1..16] stores eight units of code. Since the best + // we can do is store eight <position,length> pairs, at most 16 + // bytes are needed to store this. + // + // This is why the maximum size of the code buffer is 17 bytes. + + code_buf[0] = 0; + code_buf_pos = 1; + + // Mask iterates over the 8 bits in the code buffer. The first + // character ends up being stored in the low bit. + // + // bit 8 7 6 5 4 3 2 1 + // | | + // | first sequence in code buffer + // | + // last sequence in code buffer + + mask = 1; + + s = 0; + r = (short int) N - (short int) F; + + // Initialize the ring buffer with spaces... + + // Note that the last F bytes of the ring buffer are not filled. + // This is because those F bytes will be filled in immediately + // with bytes from the input stream. + + memset(m_ring_buffer, ' ', N - F); + + // Read F bytes into the last F bytes of the ring buffer. + // + // This function loads the buffer with X characters and returns + // the actual amount loaded. + + len = GetChars((char *) &(m_ring_buffer[r]), F); + + // Make sure there is something to be compressed. 
+ + if (len == 0) + return; + + // Insert the F strings, each of which begins with one or more + // 'space' characters. Note the order in which these strings + // are inserted. This way, degenerate trees will be less likely + // to occur. + + for (i = 1; i <= F; i++) { + InsertNode((short int) (r - i)); + } + + // Finally, insert the whole string just read. The + // member variables match_length and match_position are set. + + InsertNode(r); + + // Now that we're preloaded, continue till done. + + do { + + // m_match_length may be spuriously long near the end of + // text. + + if (m_match_length > len) { + m_match_length = len; + } + + // Is it cheaper to store this as a single character? If so, + // make it so. + + if (m_match_length < THRESHOLD) { + // Send one character. Remember that code_buf[0] is the + // set of flags for the next eight items. + + m_match_length = 1; + code_buf[0] |= mask; + code_buf[code_buf_pos++] = m_ring_buffer[r]; + } + + // Otherwise, we do indeed have a string that can be stored + // compressed to save space. + + else { + // The next 16 bits need to contain the position (12 bits) + // and the length (4 bits). + + code_buf[code_buf_pos++] = (unsigned char) m_match_position; + code_buf[code_buf_pos++] = (unsigned char) ( + ((m_match_position >> 4) & 0xf0) | + (m_match_length - THRESHOLD) ); + } + + // Shift the mask one bit to the left so that it will be ready + // to store the new bit. + + mask = (unsigned char) (mask << 1); + + // If the mask is now 0, then we know that we have a full set + // of flags and items in the code buffer. These need to be + // output. + + if (!mask) { + // code_buf is the buffer of characters to be output. + // code_buf_pos is the number of characters it contains. + + SendChars((char *) code_buf, code_buf_pos); + + // Reset for next buffer... + + code_buf[0] = 0; + code_buf_pos = 1; + mask = 1; + } + + last_match_length = m_match_length; + + // Delete old strings and read new bytes... 
+ + for (i = 0; i < last_match_length; i++) { + // Get next character... + + if (GetChars((char *) &c, 1) != 1) + break; + + // Delete "old strings" + + DeleteNode(s); + + // Put this character into the ring buffer. + // + // The original comment here says "If the position is near + // the end of the buffer, extend the buffer to make + // string comparison easier." + // + // That's a little misleading, because the "end" of the + // buffer is really what we consider to be the "beginning" + // of the buffer, that is, positions 0 through F. + // + // The idea is that the front end of the buffer is duplicated + // into the back end so that when you're looking at characters + // at the back end of the buffer, you can index ahead (beyond + // the normal end of the buffer) and see the characters + // that are at the front end of the buffer wihtout having + // to adjust the index. + // + // That is... + // + // 1234xxxxxxxxxxxxxxxxxxxxxxxxxxxxx1234 + // | | | + // position 0 end of buffer | + // | + // duplicate of front of buffer + + m_ring_buffer[s] = c; + + if (s < F - 1) { + m_ring_buffer[s + N] = c; + } + + // Increment the position, and wrap around when we're at + // the end. Note that this relies on N being a power of 2. + + s = (short int) ( (s + 1) & (N - 1) ); + r = (short int) ( (r + 1) & (N - 1) ); + + // Register the string that is found in + // m_ring_buffer[r..r+F-1]. + + InsertNode(r); + } + + // If we didn't quit because we hit the last_match_length, + // then we must have quit because we ran out of characters + // to process. + + while (i++ < last_match_length) { + DeleteNode(s); + + s = (short int) ( (s + 1) & (N - 1) ); + r = (short int) ( (r + 1) & (N - 1) ); + + // Note that len hitting 0 is the key that causes the + // do...while() to terminate. This is the only place + // within the loop that len is modified. + // + // Its original value is F (or a number less than F for + // short strings). 
+ + if (--len) { + InsertNode(r); /* buffer may not be empty. */ + } + } + + // End of do...while() loop. Continue processing until there + // are no more characters to be compressed. The variable + // "len" is used to signal this condition. + } while (len > 0); + + // There could still be something in the output buffer. Send it + // now. + + if (code_buf_pos > 1) { + // code_buf is the encoded string to send. + // code_buf_ptr is the number of characters. + + SendChars((char *) code_buf, code_buf_pos); + } + + + // must set zlen for parent class to know length of compressed buffer + zlen = zpos; +} + + +/****************************************************************************** + * LZSSCompress::Decode - This function "decodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + */ + +void LZSSCompress::Decode(void) +{ + int k; + int r; // node number + unsigned char c[F]; // an array of chars + unsigned char flags; // 8 bits of flags + int flag_count; // which flag we're on + short int pos; // position in the ring buffer + short int len; // number of chars in ring buffer + unsigned long totalLen = 0; + + direct = 1; // set direction needed by parent [Get|Send]Chars() + + // Initialize the ring buffer with a common string. + // + // Note that the last F bytes of the ring buffer are not filled. + + memset(m_ring_buffer, ' ', N - F); + + r = N - F; + + flags = (char) 0; + flag_count = 0; + + for ( ; ; ) { + + // If there are more bits of interest in this flag, then + // shift that next interesting bit into the 1's position. + // + // If this flag has been exhausted, the next byte must + // be a flag. + + if (flag_count > 0) { + flags = (unsigned char) (flags >> 1); + flag_count--; + } + else { + // Next byte must be a flag. + + if (GetChars((char *) &flags, 1) != 1) + break; + + // Set the flag counter. 
While at first it might appear + // that this should be an 8 since there are 8 bits in the + // flag, it should really be a 7 because the shift must + // be performed 7 times in order to see all 8 bits. + + flag_count = 7; + } + + // If the low order bit of the flag is now set, then we know + // that the next byte is a single, unencoded character. + + if (flags & 1) { + if (GetChars((char *) c, 1) != 1) + break; + + if (SendChars((char *) c, 1) != 1) { + totalLen++; + break; + } + + // Add to buffer, and increment to next spot. Wrap at end. + + m_ring_buffer[r] = c[0]; + r = (short int) ( (r + 1) & (N - 1) ); + } + + // Otherwise, we know that the next two bytes are a + // <position,length> pair. The position is in 12 bits and + // the length is in 4 bits. + + else { + // Original code: + // if ((i = getc(infile)) == EOF) + // break; + // if ((j = getc(infile)) == EOF) + // break; + // i |= ((j & 0xf0) << 4); + // j = (j & 0x0f) + THRESHOLD; + // + // I've modified this to only make one input call, and + // have changed the variable names to something more + // obvious. + + if (GetChars((char *) c, 2) != 2) + break; + + // Convert these two characters into the position and + // length. Note that the length is always at least + // THRESHOLD, which is why we're able to get a length + // of 18 out of only 4 bits. + + pos = (short int) ( c[0] | ((c[1] & 0xf0) << 4) ); + + len = (short int) ( (c[1] & 0x0f) + THRESHOLD ); + + // There are now "len" characters at position "pos" in + // the ring buffer that can be pulled out. Note that + // len is never more than F. + + for (k = 0; k < len; k++) { + c[k] = m_ring_buffer[(pos + k) & (N - 1)]; + + // Add to buffer, and increment to next spot. Wrap at end. + + m_ring_buffer[r] = c[k]; + r = (short int) ( (r + 1) & (N - 1) ); + } + + // Add the "len" :characters to the output stream. 
+ + if (SendChars((char *) c, len) != (unsigned int)len) { + totalLen += len; + break; + } + } + } + slen = totalLen; +} diff --git a/src/modules/common/rawstr.cpp b/src/modules/common/rawstr.cpp new file mode 100644 index 0000000..c7363d9 --- /dev/null +++ b/src/modules/common/rawstr.cpp @@ -0,0 +1,551 @@ +/****************************************************************************** + * rawstr.cpp - code for class 'RawStr'- a module that reads raw text + * files: ot and nt using indexs ??.bks ??.cps ??.vss + * and provides lookup and parsing functions based on + * class StrKey + */ + + +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <stdlib.h> +#include <utilfuns.h> +#include <rawstr.h> +#include <sysdata.h> +/****************************************************************************** + * RawStr Statics + */ + +int RawStr::instance = 0; +char RawStr::nl = '\n'; + + +/****************************************************************************** + * RawStr Constructor - Initializes data for instance of RawStr + * + * ENT: ipath - path of the directory where data and index files are located. + * be sure to include the trailing separator (e.g. '/' or '\') + * (e.g. 'modules/texts/rawtext/webster/') + */ + +RawStr::RawStr(const char *ipath, int fileMode) +{ + char buf[127]; + + lastoff = -1; + path = 0; + stdstr(&path, ipath); + +#ifndef O_BINARY // O_BINARY is needed in Borland C++ 4.53 +#define O_BINARY 0 // If it hasn't been defined than we probably +#endif // don't need it. 
+ + if (fileMode == -1) { // try read/write if possible + fileMode = O_RDWR; + } + + sprintf(buf, "%s.idx", path); + idxfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s.dat", path); + datfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + if (datfd < 0) { + sprintf(buf, "Error: %d", errno); + perror(buf); + } + + instance++; +} + + +/****************************************************************************** + * RawStr Destructor - Cleans up instance of RawStr + */ + +RawStr::~RawStr() +{ + if (path) + delete [] path; + + --instance; + + FileMgr::systemFileMgr.close(idxfd); + FileMgr::systemFileMgr.close(datfd); +} + + +/****************************************************************************** + * RawStr::getidxbufdat - Gets the index string at the given idx offset + * NOTE: buf is allocated and must be freed by + * calling function + * + * ENT: ioffset - offset in dat file to lookup + * buf - address of pointer to allocate for storage of string + */ + +void RawStr::getidxbufdat(long ioffset, char **buf) +{ + int size; + char ch; + if (datfd > 0) { + lseek(datfd->getFd(), ioffset, SEEK_SET); + for (size = 0; read(datfd->getFd(), &ch, 1) == 1; size++) { + if ((ch == '\\') || (ch == 10) || (ch == 13)) + break; + } + *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1); + if (size) { + lseek(datfd->getFd(), ioffset, SEEK_SET); + read(datfd->getFd(), *buf, size); + } + (*buf)[size] = 0; + toupperstr_utf8(*buf); + } + else { + *buf = (*buf) ? 
(char *)realloc(*buf, 1) : (char *)malloc(1); + **buf = 0; + } +} + + +/****************************************************************************** + * RawStr::getidxbuf - Gets the index string at the given idx offset + * NOTE: buf is allocated and must be freed by + * calling function + * + * ENT: ioffset - offset in idx file to lookup + * buf - address of pointer to allocate for storage of string + */ + +void RawStr::getidxbuf(long ioffset, char **buf) +{ + char *trybuf, *targetbuf; + long offset; + + if (idxfd > 0) { + lseek(idxfd->getFd(), ioffset, SEEK_SET); + read(idxfd->getFd(), &offset, 4); + + offset = swordtoarch32(offset); + + getidxbufdat(offset, buf); + toupperstr_utf8(*buf); + } +} + + +/****************************************************************************** + * RawStr::findoffset - Finds the offset of the key string from the indexes + * + * ENT: key - key string to lookup + * start - address to store the starting offset + * size - address to store the size of the entry + * away - number of entries before of after to jump + * (default = 0) + * + * RET: error status -1 general error; -2 new file + */ + +signed char RawStr::findoffset(const char *ikey, long *start, unsigned short *size, long away, long *idxoff) +{ + char *trybuf, *targetbuf, *key, quitflag = 0; + signed char retval = -1; + long headoff, tailoff, tryoff = 0, maxoff = 0; + + if (idxfd->getFd() >=0) { + tailoff = maxoff = lseek(idxfd->getFd(), 0, SEEK_END) - 6; + retval = (tailoff >= 0) ? 0 : -2; // if NOT new file + if (*ikey) { + headoff = 0; + + key = new char [ strlen(ikey) + 1 ]; + strcpy(key, ikey); + toupperstr_utf8(key); + + trybuf = 0; + + while (headoff < tailoff) { + tryoff = (lastoff == -1) ? 
headoff + ((((tailoff / 6) - (headoff / 6))) / 2) * 6 : lastoff; + lastoff = -1; + getidxbuf(tryoff, &trybuf); + + if (!*trybuf && tryoff) { // In case of extra entry at end of idx (not first entry) + tryoff += (tryoff > (maxoff / 2))?-6:6; + retval = -1; + break; + } + + if (!strcmp(key, trybuf)) + break; + + int diff = strcmp(key, trybuf); + if (diff < 0) + tailoff = (tryoff == headoff) ? headoff : tryoff; + else headoff = tryoff; + if (tailoff == headoff + 6) { + if (quitflag++) + headoff = tailoff; + } + } + if (headoff >= tailoff) + tryoff = headoff; + if (trybuf) + free(trybuf); + delete [] key; + } + else tryoff = 0; + + lseek(idxfd->getFd(), tryoff, SEEK_SET); + + *start = *size = 0; + read(idxfd->getFd(), start, 4); + read(idxfd->getFd(), size, 2); + if (idxoff) + *idxoff = tryoff; + + *start = swordtoarch32(*start); + *size = swordtoarch16(*size); + + while (away) { + long laststart = *start; + unsigned short lastsize = *size; + long lasttry = tryoff; + tryoff += (away > 0) ? 6 : -6; + + bool bad = false; + if (((tryoff + (away*6)) < -6) || (tryoff + (away*6) > (maxoff+6))) + bad = true; + else if (lseek(idxfd->getFd(), tryoff, SEEK_SET) < 0) + bad = true; + if (bad) { + retval = -1; + *start = laststart; + *size = lastsize; + tryoff = lasttry; + if (idxoff) + *idxoff = tryoff; + break; + } + read(idxfd->getFd(), start, 4); + read(idxfd->getFd(), size, 2); + if (idxoff) + *idxoff = tryoff; + + *start = swordtoarch32(*start); + *size = swordtoarch16(*size); + + if (((laststart != *start) || (lastsize != *size)) && (*start >= 0) && (*size)) + away += (away < 0) ? 1 : -1; + } + + lastoff = tryoff; + } + else { + *start = 0; + *size = 0; + if (idxoff) + *idxoff = 0; + retval = -1; + } + return retval; +} + + +/****************************************************************************** + * RawStr::preptext - Prepares the text before returning it to external + * objects + * + * ENT: buf - buffer where text is stored and where to store the prep'd + * text. 
+ */ + +void RawStr::preptext(char *buf) { + char *to, *from, space = 0, cr = 0, realdata = 0, nlcnt = 0; + + for (to = from = buf; *from; from++) { + switch (*from) { + case 10: + if (!realdata) + continue; + space = (cr) ? 0 : 1; + cr = 0; + nlcnt++; + if (nlcnt > 1) { +// *to++ = nl; + *to++ = nl; +// nlcnt = 0; + } + continue; + case 13: + if (!realdata) + continue; + *to++ = nl; + space = 0; + cr = 1; + continue; + } + realdata = 1; + nlcnt = 0; + if (space) { + space = 0; + if (*from != ' ') { + *to++ = ' '; + from--; + continue; + } + } + *to++ = *from; + } + *to = 0; + + while (to > (buf+1)) { // remove trailing excess + to--; + if ((*to == 10) || (*to == ' ')) + *to = 0; + else break; + } +} + + +/****************************************************************************** + * RawStr::readtext - gets text at a given offset + * + * ENT: + * start - starting offset where the text is located in the file + * size - size of text entry + * buf - buffer to store text + * + */ + +void RawStr::readtext(long istart, unsigned short *isize, char **idxbuf, char **buf) +{ + char *ch; + char *idxbuflocal = 0; + getidxbufdat(istart, &idxbuflocal); + long start = istart; + + do { + if (*idxbuf) + delete [] *idxbuf; + if (*buf) + delete [] *buf; + *buf = new char [ ++(*isize) * FILTERPAD ]; + *idxbuf = new char [ (*isize) * FILTERPAD ]; + + memset(*buf, 0, *isize); + lseek(datfd->getFd(), start, SEEK_SET); + read(datfd->getFd(), *buf, (int)((*isize) - 1)); + + for (ch = *buf; *ch; ch++) { // skip over index string + if (*ch == 10) { + ch++; + break; + } + } + memmove(*buf, ch, *isize - (unsigned long)(ch-*buf)); + + // resolve link + if (!strncmp(*buf, "@LINK", 5)) { + for (ch = *buf; *ch; ch++) { // null before nl + if (*ch == 10) { + *ch = 0; + break; + } + } + findoffset(*buf + 6, &start, isize); + } + else break; + } + while (true); // while we're resolving links + + if (idxbuflocal) { + int localsize = strlen(idxbuflocal); + localsize = (localsize < (*isize - 1)) ? 
localsize : (*isize - 1); + strncpy(*idxbuf, idxbuflocal, localsize); + (*idxbuf)[localsize] = 0; + free(idxbuflocal); + } +} + + +/****************************************************************************** + * RawLD::settext - Sets text for current offset + * + * ENT: key - key for this entry + * buf - buffer to store + * len - length of buffer (0 - null terminated) + */ + +void RawStr::settext(const char *ikey, const char *buf, long len) +{ + + long start, outstart; + long idxoff; + long endoff; + long shiftSize; + unsigned short size; + unsigned short outsize; + static const char nl[] = {13, 10}; + char *tmpbuf = 0; + char *key = 0; + char *dbKey = 0; + char *idxBytes = 0; + char *outbuf = 0; + char *ch = 0; + + char errorStatus = findoffset(ikey, &start, &size, 0, &idxoff); + stdstr(&key, ikey); + toupperstr_utf8(key); + + len = (len < 0) ? strlen(buf) : len; + + getidxbufdat(start, &dbKey); + + if (strcmp(key, dbKey) < 0) { + } + else if (strcmp(key, dbKey) > 0) { + if (errorStatus != -2) // not a new file + idxoff += 6; + else idxoff = 0; + } + else if ((!strcmp(key, dbKey)) && (len>0 /*we're not deleting*/)) { // got absolute entry + do { + tmpbuf = new char [ size + 2 ]; + memset(tmpbuf, 0, size + 2); + lseek(datfd->getFd(), start, SEEK_SET); + read(datfd->getFd(), tmpbuf, (int)(size - 1)); + + for (ch = tmpbuf; *ch; ch++) { // skip over index string + if (*ch == 10) { + ch++; + break; + } + } + memmove(tmpbuf, ch, size - (unsigned short)(ch-tmpbuf)); + + // resolve link + if (!strncmp(tmpbuf, "@LINK", 5) && (len)) { + for (ch = tmpbuf; *ch; ch++) { // null before nl + if (*ch == 10) { + *ch = 0; + break; + } + } + findoffset(tmpbuf + 6, &start, &size, 0, &idxoff); + } + else break; + } + while (true); // while we're resolving links + } + + endoff = lseek(idxfd->getFd(), 0, SEEK_END); + + shiftSize = endoff - idxoff; + + if (shiftSize > 0) { + idxBytes = new char [ shiftSize ]; + lseek(idxfd->getFd(), idxoff, SEEK_SET); + read(idxfd->getFd(), idxBytes, 
shiftSize); + } + + outbuf = new char [ len + strlen(key) + 5 ]; + sprintf(outbuf, "%s%c%c", key, 13, 10); + size = strlen(outbuf); + memcpy(outbuf + size, buf, len); + size = outsize = size + (len); + + start = outstart = lseek(datfd->getFd(), 0, SEEK_END); + + outstart = archtosword32(start); + outsize = archtosword16(size); + + lseek(idxfd->getFd(), idxoff, SEEK_SET); + if (len > 0) { + lseek(datfd->getFd(), start, SEEK_SET); + write(datfd->getFd(), outbuf, (int)size); + + // add a new line to make data file easier to read in an editor + write(datfd->getFd(), &nl, 2); + + write(idxfd->getFd(), &outstart, 4); + write(idxfd->getFd(), &outsize, 2); + if (idxBytes) { + write(idxfd->getFd(), idxBytes, shiftSize); + delete [] idxBytes; + } + } + else { // delete entry + if (idxBytes) { + write(idxfd->getFd(), idxBytes+6, shiftSize-6); + lseek(idxfd->getFd(), -1, SEEK_CUR); // last valid byte + FileMgr::systemFileMgr.trunc(idxfd); // truncate index + delete [] idxBytes; + } + } + + delete [] key; + delete [] outbuf; + free(dbKey); +} + + +/****************************************************************************** + * RawLD::linkentry - links one entry to another + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * destidxoff - dest offset into .vss + * srcidxoff - source offset into .vss + */ + +void RawStr::linkentry(const char *destkey, const char *srckey) { + char *text = new char [ strlen(destkey) + 7 ]; + sprintf(text, "@LINK %s", destkey); + settext(srckey, text); + delete [] text; +} + + +/****************************************************************************** + * RawLD::CreateModule - Creates new module files + * + * ENT: path - directory to store module files + * RET: error status + */ + +signed char RawStr::createModule(const char *ipath) +{ + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd, *fd2; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) 
+ path[strlen(path)-1] = 0; + + sprintf(buf, "%s.dat", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s.idx", path); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + FileMgr::systemFileMgr.close(fd2); + + delete [] path; + + return 0; +} diff --git a/src/modules/common/rawstr4.cpp b/src/modules/common/rawstr4.cpp new file mode 100644 index 0000000..da0789b --- /dev/null +++ b/src/modules/common/rawstr4.cpp @@ -0,0 +1,555 @@ +/****************************************************************************** + * rawstr.cpp - code for class 'RawStr'- a module that reads raw text + * files: ot and nt using indexs ??.bks ??.cps ??.vss + * and provides lookup and parsing functions based on + * class StrKey + */ + +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <stdlib.h> +#include <utilfuns.h> +#include <rawstr4.h> +#include <sysdata.h> + +/****************************************************************************** + * RawStr Statics + */ + +int RawStr4::instance = 0; + + +/****************************************************************************** + * RawStr Constructor - Initializes data for instance of RawStr + * + * ENT: ipath - path of the directory where data and index files are located. + * be sure to include the trailing separator (e.g. '/' or '\') + * (e.g. 'modules/texts/rawtext/webster/') + */ + +RawStr4::RawStr4(const char *ipath, int fileMode) +{ + char buf[127]; + + nl = '\n'; + lastoff = -1; + path = 0; + stdstr(&path, ipath); + +#ifndef O_BINARY // O_BINARY is needed in Borland C++ 4.53 +#define O_BINARY 0 // If it hasn't been defined than we probably +#endif // don't need it. 
+ + if (fileMode == -1) { // try read/write if possible + fileMode = O_RDWR; + } + + sprintf(buf, "%s.idx", path); + idxfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s.dat", path); + datfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + if (datfd < 0) { + sprintf(buf, "Error: %d", errno); + perror(buf); + } + + instance++; +} + + +/****************************************************************************** + * RawStr Destructor - Cleans up instance of RawStr + */ + +RawStr4::~RawStr4() +{ + if (path) + delete [] path; + + --instance; + + FileMgr::systemFileMgr.close(idxfd); + FileMgr::systemFileMgr.close(datfd); +} + + +/****************************************************************************** + * RawStr4::getidxbufdat - Gets the index string at the given idx offset + * NOTE: buf is allocated and must be freed by + * calling function + * + * ENT: ioffset - offset in dat file to lookup + * buf - address of pointer to allocate for storage of string + */ + +void RawStr4::getidxbufdat(long ioffset, char **buf) { + int size; + char ch; + if (datfd > 0) { + lseek(datfd->getFd(), ioffset, SEEK_SET); + for (size = 0; read(datfd->getFd(), &ch, 1) == 1; size++) { + if ((ch == '\\') || (ch == 10) || (ch == 13)) + break; + } + *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1); + if (size) { + lseek(datfd->getFd(), ioffset, SEEK_SET); + read(datfd->getFd(), *buf, size); + } + (*buf)[size] = 0; + toupperstr_utf8(*buf); + } + else { + *buf = (*buf) ? 
(char *)realloc(*buf, 1) : (char *)malloc(1); + **buf = 0; + } +} + + +/****************************************************************************** + * RawStr4::getidxbuf - Gets the index string at the given idx offset + * NOTE: buf is allocated and must be freed by + * calling function + * + * ENT: ioffset - offset in idx file to lookup + * buf - address of pointer to allocate for storage of string + */ + +void RawStr4::getidxbuf(long ioffset, char **buf) +{ + char *trybuf, *targetbuf; + long offset; + + if (idxfd > 0) { + lseek(idxfd->getFd(), ioffset, SEEK_SET); + read(idxfd->getFd(), &offset, 4); + + offset = swordtoarch32(offset); + + getidxbufdat(offset, buf); + for (trybuf = targetbuf = *buf; *trybuf; trybuf++, targetbuf++) { + *targetbuf = *trybuf; + } + *targetbuf = 0; + trybuf = 0; + toupperstr_utf8(targetbuf); + } +} + + +/****************************************************************************** + * RawStr4::findoffset - Finds the offset of the key string from the indexes + * + * ENT: key - key string to lookup + * start - address to store the starting offset + * size - address to store the size of the entry + * away - number of entries before of after to jump + * (default = 0) + * + * RET: error status -1 general error; -2 new file + */ + +signed char RawStr4::findoffset(const char *ikey, long *start, unsigned long *size, long away, long *idxoff) +{ + char *trybuf, *targetbuf, *key, quitflag = 0; + signed char retval = -1; + long headoff, tailoff, tryoff = 0, maxoff = 0; + + if (idxfd->getFd() >=0) { + tailoff = maxoff = lseek(idxfd->getFd(), 0, SEEK_END) - 8; + retval = (tailoff >= 0) ? 0 : -2; // if NOT new file + if (*ikey) { + headoff = 0; + + key = new char [ strlen(ikey) + 1 ]; + strcpy(key, ikey); + toupperstr_utf8(key); + + trybuf = 0; + + while (headoff < tailoff) { + tryoff = (lastoff == -1) ? 
headoff + ((((tailoff / 8) - (headoff / 8))) / 2) * 8 : lastoff; + lastoff = -1; + getidxbuf(tryoff, &trybuf); + + if (!*trybuf) { // In case of extra entry at end of idx + tryoff += (tryoff > (maxoff / 2))?-8:8; + retval = -1; + break; + } + + if (!strcmp(key, trybuf)) + break; + + int diff = strcmp(key, trybuf); + if (diff < 0) + tailoff = (tryoff == headoff) ? headoff : tryoff; + else headoff = tryoff; + if (tailoff == headoff + 8) { + if (quitflag++) + headoff = tailoff; + } + } + if (headoff >= tailoff) + tryoff = headoff; + if (trybuf) + free(trybuf); + delete [] key; + } + else tryoff = 0; + + lseek(idxfd->getFd(), tryoff, SEEK_SET); + + *start = *size = 0; + read(idxfd->getFd(), start, 4); + read(idxfd->getFd(), size, 4); + if (idxoff) + *idxoff = tryoff; + + *start = swordtoarch32(*start); + *size = swordtoarch32(*size); + + while (away) { + long laststart = *start; + unsigned long lastsize = *size; + long lasttry = tryoff; + tryoff += (away > 0) ? 8 : -8; + + bool bad = false; + if (((tryoff + (away*8)) < -8) || (tryoff + (away*8) > (maxoff+8))) + bad = true; + else if (lseek(idxfd->getFd(), tryoff, SEEK_SET) < 0) + bad = true; + if (bad) { + retval = -1; + *start = laststart; + *size = lastsize; + tryoff = lasttry; + if (idxoff) + *idxoff = tryoff; + break; + } + read(idxfd->getFd(), start, 4); + read(idxfd->getFd(), size, 4); + if (idxoff) + *idxoff = tryoff; + + *start = swordtoarch32(*start); + *size = swordtoarch32(*size); + + if (((laststart != *start) || (lastsize != *size)) && (*start >= 0) && (*size)) + away += (away < 0) ? 1 : -1; + } + + lastoff = tryoff; + } + else { + *start = 0; + *size = 0; + if (idxoff) + *idxoff = 0; + retval = -1; + } + return retval; +} + + +/****************************************************************************** + * RawStr4::preptext - Prepares the text before returning it to external + * objects + * + * ENT: buf - buffer where text is stored and where to store the prep'd + * text. 
+ */ + +void RawStr4::preptext(char *buf) +{ + char *to, *from, space = 0, cr = 0, realdata = 0, nlcnt = 0; + + for (to = from = buf; *from; from++) { + switch (*from) { + case 10: + if (!realdata) + continue; + space = (cr) ? 0 : 1; + cr = 0; + nlcnt++; + if (nlcnt > 1) { +// *to++ = nl; + *to++ = nl; +// nlcnt = 0; + } + continue; + case 13: + if (!realdata) + continue; + *to++ = nl; + space = 0; + cr = 1; + continue; + } + realdata = 1; + nlcnt = 0; + if (space) { + space = 0; + if (*from != ' ') { + *to++ = ' '; + from--; + continue; + } + } + *to++ = *from; + } + *to = 0; + + while (to > (buf+1)) { // remove trailing excess + to--; + if ((*to == 10) || (*to == ' ')) + *to = 0; + else break; + } +} + + +/****************************************************************************** + * RawStr4::readtext - gets text at a given offset + * + * ENT: + * start - starting offset where the text is located in the file + * size - size of text entry + * buf - buffer to store text + * + */ + +void RawStr4::readtext(long istart, unsigned long *isize, char **idxbuf, char **buf) +{ + char *ch; + char *idxbuflocal = 0; + getidxbufdat(istart, &idxbuflocal); + long start = istart; + + do { + if (*idxbuf) + delete [] *idxbuf; + if (*buf) + delete [] *buf; + *buf = new char [ ++(*isize) * FILTERPAD ]; + *idxbuf = new char [ (*isize) * FILTERPAD ]; + + memset(*buf, 0, *isize); + lseek(datfd->getFd(), start, SEEK_SET); + read(datfd->getFd(), *buf, (int)((*isize) - 1)); + + for (ch = *buf; *ch; ch++) { // skip over index string + if (*ch == 10) { + ch++; + break; + } + } + memmove(*buf, ch, *isize - (unsigned long)(ch-*buf)); + + // resolve link + if (!strncmp(*buf, "@LINK", 5)) { + for (ch = *buf; *ch; ch++) { // null before nl + if (*ch == 10) { + *ch = 0; + break; + } + } + findoffset(*buf + 6, &start, isize); + } + else break; + } + while (true); // while we're resolving links + + if (idxbuflocal) { + int localsize = strlen(idxbuflocal); + localsize = (localsize < (*isize - 1)) 
? localsize : (*isize - 1); + strncpy(*idxbuf, idxbuflocal, localsize); + (*idxbuf)[localsize] = 0; + free(idxbuflocal); + } +} + + +/****************************************************************************** + * RawLD::settext - Sets text for current offset + * + * ENT: key - key for this entry + * buf - buffer to store + * len - length of buffer (0 - null terminated) + */ + +void RawStr4::setText(const char *ikey, const char *buf, long len) { + + long start, outstart; + long idxoff; + long endoff; + long shiftSize; + unsigned long size; + unsigned long outsize; + static const char nl[] = {13, 10}; + char *tmpbuf = 0; + char *key = 0; + char *dbKey = 0; + char *idxBytes = 0; + char *outbuf = 0; + char *ch = 0; + + char errorStatus = findoffset(ikey, &start, &size, 0, &idxoff); + stdstr(&key, ikey); + toupperstr_utf8(key); + + len = (len < 0) ? strlen(buf) : len; + getidxbufdat(start, &dbKey); + + if (strcmp(key, dbKey) < 0) { + } + else if (strcmp(key, dbKey) > 0) { + if (errorStatus != -2) // not a new file + idxoff += 8; + else idxoff = 0; + } + else if ((!strcmp(key, dbKey)) && (len>0/*we're not deleting*/)) { // got absolute entry + do { + tmpbuf = new char [ size + 2 ]; + memset(tmpbuf, 0, size + 2); + lseek(datfd->getFd(), start, SEEK_SET); + read(datfd->getFd(), tmpbuf, (int)(size - 1)); + + for (ch = tmpbuf; *ch; ch++) { // skip over index string + if (*ch == 10) { + ch++; + break; + } + } + memmove(tmpbuf, ch, size - (unsigned long)(ch-tmpbuf)); + + // resolve link + if (!strncmp(tmpbuf, "@LINK", 5) && (len > 0)) { + for (ch = tmpbuf; *ch; ch++) { // null before nl + if (*ch == 10) { + *ch = 0; + break; + } + } + findoffset(tmpbuf + 8, &start, &size, 0, &idxoff); + ++size; + } + else break; + } + while (true); // while we're resolving links + } + + endoff = lseek(idxfd->getFd(), 0, SEEK_END); + + shiftSize = endoff - idxoff; + + if (shiftSize > 0) { + idxBytes = new char [ shiftSize ]; + lseek(idxfd->getFd(), idxoff, SEEK_SET); + read(idxfd->getFd(), 
idxBytes, shiftSize); + } + + outbuf = new char [ len + strlen(key) + 5 ]; + sprintf(outbuf, "%s%c%c", key, 13, 10); + size = strlen(outbuf); + memcpy(outbuf + size, buf, len); + size = outsize = size + len; + + start = outstart = lseek(datfd->getFd(), 0, SEEK_END); + + outstart = archtosword32(start); + outsize = archtosword32(size); + + lseek(idxfd->getFd(), idxoff, SEEK_SET); + if (len>0) { + lseek(datfd->getFd(), start, SEEK_SET); + write(datfd->getFd(), outbuf, (long)size); + + // add a new line to make data file easier to read in an editor + write(datfd->getFd(), &nl, 2); + + write(idxfd->getFd(), &outstart, 4); + write(idxfd->getFd(), &outsize, 4); + if (idxBytes) { + write(idxfd->getFd(), idxBytes, shiftSize); + delete [] idxBytes; + } + } + else { // delete entry + if (idxBytes) { + write(idxfd->getFd(), idxBytes+8, shiftSize-8); + lseek(idxfd->getFd(), -1, SEEK_CUR); // last valid byte + FileMgr::systemFileMgr.trunc(idxfd); // truncate index + delete [] idxBytes; + } + } + + delete [] key; + delete [] outbuf; + free(dbKey); +} + + +/****************************************************************************** + * RawLD::linkentry - links one entry to another + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * destidxoff - dest offset into .vss + * srcidxoff - source offset into .vss + */ + +void RawStr4::linkentry(const char *destkey, const char *srckey) { + char *text = new char [ strlen(destkey) + 7 ]; + sprintf(text, "@LINK %s", destkey); + setText(srckey, text); + delete [] text; +} + + +/****************************************************************************** + * RawLD::CreateModule - Creates new module files + * + * ENT: path - directory to store module files + * RET: error status + */ + +signed char RawStr4::createModule(const char *ipath) +{ + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd, *fd2; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] 
== '\\')) + path[strlen(path)-1] = 0; + + sprintf(buf, "%s.dat", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s.idx", path); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + FileMgr::systemFileMgr.close(fd2); + + delete [] path; + + return 0; +} diff --git a/src/modules/common/rawverse.cpp b/src/modules/common/rawverse.cpp new file mode 100644 index 0000000..f77fbe5 --- /dev/null +++ b/src/modules/common/rawverse.cpp @@ -0,0 +1,348 @@ +/****************************************************************************** + * rawverse.cpp - code for class 'RawVerse'- a module that reads raw text + * files: ot and nt using indexs ??.bks ??.cps ??.vss + * and provides lookup and parsing functions based on + * class VerseKey + */ + + +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> + +#ifndef __GNUC__ +#include <io.h> +#include <sys/stat.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> +#include <rawverse.h> +#include <versekey.h> +#include <sysdata.h> + +#ifndef O_BINARY // O_BINARY is needed in Borland C++ 4.53 +#define O_BINARY 0 // If it hasn't been defined than we probably +#endif // don't need it. + + +/****************************************************************************** + * RawVerse Statics + */ + +int RawVerse::instance = 0; +const char *RawVerse::nl = "\r\n"; + + +/****************************************************************************** + * RawVerse Constructor - Initializes data for instance of RawVerse + * + * ENT: ipath - path of the directory where data and index files are located. + * be sure to include the trailing separator (e.g. '/' or '\') + * (e.g. 
'modules/texts/rawtext/webster/') + */ + +RawVerse::RawVerse(const char *ipath, int fileMode) +{ + char *buf; + + path = 0; + stdstr(&path, ipath); + buf = new char [ strlen(path) + 80 ]; + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + if (fileMode == -1) { // try read/write if possible + fileMode = O_RDWR; + } + + sprintf(buf, "%s/ot.vss", path); + idxfp[0] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/nt.vss", path); + idxfp[1] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/ot", path); + textfp[0] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/nt", path); + textfp[1] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + delete [] buf; + instance++; +} + + +/****************************************************************************** + * RawVerse Destructor - Cleans up instance of RawVerse + */ + +RawVerse::~RawVerse() +{ + int loop1; + + if (path) + delete [] path; + + --instance; + + for (loop1 = 0; loop1 < 2; loop1++) { + FileMgr::systemFileMgr.close(idxfp[loop1]); + FileMgr::systemFileMgr.close(textfp[loop1]); + } +} + + +/****************************************************************************** + * RawVerse::findoffset - Finds the offset of the key verse from the indexes + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * idxoff - offset into .vss + * start - address to store the starting offset + * size - address to store the size of the entry + */ + +void RawVerse::findoffset(char testmt, long idxoff, long *start, unsigned short *size) { + idxoff *= 6; + if (!testmt) + testmt = ((idxfp[1]) ? 
1:2); + + if (idxfp[testmt-1]->getFd() >= 0) { + lseek(idxfp[testmt-1]->getFd(), idxoff, SEEK_SET); + read(idxfp[testmt-1]->getFd(), start, 4); + long len = read(idxfp[testmt-1]->getFd(), size, 2); // read size + + *start = swordtoarch32(*start); + *size = swordtoarch16(*size); + + if (len < 2) { + *size = (unsigned short)((*start) ? (lseek(textfp[testmt-1]->getFd(), 0, SEEK_END) - (long)*start) : 0); // if for some reason we get an error reading size, make size to end of file + } + } + else { + *start = 0; + *size = 0; + } +} + + +/****************************************************************************** + * RawVerse::preptext - Prepares the text before returning it to external + * objects + * + * ENT: buf - buffer where text is stored and where to store the prep'd + * text. + */ + +void RawVerse::preptext(char *buf) +{ + char *to, *from, space = 0, cr = 0, realdata = 0, nlcnt = 0; + + for (to = from = buf; *from; from++) { + switch (*from) { + case 10: + if (!realdata) + continue; + space = (cr) ? 
0 : 1; + cr = 0; + nlcnt++; + if (nlcnt > 1) { +// *to++ = nl; + *to++ = 10; +// *to++ = nl[1]; +// nlcnt = 0; + } + continue; + case 13: + if (!realdata) + continue; +// *to++ = nl[0]; + *to++ = 10; + space = 0; + cr = 1; + continue; + } + realdata = 1; + nlcnt = 0; + if (space) { + space = 0; + if (*from != ' ') { + *to++ = ' '; + from--; + continue; + } + } + *to++ = *from; + } + *to = 0; + + while (to > (buf+1)) { // remove trailing excess + to--; + if ((*to == 10) || (*to == ' ')) + *to = 0; + else break; + } +} + + +/****************************************************************************** + * RawVerse::readtext - gets text at a given offset + * + * ENT: testmt - testament file to search in (0 - Old; 1 - New) + * start - starting offset where the text is located in the file + * size - size of text entry + 2 (null)(null) + * buf - buffer to store text + * + */ + +void RawVerse::readtext(char testmt, long start, unsigned short size, char *buf) { + memset(buf, 0, size+1); + if (!testmt) + testmt = ((idxfp[1]) ? 1:2); + if (size) { + if (textfp[testmt-1]->getFd() >= 0) { + lseek(textfp[testmt-1]->getFd(), start, SEEK_SET); + read(textfp[testmt-1]->getFd(), buf, (int)size - 2); + } + } +} + + +/****************************************************************************** + * RawVerse::settext - Sets text for current offset + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * idxoff - offset into .vss + * buf - buffer to store + * len - length of buffer (0 - null terminated) + */ + +void RawVerse::settext(char testmt, long idxoff, const char *buf, long len) +{ + long start, outstart; + unsigned short size; + unsigned short outsize; + + idxoff *= 6; + if (!testmt) + testmt = ((idxfp[1]) ? 1:2); + + size = outsize = (len < 0) ? 
strlen(buf) : len; + + start = outstart = lseek(textfp[testmt-1]->getFd(), 0, SEEK_END); + lseek(idxfp[testmt-1]->getFd(), idxoff, SEEK_SET); + + if (size) { + lseek(textfp[testmt-1]->getFd(), start, SEEK_SET); + write(textfp[testmt-1]->getFd(), buf, (int)size); + + // add a new line to make data file easier to read in an editor + write(textfp[testmt-1]->getFd(), nl, 2); + } + else { + start = 0; + } + + outstart = archtosword32(start); + outsize = archtosword16(size); + + write(idxfp[testmt-1]->getFd(), &outstart, 4); + write(idxfp[testmt-1]->getFd(), &outsize, 2); + + +} + + +/****************************************************************************** + * RawVerse::linkentry - links one entry to another + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * destidxoff - dest offset into .vss + * srcidxoff - source offset into .vss + */ + +void RawVerse::linkentry(char testmt, long destidxoff, long srcidxoff) { + long start; + unsigned short size; + + destidxoff *= 6; + srcidxoff *= 6; + + if (!testmt) + testmt = ((idxfp[1]) ? 
1:2); + + // get source + lseek(idxfp[testmt-1]->getFd(), srcidxoff, SEEK_SET); + read(idxfp[testmt-1]->getFd(), &start, 4); + read(idxfp[testmt-1]->getFd(), &size, 2); + + // write dest + lseek(idxfp[testmt-1]->getFd(), destidxoff, SEEK_SET); + write(idxfp[testmt-1]->getFd(), &start, 4); + write(idxfp[testmt-1]->getFd(), &size, 2); +} + + +/****************************************************************************** + * RawVerse::CreateModule - Creates new module files + * + * ENT: path - directory to store module files + * RET: error status + */ + +char RawVerse::createModule(const char *ipath) +{ + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd, *fd2; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + sprintf(buf, "%s/ot", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s/nt", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s/ot.vss", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + + sprintf(buf, "%s/nt.vss", path); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + + VerseKey vk; + vk.Headings(1); + long offset = 0; + short size = 0; + for (vk = TOP; !vk.Error(); vk++) { + write((vk.Testament() == 1) ? fd->getFd() : fd2->getFd(), &offset, 4); + write((vk.Testament() == 1) ? 
fd->getFd() : fd2->getFd(), &size, 2); + } + + FileMgr::systemFileMgr.close(fd); + FileMgr::systemFileMgr.close(fd2); + + delete [] path; +/* + RawVerse rv(path); + VerseKey mykey("Rev 22:21"); +*/ + + return 0; +} diff --git a/src/modules/common/sapphire.cpp b/src/modules/common/sapphire.cpp new file mode 100644 index 0000000..686bccb --- /dev/null +++ b/src/modules/common/sapphire.cpp @@ -0,0 +1,228 @@ +/* sapphire.cpp -- the Saphire II stream cipher class. + Dedicated to the Public Domain the author and inventor: + (Michael Paul Johnson). This code comes with no warranty. + Use it at your own risk. + Ported from the Pascal implementation of the Sapphire Stream + Cipher 9 December 1994. + Added hash pre- and post-processing 27 December 1994. + Modified initialization to make index variables key dependent, + made the output function more resistant to cryptanalysis, + and renamed to Sapphire II 2 January 1995 +*/ + + +#ifdef WIN32 +#include <memory.h> +#endif + +#ifdef UNIX +#include <memory.h> +#include <unistd.h> +#else +#ifndef _MSC_VER +#include <mem.h> +#endif +#endif + +#ifdef _WIN32_WCE +#include <string.h> +#endif + +#include "sapphire.h" + +unsigned char sapphire::keyrand(int limit, + unsigned char *user_key, + unsigned char keysize, + unsigned char *rsum, + unsigned *keypos) + { + unsigned u, // Value from 0 to limit to return. + retry_limiter, // No infinite loops allowed. + mask; // Select just enough bits. + + if (!limit) return 0; // Avoid divide by zero error. + retry_limiter = 0; + mask = 1; // Fill mask with enough bits to cover + while (mask < (unsigned)limit) // the desired range. + mask = (mask << 1) + 1; + do + { + *rsum = cards[*rsum] + user_key[(*keypos)++]; + if (*keypos >= keysize) + { + *keypos = 0; // Recycle the user key. + *rsum += keysize; // key "aaaa" != key "aaaaaaaa" + } + u = mask & *rsum; + if (++retry_limiter > 11) + u %= limit; // Prevent very rare long loops. 
+ } + while (u > (unsigned)limit); + return u; + } + +void sapphire::initialize(unsigned char *key, unsigned char keysize) + { + // Key size may be up to 256 bytes. + // Pass phrases may be used directly, with longer length + // compensating for the low entropy expected in such keys. + // Alternatively, shorter keys hashed from a pass phrase or + // generated randomly may be used. For random keys, lengths + // of from 4 to 16 bytes are recommended, depending on how + // secure you want this to be. + + int i; + unsigned char toswap, swaptemp, rsum; + unsigned keypos; + + // If we have been given no key, assume the default hash setup. + + if (keysize < 1) + { + hash_init(); + return; + } + + // Start with cards all in order, one of each. + + for (i=0;i<256;i++) + cards[i] = i; + + // Swap the card at each position with some other card. + + toswap = 0; + keypos = 0; // Start with first byte of user key. + rsum = 0; + for (i=255;i>=0;i--) + { + toswap = keyrand(i, key, keysize, &rsum, &keypos); + swaptemp = cards[i]; + cards[i] = cards[toswap]; + cards[toswap] = swaptemp; + } + + // Initialize the indices and data dependencies. + // Indices are set to different values instead of all 0 + // to reduce what is known about the state of the cards + // when the first byte is emitted. + + rotor = cards[1]; + ratchet = cards[3]; + avalanche = cards[5]; + last_plain = cards[7]; + last_cipher = cards[rsum]; + + toswap = swaptemp = rsum = 0; + keypos = 0; + } + +void sapphire::hash_init(void) + { + // This function is used to initialize non-keyed hash + // computation. + + int i, j; + + // Initialize the indices and data dependencies. + + rotor = 1; + ratchet = 3; + avalanche = 5; + last_plain = 7; + last_cipher = 11; + + // Start with cards all in inverse order. 
+ + for (i=0, j=255;i<256;i++,j--) + cards[i] = (unsigned char) j; + } + +sapphire::sapphire(unsigned char *key, unsigned char keysize) + { + if (key && keysize) + initialize(key, keysize); + } + +void sapphire::burn(void) + { + // Destroy the key and state information in RAM. + memset(cards, 0, 256); + rotor = ratchet = avalanche = last_plain = last_cipher = 0; + } + +sapphire::~sapphire() + { + burn(); + } + +unsigned char sapphire::encrypt(unsigned char b) + { +#ifdef USBINARY + // Picture a single enigma rotor with 256 positions, rewired + // on the fly by card-shuffling. + + // This cipher is a variant of one invented and written + // by Michael Paul Johnson in November, 1993. + + unsigned char swaptemp; + + // Shuffle the deck a little more. + + ratchet += cards[rotor++]; + swaptemp = cards[last_cipher]; + cards[last_cipher] = cards[ratchet]; + cards[ratchet] = cards[last_plain]; + cards[last_plain] = cards[rotor]; + cards[rotor] = swaptemp; + avalanche += cards[swaptemp]; + + // Output one byte from the state in such a way as to make it + // very hard to figure out which one you are looking at. + + last_cipher = b^cards[(cards[ratchet] + cards[rotor]) & 0xFF] ^ + cards[cards[(cards[last_plain] + + cards[last_cipher] + + cards[avalanche])&0xFF]]; + last_plain = b; + return last_cipher; +#else + return b; +#endif + } + +unsigned char sapphire::decrypt(unsigned char b) + { + unsigned char swaptemp; + + // Shuffle the deck a little more. + + ratchet += cards[rotor++]; + swaptemp = cards[last_cipher]; + cards[last_cipher] = cards[ratchet]; + cards[ratchet] = cards[last_plain]; + cards[last_plain] = cards[rotor]; + cards[rotor] = swaptemp; + avalanche += cards[swaptemp]; + + // Output one byte from the state in such a way as to make it + // very hard to figure out which one you are looking at. 
+ + last_plain = b^cards[(cards[ratchet] + cards[rotor]) & 0xFF] ^ + cards[cards[(cards[last_plain] + + cards[last_cipher] + + cards[avalanche])&0xFF]]; + last_cipher = b; + return last_plain; + } + +void sapphire::hash_final(unsigned char *hash, // Destination + unsigned char hashlength) // Size of hash. + { + int i; + + for (i=255;i>=0;i--) + encrypt((unsigned char) i); + for (i=0;i<hashlength;i++) + hash[i] = encrypt(0); + } + diff --git a/src/modules/common/swcipher.cpp b/src/modules/common/swcipher.cpp new file mode 100644 index 0000000..d221b8b --- /dev/null +++ b/src/modules/common/swcipher.cpp @@ -0,0 +1,123 @@ +/****************************************************************************** + * swcipher.cpp - code for class 'SWCipher'- a driver class that provides + * cipher utilities. + */ + +#include <string.h> +#include <stdlib.h> +#include <swcipher.h> + + +/****************************************************************************** + * SWCipher Constructor - Initializes data for instance of SWCipher + * + */ + +SWCipher::SWCipher(unsigned char *key) { + master.initialize(key, strlen((char *)key)); + buf = 0; +} + + +/****************************************************************************** + * SWCipher Destructor - Cleans up instance of SWCipher + */ + +SWCipher::~SWCipher() +{ + if (buf) + free(buf); +} + + +char *SWCipher::Buf(const char *ibuf, unsigned int ilen) +{ + if (ibuf) { + + if (buf) + free(buf); + + if (!ilen) { + len = strlen(buf); + ilen = len + 1; + } + else len = ilen; + + buf = (char *) malloc(ilen); + memcpy(buf, ibuf, ilen); + cipher = false; + } + + Decode(); + + return buf; +} + + +char *SWCipher::cipherBuf(unsigned int *ilen, const char *ibuf) +{ + if (ibuf) { + + if (buf) + free(buf); + + buf = (char *) malloc(*ilen); + memcpy(buf, ibuf, *ilen); + len = *ilen; + cipher = true; + } + + Encode(); + + *ilen = (short)len; + return buf; +} + + +/****************************************************************************** + * 
SWCipher::Encode - This function "encodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + */ + +void SWCipher::Encode(void) +{ + if (!cipher) { + work = master; + for (int i = 0; i < len; i++) + buf[i] = work.encrypt(buf[i]); + cipher = true; + } +} + + +/****************************************************************************** + * SWCipher::Decode - This function "decodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + */ + +void SWCipher::Decode(void) +{ + if (cipher) { + work = master; + for (int i = 0; i < len; i++) + buf[i] = work.decrypt(buf[i]); + cipher = false; + } +} + + +/****************************************************************************** + * SWCipher::setCipherKey - setter for a new CipherKey + * + */ + +void SWCipher::setCipherKey(const char *ikey) { + unsigned char *key = (unsigned char *)ikey; + master.initialize(key, strlen((char *)key)); +} diff --git a/src/modules/common/swcomprs.cpp b/src/modules/common/swcomprs.cpp new file mode 100644 index 0000000..4bd2e5e --- /dev/null +++ b/src/modules/common/swcomprs.cpp @@ -0,0 +1,190 @@ +/****************************************************************************** + * swcomprs.cpp - code for class 'SWCompress'- a driver class that provides + * compression utilities. 
+ */ + +#include <string.h> +#include <stdlib.h> +#include <swcomprs.h> + + +/****************************************************************************** + * SWCompress Constructor - Initializes data for instance of SWCompress + * + */ + +SWCompress::SWCompress() +{ + buf = zbuf = 0; + Init(); +} + + +/****************************************************************************** + * SWCompress Destructor - Cleans up instance of SWCompress + */ + +SWCompress::~SWCompress() +{ + if (zbuf) + free(zbuf); + + if (buf) + free(buf); +} + + +void SWCompress::Init() +{ + if (buf) + free(buf); + + if (zbuf) + free(zbuf); + + buf = 0; + zbuf = 0; + direct = 0; + zlen = 0; + slen = 0; + zpos = 0; + pos = 0; +} + + +char *SWCompress::Buf(const char *ibuf, unsigned long *len) { + // setting an uncompressed buffer + if (ibuf) { + Init(); + slen = (len) ? *len : strlen(ibuf); + buf = (char *) calloc(slen + 1, 1); + memcpy(buf, ibuf, slen); + } + + // getting an uncompressed buffer + if (!buf) { + buf = (char *)calloc(1,1); // be sure we at least allocate an empty buf for return; + direct = 1; + Decode(); +// slen = strlen(buf); + if (len) + *len = slen; + } + return buf; +} + + +char *SWCompress::zBuf(unsigned long *len, char *ibuf) +{ + // setting a compressed buffer + if (ibuf) { + Init(); + zbuf = (char *) malloc(*len); + memcpy(zbuf, ibuf, *len); + zlen = *len; + } + + // getting a compressed buffer + if (!zbuf) { + direct = 0; + Encode(); + } + + *len = zlen; + return zbuf; +} + + +unsigned long SWCompress::GetChars(char *ibuf, unsigned long len) +{ + if (direct) { + len = (((zlen - zpos) > (unsigned)len) ? len : zlen - zpos); + if (len > 0) { + memmove(ibuf, &zbuf[zpos], len); + zpos += len; + } + } + else { +// slen = strlen(buf); + len = (((slen - pos) > (unsigned)len) ? 
len : slen - pos); + if (len > 0) { + memmove(ibuf, &buf[pos], len); + pos += len; + } + } + return len; +} + + +unsigned long SWCompress::SendChars(char *ibuf, unsigned long len) +{ + if (direct) { + if (buf) { +// slen = strlen(buf); + if ((pos + len) > (unsigned)slen) { + buf = (char *) realloc(buf, pos + len + 1024); + memset(&buf[pos], 0, len + 1024); + } + } + else buf = (char *)calloc(1, len + 1024); + memmove(&buf[pos], ibuf, len); + pos += len; + } + else { + if (zbuf) { + if ((zpos + len) > zlen) { + zbuf = (char *) realloc(zbuf, zpos + len + 1024); + zlen = zpos + len + 1024; + } + } + else { + zbuf = (char *)calloc(1, len + 1024); + zlen = len + 1024; + } + memmove(&zbuf[zpos], ibuf, len); + zpos += len; + } + return len; +} + + +/****************************************************************************** + * SWCompress::Encode - This function "encodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + */ + +void SWCompress::Encode(void) +{ + cycleStream(); +} + + +/****************************************************************************** + * SWCompress::Decode - This function "decodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. 
+ */ + +void SWCompress::Decode(void) +{ + cycleStream(); +} + + +void SWCompress::cycleStream() { + char buf[1024]; + unsigned long len, totlen = 0; + + do { + len = GetChars(buf, 1024); + if (len) + totlen += SendChars(buf, len); + } while (len == 1024); + + zlen = slen = totlen; +} diff --git a/src/modules/common/zipcomprs.cpp b/src/modules/common/zipcomprs.cpp new file mode 100644 index 0000000..01ba430 --- /dev/null +++ b/src/modules/common/zipcomprs.cpp @@ -0,0 +1,158 @@ +/****************************************************************************** + * swcomprs.cpp - code for class 'ZipCompress'- a driver class that provides + * compression utilities. - using zlib + */ + +#include <string.h> +#include <string> +#include <stdlib.h> +#include <stdio.h> +#include <zipcomprs.h> +#include <zlib.h> + +/****************************************************************************** + * ZipCompress Constructor - Initializes data for instance of ZipCompress + * + */ + +ZipCompress::ZipCompress() : SWCompress() +{ +// fprintf(stderr, "init compress\n"); +} + + +/****************************************************************************** + * ZipCompress Destructor - Cleans up instance of ZipCompress + */ + +ZipCompress::~ZipCompress() { +} + + +/****************************************************************************** + * ZipCompress::Encode - This function "encodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + * NOTE: must set zlen for parent class to know length of + * compressed buffer. + */ + +void ZipCompress::Encode(void) +{ +/* +ZEXTERN int ZEXPORT compress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); + Compresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. 
Upon entry, destLen is the total + size of the destination buffer, which must be at least 0.1% larger than + sourceLen plus 12 bytes. Upon exit, destLen is the actual size of the + compressed buffer. + This function can be used to compress a whole file at once if the + input file is mmap'ed. + compress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer. +*/ + direct = 0; // set direction needed by parent [Get|Send]Chars() + + // get buffer + char chunk[1024]; + char *buf = (char *)calloc(1, 1024); + char *chunkbuf = buf; + unsigned long chunklen; + unsigned long len = 0; + while((chunklen = GetChars(chunk, 1023))) { + memcpy(chunkbuf, chunk, chunklen); + len += chunklen; + if (chunklen < 1023) + break; + else buf = (char *)realloc(buf, len + 1024); + chunkbuf = buf+len; + } + + + zlen = (long) (len*1.001)+15; + char *zbuf = new char[zlen+1]; + if (len) + { + //printf("Doing compress\n"); + if (compress((Bytef*)zbuf, &zlen, (const Bytef*)buf, len)!=Z_OK) + { + printf("ERROR in compression\n"); + } + else { + SendChars(zbuf, zlen); + } + } + else + { + fprintf(stderr, "No buffer to compress\n"); + } + delete [] zbuf; + free (buf); +} + + +/****************************************************************************** + * ZipCompress::Decode - This function "decodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + */ + +void ZipCompress::Decode(void) +{ +/* +ZEXTERN int ZEXPORT uncompress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); + Decompresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total + size of the destination buffer, which must be large enough to hold the + entire uncompressed data. 
(The size of the uncompressed data must have + been saved previously by the compressor and transmitted to the decompressor + by some mechanism outside the scope of this compression library.) + Upon exit, destLen is the actual size of the compressed buffer. + This function can be used to decompress a whole file at once if the + input file is mmap'ed. + + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer, or Z_DATA_ERROR if the input data was corrupted. +*/ + + // get buffer + char chunk[1024]; + char *zbuf = (char *)calloc(1, 1024); + char *chunkbuf = zbuf; + int chunklen; + unsigned long zlen = 0; + while((chunklen = GetChars(chunk, 1023))) { + memcpy(chunkbuf, chunk, chunklen); + zlen += chunklen; + if (chunklen < 1023) + break; + else zbuf = (char *)realloc(zbuf, zlen + 1024); + chunkbuf = zbuf + zlen; + } + + //printf("Decoding complength{%ld} uncomp{%ld}\n", zlen, blen); + if (zlen) { + unsigned long blen = zlen*20; // trust compression is less than 1000% + char *buf = new char[blen]; + //printf("Doing decompress {%s}\n", zbuf); + if (uncompress((Bytef*)buf, &blen, (Bytef*)zbuf, zlen) != Z_OK) { + fprintf(stderr, "no room in outbuffer to during decompression. 
see zipcomp.cpp\n"); + } + SendChars(buf, blen); + delete [] buf; + slen = blen; + } + else { + fprintf(stderr, "No buffer to decompress!\n"); + } + //printf("Finished decoding\n"); + free (zbuf); +} diff --git a/src/modules/common/zstr.cpp b/src/modules/common/zstr.cpp new file mode 100644 index 0000000..cd1add5 --- /dev/null +++ b/src/modules/common/zstr.cpp @@ -0,0 +1,705 @@ +/****************************************************************************** + * zstr.cpp - code for class 'zStr'- a module that reads compressed text + * files and provides lookup and parsing functions based on + * class StrKey + */ + +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <stdlib.h> +#include <utilfuns.h> +#include <zstr.h> +#include <swcomprs.h> + +#include <sysdata.h> +#include <entriesblk.h> + +/****************************************************************************** + * zStr Statics + */ + +int zStr::instance = 0; +const int zStr::IDXENTRYSIZE = 8; +const int zStr::ZDXENTRYSIZE = 8; + + +/****************************************************************************** + * zStr Constructor - Initializes data for instance of zStr + * + * ENT: ipath - path of the directory where data and index files are located. + */ + +zStr::zStr(const char *ipath, int fileMode, long blockCount, SWCompress *icomp) { + char buf[127]; + + nl = '\n'; + lastoff = -1; + path = 0; + stdstr(&path, ipath); + + compressor = (icomp) ? icomp : new SWCompress(); + this->blockCount = blockCount; +#ifndef O_BINARY // O_BINARY is needed in Borland C++ 4.53 +#define O_BINARY 0 // If it hasn't been defined than we probably +#endif // don't need it. 
+ + if (fileMode == -1) { // try read/write if possible + fileMode = O_RDWR; + } + + sprintf(buf, "%s.idx", path); + idxfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s.dat", path); + datfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s.zdx", path); + zdxfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s.zdt", path); + zdtfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + if (datfd <= 0) { + sprintf(buf, "Error: %d", errno); + perror(buf); + } + + cacheBlock = 0; + cacheBlockIndex = -1; + cacheDirty = false; + + instance++; +} + + +/****************************************************************************** + * zStr Destructor - Cleans up instance of zStr + */ + +zStr::~zStr() { + + flushCache(); + + if (path) + delete [] path; + + --instance; + + FileMgr::systemFileMgr.close(idxfd); + FileMgr::systemFileMgr.close(datfd); + FileMgr::systemFileMgr.close(zdxfd); + FileMgr::systemFileMgr.close(zdtfd); + + + if (compressor) + delete compressor; + +} + + +/****************************************************************************** + * zStr::getidxbufdat - Gets the index string at the given dat offset + * NOTE: buf is calloc'd, or if not null, realloc'd and must + * be free'd by calling function + * + * ENT: ioffset - offset in dat file to lookup + * buf - address of pointer to allocate for storage of string + */ + +void zStr::getKeyFromDatOffset(long ioffset, char **buf) { + int size; + char ch; + if (datfd > 0) { + lseek(datfd->getFd(), ioffset, SEEK_SET); + for (size = 0; read(datfd->getFd(), &ch, 1) == 1; size++) { + if ((ch == '\\') || (ch == 10) || (ch == 13)) + break; + } + *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1); + if (size) { + lseek(datfd->getFd(), ioffset, SEEK_SET); + read(datfd->getFd(), *buf, size); + } + (*buf)[size] = 0; + toupperstr_utf8(*buf); + } + else { + *buf = (*buf) ? 
(char *)realloc(*buf, 1) : (char *)malloc(1); + **buf = 0; + } +} + + +/****************************************************************************** + * zStr::getidxbuf - Gets the index string at the given idx offset + * NOTE: buf is calloc'd, or if not null, realloc'd + * and must be freed by calling function + * + * ENT: ioffset - offset in idx file to lookup + * buf - address of pointer to allocate for storage of string + */ + +void zStr::getKeyFromIdxOffset(long ioffset, char **buf) { + __u32 offset; + + if (idxfd > 0) { + lseek(idxfd->getFd(), ioffset, SEEK_SET); + read(idxfd->getFd(), &offset, sizeof(__u32)); + offset = swordtoarch32(offset); + getKeyFromDatOffset(offset, buf); + } +} + + +/****************************************************************************** + * zStr::findoffset - Finds the offset of the key string from the indexes + * + * ENT: key - key string to lookup + * offset - address to store the starting offset + * size - address to store the size of the entry + * away - number of entries before of after to jump + * (default = 0) + * + * RET: error status + */ + +signed char zStr::findKeyIndex(const char *ikey, long *idxoff, long away) { + char *trybuf = 0, *key = 0, quitflag = 0; + signed char retval = 0; + __s32 headoff, tailoff, tryoff = 0, maxoff = 0; + __u32 start, size; + + if (idxfd->getFd() >= 0) { + tailoff = maxoff = lseek(idxfd->getFd(), 0, SEEK_END) - IDXENTRYSIZE; + if (*ikey) { + headoff = 0; + stdstr(&key, ikey); + toupperstr_utf8(key); + + while (headoff < tailoff) { + tryoff = (lastoff == -1) ? 
headoff + (((((tailoff / IDXENTRYSIZE) - (headoff / IDXENTRYSIZE))) / 2) * IDXENTRYSIZE) : lastoff; + lastoff = -1; + + getKeyFromIdxOffset(tryoff, &trybuf); + + if (!*trybuf && tryoff) { // In case of extra entry at end of idx (not first entry) + tryoff += (tryoff > (maxoff / 2))?-IDXENTRYSIZE:IDXENTRYSIZE; + retval = -1; + break; + } + + int diff = strcmp(key, trybuf); + if (!diff) + break; + + if (diff < 0) + tailoff = (tryoff == headoff) ? headoff : tryoff; + else headoff = tryoff; + if (tailoff == headoff + IDXENTRYSIZE) { + if (quitflag++) + headoff = tailoff; + } + } + if (headoff >= tailoff) + tryoff = headoff; + if (trybuf) + free(trybuf); + delete [] key; + } + else { tryoff = 0; } + + lseek(idxfd->getFd(), tryoff, SEEK_SET); + + start = size = 0; + retval = (read(idxfd->getFd(), &start, sizeof(__u32))==sizeof(__u32)) ? retval : -1; + retval = (read(idxfd->getFd(), &size, sizeof(__u32))==sizeof(__u32)) ? retval : -1; + start = swordtoarch32(start); + size = swordtoarch32(size); + + if (idxoff) + *idxoff = tryoff; + + while (away) { + __u32 laststart = start; + __u32 lastsize = size; + __s32 lasttry = tryoff; + tryoff += (away > 0) ? IDXENTRYSIZE : -IDXENTRYSIZE; + + bool bad = false; + if (((long)(tryoff + (away*IDXENTRYSIZE)) < -IDXENTRYSIZE) || (tryoff + (away*IDXENTRYSIZE) > (maxoff+IDXENTRYSIZE))) + bad = true; + else if (lseek(idxfd->getFd(), tryoff, SEEK_SET) < 0) + bad = true; + if (bad) { + retval = -1; + start = laststart; + size = lastsize; + tryoff = lasttry; + if (idxoff) + *idxoff = tryoff; + break; + } + read(idxfd->getFd(), &start, sizeof(__u32)); + read(idxfd->getFd(), &size, sizeof(__u32)); + start = swordtoarch32(start); + size = swordtoarch32(size); + + if (idxoff) + *idxoff = tryoff; + + + if (((laststart != start) || (lastsize != size)) && (start >= 0) && (size)) + away += (away < 0) ? 
1 : -1; + } + + lastoff = tryoff; + } + else { + if (idxoff) + *idxoff = 0; + retval = -1; + } + return retval; +} + + +/****************************************************************************** + * zStr::preptext - Prepares the text before returning it to external + * objects + * + * ENT: buf - buffer where text is stored and where to store the prep'd + * text. + */ + +void zStr::prepText(char *buf) { + char *to, *from, space = 0, cr = 0, realdata = 0, nlcnt = 0; + + for (to = from = buf; *from; from++) { + switch (*from) { + case 10: + if (!realdata) + continue; + space = (cr) ? 0 : 1; + cr = 0; + nlcnt++; + if (nlcnt > 1) { +// *to++ = nl; + *to++ = nl; +// nlcnt = 0; + } + continue; + case 13: + if (!realdata) + continue; + *to++ = nl; + space = 0; + cr = 1; + continue; + } + realdata = 1; + nlcnt = 0; + if (space) { + space = 0; + if (*from != ' ') { + *to++ = ' '; + from--; + continue; + } + } + *to++ = *from; + } + *to = 0; + + while (to > (buf+1)) { // remove trailing excess + to--; + if ((*to == 10) || (*to == ' ')) + *to = 0; + else break; + } +} + + +/****************************************************************************** + * zStr::getText - gets text at a given offset + * + * ENT: + * offset - idxoffset where the key is located. + * buf - buffer to store text + * idxbuf - buffer to store index key + * NOTE: buffer will be alloc'd / realloc'd and + * should be free'd by the client + * + */ + +void zStr::getText(long offset, char **idxbuf, char **buf) { + char *ch; + char *idxbuflocal = 0; + getKeyFromIdxOffset(offset, &idxbuflocal); + __u32 start; + __u32 size; + + do { + lseek(idxfd->getFd(), offset, SEEK_SET); + read(idxfd->getFd(), &start, sizeof(__u32)); + read(idxfd->getFd(), &size, sizeof(__u32)); + start = swordtoarch32(start); + size = swordtoarch32(size); + + *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1); + *idxbuf = (*idxbuf) ? 
(char *)realloc(*idxbuf, size*2 + 1) : (char *)malloc(size*2 + 1); + memset(*buf, 0, size + 1); + memset(*idxbuf, 0, size + 1); + lseek(datfd->getFd(), start, SEEK_SET); + read(datfd->getFd(), *buf, (int)(size)); + + for (ch = *buf; *ch; ch++) { // skip over index string + if (*ch == 10) { + ch++; + break; + } + } + memmove(*buf, ch, size - (unsigned long)(ch-*buf)); + + // resolve link + if (!strncmp(*buf, "@LINK", 5)) { + for (ch = *buf; *ch; ch++) { // null before nl + if (*ch == 10) { + *ch = 0; + break; + } + } + findKeyIndex(*buf + 6, &offset); + } + else break; + } + while (true); // while we're resolving links + + if (idxbuflocal) { + __u32 localsize = strlen(idxbuflocal); + localsize = (localsize < (size - 1)) ? localsize : (size - 1); + strncpy(*idxbuf, idxbuflocal, localsize); + (*idxbuf)[localsize] = 0; + free(idxbuflocal); + } + __u32 block = 0; + __u32 entry = 0; + memmove(&block, *buf, sizeof(__u32)); + memmove(&entry, *buf + sizeof(__u32), sizeof(__u32)); + block = swordtoarch32(block); + entry = swordtoarch32(entry); + getCompressedText(block, entry, buf); +} + + +/****************************************************************************** + * zStr::getCompressedText - Get text entry from a compressed index / zdata + * file. + */ + +void zStr::getCompressedText(long block, long entry, char **buf) { + + __u32 size = 0; + + if (cacheBlockIndex != block) { + __u32 start = 0; + + lseek(zdxfd->getFd(), block * ZDXENTRYSIZE, SEEK_SET); + read(zdxfd->getFd(), &start, sizeof(__u32)); + read(zdxfd->getFd(), &size, sizeof(__u32)); + start = swordtoarch32(start); + size = swordtoarch32(size); + + *buf = (*buf) ? 
(char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1); + + lseek(zdtfd->getFd(), start, SEEK_SET); + read(zdtfd->getFd(), *buf, size); + + flushCache(); + + unsigned long len = size; + compressor->zBuf(&len, *buf); + char * rawBuf = compressor->Buf(0, &len); + cacheBlock = new EntriesBlock(rawBuf, len); + cacheBlockIndex = block; + } + size = cacheBlock->getEntrySize(entry); + *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1); + strcpy(*buf, cacheBlock->getEntry(entry)); +} + + +/****************************************************************************** + * zLD::settext - Sets text for current offset + * + * ENT: key - key for this entry + * buf - buffer to store + * len - length of buffer (0 - null terminated) + */ + +void zStr::setText(const char *ikey, const char *buf, long len) { + + __u32 start, outstart; + __u32 size, outsize; + __s32 endoff; + long idxoff = 0; + __s32 shiftSize; + static const char nl[] = {13, 10}; + char *tmpbuf = 0; + char *key = 0; + char *dbKey = 0; + char *idxBytes = 0; + char *outbuf = 0; + char *ch = 0; + + len = (len < 0) ? 
strlen(buf) : len; + stdstr(&key, ikey); + toupperstr_utf8(key); + + char notFound = findKeyIndex(ikey, &idxoff, 0); + if (!notFound) { + getKeyFromIdxOffset(idxoff, &dbKey); + int diff = strcmp(key, dbKey); + if (diff < 0) { + } + else if (diff > 0) { + idxoff += IDXENTRYSIZE; + } + else if ((!diff) && (len > 0 /*we're not deleting*/)) { // got absolute entry + do { + lseek(idxfd->getFd(), idxoff, SEEK_SET); + read(idxfd->getFd(), &start, sizeof(__u32)); + read(idxfd->getFd(), &size, sizeof(__u32)); + start = swordtoarch32(start); + size = swordtoarch32(size); + + tmpbuf = new char [ size + 2 ]; + memset(tmpbuf, 0, size + 2); + lseek(datfd->getFd(), start, SEEK_SET); + read(datfd->getFd(), tmpbuf, size); + + for (ch = tmpbuf; *ch; ch++) { // skip over index string + if (*ch == 10) { + ch++; + break; + } + } + memmove(tmpbuf, ch, size - (unsigned long)(ch-tmpbuf)); + + // resolve link + if (!strncmp(tmpbuf, "@LINK", 5) && (len)) { + for (ch = tmpbuf; *ch; ch++) { // null before nl + if (*ch == 10) { + *ch = 0; + break; + } + } + findKeyIndex(tmpbuf + IDXENTRYSIZE, &idxoff); + delete [] tmpbuf; + } + else break; + } + while (true); // while we're resolving links + } + } + + endoff = lseek(idxfd->getFd(), 0, SEEK_END); + + shiftSize = endoff - idxoff; + + if (shiftSize > 0) { + idxBytes = new char [ shiftSize ]; + lseek(idxfd->getFd(), idxoff, SEEK_SET); + read(idxfd->getFd(), idxBytes, shiftSize); + } + + outbuf = new char [ len + strlen(key) + 5 ]; + sprintf(outbuf, "%s%c%c", key, 13, 10); + size = strlen(outbuf); + if (len > 0) { // NOT a link + if (!cacheBlock) { + flushCache(); + cacheBlock = new EntriesBlock(); + cacheBlockIndex = (lseek(zdxfd->getFd(), 0, SEEK_END) / ZDXENTRYSIZE); + } + else if (cacheBlock->getCount() >= blockCount) { + flushCache(); + cacheBlock = new EntriesBlock(); + cacheBlockIndex = (lseek(zdxfd->getFd(), 0, SEEK_END) / ZDXENTRYSIZE); + } + __u32 entry = cacheBlock->addEntry(buf); + cacheDirty = true; + outstart = 
archtosword32(cacheBlockIndex); + outsize = archtosword32(entry); + memcpy (outbuf + size, &outstart, sizeof(__u32)); + memcpy (outbuf + size + sizeof(__u32), &outsize, sizeof(__u32)); + size += (sizeof(__u32) * 2); + } + else { // link + memcpy(outbuf + size, buf, len); + size += len; + } + + start = lseek(datfd->getFd(), 0, SEEK_END); + + outstart = archtosword32(start); + outsize = archtosword32(size); + + lseek(idxfd->getFd(), idxoff, SEEK_SET); + if (len > 0) { + lseek(datfd->getFd(), start, SEEK_SET); + write(datfd->getFd(), outbuf, size); + + // add a new line to make data file easier to read in an editor + write(datfd->getFd(), &nl, 2); + + write(idxfd->getFd(), &outstart, sizeof(__u32)); + write(idxfd->getFd(), &outsize, sizeof(__u32)); + if (idxBytes) { + write(idxfd->getFd(), idxBytes, shiftSize); + } + } + else { // delete entry + if (idxBytes) { + write(idxfd->getFd(), idxBytes+IDXENTRYSIZE, shiftSize-IDXENTRYSIZE); + lseek(idxfd->getFd(), -1, SEEK_CUR); // last valid byte + FileMgr::systemFileMgr.trunc(idxfd); // truncate index + } + } + + if (idxBytes) + delete [] idxBytes; + delete [] key; + delete [] outbuf; + free(dbKey); +} + + +/****************************************************************************** + * zLD::linkentry - links one entry to another + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * destidxoff - dest offset into .vss + * srcidxoff - source offset into .vss + */ + +void zStr::linkEntry(const char *destkey, const char *srckey) { + char *text = new char [ strlen(destkey) + 7 ]; + sprintf(text, "@LINK %s", destkey); + setText(srckey, text); + delete [] text; +} + + +void zStr::flushCache() { + if (cacheBlock) { + if (cacheDirty) { + __u32 start = 0; + unsigned long size = 0; + __u32 outstart = 0, outsize = 0; + + const char *rawBuf = cacheBlock->getRawData(&size); + compressor->Buf(rawBuf, &size); + compressor->zBuf(&size); + + long zdxSize = lseek(zdxfd->getFd(), 0, SEEK_END); + long zdtSize = 
lseek(zdtfd->getFd(), 0, SEEK_END); + + if ((cacheBlockIndex * ZDXENTRYSIZE) > (zdxSize - ZDXENTRYSIZE)) { // New Block + start = zdtSize; + } + else { + lseek(zdxfd->getFd(), cacheBlockIndex * ZDXENTRYSIZE, SEEK_SET); + read(zdxfd->getFd(), &start, sizeof(__u32)); + read(zdxfd->getFd(), &outsize, sizeof(__u32)); + start = swordtoarch32(start); + outsize = swordtoarch32(outsize); + if (start + outsize >= zdtSize) { // last entry, just overwrite + // start is already set + } + else if (size < outsize) { // middle entry, but smaller, that's fine and let's preserve bigger size + size = outsize; + } + else { // middle and bigger-- we have serious problems, for now let's put it at the end = lots of wasted space + start = zdtSize; + } + } + + + + outstart = archtosword32(start); + outsize = archtosword32((__u32)size); + + lseek(zdxfd->getFd(), cacheBlockIndex * ZDXENTRYSIZE, SEEK_SET); + lseek(zdtfd->getFd(), start, SEEK_SET); + rawBuf = compressor->zBuf(&size); + write(zdtfd->getFd(), rawBuf, size); + + // add a new line to make data file easier to read in an editor + write(zdtfd->getFd(), &nl, 2); + + write(zdxfd->getFd(), &outstart, sizeof(__u32)); + write(zdxfd->getFd(), &outsize, sizeof(__u32)); + + delete cacheBlock; + } + } + cacheBlockIndex = -1; + cacheBlock = 0; + cacheDirty = false; +} + + +/****************************************************************************** + * zLD::CreateModule - Creates new module files + * + * ENT: path - directory to store module files + * RET: error status + */ + +signed char zStr::createModule(const char *ipath) { + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd, *fd2; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + sprintf(buf, "%s.dat", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, 
"%s.idx", path); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + FileMgr::systemFileMgr.close(fd2); + + sprintf(buf, "%s.zdt", path); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + FileMgr::systemFileMgr.close(fd2); + + sprintf(buf, "%s.zdx", path); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + FileMgr::systemFileMgr.close(fd2); + + delete [] path; + + return 0; +} diff --git a/src/modules/common/zverse.cpp b/src/modules/common/zverse.cpp new file mode 100644 index 0000000..6d76ddc --- /dev/null +++ b/src/modules/common/zverse.cpp @@ -0,0 +1,518 @@ +/****************************************************************************** + * zverse.h - code for class 'zVerse'- a module that reads raw text + * files: ot and nt using indexs ??.bks ??.cps ??.vss + * and provides lookup and parsing functions based on + * class VerseKey for compressed modules + */ + + +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> +#include <stdlib.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> +#include <versekey.h> +#include <zverse.h> +#include <sysdata.h> + + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + + +/****************************************************************************** + * zVerse Statics + */ + +int zVerse::instance = 0; + +const char zVerse::uniqueIndexID[] = {'X', 'r', 'v', 'c', 'b'}; + +/****************************************************************************** + * zVerse Constructor - Initializes data for instance of zVerse + * + * ENT: ipath - path of the directory where data and index files are located. + * be sure to include the trailing separator (e.g. '/' or '\') + * (e.g. 
'modules/texts/rawtext/webster/') + * fileMode - open mode for the files (O_RDONLY, etc.) + * blockType - verse, chapter, book, etc. + */ + +zVerse::zVerse(const char *ipath, int fileMode, int blockType, SWCompress *icomp) +{ + char buf[127]; + + nl = '\n'; + path = 0; + cacheBufIdx = -1; + cacheTestament = 0; + cacheBuf = 0; + dirtyCache = false; + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + compressor = (icomp) ? icomp : new SWCompress(); + + if (fileMode == -1) { // try read/write if possible + fileMode = O_RDWR; + } + + sprintf(buf, "%s/ot.%czs", path, uniqueIndexID[blockType]); + idxfp[0] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/nt.%czs", path, uniqueIndexID[blockType]); + idxfp[1] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/ot.%czz", path, uniqueIndexID[blockType]); + textfp[0] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/nt.%czz", path, uniqueIndexID[blockType]); + textfp[1] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/ot.%czv", path, uniqueIndexID[blockType]); + compfp[0] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/nt.%czv", path, uniqueIndexID[blockType]); + compfp[1] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + instance++; +} + + +/****************************************************************************** + * zVerse Destructor - Cleans up instance of zVerse + */ + +zVerse::~zVerse() +{ + int loop1; + + if (cacheBuf) { + flushCache(); + free(cacheBuf); + } + + if (path) + delete [] path; + + if (compressor) + delete compressor; + + --instance; + + for (loop1 = 0; loop1 < 2; loop1++) { + FileMgr::systemFileMgr.close(idxfp[loop1]); + FileMgr::systemFileMgr.close(textfp[loop1]); + FileMgr::systemFileMgr.close(compfp[loop1]); + } +} + + 
+/****************************************************************************** + * zVerse::findoffset - Finds the offset of the key verse from the indexes + * + * + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * book - book to find (0 - testament introduction) + * chapter - chapter to find (0 - book introduction) + * verse - verse to find (0 - chapter introduction) + * start - address to store the starting offset + * size - address to store the size of the entry + */ + +void zVerse::findoffset(char testmt, long idxoff, long *start, unsigned short *size) +{ + // set start to offset in + // set size to + // set + unsigned long ulBuffNum=0; // buffer number + unsigned long ulVerseStart=0; // verse offset within buffer + unsigned short usVerseSize=0; // verse size + unsigned long ulCompOffset=0; // compressed buffer start + unsigned long ulCompSize=0; // buffer size compressed + unsigned long ulUnCompSize=0; // buffer size uncompressed + char *pcCompText=NULL; // compressed text + + *start = *size = 0; + //printf ("Finding offset %ld\n", idxoff); + idxoff *= 10; + if (!testmt) { + testmt = ((idxfp[0]) ? 
1:2); + } + + // assert we have and valid file descriptor + if (compfp[testmt-1]->getFd() < 1) + return; + + long newOffset = lseek(compfp[testmt-1]->getFd(), idxoff, SEEK_SET); + if (newOffset == idxoff) { + if (read(compfp[testmt-1]->getFd(), &ulBuffNum, 4) != 4) { + printf ("Error reading ulBuffNum\n"); + return; + } + } + else return; + + ulBuffNum = swordtoarch32(ulBuffNum); + + if (read(compfp[testmt-1]->getFd(), &ulVerseStart, 4) < 2) + { + printf ("Error reading ulVerseStart\n"); + return; + } + if (read(compfp[testmt-1]->getFd(), &usVerseSize, 2) < 2) + { + printf ("Error reading usVerseSize\n"); + return; + } + + *start = swordtoarch32(ulVerseStart); + *size = swordtoarch16(usVerseSize); + + if (*size) { + if (((long) ulBuffNum == cacheBufIdx) && (testmt == cacheTestament) && (cacheBuf)) { + // have the text buffered + return; + } + + //printf ("Got buffer number{%ld} versestart{%ld} versesize{%d}\n", ulBuffNum, ulVerseStart, usVerseSize); + + + if (lseek(idxfp[testmt-1]->getFd(), ulBuffNum*12, SEEK_SET)!=(long) ulBuffNum*12) + { + printf ("Error seeking compressed file index\n"); + return; + } + if (read(idxfp[testmt-1]->getFd(), &ulCompOffset, 4)<4) + { + printf ("Error reading ulCompOffset\n"); + return; + } + if (read(idxfp[testmt-1]->getFd(), &ulCompSize, 4)<4) + { + printf ("Error reading ulCompSize\n"); + return; + } + if (read(idxfp[testmt-1]->getFd(), &ulUnCompSize, 4)<4) + { + printf ("Error reading ulUnCompSize\n"); + return; + } + + ulCompOffset = swordtoarch32(ulCompOffset); + ulCompSize = swordtoarch32(ulCompSize); + ulUnCompSize = swordtoarch32(ulUnCompSize); + + if (lseek(textfp[testmt-1]->getFd(), ulCompOffset, SEEK_SET)!=(long)ulCompOffset) + { + printf ("Error: could not seek to right place in compressed text\n"); + return; + } + pcCompText = new char[ulCompSize]; + + if (read(textfp[testmt-1]->getFd(), pcCompText, ulCompSize)<(long)ulCompSize) + { + printf ("Error reading compressed text\n"); + return; + } + 
compressor->zBuf(&ulCompSize, pcCompText); + + if (cacheBuf) { + flushCache(); + free(cacheBuf); + } + + unsigned long len = 0; + compressor->Buf(0, &len); + cacheBuf = (char *)calloc(len + 1, 1); + memcpy(cacheBuf, compressor->Buf(), len); + + cacheTestament = testmt; + cacheBufIdx = ulBuffNum; + if (pcCompText) + delete [] pcCompText; + } +} + + +/****************************************************************************** + * zVerse::zreadtext - gets text at a given offset + * + * ENT: testmt - testament file to search in (0 - Old; 1 - New) + * start - starting offset where the text is located in the file + * size - size of text entry + 1 (null) + * buf - buffer to store text + * + */ + +void zVerse::zreadtext(char testmt, long start, unsigned short size, char *inbuf) +{ + memset(inbuf, 0, size); + if (size > 2) { + strncpy(inbuf, &(cacheBuf[start]), size-2); + } +} + + +/****************************************************************************** + * zVerse::settext - Sets text for current offset + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * idxoff - offset into .vss + * buf - buffer to store + * len - length of buffer (0 - null terminated) + */ + +void zVerse::settext(char testmt, long idxoff, const char *buf, long len) { + + len = (len < 0) ? strlen(buf) : len; + if (!testmt) + testmt = ((idxfp[0]) ? 
1:2); + if ((!dirtyCache) || (cacheBufIdx < 0)) { + cacheBufIdx = lseek(idxfp[testmt-1]->getFd(), 0, SEEK_END) / 12; + cacheTestament = testmt; + if (cacheBuf) + free(cacheBuf); + cacheBuf = (char *)calloc(len + 1, 1); + } + else cacheBuf = (char *)((cacheBuf)?realloc(cacheBuf, strlen(cacheBuf)+(len + 1)):calloc((len + 1), 1)); + + dirtyCache = true; + + unsigned long start, outstart; + unsigned long outBufIdx = cacheBufIdx; + unsigned short size; + unsigned short outsize; + + idxoff *= 10; + size = outsize = len; + + start = strlen(cacheBuf); + + if (!size) + start = outBufIdx = 0; + + outBufIdx = archtosword32(outBufIdx); + outstart = archtosword32(start); + outsize = archtosword16(size); + + lseek(compfp[testmt-1]->getFd(), idxoff, SEEK_SET); + write(compfp[testmt-1]->getFd(), &outBufIdx, 4); + write(compfp[testmt-1]->getFd(), &outstart, 4); + write(compfp[testmt-1]->getFd(), &outsize, 2); + strcat(cacheBuf, buf); +} + + +void zVerse::flushCache() { + if (dirtyCache) { + unsigned long idxoff; + unsigned long start, outstart; + unsigned long size, outsize; + unsigned long zsize, outzsize; + + idxoff = cacheBufIdx * 12; + size = outsize = zsize = outzsize = strlen(cacheBuf); + if (size) { +// if (compressor) { +// delete compressor; +// compressor = new LZSSCompress(); +// } + compressor->Buf(cacheBuf); + compressor->zBuf(&zsize); + outzsize = zsize; + + start = outstart = lseek(textfp[cacheTestament-1]->getFd(), 0, SEEK_END); + + outstart = archtosword32(start); + outsize = archtosword32(size); + outzsize = archtosword32(zsize); + + write(textfp[cacheTestament-1]->getFd(), compressor->zBuf(&zsize), zsize); + + lseek(idxfp[cacheTestament-1]->getFd(), idxoff, SEEK_SET); + write(idxfp[cacheTestament-1]->getFd(), &outstart, 4); + write(idxfp[cacheTestament-1]->getFd(), &outzsize, 4); + write(idxfp[cacheTestament-1]->getFd(), &outsize, 4); + } + dirtyCache = false; + } +} + +/****************************************************************************** + * 
RawVerse::linkentry - links one entry to another + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * destidxoff - dest offset into .vss + * srcidxoff - source offset into .vss + */ + +void zVerse::linkentry(char testmt, long destidxoff, long srcidxoff) { + long bufidx; + long start; + unsigned short size; + + destidxoff *= 10; + srcidxoff *= 10; + + if (!testmt) + testmt = ((idxfp[1]) ? 1:2); + + // get source + lseek(compfp[testmt-1]->getFd(), srcidxoff, SEEK_SET); + read(compfp[testmt-1]->getFd(), &bufidx, 4); + read(compfp[testmt-1]->getFd(), &start, 4); + read(compfp[testmt-1]->getFd(), &size, 2); + + // write dest + lseek(compfp[testmt-1]->getFd(), destidxoff, SEEK_SET); + write(compfp[testmt-1]->getFd(), &bufidx, 4); + write(compfp[testmt-1]->getFd(), &start, 4); + write(compfp[testmt-1]->getFd(), &size, 2); +} + + +/****************************************************************************** + * RawVerse::CreateModule - Creates new module files + * + * ENT: path - directory to store module files + * RET: error status + */ + +char zVerse::createModule(const char *ipath, int blockBound) +{ + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd, *fd2; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + sprintf(buf, "%s/ot.%czs", path, uniqueIndexID[blockBound]); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s/nt.%czs", path, uniqueIndexID[blockBound]); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s/ot.%czz", path, uniqueIndexID[blockBound]); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + 
FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s/nt.%czz", path, uniqueIndexID[blockBound]); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s/ot.%czv", path, uniqueIndexID[blockBound]); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + + sprintf(buf, "%s/nt.%czv", path, uniqueIndexID[blockBound]); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + + VerseKey vk; + vk.Headings(1); + long offset = 0; + short size = 0; + for (vk = TOP; !vk.Error(); vk++) { + write((vk.Testament() == 1) ? fd->getFd() : fd2->getFd(), &offset, 4); //compBufIdxOffset + write((vk.Testament() == 1) ? fd->getFd() : fd2->getFd(), &offset, 4); + write((vk.Testament() == 1) ? fd->getFd() : fd2->getFd(), &size, 2); + } + + FileMgr::systemFileMgr.close(fd); + FileMgr::systemFileMgr.close(fd2); + + delete [] path; +/* + RawVerse rv(path); + VerseKey mykey("Rev 22:21"); +*/ + + return 0; +} + + +/****************************************************************************** + * zVerse::preptext - Prepares the text before returning it to external + * objects + * + * ENT: buf - buffer where text is stored and where to store the prep'd + * text. + */ + +void zVerse::preptext(char *buf) +{ + char *to, *from, space = 0, cr = 0, realdata = 0, nlcnt = 0; + + for (to = from = buf; *from; from++) { + switch (*from) { + case 10: + if (!realdata) + continue; + space = (cr) ? 
0 : 1; + cr = 0; + nlcnt++; + if (nlcnt > 1) { +// *to++ = nl; + *to++ = nl; +// nlcnt = 0; + } + continue; + case 13: + if (!realdata) + continue; + *to++ = nl; + space = 0; + cr = 1; + continue; + } + realdata = 1; + nlcnt = 0; + if (space) { + space = 0; + if (*from != ' ') { + *to++ = ' '; + from--; + continue; + } + } + *to++ = *from; + } + *to = 0; + + if (to > buf) { + for (to--; to > buf; to--) { // remove trailing excess + if ((*to == 10) || (*to == ' ')) + *to = 0; + else break; + } + } +} diff --git a/src/modules/filters/cipherfil.cpp b/src/modules/filters/cipherfil.cpp new file mode 100644 index 0000000..ad55396 --- /dev/null +++ b/src/modules/filters/cipherfil.cpp @@ -0,0 +1,38 @@ +/****************************************************************************** + * + * cipherfil - SWFilter decendant to decipher a module + */ + + +#include <stdlib.h> +#include <string.h> +#include <cipherfil.h> + + +CipherFilter::CipherFilter(const char *key) { + cipher = new SWCipher((unsigned char *)key); +} + + +CipherFilter::~CipherFilter() { + delete cipher; +} + + +SWCipher *CipherFilter::getCipher() { + return cipher; +} + + +char CipherFilter::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) { + unsigned int len; +// len = strlen(text); + len = maxlen; + if (len > 0) { + cipher->cipherBuf(&len, text); + strncpy(text, cipher->Buf(), (len < (unsigned int)maxlen) ? len : maxlen); + } + text[maxlen] = 0; + text[maxlen+1] = 0; + return 0; +} diff --git a/src/modules/filters/gbffootnotes.cpp b/src/modules/filters/gbffootnotes.cpp new file mode 100644 index 0000000..c5b7b90 --- /dev/null +++ b/src/modules/filters/gbffootnotes.cpp @@ -0,0 +1,118 @@ +/****************************************************************************** + * + * gbffootnotes - SWFilter decendant to hide or show footnotes + * in a GBF module. 
+ */ + + +#include <stdlib.h> +#include <string.h> +#include <gbffootnotes.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char GBFFootnotes::on[] = "On"; +const char GBFFootnotes::off[] = "Off"; +const char GBFFootnotes::optName[] = "Footnotes"; +const char GBFFootnotes::optTip[] = "Toggles Footnotes On and Off if they exist"; + + +GBFFootnotes::GBFFootnotes() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +GBFFootnotes::~GBFFootnotes() { +} + +void GBFFootnotes::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *GBFFootnotes::getOptionValue() +{ + return (option) ? on:off; +} + +char GBFFootnotes::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want footnotes + char *to, *from, token[4096]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; +// memset(token, 0, 4096); + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + switch (*token) { + case 'R': // Reference + switch(token[1]) { + case 'F': // Begin footnote + hide = true; + break; + case 'f': // end footnote + hide = false; + break; + } + continue; // skip token + case 'W': + if (token[1] == 'T') { + switch (token[2]) { + case 'P': + case 'S': + case 'A': + continue; // remove this token + default: + break; + } + } + } + // if not a footnote token, keep token in text + if (!hide) { + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + } + continue; + } + if (intoken) { + if (tokpos < 4090) + token[tokpos++] = 
*from; + token[tokpos+2] = 0; // +2 cuz we init token with 2 extra '0' because of switch statement + } + else { + if (!hide) { + *to++ = *from; + } + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/gbfheadings.cpp b/src/modules/filters/gbfheadings.cpp new file mode 100644 index 0000000..590e2fa --- /dev/null +++ b/src/modules/filters/gbfheadings.cpp @@ -0,0 +1,107 @@ +/****************************************************************************** + * + * gbfheadings - SWFilter decendant to hide or show headings + * in a GBF module. + */ + + +#include <stdlib.h> +#include <string.h> +#include <gbfheadings.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char GBFHeadings::on[] = "On"; +const char GBFHeadings::off[] = "Off"; +const char GBFHeadings::optName[] = "Headings"; +const char GBFHeadings::optTip[] = "Toggles Headings On and Off if they exist"; + + +GBFHeadings::GBFHeadings() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +GBFHeadings::~GBFHeadings() { +} + +void GBFHeadings::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *GBFHeadings::getOptionValue() +{ + return (option) ? on:off; +} + +char GBFHeadings::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want headings + char *to, *from, token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; +// memset(token, 0, 2048); + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + switch (*token) { + case 'T': // Reference + switch(token[1]) { + case 'S': // Begin heading + hide = true; + break; + case 's': // end heading + hide = false; + break; + } + continue; // skip token + } + // if not a heading token, keep token in text + if (!hide) { + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + *to++ = *from; + } + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/gbfhtml.cpp b/src/modules/filters/gbfhtml.cpp new file mode 100644 index 0000000..73d445a --- /dev/null +++ b/src/modules/filters/gbfhtml.cpp @@ -0,0 +1,536 @@ +/*************************************************************************** + gbfhtml.cpp - description + ------------------- + begin : Thu Jun 24 1999 + copyright : (C) 1999 by Torsten Uhlmann + email : TUhlmann@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <gbfhtml.h> + + +GBFHTML::GBFHTML() +{ +} + + +char GBFHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char *to, *from, token[2048]; + int tokpos = 0; + bool intoken = false; + bool hasFootnotePreTag = false; + bool isRightJustified = false; + bool isCentered = false; + int len; + const char *tok; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else + from = text; // ------------------------------- + + for (to = text; *from; from++) + { + if (*from == '\n') { + *from = ' '; + } + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { + intoken = false; + // process desired tokens + switch (*token) { + case 'W': // Strongs + switch(token[1]) + { + case 'G': // Greek + case 'H': // Hebrew + case 'T': // Tense + *to++ = ' '; + *to++ = '<'; + *to++ = 's'; + *to++ = 'm'; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'l'; + *to++ = '>'; + *to++ = '<'; + *to++ = 'e'; + *to++ = 'm'; + *to++ = '>'; + for (tok = token+2; *tok; tok++) + *to++ = *tok; + *to++ = '<'; + *to++ = '/'; + *to++ = 'e'; + *to++ = 'm'; + *to++ = '>'; + *to++ = '<'; + *to++ = '/'; + *to++ = 's'; + *to++ = 'm'; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'l'; + *to++ = '>'; + *to++ = ' '; + continue; + } + break; + case 'R': + switch(token[1]) + { + case 'X': + *to++ = '<'; + *to++ = 'a'; + *to++ = ' '; + *to++ = 'h'; + *to++ = 'r'; + *to++ = 'e'; + *to++ = 'f'; + *to++ = '='; + *to++ = '\"'; + for (tok = token + 3; *tok; tok++) { + if(*tok != '<' && *tok+1 != 'R' && *tok+2 != 'x') { + *to++ = *tok; + } + else { + break; + } + } + *to++ = '\"'; + *to++ = '>'; + continue; + case 'x': + *to++ = '<'; + *to++ = '/'; + *to++ = 'a'; + *to++ = 
'>'; + continue; + case 'B': //word(s) explained in footnote + *to++ = '<'; + *to++ = 'i'; + *to++ = '>'; + hasFootnotePreTag = true; //we have the RB tag + continue; + case 'F': // footnote begin + if (hasFootnotePreTag) { + *to++ = '<'; + *to++ = '/'; + *to++ = 'i'; + *to++ = '>'; + *to++ = ' '; + } + *to++ = '<'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = ' '; + *to++ = 'c'; + *to++ = 'o'; + *to++ = 'l'; + *to++ = 'o'; + *to++ = 'r'; + *to++ = '='; + *to++ = '\"'; + *to++ = '#'; + *to++ = '8'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '\"'; + *to++ = '>'; + + *to++ = ' '; + *to++ = '<'; + *to++ = 's'; + *to++ = 'm'; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'l'; + *to++ = '>'; + *to++ = '('; + + continue; + case 'f': // footnote end + *to++ = ')'; + *to++ = '<'; + *to++ = '/'; + *to++ = 's'; + *to++ = 'm'; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'l'; + *to++ = '>'; + *to++ = ' '; + *to++ = '<'; + *to++ = '/'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = '>'; + hasFootnotePreTag = false; + continue; + } + break; + + case 'F': // font tags + switch(token[1]) + { + case 'I': // italic start + *to++ = '<'; + *to++ = 'i'; + *to++ = '>'; + continue; + case 'i': // italic end + *to++ = '<'; + *to++ = '/'; + *to++ = 'i'; + *to++ = '>'; + continue; + case 'B': // bold start + *to++ = '<'; + *to++ = 'b'; + *to++ = '>'; + continue; + case 'b': // bold end + *to++ = '<'; + *to++ = '/'; + *to++ = 'b'; + *to++ = '>'; + continue; + case 'R': // words of Jesus begin + *to++ = '<'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = ' '; + *to++ = 'c'; + *to++ = 'o'; + *to++ = 'l'; + *to++ = 'o'; + *to++ = 'r'; + *to++ = '='; + *to++ = '#'; + *to++ = 'F'; + *to++ = 'F'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '>'; + continue; + case 'r': // words of Jesus end + *to++ = '<'; + *to++ = '/'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ 
= 't'; + *to++ = '>'; + continue; + case 'U': // Underline start + *to++ = '<'; + *to++ = 'u'; + *to++ = '>'; + continue; + case 'u': // Underline end + *to++ = '<'; + *to++ = '/'; + *to++ = 'u'; + *to++ = '>'; + continue; + case 'O': // Old Testament quote begin + *to++ = '<'; + *to++ = 'c'; + *to++ = 'i'; + *to++ = 't'; + *to++ = 'e'; + *to++ = '>'; + continue; + case 'o': // Old Testament quote end + *to++ = '<'; + *to++ = '/'; + *to++ = 'c'; + *to++ = 'i'; + *to++ = 't'; + *to++ = 'e'; + *to++ = '>'; + continue; + case 'S': // Superscript begin + *to++ = '<'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'p'; + *to++ = '>'; + continue; + case 's': // Superscript end + *to++ = '<'; + *to++ = '/'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'p'; + *to++ = '>'; + continue; + case 'V': // Subscript begin + *to++ = '<'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'b'; + *to++ = '>'; + continue; + case 'v': // Subscript end + *to++ = '<'; + *to++ = '/'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'b'; + *to++ = '>'; + continue; + case 'N': + *to++ = '<'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = ' '; + *to++ = 'f'; + *to++ = 'a'; + *to++ = 'c'; + *to++ = 'e'; + *to++ = '='; + *to++ = '"'; + for (tok = token + 2; *tok; tok++) + *to++ = *tok; + *to++ = '"'; + *to++ = '>'; + continue; + case 'n': + *to++ = '<'; + *to++ = '/'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = '>'; + continue; + } + break; + case 'C': // special character tags + switch(token[1]) + { + case 'A': // ASCII value + *to++ = (char)atoi(&token[2]); + continue; + case 'G': + //*to++ = ' '; + continue; + case 'L': // line break + *to++ = '<'; + *to++ = 'b'; + *to++ = 'r'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + *to++ = ' '; + continue; + case 'M': // new paragraph + *to++ = '<'; + *to++ = 'b'; + *to++ = 'r'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + continue; + case 'T': + //*to++ = ' '; + continue; + } + break; + case 'J': //Justification + 
switch(token[1]) + { + case 'R': //right + *to++ = '<'; + *to++ = 'd'; + *to++ = 'i'; + *to++ = 'v'; + *to++ = ' '; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = 'n'; + *to++ = '='; + *to++ = '\"'; + *to++ = 'r'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = 'h'; + *to++ = 't'; + *to++ = '\"'; + *to++ = '>'; + isRightJustified = true; + continue; + + case 'C': //center + *to++ = '<'; + *to++ = 'd'; + *to++ = 'i'; + *to++ = 'v'; + *to++ = ' '; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = 'n'; + *to++ = '='; + *to++ = '\"'; + *to++ = 'c'; + *to++ = 'e'; + *to++ = 'n'; + *to++ = 't'; + *to++ = 'e'; + *to++ = 'r'; + *to++ = '\"'; + *to++ = '>'; + isCentered = true; + continue; + + case 'L': //left, reset right and center + if (isCentered) { + *to++ = '<'; + *to++ = '/'; + *to++ = 'c'; + *to++ = 'e'; + *to++ = 'n'; + *to++ = 't'; + *to++ = 'e'; + *to++ = 'r'; + *to++ = '>'; + isCentered = false; + } + if (isRightJustified) { + *to++ = '<'; + *to++ = '/'; + *to++ = 'd'; + *to++ = 'i'; + *to++ = 'v'; + *to++ = '>'; + isRightJustified = false; + } + continue; + } + break; + case 'T': // title formatting + switch(token[1]) + { + case 'T': // Book title begin + *to++ = '<'; + *to++ = 'b'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = '>'; + continue; + case 't': + *to++ = '<'; + *to++ = '/'; + *to++ = 'b'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = '>'; + continue;/* + case 'S': + *to++ = '<'; + *to++ = 'b'; + *to++ = 'r'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + *to++ = '<'; + *to++ = 'b'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = '>'; + continue; + case 's': + *to++ = '<'; + *to++ = '/'; + *to++ = 'b'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = '>'; + *to++ = '<'; + *to++ = 'b'; + *to++ = 'r'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + continue;*/ + } + break; + + case 'P': // special formatting + switch(token[1]) + { + case 'P': // Poetry begin + *to++ = '<'; + *to++ = 'c'; + *to++ = 'i'; + *to++ = 't'; + *to++ = 'e'; + *to++ = '>'; 
+ continue; + case 'p': + *to++ = '<'; + *to++ = '/'; + *to++ = 'c'; + *to++ = 'i'; + *to++ = 't'; + *to++ = 'e'; + *to++ = '>'; + continue; + } + break; + } + continue; + } + if (intoken) { + if (tokpos < 2045) { + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + } + else + *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/gbfhtmlhref.cpp b/src/modules/filters/gbfhtmlhref.cpp new file mode 100644 index 0000000..30b27ba --- /dev/null +++ b/src/modules/filters/gbfhtmlhref.cpp @@ -0,0 +1,148 @@ +/*************************************************************************** + gbfhtmlhref.cpp - GBF to HTML filter with hrefs + for strongs and morph tags + ------------------- + begin : 2001-09-03 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <gbfhtmlhref.h> + +GBFHTMLHREF::GBFHTMLHREF() { + setTokenStart("<"); + setTokenEnd(">"); + + setTokenCaseSensitive(true); + + addTokenSubstitute("Rf", ")</small></font>"); + addTokenSubstitute("Rx", "</a>"); + addTokenSubstitute("FI", "<i>"); // italics begin + addTokenSubstitute("Fi", "</i>"); + addTokenSubstitute("FB", "<n>"); // bold begin + addTokenSubstitute("Fb", "</n>"); + addTokenSubstitute("FR", "<font color=\"#FF0000\">"); // words of Jesus begin + addTokenSubstitute("Fr", "</font>"); + addTokenSubstitute("FU", "<u>"); // underline begin + addTokenSubstitute("Fu", "</u>"); + addTokenSubstitute("FO", "<cite>"); // Old Testament quote begin + addTokenSubstitute("Fo", "</cite>"); + addTokenSubstitute("FS", "<sup>"); // Superscript begin// Subscript begin + addTokenSubstitute("Fs", "</sup>"); + addTokenSubstitute("FV", "<sub>"); // Subscript begin + addTokenSubstitute("Fv", "</sub>"); + addTokenSubstitute("TT", "<big>"); // Book title begin + addTokenSubstitute("Tt", "</big>"); + addTokenSubstitute("PP", "<cite>"); // poetry begin + addTokenSubstitute("Pp", "</cite>"); + addTokenSubstitute("Fn", "</font>"); // font end + addTokenSubstitute("CL", "<br />"); // new line + addTokenSubstitute("CM", "<!P><br />"); // paragraph <!P> is a non showing comment that can be changed in the front end to <P> if desired + addTokenSubstitute("CG", ""); // ??? + addTokenSubstitute("CT", ""); // ??? 
+ addTokenSubstitute("JR", "<div align=\"right\">"); // right align begin + addTokenSubstitute("JC", "<div align=\"center\">"); // center align begin + addTokenSubstitute("JL", "</div>"); // align end + +} + + +bool GBFHTMLHREF::handleToken(char **buf, const char *token, DualStringMap &userData) { + const char *tok; + + if (!substituteToken(buf, token)) { + if (!strncmp(token, "WG", 2) || !strncmp(token, "WH", 2)) { // strong's numbers + pushString(buf, " <small><em><<a href=\"#"); + for (tok = token+1; *tok; tok++) + //if(token[i] != '\"') + *(*buf)++ = *tok; + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + for (tok = token + 2; *tok; tok++) + //if(token[i] != '\"') + *(*buf)++ = *tok; + pushString(buf, "</a>></em></small>"); + } + + else if (!strncmp(token, "WTG", 3) || !strncmp(token, "WTH", 3)) { // strong's numbers tense + pushString(buf, " <small><em>(<A HREF=\"#"); + for (tok = token + 2; *tok; tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + for (tok = token + 3; *tok; tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + pushString(buf, "</a>)</em></small>"); + } + + else if (!strncmp(token, "WT", 2) && strncmp(token, "WTH", 3) && strncmp(token, "WTG", 3)) { // morph tags + pushString(buf, " <small><em>(<a href=\"M"); + for (tok = token + 2; *tok; tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + for (tok = token + 2; *tok; tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + pushString(buf, "</a>)</em></small>"); + } + + else if (!strncmp(token, "RX", 2)) { + pushString(buf, "<a href=\""); + for (tok = token + 3; *tok; tok++) { + if(*tok != '<' && *tok+1 != 'R' && *tok+2 != 'x') { + *(*buf)++ = *tok; + } + else { + break; + } + } + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + } + + else if (!strncmp(token, "RB", 2)) { + pushString(buf, "<i>"); + userData["hasFootnotePreTag"] = "true"; + } + + else if (!strncmp(token, "RF", 2)) { + if(userData["hasFootnotePreTag"] == "true") { + 
userData["hasFootnotePreTag"] = "false"; + pushString(buf, "</i> "); + } + pushString(buf, "<font color=\"#800000\"><small> ("); + } + + else if (!strncmp(token, "FN", 2)) { + pushString(buf, "<font face=\""); + for (tok = token + 2; *tok; tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + } + + else if (!strncmp(token, "CA", 2)) { // ASCII value + *(*buf)++ = (char)atoi(&token[2]); + } + + else { + return false; + } + } + return true; +} diff --git a/src/modules/filters/gbfmorph.cpp b/src/modules/filters/gbfmorph.cpp new file mode 100644 index 0000000..f8d336e --- /dev/null +++ b/src/modules/filters/gbfmorph.cpp @@ -0,0 +1,98 @@ +/****************************************************************************** + * + * gbfmorph - SWFilter decendant to hide or show morph tags + * in a GBF module. + */ + + +#include <stdlib.h> +#include <string.h> +#include <gbfmorph.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char GBFMorph::on[] = "On"; +const char GBFMorph::off[] = "Off"; +const char GBFMorph::optName[] = "Morphological Tags"; +const char GBFMorph::optTip[] = "Toggles Morphological Tags On and Off if they exist"; + + +GBFMorph::GBFMorph() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +GBFMorph::~GBFMorph() { +} + +void GBFMorph::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *GBFMorph::getOptionValue() +{ + return (option) ? on:off; +} + +char GBFMorph::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want morph tags + char *to, *from, token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (*token == 'W' && token[1] == 'T') { // Morph + if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { + if (lastspace) + to--; + } + continue; + } + // if not a morph tag token, keep token in text + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + *to++ = *from; + lastspace = (*from == ' '); + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/gbfosis.cpp b/src/modules/filters/gbfosis.cpp new file mode 100644 index 0000000..43161d4 --- /dev/null +++ b/src/modules/filters/gbfosis.cpp @@ -0,0 +1,313 @@ +/****************************************************************************** + * + * gbfstrongs - SWFilter decendant to hide or show strongs number + * in a GBF module. + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <stdarg.h> +#include <gbfosis.h> +#include <swmodule.h> +#include <versekey.h> +#include <stdarg.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +GBFOSIS::GBFOSIS() { +} + + +GBFOSIS::~GBFOSIS() { +} + + +char GBFOSIS::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) { + + char *to, *from, token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + int word = 1; + char val[128]; + char buf[128]; + char wordstr[5]; + char *valto; + char *ch; + char *textStart, *textEnd; + char *wordStart, *wordEnd; + bool newText = false; + bool newWord = false; + string tmp; + bool suspendTextPassThru = false; + bool keepToken = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; + + textStart = from; + wordStart = text; + + // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + textEnd = from-1; + wordEnd = to; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + keepToken = false; + suspendTextPassThru = false; + newWord = true; + + + while (wordStart < (text+maxlen)) { +// if (strchr(" ,;.?!()'\"", *wordStart)) + if (strchr(";, .:?!()'\"", *wordStart)) + wordStart++; + else break; + } + while (wordEnd > wordStart) { + if (strchr(" ,;.:?!()'\"", *wordEnd)) + wordEnd--; + else break; + } + + // Scripture Reference + if (!strncmp(token, "scripRef", 8)) { + // pushString(buf, "<reference work=\"Bible.KJV\" reference=\""); + suspendTextPassThru = true; + newText = true; + } + else if (!strncmp(token, "/scripRef", 9)) { + tmp = ""; + tmp.append(textStart, (int)(textEnd - textStart)+1); + pushString(&to, convertToOSIS(tmp.c_str(), key)); + suspendTextPassThru = false; + } + + // Footnote + if (!strcmp(token, "RF")) { + // pushString(buf, "<reference work=\"Bible.KJV\" reference=\""); + suspendTextPassThru = true; + newText = true; + } + else if (!strcmp(token, "Rf")) { + tmp = "<note type=\"x-StudyNote\"><notePart type=\"x-MainText\">"; + tmp.append(textStart, (int)(textEnd - textStart)+1); + tmp += "</notePart></note>"; + pushString(&to, tmp.c_str()); + suspendTextPassThru 
= false; + } + + // Figure + else if (!strncmp(token, "img ", 4)) { + const char *src = strstr(token, "src"); + if (!src) // assert we have a src attribute + return false; + + pushString(&to, "<figure src=\""); + const char *c; + for (c = src;((*c) && (*c != '"')); c++); + + /* uncomment for SWORD absolute path logic + if (*(c+1) == '/') { + pushString(buf, "file:"); + pushString(buf, module->getConfigEntry("AbsoluteDataPath")); + if (*((*buf)-1) == '/') + c++; // skip '/' + } + end of uncomment for asolute path logic */ + + for (c++;((*c) && (*c != '"')); c++) + *to++ = *c; + + pushString(&to, "\" />"); + return true; + } + + // Strongs numbers + else if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) { // Strongs + if (module->isProcessEntryAttributes()) { + valto = val; + for (unsigned int i = 1; ((token[i]) && (i < 150)); i++) + *valto++ = token[i]; + *valto = 0; + // normal strongs number + strstrip(val); + if (!strncmp(wordStart, "<w ", 3)) { + sprintf(buf, "lemma=\"x-Strong:%s\" ", val); + memmove(wordStart+3+strlen(buf), wordStart+3, (to-wordStart)+1); + memcpy(wordStart+3, buf, strlen(buf)); + to+=strlen(buf); + } + else { + sprintf(buf, "<w lemma=\"x-Strong:%s\">", val); + memmove(wordStart+strlen(buf), wordStart, (to-wordStart)+1); + memcpy(wordStart, buf, strlen(buf)); + to+=strlen(buf); + pushString(&to, "</w>"); + module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val; + } + } + } + + // Morphology + else if (*token == 'W' && token[1] == 'T' && (token[2] == 'G' || token[2] == 'H')) { // Strongs + valto = val; + for (unsigned int i = 1; ((token[i]) && (i < 150)); i++) + *valto++ = token[i]; + *valto = 0; + strstrip(val); + if (!strncmp(wordStart, "<w ", 3)) { + sprintf(buf, "morph=\"x-%s:%s\" ", "StrongsMorph", val); + memmove(wordStart+3+strlen(buf), wordStart+3, (to-wordStart)+1); + memcpy(wordStart+3, buf, strlen(buf)); + to+=strlen(buf); + } + else { + sprintf(buf, "<w morph=\"x-%s:%s\">", "StrongsMorph", val); + 
memmove(wordStart+strlen(buf), wordStart, (to-wordStart)+1); + memcpy(wordStart, buf, strlen(buf)); + to+=strlen(buf); + pushString(&to, "</w>"); + } + } + + if (!keepToken) { // if we don't want strongs + if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { + if (lastspace) + to--; + } + if (newText) {textStart = from+1; newText = false; } +// if (newWord) {wordStart = to; newWord = false; } + continue; + } + // if not a strongs token, keep token in text + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + if (newText) {textStart = to; newWord = false; } +// if (newWord) {wordStart = to; newWord = false; } + continue; + } + if (intoken) { + if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + } + else { + if (newWord && (*from != ' ')) {wordStart = to; newWord = false; memset(to, 0, 10); } + if (!suspendTextPassThru) { + *to++ = *from; + lastspace = (*from == ' '); + } + } + } + + VerseKey *vkey = SWDYNAMIC_CAST(VerseKey, key); + if (vkey) { + char ref[254]; + if (vkey->Verse()) + sprintf(ref, "<verseStart ref=\"%s\" />", vkey->getOSISRef()); + else if (vkey->Chapter()) + sprintf(ref, "<chapterStart ref=\"%s\" />", vkey->getOSISRef()); + else if (vkey->Book()) + sprintf(ref, "<bookStart ref=\"%s\" />", vkey->getOSISRef()); + else *ref = 0; + if (*ref) { + memmove(text+strlen(ref), text, maxlen-strlen(ref)-1); + memcpy(text, ref, strlen(ref)); + to+=strlen(ref); + if (vkey->Verse()) { + VerseKey tmp; + tmp = *vkey; + tmp.AutoNormalize(0); + tmp.Headings(1); + sprintf(ref, "<verseEnd ref=\"%s\" />", vkey->getOSISRef()); + pushString(&to, ref); + tmp = MAXVERSE; + if (*vkey == tmp) { + tmp.Verse(0); + sprintf(ref, "<chapterEnd ref=\"%s\" />", tmp.getOSISRef()); + pushString(&to, ref); + tmp = MAXCHAPTER; + tmp = MAXVERSE; + if (*vkey == tmp) { + tmp.Chapter(0); + tmp.Verse(0); + sprintf(ref, "<bookEnd ref=\"%s\" />", tmp.getOSISRef()); + pushString(&to, ref); + } + } + } + + 
else if (vkey->Chapter()) + sprintf(ref, "<chapterStart ref=\"%s\" />", vkey->getOSISRef()); + else sprintf(ref, "<bookStart ref=\"%s\" />", vkey->getOSISRef()); + } + } + *to++ = 0; + *to = 0; + return 0; +} + + +void GBFOSIS::pushString(char **buf, const char *format, ...) { + va_list argptr; + + va_start(argptr, format); + *buf += vsprintf(*buf, format, argptr); + va_end(argptr); + +// *buf += strlen(*buf); +} + + +const char *GBFOSIS::convertToOSIS(const char *inRef, const SWKey *key) { + static string outRef; + + outRef = ""; + + VerseKey defLanguage; + ListKey verses = defLanguage.ParseVerseList(inRef, (*key), true); + const char *startFrag = inRef; + for (int i = 0; i < verses.Count(); i++) { + VerseKey *element = SWDYNAMIC_CAST(VerseKey, verses.GetElement(i)); + char buf[5120]; + char frag[800]; + if (element) { + memmove(frag, startFrag, ((const char *)element->userData - startFrag) + 1); + frag[((const char *)element->userData - startFrag) + 1] = 0; + startFrag = (const char *)element->userData + 1; + sprintf(buf, "<reference refStart=\"KJV:%s\" refEnd=\"%s\">%s</reference>", element->LowerBound().getOSISRef(), element->UpperBound().getOSISRef(), frag); + } + else { + memmove(frag, startFrag, ((const char *)verses.GetElement(i)->userData - startFrag) + 1); + frag[((const char *)verses.GetElement(i)->userData - startFrag) + 1] = 0; + startFrag = (const char *)verses.GetElement(i)->userData + 1; + sprintf(buf, "<reference refStart=\"KJV:%s\">%s</reference>", VerseKey(*verses.GetElement(i)).getOSISRef(), frag); + } + outRef+=buf; + } + return outRef.c_str(); +} diff --git a/src/modules/filters/gbfplain.cpp b/src/modules/filters/gbfplain.cpp new file mode 100644 index 0000000..65766d3 --- /dev/null +++ b/src/modules/filters/gbfplain.cpp @@ -0,0 +1,106 @@ +/****************************************************************************** + * + * gbfplain - SWFilter decendant to strip out all GBF tags or convert to + * ASCII rendered symbols. 
+ */ + + +#include <stdlib.h> +#include <string.h> +#include <gbfplain.h> + + +GBFPlain::GBFPlain() { +} + + +char GBFPlain::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char *to, *from, token[2048]; + int tokpos = 0; + bool intoken = false; + int len; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { + intoken = false; + // process desired tokens + switch (*token) { + case 'W': // Strongs + switch(token[1]) { + case 'G': // Greek + case 'H': // Hebrew + case 'T': // Tense + *to++ = ' '; + *to++ = '<'; + for (char *tok = token + 2; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + *to++ = ' '; + continue; + } + break; + case 'R': + switch(token[1]) { + case 'F': // footnote begin + *to++ = ' '; + *to++ = '['; + continue; + case 'f': // footnote end + *to++ = ']'; + *to++ = ' '; + continue; + } + break; + case 'C': + switch(token[1]) { + case 'A': // ASCII value + *to++ = (char)atoi(&token[2]); + continue; + case 'G': + *to++ = '>'; + continue; +/* Bug in WEB + case 'L': + *to++ = '<'; + continue; +*/ + case 'L': // Bug in WEB. 
Use above entry when fixed + case 'N': // new line + *to++ = '\n'; + continue; + case 'M': // new paragraph + *to++ = '\n'; + *to++ = '\n'; + continue; + } + break; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/gbfrtf.cpp b/src/modules/filters/gbfrtf.cpp new file mode 100644 index 0000000..40e5752 --- /dev/null +++ b/src/modules/filters/gbfrtf.cpp @@ -0,0 +1,298 @@ +/****************************************************************************** + * + * gbfrtf - SWFilter decendant to convert all GBF tags to RTF tags + */ + + +#include <stdlib.h> +#include <string.h> +#include <gbfrtf.h> +#include <ctype.h> + +GBFRTF::GBFRTF() { +} + + +char GBFRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned char *to, *from; + char token[2048]; + int tokpos = 0; + bool intoken = false; + int len; + const char *tok; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned char *)&text[maxlen - len]; + } + else from = (unsigned char *)text; // ------------------------------- + for (to = (unsigned char *)text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { + intoken = false; + // process desired tokens + switch (*token) { + case 'W': // Strongs + switch(token[1]) { + case 'G': // Greek + case 'H': // Hebrew + *to++ = '{'; + *to++ = '\\'; + *to++ = 'c'; + *to++ = 'f'; + *to++ = '3'; + *to++ = ' '; + *to++ = '\\'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'b'; + *to++ = ' '; + *to++ = '<'; + for (tok = token + 2; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + *to++ = '}'; + continue; + + case 'T': // Tense + *to++ = '{'; + *to++ = '\\'; + *to++ = 'c'; + *to++ = 'f'; + *to++ = '4'; + 
*to++ = ' '; + *to++ = '\\'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'b'; + *to++ = ' '; + *to++ = '('; + bool separate = false; + for (tok = token + 2; *tok; tok++) { + if (separate) { + *to++ = ';'; + *to++ = ' '; + separate = false; + } + switch (*tok) { + case 'G': + case 'H': + for (tok++; *tok; tok++) { + if (isdigit(*tok)) { + *to++ = *tok; + separate = true; + } + else { + tok--; + break; + } + } + break; + default: + for (; *tok; tok++) { + *to++ = *tok; + } + } + } + *to++ = ')'; + *to++ = '}'; + continue; + } + break; + case 'R': + switch(token[1]) { + case 'X': + *to++ = '<'; + *to++ = 'a'; + *to++ = ' '; + *to++ = 'h'; + *to++ = 'r'; + *to++ = 'e'; + *to++ = 'f'; + *to++ = '='; + *to++ = '"'; + *to++ = '"'; + *to++ = '>'; + continue; + case 'x': + *to++ = '<'; + *to++ = '/'; + *to++ = 'a'; + *to++ = '>'; + continue; + case 'F': // footnote begin + *to++ = '{'; + *to++ = '\\'; + *to++ = 'i'; + *to++ = '1'; + *to++ = ' '; + *to++ = '\\'; + *to++ = 'f'; + *to++ = 's'; + *to++ = '1'; + *to++ = '7'; + *to++ = ' '; + *to++ = '('; + continue; + case 'f': // footnote end + *to++ = ')'; + *to++ = ' '; + *to++ = '}'; + continue; + } + break; + case 'F': // font tags + switch(token[1]) { + case 'I': // italic start + *to++ = '\\'; + *to++ = 'i'; + *to++ = '1'; + *to++ = ' '; + continue; + case 'i': // italic end + *to++ = '\\'; + *to++ = 'i'; + *to++ = '0'; + *to++ = ' '; + continue; + case 'B': // bold start + *to++ = '\\'; + *to++ = 'b'; + *to++ = '1'; + *to++ = ' '; + continue; + case 'b': // bold end + *to++ = '\\'; + *to++ = 'b'; + *to++ = '0'; + *to++ = ' '; + continue; + case 'N': + *to++ = '{'; + if (!strnicmp(token+2, "Symbol", 6)) { + *to++ = '\\'; + *to++ = 'f'; + *to++ = '7'; + *to++ = ' '; + } + continue; + case 'n': + *to++ = '}'; + continue; + case 'S': + *to++ = '{'; + *to++ = '\\'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'p'; + *to++ = 'e'; + *to++ = 'r'; + *to++ = ' '; + continue; + case 's': + *to++ = '}'; + continue; + case 'R': + *to++ = '{'; 
+ *to++ = '\\'; + *to++ = 'c'; + *to++ = 'f'; + *to++ = '6'; + *to++ = ' '; + continue; + case 'r': + *to++ = '}'; + continue; + } + break; + case 'C': // special character tags + switch(token[1]) { + case 'A': // ASCII value + *to++ = (char)atoi(&token[2]); + continue; + case 'G': + *to++ = '>'; + continue; + case 'L': // line break + *to++ = '\\'; + *to++ = 'l'; + *to++ = 'i'; + *to++ = 'n'; + *to++ = 'e'; + *to++ = ' '; + continue; + case 'M': // new paragraph + *to++ = '\\'; + *to++ = 'p'; + *to++ = 'a'; + *to++ = 'r'; + *to++ = ' '; + continue; + case 'T': + *to++ = '<'; + } + break; + case 'T': // title formatting + switch(token[1]) + { + case 'T': // Book title begin + *to++ = '{'; + *to++ = '\\'; + *to++ = 'f'; + *to++ = 's'; + *to++ = '2'; + *to++ = '2'; + *to++ = ' '; + continue; + case 't': + *to++ = '}'; + continue; + case 'S': + *to++ = '\\'; + *to++ = 'p'; + *to++ = 'a'; + *to++ = 'r'; + *to++ = ' '; + *to++ = '{'; + *to++ = '\\'; + *to++ = 'i'; + *to++ = '1'; + *to++ = '\\'; + *to++ = 'b'; + *to++ = '1'; + *to++ = ' '; + continue; + case 's': + *to++ = '}'; + *to++ = '\\'; + *to++ = 'p'; + *to++ = 'a'; + *to++ = 'r'; + *to++ = ' '; + continue; + } + break; + + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/gbfstrongs.cpp b/src/modules/filters/gbfstrongs.cpp new file mode 100644 index 0000000..cb722bd --- /dev/null +++ b/src/modules/filters/gbfstrongs.cpp @@ -0,0 +1,130 @@ +/****************************************************************************** + * + * gbfstrongs - SWFilter decendant to hide or show strongs number + * in a GBF module. 
+ */ + + +#include <stdlib.h> +#include <string.h> +#include <gbfstrongs.h> +#include <swmodule.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif +#include <ctype.h> + +const char GBFStrongs::on[] = "On"; +const char GBFStrongs::off[] = "Off"; +const char GBFStrongs::optName[] = "Strong's Numbers"; +const char GBFStrongs::optTip[] = "Toggles Strong's Numbers On and Off if they exist"; + + +GBFStrongs::GBFStrongs() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +GBFStrongs::~GBFStrongs() { +} + +void GBFStrongs::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *GBFStrongs::getOptionValue() +{ + return (option) ? on:off; +} + +char GBFStrongs::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char *to, *from, token[2048]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + int word = 1; + char val[128]; + char wordstr[5]; + char *valto; + char *ch; + char *textStart = text, *textEnd = 0; + bool newText = false; + string tmp; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + textEnd = to; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) { // Strongs + if (module->isProcessEntryAttributes()) { + valto = val; + for (unsigned int i = 2; ((token[i]) && (i < 150)); i++) + *valto++ = token[i]; + *valto = 0; + if (atoi((!isdigit(*val))?val+1:val) < 5627) { + // normal strongs number + sprintf(wordstr, "%03d", word++); + module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val; + tmp = ""; + tmp.append(textStart, (int)(textEnd - 
textStart)); + module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp; + newText = true; + } + else { + // verb morph + sprintf(wordstr, "%03d", word-1); + module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; + } + } + if (!option) { + if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { + if (lastspace) + to--; + } + if (newText) {textStart = to; newText = false; } + continue; + } + } + // if not a strongs token, keep token in text + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + if (newText) {textStart = to; newText = false; } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + *to++ = *from; + lastspace = (*from == ' '); + } + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/gbfthml.cpp b/src/modules/filters/gbfthml.cpp new file mode 100644 index 0000000..ca03e71 --- /dev/null +++ b/src/modules/filters/gbfthml.cpp @@ -0,0 +1,463 @@ +/*************************************************************************** + gbfthml.cpp - GBF to ThML filter + ------------------- + begin : 1999-10-27 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <gbfthml.h> + + +GBFThML::GBFThML() +{ +} + + +char GBFThML::ProcessText(char *text, int maxlen) +{ + char *to, *from, token[2048]; + int tokpos = 0; + bool intoken = false; + int len; + const char *tok; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) + { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') + { + intoken = false; + // process desired tokens + switch (*token) { + case 'W': // Strongs + switch(token[1]) { + case 'G': + case 'H': + *to++ = '<'; + *to++ = 's'; + *to++ = 'y'; + *to++ = 'n'; + *to++ = 'c'; + *to++ = ' '; + *to++ = 't'; + *to++ = 'y'; + *to++ = 'p'; + *to++ = 'e'; + *to++ = '='; + *to++ = '"'; + *to++ = 'S'; + *to++ = 't'; + *to++ = 'r'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 'g'; + *to++ = 's'; + *to++ = '"'; + *to++ = ' '; + *to++ = 'v'; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'u'; + *to++ = 'e'; + *to++ = '='; + *to++ = '"'; + for (tok = token + 1; *tok; tok++) + *to++ = *tok; + *to++ = '"'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + continue; + + case 'T': // Tense + *to++ = '<'; + *to++ = 's'; + *to++ = 'y'; + *to++ = 'n'; + *to++ = 'c'; + *to++ = ' '; + *to++ = 't'; + *to++ = 'y'; + *to++ = 'p'; + *to++ = 'e'; + *to++ = '='; + *to++ = '"'; + *to++ = 'M'; + *to++ = 'o'; + *to++ = 'r'; + *to++ = 'p'; + *to++ = 'h'; + *to++ = '"'; + *to++ = ' '; + *to++ = 'v'; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'u'; + *to++ = 'e'; + *to++ = '='; + *to++ = '"'; + for (tok = token + 2; *tok; tok++) + *to++ = *tok; + *to++ = '"'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + continue; + } + break; + case 'R': + 
switch(token[1]) + { + case 'X': + *to++ = '<'; + *to++ = 'a'; + *to++ = ' '; + *to++ = 'h'; + *to++ = 'r'; + *to++ = 'e'; + *to++ = 'f'; + *to++ = '='; + *to++ = '\"'; + for (tok = token + 3; *tok; tok++) { + if(*tok != '<' && *tok+1 != 'R' && *tok+2 != 'x') { + *to++ = *tok; + } + else { + break; + } + } + *to++ = '\"'; + *to++ = '>'; + continue; + case 'x': + *to++ = '<'; + *to++ = '/'; + *to++ = 'a'; + *to++ = '>'; + continue; + case 'F': // footnote begin + *to++ = '<'; + *to++ = 'n'; + *to++ = 'o'; + *to++ = 't'; + *to++ = 'e'; + *to++ = ' '; + *to++ = 'p'; + *to++ = 'l'; + *to++ = 'a'; + *to++ = 'c'; + *to++ = 'e'; + *to++ = '='; + *to++ = '"'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'o'; + *to++ = 't'; + *to++ = '"'; + *to++ = '>'; + continue; + case 'f': // footnote end + *to++ = '<'; + *to++ = '/'; + *to++ = 'n'; + *to++ = 'o'; + *to++ = 't'; + *to++ = 'e'; + *to++ = '>'; + continue; + } + break; + case 'F': // font tags + switch(token[1]) + { + case 'N': + *to++ = '<'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = ' '; + *to++ = 'f'; + *to++ = 'a'; + *to++ = 'c'; + *to++ = 'e'; + *to++ = '='; + *to++ = '"'; + for (tok = token + 2; *tok; tok++) + *to++ = *tok; + *to++ = '"'; + *to++ = '>'; + continue; + case 'n': + *to++ = '<'; + *to++ = '/'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = '>'; + continue; + case 'I': // italic start + *to++ = '<'; + *to++ = 'i'; + *to++ = '>'; + continue; + case 'i': // italic end + *to++ = '<'; + *to++ = '/'; + *to++ = 'i'; + *to++ = '>'; + continue; + case 'B': // bold start + *to++ = '<'; + *to++ = 'b'; + *to++ = '>'; + continue; + case 'b': // bold end + *to++ = '<'; + *to++ = '/'; + *to++ = 'b'; + *to++ = '>'; + continue; + + case 'R': // words of Jesus begin + *to++ = '<'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = ' '; + *to++ = 'c'; + *to++ = 'o'; + *to++ = 'l'; + *to++ = 'o'; + *to++ = 'r'; + *to++ = '='; + *to++ = '\"'; + *to++ = '#'; + 
*to++ = 'f'; + *to++ = 'f'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '\"'; + *to++ = '>'; + continue; + case 'r': // words of Jesus end + *to++ = '<'; + *to++ = '/'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = '>'; + continue; + case 'U': // Underline start + *to++ = '<'; + *to++ = 'u'; + *to++ = '>'; + continue; + case 'u': // Underline end + *to++ = '<'; + *to++ = '/'; + *to++ = 'u'; + *to++ = '>'; + continue; + case 'O': // Old Testament quote begin + *to++ = '<'; + *to++ = 'c'; + *to++ = 'i'; + *to++ = 't'; + *to++ = 'e'; + *to++ = '>'; + continue; + case 'o': // Old Testament quote end + *to++ = '<'; + *to++ = '/'; + *to++ = 'c'; + *to++ = 'i'; + *to++ = 't'; + *to++ = 'e'; + *to++ = '>'; + continue; + case 'S': // Superscript begin + *to++ = '<'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'p'; + *to++ = '>'; + continue; + case 's': // Superscript end + *to++ = '<'; + *to++ = '/'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'p'; + *to++ = '>'; + continue; + case 'V': // Subscript begin + *to++ = '<'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'b'; + *to++ = '>'; + continue; + case 'v': // Subscript end + *to++ = '<'; + *to++ = '/'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'b'; + *to++ = '>'; + continue; + } + break; + case 'C': // special character tags + switch(token[1]) + { + case 'A': // ASCII value + *to++ = (char)atoi(&token[2]); + continue; + case 'G': + //*to++ = ' '; + continue; + case 'L': // line break + *to++ = '<'; + *to++ = 'b'; + *to++ = 'r'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + *to++ = ' '; + continue; + case 'M': // new paragraph + *to++ = '<'; + *to++ = 'p'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + continue; + case 'T': + //*to++ = ' '; + continue; + } + break; + case 'T': // title formatting + switch(token[1]) + { + case 'T': // Book title begin + *to++ = '<'; + *to++ = 'b'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = '>'; + continue; + case 't': + *to++ = '<'; + *to++ = '/'; + *to++ = 
'b'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = '>'; + continue; + case 'S': + *to++ = '<'; + *to++ = 'd'; + *to++ = 'i'; + *to++ = 'v'; + *to++ = ' '; + *to++ = 'c'; + *to++ = 'l'; + *to++ = 'a'; + *to++ = 's'; + *to++ = 's'; + *to++ = '='; + *to++ = '\"'; + *to++ = 's'; + *to++ = 'e'; + *to++ = 'c'; + *to++ = 'h'; + *to++ = 'e'; + *to++ = 'a'; + *to++ = 'd'; + *to++ = '\"'; + *to++ = '>'; + continue; + case 's': + *to++ = '<'; + *to++ = '/'; + *to++ = 'd'; + *to++ = 'i'; + *to++ = 'v'; + *to++ = '>'; + continue; + } + break; + + case 'P': // special formatting + switch(token[1]) + { + case 'P': // Poetry begin + *to++ = '<'; + *to++ = 'v'; + *to++ = 'e'; + *to++ = 'r'; + *to++ = 's'; + *to++ = 'e'; + *to++ = '>'; + continue; + case 'p': + *to++ = '<'; + *to++ = '/'; + *to++ = 'v'; + *to++ = 'e'; + *to++ = 'r'; + *to++ = 's'; + *to++ = 'e'; + *to++ = '>'; + continue; + } + break; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} + + + diff --git a/src/modules/filters/greeklexattribs.cpp b/src/modules/filters/greeklexattribs.cpp new file mode 100644 index 0000000..0f85c6c --- /dev/null +++ b/src/modules/filters/greeklexattribs.cpp @@ -0,0 +1,96 @@ +/****************************************************************************** + * + * greeklexattribs - SWFilter decendant to set entry attributes for greek + * lexicons + */ + + +#include <stdlib.h> +#include <ctype.h> +#include <string.h> +#include <greeklexattribs.h> +#include <swmodule.h> + + +GreekLexAttribs::GreekLexAttribs() { +} + + +char GreekLexAttribs::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) { + + if (module->isProcessEntryAttributes()) { + char *from; + bool inAV = false; + string phrase; + string freq; + char val[128], *valto; + char wordstr[7]; + char *currentPhrase = 0, *ch = 0; + char *currentPhraseEnd = 0; + int number = 0; + + + for (from = 
text; *from; from++) { + if (inAV) { + if (currentPhrase == 0) { + if (isalpha(*from)) + currentPhrase = from; + } + else { + if ((!isalpha(*from)) && (*from != ' ') && (*from != '+') && (*from !='(') && (*from != ')') && (*from != '\'')) { + if (*from == '<') { + if (!currentPhraseEnd) + currentPhraseEnd = from - 1; + for (; *from && *from != '>'; from++) { + if (!strncmp(from, "value=\"", 7)) { + valto = val; + from += 7; + for (unsigned int i = 0; from[i] != '\"' && i < 127; i++) + *valto++ = from[i]; + *valto = 0; + sprintf(wordstr, "%03d", number+1); + module->getEntryAttributes()["AVPhrase"][wordstr]["CompoundedWith"] = val; + from += strlen(val); + } + } + continue; + } + + phrase = ""; + phrase.append(currentPhrase, (int)(((currentPhraseEnd)?currentPhraseEnd:from) - currentPhrase)-1); + currentPhrase = from; + while (*from && isdigit(*from)) from++; + freq = ""; + freq.append(currentPhrase, (int)(from - currentPhrase)); + if ((freq.length() > 0) && (phrase.length() > 0)) { + sprintf(wordstr, "%03d", ++number); + if ((strchr(phrase.c_str(), '(') > phrase.c_str()) && (strchr(phrase.c_str(), ')') > phrase.c_str() + 1)) { + string tmp = phrase.substr(0, phrase.find_first_of("(")); + phrase.erase(phrase.find_first_of("("), 1); + phrase.erase(phrase.find_first_of(")"), 1); + phrase.erase(0,phrase.find_first_not_of("\r\n\v\t ")); phrase.erase(phrase.find_last_not_of("\r\n\v\t ")+1); + module->getEntryAttributes()["AVPhrase"][wordstr]["Alt"] = phrase; + phrase = tmp; + } + phrase.erase(0,phrase.find_first_not_of("\r\n\v\t ")); phrase.erase(phrase.find_last_not_of("\r\n\v\t ")+1); + freq.erase(0,freq.find_first_not_of("\r\n\v\t ")); freq.erase(freq.find_last_not_of("\r\n\v\t ")+1); + module->getEntryAttributes()["AVPhrase"][wordstr]["Phrase"] = phrase; + module->getEntryAttributes()["AVPhrase"][wordstr]["Frequency"] = freq; + currentPhrase = 0; + currentPhraseEnd = 0; + } + } + } + if (*from == ';') inAV = false; + + } + else if (!strncmp(from, "AV-", 3)) { + inAV = 
true; + from+=2; + } + } + } + return 0; +} + + diff --git a/src/modules/filters/latin1utf16.cpp b/src/modules/filters/latin1utf16.cpp new file mode 100644 index 0000000..75ee998 --- /dev/null +++ b/src/modules/filters/latin1utf16.cpp @@ -0,0 +1,120 @@ +/****************************************************************************** + * + * Latin1UTF16 - SWFilter decendant to convert a Latin-1 character to UTF-16 + * + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <latin1utf16.h> + +Latin1UTF16::Latin1UTF16() { +} + + +char Latin1UTF16::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned char *from; + unsigned short *to; + int len; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned char*)&text[maxlen - len]; + } + else + from = (unsigned char*)text; + // ------------------------------- + + for (to = (unsigned short*)text; *from; from++) { + switch (*from) { + case 0x80: // '€' + *to++ = 0x20AC; + break; + case 0x82: // '‚' + *to++ = 0x201A; + break; + case 0x83: // 'ƒ' + *to++ = 0x0192; + break; + case 0x84: // '„' + *to++ = 0x201E; + break; + case 0x85: // '…' + *to++ = 0x2026; + break; + case 0x86: // '†' + *to++ = 0x2020; + break; + case 0x87: // '‡' + *to++ = 0x2021; + break; + case 0x88: // 'ˆ' + *to++ = 0x02C6; + break; + case 0x89: // '‰' + *to++ = 0x2030; + break; + case 0x8A: // 'Š' + *to++ = 0x0160; + break; + case 0x8B: // '‹' + *to++ = 0x2039; + break; + case 0x8C: // 'Œ' + *to++ = 0x0152; + break; + case 0x8E: // 'Ž' + *to++ = 0x017D; + break; + case 0x91: // '‘' + *to++ = 0x2018; + break; + case 0x92: // '’' + *to++ = 0x2019; + break; + case 0x93: // '“' + *to++ = 0x201C; + break; + case 0x94: // '”' + *to++ = 0x201D; + break; + case 0x95: // '•' + *to++ = 0x2022; + break; + case 0x96: // '–' + *to++ = 0x2013; + break; + case 0x97: // '—' + *to++ = 0x2014; + break; + case 0x98: // '˜' + *to++ = 0x02DC; + 
break; + case 0x99: // '™' + *to++ = 0x2122; + break; + case 0x9A: // 'š' + *to++ = 0x0161; + break; + case 0x9B: // '›' + *to++ = 0x203A; + break; + case 0x9C: // 'œ' + *to++ = 0x0153; + break; + case 0x9E: // 'ž' + *to++ = 0x017E; + break; + case 0x9F: // 'Ÿ' + *to++ = 0x0178; + break; + default: + *to++ = (unsigned short)*from; + } + } + *to = 0; + return 0; +} diff --git a/src/modules/filters/latin1utf8.cpp b/src/modules/filters/latin1utf8.cpp new file mode 100644 index 0000000..91af8dc --- /dev/null +++ b/src/modules/filters/latin1utf8.cpp @@ -0,0 +1,179 @@ +/****************************************************************************** + * + * Latin1UTF8 - SWFilter decendant to convert a Latin-1 character to UTF-8 + * + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <latin1utf8.h> +#include <swmodule.h> + +Latin1UTF8::Latin1UTF8() { +} + + +char Latin1UTF8::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned char *to, *from; + int len; + + len = strlen(text) + 1; + if (len == maxlen + 1) + maxlen = (maxlen + 1) * FILTERPAD; + // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned char*)&text[maxlen - len]; + } + else + from = (unsigned char*)text; // ------------------------------- + + + + for (to = (unsigned char*)text; *from; from++) { + if (*from < 0x80) { + *to++ = *from; + } + else if (*from < 0xc0) { + switch(*from) { + case 0x80: // '€' + *to++ = 0xe2; // 'â' + *to++ = 0x82; // '‚' + *to++ = 0xac; // '¬' + break; + case 0x82: // '‚' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x9a; // 'š' + break; + case 0x83: // 'ƒ' + *to++ = 0xc6; // 'Æ' + *to++ = 0x92; // '’' + break; + case 0x84: // '„' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x9e; // 'ž' + break; + case 0x85: // '…' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0xa6; // '¦' + break; + case 0x86: // '†' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' 
+ *to++ = 0xa0; // ' ' + break; + case 0x87: // '‡' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0xa1; // '¡' + break; + case 0x88: // 'ˆ' + *to++ = 0xcb; // 'Ë' + *to++ = 0x86; // '†' + break; + case 0x89: // '‰' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0xb0; // '°' + break; + case 0x8A: // 'Š' + *to++ = 0xc5; // 'Å' + *to++ = 0xa0; // ' ' + break; + case 0x8B: // '‹' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0xb9; // '¹' + break; + case 0x8C: // 'Œ' + *to++ = 0xc5; // 'Å' + *to++ = 0x92; // '’' + break; + case 0x8E: // 'Ž' + *to++ = 0xc5; // 'Å' + *to++ = 0xbd; // '½' + break; + case 0x91: // '‘' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x98; // '˜' + break; + case 0x92: // '’' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x99; // '™' + break; + case 0x93: // '“' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x9c; // 'œ' + break; + case 0x94: // '”' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x9d; // '' + break; + case 0x95: // '•' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0xa2; // '¢' + break; + case 0x96: // '–' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x93; // '“' + break; + case 0x97: // '—' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x94; // '”' + break; + case 0x98: // '˜' + *to++ = 0xcb; // 'Ë' + *to++ = 0x9c; // 'œ' + break; + case 0x99: // '™' + *to++ = 0xe2; // 'â' + *to++ = 0x84; // '„' + *to++ = 0xa2; // '¢' + break; + case 0x9A: // 'š' + *to++ = 0xc5; // 'Å' + *to++ = 0xa1; // '¡' + break; + case 0x9B: // '›' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0xba; // 'º' + break; + case 0x9C: // 'œ' + *to++ = 0xc5; // 'Å' + *to++ = 0x93; // '“' + break; + case 0x9E: // 'ž' + *to++ = 0xc5; // 'Å' + *to++ = 0xbe; // '¾' + break; + case 0x9F: // 'Ÿ' + *to++ = 0xc5; // 'Å' + *to++ = 0xb8; // '¸' + break; + default: + *to++ = 0xC2; + *to++ = *from; + } + } + else { + *to++ = 0xC3; + *to++ = (*from - 0x40); + } + } + 
*to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/plainfootnotes.cpp b/src/modules/filters/plainfootnotes.cpp new file mode 100644 index 0000000..96fc4d8 --- /dev/null +++ b/src/modules/filters/plainfootnotes.cpp @@ -0,0 +1,102 @@ +/*************************************************************************** + plainfootnotes.cpp - description + ------------------- + begin : Wed Oct 13 1999 + copyright : (C) 1999 by The team of BibleTime + email : info@bibletime.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <plainfootnotes.h> +#include <swkey.h> + +#include <stdlib.h> +#include <string.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + +const char PLAINFootnotes::on[] = "On"; +const char PLAINFootnotes::off[] = "Off"; +const char PLAINFootnotes::optName[] = "Footnotes"; +const char PLAINFootnotes::optTip[] = "Toggles Footnotes On and Off In Bible Texts If They Exist"; + +PLAINFootnotes::PLAINFootnotes(){ + option = false; + options.push_back(on); + options.push_back(off); +} + +PLAINFootnotes::~PLAINFootnotes(){ +} + + +void PLAINFootnotes::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *PLAINFootnotes::getOptionValue() +{ + return (option) ? 
on:off; +} + + +char PLAINFootnotes::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char token[2048]; + int tokpos = 0; + bool intoken = false; + bool lastspace = false; + + if (!option) { // if we don't want footnotes + char *to, *from; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) + { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '{') // Footnote start + { + hide = true; + continue; + } + if (*from == '}') // Footnote end + { + hide=false; + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + *to++ = *from; + lastspace = (*from == ' '); + } + } + } + *to++ = 0; + *to = 0; + } + return 0; +} + diff --git a/src/modules/filters/plainhtml.cpp b/src/modules/filters/plainhtml.cpp new file mode 100644 index 0000000..fefb029 --- /dev/null +++ b/src/modules/filters/plainhtml.cpp @@ -0,0 +1,134 @@ +/*************************************************************************** + rwphtml.cpp - description + ------------------- + begin : Thu Jun 24 1999 + copyright : (C) 1999 by Torsten Uhlmann + email : TUhlmann@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <plainhtml.h> + + +PLAINHTML::PLAINHTML() +{ +} + + +char PLAINHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char *to, *from; + int len; + int count = 0; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + for (to = text; *from; from++) + { + if ((*from == '\n') && (from[1] == '\n')) // paragraph + { + *to++ = '<'; + *to++ = 'P'; + *to++ = '>'; + from++; + continue; + } else { + if ((*from == '\n')) // && (from[1] != '\n')) // new line + { + *to++ = '<'; + *to++ = 'B'; + *to++ = 'R'; + *to++ = '>'; + continue; + } + } + + if (*from == '{') { + *to++ = '<'; + *to++ = 'F'; + *to++ = 'O'; + *to++ = 'N'; + *to++ = 'T'; + *to++ = ' '; + *to++ = 'C'; + *to++ = 'O'; + *to++ = 'L'; + *to++ = 'O'; + *to++ = 'R'; + *to++ = '='; + *to++ = '#'; + *to++ = '8'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '>'; + + *to++ = '<'; + *to++ = 'S'; + *to++ = 'M'; + *to++ = 'A'; + *to++ = 'L'; + *to++ = 'L'; + *to++ = '>'; + *to++ = ' '; + *to++ = '('; + continue; + } + + if (*from == '}') + { + *to++ = ')'; + *to++ = ' '; + *to++ = '<'; + *to++ = '/'; + *to++ = 'S'; + *to++ = 'M'; + *to++ = 'A'; + *to++ = 'L'; + *to++ = 'L'; + *to++ = '>'; + + *to++ = '<'; + *to++ = '/'; + *to++ = 'F'; + *to++ = 'O'; + *to++ = 'N'; + *to++ = 'T'; + *to++ = '>'; + continue; + } + + if ((*from == ' ') && (count > 5000)) + { + *to++ = '<'; + *to++ = 'W'; + *to++ = 'B'; + *to++ = 'R'; + *to++ = '>'; + count = 0; + continue; + } + + *to++ = *from; + count++; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/rtfhtml.cpp b/src/modules/filters/rtfhtml.cpp new file mode 100644 index 0000000..f0b842b 
--- /dev/null +++ b/src/modules/filters/rtfhtml.cpp @@ -0,0 +1,99 @@ +/*************************************************************************** + rtfhtml.cpp - description + ------------------- + begin : Wed Oct 13 1999 + copyright : (C) 1999 by The team of BibleTime + email : info@bibletime.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <rtfhtml.h> + + +RTFHTML::RTFHTML() { + +} + + +char RTFHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char *to, *from; + int len; + bool center = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + for (to = text; *from; from++) { + if (*from == '\\') // a RTF command + { + if ((from[1] == 'p') && (from[2] == 'a') && (from[3] == 'r') && (from[4] == 'd')) + { // switch all modifier off + if (center) + { + *to++ = '<'; + *to++ = '/'; + *to++ = 'C'; + *to++ = 'E'; + *to++ = 'N'; + *to++ = 'T'; + *to++ = 'E'; + *to++ = 'R'; + *to++ = '>'; + center = false; + } + from += 4; + continue; + } + if ((from[1] == 'p') && (from[2] == 'a') && (from[3] == 'r')) + { + *to++ = '<'; + *to++ = 'P'; + *to++ = '>'; + *to++ = '\n'; + from += 3; + continue; + } + if (from[1] == ' ') + { + from += 1; + continue; + } + if ((from[1] == 'q') && (from[2] == 'c')) // center on + { + if (!center) + { + *to++ = '<'; + *to++ = 'C'; 
+ *to++ = 'E'; + *to++ = 'N'; + *to++ = 'T'; + *to++ = 'E'; + *to++ = 'R'; + *to++ = '>'; + center = true; + } + from += 2; + continue; + } + } + + *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/rwphtml.cpp b/src/modules/filters/rwphtml.cpp new file mode 100644 index 0000000..6f8ae4f --- /dev/null +++ b/src/modules/filters/rwphtml.cpp @@ -0,0 +1,187 @@ +/*************************************************************************** + rwphtml.cpp - description + ------------------- + begin : Thu Jun 24 1999 + copyright : (C) 1999 by Torsten Uhlmann + email : TUhlmann@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <rwphtml.h> + +RWPHTML::RWPHTML() +{ +} + + +char RWPHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char *to, *from; + signed char greek_str[500]; + bool inverse = false; + bool first_letter = false; + int len; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } else + from = text; + for (to = text; *from; from++) { + if (*from == '\\') { + ++from; + int i=0; + first_letter = true; + greek_str[0] = '\0'; + while (*from != '\\') { /* get the greek word or phrase */ + greek_str[i++] = *from; + greek_str[i + 1] = '\0'; + from++; + } /* convert to symbol font as best we can */ + strcpy(to,"<I> </I><FONT FACE=\"symbol\">"); + to += strlen(to); + for (int j = 0; j < i; j++) { + if ((first_letter) + && (greek_str[j] == 'h')) { + if (greek_str[j + 1] == 'o') { + *to++ = 'o'; + first_letter = false; + ++j; + continue; + } else if (greek_str[j + 1] == 'a') { + *to++ = 'a'; + first_letter = false; + ++j; + continue; + } else if (greek_str[j + 1] == 'w') { + *to++ = 'w'; + first_letter = false; + ++j; + continue; + } else if (greek_str[j + 1] == 'u') { + *to++ = 'u'; + first_letter = false; + ++j; + continue; + } else if (greek_str[j + 1] == + -109) { + *to++ = 'w'; + first_letter = false; + ++j; + continue; + } else if (greek_str[j + 1] == + -120) { + *to++ = 'h'; + first_letter = false; + ++j; + continue; + } else if (greek_str[j + 1] == 'i') { + *to++ = 'i'; + first_letter = false; + ++j; + continue; + }else if (greek_str[j + 1] == 'e') { + *to++ = 'e'; + first_letter = false; + ++j; + continue; + } + first_letter = false; + } + if ((greek_str[j] == 't') + && (greek_str[j + 1] == 'h')) { + *to++ = 'q'; + ++j; + continue; + } + if ((greek_str[j] == 'c') + && 
(greek_str[j + 1] == 'h')) { + *to++ = 'c'; + ++j; + continue; + } + if ((greek_str[j] == 'p') + && (greek_str[j + 1] == 'h')) { + ++j; + *to++ = 'f'; + continue; + } + if (greek_str[j] == -120) { + *to++ = 'h'; + continue; + } + if (greek_str[j] == -125) { + *to++ = 'a'; + continue; + } + if (greek_str[j] == -109) { + if(greek_str[j+1] == 'i') ++j; + *to++ = 'w'; + continue; + } + if (greek_str[j] == ' ') + first_letter = true; + if (greek_str[j] == 's') { + if(isalpha(greek_str[j + 1])) *to++ = 's'; + else if(!isprint(greek_str[j] )) *to++ = 's'; + else *to++ = 'V'; + continue; + } + if (greek_str[j] == '\'') { + continue; + } + *to++ = greek_str[j]; + } + strcpy(to,"</FONT><I> </I>"); + to += strlen(to); + continue; + } + if (*from == '#') { // verse markings (e.g. "#Mark 1:1|") + inverse = true; + strcpy(to,"<FONT COLOR=#0000FF>"); + to += strlen(to); + continue; + } + if ((*from == '|') && (inverse)) { + inverse = false; + strcpy(to,"</FONT>"); + to += strlen(to); + continue; + } + if (*from == '{') { + strcpy(to,"<BR><STRONG>"); + to += strlen(to); + if ((from - &text[maxlen - len]) > 10) { // not the beginning of the entry + strcpy(to,"<P>"); + to += strlen(to); + } + continue; + } + if (*from == '}') { + strcpy(to," </STRONG>"); + to += strlen(to); + continue; + } + if ((*from == '\n') && (from[1] == '\n')) { + strcpy(to,"<P>"); + to += strlen(to); + continue; + } + *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/rwprtf.cpp b/src/modules/filters/rwprtf.cpp new file mode 100644 index 0000000..8f7b074 --- /dev/null +++ b/src/modules/filters/rwprtf.cpp @@ -0,0 +1,107 @@ +/****************************************************************************** + * + * rwprtf - SWFilter decendant to convert all GBF tags to RTF tags + */ + + +#include <stdlib.h> +#include <string.h> +#include <rwprtf.h> + + +RWPRTF::RWPRTF() { + +} + + +char RWPRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + 
char *to, *from; + bool ingreek = false; + bool inverse = false; + int len; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + for (to = text; *from; from++) { + if (*from == '\\') { + if(!ingreek) { + ingreek = true; + *to++ = '['; + *to++ = '{'; + *to++ = '\\'; + *to++ = 'f'; + *to++ = '8'; + *to++ = ' '; + continue; + } + else { + ingreek = false; + *to++ = '}'; + *to++ = ']'; + continue; + } + } + + if ((ingreek) && ((*from == 'h') || (*from == 'H'))) + continue; // 'h's are mostly useless in RWP translitterations. The greek is more correct without them. + + if (*from == '#') { // verse markings (e.g. "#Mark 1:1|") + inverse = true; + *to++ = '{'; + *to++ = '\\'; + *to++ = 'c'; + *to++ = 'f'; + *to++ = '2'; + *to++ = ' '; + *to++ = '#'; + continue; + } + if ((*from == '|') && (inverse)) { + inverse = false; + *to++ = '|'; + *to++ = '}'; + continue; + } + + if (*from == '{') { + *to++ = '{'; + *to++ = '\\'; + *to++ = 'b'; + *to++ = ' '; + if ((from - &text[maxlen - len]) > 10) { // not the beginning of the entry + *to++ = '\\'; + *to++ = 'p'; + *to++ = 'a'; + *to++ = 'r'; + *to++ = ' '; + } + continue; + } + + if (*from == '}') { + // this is kinda neat... 
DO NOTHING + } + if ((*from == '\n') && (from[1] == '\n')) { + *to++ = '\\'; + *to++ = 'p'; + *to++ = 'a'; + *to++ = 'r'; + *to++ = '\\'; + *to++ = 'p'; + *to++ = 'a'; + *to++ = 'r'; + *to++ = ' '; + continue; + } + + *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/scsuutf8.cpp b/src/modules/filters/scsuutf8.cpp new file mode 100644 index 0000000..d0d5ceb --- /dev/null +++ b/src/modules/filters/scsuutf8.cpp @@ -0,0 +1,220 @@ +/****************************************************************************** + * + * SCSUUTF8 - SWFilter decendant to convert a SCSU character to UTF-8 + * + */ + + +/* This class is based on: + * http://czyborra.com/scsu/scsu.c written by Roman Czyborra@dds.nl + * on Andrea's balcony in North Amsterdam on 1998-08-04 + * Thanks to Richard Verhoeven <rcb5@win.tue.nl> for his suggestion + * to correct the haphazard "if" after UQU to "else if" on 1998-10-01 + * + * This is a deflator to UTF-8 output for input compressed in SCSU, + * the (Reuters) Standard Compression Scheme for Unicode as described + * in http://www.unicode.org/unicode/reports/tr6.html + */ + +#include <stdlib.h> +#include <stdio.h> +#include <swmodule.h> + +#include <scsuutf8.h> + +SCSUUTF8::SCSUUTF8() { +} + + +unsigned char* SCSUUTF8::UTF8Output(unsigned long uchar, unsigned char* text) +{ + /* join UTF-16 surrogates without any pairing sanity checks */ + + static int d; + + if (uchar >= 0xd800 && uchar <= 0xdbff) { d = uchar & 0x3f; return text; } + if (uchar >= 0xdc00 && uchar <= 0xdfff) { uchar = uchar + 0x2400 + d * 0x400; } + + /* output one character as UTF-8 multibyte sequence */ + + if (uchar < 0x80) { + *text++ = c; + } + else if (uchar < 0x800) { + *text++ = 0xc0 | uchar >> 6; + *text++ = 0x80 | uchar & 0x3f; + } + else if (uchar < 0x10000) { + *text++ = 0xe0 | uchar >> 12; + *text++ = 0x80 | uchar >> 6 & 0x3f; + *text++ = 0x80 | uchar & 0x3f; + } + else if (uchar < 0x200000) { + *text++ = 0xf0 | uchar >> 18; + *text++ = 
0x80 | uchar >> 12 & 0x3f; + *text++ = 0x80 | uchar >> 6 & 0x3f; + *text++ = 0x80 | uchar & 0x3f; + } + + return text; +} + +char SCSUUTF8::ProcessText(char *text, int len, const SWKey *key, const SWModule *module) +{ + unsigned char *to, *from; + unsigned long buflen = len * FILTERPAD; + char active = 0, mode = 0; + + static unsigned short start[8] = {0x0000,0x0080,0x0100,0x0300,0x2000,0x2080,0x2100,0x3000}; + static unsigned short slide[8] = {0x0080,0x00C0,0x0400,0x0600,0x0900,0x3040,0x30A0,0xFF00}; + static unsigned short win[256] = { + 0x0000, 0x0080, 0x0100, 0x0180, 0x0200, 0x0280, 0x0300, 0x0380, + 0x0400, 0x0480, 0x0500, 0x0580, 0x0600, 0x0680, 0x0700, 0x0780, + 0x0800, 0x0880, 0x0900, 0x0980, 0x0A00, 0x0A80, 0x0B00, 0x0B80, + 0x0C00, 0x0C80, 0x0D00, 0x0D80, 0x0E00, 0x0E80, 0x0F00, 0x0F80, + 0x1000, 0x1080, 0x1100, 0x1180, 0x1200, 0x1280, 0x1300, 0x1380, + 0x1400, 0x1480, 0x1500, 0x1580, 0x1600, 0x1680, 0x1700, 0x1780, + 0x1800, 0x1880, 0x1900, 0x1980, 0x1A00, 0x1A80, 0x1B00, 0x1B80, + 0x1C00, 0x1C80, 0x1D00, 0x1D80, 0x1E00, 0x1E80, 0x1F00, 0x1F80, + 0x2000, 0x2080, 0x2100, 0x2180, 0x2200, 0x2280, 0x2300, 0x2380, + 0x2400, 0x2480, 0x2500, 0x2580, 0x2600, 0x2680, 0x2700, 0x2780, + 0x2800, 0x2880, 0x2900, 0x2980, 0x2A00, 0x2A80, 0x2B00, 0x2B80, + 0x2C00, 0x2C80, 0x2D00, 0x2D80, 0x2E00, 0x2E80, 0x2F00, 0x2F80, + 0x3000, 0x3080, 0x3100, 0x3180, 0x3200, 0x3280, 0x3300, 0x3800, + 0xE000, 0xE080, 0xE100, 0xE180, 0xE200, 0xE280, 0xE300, 0xE380, + 0xE400, 0xE480, 0xE500, 0xE580, 0xE600, 0xE680, 0xE700, 0xE780, + 0xE800, 0xE880, 0xE900, 0xE980, 0xEA00, 0xEA80, 0xEB00, 0xEB80, + 0xEC00, 0xEC80, 0xED00, 0xED80, 0xEE00, 0xEE80, 0xEF00, 0xEF80, + 0xF000, 0xF080, 0xF100, 0xF180, 0xF200, 0xF280, 0xF300, 0xF380, + 0xF400, 0xF480, 0xF500, 0xF580, 0xF600, 0xF680, 0xF700, 0xF780, + 0xF800, 0xF880, 0xF900, 0xF980, 0xFA00, 0xFA80, 0xFB00, 0xFB80, + 0xFC00, 0xFC80, 0xFD00, 0xFD80, 0xFE00, 0xFE80, 0xFF00, 0xFF80, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x00C0, 0x0250, 0x0370, 0x0530, 0x3040, 0x30A0, 0xFF60 + }; + + if (!len) + return 0; + + memmove(&text[buflen - len], text, len); + from = (unsigned char*)&text[buflen - len]; + to = (unsigned char *)text; + + // ------------------------------- + + for (int i = 0; i < len;) { + + + if (i >= len) break; + c = from[i++]; + + if (c >= 0x80) + { + to = UTF8Output (c - 0x80 + slide[active], to); + } + else if (c >= 0x20 && c <= 0x7F) + { + to = UTF8Output (c, to); + } + else if (c == 0x0 || c == 0x9 || c == 0xA || c == 0xC || c == 0xD) + { + to = UTF8Output (c, to); + } + else if (c >= 0x1 && c <= 0x8) /* SQn */ + { + if (i >= len) break; + /* single quote */ d = from[i++]; + + to = UTF8Output (d < 0x80 ? 
d + start [c - 0x1] : + d - 0x80 + slide [c - 0x1], to); + } + else if (c >= 0x10 && c <= 0x17) /* SCn */ + { + /* change window */ active = c - 0x10; + } + else if (c >= 0x18 && c <= 0x1F) /* SDn */ + { + /* define window */ active = c - 0x18; + if (i >= len) break; + slide [active] = win [from[i++]]; + } + else if (c == 0xB) /* SDX */ + { + if (i >= len) break; + c = from[i++]; + + if (i >= len) break; + d = from[i++]; + + slide [active = c>>5] = 0x10000 + (((c & 0x1F) << 8 | d) << 7); + } + else if (c == 0xE) /* SQU */ + { + if (i >= len) break; + /* SQU */ c = from[i++]; + + if (i >= len) break; + to = UTF8Output (c << 8 | from[i++], to); + } + else if (c == 0xF) /* SCU */ + { + /* change to Unicode mode */ mode = 1; + + while (mode) + { + if (i >= len) break; + c = from[i++]; + + if (c <= 0xDF || c >= 0xF3) + { + if (i >= len) break; + to = UTF8Output (c << 8 | from[i++], to); + } + else if (c == 0xF0) /* UQU */ + { + if (i >= len) break; + c = from[i++]; + + if (i >= len) break; + to = UTF8Output (c << 8 | from[i++], to); + } + else if (c >= 0xE0 && c <= 0xE7) /* UCn */ + { + active = c - 0xE0; mode = 0; + } + else if (c >= 0xE8 && c <= 0xEF) /* UDn */ + { + if (i >= len) break; + slide [active=c-0xE8] = win [from[i++]]; mode = 0; + } + else if (c == 0xF1) /* UDX */ + { + if (i >= len) break; + c = from[i++]; + + if (i >= len) break; + d = from[i++]; + + slide [active = c>>5] = + 0x10000 + (((c & 0x1F) << 8 | d) << 7); mode = 0; + } + } + } + + + } + + *to++ = 0; + *to = 0; + return 0; +} + diff --git a/src/modules/filters/swbasicfilter.cpp b/src/modules/filters/swbasicfilter.cpp new file mode 100644 index 0000000..dd5fe81 --- /dev/null +++ b/src/modules/filters/swbasicfilter.cpp @@ -0,0 +1,300 @@ +/****************************************************************************** + * swbasicfilter.h - definition of class SWBasicFilter. An SWFilter + * impl that provides some basic methods that + * many filters will need and can use as a starting + * point. 
+ * + * $Id: swbasicfilter.cpp,v 1.18 2002/06/06 21:08:47 scribe Exp $ + * + * Copyright 2001 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +#include <stdlib.h> +#include <string.h> +#include <swbasicfilter.h> +#include <stdio.h> +#include <stdarg.h> + +SWBasicFilter::SWBasicFilter() { + tokenStart = 0; + tokenEnd = 0; + escStart = 0; + escEnd = 0; + + setTokenStart("<"); + setTokenEnd(">"); + setEscapeStart("&"); + setEscapeEnd(";"); + + escStringCaseSensitive = false; + tokenCaseSensitive = false; + passThruUnknownToken = false; + passThruUnknownEsc = false; +} + + +void SWBasicFilter::setPassThruUnknownToken(bool val) { + passThruUnknownToken = val; +} + + +void SWBasicFilter::setPassThruUnknownEscapeString(bool val) { + passThruUnknownEsc = val; +} + + +void SWBasicFilter::setTokenCaseSensitive(bool val) { + tokenCaseSensitive = val; +} + + +void SWBasicFilter::setEscapeStringCaseSensitive(bool val) { + escStringCaseSensitive = val; +} + + +SWBasicFilter::~SWBasicFilter() { + if (tokenStart) + delete [] tokenStart; + + if (tokenEnd) + delete [] tokenEnd; + + if (escStart) + delete [] escStart; + + if (escEnd) + delete [] escEnd; +} + + +void SWBasicFilter::addTokenSubstitute(const char *findString, const char *replaceString) { + char *buf = 0; + + if (!tokenCaseSensitive) { + stdstr(&buf, findString); + toupperstr(buf); + tokenSubMap.insert(DualStringMap::value_type(buf, replaceString)); + delete [] buf; + } + else 
tokenSubMap.insert(DualStringMap::value_type(findString, replaceString)); +} + + +void SWBasicFilter::addEscapeStringSubstitute(const char *findString, const char *replaceString) { + char *buf = 0; + + if (!escStringCaseSensitive) { + stdstr(&buf, findString); + toupperstr(buf); + escSubMap.insert(DualStringMap::value_type(buf, replaceString)); + delete [] buf; + } + else escSubMap.insert(DualStringMap::value_type(findString, replaceString)); +} + + +void SWBasicFilter::pushString(char **buf, const char *format, ...) { + va_list argptr; + + va_start(argptr, format); + *buf += vsprintf(*buf, format, argptr); + va_end(argptr); + +// *buf += strlen(*buf); +} + + +bool SWBasicFilter::substituteToken(char **buf, const char *token) { + DualStringMap::iterator it; + + if (!tokenCaseSensitive) { + char *tmp = 0; + stdstr(&tmp, token); + toupperstr(tmp); + it = tokenSubMap.find(tmp); + delete [] tmp; + } else + it = tokenSubMap.find(token); + + if (it != tokenSubMap.end()) { + pushString(buf, it->second.c_str()); + return true; + } + return false; +} + + +bool SWBasicFilter::substituteEscapeString(char **buf, const char *escString) { + DualStringMap::iterator it; + + if (!escStringCaseSensitive) { + char *tmp = 0; + stdstr(&tmp, escString); + toupperstr(tmp); + it = escSubMap.find(tmp); + delete [] tmp; + } else + it = escSubMap.find(escString); + + if (it != escSubMap.end()) { + pushString(buf, it->second.c_str()); + return true; + } + return false; +} + + +bool SWBasicFilter::handleToken(char **buf, const char *token, DualStringMap &userData) { + return substituteToken(buf, token); +} + + +bool SWBasicFilter::handleEscapeString(char **buf, const char *escString, DualStringMap &userData) { + return substituteEscapeString(buf, escString); +} + + +void SWBasicFilter::setEscapeStart(const char *escStart) { + stdstr(&(this->escStart), escStart); +} + + +void SWBasicFilter::setEscapeEnd(const char *escEnd) { + stdstr(&(this->escEnd), escEnd); +} + + +void 
SWBasicFilter::setTokenStart(const char *tokenStart) { + stdstr(&(this->tokenStart), tokenStart); +} + + +void SWBasicFilter::setTokenEnd(const char *tokenEnd) { + stdstr(&(this->tokenEnd), tokenEnd); +} + + +char SWBasicFilter::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) { + this->key = key; + this->module = module; + char *to, *from, token[4096]; + int tokpos = 0; + bool intoken = false; + int len; + bool inEsc = false; + char escStartLen = strlen(escStart); + char escEndLen = strlen(escEnd); + char escStartPos = 0, escEndPos = 0; + char tokenStartLen = strlen(tokenStart); + char tokenEndLen = strlen(tokenEnd); + char tokenStartPos = 0, tokenEndPos = 0; + DualStringMap userData; + string lastTextNode; + + bool suspendTextPassThru = false; + userData["suspendTextPassThru"] = "false"; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + resultBuffer = text; + + for (to = text; *from; from++) { + if (*from == tokenStart[tokenStartPos]) { + if (tokenStartPos == (tokenStartLen - 1)) { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + inEsc = false; + } + else tokenStartPos++; + continue; + } + + if (*from == escStart[escStartPos]) { + if (escStartPos == (escStartLen - 1)) { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + inEsc = true; + } + else escStartPos++; + continue; + } + + if (inEsc) { + if (*from == escEnd[escEndPos]) { + if (escEndPos == (escEndLen - 1)) { + intoken = false; + userData["lastTextNode"] = lastTextNode; + if ((!handleEscapeString(&to, token, userData)) && (passThruUnknownEsc)) { + pushString(&to, escStart); + pushString(&to, token); + pushString(&to, escEnd); + } + escEndPos = escStartPos = tokenEndPos = tokenStartPos = 0; + lastTextNode = ""; + suspendTextPassThru = 
(!userData["suspendTextPassThru"].compare("true")); + continue; + } + } + } + + if (!inEsc) { + if (*from == tokenEnd[tokenEndPos]) { + if (tokenEndPos == (tokenEndLen - 1)) { + intoken = false; + userData["lastTextNode"] = lastTextNode; + if ((!handleToken(&to, token, userData)) && (passThruUnknownToken)) { + pushString(&to, tokenStart); + pushString(&to, token); + pushString(&to, tokenEnd); + } + escEndPos = escStartPos = tokenEndPos = tokenStartPos = 0; + lastTextNode = ""; + suspendTextPassThru = (!userData["suspendTextPassThru"].compare("true")); + continue; + } + } + } + + if (intoken) { + if (tokpos < 4090) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!suspendTextPassThru) + *to++ = *from; + lastTextNode += *from; + } + } + *to++ = 0; + *to = 0; + return 0; +} + + + diff --git a/src/modules/filters/thmlfootnotes.cpp b/src/modules/filters/thmlfootnotes.cpp new file mode 100644 index 0000000..d9b1f0e --- /dev/null +++ b/src/modules/filters/thmlfootnotes.cpp @@ -0,0 +1,103 @@ +/****************************************************************************** + * + * thmlfootnotes - SWFilter decendant to hide or show footnotes + * in a ThML module. + */ + + +#include <stdlib.h> +#include <string.h> +#include <thmlfootnotes.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char ThMLFootnotes::on[] = "On"; +const char ThMLFootnotes::off[] = "Off"; +const char ThMLFootnotes::optName[] = "Footnotes"; +const char ThMLFootnotes::optTip[] = "Toggles Footnotes On and Off if they exist"; + + +ThMLFootnotes::ThMLFootnotes() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +ThMLFootnotes::~ThMLFootnotes() { +} + +void ThMLFootnotes::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *ThMLFootnotes::getOptionValue() +{ + return (option) ? 
on:off; +} + +char ThMLFootnotes::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want footnotes + char *to, *from, token[2048]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strncmp(token, "note", 4)) { + hide = true; + continue; + } + else if (!strncmp(token, "/note", 5)) { + hide = false; + continue; + } + + // if not a footnote token, keep token in text + if (!hide) { + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + *to++ = *from; + } + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/thmlgbf.cpp b/src/modules/filters/thmlgbf.cpp new file mode 100644 index 0000000..66d9a20 --- /dev/null +++ b/src/modules/filters/thmlgbf.cpp @@ -0,0 +1,330 @@ +/*************************************************************************** + thmlgbf.cpp - ThML to GBF filter + ------------------- + begin : 1999-10-28 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + 
* (at your option) any later version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <thmlgbf.h> + + +ThMLGBF::ThMLGBF() +{ +} + + +char ThMLGBF::ProcessText(char *text, int maxlen) +{ + char *to, *from, token[2048]; + int tokpos = 0; + bool intoken = false; + int len; + bool ampersand = false; + bool sechead = false; + bool title = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + ampersand = false; + continue; + } + else if (*from == '&') { + intoken = true; + tokpos = 0; + memset(token, 0, 2048); + ampersand = true; + continue; + } + if (*from == ';' && ampersand) { + intoken = false; + + if (!strncmp("nbsp", token, 4)) *to++ = ' '; + else if (!strncmp("quot", token, 4)) *to++ = '"'; + else if (!strncmp("amp", token, 3)) *to++ = '&'; + else if (!strncmp("lt", token, 2)) *to++ = '<'; + else if (!strncmp("gt", token, 2)) *to++ = '>'; + else if (!strncmp("brvbar", token, 6)) *to++ = '|'; + else if (!strncmp("sect", token, 4)) *to++ = '§'; + else if (!strncmp("copy", token, 4)) *to++ = '©'; + else if (!strncmp("laquo", token, 5)) *to++ = '«'; + else if (!strncmp("reg", token, 3)) *to++ = '®'; + else if (!strncmp("acute", token, 5)) *to++ = '´'; + else if (!strncmp("para", token, 4)) *to++ = '¶'; + else if (!strncmp("raquo", token, 5)) *to++ = '»'; + + else if (!strncmp("Aacute", token, 6)) *to++ = 'Á'; + else if (!strncmp("Agrave", token, 6)) *to++ = 'À'; + else if (!strncmp("Acirc", token, 5)) *to++ = 'Â'; + else if (!strncmp("Auml", token, 4)) *to++ = 'Ä'; + else if (!strncmp("Atilde", token, 6)) *to++ = 'Ã'; + else if (!strncmp("Aring", token, 5)) *to++ = 'Å'; + 
else if (!strncmp("aacute", token, 6)) *to++ = 'á'; + else if (!strncmp("agrave", token, 6)) *to++ = 'à'; + else if (!strncmp("acirc", token, 5)) *to++ = 'â'; + else if (!strncmp("auml", token, 4)) *to++ = 'ä'; + else if (!strncmp("atilde", token, 6)) *to++ = 'ã'; + else if (!strncmp("aring", token, 5)) *to++ = 'å'; + else if (!strncmp("Eacute", token, 6)) *to++ = 'É'; + else if (!strncmp("Egrave", token, 6)) *to++ = 'È'; + else if (!strncmp("Ecirc", token, 5)) *to++ = 'Ê'; + else if (!strncmp("Euml", token, 4)) *to++ = 'Ë'; + else if (!strncmp("eacute", token, 6)) *to++ = 'é'; + else if (!strncmp("egrave", token, 6)) *to++ = 'è'; + else if (!strncmp("ecirc", token, 5)) *to++ = 'ê'; + else if (!strncmp("euml", token, 4)) *to++ = 'ë'; + else if (!strncmp("Iacute", token, 6)) *to++ = 'Í'; + else if (!strncmp("Igrave", token, 6)) *to++ = 'Ì'; + else if (!strncmp("Icirc", token, 5)) *to++ = 'Î'; + else if (!strncmp("Iuml", token, 4)) *to++ = 'Ï'; + else if (!strncmp("iacute", token, 6)) *to++ = 'í'; + else if (!strncmp("igrave", token, 6)) *to++ = 'ì'; + else if (!strncmp("icirc", token, 5)) *to++ = 'î'; + else if (!strncmp("iuml", token, 4)) *to++ = 'ï'; + else if (!strncmp("Oacute", token, 6)) *to++ = 'Ó'; + else if (!strncmp("Ograve", token, 6)) *to++ = 'Ò'; + else if (!strncmp("Ocirc", token, 5)) *to++ = 'Ô'; + else if (!strncmp("Ouml", token, 4)) *to++ = 'Ö'; + else if (!strncmp("Otilde", token, 6)) *to++ = 'Õ'; + else if (!strncmp("oacute", token, 6)) *to++ = 'ó'; + else if (!strncmp("ograve", token, 6)) *to++ = 'ò'; + else if (!strncmp("ocirc", token, 5)) *to++ = 'ô'; + else if (!strncmp("ouml", token, 4)) *to++ = 'ö'; + else if (!strncmp("otilde", token, 6)) *to++ = 'õ'; + else if (!strncmp("Uacute", token, 6)) *to++ = 'Ú'; + else if (!strncmp("Ugrave", token, 6)) *to++ = 'Ù'; + else if (!strncmp("Ucirc", token, 5)) *to++ = 'Û'; + else if (!strncmp("Uuml", token, 4)) *to++ = 'Ü'; + else if (!strncmp("uacute", token, 6)) *to++ = 'ú'; + else if 
(!strncmp("ugrave", token, 6)) *to++ = 'ù'; + else if (!strncmp("ucirc", token, 5)) *to++ = 'û'; + else if (!strncmp("uuml", token, 4)) *to++ = 'ü'; + else if (!strncmp("Yacute", token, 6)) *to++ = 'Ý'; + else if (!strncmp("yacute", token, 6)) *to++ = 'ý'; + else if (!strncmp("yuml", token, 4)) *to++ = 'ÿ'; + + else if (!strncmp("deg", token, 3)) *to++ = '°'; + else if (!strncmp("plusmn", token, 6)) *to++ = '±'; + else if (!strncmp("sup2", token, 4)) *to++ = '²'; + else if (!strncmp("sup3", token, 4)) *to++ = '³'; + else if (!strncmp("sup1", token, 4)) *to++ = '¹'; + else if (!strncmp("nbsp", token, 4)) *to++ = 'º'; + else if (!strncmp("pound", token, 5)) *to++ = '£'; + else if (!strncmp("cent", token, 4)) *to++ = '¢'; + else if (!strncmp("frac14", token, 6)) *to++ = '¼'; + else if (!strncmp("frac12", token, 6)) *to++ = '½'; + else if (!strncmp("frac34", token, 6)) *to++ = '¾'; + else if (!strncmp("iquest", token, 6)) *to++ = '¿'; + else if (!strncmp("iexcl", token, 5)) *to++ = '¡'; + else if (!strncmp("ETH", token, 3)) *to++ = 'Ð'; + else if (!strncmp("eth", token, 3)) *to++ = 'ð'; + else if (!strncmp("THORN", token, 5)) *to++ = 'Þ'; + else if (!strncmp("thorn", token, 5)) *to++ = 'þ'; + else if (!strncmp("AElig", token, 5)) *to++ = 'Æ'; + else if (!strncmp("aelig", token, 5)) *to++ = 'æ'; + else if (!strncmp("Oslash", token, 6)) *to++ = 'Ø'; + else if (!strncmp("curren", token, 6)) *to++ = '¤'; + else if (!strncmp("Ccedil", token, 6)) *to++ = 'Ç'; + else if (!strncmp("ccedil", token, 6)) *to++ = 'ç'; + else if (!strncmp("szlig", token, 5)) *to++ = 'ß'; + else if (!strncmp("Ntilde", token, 6)) *to++ = 'Ñ'; + else if (!strncmp("ntilde", token, 6)) *to++ = 'ñ'; + else if (!strncmp("yen", token, 3)) *to++ = '¥'; + else if (!strncmp("not", token, 3)) *to++ = '¬'; + else if (!strncmp("ordf", token, 4)) *to++ = 'ª'; + else if (!strncmp("uml", token, 3)) *to++ = '¨'; + else if (!strncmp("shy", token, 3)) *to++ = ''; + else if (!strncmp("macr", token, 4)) *to++ = '¯'; + 
continue; + + } + else if (*from == '>' && !ampersand) { + intoken = false; + // process desired tokens + if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) { + *to++ = '<'; + *to++ = 'W'; + for (unsigned int i = 27; token[i] != '\"'; i++) + *to++ = token[i]; + *to++ = '>'; + continue; + } + if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) { + *to++ = '<'; + *to++ = 'W'; + *to++ = 'T'; + for (unsigned int i = 25; token[i] != '\"'; i++) + *to++ = token[i]; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "scripRef", 8)) { + *to++ = '<'; + *to++ = 'R'; + *to++ = 'X'; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "/scripRef", 9)) { + *to++ = '<'; + *to++ = 'R'; + *to++ = 'x'; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "note", 4)) { + *to++ = '<'; + *to++ = 'R'; + *to++ = 'F'; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "/note", 5)) { + *to++ = '<'; + *to++ = 'R'; + *to++ = 'f'; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "sup", 3)) { + *to++ = '<'; + *to++ = 'F'; + *to++ = 'S'; + *to++ = '>'; + } + else if (!strncmp(token, "/sup", 4)) { + *to++ = '<'; + *to++ = 'F'; + *to++ = 's'; + *to++ = '>'; + } + else if (!strnicmp(token, "font color=#ff0000", 18)) { + *to++ = '<'; + *to++ = 'F'; + *to++ = 'R'; + *to++ = '>'; + continue; + } + else if (!strnicmp(token, "/font", 5)) { + *to++ = '<'; + *to++ = 'F'; + *to++ = 'r'; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "div class=\"sechead\"", 19)) { + *to++ = '<'; + *to++ = 'T'; + *to++ = 'S'; + *to++ = '>'; + sechead = true; + continue; + } + else if (sechead && !strncmp(token, "/div", 19)) { + *to++ = '<'; + *to++ = 'T'; + *to++ = 's'; + *to++ = '>'; + sechead = false; + continue; + } + else if (!strncmp(token, "div class=\"title\"", 19)) { + *to++ = '<'; + *to++ = 'T'; + *to++ = 'T'; + *to++ = '>'; + title = true; + continue; + } + else if (title && !strncmp(token, "/div", 19)) { + *to++ = '<'; + *to++ = 'T'; + *to++ = 't'; + *to++ 
= '>'; + title = false; + continue; + } + else if (!strnicmp(token, "br", 2)) { + *to++ = '<'; + *to++ = 'C'; + *to++ = 'L'; + *to++ = '>'; + continue; + } + else switch(*token) { + case 'I': // font tags + case 'i': + *to++ = '<'; + *to++ = 'F'; + *to++ = 'I'; + *to++ = '>'; + continue; + case 'B': // bold start + case 'b': + *to++ = '<'; + *to++ = 'F'; + *to++ = 'B'; + *to++ = '>'; + continue; + case '/': + switch(token[1]) { + case 'P': + case 'p': + *to++ = '<'; + *to++ = 'C'; + *to++ = 'M'; + *to++ = '>'; + continue; + case 'I': + case 'i': // italic end + *to++ = '<'; + *to++ = 'F'; + *to++ = 'i'; + *to++ = '>'; + continue; + case 'B': // bold start + case 'b': + *to++ = '<'; + *to++ = 'F'; + *to++ = 'b'; + *to++ = '>'; + continue; + } + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} + + + diff --git a/src/modules/filters/thmlheadings.cpp b/src/modules/filters/thmlheadings.cpp new file mode 100644 index 0000000..00b8a23 --- /dev/null +++ b/src/modules/filters/thmlheadings.cpp @@ -0,0 +1,107 @@ +/****************************************************************************** + * + * thmlheadings - SWFilter decendant to hide or show headings + * in a ThML module. + */ + + +#include <stdlib.h> +#include <string.h> +#include <thmlheadings.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char ThMLHeadings::on[] = "On"; +const char ThMLHeadings::off[] = "Off"; +const char ThMLHeadings::optName[] = "Headings"; +const char ThMLHeadings::optTip[] = "Toggles Headings On and Off if they exist"; + + +ThMLHeadings::ThMLHeadings() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +ThMLHeadings::~ThMLHeadings() { +} + +void ThMLHeadings::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *ThMLHeadings::getOptionValue() +{ + return (option) ? 
on:off; +} + +char ThMLHeadings::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want headings + char *to, *from, token[2048]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strnicmp(token, "div class=\"sechead\"", 19)) { + hide = true; + continue; + } + if (!strnicmp(token, "div class=\"title\"", 17)) { + hide = true; + continue; + } + else if (hide && !strnicmp(token, "/div", 4)) { + hide = false; + continue; + } + + // if not a heading token, keep token in text + if (!hide) { + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + *to++ = *from; + } + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/thmlhtml.cpp b/src/modules/filters/thmlhtml.cpp new file mode 100644 index 0000000..9cb8679 --- /dev/null +++ b/src/modules/filters/thmlhtml.cpp @@ -0,0 +1,211 @@ +/*************************************************************************** + thmlhtml.cpp - ThML to HTML filter + ------------------- + begin : 1999-10-27 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU 
General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <thmlhtml.h> +#include <swmodule.h> + + +ThMLHTML::ThMLHTML() { + setTokenStart("<"); + setTokenEnd(">"); +/* + setEscapeStart("&"); + setEscapeEnd(";"); + + setEscapeStringCaseSensitive(true); + + addEscapeStringSubstitute("nbsp", " "); + addEscapeStringSubstitute("quot", "\""); + addEscapeStringSubstitute("amp", "&"); + addEscapeStringSubstitute("lt", "<"); + addEscapeStringSubstitute("gt", ">"); + addEscapeStringSubstitute("brvbar", "|"); + addEscapeStringSubstitute("sect", "§"); + addEscapeStringSubstitute("copy", "©"); + addEscapeStringSubstitute("laquo", "«"); + addEscapeStringSubstitute("reg", "®"); + addEscapeStringSubstitute("acute", "´"); + addEscapeStringSubstitute("para", "¶"); + addEscapeStringSubstitute("raquo", "»"); + + addEscapeStringSubstitute("Aacute", "Á"); + addEscapeStringSubstitute("Agrave", "À"); + addEscapeStringSubstitute("Acirc", "Â"); + addEscapeStringSubstitute("Auml", "Ä"); + addEscapeStringSubstitute("Atilde", "Ã"); + addEscapeStringSubstitute("Aring", "Å"); + addEscapeStringSubstitute("aacute", "á"); + addEscapeStringSubstitute("agrave", "à"); + addEscapeStringSubstitute("acirc", "â"); + addEscapeStringSubstitute("auml", "ä"); + addEscapeStringSubstitute("atilde", "ã"); + addEscapeStringSubstitute("aring", "å"); + addEscapeStringSubstitute("Eacute", "É"); + addEscapeStringSubstitute("Egrave", "È"); + addEscapeStringSubstitute("Ecirc", "Ê"); + addEscapeStringSubstitute("Euml", "Ë"); + addEscapeStringSubstitute("eacute", "é"); + addEscapeStringSubstitute("egrave", "è"); + addEscapeStringSubstitute("ecirc", "ê"); + addEscapeStringSubstitute("euml", "ë"); + addEscapeStringSubstitute("Iacute", "Í"); + addEscapeStringSubstitute("Igrave", "Ì"); + 
addEscapeStringSubstitute("Icirc", "Î"); + addEscapeStringSubstitute("Iuml", "Ï"); + addEscapeStringSubstitute("iacute", "í"); + addEscapeStringSubstitute("igrave", "ì"); + addEscapeStringSubstitute("icirc", "î"); + addEscapeStringSubstitute("iuml", "ï"); + addEscapeStringSubstitute("Oacute", "Ó"); + addEscapeStringSubstitute("Ograve", "Ò"); + addEscapeStringSubstitute("Ocirc", "Ô"); + addEscapeStringSubstitute("Ouml", "Ö"); + addEscapeStringSubstitute("Otilde", "Õ"); + addEscapeStringSubstitute("oacute", "ó"); + addEscapeStringSubstitute("ograve", "ò"); + addEscapeStringSubstitute("ocirc", "ô"); + addEscapeStringSubstitute("ouml", "ö"); + addEscapeStringSubstitute("otilde", "õ"); + addEscapeStringSubstitute("Uacute", "Ú"); + addEscapeStringSubstitute("Ugrave", "Ù"); + addEscapeStringSubstitute("Ucirc", "Û"); + addEscapeStringSubstitute("Uuml", "Ü"); + addEscapeStringSubstitute("uacute", "ú"); + addEscapeStringSubstitute("ugrave", "ù"); + addEscapeStringSubstitute("ucirc", "û"); + addEscapeStringSubstitute("uuml", "ü"); + addEscapeStringSubstitute("Yacute", "Ý"); + addEscapeStringSubstitute("yacute", "ý"); + addEscapeStringSubstitute("yuml", "ÿ"); + + addEscapeStringSubstitute("deg", "°"); + addEscapeStringSubstitute("plusmn", "±"); + addEscapeStringSubstitute("sup2", "²"); + addEscapeStringSubstitute("sup3", "³"); + addEscapeStringSubstitute("sup1", "¹"); + addEscapeStringSubstitute("nbsp", "º"); + addEscapeStringSubstitute("pound", "£"); + addEscapeStringSubstitute("cent", "¢"); + addEscapeStringSubstitute("frac14", "¼"); + addEscapeStringSubstitute("frac12", "½"); + addEscapeStringSubstitute("frac34", "¾"); + addEscapeStringSubstitute("iquest", "¿"); + addEscapeStringSubstitute("iexcl", "¡"); + addEscapeStringSubstitute("ETH", "Ð"); + addEscapeStringSubstitute("eth", "ð"); + addEscapeStringSubstitute("THORN", "Þ"); + addEscapeStringSubstitute("thorn", "þ"); + addEscapeStringSubstitute("AElig", "Æ"); + addEscapeStringSubstitute("aelig", "æ"); + 
addEscapeStringSubstitute("Oslash", "Ø"); + addEscapeStringSubstitute("curren", "¤"); + addEscapeStringSubstitute("Ccedil", "Ç"); + addEscapeStringSubstitute("ccedil", "ç"); + addEscapeStringSubstitute("szlig", "ß"); + addEscapeStringSubstitute("Ntilde", "Ñ"); + addEscapeStringSubstitute("ntilde", "ñ"); + addEscapeStringSubstitute("yen", "¥"); + addEscapeStringSubstitute("not", "¬"); + addEscapeStringSubstitute("ordf", "ª"); + addEscapeStringSubstitute("uml", "¨"); + addEscapeStringSubstitute("shy", ""); + addEscapeStringSubstitute("macr", "¯"); +*/ + setTokenCaseSensitive(true); + + addTokenSubstitute("/scripRef", " </a>"); + addTokenSubstitute("note", " <font color=\"#800000\"><small>("); + addTokenSubstitute("/note", ")</small></font> "); +} + + +bool ThMLHTML::handleToken(char **buf, const char *token, DualStringMap &userData) { + if (!substituteToken(buf, token)) { + // manually process if it wasn't a simple substitution + if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) { + if (token[27] == 'H' || token[27] == 'G' || token[27] == 'A') { + pushString(buf, "<small><em>"); + for (const char *tok = token + 5; *tok; tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + pushString(buf, "</em></small>"); + } + else if (token[27] == 'T') { + pushString(buf, "<small><i>"); + for (unsigned int i = 29; token[i] != '\"'; i++) + *(*buf)++ = token[i]; + pushString(buf, "</i></small>"); + } + } + else if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) { + pushString(buf, "<small><em>"); + for (unsigned int i = 25; token[i] != '\"'; i++) + *(*buf)++ = token[i]; + pushString(buf, "</em></small>"); + } + else if (!strncmp(token, "sync type=\"lemma\" value=\"", 25)) { + pushString(buf, "<small><em>("); + for (unsigned int i = 25; token[i] != '\"'; i++) + *(*buf)++ = token[i]; + pushString(buf, ")</em></small>"); + } + else if (!strncmp(token, "scripRef", 8)) { + pushString(buf, "<a href=\""); + for (const char *tok = token + 9; *tok; tok++) + if(*tok != '\"') + 
*(*buf)++ = *tok; + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + } + else if (!strncmp(token, "img ", 4)) { + const char *src = strstr(token, "src"); + if (!src) // assert we have a src attribute + return false; + + *(*buf)++ = '<'; + for (const char *c = token; *c; c++) { + if (c == src) { + for (;((*c) && (*c != '"')); c++) + *(*buf)++ = *c; + + if (!*c) { c--; continue; } + + *(*buf)++ = '"'; + if (*(c+1) == '/') { + pushString(buf, "file:"); + pushString(buf, module->getConfigEntry("AbsoluteDataPath")); + if (*((*buf)-1) == '/') + c++; // skip '/' + } + continue; + } + *(*buf)++ = *c; + } + *(*buf)++ = '>'; + } + else if(!strncmp(token, "note", 4)) { + pushString(buf, " <font color=\"#800000\"><small>("); + } + + else { + return false; // we still didn't handle token + } + } + return true; +} + diff --git a/src/modules/filters/thmlhtmlhref.cpp b/src/modules/filters/thmlhtmlhref.cpp new file mode 100644 index 0000000..ce7e3fd --- /dev/null +++ b/src/modules/filters/thmlhtmlhref.cpp @@ -0,0 +1,269 @@ +/*************************************************************************** + thmlhtmlhref.cpp - ThML to HTML filter with hrefs + ------------------- + begin : 2001-09-03 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <thmlhtmlhref.h> +#include <swmodule.h> + + +ThMLHTMLHREF::ThMLHTMLHREF() { + setTokenStart("<"); + setTokenEnd(">"); +/* + setEscapeStart("&"); + setEscapeEnd(";"); + + setEscapeStringCaseSensitive(true); + + addEscapeStringSubstitute("nbsp", " "); + addEscapeStringSubstitute("quot", "\""); + addEscapeStringSubstitute("amp", "&"); + addEscapeStringSubstitute("lt", "<"); + addEscapeStringSubstitute("gt", ">"); + addEscapeStringSubstitute("brvbar", "|"); + addEscapeStringSubstitute("sect", "§"); + addEscapeStringSubstitute("copy", "©"); + addEscapeStringSubstitute("laquo", "«"); + addEscapeStringSubstitute("reg", "®"); + addEscapeStringSubstitute("acute", "´"); + addEscapeStringSubstitute("para", "¶"); + addEscapeStringSubstitute("raquo", "»"); + + addEscapeStringSubstitute("Aacute", "Á"); + addEscapeStringSubstitute("Agrave", "À"); + addEscapeStringSubstitute("Acirc", "Â"); + addEscapeStringSubstitute("Auml", "Ä"); + addEscapeStringSubstitute("Atilde", "Ã"); + addEscapeStringSubstitute("Aring", "Å"); + addEscapeStringSubstitute("aacute", "á"); + addEscapeStringSubstitute("agrave", "à"); + addEscapeStringSubstitute("acirc", "â"); + addEscapeStringSubstitute("auml", "ä"); + addEscapeStringSubstitute("atilde", "ã"); + addEscapeStringSubstitute("aring", "å"); + addEscapeStringSubstitute("Eacute", "É"); + addEscapeStringSubstitute("Egrave", "È"); + addEscapeStringSubstitute("Ecirc", "Ê"); + addEscapeStringSubstitute("Euml", "Ë"); + addEscapeStringSubstitute("eacute", "é"); + addEscapeStringSubstitute("egrave", "è"); + addEscapeStringSubstitute("ecirc", "ê"); + addEscapeStringSubstitute("euml", "ë"); + addEscapeStringSubstitute("Iacute", "Í"); + addEscapeStringSubstitute("Igrave", "Ì"); + addEscapeStringSubstitute("Icirc", "Î"); + addEscapeStringSubstitute("Iuml", "Ï"); + addEscapeStringSubstitute("iacute", "í"); + 
addEscapeStringSubstitute("igrave", "ì"); + addEscapeStringSubstitute("icirc", "î"); + addEscapeStringSubstitute("iuml", "ï"); + addEscapeStringSubstitute("Oacute", "Ó"); + addEscapeStringSubstitute("Ograve", "Ò"); + addEscapeStringSubstitute("Ocirc", "Ô"); + addEscapeStringSubstitute("Ouml", "Ö"); + addEscapeStringSubstitute("Otilde", "Õ"); + addEscapeStringSubstitute("oacute", "ó"); + addEscapeStringSubstitute("ograve", "ò"); + addEscapeStringSubstitute("ocirc", "ô"); + addEscapeStringSubstitute("ouml", "ö"); + addEscapeStringSubstitute("otilde", "õ"); + addEscapeStringSubstitute("Uacute", "Ú"); + addEscapeStringSubstitute("Ugrave", "Ù"); + addEscapeStringSubstitute("Ucirc", "Û"); + addEscapeStringSubstitute("Uuml", "Ü"); + addEscapeStringSubstitute("uacute", "ú"); + addEscapeStringSubstitute("ugrave", "ù"); + addEscapeStringSubstitute("ucirc", "û"); + addEscapeStringSubstitute("uuml", "ü"); + addEscapeStringSubstitute("Yacute", "Ý"); + addEscapeStringSubstitute("yacute", "ý"); + addEscapeStringSubstitute("yuml", "ÿ"); + + addEscapeStringSubstitute("deg", "°"); + addEscapeStringSubstitute("plusmn", "±"); + addEscapeStringSubstitute("sup2", "²"); + addEscapeStringSubstitute("sup3", "³"); + addEscapeStringSubstitute("sup1", "¹"); + addEscapeStringSubstitute("nbsp", "º"); + addEscapeStringSubstitute("pound", "£"); + addEscapeStringSubstitute("cent", "¢"); + addEscapeStringSubstitute("frac14", "¼"); + addEscapeStringSubstitute("frac12", "½"); + addEscapeStringSubstitute("frac34", "¾"); + addEscapeStringSubstitute("iquest", "¿"); + addEscapeStringSubstitute("iexcl", "¡"); + addEscapeStringSubstitute("ETH", "Ð"); + addEscapeStringSubstitute("eth", "ð"); + addEscapeStringSubstitute("THORN", "Þ"); + addEscapeStringSubstitute("thorn", "þ"); + addEscapeStringSubstitute("AElig", "Æ"); + addEscapeStringSubstitute("aelig", "æ"); + addEscapeStringSubstitute("Oslash", "Ø"); + addEscapeStringSubstitute("curren", "¤"); + addEscapeStringSubstitute("Ccedil", "Ç"); + 
addEscapeStringSubstitute("ccedil", "ç"); + addEscapeStringSubstitute("szlig", "ß"); + addEscapeStringSubstitute("Ntilde", "Ñ"); + addEscapeStringSubstitute("ntilde", "ñ"); + addEscapeStringSubstitute("yen", "¥"); + addEscapeStringSubstitute("not", "¬"); + addEscapeStringSubstitute("ordf", "ª"); + addEscapeStringSubstitute("uml", "¨"); + addEscapeStringSubstitute("shy", ""); + addEscapeStringSubstitute("macr", "¯"); +*/ + setTokenCaseSensitive(true); + + addTokenSubstitute("note", " <font color=\"#800000\"><small>("); + addTokenSubstitute("/note", ")</small></font> "); + addTokenSubstitute("/scripture", "</i> "); +} + + +bool ThMLHTMLHREF::handleToken(char **buf, const char *token, DualStringMap &userData) { + const char *tok; + if (!substituteToken(buf, token)) { + // manually process if it wasn't a simple substitution + if (!strncmp(token, "sync ", 5)) { + pushString(buf, "<a href=\""); + for (tok = token + 5; *(tok+1); tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + + //scan for value and add it to the buffer + for (tok = token + 5; *tok; tok++) { + if (!strncmp(tok, "value=\"", 7)) { + tok += 7; + for (;*tok != '\"'; tok++) + *(*buf)++ = *tok; + break; + } + } + pushString(buf, "</a>"); + } + + else if (!strncmp(token, "scripture ", 10)) { + userData["inscriptRef"] = "true"; + pushString(buf, "<i>"); + } + + else if (!strncmp(token, "scripRef p", 10) || !strncmp(token, "scripRef v", 10)) { + userData["inscriptRef"] = "true"; + pushString(buf, "<a href=\""); + for (const char *tok = token + 9; *(tok+1); tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + } + + // we're starting a scripRef like "<scripRef>John 3:16</scripRef>" + else if (!strcmp(token, "scripRef")) { + userData["inscriptRef"] = "false"; + // let's stop text from going to output + userData["suspendTextPassThru"] = "true"; + } + + // we've ended a scripRef + else if (!strcmp(token, "/scripRef")) { + if 
(userData["inscriptRef"] == "true") { // like "<scripRef passage="John 3:16">John 3:16</scripRef>" + userData["inscriptRef"] = "false"; + pushString(buf, "</a>"); + } + + else { // like "<scripRef>John 3:16</scripRef>" + pushString(buf, "<a href=\"passage="); + //char *strbuf = (char *)userData["lastTextNode"].c_str(); + pushString(buf, userData["lastTextNode"].c_str()); + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + pushString(buf, userData["lastTextNode"].c_str()); + // let's let text resume to output again + userData["suspendTextPassThru"] = "false"; + pushString(buf, "</a>"); + } + } + + else if (!strncmp(token, "div class=\"sechead\"", 19)) { + userData["SecHead"] = "true"; + pushString(buf, "<br /><b><i>"); + } + else if (!strncmp(token, "div class=\"title\"", 19)) { + userData["SecHead"] = "true"; + pushString(buf, "<br /><b><i>"); + } + else if (!strncmp(token, "/div", 4)) { + if (userData["SecHead"] == "true") { + pushString(buf, "</i></b><br />"); + userData["SecHead"] = "false"; + } + } + + else if (!strncmp(token, "sync type=\"Strongs\" value=\"T", 28)) { + pushString(buf, "<a href=\""); + for (tok = token + 5; *(tok+1); tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + for (tok = token + 29; *(tok+2); tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + pushString(buf, "</a>"); + } + else if (!strncmp(token, "img ", 4)) { + const char *src = strstr(token, "src"); + if (!src) // assert we have a src attribute + return false; + + *(*buf)++ = '<'; + for (const char *c = token; *c; c++) { + if (c == src) { + for (;((*c) && (*c != '"')); c++) + *(*buf)++ = *c; + + if (!*c) { c--; continue; } + + *(*buf)++ = '"'; + if (*(c+1) == '/') { + pushString(buf, "file:"); + pushString(buf, module->getConfigEntry("AbsoluteDataPath")); + if (*((*buf)-1) == '/') + c++; // skip '/' + } + continue; + } + *(*buf)++ = *c; + } + *(*buf)++ = '>'; + } + else if (!strncmp(token, "note", 4)) { + pushString(buf, " <small><font color=\"#800000\">("); + } 
+ else { + *(*buf)++ = '<'; + for (const char *tok = token; *tok; tok++) + *(*buf)++ = *tok; + *(*buf)++ = '>'; + //return false; // we still didn't handle token + } + } + return true; +} + diff --git a/src/modules/filters/thmllemma.cpp b/src/modules/filters/thmllemma.cpp new file mode 100644 index 0000000..33856db --- /dev/null +++ b/src/modules/filters/thmllemma.cpp @@ -0,0 +1,97 @@ +/****************************************************************************** + * + * thmllemma - SWFilter decendant to hide or show lemmas + * in a ThML module. + */ + + +#include <stdlib.h> +#include <string.h> +#include <thmllemma.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char ThMLLemma::on[] = "On"; +const char ThMLLemma::off[] = "Off"; +const char ThMLLemma::optName[] = "Lemmas"; +const char ThMLLemma::optTip[] = "Toggles Lemmas On and Off if they exist"; + +ThMLLemma::ThMLLemma() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +ThMLLemma::~ThMLLemma() { +} + +void ThMLLemma::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *ThMLLemma::getOptionValue() +{ + return (option) ? on:off; +} + +char ThMLLemma::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want lemmas + char *to, *from, token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strnicmp(token, "sync type=\"lemma\" ", 18)) { // Lemma + if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { + if (lastspace) + to--; + } + continue; + } + // if not a lemma token, keep token in text + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + *to++ = *from; + lastspace = (*from == ' '); + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/thmlmorph.cpp b/src/modules/filters/thmlmorph.cpp new file mode 100644 index 0000000..f95bede --- /dev/null +++ b/src/modules/filters/thmlmorph.cpp @@ -0,0 +1,98 @@ +/****************************************************************************** + * + * thmlmorph - SWFilter decendant to hide or show morph tags + * in a ThML module. 
+ */ + + +#include <stdlib.h> +#include <string.h> +#include <thmlmorph.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char ThMLMorph::on[] = "On"; +const char ThMLMorph::off[] = "Off"; +const char ThMLMorph::optName[] = "Morphological Tags"; +const char ThMLMorph::optTip[] = "Toggles Morphological Tags On and Off if they exist"; + + +ThMLMorph::ThMLMorph() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +ThMLMorph::~ThMLMorph() { +} + +void ThMLMorph::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *ThMLMorph::getOptionValue() +{ + return (option) ? on:off; +} + +char ThMLMorph::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want morph tags + char *to, *from, token[2048]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strnicmp(token, "sync type=\"morph\" ", 18)) { // Morph + if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { + if (lastspace) + to--; + } + continue; + } + // if not a morph tag token, keep token in text + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + *to++ = *from; + lastspace = (*from == ' '); + } + } + *to++ = 0; + *to = 0; + } + 
return 0; +} diff --git a/src/modules/filters/thmlolb.cpp b/src/modules/filters/thmlolb.cpp new file mode 100644 index 0000000..2b31fab --- /dev/null +++ b/src/modules/filters/thmlolb.cpp @@ -0,0 +1,243 @@ +/*************************************************************************** + thmlolb.cpp - ThML to OLB filter + ------------------- + begin : 2001-05-10 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <thmlolb.h> + + +ThMLOLB::ThMLOLB() +{ +} + + +char ThMLOLB::ProcessText(char *text, int maxlen) +{ + char *to, *from, token[2048]; + int tokpos = 0; + bool intoken = false; + int len; + bool ampersand = false; + int i; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + for (to = text; *from; from++) + { + if (*from == '<') { + intoken = true; + tokpos = 0; + memset(token, 0, 2048); + ampersand = false; + continue; + } + else if (*from == '&') { + intoken = true; + tokpos = 0; + memset(token, 0, 2048); + ampersand = true; + continue; + } + if (*from == ';' && ampersand) { + intoken = false; + + if (!strncmp("nbsp", token, 4)) *to++ = ' '; + else if (!strncmp("quot", token, 4)) *to++ = '"'; + else if (!strncmp("amp", token, 3)) *to++ = '&'; + else if (!strncmp("lt", token, 2)) *to++ = '<'; + else if (!strncmp("gt", token, 2)) *to++ = '>'; + 
else if (!strncmp("brvbar", token, 6)) *to++ = '|'; + else if (!strncmp("sect", token, 4)) *to++ = '§'; + else if (!strncmp("copy", token, 4)) *to++ = '©'; + else if (!strncmp("laquo", token, 5)) *to++ = '«'; + else if (!strncmp("reg", token, 3)) *to++ = '®'; + else if (!strncmp("acute", token, 5)) *to++ = '´'; + else if (!strncmp("para", token, 4)) *to++ = '¶'; + else if (!strncmp("raquo", token, 5)) *to++ = '»'; + + else if (!strncmp("Aacute", token, 6)) *to++ = 'Á'; + else if (!strncmp("Agrave", token, 6)) *to++ = 'À'; + else if (!strncmp("Acirc", token, 5)) *to++ = 'Â'; + else if (!strncmp("Auml", token, 4)) *to++ = 'Ä'; + else if (!strncmp("Atilde", token, 6)) *to++ = 'Ã'; + else if (!strncmp("Aring", token, 5)) *to++ = 'Å'; + else if (!strncmp("aacute", token, 6)) *to++ = 'á'; + else if (!strncmp("agrave", token, 6)) *to++ = 'à'; + else if (!strncmp("acirc", token, 5)) *to++ = 'â'; + else if (!strncmp("auml", token, 4)) *to++ = 'ä'; + else if (!strncmp("atilde", token, 6)) *to++ = 'ã'; + else if (!strncmp("aring", token, 5)) *to++ = 'å'; + else if (!strncmp("Eacute", token, 6)) *to++ = 'É'; + else if (!strncmp("Egrave", token, 6)) *to++ = 'È'; + else if (!strncmp("Ecirc", token, 5)) *to++ = 'Ê'; + else if (!strncmp("Euml", token, 4)) *to++ = 'Ë'; + else if (!strncmp("eacute", token, 6)) *to++ = 'é'; + else if (!strncmp("egrave", token, 6)) *to++ = 'è'; + else if (!strncmp("ecirc", token, 5)) *to++ = 'ê'; + else if (!strncmp("euml", token, 4)) *to++ = 'ë'; + else if (!strncmp("Iacute", token, 6)) *to++ = 'Í'; + else if (!strncmp("Igrave", token, 6)) *to++ = 'Ì'; + else if (!strncmp("Icirc", token, 5)) *to++ = 'Î'; + else if (!strncmp("Iuml", token, 4)) *to++ = 'Ï'; + else if (!strncmp("iacute", token, 6)) *to++ = 'í'; + else if (!strncmp("igrave", token, 6)) *to++ = 'ì'; + else if (!strncmp("icirc", token, 5)) *to++ = 'î'; + else if (!strncmp("iuml", token, 4)) *to++ = 'ï'; + else if (!strncmp("Oacute", token, 6)) *to++ = 'Ó'; + else if (!strncmp("Ograve", 
token, 6)) *to++ = 'Ò'; + else if (!strncmp("Ocirc", token, 5)) *to++ = 'Ô'; + else if (!strncmp("Ouml", token, 4)) *to++ = 'Ö'; + else if (!strncmp("Otilde", token, 6)) *to++ = 'Õ'; + else if (!strncmp("oacute", token, 6)) *to++ = 'ó'; + else if (!strncmp("ograve", token, 6)) *to++ = 'ò'; + else if (!strncmp("ocirc", token, 5)) *to++ = 'ô'; + else if (!strncmp("ouml", token, 4)) *to++ = 'ö'; + else if (!strncmp("otilde", token, 6)) *to++ = 'õ'; + else if (!strncmp("Uacute", token, 6)) *to++ = 'Ú'; + else if (!strncmp("Ugrave", token, 6)) *to++ = 'Ù'; + else if (!strncmp("Ucirc", token, 5)) *to++ = 'Û'; + else if (!strncmp("Uuml", token, 4)) *to++ = 'Ü'; + else if (!strncmp("uacute", token, 6)) *to++ = 'ú'; + else if (!strncmp("ugrave", token, 6)) *to++ = 'ù'; + else if (!strncmp("ucirc", token, 5)) *to++ = 'û'; + else if (!strncmp("uuml", token, 4)) *to++ = 'ü'; + else if (!strncmp("Yacute", token, 6)) *to++ = 'Ý'; + else if (!strncmp("yacute", token, 6)) *to++ = 'ý'; + else if (!strncmp("yuml", token, 4)) *to++ = 'ÿ'; + + else if (!strncmp("deg", token, 3)) *to++ = '°'; + else if (!strncmp("plusmn", token, 6)) *to++ = '±'; + else if (!strncmp("sup2", token, 4)) *to++ = '²'; + else if (!strncmp("sup3", token, 4)) *to++ = '³'; + else if (!strncmp("sup1", token, 4)) *to++ = '¹'; + else if (!strncmp("nbsp", token, 4)) *to++ = 'º'; + else if (!strncmp("pound", token, 5)) *to++ = '£'; + else if (!strncmp("cent", token, 4)) *to++ = '¢'; + else if (!strncmp("frac14", token, 6)) *to++ = '¼'; + else if (!strncmp("frac12", token, 6)) *to++ = '½'; + else if (!strncmp("frac34", token, 6)) *to++ = '¾'; + else if (!strncmp("iquest", token, 6)) *to++ = '¿'; + else if (!strncmp("iexcl", token, 5)) *to++ = '¡'; + else if (!strncmp("ETH", token, 3)) *to++ = 'Ð'; + else if (!strncmp("eth", token, 3)) *to++ = 'ð'; + else if (!strncmp("THORN", token, 5)) *to++ = 'Þ'; + else if (!strncmp("thorn", token, 5)) *to++ = 'þ'; + else if (!strncmp("AElig", token, 5)) *to++ = 'Æ'; + else if 
(!strncmp("aelig", token, 5)) *to++ = 'æ'; + else if (!strncmp("Oslash", token, 6)) *to++ = 'Ø'; + else if (!strncmp("curren", token, 6)) *to++ = '¤'; + else if (!strncmp("Ccedil", token, 6)) *to++ = 'Ç'; + else if (!strncmp("ccedil", token, 6)) *to++ = 'ç'; + else if (!strncmp("szlig", token, 5)) *to++ = 'ß'; + else if (!strncmp("Ntilde", token, 6)) *to++ = 'Ñ'; + else if (!strncmp("ntilde", token, 6)) *to++ = 'ñ'; + else if (!strncmp("yen", token, 3)) *to++ = '¥'; + else if (!strncmp("not", token, 3)) *to++ = '¬'; + else if (!strncmp("ordf", token, 4)) *to++ = 'ª'; + else if (!strncmp("uml", token, 3)) *to++ = '¨'; + else if (!strncmp("shy", token, 3)) *to++ = ''; + else if (!strncmp("macr", token, 4)) *to++ = '¯'; + continue; + + } + else if (*from == '>' && !ampersand) + { + intoken = false; + // process desired tokens + if (!strncmp(token, "sync type=\"Strongs\" value=\"G", 27)) { + *to++ = '<'; + for (i = 28; token[i] != '\"'; i++) + *to++ = token[i]; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "sync type=\"Strongs\" value=\"H", 27)) { + *to++ = '<'; + for (i = 28; token[i] != '\"'; i++) + *to++ = token[i]; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "scripRef", 8)) { + *to++ = '#'; + continue; + } + else if (!strncmp(token, "/scripRef", 9)) { + *to++ = ' '; + continue; + } + else if (!strncmp(token, "note ", 5)) { + *to++ = '{'; + continue; + } + else if (!strncmp(token, "/note", 5)) { + *to++ = '}'; + continue; + } + else if (!strnicmp(token, "font", 4)) { + *to++ = '\\'; + *to++ = '\\'; + continue; + } + else if (!strnicmp(token, "/font", 5)) { + *to++ = '\\'; + *to++ = '\\'; + continue; + } + else switch(*token) { + case 'I': // font tags + case 'i': + *to++ = '\\'; + *to++ = '@'; + continue; + case 'B': // bold start + case 'b': + *to++ = '\\'; + *to++ = '$'; + continue; + case '/': + switch(token[1]) { + case 'I': + case 'i': // italic end + *to++ = '\\'; + *to++ = '@'; + continue; + case 'B': // bold start + case 'b': + 
*to++ = '\\'; + *to++ = '$'; + continue; + } + } + continue; + } + if (intoken) { + if (tokpos < 2047) + token[tokpos++] = *from; + } + else *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} + + + diff --git a/src/modules/filters/thmlosis.cpp b/src/modules/filters/thmlosis.cpp new file mode 100644 index 0000000..54cd002 --- /dev/null +++ b/src/modules/filters/thmlosis.cpp @@ -0,0 +1,332 @@ +/****************************************************************************** + * + * thmlstrongs - SWFilter decendant to hide or show strongs number + * in a ThML module. + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <stdarg.h> +#include <thmlosis.h> +#include <swmodule.h> +#include <versekey.h> +#include <stdarg.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +ThMLOSIS::ThMLOSIS() { +} + + +ThMLOSIS::~ThMLOSIS() { +} + + +char ThMLOSIS::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) { + + char *to, *from, token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + int word = 1; + char val[128]; + char buf[128]; + char wordstr[5]; + char *valto; + char *ch; + char *textStart, *textEnd; + char *wordStart, *wordEnd; + bool newText = false; + bool newWord = false; + string tmp; + bool suspendTextPassThru = false; + bool keepToken = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; + + textStart = from; + wordStart = text; + + // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + textEnd = from-1; + wordEnd = to; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + keepToken = false; + suspendTextPassThru = false; + newWord = true; + + + while (wordStart < (text+maxlen)) { +// if (strchr(" ,;.?!()'\"", *wordStart)) + if (strchr(";,: .?!()'\"", *wordStart)) + wordStart++; + else break; + } + while (wordEnd > wordStart) { + if (strchr(" ,;:.?!()'\"", *wordEnd)) + wordEnd--; + else break; + } + + // Scripture Reference + if (!strncmp(token, "scripRef", 8)) { + // pushString(buf, "<reference work=\"Bible.KJV\" reference=\""); + suspendTextPassThru = true; + newText = true; + } + else if (!strncmp(token, "/scripRef", 9)) { + tmp = ""; + tmp.append(textStart, (int)(textEnd - textStart)+1); + pushString(&to, convertToOSIS(tmp.c_str(), key)); + suspendTextPassThru = false; + } + + // Footnote + if (!strcmp(token, "note")) { + // pushString(buf, "<reference work=\"Bible.KJV\" reference=\""); + suspendTextPassThru = true; + newText = true; + } + else if (!strcmp(token, "/note")) { + tmp = "<note type=\"x-StudyNote\"><notePart type=\"x-MainText\">"; + tmp.append(textStart, (int)(textEnd - textStart)+1); + tmp += "</notePart></note>"; + pushString(&to, tmp.c_str()); + 
suspendTextPassThru = false; + } + + // Figure + else if (!strncmp(token, "img ", 4)) { + const char *src = strstr(token, "src"); + if (!src) // assert we have a src attribute + return false; + + pushString(&to, "<figure src=\""); + const char *c; + for (c = src;((*c) && (*c != '"')); c++); + + /* uncomment for SWORD absolute path logic + if (*(c+1) == '/') { + pushString(buf, "file:"); + pushString(buf, module->getConfigEntry("AbsoluteDataPath")); + if (*((*buf)-1) == '/') + c++; // skip '/' + } + end of uncomment for asolute path logic */ + + for (c++;((*c) && (*c != '"')); c++) + *to++ = *c; + + pushString(&to, "\" />"); + return true; + } + + // Strongs numbers + else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs + if (module->isProcessEntryAttributes()) { + valto = val; + for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) + *valto++ = token[i]; + *valto = 0; + if (atoi((!isdigit(*val))?val+1:val) < 5627) { + // normal strongs number + strstrip(val); + sprintf(buf, "<w lemma=\"x-Strong:%s\">", val); + memmove(wordStart+strlen(buf), wordStart, (to-wordStart)+1); + memcpy(wordStart, buf, strlen(buf)); + to+=strlen(buf); + pushString(&to, "</w>"); + module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val; +// tmp = ""; +// tmp.append(textStart, (int)(wordEnd - wordStart)); +// module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp; + } + else { + // verb morph + sprintf(wordstr, "%03d", word-1); + module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; + } + } + } + + // Morphology + else if (!strncmp(token, "sync type=\"morph\"", 17)) { + for (ch = token+17; *ch; ch++) { + if (!strncmp(ch, "class=\"", 7)) { + valto = val; + for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) + *valto++ = ch[i]; + *valto = 0; + sprintf(wordstr, "%03d", word-1); + strstrip(val); + module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = val; + } + if (!strncmp(ch, "value=\"", 7)) { + valto = val; + for (unsigned int i = 7; 
ch[i] != '\"' && i < 127; i++) + *valto++ = ch[i]; + *valto = 0; + sprintf(wordstr, "%03d", word-1); + strstrip(val); + module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; + } + } + if (!strncmp(wordStart, "<w ", 3)) { + + const char *cls = "Unknown", *morph; + + if (module->getEntryAttributes()["Word"][wordstr]["Morph"].size() > 0) { + if (module->getEntryAttributes()["Word"][wordstr]["MorphClass"].size() > 0) + cls = module->getEntryAttributes()["Word"][wordstr]["MorphClass"].c_str(); + morph = module->getEntryAttributes()["Word"][wordstr]["Morph"].c_str(); + + sprintf(buf, "morph=\"x-%s:%s\" ", cls, morph); + memmove(wordStart+3+strlen(buf), wordStart+3, (to-wordStart)+1); + memcpy(wordStart+3, buf, strlen(buf)); + to+=strlen(buf); + } + } + } + + if (!keepToken) { // if we don't want strongs + if (strchr(" ,:;.?!()'\"", from[1])) { + if (lastspace) + to--; + } + if (newText) {textStart = from+1; newText = false; } +// if (newWord) {wordStart = to; newWord = false; } + continue; + } + // if not a strongs token, keep token in text + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + if (newText) {textStart = to; newWord = false; } +// if (newWord) {wordStart = to; newWord = false; } + continue; + } + if (intoken) { + if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + } + else { + if (newWord && (*from != ' ')) {wordStart = to; newWord = false; memset(to, 0, 10); } + if (!suspendTextPassThru) { + *to++ = *from; + lastspace = (*from == ' '); + } + } + } + + VerseKey *vkey = SWDYNAMIC_CAST(VerseKey, key); + if (vkey) { + char ref[254]; + if (vkey->Verse()) + sprintf(ref, "<verseStart ref=\"%s\" />", vkey->getOSISRef()); + else if (vkey->Chapter()) + sprintf(ref, "<chapterStart ref=\"%s\" />", vkey->getOSISRef()); + else if (vkey->Book()) + sprintf(ref, "<bookStart ref=\"%s\" />", vkey->getOSISRef()); + else *ref = 0; + if (*ref) { + memmove(text+strlen(ref), 
text, maxlen-strlen(ref)-1); + memcpy(text, ref, strlen(ref)); + to+=strlen(ref); + if (vkey->Verse()) { + VerseKey tmp; + tmp = *vkey; + tmp.AutoNormalize(0); + tmp.Headings(1); + sprintf(ref, "<verseEnd ref=\"%s\" />", vkey->getOSISRef()); + pushString(&to, ref); + tmp = MAXVERSE; + if (*vkey == tmp) { + tmp.Verse(0); + sprintf(ref, "<chapterEnd ref=\"%s\" />", tmp.getOSISRef()); + pushString(&to, ref); + tmp = MAXCHAPTER; + tmp = MAXVERSE; + if (*vkey == tmp) { + tmp.Chapter(0); + tmp.Verse(0); + sprintf(ref, "<bookEnd ref=\"%s\" />", tmp.getOSISRef()); + pushString(&to, ref); + } + } + } + + else if (vkey->Chapter()) + sprintf(ref, "<chapterStart ref=\"%s\" />", vkey->getOSISRef()); + else sprintf(ref, "<bookStart ref=\"%s\" />", vkey->getOSISRef()); + } + } + *to++ = 0; + *to = 0; + return 0; +} + + +void ThMLOSIS::pushString(char **buf, const char *format, ...) { + va_list argptr; + + va_start(argptr, format); + *buf += vsprintf(*buf, format, argptr); + va_end(argptr); + +// *buf += strlen(*buf); +} + + +const char *ThMLOSIS::convertToOSIS(const char *inRef, const SWKey *key) { + static string outRef; + + outRef = ""; + + VerseKey defLanguage; + ListKey verses = defLanguage.ParseVerseList(inRef, (*key), true); + const char *startFrag = inRef; + for (int i = 0; i < verses.Count(); i++) { + VerseKey *element = SWDYNAMIC_CAST(VerseKey, verses.GetElement(i)); + char buf[5120]; + char frag[800]; + if (element) { + memmove(frag, startFrag, ((const char *)element->userData - startFrag) + 1); + frag[((const char *)element->userData - startFrag) + 1] = 0; + startFrag = (const char *)element->userData + 1; + sprintf(buf, "<reference refStart=\"KJV:%s\" refEnd=\"%s\">%s</reference>", element->LowerBound().getOSISRef(), element->UpperBound().getOSISRef(), frag); + } + else { + memmove(frag, startFrag, ((const char *)verses.GetElement(i)->userData - startFrag) + 1); + frag[((const char *)verses.GetElement(i)->userData - startFrag) + 1] = 0; + startFrag = (const char 
*)verses.GetElement(i)->userData + 1; + sprintf(buf, "<reference refStart=\"KJV:%s\">%s</reference>", VerseKey(*verses.GetElement(i)).getOSISRef(), frag); + } + outRef+=buf; + } + return outRef.c_str(); +} diff --git a/src/modules/filters/thmlplain.cpp b/src/modules/filters/thmlplain.cpp new file mode 100644 index 0000000..5609f16 --- /dev/null +++ b/src/modules/filters/thmlplain.cpp @@ -0,0 +1,201 @@ +/****************************************************************************** + * + * thmlplain - SWFilter decendant to strip out all ThML tags or convert to + * ASCII rendered symbols. + */ + + +#include <stdlib.h> +#include <string.h> +#include <thmlplain.h> + + +ThMLPlain::ThMLPlain() { +} + + +char ThMLPlain::ProcessText(char *text, int maxlen) +{ + char *to, *from, token[2048]; + int tokpos = 0; + bool intoken = false; + int len; + bool ampersand = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == 10 || *from == 13) + from++; + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + ampersand = false; + continue; + } + else if (*from == '&') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + ampersand = true; + continue; + } + if (*from == ';' && ampersand) { + intoken = false; + + if (!strncmp("nbsp", token, 4)) *to++ = ' '; + else if (!strncmp("quot", token, 4)) *to++ = '"'; + else if (!strncmp("amp", token, 3)) *to++ = '&'; + else if (!strncmp("lt", token, 2)) *to++ = '<'; + else if (!strncmp("gt", token, 2)) *to++ = '>'; + else if (!strncmp("brvbar", token, 6)) *to++ = '|'; + else if (!strncmp("sect", token, 4)) *to++ = '§'; + else if (!strncmp("copy", token, 4)) *to++ = '©'; + else if (!strncmp("laquo", token, 5)) *to++ = '«'; + else if (!strncmp("reg", 
token, 3)) *to++ = '®'; + else if (!strncmp("acute", token, 5)) *to++ = '´'; + else if (!strncmp("para", token, 4)) *to++ = '¶'; + else if (!strncmp("raquo", token, 5)) *to++ = '»'; + + else if (!strncmp("Aacute", token, 6)) *to++ = 'Á'; + else if (!strncmp("Agrave", token, 6)) *to++ = 'À'; + else if (!strncmp("Acirc", token, 5)) *to++ = 'Â'; + else if (!strncmp("Auml", token, 4)) *to++ = 'Ä'; + else if (!strncmp("Atilde", token, 6)) *to++ = 'Ã'; + else if (!strncmp("Aring", token, 5)) *to++ = 'Å'; + else if (!strncmp("aacute", token, 6)) *to++ = 'á'; + else if (!strncmp("agrave", token, 6)) *to++ = 'à'; + else if (!strncmp("acirc", token, 5)) *to++ = 'â'; + else if (!strncmp("auml", token, 4)) *to++ = 'ä'; + else if (!strncmp("atilde", token, 6)) *to++ = 'ã'; + else if (!strncmp("aring", token, 5)) *to++ = 'å'; + else if (!strncmp("Eacute", token, 6)) *to++ = 'É'; + else if (!strncmp("Egrave", token, 6)) *to++ = 'È'; + else if (!strncmp("Ecirc", token, 5)) *to++ = 'Ê'; + else if (!strncmp("Euml", token, 4)) *to++ = 'Ë'; + else if (!strncmp("eacute", token, 6)) *to++ = 'é'; + else if (!strncmp("egrave", token, 6)) *to++ = 'è'; + else if (!strncmp("ecirc", token, 5)) *to++ = 'ê'; + else if (!strncmp("euml", token, 4)) *to++ = 'ë'; + else if (!strncmp("Iacute", token, 6)) *to++ = 'Í'; + else if (!strncmp("Igrave", token, 6)) *to++ = 'Ì'; + else if (!strncmp("Icirc", token, 5)) *to++ = 'Î'; + else if (!strncmp("Iuml", token, 4)) *to++ = 'Ï'; + else if (!strncmp("iacute", token, 6)) *to++ = 'í'; + else if (!strncmp("igrave", token, 6)) *to++ = 'ì'; + else if (!strncmp("icirc", token, 5)) *to++ = 'î'; + else if (!strncmp("iuml", token, 4)) *to++ = 'ï'; + else if (!strncmp("Oacute", token, 6)) *to++ = 'Ó'; + else if (!strncmp("Ograve", token, 6)) *to++ = 'Ò'; + else if (!strncmp("Ocirc", token, 5)) *to++ = 'Ô'; + else if (!strncmp("Ouml", token, 4)) *to++ = 'Ö'; + else if (!strncmp("Otilde", token, 6)) *to++ = 'Õ'; + else if (!strncmp("oacute", token, 6)) *to++ = 'ó'; + 
else if (!strncmp("ograve", token, 6)) *to++ = 'ò'; + else if (!strncmp("ocirc", token, 5)) *to++ = 'ô'; + else if (!strncmp("ouml", token, 4)) *to++ = 'ö'; + else if (!strncmp("otilde", token, 6)) *to++ = 'õ'; + else if (!strncmp("Uacute", token, 6)) *to++ = 'Ú'; + else if (!strncmp("Ugrave", token, 6)) *to++ = 'Ù'; + else if (!strncmp("Ucirc", token, 5)) *to++ = 'Û'; + else if (!strncmp("Uuml", token, 4)) *to++ = 'Ü'; + else if (!strncmp("uacute", token, 6)) *to++ = 'ú'; + else if (!strncmp("ugrave", token, 6)) *to++ = 'ù'; + else if (!strncmp("ucirc", token, 5)) *to++ = 'û'; + else if (!strncmp("uuml", token, 4)) *to++ = 'ü'; + else if (!strncmp("Yacute", token, 6)) *to++ = 'Ý'; + else if (!strncmp("yacute", token, 6)) *to++ = 'ý'; + else if (!strncmp("yuml", token, 4)) *to++ = 'ÿ'; + + else if (!strncmp("deg", token, 3)) *to++ = '°'; + else if (!strncmp("plusmn", token, 6)) *to++ = '±'; + else if (!strncmp("sup2", token, 4)) *to++ = '²'; + else if (!strncmp("sup3", token, 4)) *to++ = '³'; + else if (!strncmp("sup1", token, 4)) *to++ = '¹'; + else if (!strncmp("nbsp", token, 4)) *to++ = 'º'; + else if (!strncmp("pound", token, 5)) *to++ = '£'; + else if (!strncmp("cent", token, 4)) *to++ = '¢'; + else if (!strncmp("frac14", token, 6)) *to++ = '¼'; + else if (!strncmp("frac12", token, 6)) *to++ = '½'; + else if (!strncmp("frac34", token, 6)) *to++ = '¾'; + else if (!strncmp("iquest", token, 6)) *to++ = '¿'; + else if (!strncmp("iexcl", token, 5)) *to++ = '¡'; + else if (!strncmp("ETH", token, 3)) *to++ = 'Ð'; + else if (!strncmp("eth", token, 3)) *to++ = 'ð'; + else if (!strncmp("THORN", token, 5)) *to++ = 'Þ'; + else if (!strncmp("thorn", token, 5)) *to++ = 'þ'; + else if (!strncmp("AElig", token, 5)) *to++ = 'Æ'; + else if (!strncmp("aelig", token, 5)) *to++ = 'æ'; + else if (!strncmp("Oslash", token, 6)) *to++ = 'Ø'; + else if (!strncmp("curren", token, 6)) *to++ = '¤'; + else if (!strncmp("Ccedil", token, 6)) *to++ = 'Ç'; + else if (!strncmp("ccedil", token, 
6)) *to++ = 'ç'; + else if (!strncmp("szlig", token, 5)) *to++ = 'ß'; + else if (!strncmp("Ntilde", token, 6)) *to++ = 'Ñ'; + else if (!strncmp("ntilde", token, 6)) *to++ = 'ñ'; + else if (!strncmp("yen", token, 3)) *to++ = '¥'; + else if (!strncmp("not", token, 3)) *to++ = '¬'; + else if (!strncmp("ordf", token, 4)) *to++ = 'ª'; + else if (!strncmp("uml", token, 3)) *to++ = '¨'; + else if (!strncmp("shy", token, 3)) *to++ = ''; + else if (!strncmp("macr", token, 4)) *to++ = '¯'; + continue; + + } + else if (*from == '>' && !ampersand) { + intoken = false; + // process desired tokens + if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) { + *to++ = ' '; + *to++ = '<'; + for (unsigned int i = 27; token[i] != '\"'; i++) + *to++ = token[i]; + *to++ = '>'; + continue; + } + if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) { + *to++ = ' '; + *to++ = '('; + for (unsigned int i = 25; token[i] != '\"'; i++) + *to++ = token[i]; + *to++ = ')'; + continue; + } + if (!strncmp("note", token, 4)) { + *to++ = ' '; + *to++ = '('; + } + else if (!strncmp("br", token, 2)) + *to++ = '\n'; + else if (!strncmp("/p", token, 2)) + *to++ = '\n'; + else if (!strncmp("/note", token, 5)) { + *to++ = ')'; + *to++ = ' '; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else *to++ = *from; + } + *to++ = 0; + *to = 0; + + return 0; +} + + diff --git a/src/modules/filters/thmlrtf.cpp b/src/modules/filters/thmlrtf.cpp new file mode 100644 index 0000000..8b603b0 --- /dev/null +++ b/src/modules/filters/thmlrtf.cpp @@ -0,0 +1,222 @@ +/*************************************************************************** + thmlrtf.cpp - ThML to RTF filter + ------------------- + begin : 1999-10-27 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free 
software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <thmlrtf.h> + + +ThMLRTF::ThMLRTF() +{ + setTokenStart("<"); + setTokenEnd(">"); + + setEscapeStart("&"); + setEscapeEnd(";"); + + setEscapeStringCaseSensitive(true); + + addEscapeStringSubstitute("nbsp", " "); + addEscapeStringSubstitute("quot", "\""); + addEscapeStringSubstitute("amp", "&"); + addEscapeStringSubstitute("lt", "<"); + addEscapeStringSubstitute("gt", ">"); + addEscapeStringSubstitute("brvbar", "|"); + addEscapeStringSubstitute("sect", "§"); + addEscapeStringSubstitute("copy", "©"); + addEscapeStringSubstitute("laquo", "«"); + addEscapeStringSubstitute("reg", "®"); + addEscapeStringSubstitute("acute", "´"); + addEscapeStringSubstitute("para", "¶"); + addEscapeStringSubstitute("raquo", "»"); + + addEscapeStringSubstitute("Aacute", "Á"); + addEscapeStringSubstitute("Agrave", "À"); + addEscapeStringSubstitute("Acirc", "Â"); + addEscapeStringSubstitute("Auml", "Ä"); + addEscapeStringSubstitute("Atilde", "Ã"); + addEscapeStringSubstitute("Aring", "Å"); + addEscapeStringSubstitute("aacute", "á"); + addEscapeStringSubstitute("agrave", "à"); + addEscapeStringSubstitute("acirc", "â"); + addEscapeStringSubstitute("auml", "ä"); + addEscapeStringSubstitute("atilde", "ã"); + addEscapeStringSubstitute("aring", "å"); + addEscapeStringSubstitute("Eacute", "É"); + addEscapeStringSubstitute("Egrave", "È"); + addEscapeStringSubstitute("Ecirc", "Ê"); + addEscapeStringSubstitute("Euml", "Ë"); + addEscapeStringSubstitute("eacute", "é"); + addEscapeStringSubstitute("egrave", "è"); + addEscapeStringSubstitute("ecirc", "ê"); + addEscapeStringSubstitute("euml", "ë"); + addEscapeStringSubstitute("Iacute", 
"Í"); + addEscapeStringSubstitute("Igrave", "Ì"); + addEscapeStringSubstitute("Icirc", "Î"); + addEscapeStringSubstitute("Iuml", "Ï"); + addEscapeStringSubstitute("iacute", "í"); + addEscapeStringSubstitute("igrave", "ì"); + addEscapeStringSubstitute("icirc", "î"); + addEscapeStringSubstitute("iuml", "ï"); + addEscapeStringSubstitute("Oacute", "Ó"); + addEscapeStringSubstitute("Ograve", "Ò"); + addEscapeStringSubstitute("Ocirc", "Ô"); + addEscapeStringSubstitute("Ouml", "Ö"); + addEscapeStringSubstitute("Otilde", "Õ"); + addEscapeStringSubstitute("oacute", "ó"); + addEscapeStringSubstitute("ograve", "ò"); + addEscapeStringSubstitute("ocirc", "ô"); + addEscapeStringSubstitute("ouml", "ö"); + addEscapeStringSubstitute("otilde", "õ"); + addEscapeStringSubstitute("Uacute", "Ú"); + addEscapeStringSubstitute("Ugrave", "Ù"); + addEscapeStringSubstitute("Ucirc", "Û"); + addEscapeStringSubstitute("Uuml", "Ü"); + addEscapeStringSubstitute("uacute", "ú"); + addEscapeStringSubstitute("ugrave", "ù"); + addEscapeStringSubstitute("ucirc", "û"); + addEscapeStringSubstitute("uuml", "ü"); + addEscapeStringSubstitute("Yacute", "Ý"); + addEscapeStringSubstitute("yacute", "ý"); + addEscapeStringSubstitute("yuml", "ÿ"); + + addEscapeStringSubstitute("deg", "°"); + addEscapeStringSubstitute("plusmn", "±"); + addEscapeStringSubstitute("sup2", "²"); + addEscapeStringSubstitute("sup3", "³"); + addEscapeStringSubstitute("sup1", "¹"); + addEscapeStringSubstitute("nbsp", "º"); + addEscapeStringSubstitute("pound", "£"); + addEscapeStringSubstitute("cent", "¢"); + addEscapeStringSubstitute("frac14", "¼"); + addEscapeStringSubstitute("frac12", "½"); + addEscapeStringSubstitute("frac34", "¾"); + addEscapeStringSubstitute("iquest", "¿"); + addEscapeStringSubstitute("iexcl", "¡"); + addEscapeStringSubstitute("ETH", "Ð"); + addEscapeStringSubstitute("eth", "ð"); + addEscapeStringSubstitute("THORN", "Þ"); + addEscapeStringSubstitute("thorn", "þ"); + addEscapeStringSubstitute("AElig", "Æ"); + 
addEscapeStringSubstitute("aelig", "æ"); + addEscapeStringSubstitute("Oslash", "Ø"); + addEscapeStringSubstitute("curren", "¤"); + addEscapeStringSubstitute("Ccedil", "Ç"); + addEscapeStringSubstitute("ccedil", "ç"); + addEscapeStringSubstitute("szlig", "ß"); + addEscapeStringSubstitute("Ntilde", "Ñ"); + addEscapeStringSubstitute("ntilde", "ñ"); + addEscapeStringSubstitute("yen", "¥"); + addEscapeStringSubstitute("not", "¬"); + addEscapeStringSubstitute("ordf", "ª"); + addEscapeStringSubstitute("uml", "¨"); + addEscapeStringSubstitute("shy", ""); + addEscapeStringSubstitute("macr", "¯"); + + setTokenCaseSensitive(true); + + addTokenSubstitute("/note", ") }"); + + addTokenSubstitute("br", "\\line "); + addTokenSubstitute("br /", "\\line "); + addTokenSubstitute("i", "{\\i1 "); + addTokenSubstitute("/i", "}"); + addTokenSubstitute("b", "{\\b1 "); + addTokenSubstitute("/b", "}"); + addTokenSubstitute("p", "\\par "); + + //we need uppercase forms for the moment to support a few early ThML modules that aren't XHTML compliant + addTokenSubstitute("BR", "\\line "); + addTokenSubstitute("I", "{\\i1 "); + addTokenSubstitute("/I", "}"); + addTokenSubstitute("B", "{\\b1 "); + addTokenSubstitute("/B", "}"); + addTokenSubstitute("P", "\\par "); +} + +bool ThMLRTF::handleToken(char **buf, const char *token, DualStringMap &userData) { + if (!substituteToken(buf, token)) { + // manually process if it wasn't a simple substitution + if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) { + if (token[27] == 'H' || token[27] == 'G' || token[27] == 'A') { + pushString(buf, " {\\fs15 <"); + for (unsigned int i = 28; token[i] != '\"'; i++) + *(*buf)++ = token[i]; + pushString(buf, ">}"); + } + else if (token[27] == 'T') { + pushString(buf, " {\\fs15 ("); + for (unsigned int i = 28; token[i] != '\"'; i++) + *(*buf)++ = token[i]; + pushString(buf, ")}"); + } + } + else if (!strncmp(token, "sync type=\"morph\" ", 18)) { + pushString(buf, " {\\fs15 ("); + for (const char *tok = token + 
5; *tok; tok++) { + if (!strncmp(tok, "value=\"", 7)) { + tok += 7; + for (;*tok != '\"'; tok++) + *(*buf)++ = *tok; + break; + } + } + + pushString(buf, ")}"); + } + else if (!strncmp(token, "sync type=\"lemma\" value=\"", 25)) { + pushString(buf, "{\\fs15 ("); + for (unsigned int i = 25; token[i] != '\"'; i++) + *(*buf)++ = token[i]; + pushString(buf, ")}"); + } + else if (!strncmp(token, "scripRef", 8)) { +// pushString(buf, "{\\cf2 #"); + pushString(buf, "<a href=\"\">"); + } + else if (!strncmp(token, "/scripRef", 9)) { + pushString(buf, "</a>"); + } + else if (!strncmp(token, "div", 3)) { + *(*buf)++ = '{'; + if (!strncmp(token, "div class=\"title\"", 17)) { + pushString(buf, "\\par\\i1\\b1 "); + userData["sechead"] = "true"; + } + else if (!strncmp(token, "div class=\"sechead\"", 19)) { + pushString(buf, "\\par\\i1\\b1 "); + userData["sechead"] = "true"; + } + } + else if (!strncmp(token, "/div", 4)) { + *(*buf)++ = '}'; + if (userData["sechead"] == "true") { + pushString(buf, "\\par "); + userData["sechead"] == "false"; + } + } + else if (!strncmp(token, "note", 4)) { + pushString(buf, " {\\i1\\fs15 ("); + } + + else { + return false; // we still didn't handle token + } + } + return true; +} + + diff --git a/src/modules/filters/thmlscripref.cpp b/src/modules/filters/thmlscripref.cpp new file mode 100644 index 0000000..23edd6d --- /dev/null +++ b/src/modules/filters/thmlscripref.cpp @@ -0,0 +1,103 @@ +/****************************************************************************** + * + * thmlscripref - SWFilter decendant to hide or show scripture references + * in a ThML module. 
+ */ + + +#include <stdlib.h> +#include <string.h> +#include <thmlscripref.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char ThMLScripref::on[] = "On"; +const char ThMLScripref::off[] = "Off"; +const char ThMLScripref::optName[] = "Scripture Cross-references"; +const char ThMLScripref::optTip[] = "Toggles Scripture Cross-references On and Off if they exist"; + + +ThMLScripref::ThMLScripref() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +ThMLScripref::~ThMLScripref() { +} + +void ThMLScripref::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *ThMLScripref::getOptionValue() +{ + return (option) ? on:off; +} + +char ThMLScripref::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want scriprefs + char *to, *from, token[2048]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strnicmp(token, "scripRef", 8)) { + hide = true; + continue; + } + else if (!strnicmp(token, "/scripRef", 9)) { + hide = false; + continue; + } + + // if not a scripref token, keep token in text + if (!hide) { + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + *to++ = *from; + } + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/thmlstrongs.cpp 
b/src/modules/filters/thmlstrongs.cpp new file mode 100644 index 0000000..ab5a3eb --- /dev/null +++ b/src/modules/filters/thmlstrongs.cpp @@ -0,0 +1,156 @@ +/****************************************************************************** + * + * thmlstrongs - SWFilter decendant to hide or show strongs number + * in a ThML module. + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <thmlstrongs.h> +#include <swmodule.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif +#include <ctype.h> + +const char ThMLStrongs::on[] = "On"; +const char ThMLStrongs::off[] = "Off"; +const char ThMLStrongs::optName[] = "Strong's Numbers"; +const char ThMLStrongs::optTip[] = "Toggles Strong's Numbers On and Off if they exist"; + + +ThMLStrongs::ThMLStrongs() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +ThMLStrongs::~ThMLStrongs() { +} + +void ThMLStrongs::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *ThMLStrongs::getOptionValue() +{ + return (option) ? on:off; +} + +char ThMLStrongs::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char *to, *from, token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + int word = 1; + char val[128]; + char wordstr[5]; + char *valto; + char *ch; + char *textStart = text, *textEnd = 0; + string tmp; + bool newText = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; + + // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + textEnd = to; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs + if (module->isProcessEntryAttributes()) { + valto = val; + for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) + *valto++ = token[i]; + *valto = 0; + if (atoi((!isdigit(*val))?val+1:val) < 5627) { + // normal strongs number + sprintf(wordstr, "%03d", word++); + module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val; + tmp = ""; + tmp.append(textStart, (int)(textEnd - textStart)); + module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp; + newText = true; + } + else { + // verb morph + sprintf(wordstr, "%03d", word-1); + module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; + } + } + + if (!option) { // if we don't want strongs + if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { + if (lastspace) + to--; + } + if (newText) {textStart = to; newText = false; } + continue; + } + } + if (module->isProcessEntryAttributes()) { + if (!strncmp(token, "sync type=\"morph\"", 17)) { + for (ch = token+17; *ch; ch++) { + if (!strncmp(ch, "class=\"", 7)) { + valto = val; + for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) + *valto++ = ch[i]; + *valto = 0; + 
sprintf(wordstr, "%03d", word-1); + module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = val; + } + if (!strncmp(ch, "value=\"", 7)) { + valto = val; + for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) + *valto++ = ch[i]; + *valto = 0; + sprintf(wordstr, "%03d", word-1); + module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; + } + } + } + } + // if not a strongs token, keep token in text + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + if (newText) {textStart = to; newText = false; } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + *to++ = *from; + lastspace = (*from == ' '); + } + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/thmlvariants.cpp b/src/modules/filters/thmlvariants.cpp new file mode 100644 index 0000000..a6a52cf --- /dev/null +++ b/src/modules/filters/thmlvariants.cpp @@ -0,0 +1,185 @@ +/****************************************************************************** + * + * thmlvariants - SWFilter decendant to hide or show textual variants + * in a ThML module. 
+ */ + + +#include <stdlib.h> +#include <string.h> +#include <thmlvariants.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char ThMLVariants::primary[] = "Primary Reading"; +const char ThMLVariants::secondary[] = "Secondary Reading"; +const char ThMLVariants::all[] = "All Readings"; + +const char ThMLVariants::optName[] = "Textual Variants"; +const char ThMLVariants::optTip[] = "Switch between Textual Variants modes"; + + +ThMLVariants::ThMLVariants() { + option = false; + options.push_back(primary); + options.push_back(secondary); + options.push_back(all); +} + + +ThMLVariants::~ThMLVariants() { +} + +void ThMLVariants::setOptionValue(const char *ival) +{ + if (!stricmp(ival, primary)) option = 0; + else if (!stricmp(ival, secondary)) option = 1; + else option = 2; +} + +const char *ThMLVariants::getOptionValue() +{ + if (option == 0) { + return primary; + } + else if (option == 1) { + return secondary; + } + else { + return all; + } +} + +char ThMLVariants::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (option == 0) { //we want primary only + char *to, *from, token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; + + // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strncmp(token, "div type=\"variant\" class=\"2\"", 28)) { + hide = true; + continue; + } + else if (!strncmp(token, "/div", 4)) { + hide = false; + continue; + } + + // if not a footnote token, keep token in text + if (!hide) { + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + *to++ = *from; + } + } + } + *to++ = 0; + *to = 0; + + } + else if (option == 1) { //we want variant only + char *to, *from, token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; + + // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strncmp(token, "div type=\"variant\" class=\"1\"", 28)) { + hide = true; + continue; + } + else if (!strncmp(token, "/div", 4)) { + hide = false; + continue; + } + + // if not a footnote token, keep token in text + if (!hide) { + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + *to++ = *from; + } + } + } + *to++ = 0; + *to = 0; + + } + return 0; +} + + + + + + diff --git a/src/modules/filters/unicodertf.cpp b/src/modules/filters/unicodertf.cpp new file mode 100644 index 0000000..6313792 --- /dev/null +++ b/src/modules/filters/unicodertf.cpp @@ -0,0 +1,75 @@ +/****************************************************************************** + * + * unicodertf - SWFilter decendant to convert a double byte unicode file + * to RTF tags + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <unicodertf.h> + +UnicodeRTF::UnicodeRTF() { +} + + +char UnicodeRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned char *to, *from, *maxto; + int len; + char digit[10]; + short ch; // must be signed per unicode spec (negative is ok for big numbers > 32768) + + len = strlenw(text) + 2; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned char*)&text[maxlen - len]; + } + else from = (unsigned 
char*)text; + maxto =(unsigned char*)text + maxlen; + + // ------------------------------- + bool lastUni = false; + for (to = (unsigned char*)text; *from && (to <= maxto); from++) { + ch = 0; + if ((*from & 128) != 128) { +// if ((*from == ' ') && (lastUni)) +// *to++ = ' '; + *to++ = *from; + lastUni = false; + continue; + } + if ((*from & 128) && ((*from & 64) != 64)) { + // error + *from = 'x'; + continue; + } + *from <<= 1; + int subsequent; + for (subsequent = 1; (*from & 128); subsequent++) { + *from <<= 1; + from[subsequent] &= 63; + ch <<= 6; + ch |= from[subsequent]; + } + subsequent--; + *from <<=1; + char significantFirstBits = 8 - (2+subsequent); + + ch |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8)); + from += subsequent; + *to++ = '\\'; + *to++ = 'u'; + sprintf(digit, "%d", ch); + for (char *dig = digit; *dig; dig++) + *to++ = *dig; + *to++ = '?'; + lastUni = true; + } + + if (to != maxto) { + *to++ = 0; + } + *to = 0; + return 0; +} diff --git a/src/modules/filters/utf16utf8.cpp b/src/modules/filters/utf16utf8.cpp new file mode 100644 index 0000000..5a7719f --- /dev/null +++ b/src/modules/filters/utf16utf8.cpp @@ -0,0 +1,95 @@ +/****************************************************************************** + * + * UTF16UTF8 - SWFilter decendant to convert UTF-16 to UTF-8 + * + */ + +#include <stdlib.h> +#include <stdio.h> + +#include <utf16utf8.h> + +UTF16UTF8::UTF16UTF8() { +} + + +char UTF16UTF8::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned short *from; + unsigned char *to; + + int len; + unsigned long uchar; + unsigned short schar; + + len = 0; + from = (unsigned short*) text; + while (*from) { + len += 2; + from++; + } + + // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned short*)&text[maxlen - len]; + } + else + from = (unsigned short*)text; + + + // ------------------------------- + + for (to = (unsigned 
char*)text; *from; from++) { + uchar = 0; + + if (*from < 0xD800 || *from > 0xDFFF) { + uchar = *from; + } + else if (*from >= 0xD800 && *from <= 0xDBFF) { + uchar = *from; + schar = *(from+1); + if (uchar < 0xDC00 || uchar > 0xDFFF) { + //error, do nothing + continue; + } + uchar &= 0x03ff; + schar &= 0x03ff; + uchar <<= 10; + uchar |= schar; + uchar += 0x10000; + from++; + } + else { + //error, do nothing + continue; + } + + if (uchar < 0x80) { + *to++ = uchar; + } + else if (uchar < 0x800) { + *to++ = 0xc0 | (uchar >> 6); + *to++ = 0x80 | (uchar & 0x3f); + } + else if (uchar < 0x10000) { + *to++ = 0xe0 | (uchar >> 12); + *to++ = 0x80 | (uchar >> 6) & 0x3f; + *to++ = 0x80 | uchar & 0x3f; + } + else if (uchar < 0x200000) { + *to++ = 0xF0 | (uchar >> 18); + *to++ = 0x80 | (uchar >> 12) & 0x3F; + *to++ = 0x80 | (uchar >> 6) & 0x3F; + *to++ = 0x80 | uchar & 0x3F; + } + } + *to++ = 0; + *to = 0; + + return 0; +} + + + + diff --git a/src/modules/filters/utf8arshaping.cpp b/src/modules/filters/utf8arshaping.cpp new file mode 100644 index 0000000..5121f48 --- /dev/null +++ b/src/modules/filters/utf8arshaping.cpp @@ -0,0 +1,48 @@ +/****************************************************************************** +* +* utf8arshaping - SWFilter decendant to perform Arabic shaping on +* UTF-8 text +*/ + +#ifdef _ICU_ + +#include <stdlib.h> +#include <string.h> + +#ifdef __GNUC__ +#include <unixstr.h> +#endif + +#include <utf8arshaping.h> + +UTF8arShaping::UTF8arShaping() { + + conv = ucnv_open("UTF-8", &err); + +} + +UTF8arShaping::~UTF8arShaping() { + ucnv_close(conv); +} + +char UTF8arShaping::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + UChar *ustr, *ustr2; + + int32_t len = strlen(text); + ustr = new UChar[len]; + ustr2 = new UChar[len]; + + // Convert UTF-8 string to UTF-16 (UChars) + len = ucnv_toUChars(conv, ustr, len, text, -1, &err); + + len = u_shapeArabic(ustr, len, ustr2, len, U_SHAPE_LETTERS_SHAPE | U_SHAPE_DIGITS_EN2AN, &err); 
+ + ucnv_fromUChars(conv, text, maxlen, ustr2, len, &err); + + delete [] ustr2; + delete [] ustr; + return 0; +} + +#endif diff --git a/src/modules/filters/utf8bidireorder.cpp b/src/modules/filters/utf8bidireorder.cpp new file mode 100644 index 0000000..8fa7280 --- /dev/null +++ b/src/modules/filters/utf8bidireorder.cpp @@ -0,0 +1,55 @@ +/****************************************************************************** +* +* utf8cnormalizer - SWFilter decendant to perform reordering of UTF-8 +* text to visual order according to Unicode BiDi +*/ + +#ifdef _ICU_ + +#include <stdlib.h> +#include <string.h> + +#ifdef __GNUC__ +#include <unixstr.h> +#endif + +#include <utf8bidireorder.h> + +UTF8BiDiReorder::UTF8BiDiReorder() { + + conv = ucnv_open("UTF-8", &err); + +} + +UTF8BiDiReorder::~UTF8BiDiReorder() { + ucnv_close(conv); +} + +char UTF8BiDiReorder::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + UChar *ustr, *ustr2; + + int32_t len = strlen(text); + ustr = new UChar[len]; //each char could become a surrogate pair + + // Convert UTF-8 string to UTF-16 (UChars) + len = ucnv_toUChars(conv, ustr, len, text, -1, &err); + ustr2 = new UChar[len]; + + UBiDi* bidi = ubidi_openSized(len + 1, 0, &err); + ubidi_setPara(bidi, ustr, len, UBIDI_DEFAULT_RTL, NULL, &err); + len = ubidi_writeReordered(bidi, ustr2, len, + UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, &err); + ubidi_close(bidi); + +// len = ubidi_writeReverse(ustr, len, ustr2, len, +// UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, &err); + + ucnv_fromUChars(conv, text, maxlen, ustr2, len, &err); + + delete [] ustr2; + delete [] ustr; + return 0; +} + +#endif diff --git a/src/modules/filters/utf8cantillation.cpp b/src/modules/filters/utf8cantillation.cpp new file mode 100644 index 0000000..84cb513 --- /dev/null +++ b/src/modules/filters/utf8cantillation.cpp @@ -0,0 +1,64 @@ +/****************************************************************************** + * + * UTF8Cantillation - 
SWFilter decendant to remove UTF-8 Hebrew cantillation + * + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <utf8cantillation.h> + + +const char UTF8Cantillation::on[] = "On"; +const char UTF8Cantillation::off[] = "Off"; +const char UTF8Cantillation::optName[] = "Hebrew Cantillation"; +const char UTF8Cantillation::optTip[] = "Toggles Hebrew Cantillation Marks"; + +UTF8Cantillation::UTF8Cantillation() { + option = false; + options.push_back(on); + options.push_back(off); +} + +UTF8Cantillation::~UTF8Cantillation(){}; + +void UTF8Cantillation::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *UTF8Cantillation::getOptionValue() +{ + return (option) ? on:off; +}
+
+/******************************************************************************
+ * UTF8Cantillation::ProcessText - strips Hebrew cantillation marks from a
+ *	NUL-terminated UTF-8 string, in place, when the option is switched off.
+ *
+ * Removed sequences: 0xD6 0x90 .. 0xD6 0xAF (U+0590..U+05AF) and 0xD7 0x84
+ * (U+05C4).  Everything else is copied through unchanged, so the result is
+ * never longer than the input.
+ *
+ * ENT:	text	- buffer to filter (rewritten in place)
+ *	maxlen	- unused by this filter
+ *	key	- unused by this filter
+ *	module	- unused by this filter
+ *
+ * RET:	always 0
+ */
+char UTF8Cantillation::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+    if (option)
+        return 0;	// marks are wanted: leave the text alone
+
+    unsigned char *to = (unsigned char*)text;
+    for (unsigned char *from = (unsigned char*)text; *from; from++) {
+        if (*from == 0xD7 && from[1] == 0x84) {
+            from++;	// drop both bytes of U+05C4
+        }
+        else if (*from != 0xD6) {
+            *to++ = *from;
+        }
+        else if (from[1] >= 0x90 && from[1] <= 0xAF) {
+            from++;	// drop both bytes of the cantillation mark
+        }
+        else {
+            // 0xD6 followed by something that is not a cantillation mark: keep it.
+            *to++ = *from;
+            // Only consume the second byte when it exists.  A text that ends in a
+            // lone 0xD6 used to step over its terminator and keep scanning memory.
+            if (from[1]) {
+                from++;
+                *to++ = *from;
+            }
+        }
+    }
+    // Two NUL bytes are written, as in the original.
+    // NOTE(review): assumes the buffer has one spare byte after the terminator - confirm.
+    *to++ = 0;
+    *to = 0;
+    return 0;
+}
diff --git a/src/modules/filters/utf8greekaccents.cpp b/src/modules/filters/utf8greekaccents.cpp new file mode 100644 index 0000000..b0e5dc8 --- /dev/null +++ b/src/modules/filters/utf8greekaccents.cpp @@ -0,0 +1,252 @@ +/****************************************************************************** + * + * UTF8GreekAccents - SWFilter decendant to remove UTF-8 Greek accents + * + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <utf8greekaccents.h> + + +const char UTF8GreekAccents::on[] = "On"; +const char UTF8GreekAccents::off[] = "Off"; +const char UTF8GreekAccents::optName[] = "Greek Accents"; +const char UTF8GreekAccents::optTip[] = "Toggles Greek Accents"; + +UTF8GreekAccents::UTF8GreekAccents() { + option = true; + options.push_back(on); + options.push_back(off); +} + +UTF8GreekAccents::~UTF8GreekAccents(){}; + +void UTF8GreekAccents::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *UTF8GreekAccents::getOptionValue() +{ + return (option) ? 
on:off; +} + +char UTF8GreekAccents::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { + unsigned char *to, *from; + + to = (unsigned char*)text; + for (from = (unsigned char*)text; *from; from++) { + //first just remove combining characters + if (*from == 0xE2 && *(from + 1) == 0x80 && *(from + 2) == 0x99) + from += 2; + else if (*from == 0xCC && *(from + 1)) { + if (*(from + 1) == 0x80 || *(from + 1) == 0x81 || *(from + 1) == 0x82 || *(from + 1) == 0x88 || *(from + 1) == 0x93 || *(from + 1) == 0x94) + from++; + } + else if (*from == 0xCD && *(from + 1) == 0xBA) + from++; + //now converted pre-composed characters to their alphabetic bases, discarding the accents + + //Greek + //capital alpha + else if ((*from == 0xCE && *(from + 1) == 0x86)) { + *to++ = 0xCE; + *to++ = 0x91; + from++; + } + //capital epsilon + else if ((*from == 0xCE && *(from + 1) == 0x88)) { + *to++ = 0xCE; + *to++ = 0x95; + from++; + } + //capital eta + else if ((*from == 0xCE && *(from + 1) == 0x89)) { + *to++ = 0xCE; + *to++ = 0x97; + from++; + } + //capital iota + else if ((*from == 0xCE && (*(from + 1) == 0x8A || *(from + 1) == 0xAA))) { + *to++ = 0xCE; + *to++ = 0x99; + from++; + } + //capital omicron + else if ((*from == 0xCE && *(from + 1) == 0x8C)) { + *to++ = 0xCE; + *to++ = 0x9F; + from++; + } + //capital upsilon + else if ((*from == 0xCE && (*(from + 1) == 0x8E || *(from + 1) == 0xAB))) { + *to++ = 0xCE; + *to++ = 0xA5; + from++; + } + //capital omega + else if ((*from == 0xCE && *(from + 1) == 0x8F)) { + *to++ = 0xCE; + *to++ = 0xA9; + from++; + } + + //alpha + else if ((*from == 0xCE && *(from + 1) == 0xAC)) { + *to++ = 0xCE; + *to++ = 0xB1; + from++; + } + //epsilon + else if ((*from == 0xCE && *(from + 1) == 0xAD)) { + *to++ = 0xCE; + *to++ = 0xB5; + from++; + } + //eta + else if ((*from == 0xCE && *(from + 1) == 0xAE)) { + *to++ = 0xCE; + *to++ = 0xB7; + from++; + } + //iota + else if ((*from == 0xCE && *(from + 1) == 0xAF) || 
(*from == 0xCF && *(from + 1) == 0x8A)) { + *to++ = 0xCE; + *to++ = 0xB9; + from++; + } + //omicron + else if ((*from == 0xCF && *(from + 1) == 0x8C)) { + *to++ = 0xCE; + *to++ = 0xBF; + from++; + } + //upsilon + else if ((*from == 0xCE && *(from + 1) == 0x88) || (*from == 0xCF && (*(from + 1) == 0x8B || *(from + 1) == 0x8D))) { + *to++ = 0xCF; + *to++ = 0x85; + from++; + } + //omega + else if ((*from == 0xCF && *(from + 1) == 0x8E)) { + *to++ = 0xCF; + *to++ = 0x89; + from++; + } + + //Extended Greek + //capital alpha + else if (*from == 0xE1 && ((*(from + 1) == 0xBC || *(from + 1) == 0xBE) && *(from + 2) >= 0x88 && *(from + 2) <= 0x8F) || (*(from + 1) == 0xBE && *(from + 2) >= 0xB8 && *(from + 2) <= 0xBC)) { + *to++ = 0xCE; + *to++ = 0x91; + from+=2; + } + //capital epsilon + else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0x98 && *(from + 2) <= 0x9D) || (*(from + 1) == 0xBF && (*(from + 2) == 0x88 || *(from + 2) == 0x89)))) { + *to++ = 0xCE; + *to++ = 0x95; + from+=2; + } + //capital eta + else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAF) || (*(from + 1) == 0xBE && *(from + 2) >= 0x98 && *(from + 2) <= 0x9F) || (*(from + 1) == 0xBF && *(from + 2) >= 0x8A && *(from + 2) <= 0x8C))) { + *to++ = 0xCE; + *to++ = 0x97; + from+=2; + } + //capital iota + else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xB8 && *(from + 2) <= 0xBF) || (*(from + 1) == 0xBF && *(from + 2) >= 0x98 && *(from + 2) <= 0x9B))) { + *to++ = 0xCE; + *to++ = 0x99; + from+=2; + } + //capital omicron + else if (*from == 0xE1 && ((*(from + 1) == 0xBD && *(from + 2) >= 0x88 && *(from + 2) <= 0x8D) || (*(from + 1) == 0xBF && *(from + 2) == 0xB8 || *(from + 2) == 0xB9))) { + *to++ = 0xCE; + *to++ = 0x9F; + from+=2; + } + //capital upsilon + else if (*from == 0xE1 && ((*(from + 1) == 0xBD && *(from + 2) >= 0x99 && *(from + 2) <= 0x9F) || (*(from + 1) == 0xBF && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAB))) { + *to++ = 0xCE; + 
*to++ = 0xA5; + from+=2; + } + //capital omega + else if (*from == 0xE1 && (((*(from + 1) == 0xBD || *(from + 1) == 0xBE) && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAF) || (*(from + 1) == 0xBF && *(from + 2) >= 0xBA && *(from + 2) <= 0xBC))) { + *to++ = 0xCE; + *to++ = 0xA9; + from+=2; + } + //capital rho + else if (*from == 0xE1 && *(from + 1) == 0xBF && *(from + 2) == 0xAC) { + *to++ = 0xCE; + *to++ = 0xA1; + from+=2; + } + + //alpha + else if (*from == 0xE1 && ((*(from + 1) == 0xBC || *(from + 1) == 0xBE) && *(from + 2) >= 0x80 && *(from + 2) <= 0x87) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB0 || *(from + 2) == 0xB1)) || (*(from + 1) == 0xBE && *(from + 2) >= 0xB0 && *(from + 2) <= 0xB7)) { + *to++ = 0xCE; + *to++ = 0xB1; + from+=2; + } + //epsilon + else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0x90 && *(from + 2) <= 0x95) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB2 || *(from + 2) == 0xB3)))) { + *to++ = 0xCE; + *to++ = 0xB5; + from+=2; + } + //eta + else if (*from == 0xE1 && ((*(from + 1) == 0xBE && *(from + 2) >= 0x90 && *(from + 2) <= 0x97) || (*(from + 1) == 0xBC && *(from + 2) >= 0xA0 && *(from + 2) <= 0xA7) || (*(from + 1) == 0xBF && *(from + 2) >= 0x82 && *(from + 2) <= 0x87) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB4 || *(from + 2) == 0xB5)))) { + *to++ = 0xCE; + *to++ = 0xB7; + from+=2; + } + //iota + else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xB0 && *(from + 2) <= 0xB7) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB6 || *(from + 2) == 0xB7)) || (*(from + 1) == 0xBF && *(from + 2) >= 0x90 && *(from + 2) <= 0x97))) { + *to++ = 0xCE; + *to++ = 0xB9; + from+=2; + } + //omicron + else if (*from == 0xE1 && (*(from + 1) == 0xBD && ((*(from + 2) >= 0x80 && *(from + 2) <= 0x85) || (*(from + 2) == 0xB8 || *(from + 2) == 0xB9)))) { + *to++ = 0xCE; + *to++ = 0xBF; + from+=2; + } + //upsilon + else if (*from == 0xE1 && ((*(from + 1) == 0xBD && ((*(from + 2) >= 0x90 && *(from + 2) <= 0x97) || *(from + 2) == 
0xBA || *(from + 2) == 0xBB)) || (*(from + 1) == 0xBF && ((*(from + 2) >= 0xA0 && *(from + 2) <= 0xA3) || *(from + 2) == 0xA6 || *(from + 2) == 0xA7)))) { + *to++ = 0xCF; + *to++ = 0x85; + from+=2; + } + //omega + else if (*from == 0xE1 && ((*(from + 1) == 0xBD && ((*(from + 2) >= 0xA0 && *(from + 2) <= 0xA7) || (*(from + 2) == 0xBC || *(from + 2) == 0xBD))) || (*(from + 1) == 0xBE && (*(from + 2) >= 0xA0 && *(from + 2) <= 0xA7)) || (*(from + 1) == 0xBF && *(from + 2) >= 0xB2 && *(from + 2) <= 0xB7))) { + *to++ = 0xCF; + *to++ = 0x89; + from+=2; + } + //rho + else if (*from == 0xE1 && *(from + 1) == 0xBF && (*(from + 2) == 0xA4 && *(from + 2) == 0xA5)) { + *to++ = 0xCF; + *to++ = 0x81; + from+=2; + } + else + *to++ = *from; + } + *to++ = 0; + *to = 0; + } + return 0; +} + + + + + + diff --git a/src/modules/filters/utf8hebrewpoints.cpp b/src/modules/filters/utf8hebrewpoints.cpp new file mode 100644 index 0000000..e5b50e1 --- /dev/null +++ b/src/modules/filters/utf8hebrewpoints.cpp @@ -0,0 +1,55 @@ +/****************************************************************************** + * + * UTF8HebrewPoints - SWFilter decendant to remove UTF-8 Hebrew vowel points + * + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <utf8hebrewpoints.h> + + +const char UTF8HebrewPoints::on[] = "On"; +const char UTF8HebrewPoints::off[] = "Off"; +const char UTF8HebrewPoints::optName[] = "Hebrew Vowel Points"; +const char UTF8HebrewPoints::optTip[] = "Toggles Hebrew Vowel Points"; + +UTF8HebrewPoints::UTF8HebrewPoints() { + option = true; + options.push_back(on); + options.push_back(off); +} + +UTF8HebrewPoints::~UTF8HebrewPoints(){}; + +void UTF8HebrewPoints::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *UTF8HebrewPoints::getOptionValue() +{ + return (option) ? 
on:off; +} + +char UTF8HebrewPoints::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { + unsigned char *to, *from; + + to = (unsigned char*)text; + //The UTF-8 range 0xD6 0xB0 to 0xD6 0xBF excluding 0xD6 0x consist of Hebrew cantillation marks so block those out. + for (from = (unsigned char*)text; *from; from++) { + if ((*from == 0xD6) && (*(from + 1) >= 0xB0 && *(from + 1) <= 0xBF) && (*(from + 1) != 0xBE)) { + from++; + } + else { + *to++ = *from; + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/utf8html.cpp b/src/modules/filters/utf8html.cpp new file mode 100644 index 0000000..7487815 --- /dev/null +++ b/src/modules/filters/utf8html.cpp @@ -0,0 +1,66 @@ +/****************************************************************************** + * + * utf8html - SWFilter decendant to convert a UTF-8 stream to HTML escapes + * + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <utf8html.h> + +UTF8HTML::UTF8HTML() { +} + + +char UTF8HTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned char *to, *from; + int len; + char digit[10]; + unsigned long ch; + + len = strlenw(text) + 2; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned char*)&text[maxlen - len]; + } + else from = (unsigned char*)text; + // ------------------------------- + for (to = (unsigned char*)text; *from; from++) { + ch = 0; + if ((*from & 128) != 128) { +// if (*from != ' ') + *to++ = *from; + continue; + } + if ((*from & 128) && ((*from & 64) != 64)) { + // error + *from = 'x'; + continue; + } + *from <<= 1; + int subsequent; + for (subsequent = 1; (*from & 128); subsequent++) { + *from <<= 1; + from[subsequent] &= 63; + ch <<= 6; + ch |= from[subsequent]; + } + subsequent--; + *from <<=1; + char significantFirstBits = 8 - (2+subsequent); + + ch |= (((short)*from) << 
(((6*subsequent)+significantFirstBits)-8)); + from += subsequent; + *to++ = '&'; + *to++ = '#'; + sprintf(digit, "%d", ch); + for (char *dig = digit; *dig; dig++) + *to++ = *dig; + *to++ = ';'; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/utf8latin1.cpp b/src/modules/filters/utf8latin1.cpp new file mode 100644 index 0000000..6cc1acd --- /dev/null +++ b/src/modules/filters/utf8latin1.cpp @@ -0,0 +1,74 @@ +/****************************************************************************** + * + * UTF8Latin1 - SWFilter decendant to convert UTF-8 to Latin-1 + * + */ + +#include <stdlib.h> +#include <stdio.h> + +#include <utf8latin1.h> + +UTF8Latin1::UTF8Latin1(char rchar) : replacementChar(rchar) { +} + + +char UTF8Latin1::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned char *from; + unsigned short *to; + + int len; + unsigned long uchar; + unsigned char significantFirstBits, subsequent; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned char*)&text[maxlen - len]; + } + else + from = (unsigned char*)text; + + + // ------------------------------- + + for (to = (unsigned short*)text; *from; from++) { + uchar = 0; + if ((*from & 128) != 128) { + // if (*from != ' ') + uchar = *from; + } + else if ((*from & 128) && ((*from & 64) != 64)) { + // error, do nothing + continue; + } + else { + *from <<= 1; + for (subsequent = 1; (*from & 128); subsequent++) { + *from <<= 1; + from[subsequent] &= 63; + uchar <<= 6; + uchar |= from[subsequent]; + } + subsequent--; + *from <<=1; + significantFirstBits = 8 - (2+subsequent); + + uchar |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8)); + from += subsequent; + } + + if (uchar < 0xff) { + *to++ = (unsigned char)uchar; + } + else { + *to++ = replacementChar; + } + } + *to++ = 0; + *to = 0; + + return 0; +} + diff --git a/src/modules/filters/utf8nfc.cpp 
b/src/modules/filters/utf8nfc.cpp new file mode 100644 index 0000000..df9e090 --- /dev/null +++ b/src/modules/filters/utf8nfc.cpp @@ -0,0 +1,46 @@ +/****************************************************************************** +* +* utf8nfc - SWFilter decendant to perform NFC (canonical composition +* normalization) on UTF-8 text +*/ + +#ifdef _ICU_ + +#include <stdlib.h> +#include <string.h> + +#ifdef __GNUC__ +#include <unixstr.h> +#endif + +#include <utf8nfc.h> + +UTF8NFC::UTF8NFC() { + conv = ucnv_open("UTF-8", &err); +} + +UTF8NFC::~UTF8NFC() { + ucnv_close(conv); +} + +char UTF8NFC::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + int32_t len = strlen(text) * 2; + source = new UChar[len + 1]; //each char could become a surrogate pair + + // Convert UTF-8 string to UTF-16 (UChars) + len = ucnv_toUChars(conv, source, len, text, -1, &err); + target = new UChar[len + 1]; + + //canonical composition + unorm_normalize(source, len, UNORM_NFC, 0, target, len, &err); + + ucnv_fromUChars(conv, text, maxlen, target, -1, &err); + + delete [] source; + delete [] target; + + return 0; +} + +#endif diff --git a/src/modules/filters/utf8nfkd.cpp b/src/modules/filters/utf8nfkd.cpp new file mode 100644 index 0000000..450cbbf --- /dev/null +++ b/src/modules/filters/utf8nfkd.cpp @@ -0,0 +1,46 @@ +/****************************************************************************** +* +* utf8nfkd - SWFilter decendant to perform NFKD (compatability decomposition +* normalization) on UTF-8 text +*/ + +#ifdef _ICU_ + +#include <stdlib.h> +#include <string.h> + +#ifdef __GNUC__ +#include <unixstr.h> +#endif + +#include <utf8nfkd.h> + +UTF8NFKD::UTF8NFKD() { + conv = ucnv_open("UTF-8", &err); +} + +UTF8NFKD::~UTF8NFKD() { + ucnv_close(conv); +} + +char UTF8NFKD::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + int32_t len = strlen(text) * 2; + source = new UChar[len + 1]; //each char could become a surrogate pair + + // 
Convert UTF-8 string to UTF-16 (UChars) + len = ucnv_toUChars(conv, source, len, text, -1, &err); + target = new UChar[len + 1]; + + //compatability decomposition + unorm_normalize(source, len, UNORM_NFKD, 0, target, len, &err); + + ucnv_fromUChars(conv, text, maxlen, target, -1, &err); + + delete [] source; + delete [] target; + + return 0; +} + +#endif diff --git a/src/modules/filters/utf8transliterator.cpp b/src/modules/filters/utf8transliterator.cpp new file mode 100644 index 0000000..3686717 --- /dev/null +++ b/src/modules/filters/utf8transliterator.cpp @@ -0,0 +1,718 @@ +/****************************************************************************** +* +* utf8transliterators - SWFilter decendant to transliterate between +* ICU-supported scripts. +*/ + +#ifdef _ICU_ + +#include <stdlib.h> +#include <string.h> + +#ifdef __GNUC__ +#include <unixstr.h> +#endif + +#include <utf8transliterator.h> + +#ifndef _ICUSWORD_ +#include "unicode/resbund.h" +#endif +#include <swlog.h> + +#ifdef _ICU_ +class UnicodeCaster { + const UnicodeString &ustr; +public: + UnicodeCaster(const UnicodeString &ustr):ustr(ustr) {}; operator const char *() { return ""; }; +}; + +#endif +const char UTF8Transliterator::optionstring[NUMTARGETSCRIPTS][16] = { + "Off", + "Latin", + "Basic Latin", + "Beta", + "BGreek", +/* + "Greek", + "Hebrew", + "Cyrillic", + "Arabic", + "Syriac", + "Katakana", + "Hiragana", + "Jamo", + "Hangul", + "Devanagari", + "Tamil", + "Bengali", + "Gurmukhi", + "Gujarati", + "Oriya", + "Telugu", + "Kannada", + "Malayalam", + "Thai", + "Georgian", + "Armenian", + "Ethiopic", + "Gothic", + "Ugaritic", + "Coptic" + */ +}; + +const char UTF8Transliterator::optName[] = "Transliteration"; +const char UTF8Transliterator::optTip[] = "Transliterates between scripts"; + +SWTransMap UTF8Transliterator::transMap; + +#ifndef _ICUSWORD_ + +const char UTF8Transliterator::SW_RB_RULE_BASED_IDS[] = "RuleBasedTransliteratorIDs"; +const char UTF8Transliterator::SW_RB_RULE[] = "Rule"; 
+#ifdef SWICU_DATA +const char UTF8Transliterator::SW_RESDATA[] = SWICU_DATA; +#else +const char UTF8Transliterator::SW_RESDATA[] = "/usr/local/lib/sword/"; +#endif + +class SWCharString { + public: + inline SWCharString(const UnicodeString& str); + inline ~SWCharString(); + inline operator const char*() { return ptr; } + private: + char buf[128]; + char* ptr; +}; +SWCharString::SWCharString(const UnicodeString& str) { + // TODO This isn't quite right -- we should probably do + // preflighting here to determine the real length. + if (str.length() >= (int32_t)sizeof(buf)) { + ptr = new char[str.length() + 8]; + } else { + ptr = buf; + } + str.extract(0, 0x7FFFFFFF, ptr, ""); +} + +SWCharString::~SWCharString() { + if (ptr != buf) { + delete[] ptr; + } +} + +#endif // _ICUSWORD_ + + +UTF8Transliterator::UTF8Transliterator() { + option = 0; + unsigned long i; + for (i = 0; i < NUMTARGETSCRIPTS; i++) { + options.push_back(optionstring[i]); + } +#ifndef _ICUSWORD_ + utf8status = U_ZERO_ERROR; + Load(utf8status); +#endif +} + +void UTF8Transliterator::Load(UErrorCode &status) +{ +#ifndef _ICUSWORD_ + static const char translit_swordindex[] = "translit_swordindex"; + + UResourceBundle *bundle, *transIDs, *colBund; + bundle = ures_openDirect(SW_RESDATA, translit_swordindex, &status); + if (U_FAILURE(status)) { + SWLog::systemlog->LogError("no resource index to load"); + SWLog::systemlog->LogError("status %s", u_errorName(status)); + return; + } + + transIDs = ures_getByKey(bundle, SW_RB_RULE_BASED_IDS, 0, &status); + UParseError parseError; + + int32_t row, maxRows; + if (U_SUCCESS(status)) { + maxRows = ures_getSize(transIDs); + for (row = 0; row < maxRows; row++) { + colBund = ures_getByIndex(transIDs, row, 0, &status); + + if (U_SUCCESS(status) && ures_getSize(colBund) == 4) { + UnicodeString id = ures_getUnicodeStringByIndex(colBund, 0, &status); + UChar type = ures_getUnicodeStringByIndex(colBund, 1, &status).charAt(0); + UnicodeString resString = 
ures_getUnicodeStringByIndex(colBund, 2, &status); + SWLog::systemlog->LogInformation("ok so far"); + + if (U_SUCCESS(status)) { + switch (type) { + case 0x66: // 'f' + case 0x69: // 'i' + // 'file' or 'internal'; + // row[2]=resource, row[3]=direction + { + UBool visible = (type == 0x0066 /*f*/); + UTransDirection dir = + (ures_getUnicodeStringByIndex(colBund, 3, &status).charAt(0) == + 0x0046 /*F*/) ? + UTRANS_FORWARD : UTRANS_REVERSE; + //registry->put(id, resString, dir, visible); + SWLog::systemlog->LogInformation("instantiating %s ...", (const char *)(UnicodeCaster)resString); + registerTrans(id, resString, dir, status); + SWLog::systemlog->LogInformation("done."); + } + break; + case 0x61: // 'a' + // 'alias'; row[2]=createInstance argument + //registry->put(id, resString, TRUE); + break; + } + } + else SWLog::systemlog->LogError("Failed to get resString"); + } + else SWLog::systemlog->LogError("Failed to get row"); + + ures_close(colBund); + } + } + else + { + SWLog::systemlog->LogError("no resource index to load"); + SWLog::systemlog->LogError("status %s", u_errorName(status)); + } + + ures_close(transIDs); + ures_close(bundle); + +#endif // _ICUSWORD_ +} + +void UTF8Transliterator::registerTrans(const UnicodeString& ID, const UnicodeString& resource, + UTransDirection dir, UErrorCode &status ) +{ +#ifndef _ICUSWORD_ + SWLog::systemlog->LogInformation("registering ID locally %s", (const char *)(UnicodeCaster)ID); + SWTransData swstuff; + swstuff.resource = resource; + swstuff.dir = dir; + SWTransPair swpair; + swpair.first = ID; + swpair.second = swstuff; + transMap.insert(swpair); +#endif +} + +bool UTF8Transliterator::checkTrans(const UnicodeString& ID, UErrorCode &status ) +{ +#ifndef _ICUSWORD_ + Transliterator *trans = Transliterator::createInstance(ID, UTRANS_FORWARD, status); + if (!U_FAILURE(status)) + { + // already have it, clean up and return true + SWLog::systemlog->LogInformation("already have it %s", (const char *)(UnicodeCaster)ID); + delete 
trans; + return true; + } + status = U_ZERO_ERROR; + + SWTransMap::iterator swelement; + if ((swelement = transMap.find(ID)) != transMap.end()) + { + SWLog::systemlog->LogInformation("found element in map"); + SWTransData swstuff = (*swelement).second; + UParseError parseError; + //UErrorCode status; + //std::cout << "unregistering " << ID << std::endl; + //Transliterator::unregister(ID); + SWLog::systemlog->LogInformation("resource is %s", (const char *)(UnicodeCaster)swstuff.resource); + + // Get the rules + //std::cout << "importing: " << ID << ", " << resource << std::endl; + SWCharString ch(swstuff.resource); + UResourceBundle *bundle = ures_openDirect(SW_RESDATA, ch, &status); + const UnicodeString rules = ures_getUnicodeStringByKey(bundle, SW_RB_RULE, &status); + ures_close(bundle); + //parser.parse(rules, isReverse ? UTRANS_REVERSE : UTRANS_FORWARD, + // parseError, status); + if (U_FAILURE(status)) { + SWLog::systemlog->LogError("Failed to get rules"); + SWLog::systemlog->LogError("status %s", u_errorName(status)); + return false; + } + + + Transliterator *trans = Transliterator::createFromRules(ID, rules, swstuff.dir, + parseError,status); + if (U_FAILURE(status)) { + SWLog::systemlog->LogError("Failed to create transliterator"); + SWLog::systemlog->LogError("status %s", u_errorName(status)); + SWLog::systemlog->LogError("Parse error: line %s", parseError.line); + SWLog::systemlog->LogError("Parse error: offset %d", parseError.offset); + SWLog::systemlog->LogError("Parse error: preContext %s", *parseError.preContext); + SWLog::systemlog->LogError("Parse error: postContext %s", *parseError.postContext); + SWLog::systemlog->LogError("rules were"); +// SWLog::systemlog->LogError((const char *)rules); + return false; + } + + Transliterator::registerInstance(trans); + return true; + + //Transliterator *trans = instantiateTrans(ID, swstuff.resource, swstuff.dir, parseError, status); + //return trans; + } + else + { + return false; + } +#else +return true; 
+#endif // _ICUSWORD_ +}
+
+/******************************************************************************
+ * UTF8Transliterator::createTrans - builds the compound transliterator
+ *	preID + ID + postID once checkTrans() reports that ID is usable.
+ *
+ * ENT:	preID	- text placed in front of ID in the compound ID
+ *	ID	- transliterator ID that is checked first
+ *	postID	- text placed after ID in the compound ID
+ *	dir	- NOTE(review): not used; the instance is always created with
+ *		  UTRANS_FORWARD - confirm that is intended
+ *	status	- ICU status, passed through to the calls made here
+ *
+ * RET:	the new transliterator, or NULL when the check or the creation fails
+ */
+Transliterator * UTF8Transliterator::createTrans(const UnicodeString& preID, const UnicodeString& ID,
+    const UnicodeString& postID, UTransDirection dir, UErrorCode &status )
+{
+    // extract id to check from ID xxx;id;xxx
+    if (!checkTrans(ID, status))
+        return NULL;
+
+    UnicodeString fullID = preID;
+    fullID += ID;
+    fullID += postID;
+
+    Transliterator *trans = Transliterator::createInstance(fullID,UTRANS_FORWARD,status);
+    if (U_FAILURE(status)) {
+        delete trans;
+        return NULL;
+    }
+    return trans;
+}
+
+/******************************************************************************
+ * UTF8Transliterator::setOptionValue - selects the target script by name.
+ *
+ * The names are compared case-insensitively from the last entry of
+ * optionstring downwards; a name that matches nothing selects entry 0.
+ *
+ * ENT:	ival	- name of the wanted target script
+ */
+void UTF8Transliterator::setOptionValue(const char *ival)
+{
+    // Start at the last valid row.  The scan used to begin at
+    // optionstring[NUMTARGETSCRIPTS], one row past the end of the table.
+    unsigned char i = option = NUMTARGETSCRIPTS - 1;
+    while (i && stricmp(ival, optionstring[i])) {
+        i--;
+        option = i;
+    }
+}
+
+/******************************************************************************
+ * UTF8Transliterator::getOptionValue - name of the selected target script.
+ *
+ * RET:	the optionstring entry for option, or 0 when option is out of range
+ */
+const char *UTF8Transliterator::getOptionValue()
+{
+    return (NUMTARGETSCRIPTS > option) ? optionstring[option] : 0;
+}
+ +char UTF8Transliterator::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (option) { // if we want transliteration + unsigned long i, j; + UErrorCode err = U_ZERO_ERROR; + UConverter * conv = NULL; + conv = ucnv_open("UTF-8", &err); + + bool compat = false; + bool noNFC = false; + + if (option == SE_JAMO) { + noNFC = true; + } + + // Convert UTF-8 string to UTF-16 (UChars) + j = strlen(text); + int32_t len = (j * 2) + 1; + UChar *source = new UChar[len]; + err = U_ZERO_ERROR; + len = ucnv_toUChars(conv, source, len, text, j, &err); + source[len] = 0; + + // Figure out which scripts are used in the string + unsigned char scripts[NUMSCRIPTS]; + + for (i = 0; i < NUMSCRIPTS; i++) { + scripts[i] = false; + } + + for (i = 0; i < len; i++) { + j = ublock_getCode(source[i]); + switch (j) { + case UBLOCK_BASIC_LATIN: scripts[SE_LATIN] = true; break; + case UBLOCK_GREEK: scripts[SE_GREEK] = true; break; + case UBLOCK_HEBREW: scripts[SE_HEBREW] = true; break; + case UBLOCK_CYRILLIC: scripts[SE_CYRILLIC] = true; break; + case UBLOCK_ARABIC: scripts[SE_ARABIC] = true; break; + 
case UBLOCK_SYRIAC: scripts[SE_SYRIAC] = true; break; + case UBLOCK_KATAKANA: scripts[SE_KATAKANA] = true; break; + case UBLOCK_HIRAGANA: scripts[SE_HIRAGANA] = true; break; + case UBLOCK_HANGUL_SYLLABLES: scripts[SE_HANGUL] = true; break; + case UBLOCK_HANGUL_JAMO: scripts[SE_JAMO] = true; break; + case UBLOCK_DEVANAGARI: scripts[SE_DEVANAGARI] = true; break; + case UBLOCK_TAMIL: scripts[SE_TAMIL] = true; break; + case UBLOCK_BENGALI: scripts[SE_BENGALI] = true; break; + case UBLOCK_GURMUKHI: scripts[SE_GURMUKHI] = true; break; + case UBLOCK_GUJARATI: scripts[SE_GUJARATI] = true; break; + case UBLOCK_ORIYA: scripts[SE_ORIYA] = true; break; + case UBLOCK_TELUGU: scripts[SE_TELUGU] = true; break; + case UBLOCK_KANNADA: scripts[SE_KANNADA] = true; break; + case UBLOCK_MALAYALAM: scripts[SE_MALAYALAM] = true; break; + case UBLOCK_THAI: scripts[SE_THAI] = true; break; + case UBLOCK_GEORGIAN: scripts[SE_GEORGIAN] = true; break; + case UBLOCK_ARMENIAN: scripts[SE_ARMENIAN] = true; break; + case UBLOCK_ETHIOPIC: scripts[SE_ETHIOPIC] = true; break; + case UBLOCK_GOTHIC: scripts[SE_GOTHIC] = true; break; + // needs Unicode 3.2? or 4.0? 
support from ICU + //case UBLOCK_UGARITIC: scripts[SE_UGARITIC] = true; break; + case UBLOCK_CJK_RADICALS_SUPPLEMENT: + case UBLOCK_KANGXI_RADICALS: + case UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS: + case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION: + case UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A: + case UBLOCK_CJK_UNIFIED_IDEOGRAPHS: + scripts[SE_HAN] = true; + break; + case UBLOCK_CJK_COMPATIBILITY: + case UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS: + case UBLOCK_CJK_COMPATIBILITY_FORMS: + scripts[SE_HAN] = true; + compat = true; + break; + case UBLOCK_HANGUL_COMPATIBILITY_JAMO: + scripts[SE_HANGUL] = true; + compat = true; + break; + + default: scripts[SE_LATIN] = true; + } + } + scripts[option] = false; //turn off the reflexive transliteration + + //return if we have no transliteration to do for this text + j = 0; + for (i = 0; !j && i < NUMSCRIPTS; i++) { + if (scripts[i]) j++; + } + if (!j) { + ucnv_close(conv); + return 0; + } + + UnicodeString preid; + if (compat) { + preid = UnicodeString("NFKD;"); + } + else { + preid = UnicodeString("NFD;"); + } + + //Simple X to Latin transliterators + UnicodeString id; + if (scripts[SE_GREEK]) { + if (option == SE_BETA) + id = UnicodeString("Greek-Beta"); + else if (option == SE_BGREEK) + id = UnicodeString("Greek-BGreek"); + else { + if (!strnicmp (((SWModule*)module)->Lang(), "cop", 3)) { + id = UnicodeString("Coptic-Latin"); + } + else { + id = UnicodeString("Greek-Latin"); + } + scripts[SE_LATIN] = true; + } + } + if (scripts[SE_HEBREW]) { + if (option == SE_BETA) + id = UnicodeString("Hebrew-CCAT"); + else if (option == SE_SYRIAC) + id = UnicodeString("Hebrew-Syriac"); + else { + id = UnicodeString("Hebrew-Latin"); + scripts[SE_LATIN] = true; + } + } + if (scripts[SE_CYRILLIC]) { + id = UnicodeString("Cyrillic-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_ARABIC]) { + id = UnicodeString("Arabic-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_SYRIAC]) { + if (option == SE_BETA) + id = 
UnicodeString("Syriac-CCAT"); + else if (option == SE_HEBREW) + id = UnicodeString("Syriac-Hebrew"); + else { + id = UnicodeString("Syriac-Latin"); + scripts[SE_LATIN] = true; + } + } + if (scripts[SE_THAI]) { + id = UnicodeString("Thai-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_GEORGIAN]) { + id = UnicodeString("Georgian-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_ARMENIAN]) { + id = UnicodeString("Armenian-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_ETHIOPIC]) { + id = UnicodeString("Ethiopic-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_GOTHIC]) { + id = UnicodeString("Gothic-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_UGARITIC]) { + id = UnicodeString("Ugaritic-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_HAN]) { + if (!strnicmp (((SWModule*)module)->Lang(), "ja", 2)) { + id = UnicodeString("Kanji-OnRomaji"); + } + else { + id = UnicodeString("Han-Pinyin"); + } + scripts[SE_LATIN] = true; + } + + // Inter-Kana and Kana to Latin transliterators + if (option == SE_HIRAGANA && scripts[SE_KATAKANA]) { + id = UnicodeString("Katakana-Hiragana"); + scripts[SE_HIRAGANA] = true; + } + else if (option == SE_KATAKANA && scripts[SE_HIRAGANA]) { + id = UnicodeString("Hiragana-Katakana"); + scripts[SE_KATAKANA] = true; + } + else { + if (scripts[SE_KATAKANA]) { + id = UnicodeString("Katakana-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_HIRAGANA]) { + id = UnicodeString("Hiragana-Latin"); + scripts[SE_LATIN] = true; + } + } + + // Inter-Korean and Korean to Latin transliterators + if (option == SE_HANGUL && scripts[SE_JAMO]) { + noNFC = false; + scripts[SE_HANGUL] = true; + } + else if (option == SE_JAMO && scripts[SE_HANGUL]) { + noNFC = true; + scripts[SE_JAMO] = true; + } + else { + if (scripts[SE_HANGUL]) { + id = UnicodeString("Hangul-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_JAMO]) { + id = UnicodeString("Jamo-Latin"); + scripts[SE_LATIN] = true; + } + } + + // 
Indic-Latin + if (option < SE_DEVANAGARI || option > SE_MALAYALAM) { + // Indic to Latin + if (scripts[SE_TAMIL]) { + id = UnicodeString("Tamil-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_BENGALI]) { + id = UnicodeString("Bengali-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_GURMUKHI]) { + id = UnicodeString("Gurmukhi-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_GUJARATI]) { + id = UnicodeString("Gujarati-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_ORIYA]) { + id = UnicodeString("Oriya-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_TELUGU]) { + id = UnicodeString("Telugu-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_KANNADA]) { + id = UnicodeString("Kannada-Latin"); + scripts[SE_LATIN] = true; + } + if (scripts[SE_MALAYALAM]) { + id = UnicodeString("Malayalam-Latin"); + scripts[SE_LATIN] = true; + } + } + else { + if (scripts[SE_LATIN]) { + id = UnicodeString("Latin-InterIndic"); + } + if (scripts[SE_DEVANAGARI]) { + id = UnicodeString("Devanagari-InterIndic"); + } + if (scripts[SE_TAMIL]) { + id = UnicodeString("Tamil-InterIndic"); + } + if (scripts[SE_BENGALI]) { + id = UnicodeString("Bengali-InterIndic"); + } + if (scripts[SE_GURMUKHI]) { + id = UnicodeString("Gurmurkhi-InterIndic"); + } + if (scripts[SE_GUJARATI]) { + id = UnicodeString("Gujarati-InterIndic"); + } + if (scripts[SE_ORIYA]) { + id = UnicodeString("Oriya-InterIndic"); + } + if (scripts[SE_TELUGU]) { + id = UnicodeString("Telugu-InterIndic"); + } + if (scripts[SE_KANNADA]) { + id = UnicodeString("Kannada-InterIndic"); + } + if (scripts[SE_MALAYALAM]) { + id = UnicodeString("Malayalam-InterIndic"); + } + + switch(option) { + case SE_DEVANAGARI: + id = UnicodeString("InterIndic-Devanagari"); + break; + case SE_TAMIL: + id = UnicodeString("InterIndic-Tamil"); + break; + case SE_BENGALI: + id = UnicodeString("InterIndic-Bengali"); + break; + case SE_GURMUKHI: + id = UnicodeString("InterIndic-Gurmukhi"); + break; + case 
SE_GUJARATI: + id = UnicodeString("InterIndic-Gujarati"); + break; + case SE_ORIYA: + id = UnicodeString("InterIndic-Oriya"); + break; + case SE_TELUGU: + id = UnicodeString("InterIndic-Telugu"); + break; + case SE_KANNADA: + id = UnicodeString("InterIndic-Kannada"); + break; + case SE_MALAYALAM: + id = UnicodeString("InterIndic-Malayalam"); + break; + default: + id = UnicodeString("InterIndic-Latin"); + scripts[SE_LATIN] = true; + break; + } + } + + if (scripts[SE_LATIN]) { + switch (option) { + case SE_GREEK: + id = UnicodeString("Latin-Greek"); + break; + case SE_HEBREW: + id = UnicodeString("Latin-Hebrew"); + break; + case SE_CYRILLIC: + id = UnicodeString("Latin-Cyrillic"); + break; + case SE_ARABIC: + id = UnicodeString("Latin-Arabic"); + break; + case SE_SYRIAC: + id = UnicodeString("Latin-Syriac"); + break; + case SE_THAI: + id = UnicodeString("Latin-Thai"); + break; + case SE_GEORGIAN: + id = UnicodeString("Latin-Georgian"); + break; + case SE_ARMENIAN: + id = UnicodeString("Latin-Armenian"); + break; + case SE_ETHIOPIC: + id = UnicodeString("Latin-Ethiopic"); + break; + case SE_GOTHIC: + id = UnicodeString("Latin-Gothic"); + break; + case SE_UGARITIC: + id = UnicodeString("Latin-Ugaritic"); + break; + case SE_COPTIC: + id = UnicodeString("Latin-Coptic"); + break; + case SE_KATAKANA: + id = UnicodeString("Latin-Katakana"); + break; + case SE_HIRAGANA: + id = UnicodeString("Latin-Hiragana"); + break; + case SE_JAMO: + id = UnicodeString("Latin-Jamo"); + break; + case SE_HANGUL: + id = UnicodeString("Latin-Hangul"); + break; + } + } + + if (option == SE_BASICLATIN) { + id = UnicodeString("Any-Latin1"); + } + UnicodeString postid; + if (noNFC) { + postid = UnicodeString(";NFD"); + } else { + postid = UnicodeString(";NFC"); + } + + //UParseError perr; + + err = U_ZERO_ERROR; + //Transliterator * trans = Transliterator::createInstance(id, UTRANS_FORWARD, perr, err); + Transliterator * trans = createTrans(preid, id, postid, UTRANS_FORWARD, err); + if (trans && 
!U_FAILURE(err)) { + UnicodeString target = UnicodeString(source); + trans->transliterate(target); + len = ucnv_fromUChars(conv, text, maxlen, target.getBuffer(), target.length(), &err); + if (len < maxlen) *(text + len) = 0; + else *(text + maxlen) = 0; + delete trans; + } + ucnv_close(conv); + } + return 0; +} +#endif + + + diff --git a/src/modules/filters/utf8utf16.cpp b/src/modules/filters/utf8utf16.cpp new file mode 100644 index 0000000..9aea6fe --- /dev/null +++ b/src/modules/filters/utf8utf16.cpp @@ -0,0 +1,79 @@ +/****************************************************************************** + * + * UTF8UTF16 - SWFilter decendant to convert UTF-8 to UTF-16 + * + */ + +#include <stdlib.h> +#include <stdio.h> + +#include <utf8utf16.h> + +UTF8UTF16::UTF8UTF16() { +} + + +char UTF8UTF16::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned char *from; + unsigned short *to; + + int len; + unsigned long uchar; + unsigned char significantFirstBits, subsequent; + unsigned short schar; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned char*)&text[maxlen - len]; + } + else + from = (unsigned char*)text; + + + // ------------------------------- + + for (to = (unsigned short*)text; *from; from++) { + uchar = 0; + if ((*from & 128) != 128) { + // if (*from != ' ') + uchar = *from; + } + else if ((*from & 128) && ((*from & 64) != 64)) { + // error, do nothing + continue; + } + else { + *from <<= 1; + for (subsequent = 1; (*from & 128); subsequent++) { + *from <<= 1; + from[subsequent] &= 63; + uchar <<= 6; + uchar |= from[subsequent]; + } + subsequent--; + *from <<=1; + significantFirstBits = 8 - (2+subsequent); + + uchar |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8)); + from += subsequent; + } + + if (uchar < 0x1ffff) { + *to++ = (unsigned short)uchar; + } + else { + uchar -= 0x10000; + schar = 0xD800 | (uchar & 
0x03ff); + uchar >>= 10; + uchar |= 0xDC00; + *to++ = (unsigned short)schar; + *to++ = (unsigned short)uchar; + } + } + *to = (unsigned short)0; + + return 0; +} + diff --git a/src/modules/genbook/rawgenbook/rawgenbook.cpp b/src/modules/genbook/rawgenbook/rawgenbook.cpp new file mode 100644 index 0000000..e22532a --- /dev/null +++ b/src/modules/genbook/rawgenbook/rawgenbook.cpp @@ -0,0 +1,216 @@ +/****************************************************************************** + * rawtext.cpp - code for class 'RawGenBook'- a module that reads raw text files: + * ot and nt using indexs ??.bks ??.cps ??.vss + */ + + +#include <stdio.h> +#include <fcntl.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> +#include <rawgenbook.h> +#include <rawstr.h> + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +/****************************************************************************** + * RawGenBook Constructor - Initializes data for instance of RawGenBook + * + * ENT: iname - Internal name for module + * idesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +RawGenBook::RawGenBook(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) + : SWGenBook(iname, idesc, idisp, enc, dir, mark, ilang) { + int fileMode = O_RDWR; + char *buf = new char [ strlen (ipath) + 20 ]; + + entryBuf = 0; + path = 0; + stdstr(&path, ipath); + + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + delete key; + key = CreateKey(); + + + sprintf(buf, "%s.bdt", path); + bdtfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + delete [] buf; + +} + + +/****************************************************************************** + * RawGenBook Destructor - Cleans up instance of RawGenBook + */ + +RawGenBook::~RawGenBook() { + + 
FileMgr::systemFileMgr.close(bdtfd); + + if (path) + delete [] path; + + if (entryBuf) + delete [] entryBuf; +} + + +/****************************************************************************** + * RawGenBook::getRawEntry - Returns the correct verse when char * cast + * is requested + * + * RET: string buffer with verse + */ + +char *RawGenBook::getRawEntry() { + + __u32 offset = 0; + __u32 size = 0; + + TreeKeyIdx *key = 0; + try { + key = SWDYNAMIC_CAST(TreeKeyIdx, (this->key)); + } + catch ( ... ) {} + + if (!key) { + key = (TreeKeyIdx *)CreateKey(); + (*key) = *(this->key); + } + + if (entryBuf) + delete [] entryBuf; + + int dsize; + key->getUserData(&dsize); + if (dsize > 7) { + memcpy(&offset, key->getUserData(), 4); + offset = swordtoarch32(offset); + + memcpy(&size, key->getUserData() + 4, 4); + size = swordtoarch32(size); + + entrySize = size; // support getEntrySize call + + entryBuf = new char [ (size + 2) * FILTERPAD ]; + *entryBuf = 0; + lseek(bdtfd->getFd(), offset, SEEK_SET); + read(bdtfd->getFd(), entryBuf, size); + + rawFilter(entryBuf, size, key); + + if (!isUnicode()) + RawStr::preptext(entryBuf); + } + else { + entryBuf = new char [2]; + entryBuf[0] = 0; + entryBuf[1] = 0; + entrySize = 0; + } + + if (key != this->key) // free our key if we created a VerseKey + delete key; + + return entryBuf; +} + + +void RawGenBook::setEntry(const char *inbuf, long len) { + + __u32 offset = archtosword32(lseek(bdtfd->getFd(), 0, SEEK_END)); + __u32 size = 0; + TreeKeyIdx *key = ((TreeKeyIdx *)this->key); + + char userData[8]; + + if (!len) + len = strlen(inbuf); + + write(bdtfd->getFd(), inbuf, len); + + size = archtosword32(len); + memcpy(userData, &offset, 4); + memcpy(userData+4, &size, 4); + key->setUserData(userData, 8); + key->save(); +} + + +void RawGenBook::linkEntry(const SWKey *inkey) { + TreeKeyIdx *srckey = 0; + TreeKeyIdx *key = ((TreeKeyIdx *)this->key); + // see if we have a VerseKey * or decendant + try { + srckey = 
SWDYNAMIC_CAST(TreeKeyIdx, inkey); + } + catch ( ... ) {} + // if we don't have a VerseKey * decendant, create our own + if (!srckey) { + srckey = (TreeKeyIdx *)CreateKey(); + (*srckey) = *inkey; + } + + key->setUserData(srckey->getUserData(), 8); + key->save(); + + if (inkey != srckey) // free our key if we created a VerseKey + delete srckey; +} + + +/****************************************************************************** + * RawGenBook::deleteEntry - deletes this entry + * + * RET: *this + */ + +void RawGenBook::deleteEntry() { + TreeKeyIdx *key = ((TreeKeyIdx *)this->key); + key->remove(); +} + + +char RawGenBook::createModule(const char *ipath) { + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd; + signed char retval; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + sprintf(buf, "%s.bdt", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + retval = TreeKeyIdx::create(path); + delete [] path; + return retval; +} + + +SWKey *RawGenBook::CreateKey() { + TreeKeyIdx *newKey = new TreeKeyIdx(path); + return newKey; +} diff --git a/src/modules/genbook/swgenbook.cpp b/src/modules/genbook/swgenbook.cpp new file mode 100644 index 0000000..589b0b9 --- /dev/null +++ b/src/modules/genbook/swgenbook.cpp @@ -0,0 +1,27 @@ +/****************************************************************************** + * swld.cpp - code for base class 'SWLD'. SWLD is the basis for all + * types of Lexicon and Dictionary modules (hence the 'LD'). 
+ */
+
+#include <swgenbook.h>
+
+
+/******************************************************************************
+ * SWGenBook Constructor - Initializes data for instance of SWGenBook; passes
+ *			everything to SWModule with the module type
+ *			"Generic Books"
+ *
+ * ENT:	imodname - Internal name for module
+ *	imoddesc - Name to display to user for module
+ *	idisp	 - Display object to use for displaying
+ */
+
+SWGenBook::SWGenBook(const char *imodname, const char *imoddesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : SWModule(imodname, imoddesc, idisp, "Generic Books", enc, dir, mark, ilang) {
+}
+
+
+/******************************************************************************
+ * SWGenBook Destructor - Cleans up instance of SWGenBook
+ */
+
+SWGenBook::~SWGenBook() {
+}
+
diff --git a/src/modules/lexdict/rawld/rawld.cpp b/src/modules/lexdict/rawld/rawld.cpp
new file mode 100644
index 0000000..058679a
--- /dev/null
+++ b/src/modules/lexdict/rawld/rawld.cpp
@@ -0,0 +1,173 @@
+/******************************************************************************
+ *  rawld.cpp - code for class 'RawLD'- a module that reads raw lexicon and
+ *	dictionary files: *.dat *.idx
+ */
+
+
+#include <ctype.h>
+#include <stdio.h>
+#include <fcntl.h>
+
+#ifndef __GNUC__
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+
+#include <string.h>
+#include <utilfuns.h>
+#include <rawstr.h>
+#include <rawld.h>
+
+
+ /******************************************************************************
+ * RawLD Constructor - Initializes data for instance of RawLD
+ *
+ * ENT:	ipath	- path and filename of files (no extension)
+ *		iname	- Internal name for module
+ *		idesc	- Name to display to user for module
+ *		idisp	- Display object to use for displaying
+ */
+
+RawLD::RawLD(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : RawStr(ipath), SWLD(iname, idesc, idisp, enc, dir, mark, ilang)
+{
+}
+
+
+/****************************************************************************** + * RawLD Destructor - Cleans up instance of RawLD + */ + +RawLD::~RawLD() +{ +} + + +/****************************************************************************** + * RawLD::strongsPad - Pads a key if it is 100% digits to 5 places + * + * ENT: buf - buffer to check and pad + */ + +void RawLD::strongsPad(char *buf) +{ + const char *check; + int size = 0; + int len = strlen(buf); + if ((len < 5) && (len > 0)) { + for (check = buf; *check; check++) { + if (!isdigit(*check)) + break; + else size++; + } + + if ((size == len) && size) + sprintf(buf, "%.5d", atoi(buf)); + } +} + + +/****************************************************************************** + * RawLD::getEntry - Looks up entry from data file. 'Snaps' to closest + * entry and sets 'entrybuf'. + * + * ENT: away - number of entries offset from key (default = 0) + * + * RET: error status + */ + +char RawLD::getEntry(long away) +{ + long start = 0; + unsigned short size = 0; + char *idxbuf = 0; + char retval = 0; + + char *buf = new char [ strlen(*key) + 6 ]; + strcpy(buf, *key); + + strongsPad(buf); + + if (!(retval = findoffset(buf, &start, &size, away))) { + readtext(start, &size, &idxbuf, &entrybuf); + entrySize = size; // support getEntrySize call + if (!key->Persist()) // If we have our own key + *key = idxbuf; // reset it to entry index buffer + + stdstr(&entkeytxt, idxbuf); // set entry key text that module 'snapped' to. 
+ delete [] idxbuf; + } + else { + if (entrybuf) + delete [] entrybuf; + entrybuf = new char [ 5 ]; + *entrybuf = 0; + } + + delete [] buf; + return retval; +} + + +/****************************************************************************** + * RawLD::getRawEntry - Returns the correct entry when char * cast + * is requested + * + * RET: string buffer with entry + */ + +char *RawLD::getRawEntry() { + + char ret = getEntry(); + if (!ret) { + if (!isUnicode()) + preptext(entrybuf); + } + else error = ret; + + return entrybuf; +} + + +/****************************************************************************** + * RawLD::increment - Increments module key a number of entries + * + * ENT: increment - Number of entries to jump forward + * + * RET: *this + */ + +void RawLD::increment(int steps) { + char tmperror; + + if (key->Traversable()) { + *key += steps; + error = key->Error(); + steps = 0; + } + + tmperror = (getEntry(steps)) ? KEYERR_OUTOFBOUNDS : 0; + error = (error)?error:tmperror; + *key = entkeytxt; +} + + +void RawLD::setEntry(const char *inbuf, long len) { + settext(*key, inbuf, len); +} + + +void RawLD::linkEntry(const SWKey *inkey) { + linkentry(*key, *inkey); +} + + +/****************************************************************************** + * RawFiles::deleteEntry - deletes this entry + * + * RET: *this + */ + +void RawLD::deleteEntry() { + settext(*key, ""); +} diff --git a/src/modules/lexdict/rawld4/rawld4.cpp b/src/modules/lexdict/rawld4/rawld4.cpp new file mode 100644 index 0000000..1bdf22f --- /dev/null +++ b/src/modules/lexdict/rawld4/rawld4.cpp @@ -0,0 +1,172 @@ +/****************************************************************************** + * rawld.cpp - code for class 'RawLD'- a module that reads raw lexicon and + * dictionary files: *.dat *.idx + */ + + +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> 
+#include <rawstr4.h>
+#include <rawld4.h>
+
+
+ /******************************************************************************
+ * RawLD4 Constructor - Initializes data for instance of RawLD4
+ *
+ * ENT:	ipath	- path and filename of files (no extension)
+ *		iname	- Internal name for module
+ *		idesc	- Name to display to user for module
+ *		idisp	- Display object to use for displaying
+ */
+
+RawLD4::RawLD4(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : RawStr4(ipath), SWLD(iname, idesc, idisp, enc, dir, mark, ilang)
+{
+}
+
+
+/******************************************************************************
+ * RawLD4 Destructor - Cleans up instance of RawLD4
+ */
+
+RawLD4::~RawLD4()
+{
+}
+
+
+/******************************************************************************
+ * RawLD4::strongsPad	- Pads a key if it is 100% digits to 5 places
+ *
+ * Only keys of 1 to 4 characters are considered.  The result is written back
+ * into buf, so buf must have room for 5 digits plus the terminator.
+ *
+ * ENT: buf -	buffer to check and pad
+ */
+
+void RawLD4::strongsPad(char *buf)
+{
+	const char *check;
+	long size = 0;
+	int len = strlen(buf);
+	if ((len < 5) && (len > 0)) {
+		for (check = buf; *check; check++) {
+			// cast: isdigit() is undefined for negative char values,
+			// which any non-ASCII key byte produces where char is signed
+			if (!isdigit((unsigned char)*check))
+				break;
+			else size++;
+		}
+
+		if ((size == len) && size)
+			sprintf(buf, "%.5d", atoi(buf));
+	}
+}
+
+
+/******************************************************************************
+ * RawLD4::getEntry	- Looks up entry from data file.  'Snaps' to closest
+ *				entry and sets 'entrybuf'.
+ * + * ENT: away - number of entries offset from key (default = 0) + * + * RET: error status + */ + +char RawLD4::getEntry(long away) +{ + long start = 0; + unsigned long size = 0; + char *idxbuf = 0; + char retval = 0; + + char *buf = new char [ strlen(*key) + 6 ]; + strcpy(buf, *key); + + strongsPad(buf); + + *entrybuf = 0; + if (!(retval = findoffset(buf, &start, &size, away))) { + readtext(start, &size, &idxbuf, &entrybuf); + entrySize = size; // support getEntrySize call + if (!key->Persist()) // If we have our own key + *key = idxbuf; // reset it to entry index buffer + + stdstr(&entkeytxt, idxbuf); // set entry key text that module 'snapped' to. + delete [] idxbuf; + } + else { + entrybuf = new char [ 5 ]; + *entrybuf = 0; + } + + delete [] buf; + return retval; +} + + +/****************************************************************************** + * RawLD4::getRawEntry - Returns the correct entry when char * cast + * is requested + * + * RET: string buffer with entry + */ + +char *RawLD4::getRawEntry() { + + char ret = getEntry(); + if (!ret) { + if (!isUnicode()) + preptext(entrybuf); + } + else error = ret; + + return entrybuf; +} + + +/****************************************************************************** + * RawLD4::increment - Increments module key a number of entries + * + * ENT: increment - Number of entries to jump forward + * + * RET: *this + */ + +void RawLD4::increment(int steps) { + char tmperror; + + if (key->Traversable()) { + *key += steps; + error = key->Error(); + steps = 0; + } + + tmperror = (getEntry(steps)) ? 
KEYERR_OUTOFBOUNDS : 0; + error = (error)?error:tmperror; + *key = entkeytxt; +} + + +void RawLD4::setEntry(const char *inbuf, long len) { + setText(*key, inbuf, len); +} + + +void RawLD4::linkEntry(const SWKey *inkey) { + linkentry(*key, *inkey); +} + + +/****************************************************************************** + * RawFiles::deleteEntry - deletes this entry + * + * RET: *this + */ + +void RawLD4::deleteEntry() { + setText(*key, ""); +} diff --git a/src/modules/lexdict/swld.cpp b/src/modules/lexdict/swld.cpp new file mode 100644 index 0000000..d28a5b8 --- /dev/null +++ b/src/modules/lexdict/swld.cpp @@ -0,0 +1,76 @@ +/****************************************************************************** + * swld.cpp - code for base class 'SWLD'. SWLD is the basis for all + * types of Lexicon and Dictionary modules (hence the 'LD'). + */ + +#include <swld.h> + + +/****************************************************************************** + * SWLD Constructor - Initializes data for instance of SWLD + * + * ENT: imodname - Internal name for module + * imoddesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +SWLD::SWLD(const char *imodname, const char *imoddesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : SWModule(imodname, imoddesc, idisp, "Lexicons / Dictionaries", enc, dir, mark, ilang) +{ + delete key; + key = CreateKey(); + entkeytxt = new char [1]; + *entkeytxt = 0; +} + + +/****************************************************************************** + * SWLD Destructor - Cleans up instance of SWLD + */ + +SWLD::~SWLD() +{ + if (entkeytxt) + delete [] entkeytxt; +} + + +/****************************************************************************** + * SWLD::KeyText - Sets/gets module KeyText, getting from saved text if key is + * persistent + * + * ENT: ikeytext - value which to set keytext + * [0] - only get + * + * RET: pointer to keytext + 
 */
+
+const char *SWLD::KeyText(const char *ikeytext)
+{
+	// pure 'get' on a persistent key: report the entry text the module
+	// snapped to rather than the key's own text
+	if (key->Persist() && !ikeytext) {
+		getRawEntry();	// force module key to snap to entry
+		return entkeytxt;
+	}
+	else return SWModule::KeyText(ikeytext);
+}
+
+
+/******************************************************************************
+ * SWLD::setPosition(SW_POSITION)	- Positions this key if applicable
+ *
+ * ENT:	p	- position to move to (POS_TOP / POS_BOTTOM)
+ */
+
+void SWLD::setPosition(SW_POSITION p) {
+	if (!key->Traversable()) {
+		// a non-traversable key is set to a text value and the lookup
+		// below snaps it to an actual entry
+		switch (p) {
+		case POS_TOP:
+			*key = "";
+			break;
+		case POS_BOTTOM:
+			*key = "zzzzzzzzz";
+			break;
+		}
+	}
+	else	*key = p;
+	getRawEntry();
+}
+
+
diff --git a/src/modules/lexdict/zld/zld.cpp b/src/modules/lexdict/zld/zld.cpp
new file mode 100644
index 0000000..047effa
--- /dev/null
+++ b/src/modules/lexdict/zld/zld.cpp
@@ -0,0 +1,172 @@
+/******************************************************************************
+ *  zld.cpp - code for class 'zLD'- a lexicon and dictionary module built on
+ *	zStr, constructed with a block count and an SWCompress object
+ */
+
+
+#include <ctype.h>
+#include <stdio.h>
+#include <fcntl.h>
+
+#ifndef __GNUC__
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+
+#include <string.h>
+#include <utilfuns.h>
+#include <zstr.h>
+#include <zld.h>
+
+
+ /******************************************************************************
+ * zLD Constructor - Initializes data for instance of zLD
+ *
+ * ENT:	ipath	- path and filename of files (no extension)
+ *		iname	- Internal name for module
+ *		idesc	- Name to display to user for module
+ *		blockCount - passed through to zStr
+ *		icomp	- compressor object, passed through to zStr
+ *		idisp	- Display object to use for displaying
+ */
+
+zLD::zLD(const char *ipath, const char *iname, const char *idesc, long blockCount, SWCompress *icomp, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : zStr(ipath, -1, blockCount, icomp), SWLD(iname, idesc, idisp, enc, dir, mark, ilang) {
+
+}
+
+
+/******************************************************************************
+ * zLD Destructor - Cleans up instance of zLD
+ */
+
+zLD::~zLD() { + +} + + +/****************************************************************************** + * zLD::strongsPad - Pads a key if it is 100% digits to 5 places + * + * ENT: buf - buffer to check and pad + */ + +void zLD::strongsPad(char *buf) { + const char *check; + long size = 0; + int len = strlen(buf); + if ((len < 5) && (len > 0)) { + for (check = buf; *check; check++) { + if (!isdigit(*check)) + break; + else size++; + } + + if ((size == len) && size) + sprintf(buf, "%.5d", atoi(buf)); + } +} + + +/****************************************************************************** + * zLD::getEntry - Looks up entry from data file. 'Snaps' to closest + * entry and sets 'entrybuf'. + * + * ENT: away - number of entries offset from key (default = 0) + * + * RET: error status + */ + +char zLD::getEntry(long away) { + char *idxbuf = 0; + char *ebuf = 0; + char retval = 0; + long index; + unsigned long size; + char *buf = new char [ strlen(*key) + 6 ]; + strcpy(buf, *key); + + strongsPad(buf); + + *entrybuf = 0; + if (!(retval = findKeyIndex(buf, &index, away))) { + getText(index, &idxbuf, &ebuf); + size = strlen(ebuf) + 1; + entrybuf = new char [ size * FILTERPAD ]; + strcpy(entrybuf, ebuf); + + entrySize = size; // support getEntrySize call + if (!key->Persist()) // If we have our own key + *key = idxbuf; // reset it to entry index buffer + + stdstr(&entkeytxt, idxbuf); // set entry key text that module 'snapped' to. 
+ free(idxbuf); + free(ebuf); + } + else { + entrybuf = new char [ 5 ]; + entrybuf[0] = 0; + entrybuf[1] = 0; + } + + delete [] buf; + return retval; +} + + +/****************************************************************************** + * zLD::getRawEntry - Returns the correct entry when char * cast + * is requested + * + * RET: string buffer with entry + */ + +char *zLD::getRawEntry() { + if (!getEntry() && !isUnicode()) { + prepText(entrybuf); + } + + return entrybuf; +} + + +/****************************************************************************** + * zLD::increment - Increments module key a number of entries + * + * ENT: increment - Number of entries to jump forward + * + * RET: *this + */ + +void zLD::increment(int steps) { + char tmperror; + + if (key->Traversable()) { + *key += steps; + error = key->Error(); + steps = 0; + } + + tmperror = (getEntry(steps)) ? KEYERR_OUTOFBOUNDS : 0; + error = (error)?error:tmperror; + *key = entkeytxt; +} + + +void zLD::setEntry(const char *inbuf, long len) { + setText(*key, inbuf, len); +} + + +void zLD::linkEntry(const SWKey *inkey) { + zStr::linkEntry(*key, *inkey); +} + + +/****************************************************************************** + * RawFiles::deleteEntry - deletes this entry + * + * RET: *this + */ + +void zLD::deleteEntry() { + setText(*key, ""); +} diff --git a/src/modules/swmodule.cpp b/src/modules/swmodule.cpp new file mode 100644 index 0000000..f37df27 --- /dev/null +++ b/src/modules/swmodule.cpp @@ -0,0 +1,661 @@ +/****************************************************************************** + * swmodule.cpp -code for base class 'module'. Module is the basis for all + * types of modules (e.g. texts, commentaries, maps, lexicons, + * etc.) 
+ */ + +#include <string.h> +#include <swmodule.h> +#include <utilfuns.h> +#include <regex.h> // GNU +#include <swfilter.h> +#include <versekey.h> // KLUDGE for Search +#ifndef _MSC_VER +#include <iostream> +#endif + +SWDisplay SWModule::rawdisp; +void SWModule::nullPercent(char percent, void *percentUserData) {} + +/****************************************************************************** + * SWModule Constructor - Initializes data for instance of SWModule + * + * ENT: imodname - Internal name for module + * imoddesc - Name to display to user for module + * idisp - Display object to use for displaying + * imodtype - Type of Module (All modules will be displayed with + * others of same type under their modtype heading + * unicode - if this module is unicode + */ + +SWModule::SWModule(const char *imodname, const char *imoddesc, SWDisplay *idisp, char *imodtype, SWTextEncoding encoding, SWTextDirection direction, SWTextMarkup markup, const char* imodlang) { + key = CreateKey(); + entrybuf = new char [1]; + *entrybuf = 0; + config = &ownConfig; + entrybufallocsize = 0; + modname = 0; + error = 0; + moddesc = 0; + modtype = 0; + modlang = 0; + this->encoding = encoding; + this->direction = direction; + this->markup = markup; + entrySize= -1; + disp = (idisp) ? 
idisp : &rawdisp; + stdstr(&modname, imodname); + stdstr(&moddesc, imoddesc); + stdstr(&modtype, imodtype); + stdstr(&modlang, imodlang); + stripFilters = new FilterList(); + rawFilters = new FilterList(); + renderFilters = new FilterList(); + optionFilters = new FilterList(); + encodingFilters = new FilterList(); + skipConsecutiveLinks = true; + procEntAttr = true; +} + + +/****************************************************************************** + * SWModule Destructor - Cleans up instance of SWModule + */ + +SWModule::~SWModule() +{ + if (entrybuf) + delete [] entrybuf; + if (modname) + delete [] modname; + if (moddesc) + delete [] moddesc; + if (modtype) + delete [] modtype; + if (modlang) + delete [] modlang; + + if (key) { + if (!key->Persist()) + delete key; + } + + stripFilters->clear(); + rawFilters->clear(); + renderFilters->clear(); + optionFilters->clear(); + encodingFilters->clear(); + entryAttributes.clear(); + + delete stripFilters; + delete rawFilters; + delete renderFilters; + delete optionFilters; + delete encodingFilters; +} + + +/****************************************************************************** + * SWModule::CreateKey - Allocates a key of specific type for module + * + * RET: pointer to allocated key + */ + +SWKey *SWModule::CreateKey() +{ + return new SWKey(); +} + + +/****************************************************************************** + * SWModule::Error - Gets and clears error status + * + * RET: error status + */ + +char SWModule::Error() +{ + char retval = error; + + error = 0; + return retval; +} + + +/****************************************************************************** + * SWModule::Name - Sets/gets module name + * + * ENT: imodname - value which to set modname + * [0] - only get + * + * RET: pointer to modname + */ + +char *SWModule::Name(const char *imodname) +{ + return stdstr(&modname, imodname); +} + + +/****************************************************************************** + * 
SWModule::Description - Sets/gets module description + * + * ENT: imoddesc - value which to set moddesc + * [0] - only get + * + * RET: pointer to moddesc + */ + +char *SWModule::Description(const char *imoddesc) +{ + return stdstr(&moddesc, imoddesc); +} + + +/****************************************************************************** + * SWModule::Type - Sets/gets module type + * + * ENT: imodtype - value which to set modtype + * [0] - only get + * + * RET: pointer to modtype + */ + +char *SWModule::Type(const char *imodtype) +{ + return stdstr(&modtype, imodtype); +} + +/****************************************************************************** + * SWModule::Direction - Sets/gets module direction + * + * ENT: newdir - value which to set direction + * [-1] - only get + * + * RET: char direction + */ +char SWModule::Direction(signed char newdir) { + if (newdir != -1) + direction = newdir; + return direction; +} + +/****************************************************************************** + * SWModule::Encoding - Sets/gets module encoding + * + * ENT: newdir - value which to set direction + * [-1] - only get + * + * RET: char encoding + */ +char SWModule::Encoding(signed char newenc) { + if (newenc != -1) + encoding = newenc; + return encoding; +} + +/****************************************************************************** + * SWModule::Markup - Sets/gets module markup + * + * ENT: newdir - value which to set direction + * [-1] - only get + * + * RET: char markup + */ +char SWModule::Markup(signed char newmark) { + if (newmark != -1) + markup = newmark; + return markup; +} + + +/****************************************************************************** + * SWModule::Lang - Sets/gets module language + * + * ENT: imodlang - value which to set modlang + * [0] - only get + * + * RET: pointer to modname + */ + +char *SWModule::Lang(const char *imodlang) +{ + return stdstr(&modlang, imodlang); +} + + 
+/****************************************************************************** + * SWModule::Disp - Sets/gets display driver + * + * ENT: idisp - value which to set disp + * [0] - only get + * + * RET: pointer to disp + */ + +SWDisplay *SWModule::Disp(SWDisplay *idisp) +{ + if (idisp) + disp = idisp; + + return disp; +} + + +/****************************************************************************** + * SWModule::Display - Calls this modules display object and passes itself + * + * RET: error status + */ + +char SWModule::Display() +{ + disp->Display(*this); + return 0; +} + + +/****************************************************************************** + * SWModule::SetKey - Sets a key to this module for position to a particular + * record or set of records + * + * ENT: ikey - key with which to set this module + * + * RET: error status + */ + +char SWModule::SetKey(const SWKey &ikey) { + return SetKey(&ikey); +} + +char SWModule::SetKey(const SWKey *ikey) +{ + SWKey *oldKey = 0; + + if (key) { + if (!key->Persist()) // if we have our own copy + oldKey = key; + } + + if (!ikey->Persist()) { // if we are to keep our own copy + key = CreateKey(); + *key = *ikey; + } + else key = (SWKey *)ikey; // if we are to just point to an external key + + if (oldKey) + delete oldKey; + + return 0; +} + + +/****************************************************************************** + * SWModule::KeyText - Sets/gets module KeyText + * + * ENT: ikeytext - value which to set keytext + * [0] - only get + * + * RET: pointer to keytext + */ + +const char *SWModule::KeyText(const char *ikeytext) +{ + if (ikeytext) + SetKey(ikeytext); + + return *key; +} + + +/****************************************************************************** + * SWModule::setPosition(SW_POSITION) - Positions this modules to an entry + * + * ENT: p - position (e.g. 
TOP, BOTTOM) + * + * RET: *this + */ + +void SWModule::setPosition(SW_POSITION p) { + *key = p; + char saveError = key->Error(); + + switch (p) { + case POS_TOP: + (*this)++; + (*this)--; + break; + + case POS_BOTTOM: + (*this)--; + (*this)++; + break; + } + + error = saveError; +} + + +/****************************************************************************** + * SWModule::increment - Increments module key a number of entries + * + * ENT: increment - Number of entries to jump forward + * + * RET: *this + */ + +void SWModule::increment(int steps) { + (*key) += steps; + error = key->Error(); +} + + +/****************************************************************************** + * SWModule::decrement - Decrements module key a number of entries + * + * ENT: decrement - Number of entries to jump backward + * + * RET: *this + */ + +void SWModule::decrement(int steps) { + (*key) -= steps; + error = key->Error(); +} + + +/****************************************************************************** + * SWModule::Search - Searches a module for a string + * + * ENT: istr - string for which to search + * searchType - type of search to perform + * >=0 - regex + * -1 - phrase + * -2 - multiword + * flags - options flags for search + * justCheckIfSupported - if set, don't search, only tell if this + * function supports requested search. 
+ * + * RET: listkey set to verses that contain istr + */ + +ListKey &SWModule::Search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) +{ + SWKey *savekey = 0; + SWKey *searchkey = 0; + regex_t preg; + SWKey textkey; + char **words = 0; + char *wordBuf = 0; + int wordCount = 0; + const char *sres; + terminateSearch = false; + char perc = 1; + bool savePEA = isProcessEntryAttributes(); + + processEntryAttributes(false); + listkey.ClearList(); + + if (!key->Persist()) { + savekey = CreateKey(); + *savekey = *key; + } + else savekey = key; + + searchkey = (scope)?scope->clone():(key->Persist())?key->clone():0; + if (searchkey) { + searchkey->Persist(1); + SetKey(*searchkey); + } + + (*percent)(perc, percentUserData); + // MAJOR KLUDGE: VerseKey::Index still return index within testament. + // VerseKey::NewIndex should be moved to Index and Index should be some + // VerseKey specific name + VerseKey *vkcheck = 0; +#ifndef _WIN32_WCE + try { +#endif + vkcheck = SWDYNAMIC_CAST(VerseKey, key); +#ifndef _WIN32_WCE + } + catch (...) {} +#endif + // end MAJOR KLUDGE + + *this = BOTTOM; + // fix below when we find out the bug + long highIndex = (vkcheck)?32300/*vkcheck->NewIndex()*/:key->Index(); + if (!highIndex) + highIndex = 1; // avoid division by zero errors. 
+ *this = TOP; + if (searchType >= 0) { + flags |=searchType|REG_NOSUB|REG_EXTENDED; + regcomp(&preg, istr, flags); + } + + (*percent)(++perc, percentUserData); + if (searchType == -2) { + wordBuf = (char *)calloc(sizeof(char), strlen(istr) + 1); + strcpy(wordBuf, istr); + words = (char **)calloc(sizeof(char *), 10); + int allocWords = 10; + words[wordCount] = strtok(wordBuf, " "); + while (words[wordCount]) { + wordCount++; + if (wordCount == allocWords) { + allocWords+=10; + words = (char **)realloc(words, sizeof(char *)*allocWords); + } + words[wordCount] = strtok(NULL, " "); + } + } + + perc = 5; + (*percent)(perc, percentUserData); + + while (!Error() && !terminateSearch) { + + + long mindex = 0; + if (vkcheck) + mindex = vkcheck->NewIndex(); + else mindex = key->Index(); + float per = (float)mindex / highIndex; + per *= 93; + per += 5; + char newperc = (char)per; +// char newperc = (char)(5+(93*(((float)((vkcheck)?vkcheck->NewIndex():key->Index()))/highIndex))); + if (newperc > perc) { + perc = newperc; + (*percent)(perc, percentUserData); + } + else if (newperc < perc) { +#ifndef _MSC_VER + std::cerr << "Serious error: new percentage complete is less than previous value\n"; + std::cerr << "using vk? " << ((vkcheck)?"yes":"no") << "\n"; + std::cerr << "index: " << ((vkcheck)?vkcheck->NewIndex():key->Index()) << "\n"; + std::cerr << "highIndex: " << highIndex << "\n"; + std::cerr << "newperc ==" << (int)newperc << "%" << "is smaller than\n"; + std::cerr << "perc == " << (int )perc << "% \n"; +#endif + } + if (searchType >= 0) { + if (!regexec(&preg, StripText(), 0, 0, 0)) { + textkey = KeyText(); + listkey << textkey; + } + } + else { + if (searchType == -1) { + sres = ((flags & REG_ICASE) == REG_ICASE) ? 
stristr(StripText(), istr) : strstr(StripText(), istr); + if (sres) { + textkey = KeyText(); + listkey << textkey; + } + } + if (searchType == -2) { + int i; + const char *stripBuf = StripText(); + for (i = 0; i < wordCount; i++) { + sres = ((flags & REG_ICASE) == REG_ICASE) ? stristr(stripBuf, words[i]) : strstr(stripBuf, words[i]); + if (!sres) + break; + } + if (i == wordCount) { + textkey = KeyText(); + listkey << textkey; + } + + } + } + (*this)++; + } + if (searchType >= 0) + regfree(&preg); + + if (searchType == -2) { + free(words); + free(wordBuf); + } + + SetKey(*savekey); + + if (!savekey->Persist()) + delete savekey; + + if (searchkey) + delete searchkey; + + listkey = TOP; + processEntryAttributes(savePEA); + (*percent)(100, percentUserData); + + return listkey; +} + + +/****************************************************************************** + * SWModule::StripText() - calls all stripfilters on current text + * + * ENT: buf - buf to massage instead of this modules current text + * len - max len of buf + * + * RET: this module's text at specified key location massaged by Strip filters + */ + +const char *SWModule::StripText(char *buf, int len) +{ + return RenderText(buf, len, false); +} + + +/****************************************************************************** + * SWModule::RenderText - calls all renderfilters on current text + * + * ENT: buf - buffer to Render instead of current module position + * + * RET: listkey set to verses that contain istr + */ + + const char *SWModule::RenderText(char *buf, int len, bool render) { + entryAttributes.clear(); + char *tmpbuf = (buf) ? buf : getRawEntry(); + SWKey *key = 0; + static char *null = ""; + + if (tmpbuf) { + unsigned long size = (len < 0) ? ((getEntrySize()<0) ? 
strlen(tmpbuf) : getEntrySize()) * FILTERPAD : len; + if (size > 0) { + key = (SWKey *)*this; + + optionFilter(tmpbuf, size, key); + + if (render) { + renderFilter(tmpbuf, size, key); + encodingFilter(tmpbuf, size, key); + } + else stripFilter(tmpbuf, size, key); + } + } + else { + tmpbuf = null; + } + + return tmpbuf; +} + + +/****************************************************************************** + * SWModule::RenderText - calls all renderfilters on current text + * + * ENT: tmpKey - key to use to grab text + * + * RET: this module's text at specified key location massaged by RenderFilers + */ + + const char *SWModule::RenderText(SWKey *tmpKey) +{ + SWKey *savekey; + const char *retVal; + + if (!key->Persist()) { + savekey = CreateKey(); + *savekey = *key; + } + else savekey = key; + + SetKey(*tmpKey); + + retVal = RenderText(); + + SetKey(*savekey); + + if (!savekey->Persist()) + delete savekey; + + return retVal; +} + + +/****************************************************************************** + * SWModule::StripText - calls all StripTextFilters on current text + * + * ENT: tmpKey - key to use to grab text + * + * RET: this module's text at specified key location massaged by Strip filters + */ + +const char *SWModule::StripText(SWKey *tmpKey) +{ + SWKey *savekey; + const char *retVal; + + if (!key->Persist()) { + savekey = CreateKey(); + *savekey = *key; + } + else savekey = key; + + SetKey(*tmpKey); + + retVal = StripText(); + + SetKey(*savekey); + + if (!savekey->Persist()) + delete savekey; + + return retVal; +} + + +const char *SWModule::getConfigEntry(const char *key) const { + ConfigEntMap::iterator it = config->find(key); + return (it != config->end()) ? 
it->second.c_str() : 0; +} + + +void SWModule::setConfig(ConfigEntMap *config) { + this->config = config; +} diff --git a/src/modules/texts/rawgbf/rawgbf.cpp b/src/modules/texts/rawgbf/rawgbf.cpp new file mode 100644 index 0000000..dd2fd47 --- /dev/null +++ b/src/modules/texts/rawgbf/rawgbf.cpp @@ -0,0 +1,84 @@ +/****************************************************************************** + * rawgbf.cpp - code for class 'RawGBF'- a module that reads raw text files: + * ot and nt using indexs ??.bks ??.cps ??.vss + */ + + +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> +#include <rawverse.h> +#include <rawgbf.h> + + +/****************************************************************************** + * RawGBF Constructor - Initializes data for instance of RawGBF + * + * ENT: iname - Internal name for module + * idesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +RawGBF::RawGBF(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp) : SWText(iname, idesc, idisp), RawVerse(ipath) +{ +} + + +/****************************************************************************** + * RawGBF Destructor - Cleans up instance of RawGBF + */ + +RawGBF::~RawGBF() +{ +} + + +/****************************************************************************** + * RawGBF::operator char * - Returns the correct verse when char * cast + * is requested + * + * RET: string buffer with verse + */ + +RawGBF::operator char*() +{ + long start; + unsigned short size; + VerseKey *key = 0; + +#ifndef _WIN32_WCE + try { +#endif + key = SWDYNAMIC_CAST(VerseKey, this->key); +#ifndef _WIN32_WCE + } + catch ( ... 
) {} +#endif + if (!key) + key = new VerseKey(this->key); + + + findoffset(key->Testament(), key->Index(), &start, &size); + + if (entrybuf) + delete [] entrybuf; + entrybuf = new char [ size * 3 ]; // extra for conversion to RTF or other. + + readtext(key->Testament(), start, size + 1, entrybuf); + preptext(entrybuf); + RenderText(entrybuf, size * 3); + + if (key != this->key) + delete key; + + return entrybuf; +} diff --git a/src/modules/texts/rawtext/rawtext.cpp b/src/modules/texts/rawtext/rawtext.cpp new file mode 100644 index 0000000..c2214f8 --- /dev/null +++ b/src/modules/texts/rawtext/rawtext.cpp @@ -0,0 +1,580 @@ +/****************************************************************************** + * rawtext.cpp - code for class 'RawText'- a module that reads raw text files: + * ot and nt using indexs ??.bks ??.cps ??.vss + */ + + +#include <stdio.h> +#include <fcntl.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> +#include <rawverse.h> +#include <rawtext.h> + +#include <map> +#include <list> +#include <algorithm> +#include <regex.h> // GNU + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +/****************************************************************************** + * RawText Constructor - Initializes data for instance of RawText + * + * ENT: iname - Internal name for module + * idesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +RawText::RawText(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) + : SWText(iname, idesc, idisp, enc, dir, mark, ilang), + RawVerse(ipath) { + + string fname; + fname = path; + char ch = fname.c_str()[strlen(fname.c_str())-1]; + if ((ch != '/') && (ch != '\\')) + fname += "/"; + + for (int loop = 0; loop < 2; loop++) { + fastSearch[loop] = 0; + string fastidxname =(fname + 
((loop)?"ntwords.dat":"otwords.dat")); + if (!access(fastidxname.c_str(), 04)) { + fastidxname = (fname + ((loop)?"ntwords.idx":"otwords.idx")); + if (!access(fastidxname.c_str(), 04)) + fastSearch[loop] = new RawStr((fname + ((loop)?"ntwords":"otwords")).c_str()); + } + } +} + + +/****************************************************************************** + * RawText Destructor - Cleans up instance of RawText + */ + +RawText::~RawText() +{ + if (fastSearch[0]) + delete fastSearch[0]; + + if (fastSearch[1]) + delete fastSearch[1]; +} + + +/****************************************************************************** + * RawText::getRawEntry - Returns the correct verse when char * cast + * is requested + * + * RET: string buffer with verse + */ + +char *RawText::getRawEntry() { + long start = 0; + unsigned short size = 0; + VerseKey *key = 0; + + // see if we have a VerseKey * or decendant + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... ) { } + // if we don't have a VerseKey * decendant, create our own + if (!key) + key = new VerseKey(this->key); + + findoffset(key->Testament(), key->Index(), &start, &size); + entrySize = size; // support getEntrySize call + + unsigned long newsize = (size + 2) * FILTERPAD; + if (newsize > entrybufallocsize) { + if (entrybuf) + delete [] entrybuf; + entrybuf = new char [ newsize ]; + entrybufallocsize = newsize; + } + *entrybuf = 0; + + readtext(key->Testament(), start, (size + 2), entrybuf); + + rawFilter(entrybuf, size, key); + + if (!isUnicode()) + preptext(entrybuf); + + if (this->key != key) // free our key if we created a VerseKey + delete key; + + return entrybuf; +} + + +signed char RawText::createSearchFramework() { + SWKey *savekey = 0; + SWKey *searchkey = 0; + SWKey textkey; + char *word = 0; + char *wordBuf = 0; + + // dictionary holds words associated with a list + // containing every module position that contains + // the word. 
[0] Old Testament; [1] NT + map < string, list<long> > dictionary[2]; + + + // save key information so as not to disrupt original + // module position + if (!key->Persist()) { + savekey = CreateKey(); + *savekey = *key; + } + else savekey = key; + + searchkey = (key->Persist())?key->clone():0; + if (searchkey) { + searchkey->Persist(1); + SetKey(*searchkey); + } + + // position module at the beginning + *this = TOP; + + VerseKey *lkey = (VerseKey *)key; + + // iterate thru each entry in module + while (!Error()) { + long index = lkey->Index(); + wordBuf = (char *)calloc(sizeof(char), strlen(StripText()) + 1); + strcpy(wordBuf, StripText()); + + // grab each word from the text + word = strtok(wordBuf, " !.,?;:()-=+/\\|{}[]\"<>"); + while (word) { + + // make word upper case + toupperstr(word); + + // lookup word in dictionary (or make entry in dictionary + // for this word) and add this module position (index) to + // the word's associated list of module positions + dictionary[lkey->Testament()-1][word].push_back(index); + word = strtok(NULL, " !.,?;:()-=+/\\|{}[]\"<>"); + } + free(wordBuf); + (*this)++; + } + + // reposition module back to where it was before we were called + SetKey(*savekey); + + if (!savekey->Persist()) + delete savekey; + + if (searchkey) + delete searchkey; + + + // --------- Let's output an index from our dictionary ----------- + int datfd; + int idxfd; + map < string, list<long> >::iterator it; + list<long>::iterator it2; + unsigned long offset, entryoff; + unsigned short size; + + string fname; + fname = path; + char ch = fname.c_str()[strlen(fname.c_str())-1]; + if ((ch != '/') && (ch != '\\')) + fname += "/"; + + // for old and new testament do... 
+ for (int loop = 0; loop < 2; loop++) { + if ((datfd = open((fname + ((loop)?"ntwords.dat":"otwords.dat")).c_str(), O_CREAT|O_WRONLY|O_BINARY, 00644 )) == -1) + return -1; + if ((idxfd = open((fname + ((loop)?"ntwords.idx":"otwords.idx")).c_str(), O_CREAT|O_WRONLY|O_BINARY, 00644 )) == -1) { + close(datfd); + return -1; + } + + // iterate thru each word in the dictionary + for (it = dictionary[loop].begin(); it != dictionary[loop].end(); it++) { + printf("%s: ", it->first.c_str()); + + // get our current offset in our word.dat file and write this as the start + // of the next entry in our database + offset = lseek(datfd, 0, SEEK_CUR); + write(idxfd, &offset, 4); + + // write our word out to the word.dat file, delineating with a \n + write(datfd, it->first.c_str(), strlen(it->first.c_str())); + write(datfd, "\n", 1); + + // force our mod position list for this word to be unique (remove + // duplicates that may exist if the word was found more than once + // in the verse + it->second.unique(); + + // iterate thru each mod position for this word and output it to + // our database + unsigned short count = 0; + for (it2 = it->second.begin(); it2 != it->second.end(); it2++) { + entryoff= *it2; + write(datfd, &entryoff, 4); + count++; + } + + // now see what our new position is in our word.dat file and + // determine the size of this database entry + size = lseek(datfd, 0, SEEK_CUR) - offset; + + // store the size of this database entry + write(idxfd, &size, 2); + printf("%d entries (size: %d)\n", count, size); + } + close(datfd); + close(idxfd); + } + return 0; +} + + +/****************************************************************************** + * SWModule::Search - Searches a module for a string + * + * ENT: istr - string for which to search + * searchType - type of search to perform + * >=0 - regex + * -1 - phrase + * -2 - multiword + * flags - options flags for search + * justCheckIfSupported - if set, don't search, only tell if this + * function supports 
requested search. + * + * RET: listkey set to verses that contain istr + */ + +ListKey &RawText::Search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) +{ + listkey.ClearList(); + + if ((fastSearch[0]) && (fastSearch[1])) { + + switch (searchType) { + case -2: { + + if ((flags & REG_ICASE) != REG_ICASE) // if haven't chosen to + // ignore case + break; // can't handle fast case sensitive searches + + // test to see if our scope for this search is bounded by a + // VerseKey + VerseKey *testKeyType = 0; + try { + testKeyType = SWDYNAMIC_CAST(VerseKey, ((scope)?scope:key)); + } + catch ( ... ) {} + // if we don't have a VerseKey * decendant we can't handle + // because of scope. + // In the future, add bool SWKey::isValid(const char *tryString); + if (!testKeyType) + break; + + + // check if we just want to see if search is supported. + // If we've gotten this far, then it is supported. + if (justCheckIfSupported) { + *justCheckIfSupported = true; + return listkey; + } + + SWKey saveKey = *testKeyType; // save current place + + char error = 0; + char **words = 0; + char *wordBuf = 0; + int wordCount = 0; + long start; + unsigned short size; + char *idxbuf = 0; + char *datbuf = 0; + list <long> indexes; + list <long> indexes2; + VerseKey vk; + vk = TOP; + + (*percent)(10, percentUserData); + + // toupper our copy of search string + stdstr(&wordBuf, istr); + toupperstr(wordBuf); + + // get list of individual words + words = (char **)calloc(sizeof(char *), 10); + int allocWords = 10; + words[wordCount] = strtok(wordBuf, " "); + while (words[wordCount]) { + wordCount++; + if (wordCount == allocWords) { + allocWords+=10; + words = (char **)realloc(words, sizeof(char *)*allocWords); + } + words[wordCount] = strtok(NULL, " "); + } + + (*percent)(20, percentUserData); + + // clear our result set + indexes.erase(indexes.begin(), indexes.end()); + + // search both old and new testament 
indexes + for (int j = 0; j < 2; j++) { + // iterate thru each word the user passed to us. + for (int i = 0; i < wordCount; i++) { + + // clear this word's result set + indexes2.erase(indexes2.begin(), indexes2.end()); + error = 0; + + // iterate thru every word in the database that starts + // with our search word + for (int away = 0; !error; away++) { + idxbuf = 0; + + // find our word in the database and jump ahead _away_ + error = fastSearch[j]->findoffset(words[i], &start, &size, away); + + // get the word from the database + fastSearch[j]->getidxbufdat(start, &idxbuf); + + // check to see if it starts with our target word + if (strlen(idxbuf) > strlen(words[i])) + idxbuf[strlen(words[i])] = 0; +// else words[i][strlen(idxbuf)] = 0; + if (!strcmp(idxbuf, words[i])) { + + // get data for this word from database + free(idxbuf); + idxbuf = 0; + datbuf = 0; + fastSearch[j]->readtext(start, &size, &idxbuf, &datbuf); + + // we know that the data consists of sizof(long) + // records each a valid module position that constains + // this word + // + // iterate thru each of these module positions + long *keyindex = (long *)datbuf; + while (keyindex < (long *)(datbuf + size - (strlen(idxbuf) + 1))) { + if (i) { // if we're not on our first word + + // check to see if this word is already in the result set. 
+ // This is our AND functionality + if (find(indexes.begin(), indexes.end(), *keyindex) != indexes.end()) + // add to new result set + indexes2.push_back(*keyindex); + } + else indexes2.push_back(*keyindex); + keyindex++; + } + free(datbuf); + } + else error = 1; // no more matches + free(idxbuf); + } + + // make new result set final result set + indexes = indexes2; + + percent((char)(20 + (float)((j*wordCount)+i)/(wordCount * 2) * 78), percentUserData); + } + + // indexes contains our good verses, lets return them in a listkey + indexes.sort(); + + // iterate thru each good module position that meets the search + for (list <long>::iterator it = indexes.begin(); it != indexes.end(); it++) { + + // set a temporary verse key to this module position + vk.Testament(j+1); + vk.Error(); + vk.Index(*it); + + // check scope + // Try to set our scope key to this verse key + if (scope) { + *testKeyType = vk; + + // check to see if it set ok and if so, add to our return list + if (*testKeyType == vk) + listkey << (const char *) vk; + } + else listkey << (const char*) vk; + } + } + (*percent)(98, percentUserData); + + free(words); + free(wordBuf); + + *testKeyType = saveKey; // set current place back to original + + listkey = TOP; + (*percent)(100, percentUserData); + return listkey; + } + + default: + break; + } + } + + // check if we just want to see if search is supported + if (justCheckIfSupported) { + *justCheckIfSupported = false; + return listkey; + } + + // if we don't support this search, fall back to base class + return SWModule::Search(istr, searchType, flags, scope, justCheckIfSupported, percent, percentUserData); +} + + +void RawText::setEntry(const char *inbuf, long len) { + VerseKey *key = 0; + // see if we have a VerseKey * or decendant + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... 
) {} + // if we don't have a VerseKey * decendant, create our own + if (!key) + key = new VerseKey(this->key); + + settext(key->Testament(), key->Index(), inbuf, len); + + if (this->key != key) // free our key if we created a VerseKey + delete key; +} + + +void RawText::linkEntry(const SWKey *inkey) { + VerseKey *destkey = 0; + const VerseKey *srckey = 0; + // see if we have a VerseKey * or decendant + try { + destkey = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... ) {} + // if we don't have a VerseKey * decendant, create our own + if (!destkey) + destkey = new VerseKey(this->key); + + // see if we have a VerseKey * or decendant + try { + srckey = SWDYNAMIC_CAST(VerseKey, inkey); + } + catch ( ... ) {} + // if we don't have a VerseKey * decendant, create our own + if (!srckey) + srckey = new VerseKey(inkey); + + linkentry(destkey->Testament(), destkey->Index(), srckey->Index()); + + if (this->key != destkey) // free our key if we created a VerseKey + delete destkey; + + if (inkey != srckey) // free our key if we created a VerseKey + delete srckey; +} + + +/****************************************************************************** + * RawText::deleteEntry - deletes this entry + * + * RET: *this + */ + +void RawText::deleteEntry() { + + VerseKey *key = 0; + + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... ) {} + if (!key) + key = new VerseKey(this->key); + + settext(key->Testament(), key->Index(), ""); + + if (key != this->key) + delete key; +} + +/****************************************************************************** + * RawText::increment - Increments module key a number of entries + * + * ENT: increment - Number of entries to jump forward + * + * RET: *this + */ + +void RawText::increment(int steps) { + long start; + unsigned short size; + VerseKey *tmpkey = 0; + + try { + tmpkey = SWDYNAMIC_CAST(VerseKey, key); + } + catch ( ... 
) {} + if (!tmpkey) + tmpkey = new VerseKey(key); + + findoffset(tmpkey->Testament(), tmpkey->Index(), &start, &size); + + SWKey lastgood = *tmpkey; + while (steps) { + long laststart = start; + unsigned short lastsize = size; + SWKey lasttry = *tmpkey; + (steps > 0) ? (*key)++ : (*key)--; + if (tmpkey != key) + delete tmpkey; + tmpkey = 0; + try { + tmpkey = SWDYNAMIC_CAST(VerseKey, key); + } + catch ( ... ) {} + if (!tmpkey) + tmpkey = new VerseKey(key); + + if ((error = key->Error())) { + *key = lastgood; + break; + } + long index = tmpkey->Index(); + findoffset(tmpkey->Testament(), index, &start, &size); + if ( + (((laststart != start) || (lastsize != size)) // we're a different entry + && (start > 0) && (size)) // and we actually have a size + ||(!skipConsecutiveLinks)) { // or we don't want to skip consecutive links + steps += (steps < 0) ? 1 : -1; + lastgood = *tmpkey; + } + } + error = (error) ? KEYERR_OUTOFBOUNDS : 0; + + if (tmpkey != key) + delete tmpkey; +} diff --git a/src/modules/texts/swtext.cpp b/src/modules/texts/swtext.cpp new file mode 100644 index 0000000..5f6b424 --- /dev/null +++ b/src/modules/texts/swtext.cpp @@ -0,0 +1,40 @@ +/****************************************************************************** + * swtext.cpp - code for base class 'SWText'- The basis for all text modules + */ + +#include <swtext.h> +#include <listkey.h> + + +/****************************************************************************** + * SWText Constructor - Initializes data for instance of SWText + * + * ENT: imodname - Internal name for module + * imoddesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +SWText::SWText(const char *imodname, const char *imoddesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang): SWModule(imodname, imoddesc, idisp, "Biblical Texts", enc, dir, mark, ilang) +{ + delete key; + key = CreateKey(); + skipConsecutiveLinks = false; +} + + 
+/****************************************************************************** + * SWText Destructor - Cleans up instance of SWText + */ + +SWText::~SWText() { +} + + +/****************************************************************************** + * SWText CreateKey - Create the correct key (VerseKey) for use with SWText + */ + +SWKey *SWText::CreateKey() +{ + return new VerseKey(); +} diff --git a/src/modules/texts/ztext/ztext.cpp b/src/modules/texts/ztext/ztext.cpp new file mode 100644 index 0000000..c774693 --- /dev/null +++ b/src/modules/texts/ztext/ztext.cpp @@ -0,0 +1,309 @@ +/****************************************************************************** + * ztext.cpp - code for class 'zText'- a module that reads compressed text + * files: ot and nt using indexs ??.vss + */ + + +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> +//#include <rawverse.h> +#include <ztext.h> +//#include <zlib.h> + + +/****************************************************************************** + * zText Constructor - Initializes data for instance of zText + * + * ENT: ipath - path to data files + * iname - Internal name for module + * idesc - Name to display to user for module + * iblockType - verse, chapter, book, etc. 
of index chunks + * icomp - Compressor object + * idisp - Display object to use for displaying + */ + +zText::zText(const char *ipath, const char *iname, const char *idesc, int iblockType, SWCompress *icomp, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : zVerse(ipath, -1, iblockType, icomp), SWText(iname, idesc, idisp, enc, dir, mark, ilang)/*, SWCompress()*/ +{ + blockType = iblockType; + lastWriteKey = 0; +} + + +/****************************************************************************** + * zText Destructor - Cleans up instance of zText + */ + +zText::~zText() +{ + flushCache(); + + if (lastWriteKey) + delete lastWriteKey; +} + + +/****************************************************************************** + * zText::getRawEntry - Returns the current verse buffer + * + * RET: buffer with verse + */ + +char *zText::getRawEntry() +{ +/* + long start; + unsigned long size; + unsigned long destsize; + char *tmpbuf; + char *dest; + VerseKey *lkey = (VerseKey *) SWModule::key; + char sizebuf[3]; + + lkey->Verse(0); + if (chapcache != lkey->Index()) { + findoffset(lkey->Testament(), lkey->Index(), &start, &((unsigned short) size)); + readtext(lkey->Testament(), start, 3, sizebuf); + memcpy(&size, sizebuf, 2); + tmpbuf = new char [ size + 1 ]; + readtext(lkey->Testament(), start + 2, size + 1 , tmpbuf); + //zBuf(&size, tmpbuf); + dest = new char [ (size*4) + 1 ]; + uncompress((Bytef *)dest, &destsize, (Bytef *) tmpbuf, size); + chapcache = lkey->Index(); + delete [] tmpbuf; + } + + //findoffset(key->Testament(), key->Index(), &start, &size); + findoffset(lkey->Testament(), lkey->Index(), &start, &((unsigned short) size)); + + if (versebuf) + delete [] versebuf; + versebuf = new char [ size + 1 ]; + //memcpy(versebuf, Buf(), size); + memcpy(versebuf, dest, destsize); + delete [] dest; + + preptext(versebuf); + + return versebuf; +*/ + + long start = 0; + unsigned short size = 0; + VerseKey *key = 0; + + //printf 
("zText char *\n"); + + // see if we have a VerseKey * or decendant + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... ) {} + // if we don't have a VerseKey * decendant, create our own + if (!key) + key = new VerseKey(this->key); + + //printf ("checking cache\n"); + //printf ("finding offset\n"); + findoffset(key->Testament(), key->Index(), &start, &size); + entrySize = size; // support getEntrySize call + + //printf ("deleting previous buffer\n"); + unsigned long newsize = (size + 2) * FILTERPAD; + if (newsize > entrybufallocsize) { + if (entrybuf) + delete [] entrybuf; + entrybuf = new char [ newsize ]; + entrybufallocsize = newsize; + } + *entrybuf = 0; + + //printf ("getting text\n"); + zreadtext(key->Testament(), start, (size + 2), entrybuf); + //printf ("got text\n"); + + rawFilter(entrybuf, size, key); + + //printf ("preparing text\n"); + if (!isUnicode()) + preptext(entrybuf); + + if (this->key != key) // free our key if we created a VerseKey + delete key; + + //printf ("returning text\n"); + return entrybuf; + +} + + +bool zText::sameBlock(VerseKey *k1, VerseKey *k2) { + if (k1->Testament() != k2->Testament()) + return false; + + switch (blockType) { + case VERSEBLOCKS: + if (k1->Verse() != k2->Verse()) + return false; + case CHAPTERBLOCKS: + if (k1->Chapter() != k2->Chapter()) + return false; + case BOOKBLOCKS: + if (k1->Book() != k2->Book()) + return false; + } + return true; +} + + +void zText::setEntry(const char *inbuf, long len) { + VerseKey *key = 0; + // see if we have a VerseKey * or decendant + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... 
/******************************************************************************
 * zText::linkEntry - Links the entry at the module's current key to the entry
 *			addressed by inkey
 *
 * ENT:	inkey - key of the source entry to link to
 *
 * Both keys are viewed as VerseKeys.  When a key is not already a VerseKey
 * (or descendant), a temporary VerseKey is built from it and freed before
 * returning.
 */

void zText::linkEntry(const SWKey *inkey) {
	VerseKey *destkey = 0;
	const VerseKey *srckey = 0;
	// try to use the module's own key directly as a VerseKey
	try {
		destkey = SWDYNAMIC_CAST(VerseKey, this->key);
	}
	catch ( ... ) {}
	// the module key is not a VerseKey descendant: build a temporary one
	if (!destkey)
		destkey = new VerseKey(this->key);

	// same treatment for the caller-supplied source key
	try {
		srckey = (const VerseKey *) SWDYNAMIC_CAST(VerseKey, inkey);
	}
	catch ( ... ) {
	}
	// the source key is not a VerseKey descendant: build a temporary one
	if (!srckey)
		srckey = new VerseKey(inkey);

	// NOTE(review): presumably points the destination index entry at the
	// source entry's data -- confirm against zVerse::linkentry
	linkentry(destkey->Testament(), destkey->Index(), srckey->Index());

	if (this->key != destkey) // only delete the temporary we allocated
		delete destkey;

	if (inkey != srckey) // only delete the temporary we allocated
		delete srckey;
}
/******************************************************************************
 * zText::increment - Increments module key a number of entries
 *
 * ENT:	steps - Number of entries to jump; positive moves forward, negative
 *		moves backward
 *
 * On return 'error' is 0, or KEYERR_OUTOFBOUNDS if the key reported an error
 * while stepping (the key is then restored to the last accepted position).
 */

void zText::increment(int steps) {
	long start;
	unsigned short size;
	VerseKey *tmpkey = 0;

	// view the module key as a VerseKey, building a temporary if necessary
	try {
		tmpkey = SWDYNAMIC_CAST(VerseKey, key);
	}
	catch ( ... ) {}
	if (!tmpkey)
		tmpkey = new VerseKey(key);

	// offset/size of the entry we are starting from
	findoffset(tmpkey->Testament(), tmpkey->Index(), &start, &size);

	SWKey lastgood = *tmpkey;	// last position that counted as a step
	while (steps) {
		long laststart = start;
		unsigned short lastsize = size;
		SWKey lasttry = *tmpkey;	// not read again in this function
		(steps > 0) ? (*key)++ : (*key)--;
		// the key moved: drop any temporary view and rebuild it
		if (tmpkey != key)
			delete tmpkey;
		tmpkey = 0;
		try {
			tmpkey = SWDYNAMIC_CAST(VerseKey, key);
		}
		catch ( ... ) {}
		if (!tmpkey)
			tmpkey = new VerseKey(key);

		// the key reported an error: go back to the last accepted position
		if ((error = key->Error())) {
			*key = lastgood;
			break;
		}
		long index = tmpkey->Index();
		findoffset(tmpkey->Testament(), index, &start, &size);

		if (
			(((laststart != start) || (lastsize != size))	// we're a different entry
			&& (start > 0) && (size))	// and we actually have a size
			||(!skipConsecutiveLinks)) {	// or we don't want to skip consecutive links
			steps += (steps < 0) ? 1 : -1;	// one step closer to zero
			lastgood = *tmpkey;
		}
	}
	// any key error is reported to the caller as out-of-bounds
	error = (error) ? KEYERR_OUTOFBOUNDS : 0;

	if (tmpkey != key)	// free the temporary view if we created one
		delete tmpkey;
}
*** +// File Name : Greek2Greek.cpp *** +// *** +// Author info : ---------------------------------------------------------- *** +// Address : 23 Tieroogpark *** +// : Hoewe Str *** +// : Elarduspark X3 *** +// : 0181 *** +// : South Africa *** +// Home Tel: +27 (0)12 345 3166 *** +// Cell No : +27 (0)82 577 4424 *** +// e-mail : wd@isis.co.za *** +// Church WWW : http://www.hatfield.co.za *** +// *** +// Bugfix info : ---------------------------------------------------------- *** +// Bug #1 : Greek Font character 197 converted to b-Greek "6" *** +// Date Fixed : 23 February 1998 *** +//***************************************************************************** + +#include <stdio.h> +#include <string.h> +#include <ctype.h> + +#include "Greek2Greek.h" +#include "GreekChars.h" + +//***************************************************************************** +// Used to convert a string created by using the Greek font supplied with the +// Sword Project to a string that conforms to the b-Greek discussion list +// method of transliteration. +//***************************************************************************** + +unsigned char Greek2bGreek( + unsigned char *sResult, + unsigned char *sGreekText, + int nMaxResultBuflen) +{ + char error; + + unsigned int NoOfChars = ParseGreek(sResult, sGreekText, nMaxResultBuflen); + + if (NoOfChars < strlen((char *)sGreekText)) + error = 1; + else + error = 0; + + return error; +} + +//***************************************************************************** +// Used to convert a string created by using the b-Greek method of +// transliteration to a string that can be converted to a Greek-font readable +// string. 
+//***************************************************************************** + +unsigned char bGreek2Greek( + unsigned char *sResult, + unsigned char *sGreekText, + int nMaxResultBuflen) +{ + unsigned char error; + + unsigned int NoOfChars = ParsebGreek(sResult, sGreekText, nMaxResultBuflen); + + if (NoOfChars < strlen((char *)sGreekText)) + error = 1; + else + error = 0; + + return error; +} + +//***************************************************************************** +// Parse a Greek font created string and return the b-Greek equivalent +//***************************************************************************** + +int ParseGreek( + unsigned char *sResult, + unsigned char *sGreekText, + int nMaxResultBuflen) +{ + int characters = 0; + int index = 0; + unsigned char tmp; + bool iota; // true = IOTA subscript; false = No IOTA + bool breathing; // true = add breathing; false = no breathing + bool rough; // true = rough breathing; false = smooth + + // While text is not equal to NULL pointer + + while (sGreekText[index] && characters < nMaxResultBuflen) + { + iota = breathing = rough = false; + tmp = Font2char(sGreekText[index++], iota, breathing, rough); + + if (breathing) + { + if (rough) // Rough breathing + { + sResult[characters++] = ROUGH; // Add rough breathing "h" + sResult[characters++] = tmp; // Insert char + } + else + sResult[characters++] = tmp; // Insert char + } + else + { + if (iota) // IOTA subscript + { + sResult[characters++] = tmp; // Insert char + sResult[characters++] = IOTA_SUB; // Add Iota subscript + } + else + sResult[characters++] = tmp; // Insert char + } + } + sResult[characters] = 0; // Terminate the string + + return index; +} + +//***************************************************************************** +// Parse a b-Greek string and return the Greek font equivalent +//***************************************************************************** +int ParsebGreek( + unsigned char *sResult, + unsigned char *sGreekText, 
+ int nMaxResultBuflen) +{ + int characters = 0; + int index = 0; + bool iota = false; // true = IOTA subscript; false = No IOTA + bool breathing = false; // true = add breathing; false = no breathing + bool rough = false; // true = rough breathing; false = smooth + bool fSigma = false; // Final sigma flag + bool nChar = true; // New char flag + + // While text is not equal to NULL pointer + + while (*sGreekText || characters < nMaxResultBuflen) + { + if (nChar) + { + if (*sGreekText == (unsigned char)ROUGH) + { + rough = true; + breathing = true; + } + else + { + rough = false; + breathing = true; + } + + nChar = false; + } + else if (isPunctSpace(*(sGreekText + 1))) + { + fSigma = true; + } + else if (*(sGreekText + 1) == (unsigned char)IOTA_SUB) + { + iota = true; + } + + if (*sGreekText != (unsigned char)IOTA_SUB) + { + if (*sGreekText == (unsigned char)' ') + { + nChar = true; + } + + if (breathing) + { + if (rough) + { + // When we read a rough breather we want to increment the pointer + // to the right character before char2Font is called. 
+ + sResult[index++] = + char2Font(*++sGreekText, fSigma, iota, breathing, rough); + + sGreekText++; + characters++; + } + else + { + sResult[index++] = + char2Font(*sGreekText++, fSigma, iota, breathing, rough); + characters++; + } + } + else + { + sResult[index++] = + char2Font(*sGreekText++, fSigma, iota, breathing, rough); + characters++; + } + } + else + { + sGreekText++; + characters++; + } + + fSigma = iota = breathing = rough = false; + } + + sResult[index] = 0; // Terminate the string + + return characters; +} + + +//***************************************************************************** +// Convert a character to a GREEK font character +//***************************************************************************** +unsigned char char2Font( + unsigned char letter, // bGreek letter to convert to Font letter + bool finalSigma, // Is it a final SIGMA + bool iota, // true = IOTA subscript; false = No IOTA + bool breathing, // true = add breathing; false = no breathing + bool rough) // true = rough breathing; false = smooth +{ + unsigned char charFont = 0; + + switch (letter) + { + case ALPHA: // A + if (breathing) + { + if (rough) + { + charFont = (unsigned char)gROUGH_ALPHA; + } + else + charFont = (unsigned char)gNON_ROUGH_ALPHA; + } + else + { + if (iota) + { + charFont = (unsigned char)gIOTA_ALPHA; + } + else + charFont = (unsigned char)gALPHA; + } + + break; + + case BETA: // B + charFont = (unsigned char)gBETA; + + break; + + case CHI: // C + charFont = (unsigned char)gCHI; + + break; + + case DELTA: // D + charFont = (unsigned char)gDELTA; + + break; + + case EPSILON: // E + if (breathing) + { + if (rough) + { + charFont = (unsigned char)gROUGH_EPSILON; + } + else + charFont = (unsigned char)gNON_ROUGH_EPSILON; + } + else + { + charFont = (unsigned char)gEPSILON; + } + + break; + + case PHI: // F + charFont = (unsigned char)gPHI; + + break; + + case GAMMA: // G + charFont = (unsigned char)gGAMMA; + + break; + + case ETA: // H + if (breathing) + { 
+ if (rough) + { + charFont = (unsigned char)gROUGH_ETA; + } + else + charFont = (unsigned char)gNON_ROUGH_ETA; + } + else + { + if (iota) + { + charFont = (unsigned char)gIOTA_ETA; + } + else + charFont = (unsigned char)gETA; + } + + break; + + case IOTA: // I + if (breathing) + { + if (rough) + { + charFont = (unsigned char)gROUGH_IOTA; + } + else + charFont = (unsigned char)gNON_ROUGH_IOTA; + } + else + { + charFont = (unsigned char)gIOTA; + } + + break; + + case KAPPA: // K + charFont = (unsigned char)gKAPPA; + + break; + + case LAMBDA: // L + charFont = (unsigned char)gLAMBDA; + + break; + + case MU: // M + charFont = (unsigned char)gMU; + + break; + + case NU: // N + charFont = (unsigned char)gNU; + + break; + + case OMICRON: // O + if (breathing) + { + if (rough) + { + charFont = (unsigned char)gROUGH_OMICRON; + } + else + charFont = (unsigned char)gNON_ROUGH_OMICRON; + } + else + { + charFont = (unsigned char)gOMICRON; + } + + break; + + case PI: // P + charFont = (unsigned char)gPI; + + break; + + case THETA: // Q + charFont = (unsigned char)gTHETA; + + break; + + case RHO: // R + if (breathing) + { + if (rough) + { + charFont = (unsigned char)gROUGH_RHO; + } + else + charFont = (unsigned char)gNON_ROUGH_RHO; + } + else + { + charFont = (unsigned char)gRHO; + } + + break; + + case SIGMA: // S + if (finalSigma) + charFont = (unsigned char)gSIGMA_END; + else + charFont = (unsigned char)gSIGMA; + + break; + + case TAU: // T + charFont = (unsigned char)gTAU; + + break; + + case UPSILON: // U + if (breathing) + { + if (rough) + { + charFont = (unsigned char)gROUGH_UPSILON; + } + else + charFont = (unsigned char)gNON_ROUGH_UPSILON; + } + else + { + charFont = (unsigned char)gUPSILON; + } + + break; + + case OMEGA: // W + if (breathing) + { + if (rough) + { + charFont = (unsigned char)gROUGH_OMEGA; + } + else + charFont = (unsigned char)gNON_ROUGH_OMEGA; + } + else + { + if (iota) + { + charFont = (unsigned char)gIOTA_OMEGA; + } + else + charFont = (unsigned 
char)gOMEGA; + } + + break; + + case XI: // X + charFont = (unsigned char)gXI; + + break; + + case PSI: // Y + charFont = (unsigned char)gPSI; + + break; + + case ZETA: // Z + charFont = (unsigned char)gZETA; + + break; + + default: + if (ispunct(letter) || isspace(letter)) + { + charFont = getGreekPunct(letter); + } + + if (isdigit(letter)) + charFont = letter; + + break; + } + + return charFont; +} + + +//***************************************************************************** +// Convert a GREEK font character to a character +//***************************************************************************** +unsigned char Font2char( + unsigned char letter, // bGreek letter to convert to Font letter + bool &iota, // true = IOTA subscript; false = No IOTA + bool &breathing, // true = add breathing; false = no breathing + bool &rough) // true = rough breathing; false = smooth +{ + unsigned char character = 0; + + if (getSpecialChar(letter, letter)) + { + switch (letter) + { + case gROUGH_ALPHA: // hA + case gIOTA_ALPHA: // Ai + case gNON_ROUGH_ALPHA: // hA + character = ALPHA; + + if (letter == gIOTA_ALPHA) + iota = true; + else + iota = false; + + if (letter == gROUGH_ALPHA) + { + breathing = true; + rough = true; + } + else + { + breathing = false; + rough = false; + } + + break; + + case gROUGH_EPSILON: // hE + case gNON_ROUGH_EPSILON: // hE + character = EPSILON; + iota = false; + + if (letter == gROUGH_EPSILON) + { + breathing = true; + rough = true; + } + else + { + breathing = false; + rough = false; + } + + break; + + case gROUGH_ETA: // hH + case gIOTA_ETA: // Ei + case gNON_ROUGH_ETA: // hH + character = ETA; + + if (letter == gIOTA_ETA) + iota = true; + else + iota = false; + + if (letter == gROUGH_ETA) + { + breathing = true; + rough = true; + } + else + { + breathing = false; + rough = false; + } + + break; + + case gROUGH_IOTA: // hH + case gNON_ROUGH_IOTA: // hH + character = IOTA; + iota = false; + + if (letter == gROUGH_IOTA) + { + breathing = 
true; + rough = true; + } + else + { + breathing = false; + rough = false; + } + + break; + + case gROUGH_OMICRON: // hH + case gNON_ROUGH_OMICRON: // hH + character = OMICRON; + iota = false; + + if (letter == gROUGH_OMICRON) + { + breathing = true; + rough = true; + } + else + { + breathing = false; + rough = false; + } + + break; + + case gROUGH_RHO: // hR + case gNON_ROUGH_RHO: // hR + character = RHO; + iota = false; + + if (letter == gROUGH_RHO) + { + breathing = true; + rough = true; + } + else + { + breathing = false; + rough = false; + } + + break; + + case gROUGH_UPSILON: // hU + case gNON_ROUGH_UPSILON: // hU + character = UPSILON; + iota = false; + + if (letter == gROUGH_UPSILON) + { + breathing = true; + rough = true; + } + else + { + breathing = false; + rough = false; + } + + break; + + case gROUGH_OMEGA: // hW + case gIOTA_OMEGA: // Wi + case gNON_ROUGH_OMEGA: // hW + character = OMEGA; + + if (letter == gIOTA_OMEGA) + iota = true; + else + iota = false; + + if (letter == gROUGH_OMEGA) + { + breathing = true; + rough = true; + } + else + { + breathing = false; + rough = false; + } + + break; + } + } // if (letter > SPECIAL_GREEK) + else + { + if (letter == gSIGMA_END) + { + character = SIGMA; + } + else if (ispunct(letter) || isspace(letter)) + { + character = getbGreekPunct(letter); + } + else if (isdigit(letter)) + { + character = letter; + } + else + { + character = letter - 32; + } + } + + return character; +} + +//***************************************************************************** +// Identify and return a bGreek letter from a special font char +//***************************************************************************** +bool getSpecialChar(unsigned char Font, unsigned char &letter) +{ + bool Yes = false; + letter = Font; + + if (Font >= 133 && Font <= 144) + { + letter = gIOTA; + Font = gIOTA; + } + + if (Font >= 154 && Font <= 159) + { + letter = gEPSILON; + Font = gEPSILON; + } + + if (Font >= 163 && Font <= 171) + { + letter = 
gALPHA; + Font = gALPHA; + } + + if (Font >= 172 && Font <= 182) + { + letter = gIOTA_ALPHA; + Font = gIOTA_ALPHA; + Yes = true; + } + + if (Font >= 187 && Font <= 195) + { + letter = gETA; + Font = gETA; + } + + if (Font >= 197 && Font <= 207) + { + letter = gIOTA_ETA; + Font = gIOTA_ETA; + Yes = true; + } + + if ((Font >= 210 && Font <= 215) || Font == 253) + { + letter = gOMICRON; + Font = gOMICRON; + } + + if (Font >= 218 && Font <= 229) + { + letter = gUPSILON; + Font = gUPSILON; + } + + if (Font >= 232 && Font <= 240) + { + letter = gOMEGA; + Font = gOMEGA; + } + + if (Font >= 241 && Font <= 251) + { + letter = gIOTA_OMEGA; + Font = gIOTA_OMEGA; + Yes = true; + } + + Yes = SpecialGreek(Font); + + return Yes; +} + + +//***************************************************************************** +// true if the font character is a special character; false it isn't +//***************************************************************************** + +bool SpecialGreek(unsigned char Font) +{ + bool res = false; + + switch (Font) + { + case gROUGH_ALPHA: + case gROUGH_EPSILON: + case gROUGH_ETA: + case gROUGH_IOTA: + case gROUGH_OMICRON: + case gROUGH_RHO: + case gROUGH_UPSILON: + case gROUGH_OMEGA: + case gIOTA_ALPHA: + case gIOTA_ETA: + case gIOTA_OMEGA: + case gNON_ROUGH_ALPHA: + case gNON_ROUGH_EPSILON: + case gNON_ROUGH_ETA: + case gNON_ROUGH_IOTA: + case gNON_ROUGH_OMICRON: + case gNON_ROUGH_RHO: + case gNON_ROUGH_UPSILON: + case gNON_ROUGH_OMEGA: + res = true; + + break; + } + + return res; +} + + +//***************************************************************************** +// Return Greek font puntuation from bGreek punstuation +//***************************************************************************** + +unsigned char getGreekPunct(unsigned char bGreek) +{ + unsigned char Font; + + switch (bGreek) + { + case COMMA: + Font = gCOMMA; + break; + + case STOP: + Font = gSTOP; + break; + + case SEMI_COLON: + Font = gSEMI_COLON; + break; + + case 
QUESTION: + Font = gQUESTION; + break; + + default: + Font = ' '; + break; + } + + return Font; +} + + +//***************************************************************************** +// Return bGreek puntuation from Greek font punstuation +//***************************************************************************** + +unsigned char getbGreekPunct(unsigned char Greek) +{ + unsigned char character; + + switch (Greek) + { + case gCOMMA: + character = COMMA; + break; + + case gSTOP: + character = STOP; + break; + + case gSEMI_COLON: + character = SEMI_COLON; + break; + + case gQUESTION: + character = QUESTION; + break; + + default: + character = ' '; + break; + } + + return character; +} + + +//***************************************************************************** +// Is the character punctuation or a space: true it is, false it isn't +//***************************************************************************** + +bool isPunctSpace(unsigned char c) +{ + return (ispunct(c) || isspace(c) || c == 0) ? 
true : false; +} + +#ifdef __TEST + +int main() +{ + unsigned char *sGreekText = (unsigned char *) + "1„£kwboj qeoà kaˆ kur…ou „hsoà cristoà doàloj ta‹j dèdeka fula‹j ta‹j ™n tÍ diaspor´ ca…rein."; + unsigned char *sResult = new unsigned char[100]; + + char result = Greek2bGreek( + sResult, + sGreekText, + 100); + + strset((char *)sResult, 0); + strset((char *)sGreekText, 0); + + sGreekText = (unsigned char *)"18 EIS AFESIN TWN hAMARTWN hUMWN?"; + result = bGreek2Greek( + sResult, + sGreekText, + 33); + + //delete[] sGreekText; + delete[] sResult; +} + +#endif // __TEST diff --git a/src/utilfuns/Makefile.am b/src/utilfuns/Makefile.am new file mode 100644 index 0000000..e7b2258 --- /dev/null +++ b/src/utilfuns/Makefile.am @@ -0,0 +1,19 @@ + +utilfunsdir = $(top_srcdir)/src/utilfuns +libsword_la_SOURCES += $(utilfunsdir)/Greek2Greek.cpp +libsword_la_SOURCES += $(utilfunsdir)/utilstr.cpp +libsword_la_SOURCES += $(utilfunsdir)/unixstr.cpp +libsword_la_SOURCES += $(utilfunsdir)/swunicod.cpp +libsword_la_SOURCES += $(utilfunsdir)/swversion.cpp + +if MINGW +SWREGEX = $(utilfunsdir)/regex.c +else +SWREGEX = +endif +libsword_la_SOURCES += $(SWREGEX) + + +libsword_la_SOURCES += $(utilfunsdir)/roman.c + + diff --git a/src/utilfuns/roman.c b/src/utilfuns/roman.c new file mode 100644 index 0000000..3c6d190 --- /dev/null +++ b/src/utilfuns/roman.c @@ -0,0 +1,82 @@ +/* + * roman.c + * Copyright 2001 by CrossWire Bible Society + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
/* Value of one Roman numeral letter (either case); 0 for any other character. */
static int roman_digit_value(char ch) {
	switch (ch) {
	case 'i': case 'I': return 1;
	case 'v': case 'V': return 5;
	case 'x': case 'X': return 10;
	case 'l': case 'L': return 50;
	case 'c': case 'C': return 100;
	case 'd': case 'D': return 500;
	case 'm': case 'M': return 1000;
	default:            return 0;
	}
}

/*
 * from_rom - converts a Roman numeral string to its integer value
 *
 * str - NUL-terminated numeral; upper and lower case letters are accepted,
 *       any other character counts as 0
 *
 * Returns the value of the numeral (0 for an empty string).
 *
 * Works in a single pass without allocating: 'pending' holds the value of the
 * previous letter, already reduced by whatever was subtracted from it.  When
 * the current letter is larger than the pending value, the pending value is
 * subtracted from it (e.g. the I in IV); otherwise the pending value is added
 * to the total.  This produces the same results as the earlier array-based
 * version, which dereferenced an unchecked calloc() result.
 */
int from_rom(const char* str) {
	int total = 0;
	int pending = 0;
	int i;

	for (i = 0; str[i]; i++) {
		int value = roman_digit_value(str[i]);

		if (value > pending)
			value -= pending;	/* previous letter was a subtractive prefix */
		else
			total += pending;	/* previous letter stands on its own */

		pending = value;
	}

	return total + pending;
}
/** Converts a UTF-8 encoded 1-6 byte sequence into its UTF-32 code point value
 * @param utf8 pointer to the first byte of the sequence; the bytes that follow
 *             are read only as far as the lead byte announces, and reading
 *             stops at the first byte that is not a continuation byte
 * @return the decoded code point, or 0xffff if the sequence is malformed
 *         (stray continuation byte as lead, invalid lead byte 0xFE/0xFF, or a
 *         following byte that is not of the form 10xxxxxx)
 */
unsigned long UTF8to32 (unsigned char * utf8) {

	unsigned char lead = utf8[0];
	unsigned char count;

	// Count the leading 1 bits of the first byte; afterwards 'lead' holds the
	// payload bits shifted left by 'count' positions.
	for (count = 0; lead & 0x80; count++) lead <<= 1;

	if (!count) {
		return utf8[0];		// plain 7-bit character
	}
	if (count == 1 || count > 6) {
		return 0xffff;		// continuation byte or invalid lead byte
	}

	// Undo the shift to recover the payload bits of the lead byte.
	unsigned long utf32 = lead >> count;

	// 'count' includes the lead byte, so count - 1 continuation bytes follow.
	for (unsigned char n = 1; n < count; n++) {
		if ((utf8[n] & 0xc0) != 0x80) {
			return 0xffff;
		}
		utf32 <<= 6;
		utf32 |= (utf8[n] & 0x3f);
	}
	return utf32;
}
/******************************************************************************
 * getconfent - Get the value of an entry in a configuration file
 *
 * NOTE: this is currently a stub.  None of the arguments are read, nothing is
 * written to buf, and the function always returns 0.
 *
 * ENT:	filename	- File name in which to look for entry
 *	entryname	- Entry of which to obtain value
 *	buf		- Buffer to store entry value
 *	len		- Maximum length to write into buffer
 *
 * RET:	error status (always 0 in this implementation)
 */

char getconfent(char *filename, char *entryname, char *buf, int len)
{
	return 0;	// not implemented: no lookup is performed
}
/******************************************************************************
 * strstrip - Strips leading and trailing whitespace (space, tab, line feed,
 *		carriage return) from a string, in place
 *
 * ENT:	istr - string to strip; modified in place
 *
 * RET:	istr
 */

char *strstrip(char *istr) {
	const int length = strlen(istr);
	if (length < 1)
		return istr;

	// Walk back from the last character, overwriting trailing whitespace
	// with terminators.  The first character is left for the forward scan.
	char *last = istr + (length - 1);
	while ((last > istr) && *last && strchr(" \t\n\r", *last)) {
		*last = 0;
		--last;
	}

	// Find the first character that is not whitespace.
	char *first = istr;
	while (*first && strchr(" \t\n\r", *first))
		++first;

	// Slide the remaining text to the front and terminate it.
	const int kept = (last - first) + 1;
	memmove(istr, first, kept);
	istr[kept] = 0;

	return istr;
}
+ */ + +const char *stristr(const char *s1, const char *s2) { + int tLen = strlen(s2); + int cLen = strlen(s1); + char *target = new char [ tLen + 1 ]; + int i, j; + const char *retVal = 0; + + strcpy(target, s2); + for (i = 0; i < tLen; i++) + target[i] = SW_toupper(target[i]); + + for (i = 0; i < (cLen - tLen)+1; i++) { + if (SW_toupper(s1[i]) == (unsigned char)*target) { + for (j = 1; j < tLen; j++) { + if (SW_toupper(s1[i+j]) != (unsigned char)target[j]) + break; + } + if (j == tLen) { + retVal = s1+i; + break; + } + } + } + delete [] target; + return retVal; +} + +/****************************************************************************** + * strnicmp - compares the first n bytes of 2 string ignoring case + * + * ENT: compares s1 to s2 comparing the first n byte ingnoring case + * + * RET: same as strcmp + */ + +const char strnicmp(const char *s1, const char *s2, int len) { + int tLen = strlen(s2); + int cLen = strlen(s1); + char diff; + int i; + for (i = 0; ((i < len) && (i < tLen) && (i < cLen)); i++) { + if ((diff = SW_toupper(*s1) - SW_toupper(*s2))) + return diff; + s1++; + s2++; + } + return (i < len) ? cLen - tLen : 0; +} + +/****************************************************************************** + * strlenw - Scans a string for trailing 0x0000 and return size in BYTES + * + * ENT: target - string for which to determine size + * + * RET: length in BYTES + * If s2 does not occur in s1, returns null. 
*/

unsigned int strlenw(const char *s1) {
	return static_cast<unsigned int>(strlen(s1));
// utf8 says no null in string except terminator, so below code is overkill
/*
	const char *ch = s1;
	if (!*ch)
		ch++;
	while (*ch) {
		ch++;
		if (!*ch)
			ch++;
	}
	return (unsigned int)(ch - s1) - 1;
*/
}


/******************************************************************************
 * toupperstr - converts a string to uppercase in place, one byte at a time
 *
 * ENT:	buf	- null-terminated string to convert
 *
 * RET:	buf
 */

char *toupperstr(char *buf) {
	// Convert and store each byte before advancing.  The former
	// `*buf = SW_toupper(*buf++)` modified and used buf in one unsequenced
	// expression, so which byte received the result was not defined.
	for (char *ch = buf; *ch; ch++)
		*ch = SW_toupper(*ch);

	return buf;
}


/******************************************************************************
 * toupperstr_utf8 - converts a UTF-8 string to uppercase in place
 *
 * ENT:	buf	- null-terminated string to convert
 *
 * RET:	buf
 */

char *toupperstr_utf8(char *buf) {
	char *ret = buf;

#ifndef _ICU_
	// try to decide if it's worth trying to toupper.  Do we have more
	// characters that are probably lower latin than not?
	// Every 7-bit byte votes +1, every byte with the high bit set votes -1.
	// The test is done on the unsigned value so the vote does not depend
	// on whether plain char is signed on this platform.
	long performOp = 0;
	for (const char *ch = buf; *ch; ch++)
		performOp += (static_cast<unsigned char>(*ch) < 0x80) ? 1 : -1;

	// Only a strict majority of 7-bit bytes triggers the byte-wise
	// conversion; a negative vote must leave the text untouched.
	if (performOp > 0)
		toupperstr(buf);
#else
	UErrorCode err = U_ZERO_ERROR;
	UConverter *conv = ucnv_open("UTF-8", &err);
	UnicodeString str(buf, -1, conv, err);
	UnicodeString ustr = str.toUpper();
	// NOTE(review): the capacity given to extract() assumes the caller's
	// buffer can hold 2*strlen(buf) bytes; nothing here verifies that --
	// confirm against callers.
	ustr.extract(ret, strlen(ret)*2, conv, err);
	ucnv_close(conv);
#endif

	return ret;
}
diff --git a/src/utilfuns/zlib/adler32.c b/src/utilfuns/zlib/adler32.c new file mode 100644 index 0000000..14e3abd --- /dev/null +++ b/src/utilfuns/zlib/adler32.c @@ -0,0 +1,48 @@ +/* adler32.c -- compute the Adler-32 checksum of a data stream + * Copyright (C) 1995-1998 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id: adler32.c,v 1.1 2001/03/23 09:00:15 scribe Exp $ */ + +#include "zlib.h" + +#define BASE 65521L /* largest prime smaller than 65536 */ +#define NMAX 5552 +/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ + +#define DO1(buf,i) {s1 += buf[i]; s2 += s1;} +#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); +#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); +#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); +#define DO16(buf) DO8(buf,0); DO8(buf,8); + +/* ========================================================================= */ +uLong ZEXPORT adler32(adler, buf, len) + uLong adler; + const Bytef *buf; + uInt len; +{ + unsigned long s1 = adler & 0xffff; + unsigned long s2 = (adler >> 16) & 0xffff; + int k; + + if (buf == Z_NULL) return 1L; + + while (len > 0) { + k = len < NMAX ? len : NMAX; + len -= k; + while (k >= 16) { + DO16(buf); + buf += 16; + k -= 16; + } + if (k != 0) do { + s1 += *buf++; + s2 += s1; + } while (--k); + s1 %= BASE; + s2 %= BASE; + } + return (s2 << 16) | s1; +} diff --git a/src/utilfuns/zlib/compress.c b/src/utilfuns/zlib/compress.c new file mode 100644 index 0000000..df5fca8 --- /dev/null +++ b/src/utilfuns/zlib/compress.c @@ -0,0 +1,68 @@ +/* compress.c -- compress a memory buffer + * Copyright (C) 1995-1998 Jean-loup Gailly. 
+ * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id: compress.c,v 1.1 2001/03/23 09:00:15 scribe Exp $ */ + +#include "zlib.h" + +/* =========================================================================== + Compresses the source buffer into the destination buffer. The level + parameter has the same meaning as in deflateInit. sourceLen is the byte + length of the source buffer. Upon entry, destLen is the total size of the + destination buffer, which must be at least 0.1% larger than sourceLen plus + 12 bytes. Upon exit, destLen is the actual size of the compressed buffer. + + compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, + Z_STREAM_ERROR if the level parameter is invalid. +*/ +int ZEXPORT compress2 (dest, destLen, source, sourceLen, level) + Bytef *dest; + uLongf *destLen; + const Bytef *source; + uLong sourceLen; + int level; +{ + z_stream stream; + int err; + + stream.next_in = (Bytef*)source; + stream.avail_in = (uInt)sourceLen; +#ifdef MAXSEG_64K + /* Check for source > 64K on 16-bit machine: */ + if ((uLong)stream.avail_in != sourceLen) return Z_BUF_ERROR; +#endif + stream.next_out = dest; + stream.avail_out = (uInt)*destLen; + if ((uLong)stream.avail_out != *destLen) return Z_BUF_ERROR; + + stream.zalloc = (alloc_func)0; + stream.zfree = (free_func)0; + stream.opaque = (voidpf)0; + + err = deflateInit(&stream, level); + if (err != Z_OK) return err; + + err = deflate(&stream, Z_FINISH); + if (err != Z_STREAM_END) { + deflateEnd(&stream); + return err == Z_OK ? 
Z_BUF_ERROR : err; + } + *destLen = stream.total_out; + + err = deflateEnd(&stream); + return err; +} + +/* =========================================================================== + */ +int ZEXPORT compress (dest, destLen, source, sourceLen) + Bytef *dest; + uLongf *destLen; + const Bytef *source; + uLong sourceLen; +{ + return compress2(dest, destLen, source, sourceLen, Z_DEFAULT_COMPRESSION); +} diff --git a/src/utilfuns/zlib/crc32.c b/src/utilfuns/zlib/crc32.c new file mode 100644 index 0000000..fe80e8a --- /dev/null +++ b/src/utilfuns/zlib/crc32.c @@ -0,0 +1,162 @@ +/* crc32.c -- compute the CRC-32 of a data stream + * Copyright (C) 1995-1998 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id: crc32.c,v 1.1 2001/03/23 09:00:15 scribe Exp $ */ + +#include "zlib.h" + +#define local static + +#ifdef DYNAMIC_CRC_TABLE + +local int crc_table_empty = 1; +local uLongf crc_table[256]; +local void make_crc_table OF((void)); + +/* + Generate a table for a byte-wise 32-bit CRC calculation on the polynomial: + x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. + + Polynomials over GF(2) are represented in binary, one bit per coefficient, + with the lowest powers in the most significant bit. Then adding polynomials + is just exclusive-or, and multiplying a polynomial by x is a right shift by + one. If we call the above polynomial p, and represent a byte as the + polynomial q, also with the lowest power in the most significant bit (so the + byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p, + where a mod b means the remainder after dividing a by b. + + This calculation is done using the shift-register method of multiplying and + taking the remainder. 
The register is initialized to zero, and for each + incoming bit, x^32 is added mod p to the register if the bit is a one (where + x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by + x (which is shifting right by one and adding x^32 mod p if the bit shifted + out is a one). We start with the highest power (least significant bit) of + q and repeat for all eight bits of q. + + The table is simply the CRC of all possible eight bit values. This is all + the information needed to generate CRC's on data a byte at a time for all + combinations of CRC register values and incoming bytes. +*/ +local void make_crc_table() +{ + uLong c; + int n, k; + uLong poly; /* polynomial exclusive-or pattern */ + /* terms of polynomial defining this crc (except x^32): */ + static const Byte p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26}; + + /* make exclusive-or pattern from polynomial (0xedb88320L) */ + poly = 0L; + for (n = 0; n < sizeof(p)/sizeof(Byte); n++) + poly |= 1L << (31 - p[n]); + + for (n = 0; n < 256; n++) + { + c = (uLong)n; + for (k = 0; k < 8; k++) + c = c & 1 ? 
poly ^ (c >> 1) : c >> 1; + crc_table[n] = c; + } + crc_table_empty = 0; +} +#else +/* ======================================================================== + * Table of CRC-32's of all single-byte values (made by make_crc_table) + */ +local const uLongf crc_table[256] = { + 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, + 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, + 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, + 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, + 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, + 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, + 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, + 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, + 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, + 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, + 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, + 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, + 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, + 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, + 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, + 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, + 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, + 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, + 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, + 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, + 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, + 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, + 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, + 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, + 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, + 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 
0xedb88320L, 0x9abfb3b6L, + 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, + 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, + 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, + 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, + 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, + 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, + 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, + 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, + 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, + 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, + 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, + 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, + 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, + 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, + 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, + 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, + 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, + 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, + 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, + 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, + 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, + 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, + 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, + 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, + 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, + 0x2d02ef8dL +}; +#endif + +/* ========================================================================= + * This function can be used by asm versions of crc32() + */ +const uLongf * ZEXPORT get_crc_table() +{ +#ifdef DYNAMIC_CRC_TABLE + if (crc_table_empty) make_crc_table(); +#endif + return 
(const uLongf *)crc_table; +} + +/* ========================================================================= */ +#define DO1(buf) crc = crc_table[((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8); +#define DO2(buf) DO1(buf); DO1(buf); +#define DO4(buf) DO2(buf); DO2(buf); +#define DO8(buf) DO4(buf); DO4(buf); + +/* ========================================================================= */ +uLong ZEXPORT crc32(crc, buf, len) + uLong crc; + const Bytef *buf; + uInt len; +{ + if (buf == Z_NULL) return 0L; +#ifdef DYNAMIC_CRC_TABLE + if (crc_table_empty) + make_crc_table(); +#endif + crc = crc ^ 0xffffffffL; + while (len >= 8) + { + DO8(buf); + len -= 8; + } + if (len) do { + DO1(buf); + } while (--len); + return crc ^ 0xffffffffL; +} diff --git a/src/utilfuns/zlib/deflate.c b/src/utilfuns/zlib/deflate.c new file mode 100644 index 0000000..a232eea --- /dev/null +++ b/src/utilfuns/zlib/deflate.c @@ -0,0 +1,1350 @@ +/* deflate.c -- compress data using the deflation algorithm + * Copyright (C) 1995-1998 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * ALGORITHM + * + * The "deflation" process depends on being able to identify portions + * of the input text which are identical to earlier input (within a + * sliding window trailing behind the input currently being processed). + * + * The most straightforward technique turns out to be the fastest for + * most input files: try all possible matches and select the longest. + * The key feature of this algorithm is that insertions into the string + * dictionary are very simple and thus fast, and deletions are avoided + * completely. Insertions are performed at each input character, whereas + * string matches are performed only when the previous match ends. So it + * is preferable to spend more time in matches to allow very fast string + * insertions and avoid deletions. The matching algorithm for small + * strings is inspired from that of Rabin & Karp. 
A brute force approach + * is used to find longer strings when a small match has been found. + * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze + * (by Leonid Broukhis). + * A previous version of this file used a more sophisticated algorithm + * (by Fiala and Greene) which is guaranteed to run in linear amortized + * time, but has a larger average cost, uses more memory and is patented. + * However the F&G algorithm may be faster for some highly redundant + * files if the parameter max_chain_length (described below) is too large. + * + * ACKNOWLEDGEMENTS + * + * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and + * I found it in 'freeze' written by Leonid Broukhis. + * Thanks to many people for bug reports and testing. + * + * REFERENCES + * + * Deutsch, L.P.,"DEFLATE Compressed Data Format Specification". + * Available in ftp://ds.internic.net/rfc/rfc1951.txt + * + * A description of the Rabin and Karp algorithm is given in the book + * "Algorithms" by R. Sedgewick, Addison-Wesley, p252. + * + * Fiala,E.R., and Greene,D.H. + * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595 + * + */ + +/* @(#) $Id: deflate.c,v 1.1 2001/03/23 09:00:15 scribe Exp $ */ + +#include "deflate.h" + +const char deflate_copyright[] = + " deflate 1.1.3 Copyright 1995-1998 Jean-loup Gailly "; +/* + If you use the zlib library in a product, an acknowledgment is welcome + in the documentation of your product. If for some reason you cannot + include such an acknowledgment, I would appreciate that you keep this + copyright string in the executable of your product. + */ + +/* =========================================================================== + * Function prototypes. 
+ */ +typedef enum { + need_more, /* block not completed, need more input or more output */ + block_done, /* block flush performed */ + finish_started, /* finish started, need only more output at next deflate */ + finish_done /* finish done, accept no more input or output */ +} block_state; + +typedef block_state (*compress_func) OF((deflate_state *s, int flush)); +/* Compression function. Returns the block state after the call. */ + +local void fill_window OF((deflate_state *s)); +local block_state deflate_stored OF((deflate_state *s, int flush)); +local block_state deflate_fast OF((deflate_state *s, int flush)); +local block_state deflate_slow OF((deflate_state *s, int flush)); +local void lm_init OF((deflate_state *s)); +local void putShortMSB OF((deflate_state *s, uInt b)); +local void flush_pending OF((z_streamp strm)); +local int read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); +#ifdef ASMV + void match_init OF((void)); /* asm code initialization */ + uInt longest_match OF((deflate_state *s, IPos cur_match)); +#else +local uInt longest_match OF((deflate_state *s, IPos cur_match)); +#endif + +#ifdef DEBUG +local void check_match OF((deflate_state *s, IPos start, IPos match, + int length)); +#endif + +/* =========================================================================== + * Local data + */ + +#define NIL 0 +/* Tail of hash chains */ + +#ifndef TOO_FAR +# define TOO_FAR 4096 +#endif +/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */ + +#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) +/* Minimum amount of lookahead, except at the end of the input file. + * See deflate.c for comments about the MIN_MATCH+1. + */ + +/* Values for max_lazy_match, good_match and max_chain_length, depending on + * the desired pack level (0..9). The values given below have been tuned to + * exclude worst case performance for pathological files. Better values may be + * found for specific files. 
+ */ +typedef struct config_s { + ush good_length; /* reduce lazy search above this match length */ + ush max_lazy; /* do not perform lazy search above this match length */ + ush nice_length; /* quit search above this match length */ + ush max_chain; + compress_func func; +} config; + +local const config configuration_table[10] = { +/* good lazy nice chain */ +/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ +/* 1 */ {4, 4, 8, 4, deflate_fast}, /* maximum speed, no lazy matches */ +/* 2 */ {4, 5, 16, 8, deflate_fast}, +/* 3 */ {4, 6, 32, 32, deflate_fast}, + +/* 4 */ {4, 4, 16, 16, deflate_slow}, /* lazy matches */ +/* 5 */ {8, 16, 32, 32, deflate_slow}, +/* 6 */ {8, 16, 128, 128, deflate_slow}, +/* 7 */ {8, 32, 128, 256, deflate_slow}, +/* 8 */ {32, 128, 258, 1024, deflate_slow}, +/* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* maximum compression */ + +/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4 + * For deflate_fast() (levels <= 3) good is ignored and lazy has a different + * meaning. + */ + +#define EQUAL 0 +/* result of memcmp for equal strings */ + +struct static_tree_desc_s {int dummy;}; /* for buggy compilers */ + +/* =========================================================================== + * Update a hash value with the given input byte + * IN assertion: all calls to to UPDATE_HASH are made with consecutive + * input characters, so that a running hash key can be computed from the + * previous key instead of complete recalculation each time. + */ +#define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask) + + +/* =========================================================================== + * Insert string str in the dictionary and set match_head to the previous head + * of the hash chain (the most recent string with same hash key). Return + * the previous length of the hash chain. 
+ * If this file is compiled with -DFASTEST, the compression level is forced + * to 1, and no hash chains are maintained. + * IN assertion: all calls to to INSERT_STRING are made with consecutive + * input characters and the first MIN_MATCH bytes of str are valid + * (except for the last MIN_MATCH-1 bytes of the input file). + */ +#ifdef FASTEST +#define INSERT_STRING(s, str, match_head) \ + (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ + match_head = s->head[s->ins_h], \ + s->head[s->ins_h] = (Pos)(str)) +#else +#define INSERT_STRING(s, str, match_head) \ + (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ + s->prev[(str) & s->w_mask] = match_head = s->head[s->ins_h], \ + s->head[s->ins_h] = (Pos)(str)) +#endif + +/* =========================================================================== + * Initialize the hash table (avoiding 64K overflow for 16 bit systems). + * prev[] will be initialized on the fly. + */ +#define CLEAR_HASH(s) \ + s->head[s->hash_size-1] = NIL; \ + zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head)); + +/* ========================================================================= */ +int ZEXPORT deflateInit_(strm, level, version, stream_size) + z_streamp strm; + int level; + const char *version; + int stream_size; +{ + return deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL, + Z_DEFAULT_STRATEGY, version, stream_size); + /* To do: ignore strm->next_in if we use it as window */ +} + +/* ========================================================================= */ +int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, + version, stream_size) + z_streamp strm; + int level; + int method; + int windowBits; + int memLevel; + int strategy; + const char *version; + int stream_size; +{ + deflate_state *s; + int noheader = 0; + static const char* my_version = ZLIB_VERSION; + + ushf *overlay; + /* We overlay pending_buf and d_buf+l_buf. 
This works since the average + * output size for (length,distance) codes is <= 24 bits. + */ + + if (version == Z_NULL || version[0] != my_version[0] || + stream_size != sizeof(z_stream)) { + return Z_VERSION_ERROR; + } + if (strm == Z_NULL) return Z_STREAM_ERROR; + + strm->msg = Z_NULL; + if (strm->zalloc == Z_NULL) { + strm->zalloc = zcalloc; + strm->opaque = (voidpf)0; + } + if (strm->zfree == Z_NULL) strm->zfree = zcfree; + + if (level == Z_DEFAULT_COMPRESSION) level = 6; +#ifdef FASTEST + level = 1; +#endif + + if (windowBits < 0) { /* undocumented feature: suppress zlib header */ + noheader = 1; + windowBits = -windowBits; + } + if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED || + windowBits < 8 || windowBits > 15 || level < 0 || level > 9 || + strategy < 0 || strategy > Z_HUFFMAN_ONLY) { + return Z_STREAM_ERROR; + } + s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state)); + if (s == Z_NULL) return Z_MEM_ERROR; + strm->state = (struct internal_state FAR *)s; + s->strm = strm; + + s->noheader = noheader; + s->w_bits = windowBits; + s->w_size = 1 << s->w_bits; + s->w_mask = s->w_size - 1; + + s->hash_bits = memLevel + 7; + s->hash_size = 1 << s->hash_bits; + s->hash_mask = s->hash_size - 1; + s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); + + s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); + s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); + s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); + + s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ + + overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2); + s->pending_buf = (uchf *) overlay; + s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L); + + if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || + s->pending_buf == Z_NULL) { + strm->msg = (char*)ERR_MSG(Z_MEM_ERROR); + deflateEnd (strm); + return Z_MEM_ERROR; + } + s->d_buf = overlay + s->lit_bufsize/sizeof(ush); + s->l_buf = 
s->pending_buf + (1+sizeof(ush))*s->lit_bufsize; + + s->level = level; + s->strategy = strategy; + s->method = (Byte)method; + + return deflateReset(strm); +} + +/* ========================================================================= */ +int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength) + z_streamp strm; + const Bytef *dictionary; + uInt dictLength; +{ + deflate_state *s; + uInt length = dictLength; + uInt n; + IPos hash_head = 0; + + if (strm == Z_NULL || strm->state == Z_NULL || dictionary == Z_NULL || + strm->state->status != INIT_STATE) return Z_STREAM_ERROR; + + s = strm->state; + strm->adler = adler32(strm->adler, dictionary, dictLength); + + if (length < MIN_MATCH) return Z_OK; + if (length > MAX_DIST(s)) { + length = MAX_DIST(s); +#ifndef USE_DICT_HEAD + dictionary += dictLength - length; /* use the tail of the dictionary */ +#endif + } + zmemcpy(s->window, dictionary, length); + s->strstart = length; + s->block_start = (long)length; + + /* Insert all strings in the hash table (except for the last two bytes). + * s->lookahead stays null, so s->ins_h will be recomputed at the next + * call of fill_window. 
+ */ + s->ins_h = s->window[0]; + UPDATE_HASH(s, s->ins_h, s->window[1]); + for (n = 0; n <= length - MIN_MATCH; n++) { + INSERT_STRING(s, n, hash_head); + } + if (hash_head) hash_head = 0; /* to make compiler happy */ + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateReset (strm) + z_streamp strm; +{ + deflate_state *s; + + if (strm == Z_NULL || strm->state == Z_NULL || + strm->zalloc == Z_NULL || strm->zfree == Z_NULL) return Z_STREAM_ERROR; + + strm->total_in = strm->total_out = 0; + strm->msg = Z_NULL; /* use zfree if we ever allocate msg dynamically */ + strm->data_type = Z_UNKNOWN; + + s = (deflate_state *)strm->state; + s->pending = 0; + s->pending_out = s->pending_buf; + + if (s->noheader < 0) { + s->noheader = 0; /* was set to -1 by deflate(..., Z_FINISH); */ + } + s->status = s->noheader ? BUSY_STATE : INIT_STATE; + strm->adler = 1; + s->last_flush = Z_NO_FLUSH; + + _tr_init(s); + lm_init(s); + + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateParams(strm, level, strategy) + z_streamp strm; + int level; + int strategy; +{ + deflate_state *s; + compress_func func; + int err = Z_OK; + + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + s = strm->state; + + if (level == Z_DEFAULT_COMPRESSION) { + level = 6; + } + if (level < 0 || level > 9 || strategy < 0 || strategy > Z_HUFFMAN_ONLY) { + return Z_STREAM_ERROR; + } + func = configuration_table[s->level].func; + + if (func != configuration_table[level].func && strm->total_in != 0) { + /* Flush the last buffer: */ + err = deflate(strm, Z_PARTIAL_FLUSH); + } + if (s->level != level) { + s->level = level; + s->max_lazy_match = configuration_table[level].max_lazy; + s->good_match = configuration_table[level].good_length; + s->nice_match = configuration_table[level].nice_length; + s->max_chain_length = configuration_table[level].max_chain; + } + 
s->strategy = strategy; + return err; +} + +/* ========================================================================= + * Put a short in the pending buffer. The 16-bit value is put in MSB order. + * IN assertion: the stream state is correct and there is enough room in + * pending_buf. + */ +local void putShortMSB (s, b) + deflate_state *s; + uInt b; +{ + put_byte(s, (Byte)(b >> 8)); + put_byte(s, (Byte)(b & 0xff)); +} + +/* ========================================================================= + * Flush as much pending output as possible. All deflate() output goes + * through this function so some applications may wish to modify it + * to avoid allocating a large strm->next_out buffer and copying into it. + * (See also read_buf()). + */ +local void flush_pending(strm) + z_streamp strm; +{ + unsigned len = strm->state->pending; + + if (len > strm->avail_out) len = strm->avail_out; + if (len == 0) return; + + zmemcpy(strm->next_out, strm->state->pending_out, len); + strm->next_out += len; + strm->state->pending_out += len; + strm->total_out += len; + strm->avail_out -= len; + strm->state->pending -= len; + if (strm->state->pending == 0) { + strm->state->pending_out = strm->state->pending_buf; + } +} + +/* ========================================================================= */ +int ZEXPORT deflate (strm, flush) + z_streamp strm; + int flush; +{ + int old_flush; /* value of flush param for previous deflate call */ + deflate_state *s; + + if (strm == Z_NULL || strm->state == Z_NULL || + flush > Z_FINISH || flush < 0) { + return Z_STREAM_ERROR; + } + s = strm->state; + + if (strm->next_out == Z_NULL || + (strm->next_in == Z_NULL && strm->avail_in != 0) || + (s->status == FINISH_STATE && flush != Z_FINISH)) { + ERR_RETURN(strm, Z_STREAM_ERROR); + } + if (strm->avail_out == 0) ERR_RETURN(strm, Z_BUF_ERROR); + + s->strm = strm; /* just in case */ + old_flush = s->last_flush; + s->last_flush = flush; + + /* Write the zlib header */ + if (s->status == INIT_STATE) { 
+ + uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8; + uInt level_flags = (s->level-1) >> 1; + + if (level_flags > 3) level_flags = 3; + header |= (level_flags << 6); + if (s->strstart != 0) header |= PRESET_DICT; + header += 31 - (header % 31); + + s->status = BUSY_STATE; + putShortMSB(s, header); + + /* Save the adler32 of the preset dictionary: */ + if (s->strstart != 0) { + putShortMSB(s, (uInt)(strm->adler >> 16)); + putShortMSB(s, (uInt)(strm->adler & 0xffff)); + } + strm->adler = 1L; + } + + /* Flush as much pending output as possible */ + if (s->pending != 0) { + flush_pending(strm); + if (strm->avail_out == 0) { + /* Since avail_out is 0, deflate will be called again with + * more output space, but possibly with both pending and + * avail_in equal to zero. There won't be anything to do, + * but this is not an error situation so make sure we + * return OK instead of BUF_ERROR at next call of deflate: + */ + s->last_flush = -1; + return Z_OK; + } + + /* Make sure there is something to do and avoid duplicate consecutive + * flushes. For repeated and useless calls with Z_FINISH, we keep + * returning Z_STREAM_END instead of Z_BUFF_ERROR. + */ + } else if (strm->avail_in == 0 && flush <= old_flush && + flush != Z_FINISH) { + ERR_RETURN(strm, Z_BUF_ERROR); + } + + /* User must not provide more input after the first FINISH: */ + if (s->status == FINISH_STATE && strm->avail_in != 0) { + ERR_RETURN(strm, Z_BUF_ERROR); + } + + /* Start a new block or continue the current one. 
+ */ + if (strm->avail_in != 0 || s->lookahead != 0 || + (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) { + block_state bstate; + + bstate = (*(configuration_table[s->level].func))(s, flush); + + if (bstate == finish_started || bstate == finish_done) { + s->status = FINISH_STATE; + } + if (bstate == need_more || bstate == finish_started) { + if (strm->avail_out == 0) { + s->last_flush = -1; /* avoid BUF_ERROR next call, see above */ + } + return Z_OK; + /* If flush != Z_NO_FLUSH && avail_out == 0, the next call + * of deflate should use the same flush parameter to make sure + * that the flush is complete. So we don't have to output an + * empty block here, this will be done at next call. This also + * ensures that for a very small output buffer, we emit at most + * one empty block. + */ + } + if (bstate == block_done) { + if (flush == Z_PARTIAL_FLUSH) { + _tr_align(s); + } else { /* FULL_FLUSH or SYNC_FLUSH */ + _tr_stored_block(s, (char*)0, 0L, 0); + /* For a full flush, this empty block will be recognized + * as a special marker by inflate_sync(). + */ + if (flush == Z_FULL_FLUSH) { + CLEAR_HASH(s); /* forget history */ + } + } + flush_pending(strm); + if (strm->avail_out == 0) { + s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */ + return Z_OK; + } + } + } + Assert(strm->avail_out > 0, "bug2"); + + if (flush != Z_FINISH) return Z_OK; + if (s->noheader) return Z_STREAM_END; + + /* Write the zlib trailer (adler32) */ + putShortMSB(s, (uInt)(strm->adler >> 16)); + putShortMSB(s, (uInt)(strm->adler & 0xffff)); + flush_pending(strm); + /* If avail_out is zero, the application will call deflate again + * to flush the rest. + */ + s->noheader = -1; /* write the trailer only once! */ + return s->pending != 0 ? 
Z_OK : Z_STREAM_END; +} + +/* ========================================================================= + * Release the buffers owned by the deflate state of strm, then the state + * itself, and set strm->state to Z_NULL. + * Returns Z_STREAM_ERROR (freeing nothing) if strm or strm->state is Z_NULL or + * the status is none of INIT_STATE, BUSY_STATE, FINISH_STATE; Z_DATA_ERROR if + * the state was released while in BUSY_STATE; Z_OK otherwise. + */ +int ZEXPORT deflateEnd (strm) + z_streamp strm; +{ + int status; + + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + + status = strm->state->status; + if (status != INIT_STATE && status != BUSY_STATE && + status != FINISH_STATE) { + return Z_STREAM_ERROR; + } + + /* Deallocate in reverse order of allocations: */ + TRY_FREE(strm, strm->state->pending_buf); + TRY_FREE(strm, strm->state->head); + TRY_FREE(strm, strm->state->prev); + TRY_FREE(strm, strm->state->window); + + ZFREE(strm, strm->state); + strm->state = Z_NULL; + + /* status was sampled before the state was freed */ return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK; +} + +/* ========================================================================= + * Copy the source state to the destination state. + * To simplify the source, this is not supported for 16-bit MSDOS (which + * doesn't have enough memory anyway to duplicate compression states). + * Returns Z_STREAM_ERROR when compiled with MAXSEG_64K or when dest, source + * or source->state is Z_NULL; Z_MEM_ERROR when an allocation fails; Z_OK + * once the state and its four buffers have been duplicated. + */ +int ZEXPORT deflateCopy (dest, source) + z_streamp dest; + z_streamp source; +{ +#ifdef MAXSEG_64K + return Z_STREAM_ERROR; +#else + deflate_state *ds; + deflate_state *ss; + ushf *overlay; + + + if (source == Z_NULL || dest == Z_NULL || source->state == Z_NULL) { + return Z_STREAM_ERROR; + } + + ss = source->state; + + /* Shallow copy of the stream; dest->state is replaced just below. */ *dest = *source; + + ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state)); + if (ds == Z_NULL) return Z_MEM_ERROR; + dest->state = (struct internal_state FAR *) ds; + /* After this struct copy ds still holds the source's buffer pointers; they are overwritten by the allocations that follow. */ *ds = *ss; + ds->strm = dest; + + ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte)); + ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos)); + ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos)); + overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2); + ds->pending_buf = (uchf *) overlay; + + if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL || + ds->pending_buf == Z_NULL) { + /* NOTE(review): relies on TRY_FREE (defined outside this chunk) skipping the pointers that are still Z_NULL - confirm. */ deflateEnd (dest); + return Z_MEM_ERROR; + } + /* following 
zmemcpy calls do not work for 16-bit MSDOS */ + zmemcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte)); + zmemcpy(ds->prev, ss->prev, ds->w_size * sizeof(Pos)); + zmemcpy(ds->head, ss->head, ds->hash_size * sizeof(Pos)); + zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); + + /* Rebase the interior pointers onto the newly allocated buffers: */ ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); + ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush); + ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize; + + /* Point the tree descriptors at the trees embedded in the copied state: */ ds->l_desc.dyn_tree = ds->dyn_ltree; + ds->d_desc.dyn_tree = ds->dyn_dtree; + ds->bl_desc.dyn_tree = ds->bl_tree; + + return Z_OK; +#endif +} + +/* =========================================================================== + * Read a new buffer from the current input stream, update the adler32 + * and total number of bytes read. All deflate() input goes through + * this function so some applications may wish to modify it to avoid + * allocating a large strm->next_in buffer and copying from it. + * (See also flush_pending()). 
+ */ /* strm: stream supplying the input; buf: destination; size: capacity of buf. Copies min(strm->avail_in, size) bytes from strm->next_in into buf, advances next_in/total_in, reduces avail_in, and returns the number of bytes copied (0 when nothing can be copied). */ +local int read_buf(strm, buf, size) + z_streamp strm; + Bytef *buf; + unsigned size; +{ + unsigned len = strm->avail_in; + + if (len > size) len = size; + if (len == 0) return 0; + + strm->avail_in -= len; + + /* The running adler32 is only updated when noheader is clear: */ if (!strm->state->noheader) { + strm->adler = adler32(strm->adler, strm->next_in, len); + } + zmemcpy(buf, strm->next_in, len); + strm->next_in += len; + strm->total_in += len; + + return (int)len; +} + +/* =========================================================================== + * Initialize the "longest match" routines for a new zlib stream: + * sets window_size to twice w_size, clears the hash table, loads the + * tuning values for s->level from configuration_table and resets the + * match-search cursors. + */ +local void lm_init (s) + deflate_state *s; +{ + s->window_size = (ulg)2L*s->w_size; + + CLEAR_HASH(s); + + /* Set the default configuration parameters: + */ + s->max_lazy_match = configuration_table[s->level].max_lazy; + s->good_match = configuration_table[s->level].good_length; + s->nice_match = configuration_table[s->level].nice_length; + s->max_chain_length = configuration_table[s->level].max_chain; + + /* Start with an empty window and no match pending: */ s->strstart = 0; + s->block_start = 0L; + s->lookahead = 0; + s->match_length = s->prev_length = MIN_MATCH-1; + s->match_available = 0; + s->ins_h = 0; +#ifdef ASMV + match_init(); /* initialize the asm code */ +#endif +} + +/* =========================================================================== + * Set match_start to the longest match starting at the given string and + * return its length. Matches shorter or equal to prev_length are discarded, + * in which case the result is equal to prev_length and match_start is + * garbage. + * IN assertions: cur_match is the head of the hash chain for the current + * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1 + * OUT assertion: the match length is not greater than s->lookahead. + */ +#ifndef ASMV +/* For 80x86 and 680x0, an optimized version will be provided in match.asm or + * match.S. The code will be functionally equivalent. 
+ */ /* General version, compiled unless FASTEST is defined. s: deflate state; cur_match: window index of the first candidate. Walks the prev[] chain for at most max_chain_length candidates and returns the best length found, never more than s->lookahead. */ +#ifndef FASTEST +local uInt longest_match(s, cur_match) + deflate_state *s; + IPos cur_match; /* current match */ +{ + unsigned chain_length = s->max_chain_length;/* max hash chain length */ + register Bytef *scan = s->window + s->strstart; /* current string */ + register Bytef *match; /* matched string */ + register int len; /* length of current match */ + int best_len = s->prev_length; /* best match length so far */ + int nice_match = s->nice_match; /* stop if match long enough */ + IPos limit = s->strstart > (IPos)MAX_DIST(s) ? + s->strstart - (IPos)MAX_DIST(s) : NIL; + /* Stop when cur_match becomes <= limit. To simplify the code, + * we prevent matches with the string of window index 0. + */ + Posf *prev = s->prev; + uInt wmask = s->w_mask; + +#ifdef UNALIGNED_OK + /* Compare two bytes at a time. Note: this is not always beneficial. + * Try with and without -DUNALIGNED_OK to check. + */ + register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1; + register ush scan_start = *(ushf*)scan; + register ush scan_end = *(ushf*)(scan+best_len-1); +#else + register Bytef *strend = s->window + s->strstart + MAX_MATCH; + register Byte scan_end1 = scan[best_len-1]; + register Byte scan_end = scan[best_len]; +#endif + + /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. + * It is easy to get rid of this optimization if necessary. + */ + Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); + + /* Do not waste too much time if we already have a good match: */ + if (s->prev_length >= s->good_match) { + chain_length >>= 2; + } + /* Do not look for matches beyond the end of the input. This is necessary + * to make deflate deterministic. 
+ */ + if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; + + Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); + + do { + Assert(cur_match < s->strstart, "no future"); + match = s->window + cur_match; + + /* Skip to next match if the match length cannot increase + * or if the match length is less than 2: + */ +#if (defined(UNALIGNED_OK) && MAX_MATCH == 258) + /* This code assumes sizeof(unsigned short) == 2. Do not use + * UNALIGNED_OK if your compiler uses a different size. + */ + if (*(ushf*)(match+best_len-1) != scan_end || + *(ushf*)match != scan_start) continue; + + /* It is not necessary to compare scan[2] and match[2] since they are + * always equal when the other bytes match, given that the hash keys + * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at + * strstart+3, +5, ... up to strstart+257. We check for insufficient + * lookahead only every 4th comparison; the 128th check will be made + * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is + * necessary to put more guard bytes at the end of the window, or + * to check more often for insufficient lookahead. + */ + Assert(scan[2] == match[2], "scan[2]?"); + scan++, match++; + do { + } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) && + *(ushf*)(scan+=2) == *(ushf*)(match+=2) && + *(ushf*)(scan+=2) == *(ushf*)(match+=2) && + *(ushf*)(scan+=2) == *(ushf*)(match+=2) && + scan < strend); + /* The funny "do {}" generates better code on most compilers */ + + /* Here, scan <= window+strstart+257 */ + Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); + if (*scan == *match) scan++; + + len = (MAX_MATCH - 1) - (int)(strend-scan); + /* rewind scan to window+strstart for the next candidate */ scan = strend - (MAX_MATCH-1); + +#else /* UNALIGNED_OK */ + + if (match[best_len] != scan_end || + match[best_len-1] != scan_end1 || + *match != *scan || + *++match != scan[1]) continue; + + /* The check at best_len-1 can be removed because it will be made + * again later. 
(This heuristic is not always a win.) + * It is not necessary to compare scan[2] and match[2] since they + * are always equal when the other bytes match, given that + * the hash keys are equal and that HASH_BITS >= 8. + */ + scan += 2, match++; + Assert(*scan == *match, "match[2]?"); + + /* We check for insufficient lookahead only every 8th comparison; + * the 256th check will be made at strstart+258. + */ + do { + } while (*++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + scan < strend); + + Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); + + len = MAX_MATCH - (int)(strend - scan); + /* rewind scan to window+strstart for the next candidate */ scan = strend - MAX_MATCH; + +#endif /* UNALIGNED_OK */ + + if (len > best_len) { + s->match_start = cur_match; + best_len = len; + if (len >= nice_match) break; + /* refresh the cached bytes used to reject candidates that cannot beat best_len */ +#ifdef UNALIGNED_OK + scan_end = *(ushf*)(scan+best_len-1); +#else + scan_end1 = scan[best_len-1]; + scan_end = scan[best_len]; +#endif + } + /* advance to the next candidate on the chain; stop at limit or when the chain budget is spent */ } while ((cur_match = prev[cur_match & wmask]) > limit + && --chain_length != 0); + + if ((uInt)best_len <= s->lookahead) return (uInt)best_len; + return s->lookahead; +} + +#else /* FASTEST */ +/* --------------------------------------------------------------------------- + * Optimized version for level == 1 only. + * Examines the single candidate cur_match (no chain walk). Returns + * MIN_MATCH-1 when fewer than MIN_MATCH bytes match; otherwise stores + * cur_match in s->match_start and returns the length, never more than + * s->lookahead. + */ +local uInt longest_match(s, cur_match) + deflate_state *s; + IPos cur_match; /* current match */ +{ + register Bytef *scan = s->window + s->strstart; /* current string */ + register Bytef *match; /* matched string */ + register int len; /* length of current match */ + register Bytef *strend = s->window + s->strstart + MAX_MATCH; + + /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. + * It is easy to get rid of this optimization if necessary. 
+ */ + Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); + + Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); + + Assert(cur_match < s->strstart, "no future"); + + match = s->window + cur_match; + + /* Return failure if the match length is less than 2: + */ + if (match[0] != scan[0] || match[1] != scan[1]) return MIN_MATCH-1; + + /* The check at best_len-1 can be removed because it will be made + * again later. (This heuristic is not always a win.) + * It is not necessary to compare scan[2] and match[2] since they + * are always equal when the other bytes match, given that + * the hash keys are equal and that HASH_BITS >= 8. + * NOTE(review): this variant has no best_len variable; the first + * sentence looks carried over from the general version. + */ + scan += 2, match += 2; + Assert(*scan == *match, "match[2]?"); + + /* We check for insufficient lookahead only every 8th comparison; + * the 256th check will be made at strstart+258. + */ + do { + } while (*++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + scan < strend); + + Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); + + len = MAX_MATCH - (int)(strend - scan); + + if (len < MIN_MATCH) return MIN_MATCH - 1; + + s->match_start = cur_match; + return len <= s->lookahead ? len : s->lookahead; +} +#endif /* FASTEST */ +#endif /* ASMV */ + +#ifdef DEBUG +/* =========================================================================== + * Check that the match at match_start is indeed a match. 
+ */ /* Debug-only helper; without DEBUG the #else branch below defines check_match as an empty macro. s: deflate state; start: window index of the current string; match: window index of the claimed match; length: claimed match length. Dumps both byte ranges to stderr and calls z_error when they differ. */ +local void check_match(s, start, match, length) + deflate_state *s; + IPos start, match; + int length; +{ + /* check that the match is indeed a match */ + if (zmemcmp(s->window + match, + s->window + start, length) != EQUAL) { + fprintf(stderr, " start %u, match %u, length %d\n", + start, match, length); + do { + fprintf(stderr, "%c%c", s->window[match++], s->window[start++]); + } while (--length != 0); + z_error("invalid match"); + } + if (z_verbose > 1) { + fprintf(stderr,"\\[%d,%d]", start-match, length); + do { putc(s->window[start++], stderr); } while (--length != 0); + } +} +#else +# define check_match(s, start, match, length) +#endif + +/* =========================================================================== + * Fill the window when the lookahead becomes insufficient. + * Updates strstart and lookahead. + * + * IN assertion: lookahead < MIN_LOOKAHEAD + * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD + * At least one byte has been read, or avail_in == 0; reads are + * performed for at least two bytes (required for the zip translate_eol + * option -- not supported here). + */ +local void fill_window(s) + deflate_state *s; +{ + register unsigned n, m; + register Posf *p; + unsigned more; /* Amount of free space at the end of the window. */ + uInt wsize = s->w_size; + + do { + more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart); + + /* Deal with !@#$% 64K limit: */ + if (more == 0 && s->strstart == 0 && s->lookahead == 0) { + more = wsize; + + } else if (more == (unsigned)(-1)) { + /* Very unlikely, but possible on 16 bit machine if strstart == 0 + * and lookahead == 1 (input done one byte at a time) + */ + more--; + + /* If the window is almost full and there is insufficient lookahead, + * move the upper half to the lower one to make room in the upper half. 
+ */ + } else if (s->strstart >= wsize+MAX_DIST(s)) { + + zmemcpy(s->window, s->window+wsize, (unsigned)wsize); + /* every window index kept in the state moves down by wsize with the data */ s->match_start -= wsize; + s->strstart -= wsize; /* we now have strstart >= MAX_DIST */ + s->block_start -= (long) wsize; + + /* Slide the hash table (could be avoided with 32 bit values + at the expense of memory usage). We slide even when level == 0 + to keep the hash table consistent if we switch back to level > 0 + later. (Using level 0 permanently is not an optimal usage of + zlib, so we don't care about this pathological case.) + */ + n = s->hash_size; + p = &s->head[n]; + /* walk head[] from the top down; entries below wsize become NIL */ do { + m = *--p; + *p = (Pos)(m >= wsize ? m-wsize : NIL); + } while (--n); + + n = wsize; +#ifndef FASTEST + p = &s->prev[n]; + do { + m = *--p; + *p = (Pos)(m >= wsize ? m-wsize : NIL); + /* If n is not on any hash chain, prev[n] is garbage but + * its value will never be used. + */ + } while (--n); +#endif + more += wsize; + } + /* no input left to read: leave the window as it is */ if (s->strm->avail_in == 0) return; + + /* If there was no sliding: + * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 && + * more == window_size - lookahead - strstart + * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1) + * => more >= window_size - 2*WSIZE + 2 + * In the BIG_MEM or MMAP case (not yet supported), + * window_size == input_size + MIN_LOOKAHEAD && + * strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD. + * Otherwise, window_size == 2*WSIZE so more >= 2. + * If there was sliding, more >= WSIZE. So in all cases, more >= 2. 
+ */ + Assert(more >= 2, "more < 2"); + + /* append new input right after the bytes already in the window */ n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more); + s->lookahead += n; + + /* Initialize the hash value now that we have some input: */ + if (s->lookahead >= MIN_MATCH) { + s->ins_h = s->window[s->strstart]; + UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]); +#if MIN_MATCH != 3 + /* the next line is bare text, not code: a build with MIN_MATCH != 3 stops here until the extra calls are written */ Call UPDATE_HASH() MIN_MATCH-3 more times +#endif + } + /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage, + * but this is not important since only literal bytes will be emitted. + */ + + } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0); +} + +/* =========================================================================== + * Flush the current block, with given end-of-file flag. + * IN assertion: strstart is set to the end of the current match. + * The block data is passed as a pointer into the window, or as Z_NULL when + * block_start is negative; block_start is then moved up to strstart. + */ +#define FLUSH_BLOCK_ONLY(s, eof) { \ + _tr_flush_block(s, (s->block_start >= 0L ? \ + (charf *)&s->window[(unsigned)s->block_start] : \ + (charf *)Z_NULL), \ + (ulg)((long)s->strstart - s->block_start), \ + (eof)); \ + s->block_start = s->strstart; \ + flush_pending(s->strm); \ + Tracev((stderr,"[FLUSH]")); \ +} + +/* Same but force premature exit if necessary: when avail_out is 0 after the + * flush, the macro executes a return in the enclosing function, yielding + * finish_started if eof is set and need_more otherwise. + */ +#define FLUSH_BLOCK(s, eof) { \ + FLUSH_BLOCK_ONLY(s, eof); \ + if (s->strm->avail_out == 0) return (eof) ? finish_started : need_more; \ +} + +/* =========================================================================== + * Copy without compression as much as possible from the input stream, return + * the current block state. + * This function does not insert new strings in the dictionary since + * uncompressible data is probably not useful. This function is used + * only for the level=0 compression option. + * NOTE: this function should be optimized to avoid extra copying from + * window to pending_buf. 
+ */ /* s: deflate state; flush: flush mode requested by the caller. Returns need_more when the input is exhausted under Z_NO_FLUSH; FLUSH_BLOCK may also return need_more or finish_started from inside this function when avail_out reaches 0; otherwise finish_done for Z_FINISH and block_done for any other flush value. */ +local block_state deflate_stored(s, flush) + deflate_state *s; + int flush; +{ + /* Stored blocks are limited to 0xffff bytes, pending_buf is limited + * to pending_buf_size, and each stored block has a 5 byte header: + */ + ulg max_block_size = 0xffff; + ulg max_start; + + if (max_block_size > s->pending_buf_size - 5) { + max_block_size = s->pending_buf_size - 5; + } + + /* Copy as much as possible from input to output: */ + for (;;) { + /* Fill the window as much as possible: */ + if (s->lookahead <= 1) { + + Assert(s->strstart < s->w_size+MAX_DIST(s) || + s->block_start >= (long)s->w_size, "slide too late"); + + fill_window(s); + if (s->lookahead == 0 && flush == Z_NO_FLUSH) return need_more; + + if (s->lookahead == 0) break; /* flush the current block */ + } + Assert(s->block_start >= 0L, "block gone"); + + /* consume all buffered input at once: it all goes into the stored block */ s->strstart += s->lookahead; + s->lookahead = 0; + + /* Emit a stored block if pending_buf will be full: */ + max_start = s->block_start + max_block_size; + if (s->strstart == 0 || (ulg)s->strstart >= max_start) { + /* strstart == 0 is possible when wraparound on 16-bit machine */ + /* bytes past the block limit are handed back as lookahead */ s->lookahead = (uInt)(s->strstart - max_start); + s->strstart = (uInt)max_start; + FLUSH_BLOCK(s, 0); + } + /* Flush if we may have to slide, otherwise block_start may become + * negative and the data will be gone: + */ + if (s->strstart - (uInt)s->block_start >= MAX_DIST(s)) { + FLUSH_BLOCK(s, 0); + } + } + FLUSH_BLOCK(s, flush == Z_FINISH); + return flush == Z_FINISH ? finish_done : block_done; +} + +/* =========================================================================== + * Compress as much as possible from the input stream, return the current + * block state. + * This function does not perform lazy evaluation of matches and inserts + * new strings in the dictionary only for unmatched strings or for short + * matches. It is used only for the fast compression options. 
+ */ /* s: deflate state; flush: flush mode requested by the caller. Returns need_more when lookahead stays below MIN_LOOKAHEAD under Z_NO_FLUSH; FLUSH_BLOCK may also return need_more or finish_started from inside this function when avail_out reaches 0; otherwise finish_done for Z_FINISH and block_done for any other flush value. */ +local block_state deflate_fast(s, flush) + deflate_state *s; + int flush; +{ + IPos hash_head = NIL; /* head of the hash chain */ + int bflush; /* set if current block must be flushed */ + + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the next match, plus MIN_MATCH bytes to insert the + * string following the next match. + */ + if (s->lookahead < MIN_LOOKAHEAD) { + fill_window(s); + if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* Insert the string window[strstart .. strstart+2] in the + * dictionary, and set hash_head to the head of the hash chain: + */ + if (s->lookahead >= MIN_MATCH) { + INSERT_STRING(s, s->strstart, hash_head); + } + + /* Find the longest match, discarding those <= prev_length. + * At this point we have always match_length < MIN_MATCH + */ + if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) { + /* To simplify the code, we prevent matches with the string + * of window index 0 (in particular we have to avoid a match + * of the string with itself at the start of the input file). + */ + if (s->strategy != Z_HUFFMAN_ONLY) { + s->match_length = longest_match (s, hash_head); + } + /* longest_match() sets match_start */ + } + if (s->match_length >= MIN_MATCH) { + check_match(s, s->strstart, s->match_start, s->match_length); + + _tr_tally_dist(s, s->strstart - s->match_start, + s->match_length - MIN_MATCH, bflush); + + s->lookahead -= s->match_length; + + /* Insert new strings in the hash table only if the match length + * is not too large. This saves time but degrades compression. 
+ */ +#ifndef FASTEST + if (s->match_length <= s->max_insert_length && + s->lookahead >= MIN_MATCH) { + s->match_length--; /* string at strstart already in hash table */ + do { + s->strstart++; + INSERT_STRING(s, s->strstart, hash_head); + /* strstart never exceeds WSIZE-MAX_MATCH, so there are + * always MIN_MATCH bytes ahead. + */ + } while (--s->match_length != 0); + s->strstart++; + } else +#endif + { + /* long match (or FASTEST build): skip over it without inserting its strings, then reseed the hash */ s->strstart += s->match_length; + s->match_length = 0; + s->ins_h = s->window[s->strstart]; + UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]); +#if MIN_MATCH != 3 + Call UPDATE_HASH() MIN_MATCH-3 more times +#endif + /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not + * matter since it will be recomputed at next deflate call. + */ + } + } else { + /* No match, output a literal byte */ + Tracevv((stderr,"%c", s->window[s->strstart])); + _tr_tally_lit (s, s->window[s->strstart], bflush); + s->lookahead--; + s->strstart++; + } + if (bflush) FLUSH_BLOCK(s, 0); + } + FLUSH_BLOCK(s, flush == Z_FINISH); + return flush == Z_FINISH ? finish_done : block_done; +} + +/* =========================================================================== + * Same as above, but achieves better compression. We use a lazy + * evaluation for matches: a match is finally adopted only if there is + * no better match at the next window position. + * Return values follow the same scheme as deflate_fast; in addition + * need_more is returned after a literal has been emitted when avail_out + * is 0. + */ +local block_state deflate_slow(s, flush) + deflate_state *s; + int flush; +{ + IPos hash_head = NIL; /* head of hash chain */ + int bflush; /* set if current block must be flushed */ + + /* Process the input block. */ + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the next match, plus MIN_MATCH bytes to insert the + * string following the next match. 
+ */ + if (s->lookahead < MIN_LOOKAHEAD) { + fill_window(s); + if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* Insert the string window[strstart .. strstart+2] in the + * dictionary, and set hash_head to the head of the hash chain: + */ + if (s->lookahead >= MIN_MATCH) { + INSERT_STRING(s, s->strstart, hash_head); + } + + /* Find the longest match, discarding those <= prev_length. + */ + /* remember the match found at the previous position before searching again */ s->prev_length = s->match_length, s->prev_match = s->match_start; + s->match_length = MIN_MATCH-1; + + if (hash_head != NIL && s->prev_length < s->max_lazy_match && + s->strstart - hash_head <= MAX_DIST(s)) { + /* To simplify the code, we prevent matches with the string + * of window index 0 (in particular we have to avoid a match + * of the string with itself at the start of the input file). + */ + if (s->strategy != Z_HUFFMAN_ONLY) { + s->match_length = longest_match (s, hash_head); + } + /* longest_match() sets match_start */ + + if (s->match_length <= 5 && (s->strategy == Z_FILTERED || + (s->match_length == MIN_MATCH && + s->strstart - s->match_start > TOO_FAR))) { + + /* If prev_match is also MIN_MATCH, match_start is garbage + * but we will ignore the current match anyway. + */ + s->match_length = MIN_MATCH-1; + } + } + /* If there was a match at the previous step and the current + * match is not better, output the previous match: + */ + if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) { + uInt max_insert = s->strstart + s->lookahead - MIN_MATCH; + /* Do not insert strings in hash table beyond this. */ + + check_match(s, s->strstart-1, s->prev_match, s->prev_length); + + _tr_tally_dist(s, s->strstart -1 - s->prev_match, + s->prev_length - MIN_MATCH, bflush); + + /* Insert in hash table all strings up to the end of the match. + * strstart-1 and strstart are already inserted. 
If there is not + * enough lookahead, the last two strings are not inserted in + * the hash table. + */ + s->lookahead -= s->prev_length-1; + s->prev_length -= 2; + do { + if (++s->strstart <= max_insert) { + INSERT_STRING(s, s->strstart, hash_head); + } + } while (--s->prev_length != 0); + s->match_available = 0; + s->match_length = MIN_MATCH-1; + s->strstart++; + + if (bflush) FLUSH_BLOCK(s, 0); + + } else if (s->match_available) { + /* If there was no match at the previous position, output a + * single literal. If there was a match but the current match + * is longer, truncate the previous match to a single literal. + */ + Tracevv((stderr,"%c", s->window[s->strstart-1])); + _tr_tally_lit(s, s->window[s->strstart-1], bflush); + if (bflush) { + FLUSH_BLOCK_ONLY(s, 0); + } + s->strstart++; + s->lookahead--; + /* FLUSH_BLOCK_ONLY does not return by itself, so the output-full check is done here after the cursor has advanced */ if (s->strm->avail_out == 0) return need_more; + } else { + /* There is no previous match to compare with, wait for + * the next step to decide. + */ + s->match_available = 1; + s->strstart++; + s->lookahead--; + } + } + Assert (flush != Z_NO_FLUSH, "no flush?"); + /* a literal is still pending from the last position: emit it before the final flush */ if (s->match_available) { + Tracevv((stderr,"%c", s->window[s->strstart-1])); + _tr_tally_lit(s, s->window[s->strstart-1], bflush); + s->match_available = 0; + } + FLUSH_BLOCK(s, flush == Z_FINISH); + return flush == Z_FINISH ? finish_done : block_done; +} diff --git a/src/utilfuns/zlib/deflate.h b/src/utilfuns/zlib/deflate.h new file mode 100644 index 0000000..e55d52a --- /dev/null +++ b/src/utilfuns/zlib/deflate.h @@ -0,0 +1,318 @@ +/* deflate.h -- internal compression state + * Copyright (C) 1995-1998 Jean-loup Gailly + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. 
+ */ + +/* @(#) $Id: deflate.h,v 1.1 2001/03/23 09:00:15 scribe Exp $ */ + +#ifndef _DEFLATE_H +#define _DEFLATE_H + +#include "zutil.h" + +/* =========================================================================== + * Internal compression state. + */ + +#define LENGTH_CODES 29 +/* number of length codes, not counting the special END_BLOCK code */ + +#define LITERALS 256 +/* number of literal bytes 0..255 */ + +#define L_CODES (LITERALS+1+LENGTH_CODES) +/* number of Literal or Length codes, including the END_BLOCK code */ + +#define D_CODES 30 +/* number of distance codes */ + +#define BL_CODES 19 +/* number of codes used to transfer the bit lengths */ + +#define HEAP_SIZE (2*L_CODES+1) +/* maximum heap size */ + +#define MAX_BITS 15 +/* All codes must not exceed MAX_BITS bits */ + +#define INIT_STATE 42 +#define BUSY_STATE 113 +#define FINISH_STATE 666 +/* Stream status: the three values accepted for the status field of the + * internal state. + */ + + +/* Data structure describing a single value and its code string. + * Each union lets one 16-bit slot hold either of its two members; the + * Freq/Code/Dad/Len macros below are shorthands for those members. + */ +typedef struct ct_data_s { + union { + ush freq; /* frequency count */ + ush code; /* bit string */ + } fc; + union { + ush dad; /* father node in Huffman tree */ + ush len; /* length of bit string */ + } dl; +} FAR ct_data; + +#define Freq fc.freq +#define Code fc.code +#define Dad dl.dad +#define Len dl.len + +/* Only declared here; the structure body is not part of this header. */ typedef struct static_tree_desc_s static_tree_desc; + +/* Descriptor pairing one dynamic tree with its static counterpart. */ typedef struct tree_desc_s { + ct_data *dyn_tree; /* the dynamic tree */ + int max_code; /* largest code with non zero frequency */ + static_tree_desc *stat_desc; /* the corresponding static tree */ +} FAR tree_desc; + +typedef ush Pos; +typedef Pos FAR Posf; +typedef unsigned IPos; + +/* A Pos is an index in the character window. We use short instead of int to + * save space in the various tables. IPos is used only for parameter passing. 
+ */ + +/* Complete per-stream compressor state: stream bookkeeping, the sliding window and hash chains, the match-search tuning values, and the Huffman tree work areas. */ typedef struct internal_state { + z_streamp strm; /* pointer back to this zlib stream */ + int status; /* as the name implies */ + Bytef *pending_buf; /* output still pending */ + ulg pending_buf_size; /* size of pending_buf */ + Bytef *pending_out; /* next pending byte to output to the stream */ + int pending; /* nb of bytes in the pending buffer */ + int noheader; /* suppress zlib header and adler32 */ + Byte data_type; /* UNKNOWN, BINARY or ASCII */ + Byte method; /* STORED (for zip only) or DEFLATED */ + int last_flush; /* value of flush param for previous deflate call */ + + /* used by deflate.c: */ + + uInt w_size; /* LZ77 window size (32K by default) */ + uInt w_bits; /* log2(w_size) (8..16) */ + uInt w_mask; /* w_size - 1 */ + + Bytef *window; + /* Sliding window. Input bytes are read into the second half of the window, + * and move to the first half later to keep a dictionary of at least wSize + * bytes. With this organization, matches are limited to a distance of + * wSize-MAX_MATCH bytes, but this ensures that IO is always + * performed with a length multiple of the block size. Also, it limits + * the window size to 64K, which is quite useful on MSDOS. + * To do: use the user input buffer as sliding window. + */ + + ulg window_size; + /* Actual size of window: 2*wSize, except when the user input buffer + * is directly used as sliding window. + */ + + Posf *prev; + /* Link to older string with same hash index. To limit the size of this + * array to 64K, this link is maintained only for the last 32K strings. + * An index in this array is thus a window index modulo 32K. + */ + + Posf *head; /* Heads of the hash chains or NIL. */ + + uInt ins_h; /* hash index of string to be inserted */ + uInt hash_size; /* number of elements in hash table */ + uInt hash_bits; /* log2(hash_size) */ + uInt hash_mask; /* hash_size-1 */ + + uInt hash_shift; + /* Number of bits by which ins_h must be shifted at each input + * step. 
It must be such that after MIN_MATCH steps, the oldest + * byte no longer takes part in the hash key, that is: + * hash_shift * MIN_MATCH >= hash_bits + */ + + long block_start; + /* Window position at the beginning of the current output block. Gets + * negative when the window is moved backwards. + */ + + uInt match_length; /* length of best match */ + IPos prev_match; /* previous match */ + int match_available; /* set if previous match exists */ + uInt strstart; /* start of string to insert */ + uInt match_start; /* start of matching string */ + uInt lookahead; /* number of valid bytes ahead in window */ + + uInt prev_length; + /* Length of the best match at previous step. Matches not greater than this + * are discarded. This is used in the lazy match evaluation. + */ + + uInt max_chain_length; + /* To speed up deflation, hash chains are never searched beyond this + * length. A higher limit improves compression ratio but degrades the + * speed. + */ + + uInt max_lazy_match; + /* Attempt to find a better match only when the current match is strictly + * smaller than this value. This mechanism is used only for compression + * levels >= 4. + */ +# define max_insert_length max_lazy_match + /* Insert new strings in the hash table only if the match length is not + * greater than this length. This saves time but degrades compression. + * max_insert_length is used only for compression levels <= 3. 
+ */ + + int level; /* compression level (1..9); NOTE(review): deflate.c also describes a level=0 stored mode - confirm the documented range */ + int strategy; /* favor or force Huffman coding*/ + + uInt good_match; + /* Use a faster search when the previous match is longer than this */ + + int nice_match; /* Stop searching when current match exceeds this */ + + /* used by trees.c: */ + /* Didn't use ct_data typedef below to suppress compiler warning */ + struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */ + struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */ + struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */ + + struct tree_desc_s l_desc; /* desc. for literal tree */ + struct tree_desc_s d_desc; /* desc. for distance tree */ + struct tree_desc_s bl_desc; /* desc. for bit length tree */ + + ush bl_count[MAX_BITS+1]; + /* number of codes at each bit length for an optimal tree */ + + int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */ + int heap_len; /* number of elements in the heap */ + int heap_max; /* element of largest frequency */ + /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. + * The same heap array is used to build all trees. + */ + + uch depth[2*L_CODES+1]; + /* Depth of each subtree used as tie breaker for trees of equal frequency + */ + + uchf *l_buf; /* buffer for literals or lengths */ + + uInt lit_bufsize; + /* Size of match buffer for literals/lengths. There are 4 reasons for + * limiting lit_bufsize to 64K: + * - frequencies can be kept in 16 bit counters + * - if compression is not successful for the first block, all input + * data is still in the window so we can still emit a stored block even + * when input comes from standard input. (This can also be done for + * all blocks if lit_bufsize is not greater than 32K.) + * - if compression is not successful for a file smaller than 64K, we can + * even emit a stored file instead of a stored block (saving 5 bytes). + * This is applicable only for zip (not gzip or zlib). 
+ * - creating new Huffman trees less frequently may not provide fast + * adaptation to changes in the input data statistics. (Take for + * example a binary file with poorly compressible code followed by + * a highly compressible string table.) Smaller buffer sizes give + * fast adaptation but have of course the overhead of transmitting + * trees more frequently. + * - I can't count above 4 + */ + + uInt last_lit; /* running index in l_buf */ + + ushf *d_buf; + /* Buffer for distances. To simplify the code, d_buf and l_buf have + * the same number of elements. To use different lengths, an extra flag + * array would be necessary. + */ + + ulg opt_len; /* bit length of current block with optimal trees */ + ulg static_len; /* bit length of current block with static trees */ + uInt matches; /* number of string matches in current block */ + int last_eob_len; /* bit length of EOB code for last block */ + +#ifdef DEBUG + ulg compressed_len; /* total bit length of compressed file mod 2^32 */ + ulg bits_sent; /* bit length of compressed data sent mod 2^32 */ +#endif + + ush bi_buf; + /* Output buffer. bits are inserted starting at the bottom (least + * significant bits). + */ + int bi_valid; + /* Number of valid bits in bi_buf. All bits above the last valid bit + * are always zero. + */ + +} FAR deflate_state; + +/* Output a byte on the stream. + * IN assertion: there is enough room in pending_buf. + * The macro itself performs no bounds check. + */ +#define put_byte(s, c) {s->pending_buf[s->pending++] = (c);} + + +#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) +/* Minimum amount of lookahead, except at the end of the input file. + * See deflate.c for comments about the MIN_MATCH+1. + */ + +#define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD) +/* In order to simplify the code, particularly on 16 bit machines, match + * distances are limited to MAX_DIST instead of WSIZE. 
+ */
+
+        /* in trees.c */
+/* Entry points declared here for use by the deflate code; their definitions
+ * are not part of this header.
+ */
+void _tr_init OF((deflate_state *s));
+int _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc));
+void _tr_flush_block OF((deflate_state *s, charf *buf, ulg stored_len,
+                         int eof));
+void _tr_align OF((deflate_state *s));
+void _tr_stored_block OF((deflate_state *s, charf *buf, ulg stored_len,
+                          int eof));
+
+#define d_code(dist) \
+   ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)])
+/* Mapping from a distance to a distance code. dist is the distance - 1 and
+ * must not have side effects (the macro evaluates it more than once).
+ * _dist_code[256] and _dist_code[257] are never used.
+ */
+
+#ifndef DEBUG
+/* Inline versions of _tr_tally for speed: */
+
+/* The tables are declared non-const when GEN_TREES_H is defined or the
+ * compiler is not STDC, and const otherwise.
+ */
+#if defined(GEN_TREES_H) || !defined(STDC)
+  extern uch _length_code[];
+  extern uch _dist_code[];
+#else
+  extern const uch _length_code[];
+  extern const uch _dist_code[];
+#endif
+
+/* Record the literal byte c: store distance 0 and the byte at index
+ * last_lit of d_buf/l_buf, advance last_lit, and count the byte in
+ * dyn_ltree. flush is set to true when last_lit has reached lit_bufsize-1.
+ */
+# define _tr_tally_lit(s, c, flush) \
+  { uch cc = (c); \
+    s->d_buf[s->last_lit] = 0; \
+    s->l_buf[s->last_lit++] = cc; \
+    s->dyn_ltree[cc].Freq++; \
+    flush = (s->last_lit == s->lit_bufsize-1); \
+   }
+/* Record a match: store (distance, length) at index last_lit, advance
+ * last_lit, then count the length code (offset by LITERALS+1) in dyn_ltree
+ * and the code of distance-1 in dyn_dtree. flush is set as for
+ * _tr_tally_lit.
+ */
+# define _tr_tally_dist(s, distance, length, flush) \
+  { uch len = (length); \
+    ush dist = (distance); \
+    s->d_buf[s->last_lit] = dist; \
+    s->l_buf[s->last_lit++] = len; \
+    dist--; \
+    s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
+    s->dyn_dtree[d_code(dist)].Freq++; \
+    flush = (s->last_lit == s->lit_bufsize-1); \
+  }
+#else
+/* DEBUG build: both macros go through the out-of-line _tr_tally(). */
+# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c)
+# define _tr_tally_dist(s, distance, length, flush) \
+              flush = _tr_tally(s, distance, length)
+#endif
+
+#endif
diff --git a/src/utilfuns/zlib/gzio.c b/src/utilfuns/zlib/gzio.c
new file mode 100644
index 0000000..a2c5b58
--- /dev/null
+++ b/src/utilfuns/zlib/gzio.c
@@ -0,0 +1,875 @@
+/* gzio.c -- IO on .gz files
+ * Copyright (C) 1995-1998 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ *
+ * Compile this file with -DNO_DEFLATE to avoid the compression code.
+ */
+
+/* @(#) $Id: gzio.c,v 1.1 2001/03/23 09:00:15 scribe Exp $ */
+
+#include <stdio.h>
+
+#include "zutil.h"
+
+struct internal_state {int dummy;}; /* for buggy compilers */
+
+/* Size in bytes of the input and output buffers allocated per gz_stream. */
+#ifndef Z_BUFSIZE
+#  ifdef MAXSEG_64K
+#    define Z_BUFSIZE 4096 /* minimize memory usage for 16-bit DOS */
+#  else
+#    define Z_BUFSIZE 16384
+#  endif
+#endif
+/* Size in bytes of the on-stack buffer gzprintf formats into. */
+#ifndef Z_PRINTF_BUFSIZE
+#  define Z_PRINTF_BUFSIZE 4096
+#endif
+
+/* Allocation helpers: plain malloc, and a free that tolerates NULL. */
+#define ALLOC(size) malloc(size)
+#define TRYFREE(p) {if (p) free(p);}
+
+static int gz_magic[2] = {0x1f, 0x8b}; /* gzip magic header */
+
+/* gzip flag byte */
+#define ASCII_FLAG   0x01 /* bit 0 set: file probably ascii text */
+#define HEAD_CRC     0x02 /* bit 1 set: header CRC present */
+#define EXTRA_FIELD  0x04 /* bit 2 set: extra field present */
+#define ORIG_NAME    0x08 /* bit 3 set: original file name present */
+#define COMMENT      0x10 /* bit 4 set: file comment present */
+#define RESERVED     0xE0 /* bits 5..7: reserved */
+
+/* Per-file state; a gzFile handed to callers is a pointer to one of these
+ * (gz_open returns the gz_stream cast to gzFile).
+ */
+typedef struct gz_stream {
+    z_stream stream;
+    int      z_err;   /* error code for last stream operation */
+    int      z_eof;   /* set if end of input file */
+    FILE     *file;   /* .gz file */
+    Byte     *inbuf;  /* input buffer */
+    Byte     *outbuf; /* output buffer */
+    uLong    crc;     /* crc32 of uncompressed data */
+    char     *msg;    /* error message */
+    char     *path;   /* path name for debugging only */
+    int      transparent; /* 1 if input file is not a .gz file */
+    char     mode;    /* 'w' or 'r' */
+    long     startpos; /* start of compressed data in file (header skipped) */
+} gz_stream;
+
+
+/* File-local helpers, defined further down in this file. */
+local gzFile gz_open OF((const char *path, const char *mode, int fd));
+local int do_flush OF((gzFile file, int flush));
+local int get_byte OF((gz_stream *s));
+local void check_header OF((gz_stream *s));
+local int destroy OF((gz_stream *s));
+local void putLong OF((FILE *file, uLong x));
+local uLong getLong OF((gz_stream *s));
+
+/* ===========================================================================
+     Opens a gzip (.gz) file for reading or writing.
The mode parameter + is as in fopen ("rb" or "wb"). The file is given either by file descriptor + or path name (if fd == -1). + gz_open return NULL if the file could not be opened or if there was + insufficient memory to allocate the (de)compression state; errno + can be checked to distinguish the two cases (if errno is zero, the + zlib error is Z_MEM_ERROR). +*/ +local gzFile gz_open (path, mode, fd) + const char *path; + const char *mode; + int fd; +{ + int err; + int level = Z_DEFAULT_COMPRESSION; /* compression level */ + int strategy = Z_DEFAULT_STRATEGY; /* compression strategy */ + char *p = (char*)mode; + gz_stream *s; + char fmode[80]; /* copy of mode, without the compression level */ + char *m = fmode; + + if (!path || !mode) return Z_NULL; + + s = (gz_stream *)ALLOC(sizeof(gz_stream)); + if (!s) return Z_NULL; + + s->stream.zalloc = (alloc_func)0; + s->stream.zfree = (free_func)0; + s->stream.opaque = (voidpf)0; + s->stream.next_in = s->inbuf = Z_NULL; + s->stream.next_out = s->outbuf = Z_NULL; + s->stream.avail_in = s->stream.avail_out = 0; + s->file = NULL; + s->z_err = Z_OK; + s->z_eof = 0; + s->crc = crc32(0L, Z_NULL, 0); + s->msg = NULL; + s->transparent = 0; + + s->path = (char*)ALLOC(strlen(path)+1); + if (s->path == NULL) { + return destroy(s), (gzFile)Z_NULL; + } + strcpy(s->path, path); /* do this early for debugging */ + + s->mode = '\0'; + do { + if (*p == 'r') s->mode = 'r'; + if (*p == 'w' || *p == 'a') s->mode = 'w'; + if (*p >= '0' && *p <= '9') { + level = *p - '0'; + } else if (*p == 'f') { + strategy = Z_FILTERED; + } else if (*p == 'h') { + strategy = Z_HUFFMAN_ONLY; + } else { + *m++ = *p; /* copy the mode */ + } + } while (*p++ && m != fmode + sizeof(fmode)); + if (s->mode == '\0') return destroy(s), (gzFile)Z_NULL; + + if (s->mode == 'w') { +#ifdef NO_DEFLATE + err = Z_STREAM_ERROR; +#else + err = deflateInit2(&(s->stream), level, + Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL, strategy); + /* windowBits is passed < 0 to suppress zlib 
header */ + + s->stream.next_out = s->outbuf = (Byte*)ALLOC(Z_BUFSIZE); +#endif + if (err != Z_OK || s->outbuf == Z_NULL) { + return destroy(s), (gzFile)Z_NULL; + } + } else { + s->stream.next_in = s->inbuf = (Byte*)ALLOC(Z_BUFSIZE); + + err = inflateInit2(&(s->stream), -MAX_WBITS); + /* windowBits is passed < 0 to tell that there is no zlib header. + * Note that in this case inflate *requires* an extra "dummy" byte + * after the compressed stream in order to complete decompression and + * return Z_STREAM_END. Here the gzip CRC32 ensures that 4 bytes are + * present after the compressed stream. + */ + if (err != Z_OK || s->inbuf == Z_NULL) { + return destroy(s), (gzFile)Z_NULL; + } + } + s->stream.avail_out = Z_BUFSIZE; + + errno = 0; + s->file = fd < 0 ? F_OPEN(path, fmode) : (FILE*)fdopen(fd, fmode); + + if (s->file == NULL) { + return destroy(s), (gzFile)Z_NULL; + } + if (s->mode == 'w') { + /* Write a very simple .gz header: + */ + fprintf(s->file, "%c%c%c%c%c%c%c%c%c%c", gz_magic[0], gz_magic[1], + Z_DEFLATED, 0 /*flags*/, 0,0,0,0 /*time*/, 0 /*xflags*/, OS_CODE); + s->startpos = 10L; + /* We use 10L instead of ftell(s->file) to because ftell causes an + * fflush on some systems. This version of the library doesn't use + * startpos anyway in write mode, so this initialization is not + * necessary. + */ + } else { + check_header(s); /* skip the .gz header */ + s->startpos = (ftell(s->file) - s->stream.avail_in); + } + + return (gzFile)s; +} + +/* =========================================================================== + Opens a gzip (.gz) file for reading or writing. +*/ +gzFile ZEXPORT gzopen (path, mode) + const char *path; + const char *mode; +{ + return gz_open (path, mode, -1); +} + +/* =========================================================================== + Associate a gzFile with the file descriptor fd. fd is not dup'ed here + to mimic the behavio(u)r of fdopen. 
+*/ +gzFile ZEXPORT gzdopen (fd, mode) + int fd; + const char *mode; +{ + char name[20]; + + if (fd < 0) return (gzFile)Z_NULL; + sprintf(name, "<fd:%d>", fd); /* for debugging */ + + return gz_open (name, mode, fd); +} + +/* =========================================================================== + * Update the compression level and strategy + */ +int ZEXPORT gzsetparams (file, level, strategy) + gzFile file; + int level; + int strategy; +{ + gz_stream *s = (gz_stream*)file; + + if (s == NULL || s->mode != 'w') return Z_STREAM_ERROR; + + /* Make room to allow flushing */ + if (s->stream.avail_out == 0) { + + s->stream.next_out = s->outbuf; + if (fwrite(s->outbuf, 1, Z_BUFSIZE, s->file) != Z_BUFSIZE) { + s->z_err = Z_ERRNO; + } + s->stream.avail_out = Z_BUFSIZE; + } + + return deflateParams (&(s->stream), level, strategy); +} + +/* =========================================================================== + Read a byte from a gz_stream; update next_in and avail_in. Return EOF + for end of file. + IN assertion: the stream s has been sucessfully opened for reading. +*/ +local int get_byte(s) + gz_stream *s; +{ + if (s->z_eof) return EOF; + if (s->stream.avail_in == 0) { + errno = 0; + s->stream.avail_in = fread(s->inbuf, 1, Z_BUFSIZE, s->file); + if (s->stream.avail_in == 0) { + s->z_eof = 1; + if (ferror(s->file)) s->z_err = Z_ERRNO; + return EOF; + } + s->stream.next_in = s->inbuf; + } + s->stream.avail_in--; + return *(s->stream.next_in)++; +} + +/* =========================================================================== + Check the gzip header of a gz_stream opened for reading. Set the stream + mode to transparent if the gzip magic header is not present; set s->err + to Z_DATA_ERROR if the magic header is present but the rest of the header + is incorrect. + IN assertion: the stream s has already been created sucessfully; + s->stream.avail_in is zero for the first time, but may be non-zero + for concatenated .gz files. 
+*/
+local void check_header(s)
+    gz_stream *s;
+{
+    int method; /* method byte */
+    int flags;  /* flags byte */
+    uInt len;
+    int c;
+
+    /* Check the gzip magic header */
+    for (len = 0; len < 2; len++) {
+        c = get_byte(s);
+        if (c != gz_magic[len]) {
+            /* Not a gzip header: push back what was consumed so the bytes
+             * are delivered as plain data. len != 0 means the first magic
+             * byte had matched and must be un-read as well.
+             * NOTE(review): next_in-- assumes that byte is still the one
+             * just before next_in in inbuf; if get_byte refilled the
+             * buffer between the two reads this steps before inbuf --
+             * confirm whether that can happen with concatenated members.
+             */
+            if (len != 0) s->stream.avail_in++, s->stream.next_in--;
+            if (c != EOF) {
+                s->stream.avail_in++, s->stream.next_in--;
+                s->transparent = 1;
+            }
+            /* nothing buffered at all means end of stream */
+            s->z_err = s->stream.avail_in != 0 ? Z_OK : Z_STREAM_END;
+            return;
+        }
+    }
+    method = get_byte(s);
+    flags = get_byte(s);
+    if (method != Z_DEFLATED || (flags & RESERVED) != 0) {
+        s->z_err = Z_DATA_ERROR;
+        return;
+    }
+
+    /* Discard time, xflags and OS code: */
+    for (len = 0; len < 6; len++) (void)get_byte(s);
+
+    if ((flags & EXTRA_FIELD) != 0) { /* skip the extra field */
+        /* two-byte length, least significant byte first */
+        len  =  (uInt)get_byte(s);
+        len += ((uInt)get_byte(s))<<8;
+        /* len is garbage if EOF but the loop below will quit anyway */
+        while (len-- != 0 && get_byte(s) != EOF) ;
+    }
+    if ((flags & ORIG_NAME) != 0) { /* skip the original file name */
+        while ((c = get_byte(s)) != 0 && c != EOF) ;
+    }
+    if ((flags & COMMENT) != 0) {   /* skip the .gz file comment */
+        while ((c = get_byte(s)) != 0 && c != EOF) ;
+    }
+    if ((flags & HEAD_CRC) != 0) {  /* skip the header crc */
+        for (len = 0; len < 2; len++) (void)get_byte(s);
+    }
+    /* hitting end of file anywhere inside the header is a data error */
+    s->z_err = s->z_eof ? Z_DATA_ERROR : Z_OK;
+}
+
+ /* ===========================================================================
+ * Cleanup then free the given gz_stream. Return a zlib error code.
+   Try freeing in the reverse order of allocations.
+ */ +local int destroy (s) + gz_stream *s; +{ + int err = Z_OK; + + if (!s) return Z_STREAM_ERROR; + + TRYFREE(s->msg); + + if (s->stream.state != NULL) { + if (s->mode == 'w') { +#ifdef NO_DEFLATE + err = Z_STREAM_ERROR; +#else + err = deflateEnd(&(s->stream)); +#endif + } else if (s->mode == 'r') { + err = inflateEnd(&(s->stream)); + } + } + if (s->file != NULL && fclose(s->file)) { +#ifdef ESPIPE + if (errno != ESPIPE) /* fclose is broken for pipes in HP/UX */ +#endif + err = Z_ERRNO; + } + if (s->z_err < 0) err = s->z_err; + + TRYFREE(s->inbuf); + TRYFREE(s->outbuf); + TRYFREE(s->path); + TRYFREE(s); + return err; +} + +/* =========================================================================== + Reads the given number of uncompressed bytes from the compressed file. + gzread returns the number of bytes actually read (0 for end of file). +*/ +int ZEXPORT gzread (file, buf, len) + gzFile file; + voidp buf; + unsigned len; +{ + gz_stream *s = (gz_stream*)file; + Bytef *start = (Bytef*)buf; /* starting point for crc computation */ + Byte *next_out; /* == stream.next_out but not forced far (for MSDOS) */ + + if (s == NULL || s->mode != 'r') return Z_STREAM_ERROR; + + if (s->z_err == Z_DATA_ERROR || s->z_err == Z_ERRNO) return -1; + if (s->z_err == Z_STREAM_END) return 0; /* EOF */ + + next_out = (Byte*)buf; + s->stream.next_out = (Bytef*)buf; + s->stream.avail_out = len; + + while (s->stream.avail_out != 0) { + + if (s->transparent) { + /* Copy first the lookahead bytes: */ + uInt n = s->stream.avail_in; + if (n > s->stream.avail_out) n = s->stream.avail_out; + if (n > 0) { + zmemcpy(s->stream.next_out, s->stream.next_in, n); + next_out += n; + s->stream.next_out = next_out; + s->stream.next_in += n; + s->stream.avail_out -= n; + s->stream.avail_in -= n; + } + if (s->stream.avail_out > 0) { + s->stream.avail_out -= fread(next_out, 1, s->stream.avail_out, + s->file); + } + len -= s->stream.avail_out; + s->stream.total_in += (uLong)len; + s->stream.total_out += 
(uLong)len; + if (len == 0) s->z_eof = 1; + return (int)len; + } + if (s->stream.avail_in == 0 && !s->z_eof) { + + errno = 0; + s->stream.avail_in = fread(s->inbuf, 1, Z_BUFSIZE, s->file); + if (s->stream.avail_in == 0) { + s->z_eof = 1; + if (ferror(s->file)) { + s->z_err = Z_ERRNO; + break; + } + } + s->stream.next_in = s->inbuf; + } + s->z_err = inflate(&(s->stream), Z_NO_FLUSH); + + if (s->z_err == Z_STREAM_END) { + /* Check CRC and original size */ + s->crc = crc32(s->crc, start, (uInt)(s->stream.next_out - start)); + start = s->stream.next_out; + + if (getLong(s) != s->crc) { + s->z_err = Z_DATA_ERROR; + } else { + (void)getLong(s); + /* The uncompressed length returned by above getlong() may + * be different from s->stream.total_out) in case of + * concatenated .gz files. Check for such files: + */ + check_header(s); + if (s->z_err == Z_OK) { + uLong total_in = s->stream.total_in; + uLong total_out = s->stream.total_out; + + inflateReset(&(s->stream)); + s->stream.total_in = total_in; + s->stream.total_out = total_out; + s->crc = crc32(0L, Z_NULL, 0); + } + } + } + if (s->z_err != Z_OK || s->z_eof) break; + } + s->crc = crc32(s->crc, start, (uInt)(s->stream.next_out - start)); + + return (int)(len - s->stream.avail_out); +} + + +/* =========================================================================== + Reads one byte from the compressed file. gzgetc returns this byte + or -1 in case of end of file or error. +*/ +int ZEXPORT gzgetc(file) + gzFile file; +{ + unsigned char c; + + return gzread(file, &c, 1) == 1 ? c : -1; +} + + +/* =========================================================================== + Reads bytes from the compressed file until len-1 characters are + read, or a newline character is read and transferred to buf, or an + end-of-file condition is encountered. The string is then terminated + with a null character. + gzgets returns buf, or Z_NULL in case of error. + + The current implementation is not optimized at all. 
+*/ +char * ZEXPORT gzgets(file, buf, len) + gzFile file; + char *buf; + int len; +{ + char *b = buf; + if (buf == Z_NULL || len <= 0) return Z_NULL; + + while (--len > 0 && gzread(file, buf, 1) == 1 && *buf++ != '\n') ; + *buf = '\0'; + return b == buf && len > 0 ? Z_NULL : b; +} + + +#ifndef NO_DEFLATE +/* =========================================================================== + Writes the given number of uncompressed bytes into the compressed file. + gzwrite returns the number of bytes actually written (0 in case of error). +*/ +int ZEXPORT gzwrite (file, buf, len) + gzFile file; + const voidp buf; + unsigned len; +{ + gz_stream *s = (gz_stream*)file; + + if (s == NULL || s->mode != 'w') return Z_STREAM_ERROR; + + s->stream.next_in = (Bytef*)buf; + s->stream.avail_in = len; + + while (s->stream.avail_in != 0) { + + if (s->stream.avail_out == 0) { + + s->stream.next_out = s->outbuf; + if (fwrite(s->outbuf, 1, Z_BUFSIZE, s->file) != Z_BUFSIZE) { + s->z_err = Z_ERRNO; + break; + } + s->stream.avail_out = Z_BUFSIZE; + } + s->z_err = deflate(&(s->stream), Z_NO_FLUSH); + if (s->z_err != Z_OK) break; + } + s->crc = crc32(s->crc, (const Bytef *)buf, len); + + return (int)(len - s->stream.avail_in); +} + +/* =========================================================================== + Converts, formats, and writes the args to the compressed file under + control of the format string, as in fprintf. gzprintf returns the number of + uncompressed bytes actually written (0 in case of error). +*/ +#ifdef STDC +#include <stdarg.h> + +int ZEXPORTVA gzprintf (gzFile file, const char *format, /* args */ ...) 
+{ + char buf[Z_PRINTF_BUFSIZE]; + va_list va; + int len; + + va_start(va, format); +#ifdef HAS_vsnprintf + (void)vsnprintf(buf, sizeof(buf), format, va); +#else + (void)vsprintf(buf, format, va); +#endif + va_end(va); + len = strlen(buf); /* some *sprintf don't return the nb of bytes written */ + if (len <= 0) return 0; + + return gzwrite(file, buf, (unsigned)len); +} +#else /* not ANSI C */ + +int ZEXPORTVA gzprintf (file, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, + a11, a12, a13, a14, a15, a16, a17, a18, a19, a20) + gzFile file; + const char *format; + int a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, + a11, a12, a13, a14, a15, a16, a17, a18, a19, a20; +{ + char buf[Z_PRINTF_BUFSIZE]; + int len; + +#ifdef HAS_snprintf + snprintf(buf, sizeof(buf), format, a1, a2, a3, a4, a5, a6, a7, a8, + a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20); +#else + sprintf(buf, format, a1, a2, a3, a4, a5, a6, a7, a8, + a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20); +#endif + len = strlen(buf); /* old sprintf doesn't return the nb of bytes written */ + if (len <= 0) return 0; + + return gzwrite(file, buf, len); +} +#endif + +/* =========================================================================== + Writes c, converted to an unsigned char, into the compressed file. + gzputc returns the value that was written, or -1 in case of error. +*/ +int ZEXPORT gzputc(file, c) + gzFile file; + int c; +{ + unsigned char cc = (unsigned char) c; /* required for big endian systems */ + + return gzwrite(file, &cc, 1) == 1 ? (int)cc : -1; +} + + +/* =========================================================================== + Writes the given null-terminated string to the compressed file, excluding + the terminating null character. + gzputs returns the number of characters written, or -1 in case of error. 
+*/
+int ZEXPORT gzputs(file, s)
+    gzFile file;
+    const char *s;
+{
+    /* simply forwards the string bytes (without the NUL) to gzwrite */
+    return gzwrite(file, (char*)s, (unsigned)strlen(s));
+}
+
+
+/* ===========================================================================
+     Flushes all pending output into the compressed file. The parameter
+   flush is as in the deflate() function.
+     Returns Z_STREAM_ERROR for a null handle or a file not opened for
+   writing, Z_ERRNO if writing to the file fails, and otherwise the last
+   deflate status with Z_STREAM_END mapped to Z_OK.
+*/
+local int do_flush (file, flush)
+    gzFile file;
+    int flush;
+{
+    uInt len;
+    int done = 0;
+    gz_stream *s = (gz_stream*)file;
+
+    if (s == NULL || s->mode != 'w') return Z_STREAM_ERROR;
+
+    s->stream.avail_in = 0; /* should be zero already anyway */
+
+    for (;;) {
+        /* number of compressed bytes currently waiting in outbuf */
+        len = Z_BUFSIZE - s->stream.avail_out;
+
+        if (len != 0) {
+            if ((uInt)fwrite(s->outbuf, 1, len, s->file) != len) {
+                s->z_err = Z_ERRNO;
+                return Z_ERRNO;
+            }
+            s->stream.next_out = s->outbuf;
+            s->stream.avail_out = Z_BUFSIZE;
+        }
+        /* done was set on the previous pass; the write above has just
+         * emptied what that last deflate call produced
+         */
+        if (done) break;
+        s->z_err = deflate(&(s->stream), flush);
+
+        /* Ignore the second of two consecutive flushes: */
+        if (len == 0 && s->z_err == Z_BUF_ERROR) s->z_err = Z_OK;
+
+        /* deflate has finished flushing only when it hasn't used up
+         * all the available space in the output buffer:
+         */
+        done = (s->stream.avail_out != 0 || s->z_err == Z_STREAM_END);
+
+        if (s->z_err != Z_OK && s->z_err != Z_STREAM_END) break;
+    }
+    return  s->z_err == Z_STREAM_END ? Z_OK : s->z_err;
+}
+
+/* Public flush: runs do_flush and, if that succeeded, also fflush()es the
+ * underlying FILE. The return value follows the same mapping as do_flush.
+ */
+int ZEXPORT gzflush (file, flush)
+     gzFile file;
+     int flush;
+{
+    gz_stream *s = (gz_stream*)file;
+    int err = do_flush (file, flush);
+
+    /* do_flush has already rejected a null handle, so s is safe below */
+    if (err) return err;
+    fflush(s->file);
+    return  s->z_err == Z_STREAM_END ? Z_OK : s->z_err;
+}
+#endif /* NO_DEFLATE */
+
+/* ===========================================================================
+      Sets the starting position for the next gzread or gzwrite on the given
+   compressed file. The offset represents a number of bytes in the
+   uncompressed data stream.
+      gzseek returns the resulting offset location as measured in bytes from
+   the beginning of the uncompressed stream, or -1 in case of error.
+      SEEK_END is not implemented, returns error.
+ In this version of the library, gzseek can be extremely slow. +*/ +z_off_t ZEXPORT gzseek (file, offset, whence) + gzFile file; + z_off_t offset; + int whence; +{ + gz_stream *s = (gz_stream*)file; + + if (s == NULL || whence == SEEK_END || + s->z_err == Z_ERRNO || s->z_err == Z_DATA_ERROR) { + return -1L; + } + + if (s->mode == 'w') { +#ifdef NO_DEFLATE + return -1L; +#else + if (whence == SEEK_SET) { + offset -= s->stream.total_in; + } + if (offset < 0) return -1L; + + /* At this point, offset is the number of zero bytes to write. */ + if (s->inbuf == Z_NULL) { + s->inbuf = (Byte*)ALLOC(Z_BUFSIZE); /* for seeking */ + zmemzero(s->inbuf, Z_BUFSIZE); + } + while (offset > 0) { + uInt size = Z_BUFSIZE; + if (offset < Z_BUFSIZE) size = (uInt)offset; + + size = gzwrite(file, s->inbuf, size); + if (size == 0) return -1L; + + offset -= size; + } + return (z_off_t)s->stream.total_in; +#endif + } + /* Rest of function is for reading only */ + + /* compute absolute position */ + if (whence == SEEK_CUR) { + offset += s->stream.total_out; + } + if (offset < 0) return -1L; + + if (s->transparent) { + /* map to fseek */ + s->stream.avail_in = 0; + s->stream.next_in = s->inbuf; + if (fseek(s->file, offset, SEEK_SET) < 0) return -1L; + + s->stream.total_in = s->stream.total_out = (uLong)offset; + return offset; + } + + /* For a negative seek, rewind and use positive seek */ + if ((uLong)offset >= s->stream.total_out) { + offset -= s->stream.total_out; + } else if (gzrewind(file) < 0) { + return -1L; + } + /* offset is now the number of bytes to skip. */ + + if (offset != 0 && s->outbuf == Z_NULL) { + s->outbuf = (Byte*)ALLOC(Z_BUFSIZE); + } + while (offset > 0) { + int size = Z_BUFSIZE; + if (offset < Z_BUFSIZE) size = (int)offset; + + size = gzread(file, s->outbuf, (uInt)size); + if (size <= 0) return -1L; + offset -= size; + } + return (z_off_t)s->stream.total_out; +} + +/* =========================================================================== + Rewinds input file. 
+*/ +int ZEXPORT gzrewind (file) + gzFile file; +{ + gz_stream *s = (gz_stream*)file; + + if (s == NULL || s->mode != 'r') return -1; + + s->z_err = Z_OK; + s->z_eof = 0; + s->stream.avail_in = 0; + s->stream.next_in = s->inbuf; + s->crc = crc32(0L, Z_NULL, 0); + + if (s->startpos == 0) { /* not a compressed file */ + rewind(s->file); + return 0; + } + + (void) inflateReset(&s->stream); + return fseek(s->file, s->startpos, SEEK_SET); +} + +/* =========================================================================== + Returns the starting position for the next gzread or gzwrite on the + given compressed file. This position represents a number of bytes in the + uncompressed data stream. +*/ +z_off_t ZEXPORT gztell (file) + gzFile file; +{ + return gzseek(file, 0L, SEEK_CUR); +} + +/* =========================================================================== + Returns 1 when EOF has previously been detected reading the given + input stream, otherwise zero. +*/ +int ZEXPORT gzeof (file) + gzFile file; +{ + gz_stream *s = (gz_stream*)file; + + return (s == NULL || s->mode != 'r') ? 0 : s->z_eof; +} + +/* =========================================================================== + Outputs a long in LSB order to the given file +*/ +local void putLong (file, x) + FILE *file; + uLong x; +{ + int n; + for (n = 0; n < 4; n++) { + fputc((int)(x & 0xff), file); + x >>= 8; + } +} + +/* =========================================================================== + Reads a long in LSB order from the given gz_stream. Sets z_err in case + of error. 
+*/ +local uLong getLong (s) + gz_stream *s; +{ + uLong x = (uLong)get_byte(s); + int c; + + x += ((uLong)get_byte(s))<<8; + x += ((uLong)get_byte(s))<<16; + c = get_byte(s); + if (c == EOF) s->z_err = Z_DATA_ERROR; + x += ((uLong)c)<<24; + return x; +} + +/* =========================================================================== + Flushes all pending output if necessary, closes the compressed file + and deallocates all the (de)compression state. +*/ +int ZEXPORT gzclose (file) + gzFile file; +{ + int err; + gz_stream *s = (gz_stream*)file; + + if (s == NULL) return Z_STREAM_ERROR; + + if (s->mode == 'w') { +#ifdef NO_DEFLATE + return Z_STREAM_ERROR; +#else + err = do_flush (file, Z_FINISH); + if (err != Z_OK) return destroy((gz_stream*)file); + + putLong (s->file, s->crc); + putLong (s->file, s->stream.total_in); +#endif + } + return destroy((gz_stream*)file); +} + +/* =========================================================================== + Returns the error message for the last error which occured on the + given compressed file. errnum is set to zlib error number. If an + error occured in the file system and not in the compression library, + errnum is set to Z_ERRNO and the application may consult errno + to get the exact error code. +*/ +const char* ZEXPORT gzerror (file, errnum) + gzFile file; + int *errnum; +{ + char *m; + gz_stream *s = (gz_stream*)file; + + if (s == NULL) { + *errnum = Z_STREAM_ERROR; + return (const char*)ERR_MSG(Z_STREAM_ERROR); + } + *errnum = s->z_err; + if (*errnum == Z_OK) return (const char*)""; + + m = (char*)(*errnum == Z_ERRNO ? 
zstrerror(errno) : s->stream.msg); + + if (m == NULL || *m == '\0') m = (char*)ERR_MSG(s->z_err); + + TRYFREE(s->msg); + s->msg = (char*)ALLOC(strlen(s->path) + strlen(m) + 3); + strcpy(s->msg, s->path); + strcat(s->msg, ": "); + strcat(s->msg, m); + return (const char*)s->msg; +} diff --git a/src/utilfuns/zlib/infblock.c b/src/utilfuns/zlib/infblock.c new file mode 100644 index 0000000..f4920fa --- /dev/null +++ b/src/utilfuns/zlib/infblock.c @@ -0,0 +1,398 @@ +/* infblock.c -- interpret and process block types to last block + * Copyright (C) 1995-1998 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zutil.h" +#include "infblock.h" +#include "inftrees.h" +#include "infcodes.h" +#include "infutil.h" + +struct inflate_codes_state {int dummy;}; /* for buggy compilers */ + +/* simplify the use of the inflate_huft type with some defines */ +#define exop word.what.Exop +#define bits word.what.Bits + +/* Table for deflate from PKZIP's appnote.txt. */ +local const uInt border[] = { /* Order of the bit length code lengths */ + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + +/* + Notes beyond the 1.93a appnote.txt: + + 1. Distance pointers never point before the beginning of the output + stream. + 2. Distance pointers can point back across blocks, up to 32k away. + 3. There is an implied maximum of 7 bits for the bit length table and + 15 bits for the actual data. + 4. If only one code exists, then it is encoded using one bit. (Zero + would be more efficient, but perhaps a little confusing.) If two + codes exist, they are coded using one bit each (0 and 1). + 5. There is no way of sending zero distance codes--a dummy must be + sent if there are none. (History: a pre 2.0 version of PKZIP would + store blocks with no distance codes, but this was discovered to be + too harsh a criterion.) Valid only for 1.93a. 2.04c does allow + zero distance codes, which is sent as one code of zero bits in + length. 
+   6. There are up to 286 literal/length codes. Code 256 represents the
+      end-of-block. Note however that the static length tree defines
+      288 codes just to fill out the Huffman codes. Codes 286 and 287
+      cannot be used though, since there is no length base or extra bits
+      defined for them. Similarly, there are up to 30 distance codes.
+      However, static trees define 32 codes (all 5 bits) to fill out the
+      Huffman codes, but the last two had better not show up in the data.
+   7. Unzip can check dynamic Huffman blocks for complete code sets.
+      The exception is that a single code would not be complete (see #4).
+   8. The five bits following the block type are really the number of
+      literal codes sent minus 257.
+   9. Length codes 8,16,16 are interpreted as 13 length codes of 8 bits
+      (1+6+6). Therefore, to output three times the length, you output
+      three codes (1+1+1), whereas to output four times the same length,
+      you only need two codes (1+3). Hmm.
+  10. In the tree reconstruction algorithm, Code = Code + Increment
+      only if BitLength(i) is not zero. (Pretty obvious.)
+  11. Correction: 4 Bits: # of Bit Length codes - 4 (4 - 19)
+  12. Note: length code 284 can represent 227-258, but length code 285
+      really is 258. The last length deserves its own, short code
+      since it gets used a lot in very redundant files. The length
+      258 is special since 258 - 3 (the min match length) is 255.
+  13. The literal/length and distance code bit lengths are read as a
+      single stream of lengths. It is possible (and advantageous) for
+      a repeat code (16, 17, or 18) to go across the boundary between
+      the two sets of lengths.
+ */ + + +void inflate_blocks_reset(s, z, c) +inflate_blocks_statef *s; +z_streamp z; +uLongf *c; +{ + if (c != Z_NULL) + *c = s->check; + if (s->mode == BTREE || s->mode == DTREE) + ZFREE(z, s->sub.trees.blens); + if (s->mode == CODES) + inflate_codes_free(s->sub.decode.codes, z); + s->mode = TYPE; + s->bitk = 0; + s->bitb = 0; + s->read = s->write = s->window; + if (s->checkfn != Z_NULL) + z->adler = s->check = (*s->checkfn)(0L, (const Bytef *)Z_NULL, 0); + Tracev((stderr, "inflate: blocks reset\n")); +} + + +inflate_blocks_statef *inflate_blocks_new(z, c, w) +z_streamp z; +check_func c; +uInt w; +{ + inflate_blocks_statef *s; + + if ((s = (inflate_blocks_statef *)ZALLOC + (z,1,sizeof(struct inflate_blocks_state))) == Z_NULL) + return s; + if ((s->hufts = + (inflate_huft *)ZALLOC(z, sizeof(inflate_huft), MANY)) == Z_NULL) + { + ZFREE(z, s); + return Z_NULL; + } + if ((s->window = (Bytef *)ZALLOC(z, 1, w)) == Z_NULL) + { + ZFREE(z, s->hufts); + ZFREE(z, s); + return Z_NULL; + } + s->end = s->window + w; + s->checkfn = c; + s->mode = TYPE; + Tracev((stderr, "inflate: blocks allocated\n")); + inflate_blocks_reset(s, z, Z_NULL); + return s; +} + + +int inflate_blocks(s, z, r) +inflate_blocks_statef *s; +z_streamp z; +int r; +{ + uInt t; /* temporary storage */ + uLong b; /* bit buffer */ + uInt k; /* bits in bit buffer */ + Bytef *p; /* input data pointer */ + uInt n; /* bytes available there */ + Bytef *q; /* output window write pointer */ + uInt m; /* bytes to end of window or read pointer */ + + /* copy input/output information to locals (UPDATE macro restores) */ + LOAD + + /* process input based on current state */ + while (1) switch (s->mode) + { + case TYPE: + NEEDBITS(3) + t = (uInt)b & 7; + s->last = t & 1; + switch (t >> 1) + { + case 0: /* stored */ + Tracev((stderr, "inflate: stored block%s\n", + s->last ? 
" (last)" : "")); + DUMPBITS(3) + t = k & 7; /* go to byte boundary */ + DUMPBITS(t) + s->mode = LENS; /* get length of stored block */ + break; + case 1: /* fixed */ + Tracev((stderr, "inflate: fixed codes block%s\n", + s->last ? " (last)" : "")); + { + uInt bl, bd; + inflate_huft *tl, *td; + + inflate_trees_fixed(&bl, &bd, &tl, &td, z); + s->sub.decode.codes = inflate_codes_new(bl, bd, tl, td, z); + if (s->sub.decode.codes == Z_NULL) + { + r = Z_MEM_ERROR; + LEAVE + } + } + DUMPBITS(3) + s->mode = CODES; + break; + case 2: /* dynamic */ + Tracev((stderr, "inflate: dynamic codes block%s\n", + s->last ? " (last)" : "")); + DUMPBITS(3) + s->mode = TABLE; + break; + case 3: /* illegal */ + DUMPBITS(3) + s->mode = BAD; + z->msg = (char*)"invalid block type"; + r = Z_DATA_ERROR; + LEAVE + } + break; + case LENS: + NEEDBITS(32) + if ((((~b) >> 16) & 0xffff) != (b & 0xffff)) + { + s->mode = BAD; + z->msg = (char*)"invalid stored block lengths"; + r = Z_DATA_ERROR; + LEAVE + } + s->sub.left = (uInt)b & 0xffff; + b = k = 0; /* dump bits */ + Tracev((stderr, "inflate: stored length %u\n", s->sub.left)); + s->mode = s->sub.left ? STORED : (s->last ? DRY : TYPE); + break; + case STORED: + if (n == 0) + LEAVE + NEEDOUT + t = s->sub.left; + if (t > n) t = n; + if (t > m) t = m; + zmemcpy(q, p, t); + p += t; n -= t; + q += t; m -= t; + if ((s->sub.left -= t) != 0) + break; + Tracev((stderr, "inflate: stored end, %lu total out\n", + z->total_out + (q >= s->read ? q - s->read : + (s->end - s->read) + (q - s->window)))); + s->mode = s->last ? 
DRY : TYPE; + break; + case TABLE: + NEEDBITS(14) + s->sub.trees.table = t = (uInt)b & 0x3fff; +#ifndef PKZIP_BUG_WORKAROUND + if ((t & 0x1f) > 29 || ((t >> 5) & 0x1f) > 29) + { + s->mode = BAD; + z->msg = (char*)"too many length or distance symbols"; + r = Z_DATA_ERROR; + LEAVE + } +#endif + t = 258 + (t & 0x1f) + ((t >> 5) & 0x1f); + if ((s->sub.trees.blens = (uIntf*)ZALLOC(z, t, sizeof(uInt))) == Z_NULL) + { + r = Z_MEM_ERROR; + LEAVE + } + DUMPBITS(14) + s->sub.trees.index = 0; + Tracev((stderr, "inflate: table sizes ok\n")); + s->mode = BTREE; + case BTREE: + while (s->sub.trees.index < 4 + (s->sub.trees.table >> 10)) + { + NEEDBITS(3) + s->sub.trees.blens[border[s->sub.trees.index++]] = (uInt)b & 7; + DUMPBITS(3) + } + while (s->sub.trees.index < 19) + s->sub.trees.blens[border[s->sub.trees.index++]] = 0; + s->sub.trees.bb = 7; + t = inflate_trees_bits(s->sub.trees.blens, &s->sub.trees.bb, + &s->sub.trees.tb, s->hufts, z); + if (t != Z_OK) + { + ZFREE(z, s->sub.trees.blens); + r = t; + if (r == Z_DATA_ERROR) + s->mode = BAD; + LEAVE + } + s->sub.trees.index = 0; + Tracev((stderr, "inflate: bits tree ok\n")); + s->mode = DTREE; + case DTREE: + while (t = s->sub.trees.table, + s->sub.trees.index < 258 + (t & 0x1f) + ((t >> 5) & 0x1f)) + { + inflate_huft *h; + uInt i, j, c; + + t = s->sub.trees.bb; + NEEDBITS(t) + h = s->sub.trees.tb + ((uInt)b & inflate_mask[t]); + t = h->bits; + c = h->base; + if (c < 16) + { + DUMPBITS(t) + s->sub.trees.blens[s->sub.trees.index++] = c; + } + else /* c == 16..18 */ + { + i = c == 18 ? 7 : c - 14; + j = c == 18 ? 11 : 3; + NEEDBITS(t + i) + DUMPBITS(t) + j += (uInt)b & inflate_mask[i]; + DUMPBITS(i) + i = s->sub.trees.index; + t = s->sub.trees.table; + if (i + j > 258 + (t & 0x1f) + ((t >> 5) & 0x1f) || + (c == 16 && i < 1)) + { + ZFREE(z, s->sub.trees.blens); + s->mode = BAD; + z->msg = (char*)"invalid bit length repeat"; + r = Z_DATA_ERROR; + LEAVE + } + c = c == 16 ? 
s->sub.trees.blens[i - 1] : 0; + do { + s->sub.trees.blens[i++] = c; + } while (--j); + s->sub.trees.index = i; + } + } + s->sub.trees.tb = Z_NULL; + { + uInt bl, bd; + inflate_huft *tl, *td; + inflate_codes_statef *c; + + bl = 9; /* must be <= 9 for lookahead assumptions */ + bd = 6; /* must be <= 9 for lookahead assumptions */ + t = s->sub.trees.table; + t = inflate_trees_dynamic(257 + (t & 0x1f), 1 + ((t >> 5) & 0x1f), + s->sub.trees.blens, &bl, &bd, &tl, &td, + s->hufts, z); + ZFREE(z, s->sub.trees.blens); + if (t != Z_OK) + { + if (t == (uInt)Z_DATA_ERROR) + s->mode = BAD; + r = t; + LEAVE + } + Tracev((stderr, "inflate: trees ok\n")); + if ((c = inflate_codes_new(bl, bd, tl, td, z)) == Z_NULL) + { + r = Z_MEM_ERROR; + LEAVE + } + s->sub.decode.codes = c; + } + s->mode = CODES; + case CODES: + UPDATE + if ((r = inflate_codes(s, z, r)) != Z_STREAM_END) + return inflate_flush(s, z, r); + r = Z_OK; + inflate_codes_free(s->sub.decode.codes, z); + LOAD + Tracev((stderr, "inflate: codes end, %lu total out\n", + z->total_out + (q >= s->read ? q - s->read : + (s->end - s->read) + (q - s->window)))); + if (!s->last) + { + s->mode = TYPE; + break; + } + s->mode = DRY; + case DRY: + FLUSH + if (s->read != s->write) + LEAVE + s->mode = DONE; + case DONE: + r = Z_STREAM_END; + LEAVE + case BAD: + r = Z_DATA_ERROR; + LEAVE + default: + r = Z_STREAM_ERROR; + LEAVE + } +} + + +int inflate_blocks_free(s, z) +inflate_blocks_statef *s; +z_streamp z; +{ + inflate_blocks_reset(s, z, Z_NULL); + ZFREE(z, s->window); + ZFREE(z, s->hufts); + ZFREE(z, s); + Tracev((stderr, "inflate: blocks freed\n")); + return Z_OK; +} + + +void inflate_set_dictionary(s, d, n) +inflate_blocks_statef *s; +const Bytef *d; +uInt n; +{ + zmemcpy(s->window, d, n); + s->read = s->write = s->window + n; +} + + +/* Returns true if inflate is currently at the end of a block generated + * by Z_SYNC_FLUSH or Z_FULL_FLUSH. 
 * IN assertion: s != Z_NULL
 * The test is simply whether the state machine is in LENS mode, i.e.
 * waiting for the length words of a stored block.
 */
int inflate_blocks_sync_point(s)
inflate_blocks_statef *s;
{
  return s->mode == LENS;
}
diff --git a/src/utilfuns/zlib/infblock.h b/src/utilfuns/zlib/infblock.h
new file mode 100644
index 0000000..bd25c80
--- /dev/null
+++ b/src/utilfuns/zlib/infblock.h
@@ -0,0 +1,39 @@
/* infblock.h -- header to use infblock.c
 * Copyright (C) 1995-1998 Mark Adler
 * For conditions of distribution and use, see copyright notice in zlib.h
 */

/* WARNING: this file should *not* be used by applications. It is
   part of the implementation of the compression library and is
   subject to change. Applications should only use zlib.h.
 */

/* opaque blocks state; the full structure is not defined in this header */
struct inflate_blocks_state;
typedef struct inflate_blocks_state FAR inflate_blocks_statef;

/* allocate a blocks state; returns Z_NULL on allocation failure */
extern inflate_blocks_statef * inflate_blocks_new OF((
    z_streamp z,
    check_func c,               /* check function */
    uInt w));                   /* window size */

/* run the block decoder */
extern int inflate_blocks OF((
    inflate_blocks_statef *,
    z_streamp ,
    int));                      /* initial return code */

/* return the state to its initial mode */
extern void inflate_blocks_reset OF((
    inflate_blocks_statef *,
    z_streamp ,
    uLongf *));                  /* check value on output */

/* release the state and everything it owns */
extern int inflate_blocks_free OF((
    inflate_blocks_statef *,
    z_streamp));

/* preload the window with a dictionary */
extern void inflate_set_dictionary OF((
    inflate_blocks_statef *s,
    const Bytef *d,  /* dictionary */
    uInt  n));       /* dictionary length */

/* nonzero when the decoder is waiting for stored-block length words */
extern int inflate_blocks_sync_point OF((
    inflate_blocks_statef *s));
diff --git a/src/utilfuns/zlib/infcodes.c b/src/utilfuns/zlib/infcodes.c
new file mode 100644
index 0000000..cfd0807
--- /dev/null
+++ b/src/utilfuns/zlib/infcodes.c
@@ -0,0 +1,260 @@
/* infcodes.c -- process literals and length/distance pairs
 * Copyright (C) 1995-1998 Mark Adler
 * For conditions of distribution and use, see copyright notice in zlib.h
 */

#include "zutil.h"
#include "inftrees.h"
#include "infblock.h"
#include "infcodes.h"
#include "infutil.h"
#include "inffast.h"

/* simplify the use of the inflate_huft type with some
defines */ +#define exop word.what.Exop +#define bits word.what.Bits + +typedef enum { /* waiting for "i:"=input, "o:"=output, "x:"=nothing */ + START, /* x: set up for LEN */ + LEN, /* i: get length/literal/eob next */ + LENEXT, /* i: getting length extra (have base) */ + DIST, /* i: get distance next */ + DISTEXT, /* i: getting distance extra */ + COPY, /* o: copying bytes in window, waiting for space */ + LIT, /* o: got literal, waiting for output space */ + WASH, /* o: got eob, possibly still output waiting */ + END, /* x: got eob and all data flushed */ + BADCODE} /* x: got error */ +inflate_codes_mode; + +/* inflate codes private state */ +struct inflate_codes_state { + + /* mode */ + inflate_codes_mode mode; /* current inflate_codes mode */ + + /* mode dependent information */ + uInt len; + union { + struct { + inflate_huft *tree; /* pointer into tree */ + uInt need; /* bits needed */ + } code; /* if LEN or DIST, where in tree */ + uInt lit; /* if LIT, literal */ + struct { + uInt get; /* bits to get for extra */ + uInt dist; /* distance back to copy from */ + } copy; /* if EXT or COPY, where and how much */ + } sub; /* submode */ + + /* mode independent information */ + Byte lbits; /* ltree bits decoded per branch */ + Byte dbits; /* dtree bits decoder per branch */ + inflate_huft *ltree; /* literal/length/eob tree */ + inflate_huft *dtree; /* distance tree */ + +}; + + +inflate_codes_statef *inflate_codes_new(bl, bd, tl, td, z) +uInt bl, bd; +inflate_huft *tl; +inflate_huft *td; /* need separate declaration for Borland C++ */ +z_streamp z; +{ + inflate_codes_statef *c; + + if ((c = (inflate_codes_statef *) + ZALLOC(z,1,sizeof(struct inflate_codes_state))) != Z_NULL) + { + c->mode = START; + c->lbits = (Byte)bl; + c->dbits = (Byte)bd; + c->ltree = tl; + c->dtree = td; + Tracev((stderr, "inflate: codes new\n")); + } + return c; +} + + +int inflate_codes(s, z, r) +inflate_blocks_statef *s; +z_streamp z; +int r; +{ + uInt j; /* temporary storage */ + 
inflate_huft *t; /* temporary pointer */ + uInt e; /* extra bits or operation */ + uLong b; /* bit buffer */ + uInt k; /* bits in bit buffer */ + Bytef *p; /* input data pointer */ + uInt n; /* bytes available there */ + Bytef *q; /* output window write pointer */ + uInt m; /* bytes to end of window or read pointer */ + Bytef *f; /* pointer to copy strings from */ + inflate_codes_statef *c = s->sub.decode.codes; /* codes state */ + long tryF; +// f = q + /* copy input/output information to locals (UPDATE macro restores) */ + LOAD + + /* process input and output based on current state */ + while (1) switch (c->mode) + { /* waiting for "i:"=input, "o:"=output, "x:"=nothing */ + case START: /* x: set up for LEN */ +#ifndef SLOW + if (m >= 258 && n >= 10) + { + UPDATE + r = inflate_fast(c->lbits, c->dbits, c->ltree, c->dtree, s, z); + LOAD + if (r != Z_OK) + { + c->mode = r == Z_STREAM_END ? WASH : BADCODE; + break; + } + } +#endif /* !SLOW */ + c->sub.code.need = c->lbits; + c->sub.code.tree = c->ltree; + c->mode = LEN; + case LEN: /* i: get length/literal/eob next */ + j = c->sub.code.need; + NEEDBITS(j) + t = c->sub.code.tree + ((uInt)b & inflate_mask[j]); + DUMPBITS(t->bits) + e = (uInt)(t->exop); + if (e == 0) /* literal */ + { + c->sub.lit = t->base; + Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ? 
+ "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", t->base)); + c->mode = LIT; + break; + } + if (e & 16) /* length */ + { + c->sub.copy.get = e & 15; + c->len = t->base; + c->mode = LENEXT; + break; + } + if ((e & 64) == 0) /* next table */ + { + c->sub.code.need = e; + c->sub.code.tree = t + t->base; + break; + } + if (e & 32) /* end of block */ + { + Tracevv((stderr, "inflate: end of block\n")); + c->mode = WASH; + break; + } + c->mode = BADCODE; /* invalid code */ + z->msg = (char*)"invalid literal/length code"; + r = Z_DATA_ERROR; + LEAVE + case LENEXT: /* i: getting length extra (have base) */ + j = c->sub.copy.get; + NEEDBITS(j) + c->len += (uInt)b & inflate_mask[j]; + DUMPBITS(j) + c->sub.code.need = c->dbits; + c->sub.code.tree = c->dtree; + Tracevv((stderr, "inflate: length %u\n", c->len)); + c->mode = DIST; + case DIST: /* i: get distance next */ + j = c->sub.code.need; + NEEDBITS(j) + t = c->sub.code.tree + ((uInt)b & inflate_mask[j]); + DUMPBITS(t->bits) + e = (uInt)(t->exop); + if (e & 16) /* distance */ + { + c->sub.copy.get = e & 15; + c->sub.copy.dist = t->base; + c->mode = DISTEXT; + break; + } + if ((e & 64) == 0) /* next table */ + { + c->sub.code.need = e; + c->sub.code.tree = t + t->base; + break; + } + c->mode = BADCODE; /* invalid code */ + z->msg = (char*)"invalid distance code"; + r = Z_DATA_ERROR; + LEAVE + case DISTEXT: /* i: getting distance extra */ + j = c->sub.copy.get; + NEEDBITS(j) + c->sub.copy.dist += (uInt)b & inflate_mask[j]; + DUMPBITS(j) + Tracevv((stderr, "inflate: distance %u\n", c->sub.copy.dist)); + c->mode = COPY; + case COPY: /* o: copying bytes in window, waiting for space */ +#ifndef __TURBOC__ /* Turbo C bug for following expression */ + f = (uInt)(q - s->window) < c->sub.copy.dist ? 
+ s->end - (c->sub.copy.dist - (q - s->window)) : + q - c->sub.copy.dist; +#else + tryF = (long)q - c->sub.copy.dist; +// f = q - c->sub.copy.dist; + if ((uInt)(q - s->window) < c->sub.copy.dist) + f = s->end - (c->sub.copy.dist - (uInt)(q - s->window)); + else f = (Bytef *)tryF; +#endif + while (c->len) + { + NEEDOUT + OUTBYTE(*f++) + if (f == s->end) + f = s->window; + c->len--; + } + c->mode = START; + break; + case LIT: /* o: got literal, waiting for output space */ + NEEDOUT + OUTBYTE(c->sub.lit) + c->mode = START; + break; + case WASH: /* o: got eob, possibly more output */ + if (k > 7) /* return unused byte, if any */ + { + Assert(k < 16, "inflate_codes grabbed too many bytes") + k -= 8; + n++; + p--; /* can always return one */ + } + FLUSH + if (s->read != s->write) + LEAVE + c->mode = END; + case END: + r = Z_STREAM_END; + LEAVE + case BADCODE: /* x: got error */ + r = Z_DATA_ERROR; + LEAVE + default: + r = Z_STREAM_ERROR; + LEAVE + } +#ifdef NEED_DUMMY_RETURN + return Z_STREAM_ERROR; /* Some dumb compilers complain without this */ +#endif +} + + +void inflate_codes_free(c, z) +inflate_codes_statef *c; +z_streamp z; +{ + ZFREE(z, c); + Tracev((stderr, "inflate: codes free\n")); +} diff --git a/src/utilfuns/zlib/infcodes.h b/src/utilfuns/zlib/infcodes.h new file mode 100644 index 0000000..6c750d8 --- /dev/null +++ b/src/utilfuns/zlib/infcodes.h @@ -0,0 +1,27 @@ +/* infcodes.h -- header to use infcodes.c + * Copyright (C) 1995-1998 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. 
 */

/* opaque codes state; the full structure is not defined in this header */
struct inflate_codes_state;
typedef struct inflate_codes_state FAR inflate_codes_statef;

/* allocate a codes state for the given tree bit widths and trees */
extern inflate_codes_statef *inflate_codes_new OF((
    uInt, uInt,
    inflate_huft *, inflate_huft *,
    z_streamp ));

/* run the literal/length/distance decoder */
extern int inflate_codes OF((
    inflate_blocks_statef *,
    z_streamp ,
    int));

/* release a codes state */
extern void inflate_codes_free OF((
    inflate_codes_statef *,
    z_streamp ));

diff --git a/src/utilfuns/zlib/inffast.c b/src/utilfuns/zlib/inffast.c
new file mode 100644
index 0000000..61a78ee
--- /dev/null
+++ b/src/utilfuns/zlib/inffast.c
@@ -0,0 +1,170 @@
/* inffast.c -- process literals and length/distance pairs fast
 * Copyright (C) 1995-1998 Mark Adler
 * For conditions of distribution and use, see copyright notice in zlib.h
 */

#include "zutil.h"
#include "inftrees.h"
#include "infblock.h"
#include "infcodes.h"
#include "infutil.h"
#include "inffast.h"

struct inflate_codes_state {int dummy;}; /* for buggy compilers */

/* simplify the use of the inflate_huft type with some defines */
#define exop word.what.Exop
#define bits word.what.Bits

/* macros for bit input with no checking and for returning unused bytes */
/* GRABBITS(j): pull input bytes into bit buffer b until k >= j; it does not
   check n, so callers must know enough input is available.
   UNGRAB: hand back whole unused bytes from the bit buffer, limited to the
   number consumed since entry (z->avail_in - n); uses local c as scratch. */
#define GRABBITS(j) {while(k<(j)){b|=((uLong)NEXTBYTE)<<k;k+=8;}}
#define UNGRAB {c=z->avail_in-n;c=(k>>3)<c?k>>3:c;n+=c;p-=c;k-=c<<3;}

/* Called with number of bytes left to write in window at least 258
   (the maximum string length) and number of input bytes available
   at least ten. The ten bytes are six bytes for the longest length/
   distance pair plus four bytes for overloading the bit buffer.
*/ + +int inflate_fast(bl, bd, tl, td, s, z) +uInt bl, bd; +inflate_huft *tl; +inflate_huft *td; /* need separate declaration for Borland C++ */ +inflate_blocks_statef *s; +z_streamp z; +{ + inflate_huft *t; /* temporary pointer */ + uInt e; /* extra bits or operation */ + uLong b; /* bit buffer */ + uInt k; /* bits in bit buffer */ + Bytef *p; /* input data pointer */ + uInt n; /* bytes available there */ + Bytef *q; /* output window write pointer */ + uInt m; /* bytes to end of window or read pointer */ + uInt ml; /* mask for literal/length tree */ + uInt md; /* mask for distance tree */ + uInt c; /* bytes to copy */ + uInt d; /* distance back to copy from */ + Bytef *r; /* copy source pointer */ + + /* load input, output, bit values */ + LOAD + + /* initialize masks */ + ml = inflate_mask[bl]; + md = inflate_mask[bd]; + + /* do until not enough input or output space for fast loop */ + do { /* assume called with m >= 258 && n >= 10 */ + /* get literal/length code */ + GRABBITS(20) /* max bits for literal/length code */ + if ((e = (t = tl + ((uInt)b & ml))->exop) == 0) + { + DUMPBITS(t->bits) + Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ? 
+ "inflate: * literal '%c'\n" : + "inflate: * literal 0x%02x\n", t->base)); + *q++ = (Byte)t->base; + m--; + continue; + } + do { + DUMPBITS(t->bits) + if (e & 16) + { + /* get extra bits for length */ + e &= 15; + c = t->base + ((uInt)b & inflate_mask[e]); + DUMPBITS(e) + Tracevv((stderr, "inflate: * length %u\n", c)); + + /* decode distance base of block to copy */ + GRABBITS(15); /* max bits for distance code */ + e = (t = td + ((uInt)b & md))->exop; + do { + DUMPBITS(t->bits) + if (e & 16) + { + /* get extra bits to add to distance base */ + e &= 15; + GRABBITS(e) /* get extra bits (up to 13) */ + d = t->base + ((uInt)b & inflate_mask[e]); + DUMPBITS(e) + Tracevv((stderr, "inflate: * distance %u\n", d)); + + /* do the copy */ + m -= c; + if ((uInt)(q - s->window) >= d) /* offset before dest */ + { /* just copy */ + r = q - d; + *q++ = *r++; c--; /* minimum count is three, */ + *q++ = *r++; c--; /* so unroll loop a little */ + } + else /* else offset after destination */ + { + e = d - (uInt)(q - s->window); /* bytes from offset to end */ + r = s->end - e; /* pointer to offset */ + if (c > e) /* if source crosses, */ + { + c -= e; /* copy to end of window */ + do { + *q++ = *r++; + } while (--e); + r = s->window; /* copy rest from start of window */ + } + } + do { /* copy all or what's left */ + *q++ = *r++; + } while (--c); + break; + } + else if ((e & 64) == 0) + { + t += t->base; + e = (t += ((uInt)b & inflate_mask[e]))->exop; + } + else + { + z->msg = (char*)"invalid distance code"; + UNGRAB + UPDATE + return Z_DATA_ERROR; + } + } while (1); + break; + } + if ((e & 64) == 0) + { + t += t->base; + if ((e = (t += ((uInt)b & inflate_mask[e]))->exop) == 0) + { + DUMPBITS(t->bits) + Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ? 
+ "inflate: * literal '%c'\n" : + "inflate: * literal 0x%02x\n", t->base)); + *q++ = (Byte)t->base; + m--; + break; + } + } + else if (e & 32) + { + Tracevv((stderr, "inflate: * end of block\n")); + UNGRAB + UPDATE + return Z_STREAM_END; + } + else + { + z->msg = (char*)"invalid literal/length code"; + UNGRAB + UPDATE + return Z_DATA_ERROR; + } + } while (1); + } while (m >= 258 && n >= 10); + + /* not enough input or output--restore pointers and return */ + UNGRAB + UPDATE + return Z_OK; +} diff --git a/src/utilfuns/zlib/inffast.h b/src/utilfuns/zlib/inffast.h new file mode 100644 index 0000000..8facec5 --- /dev/null +++ b/src/utilfuns/zlib/inffast.h @@ -0,0 +1,17 @@ +/* inffast.h -- header to use inffast.c + * Copyright (C) 1995-1998 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. 
 */

/* fast literal/length/distance decoder */
extern int inflate_fast OF((
    uInt,
    uInt,
    inflate_huft *,
    inflate_huft *,
    inflate_blocks_statef *,
    z_streamp ));
diff --git a/src/utilfuns/zlib/inflate.c b/src/utilfuns/zlib/inflate.c
new file mode 100644
index 0000000..32e9b8d
--- /dev/null
+++ b/src/utilfuns/zlib/inflate.c
@@ -0,0 +1,366 @@
/* inflate.c -- zlib interface to inflate modules
 * Copyright (C) 1995-1998 Mark Adler
 * For conditions of distribution and use, see copyright notice in zlib.h
 */

#include "zutil.h"
#include "infblock.h"

struct inflate_blocks_state {int dummy;}; /* for buggy compilers */

typedef enum {
      METHOD,   /* waiting for method byte */
      FLAG,     /* waiting for flag byte */
      DICT4,    /* four dictionary check bytes to go */
      DICT3,    /* three dictionary check bytes to go */
      DICT2,    /* two dictionary check bytes to go */
      DICT1,    /* one dictionary check byte to go */
      DICT0,    /* waiting for inflateSetDictionary */
      BLOCKS,   /* decompressing blocks */
      CHECK4,   /* four check bytes to go */
      CHECK3,   /* three check bytes to go */
      CHECK2,   /* two check bytes to go */
      CHECK1,   /* one check byte to go */
      DONE,     /* finished check, done */
      BAD}      /* got an error--stay here */
inflate_mode;

/* inflate private state */
struct internal_state {

  /* mode */
  inflate_mode  mode;   /* current inflate mode */

  /* mode dependent information */
  union {
    uInt method;        /* if FLAGS, method byte */
    struct {
      uLong was;                /* computed check value */
      uLong need;               /* stream check value */
    } check;            /* if CHECK, check values to compare */
    uInt marker;        /* if BAD, inflateSync's marker bytes count */
  } sub;        /* submode */

  /* mode independent information */
  int  nowrap;          /* flag for no wrapper */
  uInt wbits;           /* log2(window size)  (8..15, defaults to 15) */
  inflate_blocks_statef
    *blocks;            /* current inflate_blocks state */

};


/*
 * Reset a stream for a new decompression run: clears the totals and message,
 * selects the starting mode (BLOCKS when nowrap is set, otherwise METHOD)
 * and resets the blocks state.  Returns Z_STREAM_ERROR if z or its state is
 * Z_NULL, else Z_OK.
 */
int ZEXPORT inflateReset(z)
z_streamp z;
{
  if (z == Z_NULL || z->state == Z_NULL)
    return Z_STREAM_ERROR;
  z->total_in = z->total_out = 0;
  z->msg = Z_NULL;
  z->state->mode = z->state->nowrap ? BLOCKS : METHOD;
  inflate_blocks_reset(z->state->blocks, z, Z_NULL);
  Tracev((stderr, "inflate: reset\n"));
  return Z_OK;
}


/*
 * Free the blocks state (if any) and the internal state, and set z->state
 * to Z_NULL.  Returns Z_STREAM_ERROR if z, its state or its zfree hook is
 * Z_NULL, else Z_OK.
 */
int ZEXPORT inflateEnd(z)
z_streamp z;
{
  if (z == Z_NULL || z->state == Z_NULL || z->zfree == Z_NULL)
    return Z_STREAM_ERROR;
  if (z->state->blocks != Z_NULL)
    inflate_blocks_free(z->state->blocks, z);
  ZFREE(z, z->state);
  z->state = Z_NULL;
  Tracev((stderr, "inflate: end\n"));
  return Z_OK;
}


/*
 * Initialize a stream for inflation with a window of 2^|w| bytes.
 *
 * z:           stream to initialize
 * w:           window bits, 8..15; a negative value selects "nowrap" mode
 * version:     caller's zlib version string (first character is compared)
 * stream_size: caller's sizeof(z_stream)
 *
 * Returns Z_VERSION_ERROR on a version/size mismatch, Z_STREAM_ERROR for a
 * null stream or out-of-range window bits, Z_MEM_ERROR if an allocation
 * fails, else Z_OK.
 */
int ZEXPORT inflateInit2_(z, w, version, stream_size)
z_streamp z;
int w;
const char *version;
int stream_size;
{
  if (version == Z_NULL || version[0] != ZLIB_VERSION[0] ||
      stream_size != sizeof(z_stream))
      return Z_VERSION_ERROR;

  /* initialize state */
  if (z == Z_NULL)
    return Z_STREAM_ERROR;
  z->msg = Z_NULL;
  if (z->zalloc == Z_NULL)
  {
    z->zalloc = zcalloc;
    z->opaque = (voidpf)0;
  }
  if (z->zfree == Z_NULL) z->zfree = zcfree;
  if ((z->state = (struct internal_state FAR *)
       ZALLOC(z,1,sizeof(struct internal_state))) == Z_NULL)
    return Z_MEM_ERROR;
  /* must be Z_NULL before any inflateEnd() call below */
  z->state->blocks = Z_NULL;

  /* handle undocumented nowrap option (no zlib header or check) */
  z->state->nowrap = 0;
  if (w < 0)
  {
    w = - w;
    z->state->nowrap = 1;
  }

  /* set window size */
  if (w < 8 || w > 15)
  {
    inflateEnd(z);
    return Z_STREAM_ERROR;
  }
  z->state->wbits = (uInt)w;

  /* create inflate_blocks state */
  if ((z->state->blocks =
      inflate_blocks_new(z, z->state->nowrap ? Z_NULL : adler32, (uInt)1 << w))
      == Z_NULL)
  {
    inflateEnd(z);
    return Z_MEM_ERROR;
  }
  Tracev((stderr, "inflate: allocated\n"));

  /* reset state */
  inflateReset(z);
  return Z_OK;
}


/* Same as inflateInit2_() with the default window bits DEF_WBITS. */
int ZEXPORT inflateInit_(z, version, stream_size)
z_streamp z;
const char *version;
int stream_size;
{
  return inflateInit2_(z, DEF_WBITS, version, stream_size);
}


/* NEEDBYTE: return the pending code r if no input is left, otherwise arm r
   with f.  NEXTBYTE: consume one input byte and update the counters. */
#define NEEDBYTE {if(z->avail_in==0)return r;r=f;}
#define NEXTBYTE (z->avail_in--,z->total_in++,*z->next_in++)

/*
 * Main decompression entry point: parses the two-byte header and optional
 * dictionary id, runs the block decoder, then reads and compares the
 * four-byte trailing check value.
 *
 * z: stream
 * f: flush parameter; Z_FINISH makes a lack of progress report Z_BUF_ERROR
 *
 * Cases fall through to the next state on purpose; each NEEDBYTE may return
 * to the caller and the state is resumed on the next call.
 */
int ZEXPORT inflate(z, f)
z_streamp z;
int f;
{
  int r;
  uInt b;

  if (z == Z_NULL || z->state == Z_NULL || z->next_in == Z_NULL)
    return Z_STREAM_ERROR;
  f = f == Z_FINISH ? Z_BUF_ERROR : Z_OK;
  r = Z_BUF_ERROR;
  while (1) switch (z->state->mode)
  {
    case METHOD:
      NEEDBYTE
      if (((z->state->sub.method = NEXTBYTE) & 0xf) != Z_DEFLATED)
      {
        z->state->mode = BAD;
        z->msg = (char*)"unknown compression method";
        z->state->sub.marker = 5;       /* can't try inflateSync */
        break;
      }
      if ((z->state->sub.method >> 4) + 8 > z->state->wbits)
      {
        z->state->mode = BAD;
        z->msg = (char*)"invalid window size";
        z->state->sub.marker = 5;       /* can't try inflateSync */
        break;
      }
      z->state->mode = FLAG;
    case FLAG:
      NEEDBYTE
      b = NEXTBYTE;
      /* method and flag bytes together must be a multiple of 31 */
      if (((z->state->sub.method << 8) + b) % 31)
      {
        z->state->mode = BAD;
        z->msg = (char*)"incorrect header check";
        z->state->sub.marker = 5;       /* can't try inflateSync */
        break;
      }
      Tracev((stderr, "inflate: zlib header ok\n"));
      if (!(b & PRESET_DICT))
      {
        z->state->mode = BLOCKS;
        break;
      }
      z->state->mode = DICT4;
    case DICT4:
      NEEDBYTE
      z->state->sub.check.need = (uLong)NEXTBYTE << 24;
      z->state->mode = DICT3;
    case DICT3:
      NEEDBYTE
      z->state->sub.check.need += (uLong)NEXTBYTE << 16;
      z->state->mode = DICT2;
    case DICT2:
      NEEDBYTE
      z->state->sub.check.need += (uLong)NEXTBYTE << 8;
      z->state->mode = DICT1;
    case DICT1:
      NEEDBYTE
      z->state->sub.check.need += (uLong)NEXTBYTE;
      /* expose the dictionary id so inflateSetDictionary can verify it */
      z->adler = z->state->sub.check.need;
      z->state->mode = DICT0;
      return Z_NEED_DICT;
    case DICT0:
      z->state->mode = BAD;
      z->msg = (char*)"need dictionary";
      z->state->sub.marker = 0;       /* can try inflateSync */
      return Z_STREAM_ERROR;
    case BLOCKS:
      r = inflate_blocks(z->state->blocks, z, r);
      if (r == Z_DATA_ERROR)
      {
        z->state->mode = BAD;
        z->state->sub.marker = 0;       /* can try inflateSync */
        break;
      }
      if (r == Z_OK)
        r = f;
      if (r != Z_STREAM_END)
        return r;
      r = f;
      /* fetch the computed check value while resetting the blocks state */
      inflate_blocks_reset(z->state->blocks, z, &z->state->sub.check.was);
      if (z->state->nowrap)
      {
        z->state->mode = DONE;
        break;
      }
      z->state->mode = CHECK4;
    case CHECK4:
      NEEDBYTE
      z->state->sub.check.need = (uLong)NEXTBYTE << 24;
      z->state->mode = CHECK3;
    case CHECK3:
      NEEDBYTE
      z->state->sub.check.need += (uLong)NEXTBYTE << 16;
      z->state->mode = CHECK2;
    case CHECK2:
      NEEDBYTE
      z->state->sub.check.need += (uLong)NEXTBYTE << 8;
      z->state->mode = CHECK1;
    case CHECK1:
      NEEDBYTE
      z->state->sub.check.need += (uLong)NEXTBYTE;

      if (z->state->sub.check.was != z->state->sub.check.need)
      {
        z->state->mode = BAD;
        z->msg = (char*)"incorrect data check";
        z->state->sub.marker = 5;       /* can't try inflateSync */
        break;
      }
      Tracev((stderr, "inflate: zlib check ok\n"));
      z->state->mode = DONE;
    case DONE:
      return Z_STREAM_END;
    case BAD:
      return Z_DATA_ERROR;
    default:
      return Z_STREAM_ERROR;
  }
#ifdef NEED_DUMMY_RETURN
  return Z_STREAM_ERROR;  /* Some dumb compilers complain without this */
#endif
}


/*
 * Supply the preset dictionary requested by a Z_NEED_DICT return.
 *
 * Only valid in DICT0 mode (else Z_STREAM_ERROR).  Returns Z_DATA_ERROR if
 * the Adler-32 of the dictionary differs from z->adler.  When the
 * dictionary is at least as large as the window, only its last
 * (window size - 1) bytes are used.
 */
int ZEXPORT inflateSetDictionary(z, dictionary, dictLength)
z_streamp z;
const Bytef *dictionary;
uInt  dictLength;
{
  uInt length = dictLength;

  if (z == Z_NULL || z->state == Z_NULL || z->state->mode != DICT0)
    return Z_STREAM_ERROR;

  if (adler32(1L, dictionary, dictLength) != z->adler) return Z_DATA_ERROR;
  z->adler = 1L;

  if (length >= ((uInt)1<<z->state->wbits))
  {
    length = (1<<z->state->wbits)-1;
    dictionary += dictLength - length;
  }
  inflate_set_dictionary(z->state->blocks, dictionary, length);
  z->state->mode = BLOCKS;
  return Z_OK;
}


/*
 * Scan the available input for the byte pattern 00 00 ff ff and, once it
 * has been seen in full, restart decoding in BLOCKS mode right after it.
 * The number of pattern bytes matched so far is kept in sub.marker so the
 * search can continue across calls.
 *
 * Returns Z_STREAM_ERROR for a null stream/state, Z_BUF_ERROR if no input
 * is available, Z_DATA_ERROR if the pattern was not completed, else Z_OK
 * (total_in and total_out are preserved across the internal reset).
 */
int ZEXPORT inflateSync(z)
z_streamp z;
{
  uInt n;       /* number of bytes to look at */
  Bytef *p;     /* pointer to bytes */
  uInt m;       /* number of marker bytes found in a row */
  uLong r, w;   /* temporaries to save total_in and total_out */

  /* set up */
  if (z == Z_NULL || z->state == Z_NULL)
    return Z_STREAM_ERROR;
  if (z->state->mode != BAD)
  {
    z->state->mode = BAD;
    z->state->sub.marker = 0;
  }
  if ((n = z->avail_in) == 0)
    return Z_BUF_ERROR;
  p = z->next_in;
  m = z->state->sub.marker;

  /* search */
  while (n && m < 4)
  {
    static const Byte mark[4] = {0, 0, 0xff, 0xff};
    if (*p == mark[m])
      m++;
    else if (*p)
      m = 0;
    else
      m = 4 - m;
    p++, n--;
  }

  /* restore */
  z->total_in += p - z->next_in;
  z->next_in = p;
  z->avail_in = n;
  z->state->sub.marker = m;

  /* return no joy or set up to restart on a new block */
  if (m != 4)
    return Z_DATA_ERROR;
  r = z->total_in;  w = z->total_out;
  inflateReset(z);
  z->total_in = r;  z->total_out = w;
  z->state->mode = BLOCKS;
  return Z_OK;
}


/* Returns true if inflate is currently at the end of a block generated
 * by Z_SYNC_FLUSH or Z_FULL_FLUSH. This function is used by one PPP
 * implementation to provide an additional safety check. PPP uses Z_SYNC_FLUSH
 * but removes the length bytes of the resulting empty stored block. When
 * decompressing, PPP checks that at the end of input packet, inflate is
 * waiting for these length bytes.
 * Returns Z_STREAM_ERROR if z, its state or its blocks state is Z_NULL.
 */
int ZEXPORT inflateSyncPoint(z)
z_streamp z;
{
  if (z == Z_NULL || z->state == Z_NULL || z->state->blocks == Z_NULL)
    return Z_STREAM_ERROR;
  return inflate_blocks_sync_point(z->state->blocks);
}
diff --git a/src/utilfuns/zlib/inftrees.c b/src/utilfuns/zlib/inftrees.c
new file mode 100644
index 0000000..ef1e0b6
--- /dev/null
+++ b/src/utilfuns/zlib/inftrees.c
@@ -0,0 +1,455 @@
/* inftrees.c -- generate Huffman trees for efficient decoding
 * Copyright (C) 1995-1998 Mark Adler
 * For conditions of distribution and use, see copyright notice in zlib.h
 */

#include "zutil.h"
#include "inftrees.h"

#if !defined(BUILDFIXED) && !defined(STDC)
#  define BUILDFIXED   /* non ANSI compilers may not accept inffixed.h */
#endif

const char inflate_copyright[] =
   " inflate 1.1.3 Copyright 1995-1998 Mark Adler ";
/*
  If you use the zlib library in a product, an acknowledgment is welcome
  in the documentation of your product. If for some reason you cannot
  include such an acknowledgment, I would appreciate that you keep this
  copyright string in the executable of your product.
 */
struct internal_state  {int dummy;}; /* for buggy compilers */

/* simplify the use of the inflate_huft type with some defines */
#define exop word.what.Exop
#define bits word.what.Bits


/* forward declaration of the table builder defined later in this file */
local int huft_build OF((
    uIntf *,            /* code lengths in bits */
    uInt,               /* number of codes */
    uInt,               /* number of "simple" codes */
    const uIntf *,      /* list of base values for non-simple codes */
    const uIntf *,      /* list of extra bits for non-simple codes */
    inflate_huft * FAR*,/* result: starting table */
    uIntf *,            /* maximum lookup bits (returns actual) */
    inflate_huft *,     /* space for trees */
    uInt *,             /* hufts used in space */
    uIntf * ));         /* space for values */

/* Tables for deflate from PKZIP's appnote.txt.
 */
local const uInt cplens[31] = { /* Copy lengths for literal codes 257..285 */
        3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
        35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
        /* see note #13 above about 258 */
        /* NOTE(review): no numbered notes appear in this part of the file;
           presumably they live in another zlib source file -- confirm */
local const uInt cplext[31] = { /* Extra bits for literal codes 257..285 */
        0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
        3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 112, 112}; /* 112==invalid */
local const uInt cpdist[30] = { /* Copy offsets for distance codes 0..29 */
        1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
        257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
        8193, 12289, 16385, 24577};
local const uInt cpdext[30] = { /* Extra bits for distance codes */
        0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
        7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
        12, 12, 13, 13};

/*
   Huffman code decoding is performed using a multi-level table lookup.
   The fastest way to decode is to simply build a lookup table whose
   size is determined by the longest code. However, the time it takes
   to build this table can also be a factor if the data being decoded
   is not very long. The most common codes are necessarily the
   shortest codes, so those codes dominate the decoding time, and hence
   the speed. The idea is you can have a shorter table that decodes the
   shorter, more probable codes, and then point to subsidiary tables for
   the longer codes. The time it costs to decode the longer codes is
   then traded against the time it takes to make longer tables.

   The results of this trade are in the variables lbits and dbits
   below. lbits is the number of bits the first level table for literal/
   length codes can decode in one step, and dbits is the same thing for
   the distance codes. Subsequent tables are also less than or equal to
   those sizes.
These values may be adjusted either when all of the + codes are shorter than that, in which case the longest code length in + bits is used, or when the shortest code is *longer* than the requested + table size, in which case the length of the shortest code in bits is + used. + + There are two different values for the two tables, since they code a + different number of possibilities each. The literal/length table + codes 286 possible values, or in a flat code, a little over eight + bits. The distance table codes 30 possible values, or a little less + than five bits, flat. The optimum values for speed end up being + about one bit more than those, so lbits is 8+1 and dbits is 5+1. + The optimum values may differ though from machine to machine, and + possibly even between compilers. Your mileage may vary. + */ + + +/* If BMAX needs to be larger than 16, then h and x[] should be uLong. */ +#define BMAX 15 /* maximum bit length of any code */ + +local int huft_build(b, n, s, d, e, t, m, hp, hn, v) +uIntf *b; /* code lengths in bits (all assumed <= BMAX) */ +uInt n; /* number of codes (assumed <= 288) */ +uInt s; /* number of simple-valued codes (0..s-1) */ +const uIntf *d; /* list of base values for non-simple codes */ +const uIntf *e; /* list of extra bits for non-simple codes */ +inflate_huft * FAR *t; /* result: starting table */ +uIntf *m; /* maximum lookup bits, returns actual */ +inflate_huft *hp; /* space for trees */ +uInt *hn; /* hufts used in space */ +uIntf *v; /* working area: values in order of bit length */ +/* Given a list of code lengths and a maximum table size, make a set of + tables to decode that set of codes. Return Z_OK on success, Z_BUF_ERROR + if the given code set is incomplete (the tables are still built in this + case), Z_DATA_ERROR if the input is invalid (an over-subscribed set of + lengths), or Z_MEM_ERROR if not enough memory. 
*/ +{ + + uInt a; /* counter for codes of length k */ + uInt c[BMAX+1]; /* bit length count table */ + uInt f; /* i repeats in table every f entries */ + int g; /* maximum code length */ + int h; /* table level */ + register uInt i; /* counter, current code */ + register uInt j; /* counter */ + register int k; /* number of bits in current code */ + int l; /* bits per table (returned in m) */ + uInt mask; /* (1 << w) - 1, to avoid cc -O bug on HP */ + register uIntf *p; /* pointer into c[], b[], or v[] */ + inflate_huft *q; /* points to current table */ + struct inflate_huft_s r; /* table entry for structure assignment */ + inflate_huft *u[BMAX]; /* table stack */ + register int w; /* bits before this table == (l * h) */ + uInt x[BMAX+1]; /* bit offsets, then code stack */ + uIntf *xp; /* pointer into x */ + int y; /* number of dummy codes added */ + uInt z; /* number of entries in current table */ + + + /* Generate counts for each bit length */ + p = c; +#define C0 *p++ = 0; +#define C2 C0 C0 C0 C0 +#define C4 C2 C2 C2 C2 + C4 /* clear c[]--assume BMAX+1 is 16 */ + p = b; i = n; + do { + c[*p++]++; /* assume all entries <= BMAX */ + } while (--i); + if (c[0] == n) /* null input--all zero length codes */ + { + *t = (inflate_huft *)Z_NULL; + *m = 0; + return Z_OK; + } + + + /* Find minimum and maximum length, bound *m by those */ + l = *m; + for (j = 1; j <= BMAX; j++) + if (c[j]) + break; + k = j; /* minimum code length */ + if ((uInt)l < j) + l = j; + for (i = BMAX; i; i--) + if (c[i]) + break; + g = i; /* maximum code length */ + if ((uInt)l > i) + l = i; + *m = l; + + + /* Adjust last length count to fill out codes, if needed */ + for (y = 1 << j; j < i; j++, y <<= 1) + if ((y -= c[j]) < 0) + return Z_DATA_ERROR; + if ((y -= c[i]) < 0) + return Z_DATA_ERROR; + c[i] += y; + + + /* Generate starting offsets into the value table for each length */ + x[1] = j = 0; + p = c + 1; xp = x + 2; + while (--i) { /* note that i == g from above */ + *xp++ = (j += *p++); + } + 
+ + /* Make a table of values in order of bit lengths */ + p = b; i = 0; + do { + if ((j = *p++) != 0) + v[x[j]++] = i; + } while (++i < n); + n = x[g]; /* set n to length of v */ + + + /* Generate the Huffman codes and for each, make the table entries */ + x[0] = i = 0; /* first Huffman code is zero */ + p = v; /* grab values in bit order */ + h = -1; /* no tables yet--level -1 */ + w = -l; /* bits decoded == (l * h) */ + u[0] = (inflate_huft *)Z_NULL; /* just to keep compilers happy */ + q = (inflate_huft *)Z_NULL; /* ditto */ + z = 0; /* ditto */ + + /* go through the bit lengths (k already is bits in shortest code) */ + for (; k <= g; k++) + { + a = c[k]; + while (a--) + { + /* here i is the Huffman code of length k bits for value *p */ + /* make tables up to required level */ + while (k > w + l) + { + h++; + w += l; /* previous table always l bits */ + + /* compute minimum size table less than or equal to l bits */ + z = g - w; + z = z > (uInt)l ? l : z; /* table size upper limit */ + if ((f = 1 << (j = k - w)) > a + 1) /* try a k-w bit table */ + { /* too few codes for k-w bit table */ + f -= a + 1; /* deduct codes from patterns left */ + xp = c + k; + if (j < z) + while (++j < z) /* try smaller tables up to z bits */ + { + if ((f <<= 1) <= *++xp) + break; /* enough codes to use up j bits */ + f -= *xp; /* else deduct codes from patterns */ + } + } + z = 1 << j; /* table entries for j-bit table */ + + /* allocate new table */ + if (*hn + z > MANY) /* (note: doesn't matter for fixed) */ + return Z_MEM_ERROR; /* not enough memory */ + u[h] = q = hp + *hn; + *hn += z; + + /* connect to last table, if there is one */ + if (h) + { + x[h] = i; /* save pattern for backing up */ + r.bits = (Byte)l; /* bits to dump before this table */ + r.exop = (Byte)j; /* bits in this table */ + j = i >> (w - l); + r.base = (uInt)(q - u[h-1] - j); /* offset to this table */ + u[h-1][j] = r; /* connect to last table */ + } + else + *t = q; /* first table is returned result */ + } + 
+ /* set up table entry in r */ + r.bits = (Byte)(k - w); + if (p >= v + n) + r.exop = 128 + 64; /* out of values--invalid code */ + else if (*p < s) + { + r.exop = (Byte)(*p < 256 ? 0 : 32 + 64); /* 256 is end-of-block */ + r.base = *p++; /* simple code is just the value */ + } + else + { + r.exop = (Byte)(e[*p - s] + 16 + 64);/* non-simple--look up in lists */ + r.base = d[*p++ - s]; + } + + /* fill code-like entries with r */ + f = 1 << (k - w); + for (j = i >> w; j < z; j += f) + q[j] = r; + + /* backwards increment the k-bit code i */ + for (j = 1 << (k - 1); i & j; j >>= 1) + i ^= j; + i ^= j; + + /* backup over finished tables */ + mask = (1 << w) - 1; /* needed on HP, cc -O bug */ + while ((i & mask) != x[h]) + { + h--; /* don't need to update q */ + w -= l; + mask = (1 << w) - 1; + } + } + } + + + /* Return Z_BUF_ERROR if we were given an incomplete table */ + return y != 0 && g != 1 ? Z_BUF_ERROR : Z_OK; +} + + +int inflate_trees_bits(c, bb, tb, hp, z) +uIntf *c; /* 19 code lengths */ +uIntf *bb; /* bits tree desired/actual depth */ +inflate_huft * FAR *tb; /* bits tree result */ +inflate_huft *hp; /* space for trees */ +z_streamp z; /* for messages */ +{ + int r; + uInt hn = 0; /* hufts used in space */ + uIntf *v; /* work area for huft_build */ + + if ((v = (uIntf*)ZALLOC(z, 19, sizeof(uInt))) == Z_NULL) + return Z_MEM_ERROR; + r = huft_build(c, 19, 19, (uIntf*)Z_NULL, (uIntf*)Z_NULL, + tb, bb, hp, &hn, v); + if (r == Z_DATA_ERROR) + z->msg = (char*)"oversubscribed dynamic bit lengths tree"; + else if (r == Z_BUF_ERROR || *bb == 0) + { + z->msg = (char*)"incomplete dynamic bit lengths tree"; + r = Z_DATA_ERROR; + } + ZFREE(z, v); + return r; +} + + +int inflate_trees_dynamic(nl, nd, c, bl, bd, tl, td, hp, z) +uInt nl; /* number of literal/length codes */ +uInt nd; /* number of distance codes */ +uIntf *c; /* that many (total) code lengths */ +uIntf *bl; /* literal desired/actual bit depth */ +uIntf *bd; /* distance desired/actual bit depth */ 
+inflate_huft * FAR *tl; /* literal/length tree result */ +inflate_huft * FAR *td; /* distance tree result */ +inflate_huft *hp; /* space for trees */ +z_streamp z; /* for messages */ +{ + int r; + uInt hn = 0; /* hufts used in space */ + uIntf *v; /* work area for huft_build */ + + /* allocate work area */ + if ((v = (uIntf*)ZALLOC(z, 288, sizeof(uInt))) == Z_NULL) + return Z_MEM_ERROR; + + /* build literal/length tree */ + r = huft_build(c, nl, 257, cplens, cplext, tl, bl, hp, &hn, v); + if (r != Z_OK || *bl == 0) + { + if (r == Z_DATA_ERROR) + z->msg = (char*)"oversubscribed literal/length tree"; + else if (r != Z_MEM_ERROR) + { + z->msg = (char*)"incomplete literal/length tree"; + r = Z_DATA_ERROR; + } + ZFREE(z, v); + return r; + } + + /* build distance tree */ + r = huft_build(c + nl, nd, 0, cpdist, cpdext, td, bd, hp, &hn, v); + if (r != Z_OK || (*bd == 0 && nl > 257)) + { + if (r == Z_DATA_ERROR) + z->msg = (char*)"oversubscribed distance tree"; + else if (r == Z_BUF_ERROR) { +#ifdef PKZIP_BUG_WORKAROUND + r = Z_OK; + } +#else + z->msg = (char*)"incomplete distance tree"; + r = Z_DATA_ERROR; + } + else if (r != Z_MEM_ERROR) + { + z->msg = (char*)"empty distance tree with lengths"; + r = Z_DATA_ERROR; + } + ZFREE(z, v); + return r; +#endif + } + + /* done */ + ZFREE(z, v); + return Z_OK; +} + + +/* build fixed tables only once--keep them here */ +#ifdef BUILDFIXED +local int fixed_built = 0; +#define FIXEDH 544 /* number of hufts used by fixed tables */ +local inflate_huft fixed_mem[FIXEDH]; +local uInt fixed_bl; +local uInt fixed_bd; +local inflate_huft *fixed_tl; +local inflate_huft *fixed_td; +#else +#include "inffixed.h" +#endif + + +int inflate_trees_fixed(bl, bd, tl, td, z) +uIntf *bl; /* literal desired/actual bit depth */ +uIntf *bd; /* distance desired/actual bit depth */ +inflate_huft * FAR *tl; /* literal/length tree result */ +inflate_huft * FAR *td; /* distance tree result */ +z_streamp z; /* for memory allocation */ +{ +#ifdef BUILDFIXED + /* 
build fixed tables if not already */ + if (!fixed_built) + { + int k; /* temporary variable */ + uInt f = 0; /* number of hufts used in fixed_mem */ + uIntf *c; /* length list for huft_build */ + uIntf *v; /* work area for huft_build */ + + /* allocate memory */ + if ((c = (uIntf*)ZALLOC(z, 288, sizeof(uInt))) == Z_NULL) + return Z_MEM_ERROR; + if ((v = (uIntf*)ZALLOC(z, 288, sizeof(uInt))) == Z_NULL) + { + ZFREE(z, c); + return Z_MEM_ERROR; + } + + /* literal table */ + for (k = 0; k < 144; k++) + c[k] = 8; + for (; k < 256; k++) + c[k] = 9; + for (; k < 280; k++) + c[k] = 7; + for (; k < 288; k++) + c[k] = 8; + fixed_bl = 9; + huft_build(c, 288, 257, cplens, cplext, &fixed_tl, &fixed_bl, + fixed_mem, &f, v); + + /* distance table */ + for (k = 0; k < 30; k++) + c[k] = 5; + fixed_bd = 5; + huft_build(c, 30, 0, cpdist, cpdext, &fixed_td, &fixed_bd, + fixed_mem, &f, v); + + /* done */ + ZFREE(z, v); + ZFREE(z, c); + fixed_built = 1; + } +#endif + *bl = fixed_bl; + *bd = fixed_bd; + *tl = fixed_tl; + *td = fixed_td; + return Z_OK; +} diff --git a/src/utilfuns/zlib/inftrees.h b/src/utilfuns/zlib/inftrees.h new file mode 100644 index 0000000..85853e0 --- /dev/null +++ b/src/utilfuns/zlib/inftrees.h @@ -0,0 +1,58 @@ +/* inftrees.h -- header to use inftrees.c + * Copyright (C) 1995-1998 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* Huffman code lookup table entry--this entry is four bytes for machines + that have 16-bit pointers (e.g. PC's in the small or medium model). 
*/ + +typedef struct inflate_huft_s FAR inflate_huft; + +struct inflate_huft_s { + union { + struct { + Byte Exop; /* number of extra bits or operation */ + Byte Bits; /* number of bits in this code or subcode */ + } what; + uInt pad; /* pad structure to a power of 2 (4 bytes for */ + } word; /* 16-bit, 8 bytes for 32-bit int's) */ + uInt base; /* literal, length base, distance base, + or table offset */ +}; + +/* Maximum size of dynamic tree. The maximum found in a long but non- + exhaustive search was 1004 huft structures (850 for length/literals + and 154 for distances, the latter actually the result of an + exhaustive search). The actual maximum is not known, but the + value below is more than safe. */ +#define MANY 1440 + +extern int inflate_trees_bits OF(( + uIntf *, /* 19 code lengths */ + uIntf *, /* bits tree desired/actual depth */ + inflate_huft * FAR *, /* bits tree result */ + inflate_huft *, /* space for trees */ + z_streamp)); /* for messages */ + +extern int inflate_trees_dynamic OF(( + uInt, /* number of literal/length codes */ + uInt, /* number of distance codes */ + uIntf *, /* that many (total) code lengths */ + uIntf *, /* literal desired/actual bit depth */ + uIntf *, /* distance desired/actual bit depth */ + inflate_huft * FAR *, /* literal/length tree result */ + inflate_huft * FAR *, /* distance tree result */ + inflate_huft *, /* space for trees */ + z_streamp)); /* for messages */ + +extern int inflate_trees_fixed OF(( + uIntf *, /* literal desired/actual bit depth */ + uIntf *, /* distance desired/actual bit depth */ + inflate_huft * FAR *, /* literal/length tree result */ + inflate_huft * FAR *, /* distance tree result */ + z_streamp)); /* for memory allocation */ diff --git a/src/utilfuns/zlib/infutil.c b/src/utilfuns/zlib/infutil.c new file mode 100644 index 0000000..824dab5 --- /dev/null +++ b/src/utilfuns/zlib/infutil.c @@ -0,0 +1,87 @@ +/* inflate_util.c -- data and routines common to blocks and codes + * Copyright (C) 1995-1998 
Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zutil.h" +#include "infblock.h" +#include "inftrees.h" +#include "infcodes.h" +#include "infutil.h" + +struct inflate_codes_state {int dummy;}; /* for buggy compilers */ + +/* And'ing with mask[n] masks the lower n bits */ +uInt inflate_mask[17] = { + 0x0000, + 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff, + 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff +}; + + +/* copy as much as possible from the sliding window to the output area */ +int inflate_flush(s, z, r) +inflate_blocks_statef *s; +z_streamp z; +int r; +{ + uInt n; + Bytef *p; + Bytef *q; + + /* local copies of source and destination pointers */ + p = z->next_out; + q = s->read; + + /* compute number of bytes to copy as far as end of window */ + n = (uInt)((q <= s->write ? s->write : s->end) - q); + if (n > z->avail_out) n = z->avail_out; + if (n && r == Z_BUF_ERROR) r = Z_OK; + + /* update counters */ + z->avail_out -= n; + z->total_out += n; + + /* update check information */ + if (s->checkfn != Z_NULL) + z->adler = s->check = (*s->checkfn)(s->check, q, n); + + /* copy as far as end of window */ + zmemcpy(p, q, n); + p += n; + q += n; + + /* see if more to copy at beginning of window */ + if (q == s->end) + { + /* wrap pointers */ + q = s->window; + if (s->write == s->end) + s->write = s->window; + + /* compute bytes to copy */ + n = (uInt)(s->write - q); + if (n > z->avail_out) n = z->avail_out; + if (n && r == Z_BUF_ERROR) r = Z_OK; + + /* update counters */ + z->avail_out -= n; + z->total_out += n; + + /* update check information */ + if (s->checkfn != Z_NULL) + z->adler = s->check = (*s->checkfn)(s->check, q, n); + + /* copy */ + zmemcpy(p, q, n); + p += n; + q += n; + } + + /* update pointers */ + z->next_out = p; + s->read = q; + + /* done */ + return r; +} diff --git a/src/utilfuns/zlib/infutil.h b/src/utilfuns/zlib/infutil.h new file mode 100644 index 
0000000..99d1135 --- /dev/null +++ b/src/utilfuns/zlib/infutil.h @@ -0,0 +1,98 @@ +/* infutil.h -- types and macros common to blocks and codes + * Copyright (C) 1995-1998 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +#ifndef _INFUTIL_H +#define _INFUTIL_H + +typedef enum { + TYPE, /* get type bits (3, including end bit) */ + LENS, /* get lengths for stored */ + STORED, /* processing stored block */ + TABLE, /* get table lengths */ + BTREE, /* get bit lengths tree for a dynamic block */ + DTREE, /* get length, distance trees for a dynamic block */ + CODES, /* processing fixed or dynamic block */ + DRY, /* output remaining window bytes */ + DONE, /* finished last block, done */ + BAD} /* got a data error--stuck here */ +inflate_block_mode; + +/* inflate blocks semi-private state */ +struct inflate_blocks_state { + + /* mode */ + inflate_block_mode mode; /* current inflate_block mode */ + + /* mode dependent information */ + union { + uInt left; /* if STORED, bytes left to copy */ + struct { + uInt table; /* table lengths (14 bits) */ + uInt index; /* index into blens (or border) */ + uIntf *blens; /* bit lengths of codes */ + uInt bb; /* bit length tree depth */ + inflate_huft *tb; /* bit length decoding tree */ + } trees; /* if DTREE, decoding info for trees */ + struct { + inflate_codes_statef + *codes; + } decode; /* if CODES, current state */ + } sub; /* submode */ + uInt last; /* true if this block is the last block */ + + /* mode independent information */ + uInt bitk; /* bits in bit buffer */ + uLong bitb; /* bit buffer */ + inflate_huft *hufts; /* single malloc for tree space */ + Bytef *window; /* sliding window */ + Bytef *end; /* one byte after sliding window */ + Bytef *read; /* window read pointer */ + Bytef *write; 
/* window write pointer */ + check_func checkfn; /* check function */ + uLong check; /* check on output */ + +}; + + +/* defines for inflate input/output */ +/* update pointers and return */ +#define UPDBITS {s->bitb=b;s->bitk=k;} +#define UPDIN {z->avail_in=n;z->total_in+=p-z->next_in;z->next_in=p;} +#define UPDOUT {s->write=q;} +#define UPDATE {UPDBITS UPDIN UPDOUT} +#define LEAVE {UPDATE return inflate_flush(s,z,r);} +/* get bytes and bits */ +#define LOADIN {p=z->next_in;n=z->avail_in;b=s->bitb;k=s->bitk;} +#define NEEDBYTE {if(n)r=Z_OK;else LEAVE} +#define NEXTBYTE (n--,*p++) +#define NEEDBITS(j) {while(k<(j)){NEEDBYTE;b|=((uLong)NEXTBYTE)<<k;k+=8;}} +#define DUMPBITS(j) {b>>=(j);k-=(j);} +/* output bytes */ +#define WAVAIL (uInt)(q<s->read?s->read-q-1:s->end-q) +#define LOADOUT {q=s->write;m=(uInt)WAVAIL;} +#define WRAP {if(q==s->end&&s->read!=s->window){q=s->window;m=(uInt)WAVAIL;}} +#define FLUSH {UPDOUT r=inflate_flush(s,z,r); LOADOUT} +#define NEEDOUT {if(m==0){WRAP if(m==0){FLUSH WRAP if(m==0) LEAVE}}r=Z_OK;} +#define OUTBYTE(a) {*q++=(Byte)(a);m--;} +/* load local pointers */ +#define LOAD {LOADIN LOADOUT} + +/* masks for lower bits (size given to avoid silly warnings with Visual C++) */ +extern uInt inflate_mask[17]; + +/* copy as much as possible from the sliding window to the output area */ +extern int inflate_flush OF(( + inflate_blocks_statef *, + z_streamp , + int)); + +struct internal_state {int dummy;}; /* for buggy compilers */ + +#endif diff --git a/src/utilfuns/zlib/maketree.c b/src/utilfuns/zlib/maketree.c new file mode 100644 index 0000000..949d786 --- /dev/null +++ b/src/utilfuns/zlib/maketree.c @@ -0,0 +1,85 @@ +/* maketree.c -- make inffixed.h table for decoding fixed codes + * Copyright (C) 1998 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. 
Applications should only use zlib.h. + */ + +/* This program is included in the distribution for completeness. + You do not need to compile or run this program since inffixed.h + is already included in the distribution. To use this program + you need to compile zlib with BUILDFIXED defined and then compile + and link this program with the zlib library. Then the output of + this program can be piped to inffixed.h. */ + +#include <stdio.h> +#include <stdlib.h> +#include "zutil.h" +#include "inftrees.h" + +/* simplify the use of the inflate_huft type with some defines */ +#define exop word.what.Exop +#define bits word.what.Bits + +/* generate initialization table for an inflate_huft structure array */ +void maketree(uInt b, inflate_huft *t) +{ + int i, e; + + i = 0; + while (1) + { + e = t[i].exop; + if (e && (e & (16+64)) == 0) /* table pointer */ + { + fprintf(stderr, "maketree: cannot initialize sub-tables!\n"); + exit(1); + } + if (i % 4 == 0) + printf("\n "); + printf(" {{{%u,%u}},%u}", t[i].exop, t[i].bits, t[i].base); + if (++i == (1<<b)) + break; + putchar(','); + } + puts(""); +} + +/* create the fixed tables in C initialization syntax */ +void main(void) +{ + int r; + uInt bl, bd; + inflate_huft *tl, *td; + z_stream z; + + z.zalloc = zcalloc; + z.opaque = (voidpf)0; + z.zfree = zcfree; + r = inflate_trees_fixed(&bl, &bd, &tl, &td, &z); + if (r) + { + fprintf(stderr, "inflate_trees_fixed error %d\n", r); + return; + } + puts("/* inffixed.h -- table for decoding fixed codes"); + puts(" * Generated automatically by the maketree.c program"); + puts(" */"); + puts(""); + puts("/* WARNING: this file should *not* be used by applications. It is"); + puts(" part of the implementation of the compression library and is"); + puts(" subject to change. 
Applications should only use zlib.h."); + puts(" */"); + puts(""); + printf("local uInt fixed_bl = %d;\n", bl); + printf("local uInt fixed_bd = %d;\n", bd); + printf("local inflate_huft fixed_tl[] = {"); + maketree(bl, tl); + puts(" };"); + printf("local inflate_huft fixed_td[] = {"); + maketree(bd, td); + puts(" };"); +} diff --git a/src/utilfuns/zlib/trees.c b/src/utilfuns/zlib/trees.c new file mode 100644 index 0000000..4d59d6e --- /dev/null +++ b/src/utilfuns/zlib/trees.c @@ -0,0 +1,1214 @@ +/* trees.c -- output deflated data using Huffman coding + * Copyright (C) 1995-1998 Jean-loup Gailly + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * ALGORITHM + * + * The "deflation" process uses several Huffman trees. The more + * common source values are represented by shorter bit sequences. + * + * Each code tree is stored in a compressed form which is itself + * a Huffman encoding of the lengths of all the code strings (in + * ascending order by source values). The actual code strings are + * reconstructed from the lengths in the inflate process, as described + * in the deflate specification. + * + * REFERENCES + * + * Deutsch, L.P.,"'Deflate' Compressed Data Format Specification". + * Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc + * + * Storer, James A. + * Data Compression: Methods and Theory, pp. 49-50. + * Computer Science Press, 1988. ISBN 0-7167-8156-5. + * + * Sedgewick, R. + * Algorithms, p290. + * Addison-Wesley, 1983. ISBN 0-201-06672-6. 
+ */ + +/* @(#) $Id: trees.c,v 1.1 2001/03/23 09:00:15 scribe Exp $ */ + +/* #define GEN_TREES_H */ + +#include "deflate.h" + +#ifdef DEBUG +# include <ctype.h> +#endif + +/* =========================================================================== + * Constants + */ + +#define MAX_BL_BITS 7 +/* Bit length codes must not exceed MAX_BL_BITS bits */ + +#define END_BLOCK 256 +/* end of block literal code */ + +#define REP_3_6 16 +/* repeat previous bit length 3-6 times (2 bits of repeat count) */ + +#define REPZ_3_10 17 +/* repeat a zero length 3-10 times (3 bits of repeat count) */ + +#define REPZ_11_138 18 +/* repeat a zero length 11-138 times (7 bits of repeat count) */ + +local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */ + = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0}; + +local const int extra_dbits[D_CODES] /* extra bits for each distance code */ + = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + +local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */ + = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7}; + +local const uch bl_order[BL_CODES] + = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15}; +/* The lengths of the bit length codes are sent in order of decreasing + * probability, to avoid transmitting the lengths for unused bit length codes. + */ + +#define Buf_size (8 * 2*sizeof(char)) +/* Number of bits used within bi_buf. (bi_buf might be implemented on + * more than 16 bits on some systems.) + */ + +/* =========================================================================== + * Local data. These are initialized only once. + */ + +#define DIST_CODE_LEN 512 /* see definition of array dist_code below */ + +#if defined(GEN_TREES_H) || !defined(STDC) +/* non ANSI compilers may not accept trees.h */ + +local ct_data static_ltree[L_CODES+2]; +/* The static literal tree. 
Since the bit lengths are imposed, there is no + * need for the L_CODES extra codes used during heap construction. However, + * the codes 286 and 287 are needed to build a canonical tree (see _tr_init + * below). + */ + +local ct_data static_dtree[D_CODES]; +/* The static distance tree. (Actually a trivial tree since all codes use + * 5 bits.) + */ + +uch _dist_code[DIST_CODE_LEN]; +/* Distance codes. The first 256 values correspond to the distances + * 3 .. 258, the last 256 values correspond to the top 8 bits of + * the 15 bit distances. + */ + +uch _length_code[MAX_MATCH-MIN_MATCH+1]; +/* length code for each normalized match length (0 == MIN_MATCH) */ + +local int base_length[LENGTH_CODES]; +/* First normalized length for each code (0 = MIN_MATCH) */ + +local int base_dist[D_CODES]; +/* First normalized distance for each code (0 = distance of 1) */ + +#else +# include "trees.h" +#endif /* GEN_TREES_H */ + +struct static_tree_desc_s { + const ct_data *static_tree; /* static tree or NULL */ + const intf *extra_bits; /* extra bits for each code or NULL */ + int extra_base; /* base index for extra_bits */ + int elems; /* max number of elements in the tree */ + int max_length; /* max bit length for the codes */ +}; + +local static_tree_desc static_l_desc = +{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS}; + +local static_tree_desc static_d_desc = +{static_dtree, extra_dbits, 0, D_CODES, MAX_BITS}; + +local static_tree_desc static_bl_desc = +{(const ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS}; + +/* =========================================================================== + * Local (static) routines in this file.
+ */ + +local void tr_static_init OF((void)); +local void init_block OF((deflate_state *s)); +local void pqdownheap OF((deflate_state *s, ct_data *tree, int k)); +local void gen_bitlen OF((deflate_state *s, tree_desc *desc)); +local void gen_codes OF((ct_data *tree, int max_code, ushf *bl_count)); +local void build_tree OF((deflate_state *s, tree_desc *desc)); +local void scan_tree OF((deflate_state *s, ct_data *tree, int max_code)); +local void send_tree OF((deflate_state *s, ct_data *tree, int max_code)); +local int build_bl_tree OF((deflate_state *s)); +local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes, + int blcodes)); +local void compress_block OF((deflate_state *s, ct_data *ltree, + ct_data *dtree)); +local void set_data_type OF((deflate_state *s)); +local unsigned bi_reverse OF((unsigned value, int length)); +local void bi_windup OF((deflate_state *s)); +local void bi_flush OF((deflate_state *s)); +local void copy_block OF((deflate_state *s, charf *buf, unsigned len, + int header)); + +#ifdef GEN_TREES_H +local void gen_trees_header OF((void)); +#endif + +#ifndef DEBUG +# define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len) + /* Send a code of the given tree. c and tree must not have side effects */ + +#else /* DEBUG */ +# define send_code(s, c, tree) \ + { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \ + send_bits(s, tree[c].Code, tree[c].Len); } +#endif + +/* =========================================================================== + * Output a short LSB first on the stream. + * IN assertion: there is enough room in pendingBuf. + */ +#define put_short(s, w) { \ + put_byte(s, (uch)((w) & 0xff)); \ + put_byte(s, (uch)((ush)(w) >> 8)); \ +} + +/* =========================================================================== + * Send a value on a given number of bits. + * IN assertion: length <= 16 and value fits in length bits. 
+ */ +#ifdef DEBUG +local void send_bits OF((deflate_state *s, int value, int length)); + +local void send_bits(s, value, length) + deflate_state *s; + int value; /* value to send */ + int length; /* number of bits */ +{ + Tracevv((stderr," l %2d v %4x ", length, value)); + Assert(length > 0 && length <= 15, "invalid length"); + s->bits_sent += (ulg)length; + + /* If not enough room in bi_buf, use (valid) bits from bi_buf and + * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid)) + * unused bits in value. + */ + if (s->bi_valid > (int)Buf_size - length) { + s->bi_buf |= (value << s->bi_valid); + put_short(s, s->bi_buf); + s->bi_buf = (ush)value >> (Buf_size - s->bi_valid); + s->bi_valid += length - Buf_size; + } else { + s->bi_buf |= value << s->bi_valid; + s->bi_valid += length; + } +} +#else /* !DEBUG */ + +#define send_bits(s, value, length) \ +{ int len = length;\ + if (s->bi_valid > (int)Buf_size - len) {\ + int val = value;\ + s->bi_buf |= (val << s->bi_valid);\ + put_short(s, s->bi_buf);\ + s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\ + s->bi_valid += len - Buf_size;\ + } else {\ + s->bi_buf |= (value) << s->bi_valid;\ + s->bi_valid += len;\ + }\ +} +#endif /* DEBUG */ + + +#define MAX(a,b) (a >= b ? a : b) +/* the arguments must not have side effects */ + +/* =========================================================================== + * Initialize the various 'constant' tables. 
+ */ +local void tr_static_init() +{ +#if defined(GEN_TREES_H) || !defined(STDC) + static int static_init_done = 0; + int n; /* iterates over tree elements */ + int bits; /* bit counter */ + int length; /* length value */ + int code; /* code value */ + int dist; /* distance index */ + ush bl_count[MAX_BITS+1]; + /* number of codes at each bit length for an optimal tree */ + + if (static_init_done) return; + + /* For some embedded targets, global variables are not initialized: */ + static_l_desc.static_tree = static_ltree; + static_l_desc.extra_bits = extra_lbits; + static_d_desc.static_tree = static_dtree; + static_d_desc.extra_bits = extra_dbits; + static_bl_desc.extra_bits = extra_blbits; + + /* Initialize the mapping length (0..255) -> length code (0..28) */ + length = 0; + for (code = 0; code < LENGTH_CODES-1; code++) { + base_length[code] = length; + for (n = 0; n < (1<<extra_lbits[code]); n++) { + _length_code[length++] = (uch)code; + } + } + Assert (length == 256, "tr_static_init: length != 256"); + /* Note that the length 255 (match length 258) can be represented + * in two different ways: code 284 + 5 bits or code 285, so we + * overwrite length_code[255] to use the best encoding: + */ + _length_code[length-1] = (uch)code; + + /* Initialize the mapping dist (0..32K) -> dist code (0..29) */ + dist = 0; + for (code = 0 ; code < 16; code++) { + base_dist[code] = dist; + for (n = 0; n < (1<<extra_dbits[code]); n++) { + _dist_code[dist++] = (uch)code; + } + } + Assert (dist == 256, "tr_static_init: dist != 256"); + dist >>= 7; /* from now on, all distances are divided by 128 */ + for ( ; code < D_CODES; code++) { + base_dist[code] = dist << 7; + for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) { + _dist_code[256 + dist++] = (uch)code; + } + } + Assert (dist == 256, "tr_static_init: 256+dist != 512"); + + /* Construct the codes of the static literal tree */ + for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0; + n = 0; + while (n <= 143) 
static_ltree[n++].Len = 8, bl_count[8]++; + while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++; + while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++; + while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++; + /* Codes 286 and 287 do not exist, but we must include them in the + * tree construction to get a canonical Huffman tree (longest code + * all ones) + */ + gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count); + + /* The static distance tree is trivial: */ + for (n = 0; n < D_CODES; n++) { + static_dtree[n].Len = 5; + static_dtree[n].Code = bi_reverse((unsigned)n, 5); + } + static_init_done = 1; + +# ifdef GEN_TREES_H + gen_trees_header(); +# endif +#endif /* defined(GEN_TREES_H) || !defined(STDC) */ +} + +/* =========================================================================== + * Generate the file trees.h describing the static trees. + */ +#ifdef GEN_TREES_H +# ifndef DEBUG +# include <stdio.h> +# endif + +# define SEPARATOR(i, last, width) \ + ((i) == (last)? "\n};\n\n" : \ + ((i) % (width) == (width)-1 ?
",\n" : ", ")) + +void gen_trees_header() +{ + FILE *header = fopen("trees.h", "w"); + int i; + + Assert (header != NULL, "Can't open trees.h"); + fprintf(header, + "/* header created automatically with -DGEN_TREES_H */\n\n"); + + fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n"); + for (i = 0; i < L_CODES+2; i++) { + fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code, + static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5)); + } + + fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n"); + for (i = 0; i < D_CODES; i++) { + fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code, + static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5)); + } + + fprintf(header, "const uch _dist_code[DIST_CODE_LEN] = {\n"); + for (i = 0; i < DIST_CODE_LEN; i++) { + fprintf(header, "%2u%s", _dist_code[i], + SEPARATOR(i, DIST_CODE_LEN-1, 20)); + } + + fprintf(header, "const uch _length_code[MAX_MATCH-MIN_MATCH+1]= {\n"); + for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) { + fprintf(header, "%2u%s", _length_code[i], + SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20)); + } + + fprintf(header, "local const int base_length[LENGTH_CODES] = {\n"); + for (i = 0; i < LENGTH_CODES; i++) { + fprintf(header, "%1u%s", base_length[i], + SEPARATOR(i, LENGTH_CODES-1, 20)); + } + + fprintf(header, "local const int base_dist[D_CODES] = {\n"); + for (i = 0; i < D_CODES; i++) { + fprintf(header, "%5u%s", base_dist[i], + SEPARATOR(i, D_CODES-1, 10)); + } + + fclose(header); +} +#endif /* GEN_TREES_H */ + +/* =========================================================================== + * Initialize the tree data structures for a new zlib stream. 
+ */ +void _tr_init(s) + deflate_state *s; +{ + tr_static_init(); + + s->l_desc.dyn_tree = s->dyn_ltree; + s->l_desc.stat_desc = &static_l_desc; + + s->d_desc.dyn_tree = s->dyn_dtree; + s->d_desc.stat_desc = &static_d_desc; + + s->bl_desc.dyn_tree = s->bl_tree; + s->bl_desc.stat_desc = &static_bl_desc; + + s->bi_buf = 0; + s->bi_valid = 0; + s->last_eob_len = 8; /* enough lookahead for inflate */ +#ifdef DEBUG + s->compressed_len = 0L; + s->bits_sent = 0L; +#endif + + /* Initialize the first block of the first file: */ + init_block(s); +} + +/* =========================================================================== + * Initialize a new block. + */ +local void init_block(s) + deflate_state *s; +{ + int n; /* iterates over tree elements */ + + /* Initialize the trees. */ + for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0; + for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0; + for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0; + + s->dyn_ltree[END_BLOCK].Freq = 1; + s->opt_len = s->static_len = 0L; + s->last_lit = s->matches = 0; +} + +#define SMALLEST 1 +/* Index within the heap array of least frequent node in the Huffman tree */ + + +/* =========================================================================== + * Remove the smallest element from the heap and recreate the heap with + * one less element. Updates heap and heap_len. + */ +#define pqremove(s, tree, top) \ +{\ + top = s->heap[SMALLEST]; \ + s->heap[SMALLEST] = s->heap[s->heap_len--]; \ + pqdownheap(s, tree, SMALLEST); \ +} + +/* =========================================================================== + * Compares to subtrees, using the tree depth as tie breaker when + * the subtrees have equal frequency. This minimizes the worst case length. 
+ */ +#define smaller(tree, n, m, depth) \ + (tree[n].Freq < tree[m].Freq || \ + (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m])) + +/* =========================================================================== + * Restore the heap property by moving down the tree starting at node k, + * exchanging a node with the smallest of its two sons if necessary, stopping + * when the heap property is re-established (each father smaller than its + * two sons). + */ +local void pqdownheap(s, tree, k) + deflate_state *s; + ct_data *tree; /* the tree to restore */ + int k; /* node to move down */ +{ + int v = s->heap[k]; + int j = k << 1; /* left son of k */ + while (j <= s->heap_len) { + /* Set j to the smallest of the two sons: */ + if (j < s->heap_len && + smaller(tree, s->heap[j+1], s->heap[j], s->depth)) { + j++; + } + /* Exit if v is smaller than both sons */ + if (smaller(tree, v, s->heap[j], s->depth)) break; + + /* Exchange v with the smallest son */ + s->heap[k] = s->heap[j]; k = j; + + /* And continue down the tree, setting j to the left son of k */ + j <<= 1; + } + s->heap[k] = v; +} + +/* =========================================================================== + * Compute the optimal bit lengths for a tree and update the total bit length + * for the current block. + * IN assertion: the fields freq and dad are set, heap[heap_max] and + * above are the tree nodes sorted by increasing frequency. + * OUT assertions: the field len is set to the optimal bit length, the + * array bl_count contains the frequencies for each bit length. + * The length opt_len is updated; static_len is also updated if stree is + * not null. 
+ */ +local void gen_bitlen(s, desc) + deflate_state *s; + tree_desc *desc; /* the tree descriptor */ +{ + ct_data *tree = desc->dyn_tree; + int max_code = desc->max_code; + const ct_data *stree = desc->stat_desc->static_tree; + const intf *extra = desc->stat_desc->extra_bits; + int base = desc->stat_desc->extra_base; + int max_length = desc->stat_desc->max_length; + int h; /* heap index */ + int n, m; /* iterate over the tree elements */ + int bits; /* bit length */ + int xbits; /* extra bits */ + ush f; /* frequency */ + int overflow = 0; /* number of elements with bit length too large */ + + for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0; + + /* In a first pass, compute the optimal bit lengths (which may + * overflow in the case of the bit length tree). + */ + tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */ + + for (h = s->heap_max+1; h < HEAP_SIZE; h++) { + n = s->heap[h]; + bits = tree[tree[n].Dad].Len + 1; + if (bits > max_length) bits = max_length, overflow++; + tree[n].Len = (ush)bits; + /* We overwrite tree[n].Dad which is no longer needed */ + + if (n > max_code) continue; /* not a leaf node */ + + s->bl_count[bits]++; + xbits = 0; + if (n >= base) xbits = extra[n-base]; + f = tree[n].Freq; + s->opt_len += (ulg)f * (bits + xbits); + if (stree) s->static_len += (ulg)f * (stree[n].Len + xbits); + } + if (overflow == 0) return; + + Trace((stderr,"\nbit length overflow\n")); + /* This happens for example on obj2 and pic of the Calgary corpus */ + + /* Find the first bit length which could increase: */ + do { + bits = max_length-1; + while (s->bl_count[bits] == 0) bits--; + s->bl_count[bits]--; /* move one leaf down the tree */ + s->bl_count[bits+1] += 2; /* move one overflow item as its brother */ + s->bl_count[max_length]--; + /* The brother of the overflow item also moves one step up, + * but this does not affect bl_count[max_length] + */ + overflow -= 2; + } while (overflow > 0); + + /* Now recompute all bit lengths, scanning in 
increasing frequency. + * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all + * lengths instead of fixing only the wrong ones. This idea is taken + * from 'ar' written by Haruhiko Okumura.) + */ + for (bits = max_length; bits != 0; bits--) { + n = s->bl_count[bits]; + while (n != 0) { + m = s->heap[--h]; + if (m > max_code) continue; + if (tree[m].Len != (unsigned) bits) { + Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits)); + s->opt_len += ((long)bits - (long)tree[m].Len) + *(long)tree[m].Freq; + tree[m].Len = (ush)bits; + } + n--; + } + } +} + +/* =========================================================================== + * Generate the codes for a given tree and bit counts (which need not be + * optimal). + * IN assertion: the array bl_count contains the bit length statistics for + * the given tree and the field len is set for all tree elements. + * OUT assertion: the field code is set for all tree elements of non + * zero code length. + */ +local void gen_codes (tree, max_code, bl_count) + ct_data *tree; /* the tree to decorate */ + int max_code; /* largest code with non zero frequency */ + ushf *bl_count; /* number of codes at each bit length */ +{ + ush next_code[MAX_BITS+1]; /* next code value for each bit length */ + ush code = 0; /* running code value */ + int bits; /* bit index */ + int n; /* code index */ + + /* The distribution counts are first used to generate the code values + * without bit reversal. + */ + for (bits = 1; bits <= MAX_BITS; bits++) { + next_code[bits] = code = (code + bl_count[bits-1]) << 1; + } + /* Check that the bit counts in bl_count are consistent. The last code + * must be all ones. 
+ */ + Assert (code + bl_count[MAX_BITS]-1 == (1<<MAX_BITS)-1, + "inconsistent bit counts"); + Tracev((stderr,"\ngen_codes: max_code %d ", max_code)); + + for (n = 0; n <= max_code; n++) { + int len = tree[n].Len; + if (len == 0) continue; + /* Now reverse the bits */ + tree[n].Code = bi_reverse(next_code[len]++, len); + + Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ", + n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1)); + } +} + +/* =========================================================================== + * Construct one Huffman tree and assigns the code bit strings and lengths. + * Update the total bit length for the current block. + * IN assertion: the field freq is set for all tree elements. + * OUT assertions: the fields len and code are set to the optimal bit length + * and corresponding code. The length opt_len is updated; static_len is + * also updated if stree is not null. The field max_code is set. + */ +local void build_tree(s, desc) + deflate_state *s; + tree_desc *desc; /* the tree descriptor */ +{ + ct_data *tree = desc->dyn_tree; + const ct_data *stree = desc->stat_desc->static_tree; + int elems = desc->stat_desc->elems; + int n, m; /* iterate over heap elements */ + int max_code = -1; /* largest code with non zero frequency */ + int node; /* new node being created */ + + /* Construct the initial heap, with least frequent element in + * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1]. + * heap[0] is not used. + */ + s->heap_len = 0, s->heap_max = HEAP_SIZE; + + for (n = 0; n < elems; n++) { + if (tree[n].Freq != 0) { + s->heap[++(s->heap_len)] = max_code = n; + s->depth[n] = 0; + } else { + tree[n].Len = 0; + } + } + + /* The pkzip format requires that at least one distance code exists, + * and that at least one bit should be sent even if there is only one + * possible code. So to avoid special checks later on we force at least + * two codes of non zero frequency. 
+ */ + while (s->heap_len < 2) { + node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0); + tree[node].Freq = 1; + s->depth[node] = 0; + s->opt_len--; if (stree) s->static_len -= stree[node].Len; + /* node is 0 or 1 so it does not have extra bits */ + } + desc->max_code = max_code; + + /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree, + * establish sub-heaps of increasing lengths: + */ + for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n); + + /* Construct the Huffman tree by repeatedly combining the least two + * frequent nodes. + */ + node = elems; /* next internal node of the tree */ + do { + pqremove(s, tree, n); /* n = node of least frequency */ + m = s->heap[SMALLEST]; /* m = node of next least frequency */ + + s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */ + s->heap[--(s->heap_max)] = m; + + /* Create a new node father of n and m */ + tree[node].Freq = tree[n].Freq + tree[m].Freq; + s->depth[node] = (uch) (MAX(s->depth[n], s->depth[m]) + 1); + tree[n].Dad = tree[m].Dad = (ush)node; +#ifdef DUMP_BL_TREE + if (tree == s->bl_tree) { + fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)", + node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq); + } +#endif + /* and insert the new node in the heap */ + s->heap[SMALLEST] = node++; + pqdownheap(s, tree, SMALLEST); + + } while (s->heap_len >= 2); + + s->heap[--(s->heap_max)] = s->heap[SMALLEST]; + + /* At this point, the fields freq and dad are set. We can now + * generate the bit lengths. + */ + gen_bitlen(s, (tree_desc *)desc); + + /* The field len is now set, we can generate the bit codes */ + gen_codes ((ct_data *)tree, max_code, s->bl_count); +} + +/* =========================================================================== + * Scan a literal or distance tree to determine the frequencies of the codes + * in the bit length tree. 
+ */ +local void scan_tree (s, tree, max_code) + deflate_state *s; + ct_data *tree; /* the tree to be scanned */ + int max_code; /* and its largest code of non zero frequency */ +{ + int n; /* iterates over all tree elements */ + int prevlen = -1; /* last emitted length */ + int curlen; /* length of current code */ + int nextlen = tree[0].Len; /* length of next code */ + int count = 0; /* repeat count of the current code */ + int max_count = 7; /* max repeat count */ + int min_count = 4; /* min repeat count */ + + if (nextlen == 0) max_count = 138, min_count = 3; + tree[max_code+1].Len = (ush)0xffff; /* guard */ + + for (n = 0; n <= max_code; n++) { + curlen = nextlen; nextlen = tree[n+1].Len; + if (++count < max_count && curlen == nextlen) { + continue; + } else if (count < min_count) { + s->bl_tree[curlen].Freq += count; + } else if (curlen != 0) { + if (curlen != prevlen) s->bl_tree[curlen].Freq++; + s->bl_tree[REP_3_6].Freq++; + } else if (count <= 10) { + s->bl_tree[REPZ_3_10].Freq++; + } else { + s->bl_tree[REPZ_11_138].Freq++; + } + count = 0; prevlen = curlen; + if (nextlen == 0) { + max_count = 138, min_count = 3; + } else if (curlen == nextlen) { + max_count = 6, min_count = 3; + } else { + max_count = 7, min_count = 4; + } + } +} + +/* =========================================================================== + * Send a literal or distance tree in compressed form, using the codes in + * bl_tree. 
+ */ +local void send_tree (s, tree, max_code) + deflate_state *s; + ct_data *tree; /* the tree to be scanned */ + int max_code; /* and its largest code of non zero frequency */ +{ + int n; /* iterates over all tree elements */ + int prevlen = -1; /* last emitted length */ + int curlen; /* length of current code */ + int nextlen = tree[0].Len; /* length of next code */ + int count = 0; /* repeat count of the current code */ + int max_count = 7; /* max repeat count */ + int min_count = 4; /* min repeat count */ + + /* tree[max_code+1].Len = -1; */ /* guard already set */ + if (nextlen == 0) max_count = 138, min_count = 3; + + for (n = 0; n <= max_code; n++) { + curlen = nextlen; nextlen = tree[n+1].Len; + if (++count < max_count && curlen == nextlen) { + continue; + } else if (count < min_count) { + do { send_code(s, curlen, s->bl_tree); } while (--count != 0); + + } else if (curlen != 0) { + if (curlen != prevlen) { + send_code(s, curlen, s->bl_tree); count--; + } + Assert(count >= 3 && count <= 6, " 3_6?"); + send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2); + + } else if (count <= 10) { + send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3); + + } else { + send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7); + } + count = 0; prevlen = curlen; + if (nextlen == 0) { + max_count = 138, min_count = 3; + } else if (curlen == nextlen) { + max_count = 6, min_count = 3; + } else { + max_count = 7, min_count = 4; + } + } +} + +/* =========================================================================== + * Construct the Huffman tree for the bit lengths and return the index in + * bl_order of the last bit length code to send. 
+ */ +local int build_bl_tree(s) + deflate_state *s; +{ + int max_blindex; /* index of last bit length code of non zero freq */ + + /* Determine the bit length frequencies for literal and distance trees */ + scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code); + scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code); + + /* Build the bit length tree: */ + build_tree(s, (tree_desc *)(&(s->bl_desc))); + /* opt_len now includes the length of the tree representations, except + * the lengths of the bit lengths codes and the 5+5+4 bits for the counts. + */ + + /* Determine the number of bit length codes to send. The pkzip format + * requires that at least 4 bit length codes be sent. (appnote.txt says + * 3 but the actual value used is 4.) + */ + for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) { + if (s->bl_tree[bl_order[max_blindex]].Len != 0) break; + } + /* Update opt_len to include the bit length tree and counts */ + s->opt_len += 3*(max_blindex+1) + 5+5+4; + Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld", + s->opt_len, s->static_len)); + + return max_blindex; +} + +/* =========================================================================== + * Send the header for a block using dynamic Huffman trees: the counts, the + * lengths of the bit length codes, the literal tree and the distance tree. + * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4. 
+ */ +local void send_all_trees(s, lcodes, dcodes, blcodes) + deflate_state *s; + int lcodes, dcodes, blcodes; /* number of codes for each tree */ +{ + int rank; /* index in bl_order */ + + Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes"); + Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES, + "too many codes"); + Tracev((stderr, "\nbl counts: ")); + send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */ + send_bits(s, dcodes-1, 5); + send_bits(s, blcodes-4, 4); /* not -3 as stated in appnote.txt */ + for (rank = 0; rank < blcodes; rank++) { + Tracev((stderr, "\nbl code %2d ", bl_order[rank])); + send_bits(s, s->bl_tree[bl_order[rank]].Len, 3); + } + Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent)); + + send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */ + Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent)); + + send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */ + Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent)); +} + +/* =========================================================================== + * Send a stored block + */ +void _tr_stored_block(s, buf, stored_len, eof) + deflate_state *s; + charf *buf; /* input block */ + ulg stored_len; /* length of input block */ + int eof; /* true if this is the last block for a file */ +{ + send_bits(s, (STORED_BLOCK<<1)+eof, 3); /* send block type */ +#ifdef DEBUG + s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L; + s->compressed_len += (stored_len + 4) << 3; +#endif + copy_block(s, buf, (unsigned)stored_len, 1); /* with header */ +} + +/* =========================================================================== + * Send one empty static block to give enough lookahead for inflate. + * This takes 10 bits, of which 7 may remain in the bit buffer. + * The current inflate code requires 9 bits of lookahead. 
If the + * last two codes for the previous block (real code plus EOB) were coded + * on 5 bits or less, inflate may have only 5+3 bits of lookahead to decode + * the last real code. In this case we send two empty static blocks instead + * of one. (There are no problems if the previous block is stored or fixed.) + * To simplify the code, we assume the worst case of last real code encoded + * on one bit only. + */ +void _tr_align(s) + deflate_state *s; +{ + send_bits(s, STATIC_TREES<<1, 3); + send_code(s, END_BLOCK, static_ltree); +#ifdef DEBUG + s->compressed_len += 10L; /* 3 for block type, 7 for EOB */ +#endif + bi_flush(s); + /* Of the 10 bits for the empty block, we have already sent + * (10 - bi_valid) bits. The lookahead for the last real code (before + * the EOB of the previous block) was thus at least one plus the length + * of the EOB plus what we have just sent of the empty static block. + */ + if (1 + s->last_eob_len + 10 - s->bi_valid < 9) { + send_bits(s, STATIC_TREES<<1, 3); + send_code(s, END_BLOCK, static_ltree); +#ifdef DEBUG + s->compressed_len += 10L; +#endif + bi_flush(s); + } + s->last_eob_len = 7; +} + +/* =========================================================================== + * Determine the best encoding for the current block: dynamic trees, static + * trees or store, and output the encoded block to the zip file. 
+ */ +void _tr_flush_block(s, buf, stored_len, eof) + deflate_state *s; + charf *buf; /* input block, or NULL if too old */ + ulg stored_len; /* length of input block */ + int eof; /* true if this is the last block for a file */ +{ + ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ + int max_blindex = 0; /* index of last bit length code of non zero freq */ + + /* Build the Huffman trees unless a stored block is forced */ + if (s->level > 0) { + + /* Check if the file is ascii or binary */ + if (s->data_type == Z_UNKNOWN) set_data_type(s); + + /* Construct the literal and distance trees */ + build_tree(s, (tree_desc *)(&(s->l_desc))); + Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len, + s->static_len)); + + build_tree(s, (tree_desc *)(&(s->d_desc))); + Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len, + s->static_len)); + /* At this point, opt_len and static_len are the total bit lengths of + * the compressed block data, excluding the tree representations. + */ + + /* Build the bit length tree for the above two trees, and get the index + * in bl_order of the last bit length code to send. + */ + max_blindex = build_bl_tree(s); + + /* Determine the best encoding. Compute first the block length in bytes*/ + opt_lenb = (s->opt_len+3+7)>>3; + static_lenb = (s->static_len+3+7)>>3; + + Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", + opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, + s->last_lit)); + + if (static_lenb <= opt_lenb) opt_lenb = static_lenb; + + } else { + Assert(buf != (char*)0, "lost buf"); + opt_lenb = static_lenb = stored_len + 5; /* force a stored block */ + } + +#ifdef FORCE_STORED + if (buf != (char*)0) { /* force stored block */ +#else + if (stored_len+4 <= opt_lenb && buf != (char*)0) { + /* 4: two words for the lengths */ +#endif + /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. 
+ * Otherwise we can't have processed more than WSIZE input bytes since + * the last block flush, because compression would have been + * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to + * transform a block into a stored block. + */ + _tr_stored_block(s, buf, stored_len, eof); + +#ifdef FORCE_STATIC + } else if (static_lenb >= 0) { /* force static trees */ +#else + } else if (static_lenb == opt_lenb) { +#endif + send_bits(s, (STATIC_TREES<<1)+eof, 3); + compress_block(s, (ct_data *)static_ltree, (ct_data *)static_dtree); +#ifdef DEBUG + s->compressed_len += 3 + s->static_len; +#endif + } else { + send_bits(s, (DYN_TREES<<1)+eof, 3); + send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1, + max_blindex+1); + compress_block(s, (ct_data *)s->dyn_ltree, (ct_data *)s->dyn_dtree); +#ifdef DEBUG + s->compressed_len += 3 + s->opt_len; +#endif + } + Assert (s->compressed_len == s->bits_sent, "bad compressed size"); + /* The above check is made mod 2^32, for files larger than 512 MB + * and uLong implemented on 32 bits. + */ + init_block(s); + + if (eof) { + bi_windup(s); +#ifdef DEBUG + s->compressed_len += 7; /* align on byte boundary */ +#endif + } + Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3, + s->compressed_len-7*eof)); +} + +/* =========================================================================== + * Save the match info and tally the frequency counts. Return true if + * the current block must be flushed. 
+ */ +int _tr_tally (s, dist, lc) + deflate_state *s; + unsigned dist; /* distance of matched string */ + unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */ +{ + s->d_buf[s->last_lit] = (ush)dist; + s->l_buf[s->last_lit++] = (uch)lc; + if (dist == 0) { + /* lc is the unmatched char */ + s->dyn_ltree[lc].Freq++; + } else { + s->matches++; + /* Here, lc is the match length - MIN_MATCH */ + dist--; /* dist = match distance - 1 */ + Assert((ush)dist < (ush)MAX_DIST(s) && + (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) && + (ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match"); + + s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++; + s->dyn_dtree[d_code(dist)].Freq++; + } + +#ifdef TRUNCATE_BLOCK + /* Try to guess if it is profitable to stop the current block here */ + if ((s->last_lit & 0x1fff) == 0 && s->level > 2) { + /* Compute an upper bound for the compressed length */ + ulg out_length = (ulg)s->last_lit*8L; + ulg in_length = (ulg)((long)s->strstart - s->block_start); + int dcode; + for (dcode = 0; dcode < D_CODES; dcode++) { + out_length += (ulg)s->dyn_dtree[dcode].Freq * + (5L+extra_dbits[dcode]); + } + out_length >>= 3; + Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ", + s->last_lit, in_length, out_length, + 100L - out_length*100L/in_length)); + if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1; + } +#endif + return (s->last_lit == s->lit_bufsize-1); + /* We avoid equality with lit_bufsize because of wraparound at 64K + * on 16 bit machines and because stored blocks are restricted to + * 64K-1 bytes. 
+ */ +} + +/* =========================================================================== + * Send the block data compressed using the given Huffman trees + */ +local void compress_block(s, ltree, dtree) + deflate_state *s; + ct_data *ltree; /* literal tree */ + ct_data *dtree; /* distance tree */ +{ + unsigned dist; /* distance of matched string */ + int lc; /* match length or unmatched char (if dist == 0) */ + unsigned lx = 0; /* running index in l_buf */ + unsigned code; /* the code to send */ + int extra; /* number of extra bits to send */ + + if (s->last_lit != 0) do { + dist = s->d_buf[lx]; + lc = s->l_buf[lx++]; + if (dist == 0) { + send_code(s, lc, ltree); /* send a literal byte */ + Tracecv(isgraph(lc), (stderr," '%c' ", lc)); + } else { + /* Here, lc is the match length - MIN_MATCH */ + code = _length_code[lc]; + send_code(s, code+LITERALS+1, ltree); /* send the length code */ + extra = extra_lbits[code]; + if (extra != 0) { + lc -= base_length[code]; + send_bits(s, lc, extra); /* send the extra length bits */ + } + dist--; /* dist is now the match distance - 1 */ + code = d_code(dist); + Assert (code < D_CODES, "bad d_code"); + + send_code(s, code, dtree); /* send the distance code */ + extra = extra_dbits[code]; + if (extra != 0) { + dist -= base_dist[code]; + send_bits(s, dist, extra); /* send the extra distance bits */ + } + } /* literal or match pair ? */ + + /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */ + Assert(s->pending < s->lit_bufsize + 2*lx, "pendingBuf overflow"); + + } while (lx < s->last_lit); + + send_code(s, END_BLOCK, ltree); + s->last_eob_len = ltree[END_BLOCK].Len; +} + +/* =========================================================================== + * Set the data type to ASCII or BINARY, using a crude approximation: + * binary if more than 20% of the bytes are <= 6 or >= 128, ascii otherwise. 
+ * IN assertion: the fields freq of dyn_ltree are set and the total of all + * frequencies does not exceed 64K (to fit in an int on 16 bit machines). + */ +local void set_data_type(s) + deflate_state *s; +{ + int n = 0; + unsigned ascii_freq = 0; + unsigned bin_freq = 0; + while (n < 7) bin_freq += s->dyn_ltree[n++].Freq; + while (n < 128) ascii_freq += s->dyn_ltree[n++].Freq; + while (n < LITERALS) bin_freq += s->dyn_ltree[n++].Freq; + s->data_type = (Byte)(bin_freq > (ascii_freq >> 2) ? Z_BINARY : Z_ASCII); +} + +/* =========================================================================== + * Reverse the first len bits of a code, using straightforward code (a faster + * method would use a table) + * IN assertion: 1 <= len <= 15 + */ +local unsigned bi_reverse(code, len) + unsigned code; /* the value to invert */ + int len; /* its bit length */ +{ + register unsigned res = 0; + do { + res |= code & 1; + code >>= 1, res <<= 1; + } while (--len > 0); + return res >> 1; +} + +/* =========================================================================== + * Flush the bit buffer, keeping at most 7 bits in it. + */ +local void bi_flush(s) + deflate_state *s; +{ + if (s->bi_valid == 16) { + put_short(s, s->bi_buf); + s->bi_buf = 0; + s->bi_valid = 0; + } else if (s->bi_valid >= 8) { + put_byte(s, (Byte)s->bi_buf); + s->bi_buf >>= 8; + s->bi_valid -= 8; + } +} + +/* =========================================================================== + * Flush the bit buffer and align the output on a byte boundary + */ +local void bi_windup(s) + deflate_state *s; +{ + if (s->bi_valid > 8) { + put_short(s, s->bi_buf); + } else if (s->bi_valid > 0) { + put_byte(s, (Byte)s->bi_buf); + } + s->bi_buf = 0; + s->bi_valid = 0; +#ifdef DEBUG + s->bits_sent = (s->bits_sent+7) & ~7; +#endif +} + +/* =========================================================================== + * Copy a stored block, storing first the length and its + * one's complement if requested. 
+ */ +local void copy_block(s, buf, len, header) + deflate_state *s; + charf *buf; /* the input data */ + unsigned len; /* its length */ + int header; /* true if block header must be written */ +{ + bi_windup(s); /* align on byte boundary */ + s->last_eob_len = 8; /* enough lookahead for inflate */ + + if (header) { + put_short(s, (ush)len); + put_short(s, (ush)~len); +#ifdef DEBUG + s->bits_sent += 2*16; +#endif + } +#ifdef DEBUG + s->bits_sent += (ulg)len<<3; +#endif + while (len--) { + put_byte(s, *buf++); + } +} diff --git a/src/utilfuns/zlib/uncompr.c b/src/utilfuns/zlib/uncompr.c new file mode 100644 index 0000000..0c1051a --- /dev/null +++ b/src/utilfuns/zlib/uncompr.c @@ -0,0 +1,58 @@ +/* uncompr.c -- decompress a memory buffer + * Copyright (C) 1995-1998 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id: uncompr.c,v 1.1 2001/03/23 09:00:15 scribe Exp $ */ + +#include "zlib.h" + +/* =========================================================================== + Decompresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total + size of the destination buffer, which must be large enough to hold the + entire uncompressed data. (The size of the uncompressed data must have + been saved previously by the compressor and transmitted to the decompressor + by some mechanism outside the scope of this compression library.) + Upon exit, destLen is the actual size of the compressed buffer. + This function can be used to decompress a whole file at once if the + input file is mmap'ed. + + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer, or Z_DATA_ERROR if the input data was corrupted. 
+*/ +int ZEXPORT uncompress (dest, destLen, source, sourceLen) + Bytef *dest; + uLongf *destLen; + const Bytef *source; + uLong sourceLen; +{ + z_stream stream; + int err; + + stream.next_in = (Bytef*)source; + stream.avail_in = (uInt)sourceLen; + /* Check for source > 64K on 16-bit machine: */ + if ((uLong)stream.avail_in != sourceLen) return Z_BUF_ERROR; + + stream.next_out = dest; + stream.avail_out = (uInt)*destLen; + if ((uLong)stream.avail_out != *destLen) return Z_BUF_ERROR; + + stream.zalloc = (alloc_func)0; + stream.zfree = (free_func)0; + + err = inflateInit(&stream); + if (err != Z_OK) return err; + + err = inflate(&stream, Z_FINISH); + if (err != Z_STREAM_END) { + inflateEnd(&stream); + return err == Z_OK ? Z_BUF_ERROR : err; + } + *destLen = stream.total_out; + + err = inflateEnd(&stream); + return err; +} diff --git a/src/utilfuns/zlib/zutil.c b/src/utilfuns/zlib/zutil.c new file mode 100644 index 0000000..2eb7b46 --- /dev/null +++ b/src/utilfuns/zlib/zutil.c @@ -0,0 +1,225 @@ +/* zutil.c -- target dependent utility functions for the compression library + * Copyright (C) 1995-1998 Jean-loup Gailly. 
+ * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id: zutil.c,v 1.1 2001/03/23 09:00:15 scribe Exp $ */ + +#include "zutil.h" + +struct internal_state {int dummy;}; /* for buggy compilers */ + +#ifndef STDC +extern void exit OF((int)); +#endif + +const char *z_errmsg[10] = { /* message table; the ERR_MSG() macro in zutil.h indexes it with Z_NEED_DICT-(err) */ +"need dictionary", /* Z_NEED_DICT 2 */ +"stream end", /* Z_STREAM_END 1 */ +"", /* Z_OK 0 */ +"file error", /* Z_ERRNO (-1) */ +"stream error", /* Z_STREAM_ERROR (-2) */ +"data error", /* Z_DATA_ERROR (-3) */ +"insufficient memory", /* Z_MEM_ERROR (-4) */ +"buffer error", /* Z_BUF_ERROR (-5) */ +"incompatible version",/* Z_VERSION_ERROR (-6) */ +""}; /* tenth entry: pads the nine messages above to the declared size of 10 */ + + +const char * ZEXPORT zlibVersion() /* returns the ZLIB_VERSION macro */ +{ + return ZLIB_VERSION; +} + +#ifdef DEBUG + +# ifndef verbose +# define verbose 0 +# endif +int z_verbose = verbose; + +void z_error (m) + char *m; /* message to print; DEBUG builds only: the function writes it to stderr and calls exit(1) */ +{ + fprintf(stderr, "%s\n", m); + exit(1); +} +#endif + +/* exported to allow conversion of error code to string for compress() and + * uncompress() + */ +const char * ZEXPORT zError(err) + int err; +{ + return ERR_MSG(err); +} + + +#ifndef HAVE_MEMCPY + +void zmemcpy(dest, source, len) + Bytef* dest; + const Bytef* source; + uInt len; +{ + if (len == 0) return; /* guard required: the do/while below always executes at least once */ + do { + *dest++ = *source++; /* ??? to be unrolled */ + } while (--len != 0); +} + +int zmemcmp(s1, s2, len) + const Bytef* s1; + const Bytef* s2; + uInt len; +{ + uInt j; + + for (j = 0; j < len; j++) { + if (s1[j] != s2[j]) return 2*(s1[j] > s2[j])-1; /* first differing byte decides: +1 if the s1 byte is larger, -1 otherwise */ + } + return 0; +} + +void zmemzero(dest, len) + Bytef* dest; + uInt len; +{ + if (len == 0) return; + do { + *dest++ = 0; /* ???
to be unrolled */ + } while (--len != 0); +} +#endif + +#ifdef __TURBOC__ +#if (defined( __BORLANDC__) || !defined(SMALL_MEDIUM)) && !defined(__32BIT__) +/* Small and medium model in Turbo C are for now limited to near allocation + * with reduced MAX_WBITS and MAX_MEM_LEVEL + */ +# define MY_ZCALLOC + +/* Turbo C malloc() does not allow dynamic allocation of 64K bytes + * and farmalloc(64K) returns a pointer with an offset of 8, so we + * must fix the pointer. Warning: the pointer must be put back to its + * original form in order to free it, use zcfree(). + */ + +#define MAX_PTR 10 +/* 10*64K = 640K */ + +local int next_ptr = 0; /* number of entries of table[] currently in use */ + +typedef struct ptr_table_s { + voidpf org_ptr; /* pointer as returned by farmalloc; this is what zcfree passes to farfree */ + voidpf new_ptr; /* normalized pointer that zcalloc returned to its caller */ +} ptr_table; + +local ptr_table table[MAX_PTR]; +/* This table is used to remember the original form of pointers + * to large buffers (64K). Such pointers are normalized with a zero offset. + * Since MSDOS is not a preemptive multitasking OS, this table is not + * protected from concurrent access. This hack doesn't work anyway on + * a protected system like OS/2. Use Microsoft C instead. + */ + +voidpf zcalloc (voidpf opaque, unsigned items, unsigned size) +{ + voidpf buf = opaque; /* just to make some compilers happy */ + ulg bsize = (ulg)items*size; + + /* If we allocate less than 65520 bytes, we assume that farmalloc + * will return a usable pointer which doesn't have to be normalized.
+ */ + if (bsize < 65520L) { + buf = farmalloc(bsize); + if (*(ush*)&buf != 0) return buf; /* non-zero first word: returned as is and not recorded in table[] */ + } else { + buf = farmalloc(bsize + 16L); + } + if (buf == NULL || next_ptr >= MAX_PTR) return NULL; /* NOTE(review): when table[] is full, a non-NULL buf is dropped here without farfree */ + table[next_ptr].org_ptr = buf; + + /* Normalize the pointer to seg:0 */ + *((ush*)&buf+1) += ((ush)((uch*)buf-0) + 15) >> 4; + *(ush*)&buf = 0; + table[next_ptr++].new_ptr = buf; + return buf; +} + +void zcfree (voidpf opaque, voidpf ptr) +{ + int n; + if (*(ush*)&ptr != 0) { /* object < 64K */ + farfree(ptr); + return; + } + /* Find the original pointer */ + for (n = 0; n < next_ptr; n++) { + if (ptr != table[n].new_ptr) continue; + + farfree(table[n].org_ptr); + while (++n < next_ptr) { + table[n-1] = table[n]; /* shift later entries down to close the gap left by the freed one */ + } + next_ptr--; + return; + } + ptr = opaque; /* just to make some compilers happy */ + Assert(0, "zcfree: ptr not found"); +} +#endif +#endif /* __TURBOC__ */ + + +#if defined(M_I86) && !defined(__32BIT__) +/* Microsoft C in 16-bit mode */ + +# define MY_ZCALLOC + +#if (!defined(_MSC_VER) || (_MSC_VER <= 600)) +# define _halloc halloc +# define _hfree hfree +#endif + +voidpf zcalloc (voidpf opaque, unsigned items, unsigned size) +{ + if (opaque) opaque = 0; /* to make compiler happy */ + return _halloc((long)items, size); +} + +void zcfree (voidpf opaque, voidpf ptr) +{ + if (opaque) opaque = 0; /* to make compiler happy */ + _hfree(ptr); +} + +#endif /* MSC */ + + +#ifndef MY_ZCALLOC /* Any system without a special alloc function */ + +#ifndef STDC +extern voidp calloc OF((uInt items, uInt size)); +extern void free OF((voidpf ptr)); +#endif + +voidpf zcalloc (opaque, items, size) + voidpf opaque; + unsigned items; + unsigned size; +{ + if (opaque) items += size - size; /* make compiler happy */ + return (voidpf)calloc(items, size); /* plain calloc; opaque is otherwise unused */ +} + +void zcfree (opaque, ptr) + voidpf opaque; + voidpf ptr; +{ + free(ptr); + if (opaque) return; /* make compiler happy */ +} + +#endif /* MY_ZCALLOC */ diff --git a/src/utilfuns/zlib/zutil.h b/src/utilfuns/zlib/zutil.h new file
mode 100644 index 0000000..53278ba --- /dev/null +++ b/src/utilfuns/zlib/zutil.h @@ -0,0 +1,220 @@ +/* zutil.h -- internal interface and configuration of the compression library + * Copyright (C) 1995-1998 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* @(#) $Id: zutil.h,v 1.1 2001/03/23 09:00:15 scribe Exp $ */ + +#ifndef _Z_UTIL_H +#define _Z_UTIL_H + +#include "zlib.h" + +#ifdef STDC +# include <stddef.h> +# include <string.h> +# include <stdlib.h> +#endif +#ifdef NO_ERRNO_H + extern int errno; +#else +# include <errno.h> +#endif + +#ifndef local +# define local static +#endif +/* compile with -Dlocal if your debugger can't find static symbols */ + +typedef unsigned char uch; +typedef uch FAR uchf; +typedef unsigned short ush; +typedef ush FAR ushf; +typedef unsigned long ulg; + +extern const char *z_errmsg[10]; /* indexed by 2-zlib_error */ +/* (size given to avoid silly warnings with Visual C++) */ + +#define ERR_MSG(err) z_errmsg[Z_NEED_DICT-(err)] /* NOTE(review): no range check; with the index 2-err, any err outside -7..2 falls outside the 10-entry table */ + +#define ERR_RETURN(strm,err) \ + return (strm->msg = (char*)ERR_MSG(err), (err)) +/* To be used only when the state is known to be valid */ + + /* common constants */ + +#ifndef DEF_WBITS +# define DEF_WBITS MAX_WBITS +#endif +/* default windowBits for decompression.
MAX_WBITS is for compression only */ + +#if MAX_MEM_LEVEL >= 8 +# define DEF_MEM_LEVEL 8 +#else +# define DEF_MEM_LEVEL MAX_MEM_LEVEL +#endif +/* default memLevel */ + +#define STORED_BLOCK 0 +#define STATIC_TREES 1 +#define DYN_TREES 2 +/* The three kinds of block type */ + +#define MIN_MATCH 3 +#define MAX_MATCH 258 +/* The minimum and maximum match lengths */ + +#define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */ + + /* target dependencies */ + +#ifdef MSDOS +# define OS_CODE 0x00 +# if defined(__TURBOC__) || defined(__BORLANDC__) +# if(__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__)) + /* Allow compilation with ANSI keywords only enabled */ + void _Cdecl farfree( void *block ); + void *_Cdecl farmalloc( unsigned long nbytes ); +# else +# include <alloc.h> +# endif +# else /* MSC or DJGPP */ +# include <malloc.h> +# endif +#endif + +#ifdef OS2 +# define OS_CODE 0x06 +#endif + +#ifdef WIN32 /* Windows 95 & Windows NT */ +# define OS_CODE 0x0b +#endif + +#if defined(VAXC) || defined(VMS) +# define OS_CODE 0x02 +# define F_OPEN(name, mode) \ + fopen((name), (mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512") +#endif + +#ifdef AMIGA +# define OS_CODE 0x01 +#endif + +#if defined(ATARI) || defined(atarist) +# define OS_CODE 0x05 +#endif + +#if defined(MACOS) || defined(TARGET_OS_MAC) +# define OS_CODE 0x07 +# if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os +# include <unix.h> /* for fdopen */ +# else +# ifndef fdopen +# define fdopen(fd,mode) NULL /* No fdopen() */ +# endif +# endif +#endif + +#ifdef __50SERIES /* Prime/PRIMOS */ +# define OS_CODE 0x0F +#endif + +#ifdef TOPS20 +# define OS_CODE 0x0a +#endif + +#if defined(_BEOS_) || defined(RISCOS) +# define fdopen(fd,mode) NULL /* No fdopen() */ +#endif + +#if (defined(_MSC_VER) && (_MSC_VER > 600)) +# define fdopen(fd,type) _fdopen(fd,type) +#endif + + + /* Common defaults */ + +#ifndef OS_CODE +# define OS_CODE 0x03 /* assume Unix */ +#endif + +#ifndef F_OPEN
+# define F_OPEN(name, mode) fopen((name), (mode)) +#endif + + /* functions */ + +#ifdef HAVE_STRERROR + extern char *strerror OF((int)); +# define zstrerror(errnum) strerror(errnum) +#else +# define zstrerror(errnum) "" /* without HAVE_STRERROR the message is always the empty string */ +#endif + +#if defined(pyr) +# define NO_MEMCPY +#endif +#if defined(SMALL_MEDIUM) && !defined(_MSC_VER) && !defined(__SC__) + /* Use our own functions for small and medium model with MSC <= 5.0. + * You may have to use the same strategy for Borland C (untested). + * The __SC__ check is for Symantec. + */ +# define NO_MEMCPY +#endif +#if defined(STDC) && !defined(HAVE_MEMCPY) && !defined(NO_MEMCPY) +# define HAVE_MEMCPY +#endif +#ifdef HAVE_MEMCPY +# ifdef SMALL_MEDIUM /* MSDOS small or medium model */ +# define zmemcpy _fmemcpy +# define zmemcmp _fmemcmp +# define zmemzero(dest, len) _fmemset(dest, 0, len) +# else +# define zmemcpy memcpy +# define zmemcmp memcmp +# define zmemzero(dest, len) memset(dest, 0, len) +# endif +#else /* no HAVE_MEMCPY: use the byte-loop versions that zutil.c defines under the same condition */ + extern void zmemcpy OF((Bytef* dest, const Bytef* source, uInt len)); + extern int zmemcmp OF((const Bytef* s1, const Bytef* s2, uInt len)); + extern void zmemzero OF((Bytef* dest, uInt len)); +#endif + +/* Diagnostic functions */ +#ifdef DEBUG +# include <stdio.h> + extern int z_verbose; + extern void z_error OF((char *m)); +# define Assert(cond,msg) {if(!(cond)) z_error(msg);} +# define Trace(x) {if (z_verbose>=0) fprintf x ;} /* x must be a complete parenthesized fprintf argument list */ +# define Tracev(x) {if (z_verbose>0) fprintf x ;} +# define Tracevv(x) {if (z_verbose>1) fprintf x ;} +# define Tracec(c,x) {if (z_verbose>0 && (c)) fprintf x ;} +# define Tracecv(c,x) {if (z_verbose>1 && (c)) fprintf x ;} +#else /* DEBUG not defined: every diagnostic macro expands to nothing */ +# define Assert(cond,msg) +# define Trace(x) +# define Tracev(x) +# define Tracevv(x) +# define Tracec(c,x) +# define Tracecv(c,x) +#endif + + +typedef uLong (ZEXPORT *check_func) OF((uLong check, const Bytef *buf, + uInt len)); +voidpf zcalloc OF((voidpf opaque, unsigned items, unsigned size)); +void zcfree OF((voidpf opaque, voidpf ptr)); + +#define ZALLOC(strm,
items, size) \ + (*((strm)->zalloc))((strm)->opaque, (items), (size)) /* calls the zalloc hook stored in strm, passing strm->opaque through */ +#define ZFREE(strm, addr) (*((strm)->zfree))((strm)->opaque, (voidpf)(addr)) +#define TRY_FREE(s, p) {if (p) ZFREE(s, p);} /* ZFREE only when p is non-null */ + +#endif /* _Z_UTIL_H */ |