diff options
author | danglassey <danglassey> | 2002-08-14 09:57:17 +0000 |
---|---|---|
committer | danglassey <danglassey> | 2002-08-14 09:57:17 +0000 |
commit | daa67ff1f728c07f2a116ee9a9f4505479ca6808 (patch) | |
tree | c224a537d30480002ae0560cc9104b543b4d1b5e /src/modules/common | |
parent | 6d6973e035aac5ec1676efccd5b8ada70c40b639 (diff) | |
download | sword-sf-cvs-import-1.1.1.tar.gz |
Initial import from crosswire CVS for syncingimport-1.1.1
Diffstat (limited to 'src/modules/common')
-rw-r--r-- | src/modules/common/entriesblk.cpp | 166 | ||||
-rw-r--r-- | src/modules/common/lzsscomprs.cpp | 665 | ||||
-rw-r--r-- | src/modules/common/rawstr.cpp | 551 | ||||
-rw-r--r-- | src/modules/common/rawstr4.cpp | 555 | ||||
-rw-r--r-- | src/modules/common/rawverse.cpp | 348 | ||||
-rw-r--r-- | src/modules/common/sapphire.cpp | 228 | ||||
-rw-r--r-- | src/modules/common/swcipher.cpp | 123 | ||||
-rw-r--r-- | src/modules/common/swcomprs.cpp | 190 | ||||
-rw-r--r-- | src/modules/common/zipcomprs.cpp | 158 | ||||
-rw-r--r-- | src/modules/common/zstr.cpp | 705 | ||||
-rw-r--r-- | src/modules/common/zverse.cpp | 518 |
11 files changed, 4207 insertions, 0 deletions
diff --git a/src/modules/common/entriesblk.cpp b/src/modules/common/entriesblk.cpp new file mode 100644 index 0000000..d38cf53 --- /dev/null +++ b/src/modules/common/entriesblk.cpp @@ -0,0 +1,166 @@ +#include <entriesblk.h> +#include <stdlib.h> +#include <string.h> + +const int EntriesBlock::METAHEADERSIZE = 4; + // count(4); +const int EntriesBlock::METAENTRYSIZE = 8; + // offset(4); size(4); + +EntriesBlock::EntriesBlock(const char *iBlock, unsigned long size) { + block = (char *)calloc(1, size); + memcpy(block, iBlock, size); +} + + +EntriesBlock::EntriesBlock() { + block = (char *)calloc(1, sizeof(__u32)); +} + + +EntriesBlock::~EntriesBlock() { + free(block); +} + + +void EntriesBlock::setCount(int count) { + __u32 rawCount = archtosword32(count); + memcpy(block, &rawCount, sizeof(__u32)); +} + + +int EntriesBlock::getCount() { + __u32 count = 0; + memcpy(&count, block, sizeof(__u32)); + count = swordtoarch32(count); + return count; +} + + +void EntriesBlock::getMetaEntry(int index, unsigned long *offset, unsigned long *size) { + __u32 rawOffset = 0; + __u32 rawSize = 0; + *offset = 0; + *size = 0; + if (index >= getCount()) // assert index < count + return; + + // first 4 bytes is count, each 6 bytes after is each meta entry + memcpy(&rawOffset, block + METAHEADERSIZE + (index * METAENTRYSIZE), sizeof(rawOffset)); + memcpy(&rawSize, block + METAHEADERSIZE + (index * METAENTRYSIZE) + sizeof(rawOffset), sizeof(rawSize)); + + *offset = (unsigned long)swordtoarch32(rawOffset); + *size = (unsigned long)swordtoarch32(rawSize); +} + + +void EntriesBlock::setMetaEntry(int index, unsigned long offset, unsigned long size) { + __u32 rawOffset = archtosword32(offset); + __u32 rawSize = archtosword32(size); + + if (index >= getCount()) // assert index < count + return; + + // first 4 bytes is count, each 6 bytes after is each meta entry + memcpy(block + METAHEADERSIZE + (index * METAENTRYSIZE), &rawOffset, sizeof(rawOffset)); + memcpy(block + METAHEADERSIZE + (index * METAENTRYSIZE) + sizeof(rawOffset), &rawSize, sizeof(rawSize)); +} + + +const char *EntriesBlock::getRawData(unsigned long *retSize) { + unsigned long max = 4; + int loop; + unsigned long offset; + unsigned long size; + for (loop = 0; loop < getCount(); loop++) { + getMetaEntry(loop, &offset, &size); + max = ((offset + size) > max) ? (offset + size) : max; + } + *retSize = max; + return block; +} + + +int EntriesBlock::addEntry(const char *entry) { + unsigned long dataSize; + getRawData(&dataSize); + unsigned long len = strlen(entry); + unsigned long offset; + unsigned long size; + int count = getCount(); + unsigned long dataStart = METAHEADERSIZE + (count * METAENTRYSIZE); + // new meta entry + new data size + 1 because null + block = (char *)realloc(block, dataSize + METAENTRYSIZE + len + 1); + // shift right to make room for new meta entry + memmove(block + dataStart + METAENTRYSIZE, block + dataStart, dataSize - dataStart); + + for (int loop = 0; loop < count; loop++) { + getMetaEntry(loop, &offset, &size); + if (offset) { // if not a deleted entry + offset += METAENTRYSIZE; + setMetaEntry(loop, offset, size); + } + } + + offset = dataSize; // original dataSize before realloc + size = len + 1; + // add our text to the end + memcpy(block + offset + METAENTRYSIZE, entry, size); + // increment count + setCount(count + 1); + // add our meta entry + setMetaEntry(count, offset + METAENTRYSIZE, size); + // return index of our new entry + return count; +} + + +const char *EntriesBlock::getEntry(int entryIndex) { + unsigned long offset; + unsigned long size; + static char *empty = ""; + + getMetaEntry(entryIndex, &offset, &size); + return (offset) ? block+offset : empty; +} + + +unsigned long EntriesBlock::getEntrySize(int entryIndex) { + unsigned long offset; + unsigned long size; + + getMetaEntry(entryIndex, &offset, &size); + return (offset) ? size : 0; +} + + +void EntriesBlock::removeEntry(int entryIndex) { + unsigned long offset; + unsigned long size, size2; + unsigned long dataSize; + getRawData(&dataSize); + getMetaEntry(entryIndex, &offset, &size); + unsigned long len = size - 1; + int count = getCount(); + unsigned long dataStart = METAHEADERSIZE + (count * METAENTRYSIZE); + + if (!offset) // already deleted + return; + + // shift left to retrieve space used for old entry + memmove(block + offset, block + offset + size, dataSize - (offset + size)); + + // fix offset for all entries after our entry that were shifted left + for (int loop = entryIndex + 1; loop < count; loop++) { + getMetaEntry(loop, &offset, &size2); + if (offset) { // if not a deleted entry + offset -= size; + setMetaEntry(loop, offset, size2); + } + } + + // zero out our meta entry + setMetaEntry(entryIndex, 0L, 0); +} + + diff --git a/src/modules/common/lzsscomprs.cpp b/src/modules/common/lzsscomprs.cpp new file mode 100644 index 0000000..3606fbc --- /dev/null +++ b/src/modules/common/lzsscomprs.cpp @@ -0,0 +1,665 @@ +/****************************************************************************** + * lzsscomprs.cpp - code for class 'LZSSCompress'- a driver class that + * provides LZSS compression + */ + +#include <string.h> +#include <stdlib.h> +#include <lzsscomprs.h> + + +/****************************************************************************** + * LZSSCompress Statics + */ + +// m_ring_buffer is a text buffer. It contains "nodes" of +// uncompressed text that can be indexed by position. That is, +// a substring of the ring buffer can be indexed by a position +// and a length. When decoding, the compressed text may contain +// a position in the ring buffer and a count of the number of +// bytes from the ring buffer that are to be moved into the +// uncompressed buffer. +// +// This ring buffer is not maintained as part of the compressed +// text. Instead, it is reconstructed dynamically. That is, +// it starts out empty and gets built as the text is decompressed. +// +// The ring buffer contain N bytes, with an additional F - 1 bytes +// to facilitate string comparison. + +unsigned char LZSSCompress::m_ring_buffer[N + F - 1]; + +// m_match_position and m_match_length are set by InsertNode(). +// +// These variables indicate the position in the ring buffer +// and the number of characters at that position that match +// a given string. + +short int LZSSCompress::m_match_position; +short int LZSSCompress::m_match_length; + +// m_lson, m_rson, and m_dad are the Japanese way of referring to +// a tree structure. The dad is the parent and it has a right and +// left son (child). +// +// For i = 0 to N-1, m_rson[i] and m_lson[i] will be the right +// and left children of node i. +// +// For i = 0 to N-1, m_dad[i] is the parent of node i. +// +// For i = 0 to 255, rson[N + i + 1] is the root of the tree for +// strings that begin with the character i. Note that this requires +// one byte characters. +// +// These nodes store values of 0...(N-1). Memory requirements +// can be reduces by using 2-byte integers instead of full 4-byte +// integers (for 32-bit applications). Therefore, these are +// defined as "short ints." + +short int LZSSCompress::m_lson[N + 1]; +short int LZSSCompress::m_rson[N + 257]; +short int LZSSCompress::m_dad[N + 1]; + + +/****************************************************************************** + * LZSSCompress Constructor - Initializes data for instance of LZSSCompress + * + */ + +LZSSCompress::LZSSCompress() : SWCompress() { +} + + +/****************************************************************************** + * LZSSCompress Destructor - Cleans up instance of LZSSCompress + */ + +LZSSCompress::~LZSSCompress() { +} + + +/****************************************************************************** + * LZSSCompress::InitTree - This function initializes the tree nodes to + * "empty" states. + */ + +void LZSSCompress::InitTree(void) { + int i; + + // For i = 0 to N - 1, m_rson[i] and m_lson[i] will be the right + // and left children of node i. These nodes need not be + // initialized. However, for debugging purposes, it is nice to + // have them initialized. Since this is only used for compression + // (not decompression), I don't mind spending the time to do it. + // + // For the same range of i, m_dad[i] is the parent of node i. + // These are initialized to a known value that can represent + // a "not used" state. + + for (i = 0; i < N; i++) { + m_lson[i] = NOT_USED; + m_rson[i] = NOT_USED; + m_dad[i] = NOT_USED; + } + + // For i = 0 to 255, m_rson[N + i + 1] is the root of the tree + // for strings that begin with the character i. This is why + // the right child array is larger than the left child array. + // These are also initialzied to a "not used" state. + // + // Note that there are 256 of these, one for each of the possible + // 256 characters. + + for (i = N + 1; i <= (N + 256); i++) { + m_rson[i] = NOT_USED; + } +} + + +/****************************************************************************** + * LZSSCompress::InsertNode - This function inserts a string from the ring + * buffer into one of the trees. It loads the + * match position and length member variables + * for the longest match. + * + * The string to be inserted is identified by + * the parameter Pos, A full F bytes are + * inserted. So, + * m_ring_buffer[Pos ... Pos+F-1] + * are inserted. + * + * If the matched length is exactly F, then an + * old node is removed in favor of the new one + * (because the old one will be deleted + * sooner). + * + * Note that Pos plays a dual role. It is + * used as both a position in the ring buffer + * and also as a tree node. + * m_ring_buffer[Pos] defines a character that + * is used to identify a tree node. + * + * ENT: pos - position in the buffer + */ + +void LZSSCompress::InsertNode(short int Pos) +{ + short int i; + short int p; + int cmp; + unsigned char * key; + +/* + ASSERT(Pos >= 0); + ASSERT(Pos < N); +*/ + + cmp = 1; + key = &(m_ring_buffer[Pos]); + + // The last 256 entries in m_rson contain the root nodes for + // strings that begin with a letter. Get an index for the + // first letter in this string. + + p = (short int) (N + 1 + key[0]); + + // Set the left and right tree nodes for this position to "not + // used." + + m_lson[Pos] = NOT_USED; + m_rson[Pos] = NOT_USED; + + // Haven't matched anything yet. + + m_match_length = 0; + + for ( ; ; ) { + if (cmp >= 0) { + if (m_rson[p] != NOT_USED) { + p = m_rson[p]; + } + else { + m_rson[p] = Pos; + m_dad[Pos] = p; + return; + } + } + else { + if (m_lson[p] != NOT_USED) { + p = m_lson[p]; + } + else { + m_lson[p] = Pos; + m_dad[Pos] = p; + return; + } + } + + // Should we go to the right or the left to look for the + // next match? + + for (i = 1; i < F; i++) { + cmp = key[i] - m_ring_buffer[p + i]; + if (cmp != 0) + break; + } + + if (i > m_match_length) { + m_match_position = p; + m_match_length = i; + + if (i >= F) + break; + } + } + + m_dad[Pos] = m_dad[p]; + m_lson[Pos] = m_lson[p]; + m_rson[Pos] = m_rson[p]; + + m_dad[ m_lson[p] ] = Pos; + m_dad[ m_rson[p] ] = Pos; + + if (m_rson[ m_dad[p] ] == p) { + m_rson[ m_dad[p] ] = Pos; + } + else { + m_lson[ m_dad[p] ] = Pos; + } + + // Remove "p" + + m_dad[p] = NOT_USED; +} + + +/****************************************************************************** + * LZSSCompress::DeleteNode - This function removes the node "Node" from the + * tree. + * + * ENT: node - node to be removed + */ + +void LZSSCompress::DeleteNode(short int Node) +{ + short int q; + +/* + ASSERT(Node >= 0); + ASSERT(Node < (N+1)); +*/ + + if (m_dad[Node] == NOT_USED) { // not in tree, nothing to do + return; + } + + if (m_rson[Node] == NOT_USED) { + q = m_lson[Node]; + } + else if (m_lson[Node] == NOT_USED) { + q = m_rson[Node]; + } + else { + q = m_lson[Node]; + if (m_rson[q] != NOT_USED) { + do { + q = m_rson[q]; + } while (m_rson[q] != NOT_USED); + + m_rson[ m_dad[q] ] = m_lson[q]; + m_dad[ m_lson[q] ] = m_dad[q]; + m_lson[q] = m_lson[Node]; + m_dad[ m_lson[Node] ] = q; + } + + m_rson[q] = m_rson[Node]; + m_dad[ m_rson[Node] ] = q; + } + + m_dad[q] = m_dad[Node]; + + if (m_rson[ m_dad[Node] ] == Node) { + m_rson[ m_dad[Node] ] = q; + } + else { + m_lson[ m_dad[Node] ] = q; + } + + m_dad[Node] = NOT_USED; +} + + +/****************************************************************************** + * LZSSCompress::Encode - This function "encodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + * NOTE: must set zlen for parent class to know length of + * compressed buffer. + */ + +void LZSSCompress::Encode(void) +{ + short int i; // an iterator + short int r; // node number in the binary tree + short int s; // position in the ring buffer + unsigned short int len; // len of initial string + short int last_match_length; // length of last match + short int code_buf_pos; // position in the output buffer + unsigned char code_buf[17]; // the output buffer + unsigned char mask; // bit mask for byte 0 of out buf + unsigned char c; // character read from string + + // Start with a clean tree. + + InitTree(); + direct = 0; // set direction needed by parent [Get|Send]Chars() + + // code_buf[0] works as eight flags. A "1" represents that the + // unit is an unencoded letter (1 byte), and a "0" represents + // that the next unit is a <position,length> pair (2 bytes). + // + // code_buf[1..16] stores eight units of code. Since the best + // we can do is store eight <position,length> pairs, at most 16 + // bytes are needed to store this. + // + // This is why the maximum size of the code buffer is 17 bytes. + + code_buf[0] = 0; + code_buf_pos = 1; + + // Mask iterates over the 8 bits in the code buffer. The first + // character ends up being stored in the low bit. + // + // bit 8 7 6 5 4 3 2 1 + // | | + // | first sequence in code buffer + // | + // last sequence in code buffer + + mask = 1; + + s = 0; + r = (short int) N - (short int) F; + + // Initialize the ring buffer with spaces... + + // Note that the last F bytes of the ring buffer are not filled. + // This is because those F bytes will be filled in immediately + // with bytes from the input stream. + + memset(m_ring_buffer, ' ', N - F); + + // Read F bytes into the last F bytes of the ring buffer. + // + // This function loads the buffer with X characters and returns + // the actual amount loaded. + + len = GetChars((char *) &(m_ring_buffer[r]), F); + + // Make sure there is something to be compressed. + + if (len == 0) + return; + + // Insert the F strings, each of which begins with one or more + // 'space' characters. Note the order in which these strings + // are inserted. This way, degenerate trees will be less likely + // to occur. + + for (i = 1; i <= F; i++) { + InsertNode((short int) (r - i)); + } + + // Finally, insert the whole string just read. The + // member variables match_length and match_position are set. + + InsertNode(r); + + // Now that we're preloaded, continue till done. + + do { + + // m_match_length may be spuriously long near the end of + // text. + + if (m_match_length > len) { + m_match_length = len; + } + + // Is it cheaper to store this as a single character? If so, + // make it so. + + if (m_match_length < THRESHOLD) { + // Send one character. Remember that code_buf[0] is the + // set of flags for the next eight items. + + m_match_length = 1; + code_buf[0] |= mask; + code_buf[code_buf_pos++] = m_ring_buffer[r]; + } + + // Otherwise, we do indeed have a string that can be stored + // compressed to save space. + + else { + // The next 16 bits need to contain the position (12 bits) + // and the length (4 bits). + + code_buf[code_buf_pos++] = (unsigned char) m_match_position; + code_buf[code_buf_pos++] = (unsigned char) ( + ((m_match_position >> 4) & 0xf0) | + (m_match_length - THRESHOLD) ); + } + + // Shift the mask one bit to the left so that it will be ready + // to store the new bit. + + mask = (unsigned char) (mask << 1); + + // If the mask is now 0, then we know that we have a full set + // of flags and items in the code buffer. These need to be + // output. + + if (!mask) { + // code_buf is the buffer of characters to be output. + // code_buf_pos is the number of characters it contains. + + SendChars((char *) code_buf, code_buf_pos); + + // Reset for next buffer... + + code_buf[0] = 0; + code_buf_pos = 1; + mask = 1; + } + + last_match_length = m_match_length; + + // Delete old strings and read new bytes... + + for (i = 0; i < last_match_length; i++) { + // Get next character... + + if (GetChars((char *) &c, 1) != 1) + break; + + // Delete "old strings" + + DeleteNode(s); + + // Put this character into the ring buffer. + // + // The original comment here says "If the position is near + // the end of the buffer, extend the buffer to make + // string comparison easier." + // + // That's a little misleading, because the "end" of the + // buffer is really what we consider to be the "beginning" + // of the buffer, that is, positions 0 through F. + // + // The idea is that the front end of the buffer is duplicated + // into the back end so that when you're looking at characters + // at the back end of the buffer, you can index ahead (beyond + // the normal end of the buffer) and see the characters + // that are at the front end of the buffer wihtout having + // to adjust the index. + // + // That is... + // + // 1234xxxxxxxxxxxxxxxxxxxxxxxxxxxxx1234 + // | | | + // position 0 end of buffer | + // | + // duplicate of front of buffer + + m_ring_buffer[s] = c; + + if (s < F - 1) { + m_ring_buffer[s + N] = c; + } + + // Increment the position, and wrap around when we're at + // the end. Note that this relies on N being a power of 2. + + s = (short int) ( (s + 1) & (N - 1) ); + r = (short int) ( (r + 1) & (N - 1) ); + + // Register the string that is found in + // m_ring_buffer[r..r+F-1]. + + InsertNode(r); + } + + // If we didn't quit because we hit the last_match_length, + // then we must have quit because we ran out of characters + // to process. + + while (i++ < last_match_length) { + DeleteNode(s); + + s = (short int) ( (s + 1) & (N - 1) ); + r = (short int) ( (r + 1) & (N - 1) ); + + // Note that len hitting 0 is the key that causes the + // do...while() to terminate. This is the only place + // within the loop that len is modified. + // + // Its original value is F (or a number less than F for + // short strings). + + if (--len) { + InsertNode(r); /* buffer may not be empty. */ + } + } + + // End of do...while() loop. Continue processing until there + // are no more characters to be compressed. The variable + // "len" is used to signal this condition. + } while (len > 0); + + // There could still be something in the output buffer. Send it + // now. + + if (code_buf_pos > 1) { + // code_buf is the encoded string to send. + // code_buf_ptr is the number of characters. + + SendChars((char *) code_buf, code_buf_pos); + } + + + // must set zlen for parent class to know length of compressed buffer + zlen = zpos; +} + + +/****************************************************************************** + * LZSSCompress::Decode - This function "decodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + */ + +void LZSSCompress::Decode(void) +{ + int k; + int r; // node number + unsigned char c[F]; // an array of chars + unsigned char flags; // 8 bits of flags + int flag_count; // which flag we're on + short int pos; // position in the ring buffer + short int len; // number of chars in ring buffer + unsigned long totalLen = 0; + + direct = 1; // set direction needed by parent [Get|Send]Chars() + + // Initialize the ring buffer with a common string. + // + // Note that the last F bytes of the ring buffer are not filled. + + memset(m_ring_buffer, ' ', N - F); + + r = N - F; + + flags = (char) 0; + flag_count = 0; + + for ( ; ; ) { + + // If there are more bits of interest in this flag, then + // shift that next interesting bit into the 1's position. + // + // If this flag has been exhausted, the next byte must + // be a flag. + + if (flag_count > 0) { + flags = (unsigned char) (flags >> 1); + flag_count--; + } + else { + // Next byte must be a flag. + + if (GetChars((char *) &flags, 1) != 1) + break; + + // Set the flag counter. While at first it might appear + // that this should be an 8 since there are 8 bits in the + // flag, it should really be a 7 because the shift must + // be performed 7 times in order to see all 8 bits. + + flag_count = 7; + } + + // If the low order bit of the flag is now set, then we know + // that the next byte is a single, unencoded character. + + if (flags & 1) { + if (GetChars((char *) c, 1) != 1) + break; + + if (SendChars((char *) c, 1) != 1) { + totalLen++; + break; + } + + // Add to buffer, and increment to next spot. Wrap at end. + + m_ring_buffer[r] = c[0]; + r = (short int) ( (r + 1) & (N - 1) ); + } + + // Otherwise, we know that the next two bytes are a + // <position,length> pair. The position is in 12 bits and + // the length is in 4 bits. + + else { + // Original code: + // if ((i = getc(infile)) == EOF) + // break; + // if ((j = getc(infile)) == EOF) + // break; + // i |= ((j & 0xf0) << 4); + // j = (j & 0x0f) + THRESHOLD; + // + // I've modified this to only make one input call, and + // have changed the variable names to something more + // obvious. + + if (GetChars((char *) c, 2) != 2) + break; + + // Convert these two characters into the position and + // length. Note that the length is always at least + // THRESHOLD, which is why we're able to get a length + // of 18 out of only 4 bits. + + pos = (short int) ( c[0] | ((c[1] & 0xf0) << 4) ); + + len = (short int) ( (c[1] & 0x0f) + THRESHOLD ); + + // There are now "len" characters at position "pos" in + // the ring buffer that can be pulled out. Note that + // len is never more than F. + + for (k = 0; k < len; k++) { + c[k] = m_ring_buffer[(pos + k) & (N - 1)]; + + // Add to buffer, and increment to next spot. Wrap at end. + + m_ring_buffer[r] = c[k]; + r = (short int) ( (r + 1) & (N - 1) ); + } + + // Add the "len" :characters to the output stream. + + if (SendChars((char *) c, len) != (unsigned int)len) { + totalLen += len; + break; + } + } + } + slen = totalLen; +} diff --git a/src/modules/common/rawstr.cpp b/src/modules/common/rawstr.cpp new file mode 100644 index 0000000..c7363d9 --- /dev/null +++ b/src/modules/common/rawstr.cpp @@ -0,0 +1,551 @@ +/****************************************************************************** + * rawstr.cpp - code for class 'RawStr'- a module that reads raw text + * files: ot and nt using indexs ??.bks ??.cps ??.vss + * and provides lookup and parsing functions based on + * class StrKey + */ + + +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <stdlib.h> +#include <utilfuns.h> +#include <rawstr.h> +#include <sysdata.h> +/****************************************************************************** + * RawStr Statics + */ + +int RawStr::instance = 0; +char RawStr::nl = '\n'; + + +/****************************************************************************** + * RawStr Constructor - Initializes data for instance of RawStr + * + * ENT: ipath - path of the directory where data and index files are located. + * be sure to include the trailing separator (e.g. '/' or '\') + * (e.g. 'modules/texts/rawtext/webster/') + */ + +RawStr::RawStr(const char *ipath, int fileMode) +{ + char buf[127]; + + lastoff = -1; + path = 0; + stdstr(&path, ipath); + +#ifndef O_BINARY // O_BINARY is needed in Borland C++ 4.53 +#define O_BINARY 0 // If it hasn't been defined than we probably +#endif // don't need it. + + if (fileMode == -1) { // try read/write if possible + fileMode = O_RDWR; + } + + sprintf(buf, "%s.idx", path); + idxfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s.dat", path); + datfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + if (datfd < 0) { + sprintf(buf, "Error: %d", errno); + perror(buf); + } + + instance++; +} + + +/****************************************************************************** + * RawStr Destructor - Cleans up instance of RawStr + */ + +RawStr::~RawStr() +{ + if (path) + delete [] path; + + --instance; + + FileMgr::systemFileMgr.close(idxfd); + FileMgr::systemFileMgr.close(datfd); +} + + +/****************************************************************************** + * RawStr::getidxbufdat - Gets the index string at the given idx offset + * NOTE: buf is allocated and must be freed by + * calling function + * + * ENT: ioffset - offset in dat file to lookup + * buf - address of pointer to allocate for storage of string + */ + +void RawStr::getidxbufdat(long ioffset, char **buf) +{ + int size; + char ch; + if (datfd > 0) { + lseek(datfd->getFd(), ioffset, SEEK_SET); + for (size = 0; read(datfd->getFd(), &ch, 1) == 1; size++) { + if ((ch == '\\') || (ch == 10) || (ch == 13)) + break; + } + *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1); + if (size) { + lseek(datfd->getFd(), ioffset, SEEK_SET); + read(datfd->getFd(), *buf, size); + } + (*buf)[size] = 0; + toupperstr_utf8(*buf); + } + else { + *buf = (*buf) ? (char *)realloc(*buf, 1) : (char *)malloc(1); + **buf = 0; + } +} + + +/****************************************************************************** + * RawStr::getidxbuf - Gets the index string at the given idx offset + * NOTE: buf is allocated and must be freed by + * calling function + * + * ENT: ioffset - offset in idx file to lookup + * buf - address of pointer to allocate for storage of string + */ + +void RawStr::getidxbuf(long ioffset, char **buf) +{ + char *trybuf, *targetbuf; + long offset; + + if (idxfd > 0) { + lseek(idxfd->getFd(), ioffset, SEEK_SET); + read(idxfd->getFd(), &offset, 4); + + offset = swordtoarch32(offset); + + getidxbufdat(offset, buf); + toupperstr_utf8(*buf); + } +} + + +/****************************************************************************** + * RawStr::findoffset - Finds the offset of the key string from the indexes + * + * ENT: key - key string to lookup + * start - address to store the starting offset + * size - address to store the size of the entry + * away - number of entries before of after to jump + * (default = 0) + * + * RET: error status -1 general error; -2 new file + */ + +signed char RawStr::findoffset(const char *ikey, long *start, unsigned short *size, long away, long *idxoff) +{ + char *trybuf, *targetbuf, *key, quitflag = 0; + signed char retval = -1; + long headoff, tailoff, tryoff = 0, maxoff = 0; + + if (idxfd->getFd() >=0) { + tailoff = maxoff = lseek(idxfd->getFd(), 0, SEEK_END) - 6; + retval = (tailoff >= 0) ? 0 : -2; // if NOT new file + if (*ikey) { + headoff = 0; + + key = new char [ strlen(ikey) + 1 ]; + strcpy(key, ikey); + toupperstr_utf8(key); + + trybuf = 0; + + while (headoff < tailoff) { + tryoff = (lastoff == -1) ? headoff + ((((tailoff / 6) - (headoff / 6))) / 2) * 6 : lastoff; + lastoff = -1; + getidxbuf(tryoff, &trybuf); + + if (!*trybuf && tryoff) { // In case of extra entry at end of idx (not first entry) + tryoff += (tryoff > (maxoff / 2))?-6:6; + retval = -1; + break; + } + + if (!strcmp(key, trybuf)) + break; + + int diff = strcmp(key, trybuf); + if (diff < 0) + tailoff = (tryoff == headoff) ? headoff : tryoff; + else headoff = tryoff; + if (tailoff == headoff + 6) { + if (quitflag++) + headoff = tailoff; + } + } + if (headoff >= tailoff) + tryoff = headoff; + if (trybuf) + free(trybuf); + delete [] key; + } + else tryoff = 0; + + lseek(idxfd->getFd(), tryoff, SEEK_SET); + + *start = *size = 0; + read(idxfd->getFd(), start, 4); + read(idxfd->getFd(), size, 2); + if (idxoff) + *idxoff = tryoff; + + *start = swordtoarch32(*start); + *size = swordtoarch16(*size); + + while (away) { + long laststart = *start; + unsigned short lastsize = *size; + long lasttry = tryoff; + tryoff += (away > 0) ? 6 : -6; + + bool bad = false; + if (((tryoff + (away*6)) < -6) || (tryoff + (away*6) > (maxoff+6))) + bad = true; + else if (lseek(idxfd->getFd(), tryoff, SEEK_SET) < 0) + bad = true; + if (bad) { + retval = -1; + *start = laststart; + *size = lastsize; + tryoff = lasttry; + if (idxoff) + *idxoff = tryoff; + break; + } + read(idxfd->getFd(), start, 4); + read(idxfd->getFd(), size, 2); + if (idxoff) + *idxoff = tryoff; + + *start = swordtoarch32(*start); + *size = swordtoarch16(*size); + + if (((laststart != *start) || (lastsize != *size)) && (*start >= 0) && (*size)) + away += (away < 0) ? 1 : -1; + } + + lastoff = tryoff; + } + else { + *start = 0; + *size = 0; + if (idxoff) + *idxoff = 0; + retval = -1; + } + return retval; +} + + +/****************************************************************************** + * RawStr::preptext - Prepares the text before returning it to external + * objects + * + * ENT: buf - buffer where text is stored and where to store the prep'd + * text. + */ + +void RawStr::preptext(char *buf) { + char *to, *from, space = 0, cr = 0, realdata = 0, nlcnt = 0; + + for (to = from = buf; *from; from++) { + switch (*from) { + case 10: + if (!realdata) + continue; + space = (cr) ? 0 : 1; + cr = 0; + nlcnt++; + if (nlcnt > 1) { +// *to++ = nl; + *to++ = nl; +// nlcnt = 0; + } + continue; + case 13: + if (!realdata) + continue; + *to++ = nl; + space = 0; + cr = 1; + continue; + } + realdata = 1; + nlcnt = 0; + if (space) { + space = 0; + if (*from != ' ') { + *to++ = ' '; + from--; + continue; + } + } + *to++ = *from; + } + *to = 0; + + while (to > (buf+1)) { // remove trailing excess + to--; + if ((*to == 10) || (*to == ' ')) + *to = 0; + else break; + } +} + + +/****************************************************************************** + * RawStr::readtext - gets text at a given offset + * + * ENT: + * start - starting offset where the text is located in the file + * size - size of text entry + * buf - buffer to store text + * + */ + +void RawStr::readtext(long istart, unsigned short *isize, char **idxbuf, char **buf) +{ + char *ch; + char *idxbuflocal = 0; + getidxbufdat(istart, &idxbuflocal); + long start = istart; + + do { + if (*idxbuf) + delete [] *idxbuf; + if (*buf) + delete [] *buf; + *buf = new char [ ++(*isize) * FILTERPAD ]; + *idxbuf = new char [ (*isize) * FILTERPAD ]; + + memset(*buf, 0, *isize); + lseek(datfd->getFd(), start, SEEK_SET); + read(datfd->getFd(), *buf, (int)((*isize) - 1)); + + for (ch = *buf; *ch; ch++) { // skip over index string + if (*ch == 10) { + ch++; + break; + } + } + memmove(*buf, ch, *isize - (unsigned long)(ch-*buf)); + + // resolve link + if (!strncmp(*buf, "@LINK", 5)) { + for (ch = *buf; *ch; ch++) { // null before nl + if (*ch == 10) { + *ch = 0; + break; + } + } + findoffset(*buf + 6, &start, isize); + } + else break; + } + while (true); // while we're resolving links + + if (idxbuflocal) { + int localsize = strlen(idxbuflocal); + localsize = (localsize < (*isize - 1)) ? localsize : (*isize - 1); + strncpy(*idxbuf, idxbuflocal, localsize); + (*idxbuf)[localsize] = 0; + free(idxbuflocal); + } +} + + +/****************************************************************************** + * RawLD::settext - Sets text for current offset + * + * ENT: key - key for this entry + * buf - buffer to store + * len - length of buffer (0 - null terminated) + */ + +void RawStr::settext(const char *ikey, const char *buf, long len) +{ + + long start, outstart; + long idxoff; + long endoff; + long shiftSize; + unsigned short size; + unsigned short outsize; + static const char nl[] = {13, 10}; + char *tmpbuf = 0; + char *key = 0; + char *dbKey = 0; + char *idxBytes = 0; + char *outbuf = 0; + char *ch = 0; + + char errorStatus = findoffset(ikey, &start, &size, 0, &idxoff); + stdstr(&key, ikey); + toupperstr_utf8(key); + + len = (len < 0) ? strlen(buf) : len; + + getidxbufdat(start, &dbKey); + + if (strcmp(key, dbKey) < 0) { + } + else if (strcmp(key, dbKey) > 0) { + if (errorStatus != -2) // not a new file + idxoff += 6; + else idxoff = 0; + } + else if ((!strcmp(key, dbKey)) && (len>0 /*we're not deleting*/)) { // got absolute entry + do { + tmpbuf = new char [ size + 2 ]; + memset(tmpbuf, 0, size + 2); + lseek(datfd->getFd(), start, SEEK_SET); + read(datfd->getFd(), tmpbuf, (int)(size - 1)); + + for (ch = tmpbuf; *ch; ch++) { // skip over index string + if (*ch == 10) { + ch++; + break; + } + } + memmove(tmpbuf, ch, size - (unsigned short)(ch-tmpbuf)); + + // resolve link + if (!strncmp(tmpbuf, "@LINK", 5) && (len)) { + for (ch = tmpbuf; *ch; ch++) { // null before nl + if (*ch == 10) { + *ch = 0; + break; + } + } + findoffset(tmpbuf + 6, &start, &size, 0, &idxoff); + } + else break; + } + while (true); // while we're resolving links + } + + endoff = lseek(idxfd->getFd(), 0, SEEK_END); + + shiftSize = endoff - idxoff; + + if (shiftSize > 0) { + idxBytes = new char [ shiftSize ]; + lseek(idxfd->getFd(), idxoff, SEEK_SET); + read(idxfd->getFd(), idxBytes, shiftSize); + } + + outbuf = new char [ len + strlen(key) + 5 ]; + sprintf(outbuf, "%s%c%c", key, 13, 10); + size = strlen(outbuf); + memcpy(outbuf + size, buf, len); + size = outsize = size + (len); + + start = outstart = lseek(datfd->getFd(), 0, SEEK_END); + + outstart = archtosword32(start); + outsize = archtosword16(size); + + lseek(idxfd->getFd(), idxoff, SEEK_SET); + if (len > 0) { + lseek(datfd->getFd(), start, SEEK_SET); + write(datfd->getFd(), outbuf, (int)size); + + // add a new line to make data file easier to read in an editor + write(datfd->getFd(), &nl, 2); + + write(idxfd->getFd(), &outstart, 4); + write(idxfd->getFd(), &outsize, 2); + if (idxBytes) { + write(idxfd->getFd(), idxBytes, shiftSize); + delete [] idxBytes; + } + } + else { // delete entry + if (idxBytes) { + write(idxfd->getFd(), idxBytes+6, shiftSize-6); + lseek(idxfd->getFd(), -1, SEEK_CUR); // last valid byte + FileMgr::systemFileMgr.trunc(idxfd); // truncate index + delete [] idxBytes; + } + } + + delete [] key; + delete [] outbuf; + free(dbKey); +} + + +/****************************************************************************** + * RawLD::linkentry - links one entry to another + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * destidxoff - dest offset into .vss + * srcidxoff - source offset into .vss + */ + +void RawStr::linkentry(const char *destkey, const char *srckey) { + char *text = new char [ strlen(destkey) + 7 ]; + sprintf(text, "@LINK %s", destkey); + settext(srckey, text); + delete [] text; +} + + +/****************************************************************************** + * RawLD::CreateModule - Creates new module files + * + * ENT: path - directory to store module files + * RET: error status + */ + +signed char RawStr::createModule(const char *ipath) +{ + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd, *fd2; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + sprintf(buf, "%s.dat", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s.idx", path); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + FileMgr::systemFileMgr.close(fd2); + + delete [] path; + + return 0; +} diff --git a/src/modules/common/rawstr4.cpp b/src/modules/common/rawstr4.cpp new file mode 100644 index 0000000..da0789b --- /dev/null +++ b/src/modules/common/rawstr4.cpp @@ -0,0 +1,555 @@ +/****************************************************************************** + * rawstr.cpp - code for class 'RawStr'- a module that reads raw text + * files: ot and nt using indexs ??.bks ??.cps ??.vss + * and provides lookup and parsing functions based on + * class StrKey + */ + +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <stdlib.h> +#include <utilfuns.h> +#include <rawstr4.h> +#include <sysdata.h> + +/****************************************************************************** + * RawStr Statics + */ + +int RawStr4::instance = 0; + + +/****************************************************************************** + * RawStr Constructor - Initializes data for instance of RawStr + * + * ENT: ipath - path of the directory where data and index files are located. + * be sure to include the trailing separator (e.g. '/' or '\') + * (e.g. 'modules/texts/rawtext/webster/') + */ + +RawStr4::RawStr4(const char *ipath, int fileMode) +{ + char buf[127]; + + nl = '\n'; + lastoff = -1; + path = 0; + stdstr(&path, ipath); + +#ifndef O_BINARY // O_BINARY is needed in Borland C++ 4.53 +#define O_BINARY 0 // If it hasn't been defined than we probably +#endif // don't need it. + + if (fileMode == -1) { // try read/write if possible + fileMode = O_RDWR; + } + + sprintf(buf, "%s.idx", path); + idxfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s.dat", path); + datfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + if (datfd < 0) { + sprintf(buf, "Error: %d", errno); + perror(buf); + } + + instance++; +} + + +/****************************************************************************** + * RawStr Destructor - Cleans up instance of RawStr + */ + +RawStr4::~RawStr4() +{ + if (path) + delete [] path; + + --instance; + + FileMgr::systemFileMgr.close(idxfd); + FileMgr::systemFileMgr.close(datfd); +} + + +/****************************************************************************** + * RawStr4::getidxbufdat - Gets the index string at the given idx offset + * NOTE: buf is allocated and must be freed by + * calling function + * + * ENT: ioffset - offset in dat file to lookup + * buf - address of pointer to allocate for storage of string + */ + +void RawStr4::getidxbufdat(long ioffset, char **buf) { + int size; + char ch; + if (datfd > 0) { + lseek(datfd->getFd(), ioffset, SEEK_SET); + for (size = 0; read(datfd->getFd(), &ch, 1) == 1; size++) { + if ((ch == '\\') || (ch == 10) || (ch == 13)) + break; + } + *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1); + if (size) { + lseek(datfd->getFd(), ioffset, SEEK_SET); + read(datfd->getFd(), *buf, size); + } + (*buf)[size] = 0; + toupperstr_utf8(*buf); + } + else { + *buf = (*buf) ? (char *)realloc(*buf, 1) : (char *)malloc(1); + **buf = 0; + } +} + + +/****************************************************************************** + * RawStr4::getidxbuf - Gets the index string at the given idx offset + * NOTE: buf is allocated and must be freed by + * calling function + * + * ENT: ioffset - offset in idx file to lookup + * buf - address of pointer to allocate for storage of string + */ + +void RawStr4::getidxbuf(long ioffset, char **buf) +{ + char *trybuf, *targetbuf; + long offset; + + if (idxfd > 0) { + lseek(idxfd->getFd(), ioffset, SEEK_SET); + read(idxfd->getFd(), &offset, 4); + + offset = swordtoarch32(offset); + + getidxbufdat(offset, buf); + for (trybuf = targetbuf = *buf; *trybuf; trybuf++, targetbuf++) { + *targetbuf = *trybuf; + } + *targetbuf = 0; + trybuf = 0; + toupperstr_utf8(targetbuf); + } +} + + +/****************************************************************************** + * RawStr4::findoffset - Finds the offset of the key string from the indexes + * + * ENT: key - key string to lookup + * start - address to store the starting offset + * size - address to store the size of the entry + * away - number of entries before of after to jump + * (default = 0) + * + * RET: error status -1 general error; -2 new file + */ + +signed char RawStr4::findoffset(const char *ikey, long *start, unsigned long *size, long away, long *idxoff) +{ + char *trybuf, *targetbuf, *key, quitflag = 0; + signed char retval = -1; + long headoff, tailoff, tryoff = 0, maxoff = 0; + + if (idxfd->getFd() >=0) { + tailoff = maxoff = lseek(idxfd->getFd(), 0, SEEK_END) - 8; + retval = (tailoff >= 0) ? 0 : -2; // if NOT new file + if (*ikey) { + headoff = 0; + + key = new char [ strlen(ikey) + 1 ]; + strcpy(key, ikey); + toupperstr_utf8(key); + + trybuf = 0; + + while (headoff < tailoff) { + tryoff = (lastoff == -1) ? headoff + ((((tailoff / 8) - (headoff / 8))) / 2) * 8 : lastoff; + lastoff = -1; + getidxbuf(tryoff, &trybuf); + + if (!*trybuf) { // In case of extra entry at end of idx + tryoff += (tryoff > (maxoff / 2))?-8:8; + retval = -1; + break; + } + + if (!strcmp(key, trybuf)) + break; + + int diff = strcmp(key, trybuf); + if (diff < 0) + tailoff = (tryoff == headoff) ? headoff : tryoff; + else headoff = tryoff; + if (tailoff == headoff + 8) { + if (quitflag++) + headoff = tailoff; + } + } + if (headoff >= tailoff) + tryoff = headoff; + if (trybuf) + free(trybuf); + delete [] key; + } + else tryoff = 0; + + lseek(idxfd->getFd(), tryoff, SEEK_SET); + + *start = *size = 0; + read(idxfd->getFd(), start, 4); + read(idxfd->getFd(), size, 4); + if (idxoff) + *idxoff = tryoff; + + *start = swordtoarch32(*start); + *size = swordtoarch32(*size); + + while (away) { + long laststart = *start; + unsigned long lastsize = *size; + long lasttry = tryoff; + tryoff += (away > 0) ? 8 : -8; + + bool bad = false; + if (((tryoff + (away*8)) < -8) || (tryoff + (away*8) > (maxoff+8))) + bad = true; + else if (lseek(idxfd->getFd(), tryoff, SEEK_SET) < 0) + bad = true; + if (bad) { + retval = -1; + *start = laststart; + *size = lastsize; + tryoff = lasttry; + if (idxoff) + *idxoff = tryoff; + break; + } + read(idxfd->getFd(), start, 4); + read(idxfd->getFd(), size, 4); + if (idxoff) + *idxoff = tryoff; + + *start = swordtoarch32(*start); + *size = swordtoarch32(*size); + + if (((laststart != *start) || (lastsize != *size)) && (*start >= 0) && (*size)) + away += (away < 0) ? 1 : -1; + } + + lastoff = tryoff; + } + else { + *start = 0; + *size = 0; + if (idxoff) + *idxoff = 0; + retval = -1; + } + return retval; +} + + +/****************************************************************************** + * RawStr4::preptext - Prepares the text before returning it to external + * objects + * + * ENT: buf - buffer where text is stored and where to store the prep'd + * text. + */ + +void RawStr4::preptext(char *buf) +{ + char *to, *from, space = 0, cr = 0, realdata = 0, nlcnt = 0; + + for (to = from = buf; *from; from++) { + switch (*from) { + case 10: + if (!realdata) + continue; + space = (cr) ? 0 : 1; + cr = 0; + nlcnt++; + if (nlcnt > 1) { +// *to++ = nl; + *to++ = nl; +// nlcnt = 0; + } + continue; + case 13: + if (!realdata) + continue; + *to++ = nl; + space = 0; + cr = 1; + continue; + } + realdata = 1; + nlcnt = 0; + if (space) { + space = 0; + if (*from != ' ') { + *to++ = ' '; + from--; + continue; + } + } + *to++ = *from; + } + *to = 0; + + while (to > (buf+1)) { // remove trailing excess + to--; + if ((*to == 10) || (*to == ' ')) + *to = 0; + else break; + } +} + + +/****************************************************************************** + * RawStr4::readtext - gets text at a given offset + * + * ENT: + * start - starting offset where the text is located in the file + * size - size of text entry + * buf - buffer to store text + * + */ + +void RawStr4::readtext(long istart, unsigned long *isize, char **idxbuf, char **buf) +{ + char *ch; + char *idxbuflocal = 0; + getidxbufdat(istart, &idxbuflocal); + long start = istart; + + do { + if (*idxbuf) + delete [] *idxbuf; + if (*buf) + delete [] *buf; + *buf = new char [ ++(*isize) * FILTERPAD ]; + *idxbuf = new char [ (*isize) * FILTERPAD ]; + + memset(*buf, 0, *isize); + lseek(datfd->getFd(), start, SEEK_SET); + read(datfd->getFd(), *buf, (int)((*isize) - 1)); + + for (ch = *buf; *ch; ch++) { // skip over index string + if (*ch == 10) { + ch++; + break; + } + } + memmove(*buf, ch, *isize - (unsigned long)(ch-*buf)); + + // resolve link + if (!strncmp(*buf, "@LINK", 5)) { + for (ch = *buf; *ch; ch++) { // null before nl + if (*ch == 10) { + *ch = 0; + break; + } + } + findoffset(*buf + 6, &start, isize); + } + else break; + } + while (true); // while we're resolving links + + if (idxbuflocal) { + int localsize = strlen(idxbuflocal); + localsize = (localsize < (*isize - 1)) ? localsize : (*isize - 1); + strncpy(*idxbuf, idxbuflocal, localsize); + (*idxbuf)[localsize] = 0; + free(idxbuflocal); + } +} + + +/****************************************************************************** + * RawLD::settext - Sets text for current offset + * + * ENT: key - key for this entry + * buf - buffer to store + * len - length of buffer (0 - null terminated) + */ + +void RawStr4::setText(const char *ikey, const char *buf, long len) { + + long start, outstart; + long idxoff; + long endoff; + long shiftSize; + unsigned long size; + unsigned long outsize; + static const char nl[] = {13, 10}; + char *tmpbuf = 0; + char *key = 0; + char *dbKey = 0; + char *idxBytes = 0; + char *outbuf = 0; + char *ch = 0; + + char errorStatus = findoffset(ikey, &start, &size, 0, &idxoff); + stdstr(&key, ikey); + toupperstr_utf8(key); + + len = (len < 0) ? strlen(buf) : len; + getidxbufdat(start, &dbKey); + + if (strcmp(key, dbKey) < 0) { + } + else if (strcmp(key, dbKey) > 0) { + if (errorStatus != -2) // not a new file + idxoff += 8; + else idxoff = 0; + } + else if ((!strcmp(key, dbKey)) && (len>0/*we're not deleting*/)) { // got absolute entry + do { + tmpbuf = new char [ size + 2 ]; + memset(tmpbuf, 0, size + 2); + lseek(datfd->getFd(), start, SEEK_SET); + read(datfd->getFd(), tmpbuf, (int)(size - 1)); + + for (ch = tmpbuf; *ch; ch++) { // skip over index string + if (*ch == 10) { + ch++; + break; + } + } + memmove(tmpbuf, ch, size - (unsigned long)(ch-tmpbuf)); + + // resolve link + if (!strncmp(tmpbuf, "@LINK", 5) && (len > 0)) { + for (ch = tmpbuf; *ch; ch++) { // null before nl + if (*ch == 10) { + *ch = 0; + break; + } + } + findoffset(tmpbuf + 8, &start, &size, 0, &idxoff); + ++size; + } + else break; + } + while (true); // while we're resolving links + } + + endoff = lseek(idxfd->getFd(), 0, SEEK_END); + + shiftSize = endoff - idxoff; + + if (shiftSize > 0) { + idxBytes = new char [ shiftSize ]; + lseek(idxfd->getFd(), idxoff, SEEK_SET); + read(idxfd->getFd(), idxBytes, shiftSize); + } + + outbuf = new char [ len + strlen(key) + 5 ]; + sprintf(outbuf, "%s%c%c", key, 13, 10); + size = strlen(outbuf); + memcpy(outbuf + size, buf, len); + size = outsize = size + len; + + start = outstart = lseek(datfd->getFd(), 0, SEEK_END); + + outstart = archtosword32(start); + outsize = archtosword32(size); + + lseek(idxfd->getFd(), idxoff, SEEK_SET); + if (len>0) { + lseek(datfd->getFd(), start, SEEK_SET); + write(datfd->getFd(), outbuf, (long)size); + + // add a new line to make data file easier to read in an editor + write(datfd->getFd(), &nl, 2); + + write(idxfd->getFd(), &outstart, 4); + write(idxfd->getFd(), &outsize, 4); + if (idxBytes) { + write(idxfd->getFd(), idxBytes, shiftSize); + delete [] idxBytes; + } + } + else { // delete entry + if (idxBytes) { + write(idxfd->getFd(), idxBytes+8, shiftSize-8); + lseek(idxfd->getFd(), -1, SEEK_CUR); // last valid byte + FileMgr::systemFileMgr.trunc(idxfd); // truncate index + delete [] idxBytes; + } + } + + delete [] key; + delete [] outbuf; + free(dbKey); +} + + +/****************************************************************************** + * RawLD::linkentry - links one entry to another + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * destidxoff - dest offset into .vss + * srcidxoff - source offset into .vss + */ + +void RawStr4::linkentry(const char *destkey, const char *srckey) { + char *text = new char [ strlen(destkey) + 7 ]; + sprintf(text, "@LINK %s", destkey); + setText(srckey, text); + delete [] text; +} + + +/****************************************************************************** + * RawLD::CreateModule - Creates new module files + * + * ENT: path - directory to store module files + * RET: error status + */ + +signed char RawStr4::createModule(const char *ipath) +{ + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd, *fd2; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + sprintf(buf, "%s.dat", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s.idx", path); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + FileMgr::systemFileMgr.close(fd2); + + delete [] path; + + return 0; +} diff --git a/src/modules/common/rawverse.cpp b/src/modules/common/rawverse.cpp new file mode 100644 index 0000000..f77fbe5 --- /dev/null +++ b/src/modules/common/rawverse.cpp @@ -0,0 +1,348 @@ +/****************************************************************************** + * rawverse.cpp - code for class 'RawVerse'- a module that reads raw text + * files: ot and nt using indexs ??.bks ??.cps ??.vss + * and provides lookup and parsing functions based on + * class VerseKey + */ + + +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> + +#ifndef __GNUC__ +#include <io.h> +#include <sys/stat.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> +#include <rawverse.h> +#include <versekey.h> +#include <sysdata.h> + +#ifndef O_BINARY // O_BINARY is needed in Borland C++ 4.53 +#define O_BINARY 0 // If it hasn't been defined than we probably +#endif // don't need it. + + +/****************************************************************************** + * RawVerse Statics + */ + +int RawVerse::instance = 0; +const char *RawVerse::nl = "\r\n"; + + +/****************************************************************************** + * RawVerse Constructor - Initializes data for instance of RawVerse + * + * ENT: ipath - path of the directory where data and index files are located. + * be sure to include the trailing separator (e.g. '/' or '\') + * (e.g. 'modules/texts/rawtext/webster/') + */ + +RawVerse::RawVerse(const char *ipath, int fileMode) +{ + char *buf; + + path = 0; + stdstr(&path, ipath); + buf = new char [ strlen(path) + 80 ]; + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + if (fileMode == -1) { // try read/write if possible + fileMode = O_RDWR; + } + + sprintf(buf, "%s/ot.vss", path); + idxfp[0] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/nt.vss", path); + idxfp[1] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/ot", path); + textfp[0] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/nt", path); + textfp[1] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + delete [] buf; + instance++; +} + + +/****************************************************************************** + * RawVerse Destructor - Cleans up instance of RawVerse + */ + +RawVerse::~RawVerse() +{ + int loop1; + + if (path) + delete [] path; + + --instance; + + for (loop1 = 0; loop1 < 2; loop1++) { + FileMgr::systemFileMgr.close(idxfp[loop1]); + FileMgr::systemFileMgr.close(textfp[loop1]); + } +} + + +/****************************************************************************** + * RawVerse::findoffset - Finds the offset of the key verse from the indexes + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * idxoff - offset into .vss + * start - address to store the starting offset + * size - address to store the size of the entry + */ + +void RawVerse::findoffset(char testmt, long idxoff, long *start, unsigned short *size) { + idxoff *= 6; + if (!testmt) + testmt = ((idxfp[1]) ? 1:2); + + if (idxfp[testmt-1]->getFd() >= 0) { + lseek(idxfp[testmt-1]->getFd(), idxoff, SEEK_SET); + read(idxfp[testmt-1]->getFd(), start, 4); + long len = read(idxfp[testmt-1]->getFd(), size, 2); // read size + + *start = swordtoarch32(*start); + *size = swordtoarch16(*size); + + if (len < 2) { + *size = (unsigned short)((*start) ? (lseek(textfp[testmt-1]->getFd(), 0, SEEK_END) - (long)*start) : 0); // if for some reason we get an error reading size, make size to end of file + } + } + else { + *start = 0; + *size = 0; + } +} + + +/****************************************************************************** + * RawVerse::preptext - Prepares the text before returning it to external + * objects + * + * ENT: buf - buffer where text is stored and where to store the prep'd + * text. + */ + +void RawVerse::preptext(char *buf) +{ + char *to, *from, space = 0, cr = 0, realdata = 0, nlcnt = 0; + + for (to = from = buf; *from; from++) { + switch (*from) { + case 10: + if (!realdata) + continue; + space = (cr) ? 0 : 1; + cr = 0; + nlcnt++; + if (nlcnt > 1) { +// *to++ = nl; + *to++ = 10; +// *to++ = nl[1]; +// nlcnt = 0; + } + continue; + case 13: + if (!realdata) + continue; +// *to++ = nl[0]; + *to++ = 10; + space = 0; + cr = 1; + continue; + } + realdata = 1; + nlcnt = 0; + if (space) { + space = 0; + if (*from != ' ') { + *to++ = ' '; + from--; + continue; + } + } + *to++ = *from; + } + *to = 0; + + while (to > (buf+1)) { // remove trailing excess + to--; + if ((*to == 10) || (*to == ' ')) + *to = 0; + else break; + } +} + + +/****************************************************************************** + * RawVerse::readtext - gets text at a given offset + * + * ENT: testmt - testament file to search in (0 - Old; 1 - New) + * start - starting offset where the text is located in the file + * size - size of text entry + 2 (null)(null) + * buf - buffer to store text + * + */ + +void RawVerse::readtext(char testmt, long start, unsigned short size, char *buf) { + memset(buf, 0, size+1); + if (!testmt) + testmt = ((idxfp[1]) ? 1:2); + if (size) { + if (textfp[testmt-1]->getFd() >= 0) { + lseek(textfp[testmt-1]->getFd(), start, SEEK_SET); + read(textfp[testmt-1]->getFd(), buf, (int)size - 2); + } + } +} + + +/****************************************************************************** + * RawVerse::settext - Sets text for current offset + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * idxoff - offset into .vss + * buf - buffer to store + * len - length of buffer (0 - null terminated) + */ + +void RawVerse::settext(char testmt, long idxoff, const char *buf, long len) +{ + long start, outstart; + unsigned short size; + unsigned short outsize; + + idxoff *= 6; + if (!testmt) + testmt = ((idxfp[1]) ? 1:2); + + size = outsize = (len < 0) ? strlen(buf) : len; + + start = outstart = lseek(textfp[testmt-1]->getFd(), 0, SEEK_END); + lseek(idxfp[testmt-1]->getFd(), idxoff, SEEK_SET); + + if (size) { + lseek(textfp[testmt-1]->getFd(), start, SEEK_SET); + write(textfp[testmt-1]->getFd(), buf, (int)size); + + // add a new line to make data file easier to read in an editor + write(textfp[testmt-1]->getFd(), nl, 2); + } + else { + start = 0; + } + + outstart = archtosword32(start); + outsize = archtosword16(size); + + write(idxfp[testmt-1]->getFd(), &outstart, 4); + write(idxfp[testmt-1]->getFd(), &outsize, 2); + + +} + + +/****************************************************************************** + * RawVerse::linkentry - links one entry to another + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * destidxoff - dest offset into .vss + * srcidxoff - source offset into .vss + */ + +void RawVerse::linkentry(char testmt, long destidxoff, long srcidxoff) { + long start; + unsigned short size; + + destidxoff *= 6; + srcidxoff *= 6; + + if (!testmt) + testmt = ((idxfp[1]) ? 1:2); + + // get source + lseek(idxfp[testmt-1]->getFd(), srcidxoff, SEEK_SET); + read(idxfp[testmt-1]->getFd(), &start, 4); + read(idxfp[testmt-1]->getFd(), &size, 2); + + // write dest + lseek(idxfp[testmt-1]->getFd(), destidxoff, SEEK_SET); + write(idxfp[testmt-1]->getFd(), &start, 4); + write(idxfp[testmt-1]->getFd(), &size, 2); +} + + +/****************************************************************************** + * RawVerse::CreateModule - Creates new module files + * + * ENT: path - directory to store module files + * RET: error status + */ + +char RawVerse::createModule(const char *ipath) +{ + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd, *fd2; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + sprintf(buf, "%s/ot", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s/nt", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s/ot.vss", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + + sprintf(buf, "%s/nt.vss", path); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + + VerseKey vk; + vk.Headings(1); + long offset = 0; + short size = 0; + for (vk = TOP; !vk.Error(); vk++) { + write((vk.Testament() == 1) ? fd->getFd() : fd2->getFd(), &offset, 4); + write((vk.Testament() == 1) ? fd->getFd() : fd2->getFd(), &size, 2); + } + + FileMgr::systemFileMgr.close(fd); + FileMgr::systemFileMgr.close(fd2); + + delete [] path; +/* + RawVerse rv(path); + VerseKey mykey("Rev 22:21"); +*/ + + return 0; +} diff --git a/src/modules/common/sapphire.cpp b/src/modules/common/sapphire.cpp new file mode 100644 index 0000000..686bccb --- /dev/null +++ b/src/modules/common/sapphire.cpp @@ -0,0 +1,228 @@ +/* sapphire.cpp -- the Saphire II stream cipher class. + Dedicated to the Public Domain the author and inventor: + (Michael Paul Johnson). This code comes with no warranty. + Use it at your own risk. + Ported from the Pascal implementation of the Sapphire Stream + Cipher 9 December 1994. + Added hash pre- and post-processing 27 December 1994. + Modified initialization to make index variables key dependent, + made the output function more resistant to cryptanalysis, + and renamed to Sapphire II 2 January 1995 +*/ + + +#ifdef WIN32 +#include <memory.h> +#endif + +#ifdef UNIX +#include <memory.h> +#include <unistd.h> +#else +#ifndef _MSC_VER +#include <mem.h> +#endif +#endif + +#ifdef _WIN32_WCE +#include <string.h> +#endif + +#include "sapphire.h" + +unsigned char sapphire::keyrand(int limit, + unsigned char *user_key, + unsigned char keysize, + unsigned char *rsum, + unsigned *keypos) + { + unsigned u, // Value from 0 to limit to return. + retry_limiter, // No infinite loops allowed. + mask; // Select just enough bits. + + if (!limit) return 0; // Avoid divide by zero error. + retry_limiter = 0; + mask = 1; // Fill mask with enough bits to cover + while (mask < (unsigned)limit) // the desired range. + mask = (mask << 1) + 1; + do + { + *rsum = cards[*rsum] + user_key[(*keypos)++]; + if (*keypos >= keysize) + { + *keypos = 0; // Recycle the user key. + *rsum += keysize; // key "aaaa" != key "aaaaaaaa" + } + u = mask & *rsum; + if (++retry_limiter > 11) + u %= limit; // Prevent very rare long loops. + } + while (u > (unsigned)limit); + return u; + } + +void sapphire::initialize(unsigned char *key, unsigned char keysize) + { + // Key size may be up to 256 bytes. + // Pass phrases may be used directly, with longer length + // compensating for the low entropy expected in such keys. + // Alternatively, shorter keys hashed from a pass phrase or + // generated randomly may be used. For random keys, lengths + // of from 4 to 16 bytes are recommended, depending on how + // secure you want this to be. + + int i; + unsigned char toswap, swaptemp, rsum; + unsigned keypos; + + // If we have been given no key, assume the default hash setup. + + if (keysize < 1) + { + hash_init(); + return; + } + + // Start with cards all in order, one of each. + + for (i=0;i<256;i++) + cards[i] = i; + + // Swap the card at each position with some other card. + + toswap = 0; + keypos = 0; // Start with first byte of user key. + rsum = 0; + for (i=255;i>=0;i--) + { + toswap = keyrand(i, key, keysize, &rsum, &keypos); + swaptemp = cards[i]; + cards[i] = cards[toswap]; + cards[toswap] = swaptemp; + } + + // Initialize the indices and data dependencies. + // Indices are set to different values instead of all 0 + // to reduce what is known about the state of the cards + // when the first byte is emitted. + + rotor = cards[1]; + ratchet = cards[3]; + avalanche = cards[5]; + last_plain = cards[7]; + last_cipher = cards[rsum]; + + toswap = swaptemp = rsum = 0; + keypos = 0; + } + +void sapphire::hash_init(void) + { + // This function is used to initialize non-keyed hash + // computation. + + int i, j; + + // Initialize the indices and data dependencies. + + rotor = 1; + ratchet = 3; + avalanche = 5; + last_plain = 7; + last_cipher = 11; + + // Start with cards all in inverse order. + + for (i=0, j=255;i<256;i++,j--) + cards[i] = (unsigned char) j; + } + +sapphire::sapphire(unsigned char *key, unsigned char keysize) + { + if (key && keysize) + initialize(key, keysize); + } + +void sapphire::burn(void) + { + // Destroy the key and state information in RAM. + memset(cards, 0, 256); + rotor = ratchet = avalanche = last_plain = last_cipher = 0; + } + +sapphire::~sapphire() + { + burn(); + } + +unsigned char sapphire::encrypt(unsigned char b) + { +#ifdef USBINARY + // Picture a single enigma rotor with 256 positions, rewired + // on the fly by card-shuffling. + + // This cipher is a variant of one invented and written + // by Michael Paul Johnson in November, 1993. + + unsigned char swaptemp; + + // Shuffle the deck a little more. + + ratchet += cards[rotor++]; + swaptemp = cards[last_cipher]; + cards[last_cipher] = cards[ratchet]; + cards[ratchet] = cards[last_plain]; + cards[last_plain] = cards[rotor]; + cards[rotor] = swaptemp; + avalanche += cards[swaptemp]; + + // Output one byte from the state in such a way as to make it + // very hard to figure out which one you are looking at. + + last_cipher = b^cards[(cards[ratchet] + cards[rotor]) & 0xFF] ^ + cards[cards[(cards[last_plain] + + cards[last_cipher] + + cards[avalanche])&0xFF]]; + last_plain = b; + return last_cipher; +#else + return b; +#endif + } + +unsigned char sapphire::decrypt(unsigned char b) + { + unsigned char swaptemp; + + // Shuffle the deck a little more. + + ratchet += cards[rotor++]; + swaptemp = cards[last_cipher]; + cards[last_cipher] = cards[ratchet]; + cards[ratchet] = cards[last_plain]; + cards[last_plain] = cards[rotor]; + cards[rotor] = swaptemp; + avalanche += cards[swaptemp]; + + // Output one byte from the state in such a way as to make it + // very hard to figure out which one you are looking at. + + last_plain = b^cards[(cards[ratchet] + cards[rotor]) & 0xFF] ^ + cards[cards[(cards[last_plain] + + cards[last_cipher] + + cards[avalanche])&0xFF]]; + last_cipher = b; + return last_plain; + } + +void sapphire::hash_final(unsigned char *hash, // Destination + unsigned char hashlength) // Size of hash. + { + int i; + + for (i=255;i>=0;i--) + encrypt((unsigned char) i); + for (i=0;i<hashlength;i++) + hash[i] = encrypt(0); + } + diff --git a/src/modules/common/swcipher.cpp b/src/modules/common/swcipher.cpp new file mode 100644 index 0000000..d221b8b --- /dev/null +++ b/src/modules/common/swcipher.cpp @@ -0,0 +1,123 @@ +/****************************************************************************** + * swcipher.cpp - code for class 'SWCipher'- a driver class that provides + * cipher utilities. + */ + +#include <string.h> +#include <stdlib.h> +#include <swcipher.h> + + +/****************************************************************************** + * SWCipher Constructor - Initializes data for instance of SWCipher + * + */ + +SWCipher::SWCipher(unsigned char *key) { + master.initialize(key, strlen((char *)key)); + buf = 0; +} + + +/****************************************************************************** + * SWCipher Destructor - Cleans up instance of SWCipher + */ + +SWCipher::~SWCipher() +{ + if (buf) + free(buf); +} + + +char *SWCipher::Buf(const char *ibuf, unsigned int ilen) +{ + if (ibuf) { + + if (buf) + free(buf); + + if (!ilen) { + len = strlen(buf); + ilen = len + 1; + } + else len = ilen; + + buf = (char *) malloc(ilen); + memcpy(buf, ibuf, ilen); + cipher = false; + } + + Decode(); + + return buf; +} + + +char *SWCipher::cipherBuf(unsigned int *ilen, const char *ibuf) +{ + if (ibuf) { + + if (buf) + free(buf); + + buf = (char *) malloc(*ilen); + memcpy(buf, ibuf, *ilen); + len = *ilen; + cipher = true; + } + + Encode(); + + *ilen = (short)len; + return buf; +} + + +/****************************************************************************** + * SWCipher::Encode - This function "encodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + */ + +void SWCipher::Encode(void) +{ + if (!cipher) { + work = master; + for (int i = 0; i < len; i++) + buf[i] = work.encrypt(buf[i]); + cipher = true; + } +} + + +/****************************************************************************** + * SWCipher::Decode - This function "decodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + */ + +void SWCipher::Decode(void) +{ + if (cipher) { + work = master; + for (int i = 0; i < len; i++) + buf[i] = work.decrypt(buf[i]); + cipher = false; + } +} + + +/****************************************************************************** + * SWCipher::setCipherKey - setter for a new CipherKey + * + */ + +void SWCipher::setCipherKey(const char *ikey) { + unsigned char *key = (unsigned char *)ikey; + master.initialize(key, strlen((char *)key)); +} diff --git a/src/modules/common/swcomprs.cpp b/src/modules/common/swcomprs.cpp new file mode 100644 index 0000000..4bd2e5e --- /dev/null +++ b/src/modules/common/swcomprs.cpp @@ -0,0 +1,190 @@ +/****************************************************************************** + * swcomprs.cpp - code for class 'SWCompress'- a driver class that provides + * compression utilities. + */ + +#include <string.h> +#include <stdlib.h> +#include <swcomprs.h> + + +/****************************************************************************** + * SWCompress Constructor - Initializes data for instance of SWCompress + * + */ + +SWCompress::SWCompress() +{ + buf = zbuf = 0; + Init(); +} + + +/****************************************************************************** + * SWCompress Destructor - Cleans up instance of SWCompress + */ + +SWCompress::~SWCompress() +{ + if (zbuf) + free(zbuf); + + if (buf) + free(buf); +} + + +void SWCompress::Init() +{ + if (buf) + free(buf); + + if (zbuf) + free(zbuf); + + buf = 0; + zbuf = 0; + direct = 0; + zlen = 0; + slen = 0; + zpos = 0; + pos = 0; +} + + +char *SWCompress::Buf(const char *ibuf, unsigned long *len) { + // setting an uncompressed buffer + if (ibuf) { + Init(); + slen = (len) ? *len : strlen(ibuf); + buf = (char *) calloc(slen + 1, 1); + memcpy(buf, ibuf, slen); + } + + // getting an uncompressed buffer + if (!buf) { + buf = (char *)calloc(1,1); // be sure we at least allocate an empty buf for return; + direct = 1; + Decode(); +// slen = strlen(buf); + if (len) + *len = slen; + } + return buf; +} + + +char *SWCompress::zBuf(unsigned long *len, char *ibuf) +{ + // setting a compressed buffer + if (ibuf) { + Init(); + zbuf = (char *) malloc(*len); + memcpy(zbuf, ibuf, *len); + zlen = *len; + } + + // getting a compressed buffer + if (!zbuf) { + direct = 0; + Encode(); + } + + *len = zlen; + return zbuf; +} + + +unsigned long SWCompress::GetChars(char *ibuf, unsigned long len) +{ + if (direct) { + len = (((zlen - zpos) > (unsigned)len) ? len : zlen - zpos); + if (len > 0) { + memmove(ibuf, &zbuf[zpos], len); + zpos += len; + } + } + else { +// slen = strlen(buf); + len = (((slen - pos) > (unsigned)len) ? len : slen - pos); + if (len > 0) { + memmove(ibuf, &buf[pos], len); + pos += len; + } + } + return len; +} + + +unsigned long SWCompress::SendChars(char *ibuf, unsigned long len) +{ + if (direct) { + if (buf) { +// slen = strlen(buf); + if ((pos + len) > (unsigned)slen) { + buf = (char *) realloc(buf, pos + len + 1024); + memset(&buf[pos], 0, len + 1024); + } + } + else buf = (char *)calloc(1, len + 1024); + memmove(&buf[pos], ibuf, len); + pos += len; + } + else { + if (zbuf) { + if ((zpos + len) > zlen) { + zbuf = (char *) realloc(zbuf, zpos + len + 1024); + zlen = zpos + len + 1024; + } + } + else { + zbuf = (char *)calloc(1, len + 1024); + zlen = len + 1024; + } + memmove(&zbuf[zpos], ibuf, len); + zpos += len; + } + return len; +} + + +/****************************************************************************** + * SWCompress::Encode - This function "encodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + */ + +void SWCompress::Encode(void) +{ + cycleStream(); +} + + +/****************************************************************************** + * SWCompress::Decode - This function "decodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + */ + +void SWCompress::Decode(void) +{ + cycleStream(); +} + + +void SWCompress::cycleStream() { + char buf[1024]; + unsigned long len, totlen = 0; + + do { + len = GetChars(buf, 1024); + if (len) + totlen += SendChars(buf, len); + } while (len == 1024); + + zlen = slen = totlen; +} diff --git a/src/modules/common/zipcomprs.cpp b/src/modules/common/zipcomprs.cpp new file mode 100644 index 0000000..01ba430 --- /dev/null +++ b/src/modules/common/zipcomprs.cpp @@ -0,0 +1,158 @@ +/****************************************************************************** + * swcomprs.cpp - code for class 'ZipCompress'- a driver class that provides + * compression utilities. - using zlib + */ + +#include <string.h> +#include <string> +#include <stdlib.h> +#include <stdio.h> +#include <zipcomprs.h> +#include <zlib.h> + +/****************************************************************************** + * ZipCompress Constructor - Initializes data for instance of ZipCompress + * + */ + +ZipCompress::ZipCompress() : SWCompress() +{ +// fprintf(stderr, "init compress\n"); +} + + +/****************************************************************************** + * ZipCompress Destructor - Cleans up instance of ZipCompress + */ + +ZipCompress::~ZipCompress() { +} + + +/****************************************************************************** + * ZipCompress::Encode - This function "encodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + * NOTE: must set zlen for parent class to know length of + * compressed buffer. + */ + +void ZipCompress::Encode(void) +{ +/* +ZEXTERN int ZEXPORT compress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); + Compresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total + size of the destination buffer, which must be at least 0.1% larger than + sourceLen plus 12 bytes. Upon exit, destLen is the actual size of the + compressed buffer. + This function can be used to compress a whole file at once if the + input file is mmap'ed. + compress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer. +*/ + direct = 0; // set direction needed by parent [Get|Send]Chars() + + // get buffer + char chunk[1024]; + char *buf = (char *)calloc(1, 1024); + char *chunkbuf = buf; + unsigned long chunklen; + unsigned long len = 0; + while((chunklen = GetChars(chunk, 1023))) { + memcpy(chunkbuf, chunk, chunklen); + len += chunklen; + if (chunklen < 1023) + break; + else buf = (char *)realloc(buf, len + 1024); + chunkbuf = buf+len; + } + + + zlen = (long) (len*1.001)+15; + char *zbuf = new char[zlen+1]; + if (len) + { + //printf("Doing compress\n"); + if (compress((Bytef*)zbuf, &zlen, (const Bytef*)buf, len)!=Z_OK) + { + printf("ERROR in compression\n"); + } + else { + SendChars(zbuf, zlen); + } + } + else + { + fprintf(stderr, "No buffer to compress\n"); + } + delete [] zbuf; + free (buf); +} + + +/****************************************************************************** + * ZipCompress::Decode - This function "decodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + */ + +void ZipCompress::Decode(void) +{ +/* +ZEXTERN int ZEXPORT uncompress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); + Decompresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total + size of the destination buffer, which must be large enough to hold the + entire uncompressed data. (The size of the uncompressed data must have + been saved previously by the compressor and transmitted to the decompressor + by some mechanism outside the scope of this compression library.) + Upon exit, destLen is the actual size of the compressed buffer. + This function can be used to decompress a whole file at once if the + input file is mmap'ed. + + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer, or Z_DATA_ERROR if the input data was corrupted. +*/ + + // get buffer + char chunk[1024]; + char *zbuf = (char *)calloc(1, 1024); + char *chunkbuf = zbuf; + int chunklen; + unsigned long zlen = 0; + while((chunklen = GetChars(chunk, 1023))) { + memcpy(chunkbuf, chunk, chunklen); + zlen += chunklen; + if (chunklen < 1023) + break; + else zbuf = (char *)realloc(zbuf, zlen + 1024); + chunkbuf = zbuf + zlen; + } + + //printf("Decoding complength{%ld} uncomp{%ld}\n", zlen, blen); + if (zlen) { + unsigned long blen = zlen*20; // trust compression is less than 1000% + char *buf = new char[blen]; + //printf("Doing decompress {%s}\n", zbuf); + if (uncompress((Bytef*)buf, &blen, (Bytef*)zbuf, zlen) != Z_OK) { + fprintf(stderr, "no room in outbuffer to during decompression. see zipcomp.cpp\n"); + } + SendChars(buf, blen); + delete [] buf; + slen = blen; + } + else { + fprintf(stderr, "No buffer to decompress!\n"); + } + //printf("Finished decoding\n"); + free (zbuf); +} diff --git a/src/modules/common/zstr.cpp b/src/modules/common/zstr.cpp new file mode 100644 index 0000000..cd1add5 --- /dev/null +++ b/src/modules/common/zstr.cpp @@ -0,0 +1,705 @@ +/****************************************************************************** + * zstr.cpp - code for class 'zStr'- a module that reads compressed text + * files and provides lookup and parsing functions based on + * class StrKey + */ + +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <stdlib.h> +#include <utilfuns.h> +#include <zstr.h> +#include <swcomprs.h> + +#include <sysdata.h> +#include <entriesblk.h> + +/****************************************************************************** + * zStr Statics + */ + +int zStr::instance = 0; +const int zStr::IDXENTRYSIZE = 8; +const int zStr::ZDXENTRYSIZE = 8; + + +/****************************************************************************** + * zStr Constructor - Initializes data for instance of zStr + * + * ENT: ipath - path of the directory where data and index files are located. + */ + +zStr::zStr(const char *ipath, int fileMode, long blockCount, SWCompress *icomp) { + char buf[127]; + + nl = '\n'; + lastoff = -1; + path = 0; + stdstr(&path, ipath); + + compressor = (icomp) ? icomp : new SWCompress(); + this->blockCount = blockCount; +#ifndef O_BINARY // O_BINARY is needed in Borland C++ 4.53 +#define O_BINARY 0 // If it hasn't been defined than we probably +#endif // don't need it. + + if (fileMode == -1) { // try read/write if possible + fileMode = O_RDWR; + } + + sprintf(buf, "%s.idx", path); + idxfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s.dat", path); + datfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s.zdx", path); + zdxfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s.zdt", path); + zdtfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + if (datfd <= 0) { + sprintf(buf, "Error: %d", errno); + perror(buf); + } + + cacheBlock = 0; + cacheBlockIndex = -1; + cacheDirty = false; + + instance++; +} + + +/****************************************************************************** + * zStr Destructor - Cleans up instance of zStr + */ + +zStr::~zStr() { + + flushCache(); + + if (path) + delete [] path; + + --instance; + + FileMgr::systemFileMgr.close(idxfd); + FileMgr::systemFileMgr.close(datfd); + FileMgr::systemFileMgr.close(zdxfd); + FileMgr::systemFileMgr.close(zdtfd); + + + if (compressor) + delete compressor; + +} + + +/****************************************************************************** + * zStr::getidxbufdat - Gets the index string at the given dat offset + * NOTE: buf is calloc'd, or if not null, realloc'd and must + * be free'd by calling function + * + * ENT: ioffset - offset in dat file to lookup + * buf - address of pointer to allocate for storage of string + */ + +void zStr::getKeyFromDatOffset(long ioffset, char **buf) { + int size; + char ch; + if (datfd > 0) { + lseek(datfd->getFd(), ioffset, SEEK_SET); + for (size = 0; read(datfd->getFd(), &ch, 1) == 1; size++) { + if ((ch == '\\') || (ch == 10) || (ch == 13)) + break; + } + *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1); + if (size) { + lseek(datfd->getFd(), ioffset, SEEK_SET); + read(datfd->getFd(), *buf, size); + } + (*buf)[size] = 0; + toupperstr_utf8(*buf); + } + else { + *buf = (*buf) ? (char *)realloc(*buf, 1) : (char *)malloc(1); + **buf = 0; + } +} + + +/****************************************************************************** + * zStr::getidxbuf - Gets the index string at the given idx offset + * NOTE: buf is calloc'd, or if not null, realloc'd + * and must be freed by calling function + * + * ENT: ioffset - offset in idx file to lookup + * buf - address of pointer to allocate for storage of string + */ + +void zStr::getKeyFromIdxOffset(long ioffset, char **buf) { + __u32 offset; + + if (idxfd > 0) { + lseek(idxfd->getFd(), ioffset, SEEK_SET); + read(idxfd->getFd(), &offset, sizeof(__u32)); + offset = swordtoarch32(offset); + getKeyFromDatOffset(offset, buf); + } +} + + +/****************************************************************************** + * zStr::findoffset - Finds the offset of the key string from the indexes + * + * ENT: key - key string to lookup + * offset - address to store the starting offset + * size - address to store the size of the entry + * away - number of entries before of after to jump + * (default = 0) + * + * RET: error status + */ + +signed char zStr::findKeyIndex(const char *ikey, long *idxoff, long away) { + char *trybuf = 0, *key = 0, quitflag = 0; + signed char retval = 0; + __s32 headoff, tailoff, tryoff = 0, maxoff = 0; + __u32 start, size; + + if (idxfd->getFd() >= 0) { + tailoff = maxoff = lseek(idxfd->getFd(), 0, SEEK_END) - IDXENTRYSIZE; + if (*ikey) { + headoff = 0; + stdstr(&key, ikey); + toupperstr_utf8(key); + + while (headoff < tailoff) { + tryoff = (lastoff == -1) ? headoff + (((((tailoff / IDXENTRYSIZE) - (headoff / IDXENTRYSIZE))) / 2) * IDXENTRYSIZE) : lastoff; + lastoff = -1; + + getKeyFromIdxOffset(tryoff, &trybuf); + + if (!*trybuf && tryoff) { // In case of extra entry at end of idx (not first entry) + tryoff += (tryoff > (maxoff / 2))?-IDXENTRYSIZE:IDXENTRYSIZE; + retval = -1; + break; + } + + int diff = strcmp(key, trybuf); + if (!diff) + break; + + if (diff < 0) + tailoff = (tryoff == headoff) ? headoff : tryoff; + else headoff = tryoff; + if (tailoff == headoff + IDXENTRYSIZE) { + if (quitflag++) + headoff = tailoff; + } + } + if (headoff >= tailoff) + tryoff = headoff; + if (trybuf) + free(trybuf); + delete [] key; + } + else { tryoff = 0; } + + lseek(idxfd->getFd(), tryoff, SEEK_SET); + + start = size = 0; + retval = (read(idxfd->getFd(), &start, sizeof(__u32))==sizeof(__u32)) ? retval : -1; + retval = (read(idxfd->getFd(), &size, sizeof(__u32))==sizeof(__u32)) ? retval : -1; + start = swordtoarch32(start); + size = swordtoarch32(size); + + if (idxoff) + *idxoff = tryoff; + + while (away) { + __u32 laststart = start; + __u32 lastsize = size; + __s32 lasttry = tryoff; + tryoff += (away > 0) ? IDXENTRYSIZE : -IDXENTRYSIZE; + + bool bad = false; + if (((long)(tryoff + (away*IDXENTRYSIZE)) < -IDXENTRYSIZE) || (tryoff + (away*IDXENTRYSIZE) > (maxoff+IDXENTRYSIZE))) + bad = true; + else if (lseek(idxfd->getFd(), tryoff, SEEK_SET) < 0) + bad = true; + if (bad) { + retval = -1; + start = laststart; + size = lastsize; + tryoff = lasttry; + if (idxoff) + *idxoff = tryoff; + break; + } + read(idxfd->getFd(), &start, sizeof(__u32)); + read(idxfd->getFd(), &size, sizeof(__u32)); + start = swordtoarch32(start); + size = swordtoarch32(size); + + if (idxoff) + *idxoff = tryoff; + + + if (((laststart != start) || (lastsize != size)) && (start >= 0) && (size)) + away += (away < 0) ? 1 : -1; + } + + lastoff = tryoff; + } + else { + if (idxoff) + *idxoff = 0; + retval = -1; + } + return retval; +} + + +/****************************************************************************** + * zStr::preptext - Prepares the text before returning it to external + * objects + * + * ENT: buf - buffer where text is stored and where to store the prep'd + * text. + */ + +void zStr::prepText(char *buf) { + char *to, *from, space = 0, cr = 0, realdata = 0, nlcnt = 0; + + for (to = from = buf; *from; from++) { + switch (*from) { + case 10: + if (!realdata) + continue; + space = (cr) ? 0 : 1; + cr = 0; + nlcnt++; + if (nlcnt > 1) { +// *to++ = nl; + *to++ = nl; +// nlcnt = 0; + } + continue; + case 13: + if (!realdata) + continue; + *to++ = nl; + space = 0; + cr = 1; + continue; + } + realdata = 1; + nlcnt = 0; + if (space) { + space = 0; + if (*from != ' ') { + *to++ = ' '; + from--; + continue; + } + } + *to++ = *from; + } + *to = 0; + + while (to > (buf+1)) { // remove trailing excess + to--; + if ((*to == 10) || (*to == ' ')) + *to = 0; + else break; + } +} + + +/****************************************************************************** + * zStr::getText - gets text at a given offset + * + * ENT: + * offset - idxoffset where the key is located. + * buf - buffer to store text + * idxbuf - buffer to store index key + * NOTE: buffer will be alloc'd / realloc'd and + * should be free'd by the client + * + */ + +void zStr::getText(long offset, char **idxbuf, char **buf) { + char *ch; + char *idxbuflocal = 0; + getKeyFromIdxOffset(offset, &idxbuflocal); + __u32 start; + __u32 size; + + do { + lseek(idxfd->getFd(), offset, SEEK_SET); + read(idxfd->getFd(), &start, sizeof(__u32)); + read(idxfd->getFd(), &size, sizeof(__u32)); + start = swordtoarch32(start); + size = swordtoarch32(size); + + *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1); + *idxbuf = (*idxbuf) ? (char *)realloc(*idxbuf, size*2 + 1) : (char *)malloc(size*2 + 1); + memset(*buf, 0, size + 1); + memset(*idxbuf, 0, size + 1); + lseek(datfd->getFd(), start, SEEK_SET); + read(datfd->getFd(), *buf, (int)(size)); + + for (ch = *buf; *ch; ch++) { // skip over index string + if (*ch == 10) { + ch++; + break; + } + } + memmove(*buf, ch, size - (unsigned long)(ch-*buf)); + + // resolve link + if (!strncmp(*buf, "@LINK", 5)) { + for (ch = *buf; *ch; ch++) { // null before nl + if (*ch == 10) { + *ch = 0; + break; + } + } + findKeyIndex(*buf + 6, &offset); + } + else break; + } + while (true); // while we're resolving links + + if (idxbuflocal) { + __u32 localsize = strlen(idxbuflocal); + localsize = (localsize < (size - 1)) ? localsize : (size - 1); + strncpy(*idxbuf, idxbuflocal, localsize); + (*idxbuf)[localsize] = 0; + free(idxbuflocal); + } + __u32 block = 0; + __u32 entry = 0; + memmove(&block, *buf, sizeof(__u32)); + memmove(&entry, *buf + sizeof(__u32), sizeof(__u32)); + block = swordtoarch32(block); + entry = swordtoarch32(entry); + getCompressedText(block, entry, buf); +} + + +/****************************************************************************** + * zStr::getCompressedText - Get text entry from a compressed index / zdata + * file. + */ + +void zStr::getCompressedText(long block, long entry, char **buf) { + + __u32 size = 0; + + if (cacheBlockIndex != block) { + __u32 start = 0; + + lseek(zdxfd->getFd(), block * ZDXENTRYSIZE, SEEK_SET); + read(zdxfd->getFd(), &start, sizeof(__u32)); + read(zdxfd->getFd(), &size, sizeof(__u32)); + start = swordtoarch32(start); + size = swordtoarch32(size); + + *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1); + + lseek(zdtfd->getFd(), start, SEEK_SET); + read(zdtfd->getFd(), *buf, size); + + flushCache(); + + unsigned long len = size; + compressor->zBuf(&len, *buf); + char * rawBuf = compressor->Buf(0, &len); + cacheBlock = new EntriesBlock(rawBuf, len); + cacheBlockIndex = block; + } + size = cacheBlock->getEntrySize(entry); + *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1); + strcpy(*buf, cacheBlock->getEntry(entry)); +} + + +/****************************************************************************** + * zLD::settext - Sets text for current offset + * + * ENT: key - key for this entry + * buf - buffer to store + * len - length of buffer (0 - null terminated) + */ + +void zStr::setText(const char *ikey, const char *buf, long len) { + + __u32 start, outstart; + __u32 size, outsize; + __s32 endoff; + long idxoff = 0; + __s32 shiftSize; + static const char nl[] = {13, 10}; + char *tmpbuf = 0; + char *key = 0; + char *dbKey = 0; + char *idxBytes = 0; + char *outbuf = 0; + char *ch = 0; + + len = (len < 0) ? strlen(buf) : len; + stdstr(&key, ikey); + toupperstr_utf8(key); + + char notFound = findKeyIndex(ikey, &idxoff, 0); + if (!notFound) { + getKeyFromIdxOffset(idxoff, &dbKey); + int diff = strcmp(key, dbKey); + if (diff < 0) { + } + else if (diff > 0) { + idxoff += IDXENTRYSIZE; + } + else if ((!diff) && (len > 0 /*we're not deleting*/)) { // got absolute entry + do { + lseek(idxfd->getFd(), idxoff, SEEK_SET); + read(idxfd->getFd(), &start, sizeof(__u32)); + read(idxfd->getFd(), &size, sizeof(__u32)); + start = swordtoarch32(start); + size = swordtoarch32(size); + + tmpbuf = new char [ size + 2 ]; + memset(tmpbuf, 0, size + 2); + lseek(datfd->getFd(), start, SEEK_SET); + read(datfd->getFd(), tmpbuf, size); + + for (ch = tmpbuf; *ch; ch++) { // skip over index string + if (*ch == 10) { + ch++; + break; + } + } + memmove(tmpbuf, ch, size - (unsigned long)(ch-tmpbuf)); + + // resolve link + if (!strncmp(tmpbuf, "@LINK", 5) && (len)) { + for (ch = tmpbuf; *ch; ch++) { // null before nl + if (*ch == 10) { + *ch = 0; + break; + } + } + findKeyIndex(tmpbuf + IDXENTRYSIZE, &idxoff); + delete [] tmpbuf; + } + else break; + } + while (true); // while we're resolving links + } + } + + endoff = lseek(idxfd->getFd(), 0, SEEK_END); + + shiftSize = endoff - idxoff; + + if (shiftSize > 0) { + idxBytes = new char [ shiftSize ]; + lseek(idxfd->getFd(), idxoff, SEEK_SET); + read(idxfd->getFd(), idxBytes, shiftSize); + } + + outbuf = new char [ len + strlen(key) + 5 ]; + sprintf(outbuf, "%s%c%c", key, 13, 10); + size = strlen(outbuf); + if (len > 0) { // NOT a link + if (!cacheBlock) { + flushCache(); + cacheBlock = new EntriesBlock(); + cacheBlockIndex = (lseek(zdxfd->getFd(), 0, SEEK_END) / ZDXENTRYSIZE); + } + else if (cacheBlock->getCount() >= blockCount) { + flushCache(); + cacheBlock = new EntriesBlock(); + cacheBlockIndex = (lseek(zdxfd->getFd(), 0, SEEK_END) / ZDXENTRYSIZE); + } + __u32 entry = cacheBlock->addEntry(buf); + cacheDirty = true; + outstart = archtosword32(cacheBlockIndex); + outsize = archtosword32(entry); + memcpy (outbuf + size, &outstart, sizeof(__u32)); + memcpy (outbuf + size + sizeof(__u32), &outsize, sizeof(__u32)); + size += (sizeof(__u32) * 2); + } + else { // link + memcpy(outbuf + size, buf, len); + size += len; + } + + start = lseek(datfd->getFd(), 0, SEEK_END); + + outstart = archtosword32(start); + outsize = archtosword32(size); + + lseek(idxfd->getFd(), idxoff, SEEK_SET); + if (len > 0) { + lseek(datfd->getFd(), start, SEEK_SET); + write(datfd->getFd(), outbuf, size); + + // add a new line to make data file easier to read in an editor + write(datfd->getFd(), &nl, 2); + + write(idxfd->getFd(), &outstart, sizeof(__u32)); + write(idxfd->getFd(), &outsize, sizeof(__u32)); + if (idxBytes) { + write(idxfd->getFd(), idxBytes, shiftSize); + } + } + else { // delete entry + if (idxBytes) { + write(idxfd->getFd(), idxBytes+IDXENTRYSIZE, shiftSize-IDXENTRYSIZE); + lseek(idxfd->getFd(), -1, SEEK_CUR); // last valid byte + FileMgr::systemFileMgr.trunc(idxfd); // truncate index + } + } + + if (idxBytes) + delete [] idxBytes; + delete [] key; + delete [] outbuf; + free(dbKey); +} + + +/****************************************************************************** + * zLD::linkentry - links one entry to another + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * destidxoff - dest offset into .vss + * srcidxoff - source offset into .vss + */ + +void zStr::linkEntry(const char *destkey, const char *srckey) { + char *text = new char [ strlen(destkey) + 7 ]; + sprintf(text, "@LINK %s", destkey); + setText(srckey, text); + delete [] text; +} + + +void zStr::flushCache() { + if (cacheBlock) { + if (cacheDirty) { + __u32 start = 0; + unsigned long size = 0; + __u32 outstart = 0, outsize = 0; + + const char *rawBuf = cacheBlock->getRawData(&size); + compressor->Buf(rawBuf, &size); + compressor->zBuf(&size); + + long zdxSize = lseek(zdxfd->getFd(), 0, SEEK_END); + long zdtSize = lseek(zdtfd->getFd(), 0, SEEK_END); + + if ((cacheBlockIndex * ZDXENTRYSIZE) > (zdxSize - ZDXENTRYSIZE)) { // New Block + start = zdtSize; + } + else { + lseek(zdxfd->getFd(), cacheBlockIndex * ZDXENTRYSIZE, SEEK_SET); + read(zdxfd->getFd(), &start, sizeof(__u32)); + read(zdxfd->getFd(), &outsize, sizeof(__u32)); + start = swordtoarch32(start); + outsize = swordtoarch32(outsize); + if (start + outsize >= zdtSize) { // last entry, just overwrite + // start is already set + } + else if (size < outsize) { // middle entry, but smaller, that's fine and let's preserve bigger size + size = outsize; + } + else { // middle and bigger-- we have serious problems, for now let's put it at the end = lots of wasted space + start = zdtSize; + } + } + + + + outstart = archtosword32(start); + outsize = archtosword32((__u32)size); + + lseek(zdxfd->getFd(), cacheBlockIndex * ZDXENTRYSIZE, SEEK_SET); + lseek(zdtfd->getFd(), start, SEEK_SET); + rawBuf = compressor->zBuf(&size); + write(zdtfd->getFd(), rawBuf, size); + + // add a new line to make data file easier to read in an editor + write(zdtfd->getFd(), &nl, 2); + + write(zdxfd->getFd(), &outstart, sizeof(__u32)); + write(zdxfd->getFd(), &outsize, sizeof(__u32)); + + delete cacheBlock; + } + } + cacheBlockIndex = -1; + cacheBlock = 0; + cacheDirty = false; +} + + +/****************************************************************************** + * zLD::CreateModule - Creates new module files + * + * ENT: path - directory to store module files + * RET: error status + */ + +signed char zStr::createModule(const char *ipath) { + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd, *fd2; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + sprintf(buf, "%s.dat", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s.idx", path); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + FileMgr::systemFileMgr.close(fd2); + + sprintf(buf, "%s.zdt", path); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + FileMgr::systemFileMgr.close(fd2); + + sprintf(buf, "%s.zdx", path); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + FileMgr::systemFileMgr.close(fd2); + + delete [] path; + + return 0; +} diff --git a/src/modules/common/zverse.cpp b/src/modules/common/zverse.cpp new file mode 100644 index 0000000..6d76ddc --- /dev/null +++ b/src/modules/common/zverse.cpp @@ -0,0 +1,518 @@ +/****************************************************************************** + * zverse.h - code for class 'zVerse'- a module that reads raw text + * files: ot and nt using indexs ??.bks ??.cps ??.vss + * and provides lookup and parsing functions based on + * class VerseKey for compressed modules + */ + + +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> +#include <stdlib.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> +#include <versekey.h> +#include <zverse.h> +#include <sysdata.h> + + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + + +/****************************************************************************** + * zVerse Statics + */ + +int zVerse::instance = 0; + +const char zVerse::uniqueIndexID[] = {'X', 'r', 'v', 'c', 'b'}; + +/****************************************************************************** + * zVerse Constructor - Initializes data for instance of zVerse + * + * ENT: ipath - path of the directory where data and index files are located. + * be sure to include the trailing separator (e.g. '/' or '\') + * (e.g. 'modules/texts/rawtext/webster/') + * fileMode - open mode for the files (O_RDONLY, etc.) + * blockType - verse, chapter, book, etc. + */ + +zVerse::zVerse(const char *ipath, int fileMode, int blockType, SWCompress *icomp) +{ + char buf[127]; + + nl = '\n'; + path = 0; + cacheBufIdx = -1; + cacheTestament = 0; + cacheBuf = 0; + dirtyCache = false; + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + compressor = (icomp) ? icomp : new SWCompress(); + + if (fileMode == -1) { // try read/write if possible + fileMode = O_RDWR; + } + + sprintf(buf, "%s/ot.%czs", path, uniqueIndexID[blockType]); + idxfp[0] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/nt.%czs", path, uniqueIndexID[blockType]); + idxfp[1] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/ot.%czz", path, uniqueIndexID[blockType]); + textfp[0] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/nt.%czz", path, uniqueIndexID[blockType]); + textfp[1] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/ot.%czv", path, uniqueIndexID[blockType]); + compfp[0] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/nt.%czv", path, uniqueIndexID[blockType]); + compfp[1] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + instance++; +} + + +/****************************************************************************** + * zVerse Destructor - Cleans up instance of zVerse + */ + +zVerse::~zVerse() +{ + int loop1; + + if (cacheBuf) { + flushCache(); + free(cacheBuf); + } + + if (path) + delete [] path; + + if (compressor) + delete compressor; + + --instance; + + for (loop1 = 0; loop1 < 2; loop1++) { + FileMgr::systemFileMgr.close(idxfp[loop1]); + FileMgr::systemFileMgr.close(textfp[loop1]); + FileMgr::systemFileMgr.close(compfp[loop1]); + } +} + + +/****************************************************************************** + * zVerse::findoffset - Finds the offset of the key verse from the indexes + * + * + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * book - book to find (0 - testament introduction) + * chapter - chapter to find (0 - book introduction) + * verse - verse to find (0 - chapter introduction) + * start - address to store the starting offset + * size - address to store the size of the entry + */ + +void zVerse::findoffset(char testmt, long idxoff, long *start, unsigned short *size) +{ + // set start to offset in + // set size to + // set + unsigned long ulBuffNum=0; // buffer number + unsigned long ulVerseStart=0; // verse offset within buffer + unsigned short usVerseSize=0; // verse size + unsigned long ulCompOffset=0; // compressed buffer start + unsigned long ulCompSize=0; // buffer size compressed + unsigned long ulUnCompSize=0; // buffer size uncompressed + char *pcCompText=NULL; // compressed text + + *start = *size = 0; + //printf ("Finding offset %ld\n", idxoff); + idxoff *= 10; + if (!testmt) { + testmt = ((idxfp[0]) ? 1:2); + } + + // assert we have and valid file descriptor + if (compfp[testmt-1]->getFd() < 1) + return; + + long newOffset = lseek(compfp[testmt-1]->getFd(), idxoff, SEEK_SET); + if (newOffset == idxoff) { + if (read(compfp[testmt-1]->getFd(), &ulBuffNum, 4) != 4) { + printf ("Error reading ulBuffNum\n"); + return; + } + } + else return; + + ulBuffNum = swordtoarch32(ulBuffNum); + + if (read(compfp[testmt-1]->getFd(), &ulVerseStart, 4) < 2) + { + printf ("Error reading ulVerseStart\n"); + return; + } + if (read(compfp[testmt-1]->getFd(), &usVerseSize, 2) < 2) + { + printf ("Error reading usVerseSize\n"); + return; + } + + *start = swordtoarch32(ulVerseStart); + *size = swordtoarch16(usVerseSize); + + if (*size) { + if (((long) ulBuffNum == cacheBufIdx) && (testmt == cacheTestament) && (cacheBuf)) { + // have the text buffered + return; + } + + //printf ("Got buffer number{%ld} versestart{%ld} versesize{%d}\n", ulBuffNum, ulVerseStart, usVerseSize); + + + if (lseek(idxfp[testmt-1]->getFd(), ulBuffNum*12, SEEK_SET)!=(long) ulBuffNum*12) + { + printf ("Error seeking compressed file index\n"); + return; + } + if (read(idxfp[testmt-1]->getFd(), &ulCompOffset, 4)<4) + { + printf ("Error reading ulCompOffset\n"); + return; + } + if (read(idxfp[testmt-1]->getFd(), &ulCompSize, 4)<4) + { + printf ("Error reading ulCompSize\n"); + return; + } + if (read(idxfp[testmt-1]->getFd(), &ulUnCompSize, 4)<4) + { + printf ("Error reading ulUnCompSize\n"); + return; + } + + ulCompOffset = swordtoarch32(ulCompOffset); + ulCompSize = swordtoarch32(ulCompSize); + ulUnCompSize = swordtoarch32(ulUnCompSize); + + if (lseek(textfp[testmt-1]->getFd(), ulCompOffset, SEEK_SET)!=(long)ulCompOffset) + { + printf ("Error: could not seek to right place in compressed text\n"); + return; + } + pcCompText = new char[ulCompSize]; + + if (read(textfp[testmt-1]->getFd(), pcCompText, ulCompSize)<(long)ulCompSize) + { + printf ("Error reading compressed text\n"); + return; + } + compressor->zBuf(&ulCompSize, pcCompText); + + if (cacheBuf) { + flushCache(); + free(cacheBuf); + } + + unsigned long len = 0; + compressor->Buf(0, &len); + cacheBuf = (char *)calloc(len + 1, 1); + memcpy(cacheBuf, compressor->Buf(), len); + + cacheTestament = testmt; + cacheBufIdx = ulBuffNum; + if (pcCompText) + delete [] pcCompText; + } +} + + +/****************************************************************************** + * zVerse::zreadtext - gets text at a given offset + * + * ENT: testmt - testament file to search in (0 - Old; 1 - New) + * start - starting offset where the text is located in the file + * size - size of text entry + 1 (null) + * buf - buffer to store text + * + */ + +void zVerse::zreadtext(char testmt, long start, unsigned short size, char *inbuf) +{ + memset(inbuf, 0, size); + if (size > 2) { + strncpy(inbuf, &(cacheBuf[start]), size-2); + } +} + + +/****************************************************************************** + * zVerse::settext - Sets text for current offset + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * idxoff - offset into .vss + * buf - buffer to store + * len - length of buffer (0 - null terminated) + */ + +void zVerse::settext(char testmt, long idxoff, const char *buf, long len) { + + len = (len < 0) ? strlen(buf) : len; + if (!testmt) + testmt = ((idxfp[0]) ? 1:2); + if ((!dirtyCache) || (cacheBufIdx < 0)) { + cacheBufIdx = lseek(idxfp[testmt-1]->getFd(), 0, SEEK_END) / 12; + cacheTestament = testmt; + if (cacheBuf) + free(cacheBuf); + cacheBuf = (char *)calloc(len + 1, 1); + } + else cacheBuf = (char *)((cacheBuf)?realloc(cacheBuf, strlen(cacheBuf)+(len + 1)):calloc((len + 1), 1)); + + dirtyCache = true; + + unsigned long start, outstart; + unsigned long outBufIdx = cacheBufIdx; + unsigned short size; + unsigned short outsize; + + idxoff *= 10; + size = outsize = len; + + start = strlen(cacheBuf); + + if (!size) + start = outBufIdx = 0; + + outBufIdx = archtosword32(outBufIdx); + outstart = archtosword32(start); + outsize = archtosword16(size); + + lseek(compfp[testmt-1]->getFd(), idxoff, SEEK_SET); + write(compfp[testmt-1]->getFd(), &outBufIdx, 4); + write(compfp[testmt-1]->getFd(), &outstart, 4); + write(compfp[testmt-1]->getFd(), &outsize, 2); + strcat(cacheBuf, buf); +} + + +void zVerse::flushCache() { + if (dirtyCache) { + unsigned long idxoff; + unsigned long start, outstart; + unsigned long size, outsize; + unsigned long zsize, outzsize; + + idxoff = cacheBufIdx * 12; + size = outsize = zsize = outzsize = strlen(cacheBuf); + if (size) { +// if (compressor) { +// delete compressor; +// compressor = new LZSSCompress(); +// } + compressor->Buf(cacheBuf); + compressor->zBuf(&zsize); + outzsize = zsize; + + start = outstart = lseek(textfp[cacheTestament-1]->getFd(), 0, SEEK_END); + + outstart = archtosword32(start); + outsize = archtosword32(size); + outzsize = archtosword32(zsize); + + write(textfp[cacheTestament-1]->getFd(), compressor->zBuf(&zsize), zsize); + + lseek(idxfp[cacheTestament-1]->getFd(), idxoff, SEEK_SET); + write(idxfp[cacheTestament-1]->getFd(), &outstart, 4); + write(idxfp[cacheTestament-1]->getFd(), &outzsize, 4); + write(idxfp[cacheTestament-1]->getFd(), &outsize, 4); + } + dirtyCache = false; + } +} + +/****************************************************************************** + * RawVerse::linkentry - links one entry to another + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * destidxoff - dest offset into .vss + * srcidxoff - source offset into .vss + */ + +void zVerse::linkentry(char testmt, long destidxoff, long srcidxoff) { + long bufidx; + long start; + unsigned short size; + + destidxoff *= 10; + srcidxoff *= 10; + + if (!testmt) + testmt = ((idxfp[1]) ? 1:2); + + // get source + lseek(compfp[testmt-1]->getFd(), srcidxoff, SEEK_SET); + read(compfp[testmt-1]->getFd(), &bufidx, 4); + read(compfp[testmt-1]->getFd(), &start, 4); + read(compfp[testmt-1]->getFd(), &size, 2); + + // write dest + lseek(compfp[testmt-1]->getFd(), destidxoff, SEEK_SET); + write(compfp[testmt-1]->getFd(), &bufidx, 4); + write(compfp[testmt-1]->getFd(), &start, 4); + write(compfp[testmt-1]->getFd(), &size, 2); +} + + +/****************************************************************************** + * RawVerse::CreateModule - Creates new module files + * + * ENT: path - directory to store module files + * RET: error status + */ + +char zVerse::createModule(const char *ipath, int blockBound) +{ + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd, *fd2; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + sprintf(buf, "%s/ot.%czs", path, uniqueIndexID[blockBound]); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s/nt.%czs", path, uniqueIndexID[blockBound]); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s/ot.%czz", path, uniqueIndexID[blockBound]); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s/nt.%czz", path, uniqueIndexID[blockBound]); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s/ot.%czv", path, uniqueIndexID[blockBound]); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + + sprintf(buf, "%s/nt.%czv", path, uniqueIndexID[blockBound]); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + + VerseKey vk; + vk.Headings(1); + long offset = 0; + short size = 0; + for (vk = TOP; !vk.Error(); vk++) { + write((vk.Testament() == 1) ? fd->getFd() : fd2->getFd(), &offset, 4); //compBufIdxOffset + write((vk.Testament() == 1) ? fd->getFd() : fd2->getFd(), &offset, 4); + write((vk.Testament() == 1) ? fd->getFd() : fd2->getFd(), &size, 2); + } + + FileMgr::systemFileMgr.close(fd); + FileMgr::systemFileMgr.close(fd2); + + delete [] path; +/* + RawVerse rv(path); + VerseKey mykey("Rev 22:21"); +*/ + + return 0; +} + + +/****************************************************************************** + * zVerse::preptext - Prepares the text before returning it to external + * objects + * + * ENT: buf - buffer where text is stored and where to store the prep'd + * text. + */ + +void zVerse::preptext(char *buf) +{ + char *to, *from, space = 0, cr = 0, realdata = 0, nlcnt = 0; + + for (to = from = buf; *from; from++) { + switch (*from) { + case 10: + if (!realdata) + continue; + space = (cr) ? 0 : 1; + cr = 0; + nlcnt++; + if (nlcnt > 1) { +// *to++ = nl; + *to++ = nl; +// nlcnt = 0; + } + continue; + case 13: + if (!realdata) + continue; + *to++ = nl; + space = 0; + cr = 1; + continue; + } + realdata = 1; + nlcnt = 0; + if (space) { + space = 0; + if (*from != ' ') { + *to++ = ' '; + from--; + continue; + } + } + *to++ = *from; + } + *to = 0; + + if (to > buf) { + for (to--; to > buf; to--) { // remove trailing excess + if ((*to == 10) || (*to == ' ')) + *to = 0; + else break; + } + } +} |