diff options
author | danglassey <danglassey> | 2002-08-14 09:57:17 +0000 |
---|---|---|
committer | danglassey <danglassey> | 2002-08-14 09:57:17 +0000 |
commit | c9458897ebbb739d8db83c80e06512d8a612f743 (patch) | |
tree | f8c5381045887e34388cc6b26cfccc254bf766dc /src/modules/texts/rawtext | |
download | sword-sf-cvs-c9458897ebbb739d8db83c80e06512d8a612f743.tar.gz |
*** empty log message ***
Diffstat (limited to 'src/modules/texts/rawtext')
-rw-r--r-- | src/modules/texts/rawtext/Makefile | 5 | ||||
-rw-r--r-- | src/modules/texts/rawtext/Makefile.am | 4 | ||||
-rw-r--r-- | src/modules/texts/rawtext/kjvidx.cpp | 169 | ||||
-rw-r--r-- | src/modules/texts/rawtext/makebnds.c | 86 | ||||
-rw-r--r-- | src/modules/texts/rawtext/nuidx.cpp | 238 | ||||
-rw-r--r-- | src/modules/texts/rawtext/ojbtxidx.c | 166 | ||||
-rw-r--r-- | src/modules/texts/rawtext/rawtext.cpp | 580 | ||||
-rw-r--r-- | src/modules/texts/rawtext/rawtxidx.c | 146 | ||||
-rw-r--r-- | src/modules/texts/rawtext/rtfidx.cpp | 164 | ||||
-rw-r--r-- | src/modules/texts/rawtext/svetxidx.c | 153 | ||||
-rw-r--r-- | src/modules/texts/rawtext/vntidx.cpp | 185 |
11 files changed, 1896 insertions, 0 deletions
diff --git a/src/modules/texts/rawtext/Makefile b/src/modules/texts/rawtext/Makefile new file mode 100644 index 0000000..35d6648 --- /dev/null +++ b/src/modules/texts/rawtext/Makefile @@ -0,0 +1,5 @@ + +root := ../../../.. + +all: + make -C ${root} diff --git a/src/modules/texts/rawtext/Makefile.am b/src/modules/texts/rawtext/Makefile.am new file mode 100644 index 0000000..d0e1d7e --- /dev/null +++ b/src/modules/texts/rawtext/Makefile.am @@ -0,0 +1,4 @@ +rawtextdir = $(top_srcdir)/src/modules/texts/rawtext + +libsword_la_SOURCES += $(rawtextdir)/rawtext.cpp + diff --git a/src/modules/texts/rawtext/kjvidx.cpp b/src/modules/texts/rawtext/kjvidx.cpp new file mode 100644 index 0000000..708a9e6 --- /dev/null +++ b/src/modules/texts/rawtext/kjvidx.cpp @@ -0,0 +1,169 @@ +#include <stdio.h> +#include <fcntl.h> +#include <versekey.h> + + +char findbreak(int fp, int *offset, int *num1, int *num2, short *size); + + +main(int argc, char **argv) +{ + int fp, vfp, cfp, bfp; + long pos; + short size, tmp; + int num1, num2, offset, curbook = 0, curchap = 0, curverse = 0; + char buf[127]; + VerseKey mykey; + + if ((argc < 2) || (argc > 3)) { + fprintf(stderr, "usage: %s <file to process> [nt]\n", argv[0]); + exit(1); + } + + if ((fp = open(argv[1], O_RDONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", argv[1]); + exit(1); + } + + sprintf(buf, "%s.vss", argv[1]); + if ((vfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", argv[1]); + if ((cfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", argv[1]); + if ((bfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + pos = 0; + write(bfp, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfp, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros 
until parsing correctly */ + pos = 0; + size = 0; + write(vfp, &pos, 4); /* Module intro */ + write(vfp, &size, 2); + write(vfp, &pos, 4); /* Testament intro */ + write(vfp, &size, 2); + + mykey = (argc == 3) ? "Matthew 1:1" : "Genesis 1:1"; + + while (!findbreak(fp, &offset, &num1, &num2, &size)) { + num1 = mykey.Chapter(); + num2 = mykey.Verse(); + if (num2 == 1) { /* if we're at a new chapter */ + if (num1 == 1) { /* if we're at a new book */ + pos = lseek(cfp, 0, SEEK_CUR); + write(bfp, &pos, 4); + pos = lseek(vfp, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfp, &pos, 4); + pos = 0; + tmp = 0; + write(vfp, &pos, 4); /* Book intro (vss) */ + write(vfp, &tmp, 2); + curbook++; + curchap = 0; + } + pos = lseek(vfp, 0, SEEK_CUR); + write(cfp, &pos, 4); + curverse = 1; + pos = 0; + tmp = 0; + write(vfp, &pos, 4); /* Chapter intro */ + write(vfp, &tmp, 2); + curchap++; + } + else curverse++; + + printf("%2d:%3d:%3d found at offset: %7d\n", curbook, num1, num2, offset); + + if (num1 != curchap) { + fprintf(stderr, "Error: Found chaptures out of sequence\n"); + break; + } + if (num2 != curverse) { + fprintf(stderr, "Error: Found verses out of sequence\n"); + break; + } + write(vfp, &offset, 4); + write(vfp, &size, 2); + mykey++; + } + + close(vfp); + close(cfp); + close(bfp); + close(fp); +} + + +char findbreak(int fp, int *offset, int *num1, int *num2, short *size) +{ + char buf[17]; + char buf2[7]; + char loop; + char offadj, inquotes, sizeadj; + int offset2, ch2, vs2; + + memset(buf, ' ', 17); + + while (1) { + offadj = -10; + inquotes = 0; + sizeadj = 0; + if ((!memcmp(buf, "\\widctlpar {\\b\\f0\\cf2 ", 16)) && (!size)) { + offadj = -1; +// inquotes = 1; + sizeadj = -18; + } + if (!memcmp(&buf[1], "\\f0\\fs16\\cf2\\up6", 15)) { + offadj = 0; + inquotes = 1; + sizeadj = (*buf == 10) ? 
-18:-17; + } + if (!memcmp(buf, "\\fi200\\widctlpar", 16)) { + offadj = -1; +// inquotes = 1; + sizeadj = -18; + } + if (offadj > -10) { + *offset = lseek(fp, 0, SEEK_CUR) + offadj; + if (size) { + (*offset)++; + while (inquotes) { + while (read(fp, buf2, 1) == 1) { + if (*buf2 == '}') + break; + (*offset)++; + } + inquotes--; + } + if (findbreak(fp, &offset2, &ch2, &vs2, 0)) { + *size = (short) (lseek(fp, 0, SEEK_END) - (*offset)); + } + else { + sprintf(buf2, "%d:%d", ch2, vs2); + *size = (offset2 - (*offset)); + } + lseek(fp, *offset+17, SEEK_SET); + } + else (*offset) += sizeadj; + return 0; + } + memmove(buf, &buf[1], 16); + if (read(fp, &buf[16], 1) != 1) + return 1; + } +} + diff --git a/src/modules/texts/rawtext/makebnds.c b/src/modules/texts/rawtext/makebnds.c new file mode 100644 index 0000000..44da447 --- /dev/null +++ b/src/modules/texts/rawtext/makebnds.c @@ -0,0 +1,86 @@ +#include <stdio.h> +#include <fcntl.h> + + +char *bnames[] = { + "Genesis", "Exodus", "Leviticus", "Numbers", "Deuteronomy", + "Joshua", "Judges", "Ruth", "I Samual", "II Samuel", + "I Kings", "II Kings", "I Chronicles", "II Chronicles", "Ezra", + "Nehemiah", "Esther", "Job", "Psalms", "Proverbs", + "Ecclesiastes", "Song of Solomon", "Isaiah", "Jeremiah", "Lamentations", + "Ezekiel", "Daniel", "Hosea", "Joel", "Amos", + "Obadiah", "Jonah", "Micah", "Nahum", "Habakkuk", + "Zephaniah", "Haggai", "Zechariah", "Malachi", + "Matthew", "Mark", "Luke", "John", "Acts", + "Romans", "I Corinthians", "II Corinthians", "Galatians", "Ephesians", + "Philippians", "Colossians", "I Thessalonians", "II Thessalonians", "I Timothy", + "II Timothy", "Titus", "Philemon", "Hebrews", "James", + "I Peter", "II Peter", "I John", "II John", "III John", + "Jude", "Revelation of John"}; + + + +main(int argc, char **argv) +{ + int fp, vfp, cfp, bfp; + long pos; + int num1, num2, offset, offset2, chapmax, chapoff, chapoff2, curbook = 0, curchap = 0, curverse = 0; + char buf[127]; + + if (argc > 3) { + 
fprintf(stderr, "usage: %s <file to process> [NT?]\n", argv[0]); + exit(1); + } + + if (argc > 2) + curbook = 39; + sprintf(buf, "%s.vss", argv[1]); + if ((vfp = open(buf, O_RDONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", argv[1]); + if ((cfp = open(buf, O_RDONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", argv[1]); + if ((bfp = open(buf, O_RDONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + read(bfp, &offset2, sizeof(offset2)); + read(cfp, &chapoff2, sizeof(chapoff2)); + while (read(bfp, &offset, sizeof(offset)) == sizeof(offset)) { + chapmax = (offset - offset2) / sizeof(offset); + printf("\n\{\"%s\", %d}, \n// %s\n", bnames[curbook], chapmax, bnames[curbook]); + curbook++; + for (curchap = 0; curchap < chapmax; curchap++) { + read(cfp, &chapoff, sizeof(chapoff)); + printf("%d, ", (chapoff - chapoff2) / sizeof(chapoff)); + chapoff2 = chapoff; + } + offset2 = offset; + } + pos = lseek(cfp, 0, SEEK_CUR); + offset = (int) lseek(cfp, 0, SEEK_END); + chapmax = (offset - offset2) / sizeof(offset); + printf("\n\{\"%s\", %d}, \n// %s\n", bnames[curbook], chapmax, bnames[curbook]); + curbook++; + lseek(cfp, pos, SEEK_SET); + for (curchap = 0; curchap < chapmax - 1; curchap++) { + read(cfp, &chapoff, sizeof(chapoff)); + printf("%d, ", (chapoff - chapoff2) / sizeof(chapoff)); + chapoff2 = chapoff; + } + chapoff = (int) lseek(vfp, 0, SEEK_END); + printf("%d, ", (chapoff - chapoff2) / sizeof(chapoff)); + + close(vfp); + close(cfp); + close(bfp); + close(fp); +} diff --git a/src/modules/texts/rawtext/nuidx.cpp b/src/modules/texts/rawtext/nuidx.cpp new file mode 100644 index 0000000..edf298d --- /dev/null +++ b/src/modules/texts/rawtext/nuidx.cpp @@ -0,0 +1,238 @@ +/***************************************************************************** + * + * This code wreaks but works (at least for MHC). Good luck! 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <fcntl.h> +#include <versekey.h> + + +void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size); +char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, short *size); +void openfiles(char *fname); +void checkparams(int argc, char **argv); + + +VerseKey key1, key2, key3; +int fp, vfp, cfp, bfp; +long chapoffset; +short chapsize; +char testmnt; + + +main(int argc, char **argv) +{ + long pos, offset; + int num1, num2, rangemax, curbook = 0, curchap = 0, curverse = 0; + char buf[127], startflag = 0; + short size, tmp; + + checkparams(argc, argv); + + openfiles(argv[1]); + + testmnt = key1.Testament(); + num1 = key1.Chapter(); + num2 = key1.Verse(); + pos = 0; + write(bfp, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfp, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + pos = 0; + size = 0; + write(vfp, &pos, 4); /* Module intro */ + write(vfp, &size, 2); + write(vfp, &pos, 4); /* Testament intro */ + write(vfp, &size, 2); + + while(!findbreak(fp, &offset, &num1, &num2, &rangemax, &size)) { + writeidx(key1, key2, key3, offset, size); + key2++; + key3 = key2; + } + close(vfp); + close(cfp); + close(bfp); + close(fp); +} + + +/************************************************************************** + * ENT: key1 - current location of index + * key2 - minimum keyval for which this offset is valid + * key3 - maximum keyval for which this offset is valid + */ + +void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size) +{ + long pos; + short tmp; + + for (; ((key1 <= key3) && (key1.Error() != KEYERR_OUTOFBOUNDS) && (key1.Testament() == testmnt)); key1+=1) { + if (key1.Verse() == 1) { // new chapter + if (key1.Chapter() == 1) { // new 
book + pos = lseek(cfp, 0, SEEK_CUR); + write(bfp, &pos, 4); + pos = lseek(vfp, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfp, &pos, 4); + write(vfp, &chapoffset, 4); /* Book intro (vss) set to same as chap for now(it should be chap 1 which usually contains the book into anyway)*/ + write(vfp, &chapsize, 2); + } + pos = lseek(vfp, 0, SEEK_CUR); + write(cfp, &pos, 4); + write(vfp, &chapoffset, 4); /* Chapter intro */ + write(vfp, &chapsize, 2); + } + if (key1 >= key2) { + write(vfp, &offset, 4); + write(vfp, &size, 2); + } + else { + pos = 0; + tmp = 0; + write(vfp, &pos, 4); + write(vfp, &tmp, 2); + } + } +} + + +char startchap(char *buf) +{ + char loop; + + if (buf[0] != '<') + return 0; + if (buf[1] != 'S') + return 0; + if (buf[2] != 'C') + return 0; +/* + if (!isdigit(buf[2])) + return 0; + for (loop = 3; loop < 7; loop++) { + if (buf[loop] == ' ') + break; + if ((!isdigit(buf[loop])) && (buf[loop] != ',') && (buf[loop] != '-')) + return 0; + } +*/ + return 1; +} + + +char startentry(char *buf) +{ + char loop; + + if (buf[0] != '<') + return 0; + if (buf[1] != 'S') + return 0; + if (buf[2] != 'V') + return 0; +/* + if (!isdigit(buf[2])) + return 0; + for (loop = 3; loop < 7; loop++) { + if (buf[loop] == ' ') + break; + if ((!isdigit(buf[loop])) && (buf[loop] != ',') && (buf[loop] != '-')) + return 0; + } +*/ + return 1; +} + + +char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, short *size) +{ + char buf[7]; + char buf2[20]; + char ch; + char loop; + long offset2; + int ch2, vs2, rm2; + bool flag; + long chapstart = 0; + + memset(buf, ' ', 7); + + while (1) { + if (startentry(buf)) { + if (size) + *offset = lseek(fp, 0, SEEK_CUR) - 3; + else *offset = lseek(fp, 0, SEEK_CUR) - 7; + if (size) { + ch2 = *num1; + vs2 = *num2; + if (findbreak(fp, &offset2, &ch2, &vs2, &rm2, 0)) { + *size = (short) (lseek(fp, 0, SEEK_END) - (*offset)); + } + else { + *size = (offset2 - (*offset)); + } + lseek(fp, *offset, SEEK_SET); + } + return 0; + } + 
memmove(buf, &buf[1], 6); + if (read(fp, &buf[6], 1) != 1) + return 1; + } +} + + +void openfiles(char *fname) +{ + char buf[255]; + + if ((fp = open(fname, O_RDONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", fname); + exit(1); + } + + sprintf(buf, "%s.vss", fname); + if ((vfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", fname); + if ((cfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", fname); + if ((bfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } +} + + +void checkparams(int argc, char **argv) +{ + if (argc < 2) { + fprintf(stderr, "usage: %s <file to process> [nt - for new testmt file]\n", argv[0]); + exit(1); + } + if (argc == 3) + key1 = key2 = key3 = "Matthew 1:1"; + else key1 = key2 = key3 = "Genesis 1:1"; +} diff --git a/src/modules/texts/rawtext/ojbtxidx.c b/src/modules/texts/rawtext/ojbtxidx.c new file mode 100644 index 0000000..f70cc01 --- /dev/null +++ b/src/modules/texts/rawtext/ojbtxidx.c @@ -0,0 +1,166 @@ +#include <stdio.h> +#include <fcntl.h> +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + + +char findbreak(int fd, long *offset, int *num1, int *num2, short *size); + + +main(int argc, char **argv) +{ + int fd, vfd, cfd, bfd; + long pos, offset; + short size, tmp; + int num1, num2, curbook = 0, curchap = 0, curverse = 0; + char buf[127]; + + if (argc != 2) { + fprintf(stderr, "usage: %s <file to process>\n", argv[0]); + exit(1); + } +#ifndef O_BINARY // O_BINARY is for Borland to be happy. 
If we're in GNU, just define it to a NULL mask +#define O_BINARY 0 +#endif + if ((fd = open(argv[1], O_RDONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", argv[1]); + exit(1); + } + + sprintf(buf, "%s.vss", argv[1]); + if ((vfd = open(buf, O_CREAT|O_WRONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", argv[1]); + if ((cfd = open(buf, O_CREAT|O_WRONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", argv[1]); + if ((bfd = open(buf, O_CREAT|O_WRONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + pos = 0; + write(bfd, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfd, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + pos = 0; + size = 0; + write(vfd, &pos, 4); /* Module intro */ + write(vfd, &size, 2); + write(vfd, &pos, 4); /* Testament intro */ + write(vfd, &size, 2); + + while (!findbreak(fd, &offset, &num1, &num2, &size)) { + + if (num2 == 1) { /* if we're at a new chapter */ + if (num1 == 1) { /* if we're at a new book */ + pos = lseek(cfd, 0, SEEK_CUR); + write(bfd, &pos, 4); + pos = lseek(vfd, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfd, &pos, 4); + pos = 0; + tmp = 0; + write(vfd, &pos, 4); /* Book intro (vss) */ + write(vfd, &tmp, 2); + curbook++; + curchap = 0; + } + pos = lseek(vfd, 0, SEEK_CUR); + write(cfd, &pos, 4); + curverse = 1; + pos = 0; + tmp = 0; + write(vfd, &pos, 4); /* Chapter intro */ + write(vfd, &tmp, 2); + curchap++; + } + else curverse++; + + printf("%2d:%3d:%3d found at offset: %7ld\n", curbook, num1, num2, offset); + + if (num1 != curchap) { + fprintf(stderr, "Error: Found chaptures out of sequence (%2d:%3d:%3d)\n", curbook, num1-1, num2); + curchap = num1; +// break; + } + if (num2 != curverse) { + fprintf(stderr, "Error: Found verses out of 
sequence (%2d:%3d:%3d)\n", curbook, num1, num2-1); +// break; + tmp = 0; + curverse = num2; + write(vfd, &offset, 4); + write(vfd, &tmp, 2); + } + write(vfd, &offset, 4); + write(vfd, &size, 2); + } + + close(vfd); + close(cfd); + close(bfd); + close(fd); + return 0; +} + + +char findbreak(int fd, long *offset, int *num1, int *num2, short *size) +{ + char buf[8]; + char buf2[7]; + char loop, len, star; + + memset(buf, ' ', 7); + buf[7] = 0; + + while (1) { + + memmove(buf, &buf[1], 6); + if (read(fd, &buf[6], 1) != 1) + return 1; + + if ((buf[0] == 10) && ((buf[2] == '*') || (buf[3] == '*') || (buf[4] == '*'))) { + star = 0; + for (loop = 0; loop < 7; loop++) { + if (buf[loop] == '*') + star = 1; + if (isdigit(buf[loop])&&star) + break; + else buf[loop] = ' '; + } + if (loop < 7) { + sscanf(buf, "%d", num1); + continue; + } + } + + if ((buf[0] == '|') && (isdigit(buf[1]))) { + sscanf(&buf[1], "%d", num2); + sprintf(buf, "%d", *num2); + (*offset) = lseek(fd, 0, SEEK_CUR); + (*offset) -= (4-strlen(buf)); + + for (len = 1; len == 1; len = read(fd, &loop, 1)) { + if (loop == '|') + break; + } + + *size = (short)(lseek(fd, 0, SEEK_CUR) - *offset) - 1; + lseek(fd, -1, SEEK_CUR); + break; + } + } + return 0; +} + diff --git a/src/modules/texts/rawtext/rawtext.cpp b/src/modules/texts/rawtext/rawtext.cpp new file mode 100644 index 0000000..c2214f8 --- /dev/null +++ b/src/modules/texts/rawtext/rawtext.cpp @@ -0,0 +1,580 @@ +/****************************************************************************** + * rawtext.cpp - code for class 'RawText'- a module that reads raw text files: + * ot and nt using indexs ??.bks ??.cps ??.vss + */ + + +#include <stdio.h> +#include <fcntl.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <string.h> +#include <utilfuns.h> +#include <rawverse.h> +#include <rawtext.h> + +#include <map> +#include <list> +#include <algorithm> +#include <regex.h> // GNU + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + 
+/****************************************************************************** + * RawText Constructor - Initializes data for instance of RawText + * + * ENT: iname - Internal name for module + * idesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +RawText::RawText(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) + : SWText(iname, idesc, idisp, enc, dir, mark, ilang), + RawVerse(ipath) { + + string fname; + fname = path; + char ch = fname.c_str()[strlen(fname.c_str())-1]; + if ((ch != '/') && (ch != '\\')) + fname += "/"; + + for (int loop = 0; loop < 2; loop++) { + fastSearch[loop] = 0; + string fastidxname =(fname + ((loop)?"ntwords.dat":"otwords.dat")); + if (!access(fastidxname.c_str(), 04)) { + fastidxname = (fname + ((loop)?"ntwords.idx":"otwords.idx")); + if (!access(fastidxname.c_str(), 04)) + fastSearch[loop] = new RawStr((fname + ((loop)?"ntwords":"otwords")).c_str()); + } + } +} + + +/****************************************************************************** + * RawText Destructor - Cleans up instance of RawText + */ + +RawText::~RawText() +{ + if (fastSearch[0]) + delete fastSearch[0]; + + if (fastSearch[1]) + delete fastSearch[1]; +} + + +/****************************************************************************** + * RawText::getRawEntry - Returns the correct verse when char * cast + * is requested + * + * RET: string buffer with verse + */ + +char *RawText::getRawEntry() { + long start = 0; + unsigned short size = 0; + VerseKey *key = 0; + + // see if we have a VerseKey * or decendant + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... 
) { } + // if we don't have a VerseKey * decendant, create our own + if (!key) + key = new VerseKey(this->key); + + findoffset(key->Testament(), key->Index(), &start, &size); + entrySize = size; // support getEntrySize call + + unsigned long newsize = (size + 2) * FILTERPAD; + if (newsize > entrybufallocsize) { + if (entrybuf) + delete [] entrybuf; + entrybuf = new char [ newsize ]; + entrybufallocsize = newsize; + } + *entrybuf = 0; + + readtext(key->Testament(), start, (size + 2), entrybuf); + + rawFilter(entrybuf, size, key); + + if (!isUnicode()) + preptext(entrybuf); + + if (this->key != key) // free our key if we created a VerseKey + delete key; + + return entrybuf; +} + + +signed char RawText::createSearchFramework() { + SWKey *savekey = 0; + SWKey *searchkey = 0; + SWKey textkey; + char *word = 0; + char *wordBuf = 0; + + // dictionary holds words associated with a list + // containing every module position that contains + // the word. [0] Old Testament; [1] NT + map < string, list<long> > dictionary[2]; + + + // save key information so as not to disrupt original + // module position + if (!key->Persist()) { + savekey = CreateKey(); + *savekey = *key; + } + else savekey = key; + + searchkey = (key->Persist())?key->clone():0; + if (searchkey) { + searchkey->Persist(1); + SetKey(*searchkey); + } + + // position module at the beginning + *this = TOP; + + VerseKey *lkey = (VerseKey *)key; + + // iterate thru each entry in module + while (!Error()) { + long index = lkey->Index(); + wordBuf = (char *)calloc(sizeof(char), strlen(StripText()) + 1); + strcpy(wordBuf, StripText()); + + // grab each word from the text + word = strtok(wordBuf, " !.,?;:()-=+/\\|{}[]\"<>"); + while (word) { + + // make word upper case + toupperstr(word); + + // lookup word in dictionary (or make entry in dictionary + // for this word) and add this module position (index) to + // the word's associated list of module positions + dictionary[lkey->Testament()-1][word].push_back(index); + 
word = strtok(NULL, " !.,?;:()-=+/\\|{}[]\"<>"); + } + free(wordBuf); + (*this)++; + } + + // reposition module back to where it was before we were called + SetKey(*savekey); + + if (!savekey->Persist()) + delete savekey; + + if (searchkey) + delete searchkey; + + + // --------- Let's output an index from our dictionary ----------- + int datfd; + int idxfd; + map < string, list<long> >::iterator it; + list<long>::iterator it2; + unsigned long offset, entryoff; + unsigned short size; + + string fname; + fname = path; + char ch = fname.c_str()[strlen(fname.c_str())-1]; + if ((ch != '/') && (ch != '\\')) + fname += "/"; + + // for old and new testament do... + for (int loop = 0; loop < 2; loop++) { + if ((datfd = open((fname + ((loop)?"ntwords.dat":"otwords.dat")).c_str(), O_CREAT|O_WRONLY|O_BINARY, 00644 )) == -1) + return -1; + if ((idxfd = open((fname + ((loop)?"ntwords.idx":"otwords.idx")).c_str(), O_CREAT|O_WRONLY|O_BINARY, 00644 )) == -1) { + close(datfd); + return -1; + } + + // iterate thru each word in the dictionary + for (it = dictionary[loop].begin(); it != dictionary[loop].end(); it++) { + printf("%s: ", it->first.c_str()); + + // get our current offset in our word.dat file and write this as the start + // of the next entry in our database + offset = lseek(datfd, 0, SEEK_CUR); + write(idxfd, &offset, 4); + + // write our word out to the word.dat file, delineating with a \n + write(datfd, it->first.c_str(), strlen(it->first.c_str())); + write(datfd, "\n", 1); + + // force our mod position list for this word to be unique (remove + // duplicates that may exist if the word was found more than once + // in the verse + it->second.unique(); + + // iterate thru each mod position for this word and output it to + // our database + unsigned short count = 0; + for (it2 = it->second.begin(); it2 != it->second.end(); it2++) { + entryoff= *it2; + write(datfd, &entryoff, 4); + count++; + } + + // now see what our new position is in our word.dat file and + // determine 
the size of this database entry + size = lseek(datfd, 0, SEEK_CUR) - offset; + + // store the size of this database entry + write(idxfd, &size, 2); + printf("%d entries (size: %d)\n", count, size); + } + close(datfd); + close(idxfd); + } + return 0; +} + + +/****************************************************************************** + * SWModule::Search - Searches a module for a string + * + * ENT: istr - string for which to search + * searchType - type of search to perform + * >=0 - regex + * -1 - phrase + * -2 - multiword + * flags - options flags for search + * justCheckIfSupported - if set, don't search, only tell if this + * function supports requested search. + * + * RET: listkey set to verses that contain istr + */ + +ListKey &RawText::Search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) +{ + listkey.ClearList(); + + if ((fastSearch[0]) && (fastSearch[1])) { + + switch (searchType) { + case -2: { + + if ((flags & REG_ICASE) != REG_ICASE) // if haven't chosen to + // ignore case + break; // can't handle fast case sensitive searches + + // test to see if our scope for this search is bounded by a + // VerseKey + VerseKey *testKeyType = 0; + try { + testKeyType = SWDYNAMIC_CAST(VerseKey, ((scope)?scope:key)); + } + catch ( ... ) {} + // if we don't have a VerseKey * decendant we can't handle + // because of scope. + // In the future, add bool SWKey::isValid(const char *tryString); + if (!testKeyType) + break; + + + // check if we just want to see if search is supported. + // If we've gotten this far, then it is supported. 
+ if (justCheckIfSupported) { + *justCheckIfSupported = true; + return listkey; + } + + SWKey saveKey = *testKeyType; // save current place + + char error = 0; + char **words = 0; + char *wordBuf = 0; + int wordCount = 0; + long start; + unsigned short size; + char *idxbuf = 0; + char *datbuf = 0; + list <long> indexes; + list <long> indexes2; + VerseKey vk; + vk = TOP; + + (*percent)(10, percentUserData); + + // toupper our copy of search string + stdstr(&wordBuf, istr); + toupperstr(wordBuf); + + // get list of individual words + words = (char **)calloc(sizeof(char *), 10); + int allocWords = 10; + words[wordCount] = strtok(wordBuf, " "); + while (words[wordCount]) { + wordCount++; + if (wordCount == allocWords) { + allocWords+=10; + words = (char **)realloc(words, sizeof(char *)*allocWords); + } + words[wordCount] = strtok(NULL, " "); + } + + (*percent)(20, percentUserData); + + // clear our result set + indexes.erase(indexes.begin(), indexes.end()); + + // search both old and new testament indexes + for (int j = 0; j < 2; j++) { + // iterate thru each word the user passed to us. 
+ for (int i = 0; i < wordCount; i++) { + + // clear this word's result set + indexes2.erase(indexes2.begin(), indexes2.end()); + error = 0; + + // iterate thru every word in the database that starts + // with our search word + for (int away = 0; !error; away++) { + idxbuf = 0; + + // find our word in the database and jump ahead _away_ + error = fastSearch[j]->findoffset(words[i], &start, &size, away); + + // get the word from the database + fastSearch[j]->getidxbufdat(start, &idxbuf); + + // check to see if it starts with our target word + if (strlen(idxbuf) > strlen(words[i])) + idxbuf[strlen(words[i])] = 0; +// else words[i][strlen(idxbuf)] = 0; + if (!strcmp(idxbuf, words[i])) { + + // get data for this word from database + free(idxbuf); + idxbuf = 0; + datbuf = 0; + fastSearch[j]->readtext(start, &size, &idxbuf, &datbuf); + + // we know that the data consists of sizof(long) + // records each a valid module position that constains + // this word + // + // iterate thru each of these module positions + long *keyindex = (long *)datbuf; + while (keyindex < (long *)(datbuf + size - (strlen(idxbuf) + 1))) { + if (i) { // if we're not on our first word + + // check to see if this word is already in the result set. 
+ // This is our AND functionality + if (find(indexes.begin(), indexes.end(), *keyindex) != indexes.end()) + // add to new result set + indexes2.push_back(*keyindex); + } + else indexes2.push_back(*keyindex); + keyindex++; + } + free(datbuf); + } + else error = 1; // no more matches + free(idxbuf); + } + + // make new result set final result set + indexes = indexes2; + + percent((char)(20 + (float)((j*wordCount)+i)/(wordCount * 2) * 78), percentUserData); + } + + // indexes contains our good verses, lets return them in a listkey + indexes.sort(); + + // iterate thru each good module position that meets the search + for (list <long>::iterator it = indexes.begin(); it != indexes.end(); it++) { + + // set a temporary verse key to this module position + vk.Testament(j+1); + vk.Error(); + vk.Index(*it); + + // check scope + // Try to set our scope key to this verse key + if (scope) { + *testKeyType = vk; + + // check to see if it set ok and if so, add to our return list + if (*testKeyType == vk) + listkey << (const char *) vk; + } + else listkey << (const char*) vk; + } + } + (*percent)(98, percentUserData); + + free(words); + free(wordBuf); + + *testKeyType = saveKey; // set current place back to original + + listkey = TOP; + (*percent)(100, percentUserData); + return listkey; + } + + default: + break; + } + } + + // check if we just want to see if search is supported + if (justCheckIfSupported) { + *justCheckIfSupported = false; + return listkey; + } + + // if we don't support this search, fall back to base class + return SWModule::Search(istr, searchType, flags, scope, justCheckIfSupported, percent, percentUserData); +} + + +void RawText::setEntry(const char *inbuf, long len) { + VerseKey *key = 0; + // see if we have a VerseKey * or decendant + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... 
) {} + // if we don't have a VerseKey * decendant, create our own + if (!key) + key = new VerseKey(this->key); + + settext(key->Testament(), key->Index(), inbuf, len); + + if (this->key != key) // free our key if we created a VerseKey + delete key; +} + + +void RawText::linkEntry(const SWKey *inkey) { + VerseKey *destkey = 0; + const VerseKey *srckey = 0; + // see if we have a VerseKey * or decendant + try { + destkey = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... ) {} + // if we don't have a VerseKey * decendant, create our own + if (!destkey) + destkey = new VerseKey(this->key); + + // see if we have a VerseKey * or decendant + try { + srckey = SWDYNAMIC_CAST(VerseKey, inkey); + } + catch ( ... ) {} + // if we don't have a VerseKey * decendant, create our own + if (!srckey) + srckey = new VerseKey(inkey); + + linkentry(destkey->Testament(), destkey->Index(), srckey->Index()); + + if (this->key != destkey) // free our key if we created a VerseKey + delete destkey; + + if (inkey != srckey) // free our key if we created a VerseKey + delete srckey; +} + + +/****************************************************************************** + * RawText::deleteEntry - deletes this entry + * + * RET: *this + */ + +void RawText::deleteEntry() { + + VerseKey *key = 0; + + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... ) {} + if (!key) + key = new VerseKey(this->key); + + settext(key->Testament(), key->Index(), ""); + + if (key != this->key) + delete key; +} + +/****************************************************************************** + * RawText::increment - Increments module key a number of entries + * + * ENT: increment - Number of entries to jump forward + * + * RET: *this + */ + +void RawText::increment(int steps) { + long start; + unsigned short size; + VerseKey *tmpkey = 0; + + try { + tmpkey = SWDYNAMIC_CAST(VerseKey, key); + } + catch ( ... 
) {} + if (!tmpkey) + tmpkey = new VerseKey(key); + + findoffset(tmpkey->Testament(), tmpkey->Index(), &start, &size); + + SWKey lastgood = *tmpkey; + while (steps) { + long laststart = start; + unsigned short lastsize = size; + SWKey lasttry = *tmpkey; + (steps > 0) ? (*key)++ : (*key)--; + if (tmpkey != key) + delete tmpkey; + tmpkey = 0; + try { + tmpkey = SWDYNAMIC_CAST(VerseKey, key); + } + catch ( ... ) {} + if (!tmpkey) + tmpkey = new VerseKey(key); + + if ((error = key->Error())) { + *key = lastgood; + break; + } + long index = tmpkey->Index(); + findoffset(tmpkey->Testament(), index, &start, &size); + if ( + (((laststart != start) || (lastsize != size)) // we're a different entry + && (start > 0) && (size)) // and we actually have a size + ||(!skipConsecutiveLinks)) { // or we don't want to skip consecutive links + steps += (steps < 0) ? 1 : -1; + lastgood = *tmpkey; + } + } + error = (error) ? KEYERR_OUTOFBOUNDS : 0; + + if (tmpkey != key) + delete tmpkey; +} diff --git a/src/modules/texts/rawtext/rawtxidx.c b/src/modules/texts/rawtext/rawtxidx.c new file mode 100644 index 0000000..311103e --- /dev/null +++ b/src/modules/texts/rawtext/rawtxidx.c @@ -0,0 +1,146 @@ +#include <stdio.h> +#include <fcntl.h> + + +char findbreak(int fp, int *offset, int *num1, int *num2, short *size); + + +main(int argc, char **argv) +{ + int fp, vfp, cfp, bfp; + long pos; + short size, tmp; + int num1, num2, offset, curbook = 0, curchap = 0, curverse = 0; + char buf[127]; + + if (argc != 2) { + fprintf(stderr, "usage: %s <file to process>\n", argv[0]); + exit(1); + } + + if ((fp = open(argv[1], O_RDONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", argv[1]); + exit(1); + } + + sprintf(buf, "%s.vss", argv[1]); + if ((vfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", argv[1]); + if ((cfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + 
/*
 * rawtxidx.c (src/modules/texts/rawtext)
 *
 * Reads the text file named on the command line and writes three index files
 * next to it:
 *   <file>.vss  one record per entry: 4-byte offset + 2-byte size
 *   <file>.cps  one 4-byte position into .vss per chapter (and intro slots)
 *   <file>.bks  one 4-byte position into .cps per book (and intro slot)
 * All fields are written in the byte order of the machine running the tool.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>


char findbreak(int fp, int *offset, int *num1, int *num2, short *size);


/* Append one 4-byte index field; exit on a failed or short write. */
static void put32(int fd, long value)
{
	int32_t field = (int32_t) value;

	if (write(fd, &field, sizeof field) != (int) sizeof field) {
		perror("write");
		exit(1);
	}
}


/* Append one 2-byte index field; exit on a failed or short write. */
static void put16(int fd, short value)
{
	int16_t field = (int16_t) value;

	if (write(fd, &field, sizeof field) != (int) sizeof field) {
		perror("write");
		exit(1);
	}
}


/*
 * Open "<base><ext>" for writing, creating it if needed, and return the
 * descriptor.  Exits with a message if the name is too long or open() fails.
 * NOTE(review): the original flags never truncated an existing file; that is
 * kept here -- confirm whether O_TRUNC is wanted when re-indexing.
 */
static int open_index(const char *base, const char *ext)
{
	char path[4096];
	int len = snprintf(path, sizeof path, "%s%s", base, ext);
	int fd;

	if (len < 0 || (size_t) len >= sizeof path) {
		fprintf(stderr, "Couldn't open file: %s%s\n", base, ext);
		exit(1);
	}
	/* O_CREAT requires an explicit mode argument */
	if ((fd = open(path, O_CREAT|O_WRONLY, 0644)) == -1) {
		fprintf(stderr, "Couldn't open file: %s\n", path);
		exit(1);
	}
	return fd;
}


/*
 * Usage: rawtxidx <file to process>
 * Returns 0 on success, 1 when chapters or verses are found out of sequence.
 */
int main(int argc, char **argv)
{
	int fp, vfp, cfp, bfp;
	short size = 0;
	int num1 = 0, num2 = 0, offset = 0;
	int curbook = 0, curchap = 0, curverse = 0;
	int status = 0;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file to process>\n", argv[0]);
		exit(1);
	}

	if ((fp = open(argv[1], O_RDONLY)) == -1) {
		fprintf(stderr, "Couldn't open file: %s\n", argv[1]);
		exit(1);
	}

	vfp = open_index(argv[1], ".vss");
	cfp = open_index(argv[1], ".cps");
	bfp = open_index(argv[1], ".bks");

	put32(bfp, 0);		/* Book offset for testament intros */
	put32(cfp, 4);		/* Chapter offset for testament intro */

	/* Right now just zero out intros until parsing correctly */
	put32(vfp, 0);		/* Module intro */
	put16(vfp, 0);
	put32(vfp, 0);		/* Testament intro */
	put16(vfp, 0);

	while (!findbreak(fp, &offset, &num1, &num2, &size)) {

		if (num2 == 1) {		/* if we're at a new chapter */
			if (num1 == 1) {	/* if we're at a new book */
				put32(bfp, lseek(cfp, 0, SEEK_CUR));
				put32(cfp, lseek(vfp, 0, SEEK_CUR));	/* Book intro (cps) */
				put32(vfp, 0);				/* Book intro (vss) */
				put16(vfp, 0);
				curbook++;
				curchap = 0;
			}
			put32(cfp, lseek(vfp, 0, SEEK_CUR));
			curverse = 1;
			put32(vfp, 0);		/* Chapter intro */
			put16(vfp, 0);
			curchap++;
		}
		else curverse++;

		printf("%2d:%3d:%3d found at offset: %7d\n", curbook, num1, num2, offset);

		if (num1 != curchap) {
			fprintf(stderr, "Error: Found chaptures out of sequence\n");
			status = 1;
			break;
		}
		if (num2 != curverse) {
			fprintf(stderr, "Error: Found verses out of sequence\n");
			status = 1;
			break;
		}
		put32(vfp, offset);
		put16(vfp, size);
	}

	close(vfp);
	close(cfp);
	close(bfp);
	close(fp);
	return status;
}
/*
 * Scan forward in fp for the next "chapter:verse" reference and report it.
 *
 * The file is read one byte at a time through a 7-byte sliding window; a
 * reference is recognised when the window's middle byte is ':' and both the
 * three bytes before it and the three bytes after it yield a non-zero number.
 *
 *   offset  receives the file offset just past the verse number and the one
 *           separator byte that follows it
 *   num1    receives the chapter number
 *   num2    receives the verse number
 *   size    if non-NULL, receives the distance from *offset to the next
 *           reference (less that reference's "ch:vs" text and 2 more bytes),
 *           or to end of file when there is no further reference; the file
 *           position is then reset to *offset.  If NULL, no size is computed
 *           and the file position is left just past the window.
 *
 * Returns 0 when a reference was found, 1 when read() hit end of file or
 * failed first.
 */
char findbreak(int fp, int *offset, int *num1, int *num2, short *size)
{
	char buf[7];
	char numtext[32];	/* large enough for any "%d:%d" */
	int loop;
	int digits;
	int offset2, ch2, vs2;

	memset(buf, ' ', 7);

	while (1) {
		if (buf[3] == ':') {
			memcpy(numtext, buf, 7);
			numtext[7] = 0;		/* terminate the verse field for atoi() */
			for (loop = 0; loop < 7; loop++) {
				if (!isdigit((unsigned char) numtext[loop]))
					numtext[loop] = ' ';
			}
			numtext[3] = 0;		/* split chapter field from verse field */
			*num1 = atoi(numtext);
			*num2 = atoi(&numtext[4]);
			if (*num1 && *num2) {
				/* the window always holds 3 verse bytes; step over or back
				 * so that 1-, 2- and 3-digit verses all land on the text */
				digits = snprintf(numtext, sizeof numtext, "%d", *num2);
				*offset = (int) lseek(fp, 0, SEEK_CUR) - (2 - digits);
				if (size) {
					if (findbreak(fp, &offset2, &ch2, &vs2, 0)) {
						*size = (short) (lseek(fp, 0, SEEK_END) - (*offset));
					}
					else {
						int reflen = snprintf(numtext, sizeof numtext, "%d:%d", ch2, vs2);
						*size = (short) ((offset2 - (*offset)) - (reflen + 2));
					}
					lseek(fp, *offset, SEEK_SET);
				}
				return 0;
			}
		}
		memmove(buf, &buf[1], 6);
		if (read(fp, &buf[6], 1) != 1)
			return 1;
	}
}
&pos, 4); /* Testament intro */ + write(vfp, &size, 2); + + mykey = (argc == 3) ? "Matthew 1:1" : "Genesis 1:1"; + + while (!findbreak(fp, &offset, &num1, &num2, &size)) { + num1 = mykey.Chapter(); + num2 = mykey.Verse(); + if (num2 == 1) { /* if we're at a new chapter */ + if (num1 == 1) { /* if we're at a new book */ + pos = lseek(cfp, 0, SEEK_CUR); + write(bfp, &pos, 4); + pos = lseek(vfp, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfp, &pos, 4); + pos = 0; + tmp = 0; + write(vfp, &pos, 4); /* Book intro (vss) */ + write(vfp, &tmp, 2); + curbook++; + curchap = 0; + } + pos = lseek(vfp, 0, SEEK_CUR); + write(cfp, &pos, 4); + curverse = 1; + pos = 0; + tmp = 0; + write(vfp, &pos, 4); /* Chapter intro */ + write(vfp, &tmp, 2); + curchap++; + } + else curverse++; + + printf("%2d:%3d:%3d found at offset: %7d\n", curbook, num1, num2, offset); + + if (num1 != curchap) { + fprintf(stderr, "Error: Found chaptures out of sequence\n"); + break; + } + if (num2 != curverse) { + fprintf(stderr, "Error: Found verses out of sequence\n"); + break; + } + write(vfp, &offset, 4); + write(vfp, &size, 2); + mykey++; + } + + close(vfp); + close(cfp); + close(bfp); + close(fp); +} + + +char findbreak(int fp, int *offset, int *num1, int *num2, short *size) +{ + char buf[17]; + char buf2[7]; + char loop; + char offadj, inquotes, sizeadj; + int offset2, ch2, vs2; + + memset(buf, ' ', 17); + + while (1) { + offadj = -10; + inquotes = 0; + sizeadj = 0; + if (!memcmp(&buf[1], "\\f0\\fs16\\cf2\\up6", 15)) { + offadj = 0; + inquotes = 1; + sizeadj = (*buf == 10) ? 
/*
 * Scan forward in fp for the next RTF entry marker and report where the
 * entry text starts and how long it is.
 *
 * The file is read one byte at a time through a 17-byte sliding window.
 * Two markers are recognised:
 *   - the first 15 characters of "\f0\fs16\cf2\up6" at window[1]; the text
 *     that follows up to the next '}' is skipped as well
 *   - "\fi200\widctlpar" at window[0]
 *
 *   offset  receives the start of the entry text when size is non-NULL;
 *           when size is NULL it receives a position shortly before the
 *           marker instead (used as the end of the previous entry)
 *   num1    not written by this implementation
 *   num2    not written by this implementation
 *   size    if non-NULL, receives the distance from *offset to the next
 *           marker, or to end of file when there is none; the file position
 *           is then set to *offset + 17
 *
 * Returns 0 when a marker was found, 1 when read() hit end of file or failed
 * first.
 */
char findbreak(int fp, int *offset, int *num1, int *num2, short *size)
{
	char buf[17];
	char ch;
	/* plain int: as char these would misbehave where char is unsigned */
	int offadj, sizeadj;
	bool inquotes;
	int offset2, ch2, vs2;

	memset(buf, ' ', 17);

	while (1) {
		offadj = -10;		/* sentinel: no marker in the window */
		inquotes = false;
		sizeadj = 0;
		if (!memcmp(&buf[1], "\\f0\\fs16\\cf2\\up6", 15)) {
			offadj = 0;
			inquotes = true;
			sizeadj = (*buf == 10) ? -19 : -17;
		}
		if (!memcmp(buf, "\\fi200\\widctlpar", 16)) {
			offadj = -1;
			sizeadj = -18;
		}
		if (offadj > -10) {
			*offset = (int) lseek(fp, 0, SEEK_CUR) + offadj;
			if (size) {
				(*offset)++;
				if (inquotes) {
					/* skip up to and including the closing brace */
					while (read(fp, &ch, 1) == 1) {
						if (ch == '}')
							break;
						(*offset)++;
					}
				}
				/* look ahead for where this entry ends; ch2/vs2 are only
				 * placeholders, this findbreak() never fills them in */
				if (findbreak(fp, &offset2, &ch2, &vs2, 0)) {
					*size = (short) (lseek(fp, 0, SEEK_END) - (*offset));
				}
				else {
					*size = (short) (offset2 - (*offset));
				}
				lseek(fp, *offset+17, SEEK_SET);
			}
			else (*offset) += sizeadj;
			return 0;
		}
		memmove(buf, &buf[1], 16);
		if (read(fp, &buf[16], 1) != 1)
			return 1;
	}
}
/*
 * svetxidx.c (src/modules/texts/rawtext)
 *
 * Reads the text file named on the command line and writes three index files
 * next to it:
 *   <file>.vss  one record per entry: 4-byte offset + 2-byte size
 *   <file>.cps  one 4-byte position into .vss per chapter (and intro slots)
 *   <file>.bks  one 4-byte position into .cps per book (and intro slot)
 * All fields are written in the byte order of the machine running the tool.
 * Unlike its sibling tools, numbering problems are reported and resynchronised
 * rather than aborting the run.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <fcntl.h>
#ifndef __GNUC__
#include <io.h>
#else
#include <unistd.h>
#endif

#ifndef O_BINARY		/* O_BINARY is for Borland to be happy.  If we're in GNU, just define it to a NULL mask */
#define O_BINARY 0
#endif


char findbreak(int fd, long *offset, int *num1, int *num2, short *size);


/* Append one 4-byte index field; exit on a failed or short write. */
static void put32(int fd, long value)
{
	int32_t field = (int32_t) value;

	if (write(fd, &field, sizeof field) != (int) sizeof field) {
		perror("write");
		exit(1);
	}
}


/* Append one 2-byte index field; exit on a failed or short write. */
static void put16(int fd, short value)
{
	int16_t field = (int16_t) value;

	if (write(fd, &field, sizeof field) != (int) sizeof field) {
		perror("write");
		exit(1);
	}
}


/*
 * Open "<base><ext>" for binary writing, creating it if needed, and return
 * the descriptor.  Exits with a message if the name is too long or open()
 * fails.
 * NOTE(review): the original flags never truncated an existing file; that is
 * kept here -- confirm whether O_TRUNC is wanted when re-indexing.
 */
static int open_index(const char *base, const char *ext)
{
	char path[4096];
	int len = snprintf(path, sizeof path, "%s%s", base, ext);
	int fd;

	if (len < 0 || (size_t) len >= sizeof path) {
		fprintf(stderr, "Couldn't open file: %s%s\n", base, ext);
		exit(1);
	}
	/* O_CREAT requires an explicit mode argument */
	if ((fd = open(path, O_CREAT|O_WRONLY|O_BINARY, 0644)) == -1) {
		fprintf(stderr, "Couldn't open file: %s\n", path);
		exit(1);
	}
	return fd;
}


/*
 * Usage: svetxidx <file to process>
 * Returns 0 once the whole input has been scanned.
 */
int main(int argc, char **argv)
{
	int fd, vfd, cfd, bfd;
	long offset = 0;
	short size = 0;
	/* num1 is only assigned by findbreak() at chapter headings, so it must
	 * start out defined in case a verse comes first */
	int num1 = 0, num2 = 0;
	int curbook = 0, curchap = 0, curverse = 0;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file to process>\n", argv[0]);
		exit(1);
	}

	if ((fd = open(argv[1], O_RDONLY|O_BINARY)) == -1) {
		fprintf(stderr, "Couldn't open file: %s\n", argv[1]);
		exit(1);
	}

	vfd = open_index(argv[1], ".vss");
	cfd = open_index(argv[1], ".cps");
	bfd = open_index(argv[1], ".bks");

	put32(bfd, 0);		/* Book offset for testament intros */
	put32(cfd, 4);		/* Chapter offset for testament intro */

	/* Right now just zero out intros until parsing correctly */
	put32(vfd, 0);		/* Module intro */
	put16(vfd, 0);
	put32(vfd, 0);		/* Testament intro */
	put16(vfd, 0);

	while (!findbreak(fd, &offset, &num1, &num2, &size)) {

		if (num2 == 1) {		/* if we're at a new chapter */
			if (num1 == 1) {	/* if we're at a new book */
				put32(bfd, lseek(cfd, 0, SEEK_CUR));
				put32(cfd, lseek(vfd, 0, SEEK_CUR));	/* Book intro (cps) */
				put32(vfd, 0);				/* Book intro (vss) */
				put16(vfd, 0);
				curbook++;
				curchap = 0;
			}
			put32(cfd, lseek(vfd, 0, SEEK_CUR));
			curverse = 1;
			put32(vfd, 0);		/* Chapter intro */
			put16(vfd, 0);
			curchap++;
		}
		else curverse++;

		printf("%2d:%3d:%3d found at offset: %7ld\n", curbook, num1, num2, offset);

		if (num1 != curchap) {
			fprintf(stderr, "Error: Found chaptures out of sequence (%2d:%3d:%3d)\n", curbook, num1-1, num2);
			curchap = num1;		/* resynchronise and carry on */
		}
		if (num2 != curverse) {
			fprintf(stderr, "Error: Found verses out of sequence (%2d:%3d:%3d)\n", curbook, num1, num2-1);
			curverse = num2;	/* resynchronise and carry on */
			put32(vfd, offset);	/* zero-size placeholder record */
			put16(vfd, 0);
		}
		put32(vfd, offset);
		put16(vfd, size);
	}

	close(vfd);
	close(cfd);
	close(bfd);
	close(fd);
	return 0;
}
/*
 * Scan forward in fd for the next numbered line and report it.
 *
 * The file is read one byte at a time through a 7-byte sliding window.  A
 * candidate is a newline (byte 10) immediately followed by a digit.  The
 * number is parsed together with the word after it:
 *   - if that word starts with "KAP" the number is stored in *num1 and the
 *     scan continues
 *   - otherwise the number is a verse: it is stored in *num2 and the rest of
 *     the line is measured
 *
 *   offset  receives the file offset just past the verse number and the one
 *           separator byte that follows it
 *   num1    receives the number of the most recent "KAP" line seen during
 *           this call; left untouched if none was seen
 *   num2    receives the verse number
 *   size    receives the number of bytes from *offset up to, but not
 *           including, the next byte 10 or 13, or up to end of file
 *
 * When a line terminator was found the file position is left on it, so the
 * next call sees it as the newline that introduces the following line.
 *
 * Returns 0 when a verse was found, 1 when read() hit end of file or failed
 * first.
 */
char findbreak(int fd, long *offset, int *num1, int *num2, short *size)
{
	char buf[8];
	char word[7];
	char numtext[16];
	char ch;
	int digits;
	int found_eol;
	long end;

	memset(buf, ' ', 7);
	buf[7] = 0;

	while (1) {

		memmove(buf, &buf[1], 6);
		if (read(fd, &buf[6], 1) != 1)
			return 1;

		if ((buf[0] != 10) || !isdigit((unsigned char) buf[1]))
			continue;

		word[0] = 0;	/* stays empty if no word follows the number */
		sscanf(buf, "%d %6s", num2, word);
		if (!strncmp(word, "KAP", 3)) {
			*num1 = *num2;
			continue;
		}

		/* the window holds 5 bytes after the first digit; keep only the
		 * remaining digits and one separator out of the offset */
		digits = snprintf(numtext, sizeof numtext, "%d", *num2);
		(*offset) = (long) lseek(fd, 0, SEEK_CUR) - (5 - digits);

		found_eol = 0;
		while (read(fd, &ch, 1) == 1) {
			if ((ch == 10) || (ch == 13)) {
				found_eol = 1;
				break;
			}
		}

		end = (long) lseek(fd, 0, SEEK_CUR);
		if (found_eol) {
			*size = (short)(end - *offset) - 1;	/* exclude the terminator */
			lseek(fd, -1, SEEK_CUR);		/* re-read it next call */
		}
		else {
			*size = (short)(end - *offset);		/* text runs to end of file */
		}
		break;
	}
	return 0;
}
O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", argv[1]); + if ((bfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + pos = 0; + write(bfp, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfp, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + pos = 0; + size = 0; + write(vfp, &pos, 4); /* Module intro */ + write(vfp, &size, 2); + write(vfp, &pos, 4); /* Testament intro */ + write(vfp, &size, 2); + + mykey = (argc == 3) ? "Matthew 1:1" : "Genesis 1:1"; + + while (!findbreak(fp, &offset, &num1, &num2, &size)) { + num1 = mykey.Chapter(); + num2 = mykey.Verse(); + if (num2 == 1) { /* if we're at a new chapter */ + if (num1 == 1) { /* if we're at a new book */ + pos = lseek(cfp, 0, SEEK_CUR); + write(bfp, &pos, 4); + pos = lseek(vfp, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfp, &pos, 4); + pos = 0; + tmp = 0; + write(vfp, &pos, 4); /* Book intro (vss) */ + write(vfp, &tmp, 2); + curbook++; + curchap = 0; + } + pos = lseek(vfp, 0, SEEK_CUR); + write(cfp, &pos, 4); + curverse = 1; + pos = 0; + tmp = 0; + write(vfp, &pos, 4); /* Chapter intro */ + write(vfp, &tmp, 2); + curchap++; + } + else curverse++; + + printf("%2d:%3d:%3d found at offset: %7d\n", curbook, num1, num2, offset); + + if (num1 != curchap) { + fprintf(stderr, "Error: Found chaptures out of sequence\n"); + break; + } + if (num2 != curverse) { + fprintf(stderr, "Error: Found verses out of sequence\n"); + break; + } + write(vfp, &offset, 4); + write(vfp, &size, 2); + mykey++; + } + + close(vfp); + close(cfp); + close(bfp); + close(fp); +} + + +char findbreak(int fp, int *offset, int *num1, int *num2, short *size) +{ + char buf[17]; + char buf2[7]; + char buf3[7]; + char loop; + char offadj, inquotes, sizeadj; + int offset2, ch2, vs2; + + strcpy (buf3, "\\par "); + buf3[5] = 10; + 
/*
 * Scan forward in fp for the next "\par" entry marker and report where the
 * entry text starts and how long it is.
 *
 * The file is read one byte at a time through a 17-byte sliding window.
 *
 *   offset  receives the file position at the time of the match plus an
 *           adjustment (offadj); one more is added when size is non-NULL,
 *           and sizeadj (-7) is added instead when size is NULL
 *   num1    not written by this implementation
 *   num2    not written by this implementation
 *   size    if non-NULL, receives the distance from *offset to the position
 *           reported by a look-ahead call for the next marker, or to end of
 *           file when there is none; the file position is then set to
 *           *offset - sizeadj
 *
 * Returns 0 when a marker was found, 1 when read() hit end of file or failed
 * first.
 *
 * NOTE(review): this listing appears to have had runs of whitespace collapsed.
 * As shown, "\\par " is 5 characters long while it is compared over 6 bytes
 * and is followed by a test of buf[6]; the original literals presumably held
 * two spaces -- confirm against the real source before relying on the
 * literals below.
 */
char findbreak(int fp, int *offset, int *num1, int *num2, short *size)
{
	char buf[17];
	char buf2[7];
	char buf3[7];
	char loop;
	/* NOTE(review): these hold negative sentinels in plain char, so the
	 * "offadj > -100" test below depends on char being signed */
	char offadj, inquotes, sizeadj;
	int offset2, ch2, vs2;
	
	/* buf3 = the "\par" literal with its byte 5 replaced by a newline (10) */
	strcpy (buf3, "\\par ");
	buf3[5] = 10;
	memset(buf, ' ', 17);

	while (1) {
		offadj = -100;		/* sentinel: no marker in the window */
		inquotes = 0;
		sizeadj = 0;
		/* end-of-text marker; only its first 16 bytes are compared */
		if (!memcmp(buf, "\\par FIN DEL NUEVO TESTAMENTO", 16)) {
			offadj = -11;
//			inquotes = 1;
			sizeadj = -7;
		}

		/* "\par" + newline counts as a break only in the look-ahead call
		 * (size == NULL), i.e. only as the end of the current entry */
		if ((!memcmp(buf, buf3, 6)) && (!size)) {
			offadj = -11;
//			inquotes = 1;
			sizeadj = -7;
		}
		/* "\par" followed by a number: loop stops on the first non-digit
		 * in buf[7..9], so (loop - 6) is the number of digits found */
		if (!memcmp(buf, "\\par ", 6)) {
			if (isdigit(buf[6])) {
				for (loop = 7; loop < 10; loop++) {
					if (!isdigit(buf[loop]))
						break;
				}
				offadj = -(11 - (loop - 6));
	//			inquotes = 1;
				sizeadj = -7;
			}
		}
/*
		if (!memcmp(buf, "\\fi200\\widctlpar", 16)) {
			offadj = -1;
//			inquotes = 1;
			sizeadj = -18;
		}
*/
		if (offadj > -100) {
			*offset = lseek(fp, 0, SEEK_CUR) + offadj;
			if (size) {
				(*offset)++;
				/* inquotes is never set above, so this loop does not run */
				while (inquotes) {
					while (read(fp, buf2, 1) == 1) {
						if (*buf2 == '}')
							break;
						(*offset)++;
					}
					inquotes--;
				}
				/* look ahead for the next marker to measure this entry;
				 * ch2 and vs2 are not written by the call */
				if (findbreak(fp, &offset2, &ch2, &vs2, 0)) {
					*size = (short) (lseek(fp, 0, SEEK_END) - (*offset));
				}
				else {
					*size = (offset2 - (*offset));
				}
				/* sizeadj is -7 here, so this seeks to *offset + 7 */
				lseek(fp, *offset-sizeadj, SEEK_SET);
			}
			else (*offset) += sizeadj;
			return 0;
		}
		/* slide the window one byte and read the next one */
		memmove(buf, &buf[1], 16);
		if (read(fp, &buf[16], 1) != 1)
			return 1;
	}
}