diff options
author | Troy A. Griffitts <scribe@crosswire.org> | 2011-05-13 04:13:48 +0000 |
---|---|---|
committer | Troy A. Griffitts <scribe@crosswire.org> | 2011-05-13 04:13:48 +0000 |
commit | faebf4dec366e9257d38e6ea49aaca63c70cb69f (patch) | |
tree | b97f3fd93d7129b9b8a53c5d7e1faf5fad3e5a00 /modules/lockman | |
parent | 2960c50d0d3a8a6b596fe6a466b36d6926d266b7 (diff) | |
download | sword-tools-faebf4dec366e9257d38e6ea49aaca63c70cb69f.tar.gz |
Updated nasb convert util to be generic lockman converter (spanish names hardcoded right now)
compile update for modedit. removed context mod by default
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@324 07627401-56e2-0310-80f4-f8cd0041bdcd
Diffstat (limited to 'modules/lockman')
-rw-r--r-- | modules/lockman/cutil/Makefile | 15 | ||||
-rw-r--r-- | modules/lockman/cutil/lockosis.cpp | 1221 |
2 files changed, 1236 insertions, 0 deletions
diff --git a/modules/lockman/cutil/Makefile b/modules/lockman/cutil/Makefile new file mode 100644 index 0000000..84341f9 --- /dev/null +++ b/modules/lockman/cutil/Makefile @@ -0,0 +1,15 @@ +INCLUDES += -I/usr/include/sword -DHAVESWORD +LIBS += -lsword -lz -lsword -lz +#comment this out if you didn't compile sword with lucene support +LIBS += -lclucene +#comment these out if you didn't compile sword with ICU support +LIBS += -licui18n -licuuc -licudata -lpthread -licuio + +all: lockosis + +lockosis.o: lockosis.cpp + g++ -g -c ${INCLUDES} lockosis.cpp +lockosis: lockosis.o + g++ -g -o lockosis lockosis.o ${LIBS} ${LIBS} +clean: + rm lockosis lockosis.o diff --git a/modules/lockman/cutil/lockosis.cpp b/modules/lockman/cutil/lockosis.cpp new file mode 100644 index 0000000..890f8ef --- /dev/null +++ b/modules/lockman/cutil/lockosis.cpp @@ -0,0 +1,1221 @@ +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> +#include <stdlib.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <iostream> +#include <string> + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +#ifdef HAVESWORD +#include <versekey.h> +using namespace sword; +#endif + +using std::string; +using std::cout; +using std::endl; + +static const char *osisOTBooks[] = { + "Gen", "Exod", "Lev", "Num", "Deut", "Josh", "Judg", + "Ruth", "1Sam", "2Sam", "1Kgs", "2Kgs", "1Chr", + "2Chr", "Ezra", "Neh", "Esth", "Job", "Ps", "Prov", + "Eccl", "Song", "Isa", "Jer", "Lam", + "Ezek", "Dan", "Hos", "Joel", "Amos", "Obad", "Jonah", "Mic", + "Nah", "Hab", "Zeph", "Hag", "Zech", "Mal", +// extra-Biblical + "Bar", "PrAzar", "Bel", "Sus", "1Esd", "2Esd", "AddEsth", + "EpJer", "Jdt", "1Macc", "2Macc", "3Macc", "4Macc", "PrMan", + "Ps151", "Sir", "Tob", "Wis"}; +static const char *osisNTBooks[] = { + "Matt", "Mark", "Luke", "John", "Acts", "Rom", "1Cor", + "2Cor", "Gal", "Eph", "Phil", "Col", + "1Thess", "2Thess", "1Tim", "2Tim", "Titus", + "Phlm", "Heb", "Jas", "1Pet", 
"2Pet", "1John", "2John", + "3John", "Jude", "Rev"}; +static const char **osisBooks[] = { osisOTBooks, osisNTBooks }; +const char osisMax[2] = {57, 27}; + +static const char *nasbOTBooks[] = { + +"GENESIS", "EXODO", "LEVITICO", "NUMEROS", "DEUTERONOMIO", "JOSUE", "JUECES", +"RUT", "1 SAMUEL", "2 SAMUEL", "1 REYES", "2 REYES", "1 CRONICAS", "2 CRONICAS", +"ESDRAS", "NEHEMIAS", "ESTER", "JOB", "LOS SALMOS", "LOS PROVERBIOS", "ECLESIASTES", +"CANTARES", "ISAIAS", "JEREMIAS", "LAMENTACIONES", "EZEQUIEL", "DANIEL", "OSEAS", +"JOEL", "AMOS", "ABDIAS", "JONAS", "MIQUEAS", "NAHUM", "HABACUC", +"SOFONIAS", "HAGEO", "ZACARIAS", "MALAQUIAS" +/* + "GENESIS", "EXODUS", "LEVITICUS", "NUMBERS", "DEUTERONOMY", "JOSHUA", "JUDGES", + "RUTH", "1 SAMUEL", "2 SAMUEL", "1 KINGS", "2 KINGS", "1 CHRONICLES", "2 CHRONICLES", + "EZRA", "NEHEMIAH", "ESTHER", "JOB", "PSALMS", "PROVERBS", "ECCLESIASTES", + "SONG OF SOLOMON", "ISAIAH", "JEREMIAH", "LAMENTATIONS", "EZEKIEL", "DANIEL", "HOSEA", + "JOEL", "AMOS", "OBADIAH", "JONAH", "MICAH", "NAHUM", "HABAKKUK", + "ZEPHANIAH", "HAGGAI", "ZECHARIAH", "MALACHI" +*/ +}; +static const char *nasbNTBooks[] = { +"SAN MATEO", "SAN MARCOS", "SAN LUCAS", "SAN JUAN", "HECHOS", "ROMANOS", "1 CORINTIOS", +"2 CORINTIOS", "GALATAS", "EFESIOS", "FILIPENSES", "COLOSENSES", "1 TESALONICENSES", "2 TESALONICENSES", +"1 TIMOTEO", "2 TIMOTEO", "TITO", "FILEMON", "HEBREOS", "SANTIAGO", "1 SAN PEDRO", +"2 SAN PEDRO", "1 SAN JUAN", "2 SAN JUAN", "3 SAN JUAN", "SAN JUDAS", "EL APOCALIPSIS" +/* + "MATTHEW", "MARK", "LUKE", "JOHN", "ACTS", "ROMANS", "1 CORINTHIANS", + "2 CORINTHIANS", "GALATIANS", "EPHESIANS", "PHILIPPIANS", "COLOSSIANS", "1 THESSALONIANS", "2 THESSALONIANS", + "1 TIMOTHY", "2 TIMOTHY", "TITUS", "PHILEMON", "HEBREWS", "JAMES", "1 PETER", + "2 PETER", "1 JOHN", "2 JOHN", "3 JOHN", "JUDE", "REVELATION" +*/ +}; +static const char **nasbBooks[] = { nasbOTBooks, nasbNTBooks }; +const char nasbMax[2] = {39, 27}; + +char readline(int fd, char **buf) { + char ch; + 
if (*buf) + delete [] *buf; + *buf = 0; + int len; + + + long index = lseek(fd, 0, SEEK_CUR); + // clean up any preceding white space + while ((len = read(fd, &ch, 1)) == 1) { + if ((ch != 10) && (ch != 13) && (ch != ' ') && (ch != '\t')) + break; + else index++; + } + + + while (ch != 10) { + if ((len = read(fd, &ch, 1)) != 1) + break; + } + + int size = (lseek(fd, 0, SEEK_CUR) - index) - 1; + + *buf = new char [ (size+2) * 2 ]; + + if (size > 0) { + lseek(fd, index, SEEK_SET); + read(fd, *buf, size); + read(fd, &ch, 1); //pop terminating char + (*buf)[size] = 0; + + // clean up any trailing junk on buf + int buflen = strlen(*buf); + for (char *it = *buf+(buflen-1); it > *buf; it--) { + if ((*it != 10) && (*it != 13) && (*it != ' ') && (*it != '\t')) + break; + else *it = 0; + } + // convert all spanish characters to combined + for (unsigned char *it = (unsigned char *)(*buf)+1; *it; it++) { + switch (*it) { +/* + case 0xE2 : // ‘ + if (isalpha(it[-1]) && it[1] == 0x80 && it[2] == 0x98) { + memmove(it, it+1, buflen - (it-(unsigned char *)*buf)); + buflen--; + it[0] = 0xcc; + it[1] = 0x80; // yeah, I know it's already 0x80, but that's just a coincidence + } + else { + fprintf(stderr, "oddity: %s\n", *buf); + exit(-4); + } + break; +*/ + case 0x60 : // ` + if (isalpha(it[-1])) { + memmove(it+1, it, buflen - (it-(unsigned char *)*buf) + 1); + buflen++; + it[0] = 0xcc; + it[1] = 0x80; + } + else { +// fprintf(stderr, "oddity: %s\n", *buf); +// exit(-4); + } + break; + case 0x7E : // ~ + memmove(it+1, it, buflen - (it-(unsigned char *)*buf) + 1); + buflen++; + it[0] = 0xcc; + it[1] = 0x83; + break; + } + } + } + else **buf = 0; + return !len; +} + + +void outHeader(); +void outTrailer(); +void unicodeTicks(string &outstring); +void prepLine(string &outstring, int currentTestament, bool note); +string getNoteBody(int nfd, string &noteLine, string osisID, int currentBookNo, int currentChapter, int currentVerse, string nStr, const char *nx); + + + + +int main(int argc, char 
**argv) { + + // Let's test our command line arguments + if (argc < 2) { +// fprintf(stderr, "usage: %s <vpl_file> </path/to/mod> [0|1 - file includes prepended verse references]\n", argv[0]); + fprintf(stderr, "usage: %s <biblefile> [notesfile]\n\n", argv[0]); + exit(-1); + } + + + // Let's see if we can open our input file + int fd = open(argv[1], O_RDONLY|O_BINARY); + if (fd < 0) { + fprintf(stderr, "error: %s: couldn't open input file: %s \n", argv[0], argv[1]); + exit(-2); + } + + int fdn = -1; + if (argc > 2) { + fdn = open(argv[2], O_RDONLY|O_BINARY); + if (fdn < 0) { + fprintf(stderr, "error: %s: couldn't open input file: %s \n", argv[0], argv[2]); + exit(-2); + } + } + + outHeader(); + + string header; + char *buffer = 0; + char *nbuffer = 0; + int result = 0; + string currentBook = ""; + int currentBookNo = 0; + int currentTestament = 0; + int currentChapter = 0; + int currentVerse = 0; + bool inBook = false; + bool inChapter = false; + bool inVerse = false; + string noteLine = ""; + string outstring; + result = readline(fd, &buffer); + string lookahead = buffer; + unicodeTicks(lookahead); + prepLine(lookahead, currentTestament, false); + do { + int i, j; + result = readline(fd, &buffer); + if (lookahead.length()) { + string savebuf = buffer; + if (buffer) + delete [] buffer; + buffer = new char [ lookahead.length() + 1]; + strcpy(buffer, lookahead.c_str()); + lookahead = savebuf; + unicodeTicks(lookahead); + prepLine(lookahead, currentTestament, false); + result = 0; + } + else if (!result) { + string savebuf = buffer; + result = readline(fd, &buffer); + lookahead = buffer; + unicodeTicks(lookahead); + prepLine(lookahead, currentTestament, false); + + + if (buffer) + delete [] buffer; + buffer = new char [ savebuf.length() + 1]; + strcpy(buffer, savebuf.c_str()); + result = 0; + } + + outstring = buffer; + + + + // BOOK NAMES <BN> + if (!strncmp(outstring.c_str(), "<BN>", 4)) { + string book = outstring.c_str()+4; + book = book.substr(0, 
book.find_first_of("<")); + outstring = ""; + if (inVerse) { + outstring += "</verse>"; + inVerse = false; + } + if (inChapter) { + outstring += "</chapter>"; + inChapter = false; + } + if (inBook) { + outstring += "</div>"; + inBook = false; + } + outstring += (string)"<div type=\"book\" osisID=\""; + for (i = 0; i < 2; i++) { + for (j = 0; j < nasbMax[i]; j++) { + if (book == nasbBooks[i][j]) { + currentBook = osisBooks[i][j]; + outstring += currentBook; + currentTestament = i; + break; + } + } + if (j < nasbMax[i]) + break; + } + if (i > 1) { + fprintf(stderr, "error: %s: couldn't find book match for %s. Please check book array in conversion program.\n", argv[0], book.c_str()); + exit(-3); + } + outstring += (string)"\"><title type=\"main\" subType=\"x-Book\">" + book + "</title>"; + inBook = true; + } + + + // CHAPTERS + //<SN>PSALM + if ((!strncmp(outstring.c_str(), "<CN>", 4)) || (!strncmp(outstring.c_str(), "<SN>", 4))) { + string chapterTitle = outstring.c_str()+4; + chapterTitle = chapterTitle.substr(0, chapterTitle.find_first_of("<")); + string chapter = chapterTitle.substr(chapterTitle.find_first_of(" ")+1); + outstring = ""; + if (inVerse) { + outstring += "</verse>"; + inVerse = false; + } + if (inChapter) { + outstring += "</chapter>"; + inChapter = false; + } + outstring += (string)"<chapter osisID=\"" + currentBook + "." 
+ chapter + "\">"; + outstring += (string)"<title type=\"sub\" subType=\"x-Chapter\">" + chapterTitle + "</title>"; + currentChapter = atoi(chapter.c_str()); + inChapter = true; + } + + //<SF> + if (!strncmp(outstring.c_str(), "<SF>", 4)) { + string heading = outstring.c_str()+4; + heading = heading.substr(0, heading.find("</SF>")); + outstring = ""; + + if (!strncmp(lookahead.c_str(), "<PM>", 4)) { + lookahead.erase(0, 4); + outstring += "<milestone type=\"line\" subType=\"x-PM\"/>"; + } + if (inVerse) { + outstring += "</verse>\n"; + inVerse = false; + } + outstring += (string)"<title type=\"acrostic\" canonical=\"true\" subType=\"x-preverse\">" + heading + (string)"</title>"; + } + + //<SH> + if (!strncmp(outstring.c_str(), "<SH>", 4)) { + string heading = outstring.c_str()+4; + heading = heading.substr(0, heading.find("</SH>")); + outstring = ""; + + if (!strncmp(lookahead.c_str(), "<PM>", 4)) { + lookahead.erase(0, 4); + outstring += "<milestone type=\"line\" subType=\"x-PM\"/>"; + } + if (inVerse) { + outstring += "</verse>\n"; + inVerse = false; + } + outstring += (string)"<title type=\"section\" subType=\"x-preverse\">" + heading + (string)"</title>"; + } + if (!strncmp(outstring.c_str(), "<SS>", 4)) { + string heading = (outstring.c_str()+4); + heading = heading.substr(0, heading.find("</SS>")); + outstring = (string)"<title type=\"psalm\" canonical=\"true\" subType=\"x-preverse\">" + heading + (string)"</title>"; + } + if (!strncmp(outstring.c_str(), "<SB>", 4)) { + string heading = (outstring.c_str()+4); + heading = heading.substr(0, heading.find("</SB>")); + outstring = (string)"<title type=\"scope\" subType=\"x-preverse\">" + heading + (string)"</title>"; + } + + + + // {{x:y}} + // DUH, find_first_of looks for the first occurance of ANY single character of the supplied string +// int start = outstring.find_first_of("{{"); // this is whacked and fails on ">[{.." Try it! 
+ const char *outstr = outstring.c_str(); + const char *found = strstr(outstr, "{{"); + int start = (found) ? (found - outstr) : -1; +// ---- end of whacked replacement + + if (start > -1) { + found = strstr(outstr, "}}"); + int end = (found) ? (found - outstr) : -1; + end++; + int testmt = 0, book, chap; + string bkch = outstring.substr(start+2, end-start-2); + sscanf(bkch.c_str(), "%d:%d", &book, &chap); + currentChapter = chap; + int vNumEnd = outstring.find_first_of(" ", end); + currentVerse = atoi(outstring.substr(end+1, vNumEnd-end-1).c_str()); + currentBookNo = book; + if (book > nasbMax[0]) { + testmt = 1; + book -= nasbMax[0]; + } + if (currentBook != osisBooks[testmt][book-1]) { + fprintf(stderr, "error: %s: Found a book/chapter identifier out of place: {{%d:%d}} in book %s, chapter %d; bkch = %s; outstring = %s\n", argv[0], book, chap, currentBook.c_str(), currentChapter, bkch.c_str(), outstring.c_str()); + exit(-3); + } + char chapString[20], verseString[20]; + sprintf(chapString, "%d", currentChapter); + sprintf(verseString, "%d", currentVerse); + string newstring = ""; + if (inVerse) { + newstring += "</verse>"; + inVerse = false; + } + newstring += "<verse osisID=\"" + currentBook + (string)"." + (string)chapString + (string)"." + (string)verseString + (string) "\">"; + outstring.replace(start, vNumEnd-start+1, newstring); + inVerse = true; + noteLine = ""; + } + + + + // multiple occurances on a line stuff + while (1) { + + // NOTE + outstr = outstring.c_str(); + found = strstr(outstr, "<N"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + int end = outstring.find_first_of(">", start+1); + string nStr = outstring.substr(start+2, end-start-2); + + // NOTE <N#> + if (isdigit(nStr.c_str()[0])) { + char chapString[20], verseString[20]; + sprintf(chapString, "%d", currentChapter); + sprintf(verseString, "%d", currentVerse); + string osisID = (currentBook + (string)"." + (string)chapString + (string)"." 
+ (string) verseString).c_str(); + string noteBody = getNoteBody(fdn, noteLine, osisID, currentBookNo, currentChapter, currentVerse, nStr, "N"); + outstring.replace(start, end-start+1, (string)"<note type=\"explanation\" osisID=\"" + osisID + (string)".note." + nStr + (string) "\" n=\"" + nStr + (string) "\">" + noteBody + "</note>"); + continue; + } + } + + + + // <RS> + outstr = outstring.c_str(); + found = strstr(outstr, "<RS>"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 4, (string)"<q who=\"Jesus\">"); + continue; + + } + + // <RS> + outstr = outstring.c_str(); + found = strstr(outstr, "</RS>"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 5, (string)"</q>"); + continue; + + } + // <RT> + outstr = outstring.c_str(); + found = strstr(outstr, "<RT>"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 4, (string)"<milestone type=\"x-RT\"/>"); + continue; + + } + outstr = outstring.c_str(); + found = strstr(outstr, "<R"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + int end = outstring.find_first_of(">", start+1); + string nStr = outstring.substr(start+2, end-start-2); + + char chapString[20], verseString[20]; + sprintf(chapString, "%d", currentChapter); + sprintf(verseString, "%d", currentVerse); + string osisID = (currentBook + (string)"." + (string)chapString + (string)"." + (string) verseString).c_str(); + string noteBody = getNoteBody(fdn, noteLine, osisID, currentBookNo, currentChapter, currentVerse, nStr, "R"); + outstring.replace(start, end-start+1, (string)"<note type=\"crossReference\" osisID=\"" + osisID + (string)".xref." + nStr + (string) "\" n=\"" + nStr + (string)"\">" + noteBody + "</note>"); + continue; + } + // transChange added {} + outstr = outstring.c_str(); + found = strstr(outstr, "{"); + start = (found) ? 
(found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 1, (string)"<transChange type=\"added\">"); + int end = outstring.find_first_of("}", start+1); + outstring.erase(end, 1); + while ((!isalpha(outstring[end]))&&(outstring[end]!='>')) end--; + outstring.insert(end+1, "</transChange>"); + continue; + + } +/* + // transChange tenseChange * + outstr = outstring.c_str(); + found = strstr(outstr, "*"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 1, (string)"<transChange type=\"tenseChanged\">"); + for (end = start + 34; (end < outstring.length()); end++) { + if ((!isalpha(outstring[end])) && + (outstring[end] != '\'')) + break; + } + outstring.replace(end, 1, "</transChange>"); + continue; + + } +*/ + // <,> + outstr = outstring.c_str(); + found = strstr(outstr, "<,>"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 3, (string)"<milestone type=\"x-superiorComma\"/>"); + continue; + + } + // <NA> + outstr = outstring.c_str(); + found = strstr(outstr, "<NA>"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 4, (string)"<milestone type=\"x-NA\"/>"); + continue; + + } + // <NB> + outstr = outstring.c_str(); + found = strstr(outstr, "<NB>"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 4, (string)"<milestone type=\"x-NB\"/>"); + continue; + + } + // <NC> + outstr = outstring.c_str(); + found = strstr(outstr, "<NC>"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 4, (string)"<milestone type=\"x-NC\"/>"); + continue; + + } + // paragraph break <PM> + outstr = outstring.c_str(); + found = strstr(outstr, "<PM>"); + start = (found) ? 
(found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 4, (string)"<milestone type=\"line\" subType=\"x-PM\"/>"); + continue; + + } + // poetry break <PN> + outstr = outstring.c_str(); + found = strstr(outstr, "<PN>"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 4, (string)"<milestone type=\"x-PN\" />"); + continue; + + } + // poetry break <PO> + outstr = outstring.c_str(); + found = strstr(outstr, "<PO>"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 4, (string)"<milestone type=\"line\" subType=\"x-PO\" />"); + continue; + + } + // poetry break <PE> + outstr = outstring.c_str(); + found = strstr(outstr, "<PE>"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 4, (string)"<milestone type=\"line\" subType=\"x-PE\" />"); + continue; + + } + // letter indent <HL> + outstr = outstring.c_str(); + found = strstr(outstr, "<HL>"); + start = (found) ? 
(found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 4, (string)"<milestone type=\"x-HL\" />"); + continue; + + } + break; + } + + int strongsStart = 0; + int transChangeStart = 0; + bool strongsFound = false; + bool intoken = false; + bool intag = false; + bool inNote = false; + int tokenStart = 0; + string lastToken = ""; + string previousToken = ""; + int tenseChange = -1; + // strongs numbers + for (int i = 0; i < outstring.length(); i++) { + if ((!inNote) && (!intoken) && (!intag) && (!strongsFound)) { + if ((outstring[i] != ' ') && ((isalpha(outstring[i])) || (isdigit(outstring[i])))) { + strongsStart = i; + strongsFound = true; + } + } + if (outstring[i] =='*') + tenseChange = i; + if (outstring[i] == '<') { tokenStart = i+1; intoken = true; } + if (outstring[i] == '>') { + intoken = false; + previousToken = lastToken; + lastToken = outstring.substr(tokenStart, i-tokenStart); + // Not completely safe, but works for current NASB data + if (strchr(lastToken.c_str(), '/')) + intag = false; + else intag = true; + if ((intag)&&(!strncmp(lastToken.c_str(), "transChange", 11))) { + transChangeStart = i+1; + } +/* + if (!strncmp(lastToken.c_str(), "seg", 3)) { + strongsFound = false; + strongsStart = i+1; + } + if (!strncmp(lastToken.c_str(), "divineName", 10)) { + strongsFound = false; + strongsStart = i+1; + } +*/ + if (!strncmp(lastToken.c_str(), "/divineName", 10)) { + strongsFound = false; + strongsStart = i+1; + } + if (!strncmp(lastToken.c_str(), "note", 4)) { + strongsFound = false; + strongsStart = i+1; + inNote = true; + } + if (!strncmp(lastToken.c_str(), "/note", 5)) { + strongsFound = false; + strongsStart = i+1; + inNote = false; + } + if (!strncmp(lastToken.c_str(), "q who=\"Jesus\"", 13)) { + strongsFound = false; + strongsStart = i+1; + } + if (!strncmp(lastToken.c_str(), "seg type=\"otPassage\"", 19)) { + strongsFound = false; + strongsStart = i+1; + } + if (!strncmp(lastToken.c_str(), "/transChange", 12)) { + strongsFound = 
false; + strongsStart = i+1; + } + if (!strncmp(lastToken.c_str(), "milestone", 9)) { + strongsFound = false; + strongsStart = i+1; + } + if (!strncmp(lastToken.c_str(), "/seg", 4)) { + strongsFound = false; + strongsStart = i+1; + } + if ((!strncmp(lastToken.c_str(), "verse", 5))) { + intag = false; + } + + if ( (!strncmp(lastToken.c_str(), "MG", 2)) || + (!strncmp(lastToken.c_str(), "MH", 2))) { + + // insert </w> + // fix tenseChange to be inside <w> so we can include a subset of the <w> content. + outstring.replace(tokenStart-1, lastToken.length()+2, ((tenseChange > -1) ? "</w></transChange>":"</w>")); + i = (tokenStart-1) + ((tenseChange > -1) ? 18:4); + + // build <w ... > tag + char lang = lastToken[1]; // H or G + lastToken.replace(0, 1, "<w lemma=\"strong:"); + while ((start = lastToken.find(", ")) > -1) { + lastToken.replace(start, 2, (string)" strong:" + lang); + } + lastToken += "\">"; + intag = false; + + + if (tenseChange > -1) { + lastToken.insert(0, "<transChange type=\"tenseChange\">"); + } + if (!strncmp(previousToken.c_str(), "transChange type=\"added", 23)) { + outstring.insert(transChangeStart, lastToken); + intag = true; + i += lastToken.length() - 1; // (-1 because we're about to i++) + } + + // insert our token + else { + outstring.insert(strongsStart, lastToken); + i += lastToken.length() - 1; // (-1 because we're about to i++) + } + strongsStart = i+1; + strongsFound = false; + if (tenseChange > -1) { + // relocate because position may have changed from all the token inserts + const char *buf = outstring.c_str(); + tenseChange = (strchr(buf, '*') - buf); + outstring.erase(tenseChange, 1); + tenseChange = -1; + } + } + } + } + + + // clean up stuff that didn't work quite right + while (1) { + + // divineName strongs tags misorderings + string target = "</w></divineName></seg>"; + size_t s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "</divineName></seg></w>"); + continue; + } + target = 
"</w>,</divineName></seg>"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "</divineName></seg></w>,"); + continue; + } + + break; + } + + + std::cout << outstring; + if (!result) std::cout << "\n"; + } + while (!result); + outstring = ""; + if (inVerse) { + outstring += "</verse>"; + inVerse = false; + } + if (inChapter) { + outstring += "</chapter>"; + inChapter = false; + } + if (inBook) { + outstring += "</div>"; + inBook = false; + } + std::cout << outstring; + + outTrailer(); + + // clean up our buffers that readline might have allocated + if (buffer) + delete [] buffer; + if (nbuffer) + delete [] nbuffer; + + close(fd); + + if (fdn > -1) + close(fdn); +} + +void outHeader() { + +std::cout << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" << "\n"; +std::cout << "<osis xmlns=\"http://www.bibletechnologies.net/2003/OSIS/namespace\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.bibletechnologies.net/2003/OSIS/namespace osisCore.1.9.xsd\">" << "\n"; +std::cout << " <osisText osisIDWork=\"nasb\" xml:lang=\"en\">" << "\n"; +std::cout << " <header>" << "\n"; +std::cout << " <work osisWork=\"nasb\">" << "\n"; +std::cout << " <title>NEW AMERICAN STANDARD BIBLE</title>" << "\n"; +std::cout << " <identifier type=\"OSIS\">Bible.en.NASB.1995</identifier>" << "\n"; +std::cout << " <rights>Copyright (C) 1960,1962,1963,1968,1971,1972,1973,1975,1977,1995 by THE LOCKMAN FOUNDATION</rights>" << "\n"; +std::cout << " <refSystem>Bible</refSystem>" << "\n"; +std::cout << " </work>" << "\n"; +std::cout << " <work osisWork=\"strongs\">" << "\n"; +std::cout << " </work>" << "\n"; +std::cout << " </header>" << "\n"; + +} + +void outTrailer() { + std::cout << "</osisText>\n"; + std::cout << "</osis>\n"; +} + +void unicodeTicks(string &outstring) { + int end = 0; + + while (1) { + const char *outstr; + const char *found; + int start; + + outstr = outstring.c_str(); + found = strstr(outstr, "``"); + 
char uchar[4]; uchar[0]=0xe2; uchar[1]=0x80; uchar[2]=0x9c; uchar[3]=0; + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 2, uchar); + continue; + } + + outstr = outstring.c_str(); + found = strstr(outstr, "`"); + uchar[0]=0xe2; uchar[1]=0x80; uchar[2]=0x98; uchar[3]=0; + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 1, uchar); + continue; + } + + outstr = outstring.c_str(); + found = strstr(outstr, "'"); + uchar[0]=0xe2; uchar[1]=0x80; uchar[2]=0x99; uchar[3]=0; + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 1, uchar); + continue; + } + + outstr = outstring.c_str(); + found = strstr(outstr, "\""); + uchar[0]=0xe2; uchar[1]=0x80; uchar[2]=0x9d; uchar[3]=0; + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 1, uchar); + continue; + } + + break; + } +} + +void prepLine(string &outstring, int currentTestament, bool note) { + int end = 0; + while (1) { +// ------------------------------------------ +// redundant markers + size_t s; + // <V> + s = outstring.find("<V>"); + if (s != string::npos) { + outstring.erase(s, 3); + continue; + } + + // <P> + s = outstring.find("<P>"); + if (s != string::npos) { + outstring.erase(s, 3); + continue; + } + + // <C> + s = outstring.find("<C>"); + if (s != string::npos) { + outstring.erase(s, 3); + continue; + } + + // <CC> + s = outstring.find("<CC>"); + if (s != string::npos) { + outstring.erase(s, 4); + continue; + } + + // <CP> + s = outstring.find("<CP>"); + if (s != string::npos) { + outstring.erase(s, 4); + continue; + } + + // <$F...>> + s = outstring.find("<$F"); + if (s != string::npos) { + size_t e = outstring.find(">>", s); + outstring.erase(s, e-s+2); + continue; + } +// ---------------------------------------------- + + // <A> + s = outstring.find("<A>"); + if (s != string::npos) { + outstring.replace(s, 3, "<milestone type=\"line\" subType=\"x-A\"/>"); + 
continue; + } + + // ~“ + string target = "~“"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"“\"/>"); + continue; + } + + // ~‘ + target = "~‘"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"‘\"/>"); + continue; + } + + + const char *outstr = outstring.c_str(); + const char *found = strstr(outstr, "L\\{ORD}/'\\{S}/"); + int start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 14, "<seg><divineName>Lord's</divineName></seg>"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "L\\{ORD}/’\\{S}/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 16, "<seg><divineName>Lord’s</divineName></seg>"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "L\\{ORD,}/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 9, "<seg><divineName>Lord</divineName></seg>,"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "L\\{ORD}/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 8, "<seg><divineName>Lord</divineName></seg>"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "S\\{EN~OR}/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 10, "<seg><divineName>Sen~or</divineName></seg>"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "Y\\{AH,}/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 8, "<seg><divineName>Yah</divineName></seg>,"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "Y\\{AH}/"); + start = (found) ? 
(found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 7, "<seg><divineName>Yah</divineName></seg>"); + continue; + } + // is this really valid markup? should 'also be' be in small + // caps? 3 { and only 2 } ? + outstr = outstring.c_str(); + found = strstr(outstr, "L\\{ORD {also be}/}"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 18, "<seg><divineName>Lord</divineName></seg> also be}"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "L\\{ORD {give}/}"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 15, "<seg><divineName>Lord</divineName></seg> give}"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "L\\{ORD {bless}/}"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 16, "<seg><divineName>Lord</divineName></seg> bless}"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "L\\{ORD {are my Refuge; You have made the Most High your dwelling place}/}"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 73, "<seg><divineName>Lord</divineName></seg> are my Refuge; You have made the Most High your dwelling place}"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "M\\ENE/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 6, "M<hi type=\"x-smallcaps\">ene</hi>"); + continue; + } + found = strstr(outstr, "M\\ENE:/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 7, "M<hi type=\"x-smallcaps\">ene</hi>:"); + continue; + } + found = strstr(outstr, "T\\EKEL/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 7, "T<hi type=\"x-smallcaps\">ekel</hi>"); + continue; + } + found = strstr(outstr, "T\\EKEL:/"); + start = (found) ? 
(found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 8, "T<hi type=\"x-smallcaps\">ekel</hi>:"); + continue; + } + found = strstr(outstr, "U\\FARSIN/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 9, "U<hi type=\"x-smallcaps\">farsin</hi>"); + continue; + } + found = strstr(outstr, "P\\ERES:/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 8, "P<hi type=\"x-smallcaps\">eres</hi>:"); + continue; + } + // LB ??? Don't have info on this. Assuming '-' + outstr = outstring.c_str(); + found = strstr(outstr, "<LB>"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 4, "-"); + continue; + } + + found = strstr(outstr+end, "\\"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + for (;start;start--) { + if ((!isupper(outstring[start])) && + (!strchr("\\/ ~", outstring[start]))) { + break; + } + } + for (start++; outstring[start] == ' '; start++); + if (currentTestament) { + outstring.insert(start, "<seg type=\"otPassage\">"); + start += 22; + } + else { + outstring.insert(start, "<seg><divineName>"); + start += 17; + const char *b = outstring.c_str(); + const char *found = strstr(b, "L\\{ORD}/"); + int s = (found) ? (found - b) : -1; + if (s > -1) + outstring.replace(s, 8, "Lord"); + end = s+4; + } + bool lower = false; + bool token = false; + for (;start < outstring.length(); start++) { + if (!token) { + if (outstring[start] == '\\') { + lower = true; + outstring.erase(start, 1); + start--; + continue; + } + if (outstring[start] == '/') { + lower = false; + outstring.erase(start, 1); + end = start; + start--; + continue; + } + // what is this? 
It screws MENE MENE up in Daniel +// if (outstring[start] == ':') +// break; + + if (isalpha(outstring[start])) { + if (islower(outstring[start])) + break; + if (lower) + outstring[start] = tolower(outstring[start]); + continue; + } + } + if (outstring[start] == '>') + token = false; + if (outstring[start] == '<') + token = true; + } + if (currentTestament) { + outstring.insert(end, "</seg>"); + end+=6; + } + else { + outstring.insert(end, "</divineName></seg>"); + end+=19; + } + continue; + } + + if (note) { + outstr = outstring.c_str(); + found = strstr(outstr, "{"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 1, "<hi type=\"italic\">"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "}"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 1, "</hi>"); + continue; + } + s = outstring.find("</reference></hi>"); + if (s != string::npos) { + outstring.replace(s, 17, "</hi></reference>"); + continue; + } + } + break; + } +} + +string getNoteBody(int fdn, string &noteLine, string osisID, int currentBookNo, int currentChapter, int currentVerse, string nStr, const char *nx) { + char *nbuffer = 0; + int start = -1; + const char *found = (const char *)-1; + const char *outstr = (const char *)-1; + + while (start == -1) { + if (!noteLine.length() && fdn > -1) { + if (readline(fdn, &nbuffer)) return ""; // eof + noteLine = nbuffer; + } + outstr = noteLine.c_str(); + found = strstr(outstr, "{{"); + start = (found) ? (found - outstr) : -1; + if (start == -1) noteLine = ""; + } + + if (start > -1) { + found = strstr(outstr, "}}"); + int end = (found) ? 
(found - outstr) : -1; + end++; + int testmt = 0, book, chap; + string bkch = noteLine.substr(start+2, end-start-2); + sscanf(bkch.c_str(), "%d:%d", &book, &chap); + int vNumEnd = noteLine.find_first_of(" ", end); + int verse = atoi(noteLine.substr(end+1, vNumEnd-end-1).c_str()); + if ((book != currentBookNo) || (chap != currentChapter) || (verse != currentVerse)) { + fprintf(stderr, "Not correct note line(%d:%d, %d:%d, %d:%d): %s\n\n", currentBookNo, book, currentChapter, chap, currentVerse, verse, noteLine.c_str()); + exit(-1); + } + } + else { + fprintf(stderr, "Not a note line: %s\n\n", noteLine.c_str()); + exit(-1); + } + + + + + + outstr = noteLine.c_str(); + string tag = (string)"<"+(string)nx+nStr+(string)">"; + found = strstr(outstr, tag.c_str()); + start = (found) ? (found - outstr) : -1; + string retVal = ""; + + if (start > -1) { + start += tag.length(); + found = strstr(outstr+start, " <"); + int end = (found) ? (found - outstr) : -1; + if (end<0) end = noteLine.length(); + retVal = noteLine.substr(start, end-start); + } + unicodeTicks(retVal); +#ifdef HAVESWORD + if (*nx == 'R') { + // } { get's deleted. e.g. {Luke} {9:10-17} + outstr = retVal.c_str(); + found = strstr(outstr, "} {"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + retVal.replace(start, 3, " "); + } + outstr = retVal.c_str(); + found = strstr(outstr, ";}"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + retVal.replace(start, 2, "};"); + } + VerseKey key = osisID.c_str(); + retVal = VerseKey::convertToOSIS(retVal.c_str(), &key); + } +#endif + prepLine(retVal, 0, true); + if (nbuffer) + delete [] nbuffer; + return retVal; +} + |