path: root/modules/nasb1995/lockosis.cpp
diff options
Diffstat (limited to 'modules/nasb1995/lockosis.cpp')
1 files changed, 1233 insertions, 0 deletions
diff --git a/modules/nasb1995/lockosis.cpp b/modules/nasb1995/lockosis.cpp
new file mode 100644
index 0000000..4209a8f
--- /dev/null
+++ b/modules/nasb1995/lockosis.cpp
@@ -0,0 +1,1233 @@
+#include <ctype.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <clocale>
+#include <locale>
+#ifndef __GNUC__
+#include <io.h>
+#include <unistd.h>
+#include <iostream>
+#include <string>
+#ifndef O_BINARY
+#define O_BINARY 0
+#include <versekey.h>
+#include <localemgr.h>
+#include <stringmgr.h>
+using namespace sword;
+using std::string;
+using std::cout;
+using std::endl;
+static const char *osisOTBooks[] = { // OSIS abbreviations for the first table; main() indexes it with (book number - 1)
+ "Gen", "Exod", "Lev", "Num", "Deut", "Josh", "Judg",
+ "Ruth", "1Sam", "2Sam", "1Kgs", "2Kgs", "1Chr",
+ "2Chr", "Ezra", "Neh", "Esth", "Job", "Ps", "Prov",
+ "Eccl", "Song", "Isa", "Jer", "Lam",
+ "Ezek", "Dan", "Hos", "Joel", "Amos", "Obad", "Jonah", "Mic",
+ "Nah", "Hab", "Zeph", "Hag", "Zech", "Mal",
+// extra-Biblical (entries from here on sit past the first 39 names)
+ "Bar", "PrAzar", "Bel", "Sus", "1Esd", "2Esd", "AddEsth",
+ "EpJer", "Jdt", "1Macc", "2Macc", "3Macc", "4Macc", "PrMan",
+ "Ps151", "Sir", "Tob", "Wis"};
+static const char *osisNTBooks[] = { // OSIS abbreviations for the second table (27 names)
+ "Matt", "Mark", "Luke", "John", "Acts", "Rom", "1Cor",
+ "2Cor", "Gal", "Eph", "Phil", "Col",
+ "1Thess", "2Thess", "1Tim", "2Tim", "Titus",
+ "Phlm", "Heb", "Jas", "1Pet", "2Pet", "1John", "2John",
+ "3John", "Jude", "Rev"};
+static const char **osisBooks[] = { osisOTBooks, osisNTBooks }; // selected by testament index: 0 -> osisOTBooks, 1 -> osisNTBooks
+const char nasbMax[2] = {39, 27}; // main() reads nasbMax[0] to turn a running book number above 39 into an index into osisNTBooks
+char readline(int fd, char **buf) { /* Reads the next line of fd into a newly allocated *buf.
+ * A buffer already held in *buf is released with delete[] first. Leading and trailing
+ * CR, LF, space and tab bytes are dropped, and a few accent markers are rewritten in
+ * place as two-byte sequences (see the switch below). When nothing is left on the
+ * line, *buf is an empty string. Returns !len, i.e. nonzero only when the most recent
+ * read() call returned 0. */
+ char ch;
+ if (*buf)
+ delete [] *buf;
+ *buf = 0;
+ int len;
+ long index = lseek(fd, 0, SEEK_CUR); // offset where this line's content is expected to start
+ // clean up any preceding white space
+ while ((len = read(fd, &ch, 1)) == 1) {
+ if ((ch != 10) && (ch != 13) && (ch != ' ') && (ch != '\t'))
+ break;
+ else index++; // one more skipped byte, so the content starts one byte later
+ }
+ while (ch != 10) { // advance to the LF or until read() stops; NOTE(review): ch is never assigned if the very first read() above failed
+ if ((len = read(fd, &ch, 1)) != 1)
+ break;
+ }
+ int size = (lseek(fd, 0, SEEK_CUR) - index) - 1; // NOTE(review): the -1 is also applied when read() stopped without an LF, which appears to drop the last byte of an unterminated final line — confirm
+ *buf = new char [ (size+2) * 2 ]; // NOTE(review): presumably oversized to leave room for the one-byte insertions below — confirm
+ if (size > 0) {
+ lseek(fd, index, SEEK_SET); // go back and read the line's bytes in one call
+ read(fd, *buf, size);
+ read(fd, &ch, 1); //pop terminating char
+ (*buf)[size] = 0;
+ // clean up any trailing junk on buf
+ int buflen = strlen(*buf);
+ for (char *it = *buf+(buflen-1); it > *buf; it--) {
+ if ((*it != 10) && (*it != 13) && (*it != ' ') && (*it != '\t'))
+ break;
+ else *it = 0;
+ }
+ // convert all spanish characters to combined
+ for (unsigned char *it = (unsigned char *)(*buf)+1; *it; it++) { // starts at the second byte because the cases below look at it[-1]
+ switch (*it) {
+ case 0xE2 : // ‘
+ if (isalpha(it[-1]) && it[1] == 0x80 && it[2] == 0x98) { // bytes E2 80 98 directly after a letter
+ memmove(it, it+1, buflen - (it-(unsigned char *)*buf)); // close the gap by one byte: three bytes become two
+ buflen--;
+ it[0] = 0xcc;
+ it[1] = 0x80; // yeah, I know it's already 0x80, but that's just a coincidence
+ }
+ else { // any other use of byte 0xE2 aborts the run
+ fprintf(stderr, "oddity: %s\n", *buf);
+ exit(-4);
+ }
+ break;
+ case 0x60 : // `
+ if (isalpha(it[-1])) {
+ memmove(it+1, it, buflen - (it-(unsigned char *)*buf) + 1); // open one extra byte: one byte becomes two
+ buflen++;
+ it[0] = 0xcc;
+ it[1] = 0x80;
+ }
+ else { // a backtick that does not follow a letter is left untouched
+// fprintf(stderr, "oddity: %s\n", *buf);
+// exit(-4);
+ }
+ break;
+ case 0x7E : // ~
+ memmove(it+1, it, buflen - (it-(unsigned char *)*buf) + 1); // unconditional: every '~' becomes the two bytes CC 83
+ buflen++;
+ it[0] = 0xcc;
+ it[1] = 0x83;
+ break;
+ }
+ }
+ }
+ else **buf = 0;
+ return !len;
+void outHeader(); // writes the XML declaration and the OSIS header to std::cout
+void outTrailer(); // writes the closing osisText and osis tags to std::cout
+void unicodeTicks(string &outstring); // replaces backticks, apostrophes and double quotes in place with multi-byte quote characters
+void prepLine(string &outstring, int currentTestament, bool note); // marker rewrite pass that main() runs on every lookahead line
+string getNoteBody(int nfd, string &noteLine, string osisID, int currentBookNo, int currentChapter, int currentVerse, string nStr, const char *nx); // main() inserts the returned text inside the <note> elements it builds
+int replaceFirst(string &haystack, string needle, string replacement); // replaces the first occurrence of needle; returns its offset or -1
+int main(int argc, char **argv) { /* Converts a marked-up Bible text file into OSIS XML on std::cout.
+ * argv[1] is the input text, read one line at a time with readline(); the optional
+ * argv[2] is a notes file whose descriptor is handed to getNoteBody() for <N..> and
+ * <R..> markers. Exits with -1 on bad usage, -2 when a file cannot be opened, and -3
+ * when a book name or a {{book::chapter}} marker does not match the current book. */
+ LocaleMgr::getSystemLocaleMgr()->setDefaultLocaleName("es");
+ std::setlocale(LC_CTYPE, "");
+ // Let's test our command line arguments
+ if (argc < 2) {
+// fprintf(stderr, "usage: %s <vpl_file> </path/to/mod> [0|1 - file includes prepended verse references]\n", argv[0]);
+ fprintf(stderr, "usage: %s <biblefile> [notesfile]\n\n", argv[0]);
+ exit(-1);
+ }
+ // Let's see if we can open our input file
+ int fd = open(argv[1], O_RDONLY|O_BINARY);
+ if (fd < 0) {
+ fprintf(stderr, "error: %s: couldn't open input file: %s \n", argv[0], argv[1]);
+ exit(-2);
+ }
+ int fdn = -1; // notes file descriptor; stays -1 when no notes file was given
+ if (argc > 2) {
+ fdn = open(argv[2], O_RDONLY|O_BINARY);
+ if (fdn < 0) {
+ fprintf(stderr, "error: %s: couldn't open input file: %s \n", argv[0], argv[2]);
+ exit(-2);
+ }
+ }
+ outHeader();
+ string header; // not referenced again in this function
+ char *buffer = 0;
+ char *nbuffer = 0; // never assigned in this function; only checked at cleanup
+ int result = 0;
+ string currentBook = "";
+ int currentBookNo = 0;
+ int currentTestament = 0;
+ int currentChapter = 0;
+ int currentVerse = 0;
+ bool inBook = false; // these three flags record which of <div>, <chapter>, <verse> are currently open
+ bool inChapter = false;
+ bool inVerse = false;
+ string noteLine = "";
+ string preChapNote = "";
+ string outstring;
+ result = readline(fd, &buffer); // prime the lookahead with the first line
+ string lookahead = buffer;
+ unicodeTicks(lookahead);
+ prepLine(lookahead, currentTestament, false);
+ do { // in the usual case each pass emits the line held in lookahead while the freshly read line takes its place
+ result = readline(fd, &buffer);
+ if (lookahead.length()) {
+ string savebuf = buffer;
+ if (buffer)
+ delete [] buffer;
+ buffer = new char [ lookahead.length() + 1];
+ strcpy(buffer, lookahead.c_str());
+ lookahead = savebuf;
+ unicodeTicks(lookahead);
+ prepLine(lookahead, currentTestament, false);
+ result = 0; // force another pass so the line now waiting in lookahead is processed too
+ }
+ else if (!result) { // lookahead was empty: keep the line just read and fetch one more as the new lookahead
+ string savebuf = buffer;
+ result = readline(fd, &buffer);
+ lookahead = buffer;
+ unicodeTicks(lookahead);
+ prepLine(lookahead, currentTestament, false);
+ if (buffer)
+ delete [] buffer;
+ buffer = new char [ savebuf.length() + 1];
+ strcpy(buffer, savebuf.c_str());
+ result = 0;
+ }
+ outstring = buffer;
+ if (!strncmp(outstring.c_str(), "<BN>", 4)) { // book name line: close whatever is open and start a new book div
+ string book = outstring.c_str()+4;
+ book = book.substr(0, book.find_first_of("<"));
+ outstring = "";
+ if (inVerse) {
+ outstring += "</verse>";
+ inVerse = false;
+ }
+ if (inChapter) {
+ outstring += "</chapter>";
+ inChapter = false;
+ }
+ if (inBook) {
+ outstring += "</div>";
+ inBook = false;
+ }
+ outstring += (string)"<div type=\"book\" osisID=\"";
+ VerseKey bookName(book.c_str());
+ if (bookName.popError()) {
+ fprintf(stderr, "error: %s: couldn't find book match for %s. Please check book array in conversion program.\n", argv[0], book.c_str());
+ exit(-3);
+ }
+ currentBook = bookName.getOSISBookName();
+ outstring += currentBook;
+ currentTestament = bookName.getTestament()-1; // stored zero-based; later passed to prepLine()
+ outstring += (string)"\"><title type=\"main\" subType=\"x-Book\">" + book + "</title>";
+ inBook = true;
+ }
+ if ((!strncmp(outstring.c_str(), "<CN>", 4)) || (!strncmp(outstring.c_str(), "<SN>", 4))) { // chapter heading line
+ string chapterTitle = outstring.c_str()+4;
+ chapterTitle = chapterTitle.substr(0, chapterTitle.find_first_of("<"));
+ string chapter = chapterTitle.substr(chapterTitle.find_first_of(" ")+1); // text after the first space (the whole title when there is none, since npos+1 wraps to 0)
+ outstring = "";
+ if (inVerse) {
+ outstring += "</verse>";
+ inVerse = false;
+ }
+ if (inChapter) {
+ outstring += "</chapter>";
+ inChapter = false;
+ }
+ outstring += (string)"<chapter osisID=\"" + currentBook + "." + chapter + "\">";
+ outstring += (string)"<title type=\"sub\" subType=\"x-Chapter\">" + chapterTitle + "</title>";
+ currentChapter = atoi(chapter.c_str());
+ inChapter = true;
+ currentVerse = 1;
+ }
+ //<SF>
+ if (!strncmp(outstring.c_str(), "<SF>", 4)) {
+ string heading = outstring.c_str()+4;
+ heading = heading.substr(0, heading.find("</SF>"));
+ outstring = "";
+ if (!strncmp(lookahead.c_str(), "<PM>", 4)) { // a <PM> opening the next line is consumed here and emitted ahead of this title
+ lookahead.erase(0, 4);
+ outstring += "<milestone type=\"line\" subType=\"x-PM\"/>";
+ }
+ if (inVerse) {
+ outstring += "</verse>\n";
+ inVerse = false;
+ }
+ outstring += (string)"<title type=\"acrostic\" canonical=\"true\" subType=\"x-preverse\">" + heading + (string)"</title>";
+ }
+ //<SH>
+ if ((!strncmp(outstring.c_str(), "<SH>", 4)) || (!strncmp(outstring.c_str(), "<SHI>", 5))) {
+ bool shi = outstring.c_str()[3] == 'I'; // the fourth character tells "<SHI>" apart from "<SH>"
+ if (shi) {
+ fprintf(stderr, "found shi.\n");
+ }
+ string heading = outstring.c_str()+(shi ? 5 : 4);
+ heading = heading.substr(0, heading.find(shi ? "</SHI>" : "</SH>"));
+ outstring = "";
+ if (!strncmp(lookahead.c_str(), "<PM>", 4)) { // same <PM> hoisting as for <SF> above
+ lookahead.erase(0, 4);
+ outstring += "<milestone type=\"line\" subType=\"x-PM\"/>";
+ }
+ if (inVerse) {
+ outstring += "</verse>\n";
+ inVerse = false;
+ }
+ outstring += (string)"<title type=\"section\"";
+ if (!shi) outstring += (string)" subType=\"x-preverse\"";
+ outstring += (string)">" + heading + (string)"</title>";
+ }
+ if (!strncmp(outstring.c_str(), "<SS>", 4)) {
+ string heading = (outstring.c_str()+4);
+ heading = heading.substr(0, heading.find("</SS>"));
+ outstring = (string)"<title type=\"psalm\" canonical=\"true\" subType=\"x-preverse\">" + heading + (string)"</title>";
+ }
+ if (!strncmp(outstring.c_str(), "<SB>", 4)) {
+ string heading = (outstring.c_str()+4);
+ heading = heading.substr(0, heading.find("</SB>"));
+ outstring = (string)"<title type=\"scope\" subType=\"x-preverse\">" + heading + (string)"</title>";
+ }
+ // {{x::y}}
+ // DUH, find_first_of looks for the first occurrence of ANY single character of the supplied string
+// int start = outstring.find_first_of("{{"); // this is whacked and fails on ">[{.." Try it!
+ const char *outstr = outstring.c_str();
+ const char *found = strstr(outstr, "{{");
+ int start = (found) ? (found - outstr) : -1;
+// ---- end of whacked replacement
+ if (start > -1) { // verse marker of the form {{book::chapter}}verse followed by a space
+ found = strstr(outstr, "}}");
+ int end = (found) ? (found - outstr) : -1;
+ end++; // now the offset of the second '}'
+ int testmt = 0, book = 0, chap = 0;
+ string bkch = outstring.substr(start+2, end-start-2);
+ sscanf(bkch.c_str(), "%d::%d", &book, &chap);
+ currentChapter = chap;
+ int vNumEnd = outstring.find_first_of(" ", end);
+ currentVerse = atoi(outstring.substr(end+1, vNumEnd-end-1).c_str()); // digits between "}}" and the next space
+ currentBookNo = book;
+ if (book > nasbMax[0]) { // numbers past nasbMax[0] index the second table
+ testmt = 1;
+ book -= nasbMax[0];
+ }
+ if (currentBook != osisBooks[testmt][book-1]) {
+ fprintf(stderr, "error: %s: Found a book/chapter identifier out of place: {{%d::%d}} in book %s, chapter %d; bkch = %s; outstring = %s\n", argv[0], book, chap, currentBook.c_str(), currentChapter, bkch.c_str(), outstring.c_str());
+ exit(-3);
+ }
+ char chapString[20], verseString[20];
+ sprintf(chapString, "%d", currentChapter);
+ sprintf(verseString, "%d", currentVerse);
+ string newstring = "";
+ if (inVerse) {
+ newstring += "</verse>";
+ inVerse = false;
+ }
+ newstring += "<verse osisID=\"" + currentBook + (string)"." + (string)chapString + (string)"." + (string)verseString + (string) "\">";
+ outstring.replace(start, vNumEnd-start+1, newstring); // swaps the marker, the verse number and the following space for the <verse> tag
+ inVerse = true;
+ noteLine = preChapNote; // restore the note line saved while handling a pre-chapter note, then clear the saved copy
+ preChapNote = "";
+ }
+ // multiple occurrences on a line stuff
+ while (1) { // every successful replacement restarts the scan; the loop ends once no rule below matches
+ // NOTE
+ outstr = outstring.c_str();
+ found = strstr(outstr, "<N");
+ start = (found) ? (found - outstr) : -1;
+ if (start > -1) {
+ int end = outstring.find_first_of(">", start+1);
+ string nStr = outstring.substr(start+2, end-start-2);
+ // NOTE <N#>
+ if (isdigit(nStr.c_str()[0]) || strchr("ABCDEFG", nStr.c_str()[0])) { // other <N..> markers (e.g. <NA>) fall through to the rules further down
+ bool preChap = strchr("A", nStr.c_str()[0]);
+ if (preChap) noteLine = "";
+ char chapString[20], verseString[20];
+ sprintf(chapString, "%d", currentChapter);
+ sprintf(verseString, "%d", currentVerse);
+ string osisID = (currentBook + (string)"." + (string)chapString + (string)"." + (string) verseString).c_str();
+ string noteBody = getNoteBody(fdn, noteLine, osisID, currentBookNo, currentChapter, currentVerse, nStr, "N");
+ if (preChap) preChapNote = noteLine; // saved so the next verse marker can restore it
+ outstring.replace(start, end-start+1, (string)"<note type=\"explanation\" osisID=\"" + osisID + (string)".note." + nStr + (string) "\" n=\"" + nStr + (string) "\">" + noteBody + "</note>");
+// outstring.replace(start, end-start+1, (string)"--note--");
+ continue;
+ }
+ }
+ // <RS>
+ if (replaceFirst(outstring, "<RS>", "<q who=\"Jesus\">") > -1) continue;
+ // </RS>
+ if (replaceFirst(outstring, "</RS>", "</q>") > -1) continue;
+ // <RT>
+ if (replaceFirst(outstring, "<RT>", "<milestone type=\"x-RT\"/>") > -1) continue;
+ if (replaceFirst(outstring, "<SHI>", "<title type=\"section\">") > -1) continue;
+ if (replaceFirst(outstring, "</SHI>", "</title>") > -1) continue;
+ // <?>
+ if (replaceFirst(outstring, "<?>", "¿") > -1) continue;
+ // <!>
+ if (replaceFirst(outstring, "<!>", "¡") > -1) continue;
+ outstr = outstring.c_str();
+ found = strstr(outstr, "<R"); // reached only after <RS> and <RT> have been replaced above
+ start = (found) ? (found - outstr) : -1;
+ if (start > -1) {
+ int end = outstring.find_first_of(">", start+1);
+ string nStr = outstring.substr(start+2, end-start-2);
+ char chapString[20], verseString[20];
+ sprintf(chapString, "%d", currentChapter);
+ sprintf(verseString, "%d", currentVerse);
+ string osisID = (currentBook + (string)"." + (string)chapString + (string)"." + (string) verseString).c_str();
+ string noteBody = getNoteBody(fdn, noteLine, osisID, currentBookNo, currentChapter, currentVerse, nStr, "R");
+ outstring.replace(start, end-start+1, (string)"<note type=\"crossReference\" osisID=\"" + osisID + (string)".xref." + nStr + (string) "\" n=\"" + nStr + (string)"\">" + noteBody + "</note>");
+ continue;
+ }
+ // transChange added {}
+ outstr = outstring.c_str();
+ found = strstr(outstr, "{");
+ start = (found) ? (found - outstr) : -1;
+ if (start > -1) {
+ outstring.replace(start, 1, (string)"<transChange type=\"added\">");
+ size_t end = outstring.find_first_of("}", start+1);
+ if (end != string::npos) {
+ outstring.erase(end, 1);
+ }
+ else end = outstring.size()-1; // no closing brace on this line: close the element at the end of the line instead
+ while ((!isalpha(outstring[end]))&&(outstring[end]!='>')) end--; // back up so the closing tag lands right after the last letter or tag
+ outstring.insert(end+1, "</transChange>");
+ continue;
+ }
+ // transChange tenseChange *
+ outstr = outstring.c_str();
+ found = strstr(outstr, "*");
+ start = (found) ? (found - outstr) : -1;
+ if (start > -1) {
+ outstring.replace(start, 1, (string)"<transChange type=\"tenseChanged\">");
+ for (end = start + 34; (end < outstring.length()); end++) { // NOTE(review): no declaration of end is visible in this scope (the earlier ones are local to their own blocks) — confirm against the full file; the inserted tag is 33 characters long
+ if ((!isalpha(outstring[end])) &&
+ (outstring[end] != '\''))
+ break;
+ }
+ outstring.replace(end, 1, "</transChange>");
+ continue;
+ }
+ // <,>
+ if (replaceFirst(outstring, "<,>", "<milestone type=\"x-superiorComma\"/>") > -1) continue;
+ // <NA>
+ if (replaceFirst(outstring, "<NA>", "<milestone type=\"x-superiorOne\"/>") > -1) continue;
+ // <NB>
+ if (replaceFirst(outstring, "<NB>", "<milestone type=\"x-superiorTwo\"/>") > -1) continue;
+ // <NC>
+ if (replaceFirst(outstring, "<NC>", "<milestone type=\"x-superiorThree\"/>") > -1) continue;
+ // paragraph break <PM>
+ if (replaceFirst(outstring, "<PM>", "<milestone type=\"line\" subType=\"x-PM\"/>") > -1) continue;
+ // poetry break <PN>
+ if (replaceFirst(outstring, "<PN>", "<milestone type=\"x-Poetry\" />") > -1) continue;
+ // poetry break <PO>
+ if (replaceFirst(outstring, "<PO>", "<milestone type=\"line\" subType=\"x-Poetry\" />") > -1) continue;
+ // poetry break <PR>
+ if (replaceFirst(outstring, "<PR>", "<milestone type=\"x-PoetryEnd\" />") > -1) continue;
+ // letter indent <HL>
+ if (replaceFirst(outstring, "<HL>", "<milestone type=\"x-HL\" />") > -1) continue;
+ // letter indent <HLL>
+ if (replaceFirst(outstring, "<HLL>", "<milestone type=\"line\" subType=\"x-HLL\" />") > -1) continue;
+ break;
+ }
+ int strongsStart = 0; // offset where the next <w ...> opening tag would be inserted
+ int transChangeStart = 0; // offset just past the most recent transChange opening tag
+ bool strongsStartFound = false;
+ bool intoken = false; // true while i is between '<' and '>'
+ bool intag = false; // true after a tag without '/' in it, false after one that contains '/'
+ bool inNote = false;
+ int tokenStart = 0;
+ string lastToken = "";
+ string previousToken = "";
+ int tenseChange = -1;
+ // strongs numbers
+ for (unsigned int i = 0; i < outstring.length(); ++i) { // turns each <MG..>/<MH..> token into a <w lemma="strong:.."> element around the text before it
+ if ((!strongsStartFound) && (!inNote) && (!intoken)) {
+ if (!intag) {
+ if ((outstring[i] != ' ') && ((isalpha(outstring[i])) || (isdigit(outstring[i])))) {
+ strongsStart = i;
+ strongsStartFound = true;
+ }
+ }
+ else if (!strncmp(lastToken.c_str(), "hi", 2)) { // text inside <hi ...>: start the word before the <hi tag itself
+ if ((outstring[i] != ' ') && ((isalpha(outstring[i])) || (isdigit(outstring[i])))) {
+ strongsStart = tokenStart - 1;
+ strongsStartFound = true;
+ }
+ }
+ }
+ if (outstring[i] =='*') // NOTE(review): the replacement loop above rewrites every '*' before this loop runs, so this looks unreachable — confirm
+ tenseChange = i;
+ if (outstring[i] == '<') { tokenStart = i+1; intoken = true; }
+ if (outstring[i] == '>') {
+ intoken = false;
+ previousToken = lastToken;
+ lastToken = outstring.substr(tokenStart, i-tokenStart); // tag text without the angle brackets
+ // Not completely safe, but works for current NASB data
+ if (strchr(lastToken.c_str(), '/'))
+ intag = false;
+ else intag = true;
+ if ((intag)&&(!strncmp(lastToken.c_str(), "transChange", 11))) {
+ transChangeStart = i+1;
+ }
+ if (!strncmp(lastToken.c_str(), "seg", 3)) { // each tag listed below resets the pending word start to just after the tag
+ strongsStartFound = false;
+ strongsStart = i+1;
+ }
+ if (!strncmp(lastToken.c_str(), "divineName", 10)) {
+ strongsStartFound = false;
+ strongsStart = i+1;
+ }
+ if (!strncmp(lastToken.c_str(), "/divineName", 10)) { // NOTE(review): "/divineName" is 11 characters but only 10 are compared
+ strongsStartFound = false;
+ strongsStart = i+1;
+ }
+ if (!strncmp(lastToken.c_str(), "note", 4)) {
+ strongsStartFound = false;
+ strongsStart = i+1;
+ inNote = true; // no word starts are recorded until the matching /note
+ }
+ if (!strncmp(lastToken.c_str(), "/note", 5)) {
+ strongsStartFound = false;
+ strongsStart = i+1;
+ inNote = false;
+ }
+ if (!strncmp(lastToken.c_str(), "q who=\"Jesus\"", 13)) {
+ strongsStartFound = false;
+ strongsStart = i+1;
+ }
+ if (!strncmp(lastToken.c_str(), "/q", 2)) {
+ strongsStartFound = false;
+ strongsStart = i+1;
+ }
+ if (!strncmp(lastToken.c_str(), "seg type=\"otPassage\"", 19)) {
+ strongsStartFound = false;
+ strongsStart = i+1;
+ }
+ if (!strncmp(lastToken.c_str(), "/transChange", 12)) {
+ strongsStartFound = false;
+ strongsStart = i+1;
+ }
+ if (!strncmp(lastToken.c_str(), "milestone", 9)) {
+ strongsStartFound = false;
+ strongsStart = i+1;
+ }
+ if (!strncmp(lastToken.c_str(), "/seg", 4)) {
+ strongsStartFound = false;
+ strongsStart = i+1;
+ }
+ if (!strncmp(lastToken.c_str(), "verse", 5)) {
+ strongsStartFound = false;
+ strongsStart = i+1;
+ }
+ if ((!strncmp(lastToken.c_str(), "verse", 5))) {
+ intag = false;
+ }
+ if ( (!strncmp(lastToken.c_str(), "MG", 2)) ||
+ (!strncmp(lastToken.c_str(), "MH", 2))) {
+ // insert </w>
+ // fix tenseChange to be inside <w> so we can include a subset of the <w> content.
+ outstring.replace(tokenStart-1, lastToken.length()+2, ((tenseChange > -1) ? "</w></transChange>":"</w>")); // the whole <M..> token, brackets included, becomes the closing tag(s)
+ i = (tokenStart-1) + ((tenseChange > -1) ? 18:4); // 18 and 4 are the lengths of the two closing strings above
+ // build <w ... > tag
+ char lang = lastToken[1]; // H or G
+ lastToken.replace(0, 1, "<w lemma=\"strong:"); // the leading 'M' is replaced, so the language letter and number follow "strong:"
+ while ((start = lastToken.find(", ")) > -1) { // additional comma-separated numbers each get their own "strong:" prefix
+ lastToken.replace(start, 2, (string)" strong:" + lang);
+ }
+ lastToken += "\">";
+ intag = false;
+ if (tenseChange > -1) {
+ lastToken.insert(0, "<transChange type=\"tenseChange\">"); // NOTE(review): spelled "tenseChange" here but "tenseChanged" in the '*' rule above
+ }
+ if (!strncmp(previousToken.c_str(), "transChange type=\"added", 23)) { // keep the <w> inside a preceding added-text element
+ outstring.insert(transChangeStart, lastToken);
+ intag = true;
+ i += lastToken.length() - 1; // (-1 because we're about to i++)
+ }
+ // insert our token
+ else {
+ outstring.insert(strongsStart, lastToken);
+ i += lastToken.length() - 1; // (-1 because we're about to i++)
+ }
+ strongsStart = i+1;
+ strongsStartFound = false;
+ if (tenseChange > -1) {
+ // relocate because position may have changed from all the token inserts
+ const char *buf = outstring.c_str();
+ tenseChange = (strchr(buf, '*') - buf);
+ outstring.erase(tenseChange, 1);
+ tenseChange = -1;
+ }
+ }
+ }
+ }
+ // clean up stuff that didn't work quite right
+ while (1) {
+ // divineName strongs tags misorderings
+ string target = "</w></divineName></seg>";
+ size_t s = outstring.find(target);
+ if (s != string::npos) {
+ outstring.replace(s, target.length(), "</divineName></seg></w>"); // move </w> outside the divineName/seg pair
+ continue;
+ }
+ target = "</w>,</divineName></seg>";
+ s = outstring.find(target);
+ if (s != string::npos) {
+ outstring.replace(s, target.length(), "</divineName></seg></w>,");
+ continue;
+ }
+ break;
+ }
+ std::cout << outstring;
+ if (!result) std::cout << "\n";
+ }
+ while (!result);
+ outstring = ""; // end of input: close whatever elements are still open
+ if (inVerse) {
+ outstring += "</verse>";
+ inVerse = false;
+ }
+ if (inChapter) {
+ outstring += "</chapter>";
+ inChapter = false;
+ }
+ if (inBook) {
+ outstring += "</div>";
+ inBook = false;
+ }
+ std::cout << outstring;
+ outTrailer();
+ // clean up our buffers that readline might have allocated
+ if (buffer)
+ delete [] buffer;
+ if (nbuffer)
+ delete [] nbuffer;
+ close(fd);
+ if (fdn > -1)
+ close(fdn);
+void outHeader() { /* Writes the XML declaration, the opening osis and osisText tags and the
+ * complete header element to std::cout. The osis and osisText elements are left
+ * open; outTrailer() writes their closing tags. */
+std::cout << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" << "\n";
+std::cout << "<osis xmlns=\"\" xmlns:xsi=\"\" xsi:schemaLocation=\" osisCore.1.9.xsd\">" << "\n";
+std::cout << " <osisText osisIDWork=\"nasb\" xml:lang=\"en\">" << "\n";
+std::cout << " <header>" << "\n";
+std::cout << " <work osisWork=\"nasb\">" << "\n"; // first work element: the Bible text itself
+std::cout << " <title>NEW AMERICAN STANDARD BIBLE</title>" << "\n";
+std::cout << " <identifier type=\"OSIS\">Bible.en.NASB.1995</identifier>" << "\n";
+std::cout << " <rights>Copyright (C) 1960,1962,1963,1968,1971,1972,1973,1975,1977,1995 by THE LOCKMAN FOUNDATION</rights>" << "\n";
+std::cout << " <refSystem>Bible</refSystem>" << "\n";
+std::cout << " </work>" << "\n";
+std::cout << " <work osisWork=\"strongs\">" << "\n"; // second work element is written empty
+std::cout << " </work>" << "\n";
+std::cout << " </header>" << "\n";
+void outTrailer() { // Writes the closing tags for the osisText and osis elements that outHeader() opened.
+ std::cout << "</osisText>\n";
+ std::cout << "</osis>\n";
+void unicodeTicks(string &outstring) { /* Rewrites ASCII quote characters in outstring, in place, as
+ * three-byte quote characters. One occurrence is replaced per pass and the scan then
+ * restarts from the top, so rules are tried in this order until none matches: a pair
+ * of backticks, a single backtick, an apostrophe, a double quote. */
+ while (1) {
+ const char *outstr;
+ const char *found;
+ int start;
+ outstr = outstring.c_str();
+ found = strstr(outstr, "``");
+ char uchar[4]; uchar[0]=0xe2; uchar[1]=0x80; uchar[2]=0x9c; uchar[3]=0; // filled in but not used for this rule: the replacement below is a string literal
+ start = (found) ? (found - outstr) : -1;
+ if (start > -1) {
+ outstring.replace(start, 2, "“"); // two backticks collapse into one opening double quote
+ continue;
+ }
+ outstr = outstring.c_str();
+ found = strstr(outstr, "`");
+ uchar[0]=0xe2; uchar[1]=0x80; uchar[2]=0x98; uchar[3]=0; // bytes E2 80 98 replace a lone backtick
+ start = (found) ? (found - outstr) : -1;
+ if (start > -1) {
+ outstring.replace(start, 1, uchar);
+ continue;
+ }
+ outstr = outstring.c_str();
+ found = strstr(outstr, "'");
+ uchar[0]=0xe2; uchar[1]=0x80; uchar[2]=0x99; uchar[3]=0; // bytes E2 80 99 replace an apostrophe
+ start = (found) ? (found - outstr) : -1;
+ if (start > -1) {
+ outstring.replace(start, 1, uchar);
+ continue;
+ }
+ outstr = outstring.c_str();
+ found = strstr(outstr, "\"");
+ uchar[0]=0xe2; uchar[1]=0x80; uchar[2]=0x9d; uchar[3]=0; // bytes E2 80 9D replace every straight double quote, wherever it appears
+ start = (found) ? (found - outstr) : -1;
+ if (start > -1) {
+ outstring.replace(start, 1, uchar);
+ continue;
+ }
+ break;
+ }
+// Replaces the first occurrence of needle in haystack with replacement; returns the offset of that occurrence, or -1 (haystack unchanged) when needle is not found.
+int replaceFirst(string &haystack, string needle, string replacement) {
+ const char *outstr = haystack.c_str();
+ const char *found = strstr(outstr, needle.c_str()); // C-string search over the c_str() data
+ int start = (found) ? (found - outstr) : -1;
+ if (start > -1) {
+ haystack.replace(start, needle.size(), replacement);
+ }
+ return start;
+void prepLine(string &outstring, int currentTestament, bool note) {
+ int end = 0;
+ while (1) {
+// ------------------------------------------
+// redundant or unneeded or unknown markers
+ size_t s;
+ // <1EVA>
+ if (replaceFirst(outstring, "<1EVA>", "") > -1) continue;
+ // <1EVB>
+ if (replaceFirst(outstring, "<1EVB>", "") > -1) continue;
+ // <FA>
+ if (replaceFirst(outstring, "<FA>", "") > -1) continue;
+ // <PR>
+ if (replaceFirst(outstring, "<PR>", "") > -1) continue;
+ // <V>
+ if (replaceFirst(outstring, "<V>", "") > -1) continue;
+ // <T>
+ if (replaceFirst(outstring, "<T>", "") > -1) continue;
+ // <P>
+ if (replaceFirst(outstring, "<P>", "") > -1) continue;
+ // <C>
+ if (replaceFirst(outstring, "<C>", "") > -1) continue;
+ // <CC>
+ if (replaceFirst(outstring, "<CC>", "") > -1) continue;
+ // <CP>
+ if (replaceFirst(outstring, "<CP>", "") > -1) continue;
+ // <$F...>>
+ s = outstring.find("<$F");
+ if (s != string::npos) {
+ size_t e = outstring.find(">>", s);
+ outstring.erase(s, e-s+2);
+ continue;
+ }
+ // <EOV>
+ s = outstring.find("<EOV>");
+ if (s != string::npos) {
+ size_t e = outstring.find("</EOV>", s);
+ outstring.erase(s, e-s+6);
+ continue;
+ }
+// ----------------------------------------------
+ // <A>
+ if (replaceFirst(outstring, "<A>", "<milestone type=\"line\" subType=\"x-A\"/>") > -1) continue;
+ // ~“
+ char uchar[6]; uchar[0]=0xcc; uchar[1]=0x83; uchar[2]=0xe2; uchar[3]=0x80; uchar[4]=0x9c; uchar[5]=0;
+// string target = "~“";
+ string target = uchar;
+ s = outstring.find(target);
+ if (s != string::npos) {
+ outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"“\"/>");
+ continue;
+ }
+ // +«
+ target = "+«";
+ s = outstring.find(target);
+ if (s != string::npos) {
+ outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"«\"/>");
+ continue;
+ }
+ // +»
+ target = "+»";
+ s = outstring.find(target);
+ if (s != string::npos) {
+ outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"»\"/>");
+ continue;
+ }
+ // +“
+ target = "+“";
+ s = outstring.find(target);
+ if (s != string::npos) {
+ outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"“\"/>");
+ continue;
+ }
+ // +”
+ target = "+”";
+ s = outstring.find(target);
+ if (s != string::npos) {
+ outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"”\"/>");
+ continue;
+ }
+ // +‘
+ target = "+‘";
+ s = outstring.find(target);
+ if (s != string::npos) {
+ outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"‘\"/>");
+ continue;
+ }
+ // +’
+ target = "+’";
+ s = outstring.find(target);
+ if (s != string::npos) {
+ outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"’\"/>");
+ continue;
+ }
+ // -«
+ target = "-«";
+ s = outstring.find(target);
+ if (s != string::npos) {
+ outstring.replace(s, target.length(), "<milestone type=\"cQuote\" subType=\"optional\" marker=\"«\"/>");
+ continue;
+ }
+ // -»
+ target = "-»";
+ s = outstring.find(target);
+ if (s != string::npos) {
+ outstring.replace(s, target.length(), "<milestone type=\"cQuote\" subType=\"optional\" marker=\"»\"/>");
+ continue;
+ }
+ // -“
+ target = "-“";
+ s = outstring.find(target);
+ if (s != string::npos) {
+ outstring.replace(s, target.length(), "<milestone type=\"cQuote\" subType=\"optional\" marker=\"“\"/>");
+ continue;
+ }
+ // -”
+ target = "-”";
+ s = outstring.find(target);
+ if (s != string::npos) {
+ outstring.replace(s, target.length(), "<milestone type=\"cQuote\" subType=\"optional\" marker=\"”\"/>");
+ continue;
+ }
+ // -‘
+ target = "-‘";
+ s = outstring.find(target);
+ if (s != string::npos) {
+ outstring.replace(s, target.length(), "<milestone type=\"cQuote\" subType=\"optional\" marker=\"‘\"/>");
+ continue;
+ }
+ // -’
+ target = "-’";
+ s = outstring.find(target);
+ if (s != string::npos) {
+ outstring.replace(s, target.length(), "<milestone type=\"cQuote\" subType=\"optional\" marker=\"’\"/>");
+ continue;
+ }
+ // ~‘
+ uchar[0]=0xcc; uchar[1]=0x83; uchar[2]=0xe2; uchar[3]=0x80; uchar[4]=0x98; uchar[5]=0;
+// target = "~‘";
+ target = uchar;
+ s = outstring.find(target);
+ if (s != string::npos) {
+ outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"‘\"/>");
+ continue;
+ }
+ if (replaceFirst(outstring, "<B>", "<hi type=\"bold\">") > -1) continue;
+ if (replaceFirst(outstring, "</B>", "</hi>") > -1) continue;
+ if (replaceFirst(outstring, "L<\\>{ORD}</>'<\\>{S}</>", "<seg><divineName>Lord's</divineName></seg>") > -1) continue;
+ if (replaceFirst(outstring, "L<\\>{ORD’S}</>", "<seg><divineName>Lord’s</divineName></seg>") > -1) continue;
+ if (replaceFirst(outstring, "L<\\>{ORD}</>’<\\>{S}</>", "<seg><divineName>Lord’s</divineName></seg>") > -1) continue;
+ if (replaceFirst(outstring, "L<\\>{ORD}</>’<\\>{S} </>", "<seg><divineName>Lord’s</divineName></seg> ") > -1) continue;
+ if (replaceFirst(outstring, "L<\\>ORD</>’<\\>S<MH3068></>", "<seg><divineName>Lord’s<MH3068></divineName></seg>") > -1) continue;
+ if (replaceFirst(outstring, "L<\\>{ORD,}</>", "<seg><divineName>Lord</divineName></seg>,") > -1) continue;
+ if (replaceFirst(outstring, "L<\\>{ORD}</>", "<seg><divineName>Lord</divineName></seg>") > -1) continue;
+ if (replaceFirst(outstring, "L<\\>{ORD} </>", "<seg><divineName>Lord</divineName></seg> ") > -1) continue;
+ if (replaceFirst(outstring, "L}<\\>{ORD}</>{", "<seg><divineName>Lord</divineName></seg>") > -1) continue;
+ if (replaceFirst(outstring, "L}<\\>{ORD}</>", "<seg><divineName>Lord</divineName></seg>}") > -1) continue;
+ if (replaceFirst(outstring, "S<\\>{EN~OR}</>", "<seg><divineName>Sen~or</divineName></seg>") > -1) continue;
+ if (replaceFirst(outstring, "S<\\>{EÑOR}</>", "<seg><divineName>Señor</divineName></seg>") > -1) continue;
+ if (replaceFirst(outstring, "Y<\\>{AH,}</>", "<seg><divineName>Yah</divineName></seg>,") > -1) continue;
+ if (replaceFirst(outstring, "Y<\\>{AH,} </>", "<seg><divineName>Yah</divineName></seg>, ") > -1) continue;
+ if (replaceFirst(outstring, "Y<\\>{AH}</>", "<seg><divineName>Yah</divineName></seg>") > -1) continue;
+ // Do these first before Daniel Inscriptions
+ // LB = add macron, only with 'a': ā
+ if (replaceFirst(outstring, "a<LB>", "ā") > -1) continue;
+ if (replaceFirst(outstring, "E<LE>", "Ē") > -1) continue;
+ if (replaceFirst(outstring, "e<LE>", "ē") > -1) continue;
+ if (replaceFirst(outstring, "MENE", "<hi type=\"inscription\">Mene</hi>") > -1) continue;
+ if (replaceFirst(outstring, "M<\\>ENE</>", "<hi type=\"inscription\">Mene</hi>") > -1) continue;
+ if (replaceFirst(outstring, "M<\\>ENE:</>", "<hi type=\"inscription\">Mene</hi>:") > -1) continue;
+ if (replaceFirst(outstring, "TEKEL", "<hi type=\"inscription\">Tekel</hi>") > -1) continue;
+ if (replaceFirst(outstring, "T<\\>EKEL</>", "<hi type=\"inscription\">Tekel</hi>") > -1) continue;
+ if (replaceFirst(outstring, "T<\\>EKEL:</>", "<hi type=\"inscription\">Tekel</hi>:") > -1) continue;
+ if (replaceFirst(outstring, "UPHARSIN", "<hi type=\"inscription\">Upharsin</hi>") > -1) continue;
+ if (replaceFirst(outstring, "UFARSIN", "<hi type=\"inscription\">Ufarsin</hi>") > -1) continue;
+ if (replaceFirst(outstring, "U<\\>FARSIN</>", "<hi type=\"inscription\">Ufarsin</hi>") > -1) continue;
+ if (replaceFirst(outstring, "PERES", "<hi type=\"inscription\">Peres</hi>") > -1) continue;
+ if (replaceFirst(outstring, "P<\\>ERES</>", "<hi type=\"inscription\">Peres</hi>") > -1) continue;
+ if (replaceFirst(outstring, "P<\\>ERES:</>", "<hi type=\"inscription\">Peres</hi>:") > -1) continue;
+ if (replaceFirst(outstring, "MENĒ", "<hi type=\"inscription\">Menē</hi>") > -1) continue;
+ if (replaceFirst(outstring, "M<\\>ENĒ</>", "<hi type=\"inscription\">Menē</hi>") > -1) continue;
+ if (replaceFirst(outstring, "M<\\>ENĒ:</>", "<hi type=\"inscription\">Menē</hi>:") > -1) continue;
+ if (replaceFirst(outstring, "TEKĒL", "<hi type=\"inscription\">Tekēl</hi>") > -1) continue;
+ if (replaceFirst(outstring, "T<\\>EKĒL</>", "<hi type=\"inscription\">Tekēl</hi>") > -1) continue;
+ if (replaceFirst(outstring, "T<\\>EKĒL:</>", "<hi type=\"inscription\">Tekēl</hi>:") > -1) continue;
+ if (replaceFirst(outstring, "UPHARSIN", "<hi type=\"inscription\">Upharsin</hi>") > -1) continue;
+ if (replaceFirst(outstring, "UFARSIN", "<hi type=\"inscription\">Ufarsin</hi>") > -1) continue;
+ if (replaceFirst(outstring, "U<\\>FARSIN</>", "<hi type=\"inscription\">Ufarsin</hi>") > -1) continue;
+ if (replaceFirst(outstring, "PERĒS", "<hi type=\"inscription\">Perēs</hi>") > -1) continue;
+ if (replaceFirst(outstring, "P<\\>ERĒS</>", "<hi type=\"inscription\">Perēs</hi>") > -1) continue;
+ if (replaceFirst(outstring, "P<\\>ERĒS:</>", "<hi type=\"inscription\">Perēs</hi>:") > -1) continue;
+ const char *outstr = outstring.c_str();
+ const char *found = strstr(outstr+end, "<\\>");
+ int start = (found) ? (found - outstr) : -1;
+ if (start > -1) {
+ for (--start;start;start--) {
+ if ((!std::isupper(outstring[start])) &&
+ (!strchr("\\/ ~", outstring[start]))) {
+ break;
+ }
+ }
+ for (start++; outstring[start] == ' '; start++);
+ if (currentTestament) {
+ outstring.insert(start, "<seg type=\"otPassage\">");
+ start += 22;
+ }
+ else {
+ outstring.insert(start, "<seg><divineName>");
+ start += 17;
+ int s = replaceFirst(outstring, "L<\\>{ORD}</>", "Lord");
+ if (s > -1) end = s+4;
+ }
+ // do small cap logic
+ bool lower = false;
+ string token = "";
+ for (int charLen = 1; start < (int)outstring.length(); start += charLen) {
+ const unsigned char *startChar = (const unsigned char *)outstring.c_str()+start;
+ const unsigned char *endChar = startChar;
+ SW_u32 testChar = getUniCharFromUTF8(&endChar, true);
+ charLen = endChar - startChar; // set the size of the UTF-8 sequence
+ if (!token.size()) {
+ if (testChar == '<') {
+ token = "<";
+ continue;
+ }
+ // what is this? It screws MENE MENE up in Daniel
+// if (testChar == ':')
+// break;
+ if (StringMgr::getSystemStringMgr()->isAlpha(testChar)) {
+ if (StringMgr::getSystemStringMgr()->isLower(testChar))
+ break;
+ if (lower)
+ outstring.replace(start, charLen, SWBuf((const char *)startChar, charLen).toLower());
+ continue;
+ }
+ }
+ else {
+ token += testChar;
+ if (testChar == '>') {
+ if (token == "<\\>") {
+ lower = true;
+ outstring.erase(start-2, 3);
+ start -= 3;
+ }
+ if (token == "</>") {
+ lower = false;
+ outstring.erase(start-2, 3);
+ end = start - 2;
+ start -= 3;
+ unsigned int nextStrongs = outstring.find("<M");
+ unsigned int nextUp = outstring.find("</>");
+ if (nextStrongs != string::npos && nextUp != string::npos && nextStrongs < nextUp) {
+ break;
+ }
+ }
+ // end divineName if we hit a PO in the middle
+ if (token == "<PO>") {
+ break;
+ }
+ unsigned int s = token.find("<N");
+ if (s == string::npos || s > 0) s = token.find("<R");
+ if (s == 0 && token.size() > 2) {
+ if (StringMgr::getSystemStringMgr()->isDigit(token[2])) {
+ break;
+ }
+ }
+ token = "";
+ }
+ }
+ }
+ if (currentTestament) {
+ outstring.insert(end, "</seg>");
+ end+=6;
+ }
+ else {
+ outstring.insert(end, "</divineName></seg>");
+ end+=19;
+ }
+ continue;
+ }
+ // these are places where we unnecessarily stop and then start otPassage
+ // we could make the otPassage logic work better, but these exceptions clean
+ // things up for now.
+ if (replaceFirst(outstring, "</seg>’<seg type=\"otPassage\">s", "’s") > -1) continue;
+ if (replaceFirst(outstring, "</seg>-<seg type=\"otPassage\">", "-") > -1) continue;
+ if (replaceFirst(outstring, "</seg>,<seg type=\"otPassage\">", ",") > -1) continue;
+ if (replaceFirst(outstring, "</seg>, <seg type=\"otPassage\">", ", ") > -1) continue;
+ if (replaceFirst(outstring, "</seg>! <seg type=\"otPassage\">", "! ") > -1) continue;
+ if (replaceFirst(outstring, "</seg>; <seg type=\"otPassage\">", "; ") > -1) continue;
+ if (replaceFirst(outstring, "</seg> <seg type=\"otPassage\">", " ") > -1) continue;
+ if (replaceFirst(outstring, "</seg>, ‘<seg type=\"otPassage\">", ", ‘") > -1) continue;
+ if (replaceFirst(outstring, "</seg>,’ <seg type=\"otPassage\">", ",’ ") > -1) continue;
+ if (note) {
+ outstr = outstring.c_str();
+ found = strstr(outstr, "{");
+ start = (found) ? (found - outstr) : -1;
+ if (start > -1) {
+ outstring.replace(start, 1, "<hi type=\"italic\">");
+ continue;
+ }
+ outstr = outstring.c_str();
+ found = strstr(outstr, "}");
+ start = (found) ? (found - outstr) : -1;
+ if (start > -1) {
+ outstring.replace(start, 1, "</hi>");
+ continue;
+ }
+ s = outstring.find("</reference></hi>");
+ if (s != string::npos) {
+ const size_t s2 = outstring.find("<hi type=\"italic\"><reference");
+ if (s2 == string::npos) {
+ outstring.replace(s, 17, "</hi></reference>");
+ continue;
+ }
+ }
+ }
+// if (replaceFirst(outstring, ")</hi>", "</hi>)") > -1) continue;
+ break;
+ }
+string getNoteBody(int fdn, string &noteLine, string osisID, int currentBookNo, int currentChapter, int currentVerse, string nStr, const char *nx) {
+ char *nbuffer = 0;
+ int start = -1;
+ const char *found = (const char *)-1;
+ const char *outstr = (const char *)-1;
+ while (start == -1) {
+ if (!noteLine.length() && fdn > -1) {
+ if (readline(fdn, &nbuffer)) return ""; // eof
+ noteLine = nbuffer;
+ }
+ outstr = noteLine.c_str();
+ found = strstr(outstr, "{{");
+ start = (found) ? (found - outstr) : -1;
+ // be sure we have at least one of these. We've found note lines without any actual notes
+ if (found) found = strstr(outstr, "<R");
+ if (!found) found = strstr(outstr, "<N");
+ if (!found) start = -1;
+ if (start == -1) noteLine = "";
+ }
+ if (start > -1) {
+ found = strstr(outstr, "}}");
+ int end = (found) ? (found - outstr) : -1;
+ end++;
+ int book, chap;
+ string bkch = noteLine.substr(start+2, end-start-2);
+ sscanf(bkch.c_str(), "%d::%d", &book, &chap);
+ int vNumEnd = noteLine.find_first_of(" ", end);
+ int verse = atoi(noteLine.substr(end+1, vNumEnd-end-1).c_str());
+ if ((book != currentBookNo) || (chap != currentChapter) || ((verse != currentVerse) && /*kindof allow case in header before verse marker */ (verse != currentVerse+1))) {
+ fprintf(stderr, "Not correct note line(%s - %s - %d:%d, %d:%d, %d:%d): %s\n\n", osisID.c_str(), nStr.c_str(), currentBookNo, book, currentChapter, chap, currentVerse, verse, noteLine.c_str());
+ exit(-1);
+ }
+ }
+ else {
+ fprintf(stderr, "Not a note line: %s\n\n", noteLine.c_str());
+ exit(-1);
+ }
+ outstr = noteLine.c_str();
+ string tag = (string)"<"+(string)nx+nStr+(string)">";
+ found = strstr(outstr, tag.c_str());
+ start = (found) ? (found - outstr) : -1;
+ string retVal = "";
+ if (start > -1) {
+ start += tag.length();
+ const char *nFound = strstr(outstr+start, " <N");
+ const char *rFound = strstr(outstr+start, " <R");
+ found = (nFound && (!rFound || nFound < rFound)) ? nFound : rFound;
+ int end = (found) ? (found - outstr) : -1;
+ if (end<0) end = noteLine.length();
+ retVal = noteLine.substr(start, end-start);
+ }
+ unicodeTicks(retVal);
+ if (*nx == 'R') {
+ // the first "} {" gets collapsed to a single space, e.g. {Luke} {9:10-17} becomes {Luke 9:10-17}
+ outstr = retVal.c_str();
+ found = strstr(outstr, "} {");
+ start = (found) ? (found - outstr) : -1;
+ if (start > -1) {
+ retVal.replace(start, 3, " ");
+ }
+ outstr = retVal.c_str();
+ found = strstr(outstr, ";}");
+ start = (found) ? (found - outstr) : -1;
+ if (start > -1) {
+ retVal.replace(start, 2, "};");
+ }
+ VerseKey key = osisID.c_str();
+//std::cerr << osisID.c_str() << ": Convert to OSISRef: " << retVal.c_str();
+ retVal = VerseKey::convertToOSIS(retVal.c_str(), &key);
+//std::cerr << ": " << retVal.c_str();
+ }
+ prepLine(retVal, 0, true);
+ if (nbuffer)
+ delete [] nbuffer;
+//std::cerr << ": " << retVal.c_str() << "\n";
+ return retVal;