diff options
Diffstat (limited to 'modules/nasb1995/lockosis.cpp')
-rw-r--r-- | modules/nasb1995/lockosis.cpp | 1233 |
1 files changed, 1233 insertions, 0 deletions
diff --git a/modules/nasb1995/lockosis.cpp b/modules/nasb1995/lockosis.cpp new file mode 100644 index 0000000..4209a8f --- /dev/null +++ b/modules/nasb1995/lockosis.cpp @@ -0,0 +1,1233 @@ +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <clocale> +#include <locale> + + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <iostream> +#include <string> + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +#ifdef HAVESWORD +#include <versekey.h> +#include <localemgr.h> +#include <stringmgr.h> +using namespace sword; +#endif + +using std::string; +using std::cout; +using std::endl; + +static const char *osisOTBooks[] = { + "Gen", "Exod", "Lev", "Num", "Deut", "Josh", "Judg", + "Ruth", "1Sam", "2Sam", "1Kgs", "2Kgs", "1Chr", + "2Chr", "Ezra", "Neh", "Esth", "Job", "Ps", "Prov", + "Eccl", "Song", "Isa", "Jer", "Lam", + "Ezek", "Dan", "Hos", "Joel", "Amos", "Obad", "Jonah", "Mic", + "Nah", "Hab", "Zeph", "Hag", "Zech", "Mal", +// extra-Biblical + "Bar", "PrAzar", "Bel", "Sus", "1Esd", "2Esd", "AddEsth", + "EpJer", "Jdt", "1Macc", "2Macc", "3Macc", "4Macc", "PrMan", + "Ps151", "Sir", "Tob", "Wis"}; +static const char *osisNTBooks[] = { + "Matt", "Mark", "Luke", "John", "Acts", "Rom", "1Cor", + "2Cor", "Gal", "Eph", "Phil", "Col", + "1Thess", "2Thess", "1Tim", "2Tim", "Titus", + "Phlm", "Heb", "Jas", "1Pet", "2Pet", "1John", "2John", + "3John", "Jude", "Rev"}; +static const char **osisBooks[] = { osisOTBooks, osisNTBooks }; + +const char nasbMax[2] = {39, 27}; + +char readline(int fd, char **buf) { + char ch; + if (*buf) + delete [] *buf; + *buf = 0; + int len; + + + long index = lseek(fd, 0, SEEK_CUR); + // clean up any preceding white space + while ((len = read(fd, &ch, 1)) == 1) { + if ((ch != 10) && (ch != 13) && (ch != ' ') && (ch != '\t')) + break; + else index++; + } + + + while (ch != 10) { + if ((len = read(fd, &ch, 1)) != 1) + break; + } + + int size = (lseek(fd, 0, SEEK_CUR) - index) - 1; + + *buf = new char [ (size+2) * 2 ]; + + if (size > 0) { + lseek(fd, index, SEEK_SET); + read(fd, *buf, size); + read(fd, &ch, 1); //pop terminating char + (*buf)[size] = 0; + + // clean up any trailing junk on buf + int buflen = strlen(*buf); + for (char *it = *buf+(buflen-1); it > *buf; it--) { + if ((*it != 10) && (*it != 13) && (*it != ' ') && (*it != '\t')) + break; + else *it = 0; + } + // convert all spanish characters to combined + for (unsigned char *it = (unsigned char *)(*buf)+1; *it; it++) { + switch (*it) { +/* + case 0xE2 : // ‘ + if (isalpha(it[-1]) && it[1] == 0x80 && it[2] == 0x98) { + memmove(it, it+1, buflen - (it-(unsigned char *)*buf)); + buflen--; + it[0] = 0xcc; + it[1] = 0x80; // yeah, I know it's already 0x80, but that's just a coincidence + } + else { + fprintf(stderr, "oddity: %s\n", *buf); + exit(-4); + } + break; +*/ + case 0x60 : // ` + if (isalpha(it[-1])) { + memmove(it+1, it, buflen - (it-(unsigned char *)*buf) + 1); + buflen++; + it[0] = 0xcc; + it[1] = 0x80; + } + else { +// fprintf(stderr, "oddity: %s\n", *buf); +// exit(-4); + } + break; + case 0x7E : // ~ + memmove(it+1, it, buflen - (it-(unsigned char *)*buf) + 1); + buflen++; + it[0] = 0xcc; + it[1] = 0x83; + break; + } + } + } + else **buf = 0; + return !len; +} + + +void outHeader(); +void outTrailer(); +void unicodeTicks(string &outstring); +void prepLine(string &outstring, int currentTestament, bool note); +string getNoteBody(int nfd, string ¬eLine, string osisID, int currentBookNo, int currentChapter, int currentVerse, string nStr, const char *nx); +int replaceFirst(string &haystack, string needle, string replacement); + + + + +int main(int argc, char **argv) { + +#ifdef HAVESWORD + LocaleMgr::getSystemLocaleMgr()->setDefaultLocaleName("es"); +#endif + + std::setlocale(LC_CTYPE, ""); + + // Let's test our command line arguments + if (argc < 2) { +// fprintf(stderr, "usage: %s <vpl_file> </path/to/mod> [0|1 - file includes prepended verse references]\n", argv[0]); + fprintf(stderr, "usage: %s <biblefile> [notesfile]\n\n", argv[0]); + exit(-1); + } + + + // Let's see if we can open our input file + int fd = open(argv[1], O_RDONLY|O_BINARY); + if (fd < 0) { + fprintf(stderr, "error: %s: couldn't open input file: %s \n", argv[0], argv[1]); + exit(-2); + } + + int fdn = -1; + if (argc > 2) { + fdn = open(argv[2], O_RDONLY|O_BINARY); + if (fdn < 0) { + fprintf(stderr, "error: %s: couldn't open input file: %s \n", argv[0], argv[2]); + exit(-2); + } + } + + outHeader(); + + string header; + char *buffer = 0; + char *nbuffer = 0; + int result = 0; + string currentBook = ""; + int currentBookNo = 0; + int currentTestament = 0; + int currentChapter = 0; + int currentVerse = 0; + bool inBook = false; + bool inChapter = false; + bool inVerse = false; + string noteLine = ""; + string preChapNote = ""; + string outstring; + result = readline(fd, &buffer); + string lookahead = buffer; + unicodeTicks(lookahead); + prepLine(lookahead, currentTestament, false); + do { + result = readline(fd, &buffer); + if (lookahead.length()) { + string savebuf = buffer; + if (buffer) + delete [] buffer; + buffer = new char [ lookahead.length() + 1]; + strcpy(buffer, lookahead.c_str()); + lookahead = savebuf; + unicodeTicks(lookahead); + prepLine(lookahead, currentTestament, false); + result = 0; + } + else if (!result) { + string savebuf = buffer; + result = readline(fd, &buffer); + lookahead = buffer; + unicodeTicks(lookahead); + prepLine(lookahead, currentTestament, false); + + + if (buffer) + delete [] buffer; + buffer = new char [ savebuf.length() + 1]; + strcpy(buffer, savebuf.c_str()); + result = 0; + } + + outstring = buffer; + + + + // BOOK NAMES <BN> + if (!strncmp(outstring.c_str(), "<BN>", 4)) { + string book = outstring.c_str()+4; + book = book.substr(0, book.find_first_of("<")); + outstring = ""; + if (inVerse) { + outstring += "</verse>"; + inVerse = false; + } + if (inChapter) { + outstring += "</chapter>"; + inChapter = false; + } + if (inBook) { + outstring += "</div>"; + inBook = false; + } + outstring += (string)"<div type=\"book\" osisID=\""; + + VerseKey bookName(book.c_str()); + if (bookName.popError()) { + fprintf(stderr, "error: %s: couldn't find book match for %s. Please check book array in conversion program.\n", argv[0], book.c_str()); + exit(-3); + } + currentBook = bookName.getOSISBookName(); + outstring += currentBook; + currentTestament = bookName.getTestament()-1; + + outstring += (string)"\"><title type=\"main\" subType=\"x-Book\">" + book + "</title>"; + inBook = true; + } + + + // CHAPTERS + //<SN>PSALM + if ((!strncmp(outstring.c_str(), "<CN>", 4)) || (!strncmp(outstring.c_str(), "<SN>", 4))) { + string chapterTitle = outstring.c_str()+4; + chapterTitle = chapterTitle.substr(0, chapterTitle.find_first_of("<")); + string chapter = chapterTitle.substr(chapterTitle.find_first_of(" ")+1); + outstring = ""; + if (inVerse) { + outstring += "</verse>"; + inVerse = false; + } + if (inChapter) { + outstring += "</chapter>"; + inChapter = false; + } + outstring += (string)"<chapter osisID=\"" + currentBook + "." + chapter + "\">"; + outstring += (string)"<title type=\"sub\" subType=\"x-Chapter\">" + chapterTitle + "</title>"; + currentChapter = atoi(chapter.c_str()); + inChapter = true; + currentVerse = 1; + } + + //<SF> + if (!strncmp(outstring.c_str(), "<SF>", 4)) { + string heading = outstring.c_str()+4; + heading = heading.substr(0, heading.find("</SF>")); + outstring = ""; + + if (!strncmp(lookahead.c_str(), "<PM>", 4)) { + lookahead.erase(0, 4); + outstring += "<milestone type=\"line\" subType=\"x-PM\"/>"; + } + if (inVerse) { + outstring += "</verse>\n"; + inVerse = false; + } + outstring += (string)"<title type=\"acrostic\" canonical=\"true\" subType=\"x-preverse\">" + heading + (string)"</title>"; + } + + //<SH> + if ((!strncmp(outstring.c_str(), "<SH>", 4)) || (!strncmp(outstring.c_str(), "<SHI>", 5))) { + bool shi = outstring.c_str()[3] == 'I'; + if (shi) { + fprintf(stderr, "found shi.\n"); + } + string heading = outstring.c_str()+(shi ? 5 : 4); + heading = heading.substr(0, heading.find(shi ? "</SHI>" : "</SH>")); + outstring = ""; + + if (!strncmp(lookahead.c_str(), "<PM>", 4)) { + lookahead.erase(0, 4); + outstring += "<milestone type=\"line\" subType=\"x-PM\"/>"; + } + if (inVerse) { + outstring += "</verse>\n"; + inVerse = false; + } + outstring += (string)"<title type=\"section\""; + if (!shi) outstring += (string)" subType=\"x-preverse\""; + outstring += (string)">" + heading + (string)"</title>"; + } + if (!strncmp(outstring.c_str(), "<SS>", 4)) { + string heading = (outstring.c_str()+4); + heading = heading.substr(0, heading.find("</SS>")); + outstring = (string)"<title type=\"psalm\" canonical=\"true\" subType=\"x-preverse\">" + heading + (string)"</title>"; + } + if (!strncmp(outstring.c_str(), "<SB>", 4)) { + string heading = (outstring.c_str()+4); + heading = heading.substr(0, heading.find("</SB>")); + outstring = (string)"<title type=\"scope\" subType=\"x-preverse\">" + heading + (string)"</title>"; + } + + + + // {{x::y}} + // DUH, find_first_of looks for the first occurance of ANY single character of the supplied string +// int start = outstring.find_first_of("{{"); // this is whacked and fails on ">[{.." Try it! + const char *outstr = outstring.c_str(); + const char *found = strstr(outstr, "{{"); + int start = (found) ? (found - outstr) : -1; +// ---- end of whacked replacement + + if (start > -1) { + found = strstr(outstr, "}}"); + int end = (found) ? (found - outstr) : -1; + end++; + int testmt = 0, book = 0, chap = 0; + string bkch = outstring.substr(start+2, end-start-2); + sscanf(bkch.c_str(), "%d::%d", &book, &chap); + currentChapter = chap; + int vNumEnd = outstring.find_first_of(" ", end); + currentVerse = atoi(outstring.substr(end+1, vNumEnd-end-1).c_str()); + currentBookNo = book; + if (book > nasbMax[0]) { + testmt = 1; + book -= nasbMax[0]; + } + if (currentBook != osisBooks[testmt][book-1]) { + fprintf(stderr, "error: %s: Found a book/chapter identifier out of place: {{%d::%d}} in book %s, chapter %d; bkch = %s; outstring = %s\n", argv[0], book, chap, currentBook.c_str(), currentChapter, bkch.c_str(), outstring.c_str()); + exit(-3); + } + char chapString[20], verseString[20]; + sprintf(chapString, "%d", currentChapter); + sprintf(verseString, "%d", currentVerse); + string newstring = ""; + if (inVerse) { + newstring += "</verse>"; + inVerse = false; + } + newstring += "<verse osisID=\"" + currentBook + (string)"." + (string)chapString + (string)"." + (string)verseString + (string) "\">"; + outstring.replace(start, vNumEnd-start+1, newstring); + inVerse = true; + noteLine = preChapNote; + preChapNote = ""; + } + + + + // multiple occurances on a line stuff + while (1) { + + // NOTE + outstr = outstring.c_str(); + found = strstr(outstr, "<N"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + int end = outstring.find_first_of(">", start+1); + string nStr = outstring.substr(start+2, end-start-2); + + // NOTE <N#> + if (isdigit(nStr.c_str()[0]) || strchr("ABCDEFG", nStr.c_str()[0])) { + bool preChap = strchr("A", nStr.c_str()[0]); + if (preChap) noteLine = ""; + char chapString[20], verseString[20]; + sprintf(chapString, "%d", currentChapter); + sprintf(verseString, "%d", currentVerse); + string osisID = (currentBook + (string)"." + (string)chapString + (string)"." + (string) verseString).c_str(); + string noteBody = getNoteBody(fdn, noteLine, osisID, currentBookNo, currentChapter, currentVerse, nStr, "N"); + if (preChap) preChapNote = noteLine; + outstring.replace(start, end-start+1, (string)"<note type=\"explanation\" osisID=\"" + osisID + (string)".note." + nStr + (string) "\" n=\"" + nStr + (string) "\">" + noteBody + "</note>"); +// outstring.replace(start, end-start+1, (string)"--note--"); + continue; + } + } + + + + // <RS> + if (replaceFirst(outstring, "<RS>", "<q who=\"Jesus\">") > -1) continue; + + // <RS> + if (replaceFirst(outstring, "</RS>", "</q>") > -1) continue; + + // <RT> + if (replaceFirst(outstring, "<RT>", "<milestone type=\"x-RT\"/>") > -1) continue; + + if (replaceFirst(outstring, "<SHI>", "<title type=\"section\">") > -1) continue; + if (replaceFirst(outstring, "</SHI>", "</title>") > -1) continue; + + // <?> + if (replaceFirst(outstring, "<?>", "¿") > -1) continue; + + // <!> + if (replaceFirst(outstring, "<!>", "¡") > -1) continue; + + outstr = outstring.c_str(); + found = strstr(outstr, "<R"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + int end = outstring.find_first_of(">", start+1); + string nStr = outstring.substr(start+2, end-start-2); + + char chapString[20], verseString[20]; + sprintf(chapString, "%d", currentChapter); + sprintf(verseString, "%d", currentVerse); + string osisID = (currentBook + (string)"." + (string)chapString + (string)"." + (string) verseString).c_str(); + string noteBody = getNoteBody(fdn, noteLine, osisID, currentBookNo, currentChapter, currentVerse, nStr, "R"); + outstring.replace(start, end-start+1, (string)"<note type=\"crossReference\" osisID=\"" + osisID + (string)".xref." + nStr + (string) "\" n=\"" + nStr + (string)"\">" + noteBody + "</note>"); + continue; + } + // transChange added {} + outstr = outstring.c_str(); + found = strstr(outstr, "{"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 1, (string)"<transChange type=\"added\">"); + size_t end = outstring.find_first_of("}", start+1); + if (end != string::npos) { + outstring.erase(end, 1); + } + else end = outstring.size()-1; + while ((!isalpha(outstring[end]))&&(outstring[end]!='>')) end--; + outstring.insert(end+1, "</transChange>"); + + continue; + + } +/* + // transChange tenseChange * + outstr = outstring.c_str(); + found = strstr(outstr, "*"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 1, (string)"<transChange type=\"tenseChanged\">"); + for (end = start + 34; (end < outstring.length()); end++) { + if ((!isalpha(outstring[end])) && + (outstring[end] != '\'')) + break; + } + outstring.replace(end, 1, "</transChange>"); + continue; + + } +*/ + // <,> + if (replaceFirst(outstring, "<,>", "<milestone type=\"x-superiorComma\"/>") > -1) continue; + + // <NA> + if (replaceFirst(outstring, "<NA>", "<milestone type=\"x-superiorOne\"/>") > -1) continue; + + // <NB> + if (replaceFirst(outstring, "<NB>", "<milestone type=\"x-superiorTwo\"/>") > -1) continue; + + // <NC> + if (replaceFirst(outstring, "<NC>", "<milestone type=\"x-superiorThree\"/>") > -1) continue; + + // paragraph break <PM> + if (replaceFirst(outstring, "<PM>", "<milestone type=\"line\" subType=\"x-PM\"/>") > -1) continue; + + // poetry break <PN> + if (replaceFirst(outstring, "<PN>", "<milestone type=\"x-Poetry\" />") > -1) continue; + + // poetry break <PO> + if (replaceFirst(outstring, "<PO>", "<milestone type=\"line\" subType=\"x-Poetry\" />") > -1) continue; + + // poetry break <PR> + if (replaceFirst(outstring, "<PR>", "<milestone type=\"x-PoetryEnd\" />") > -1) continue; + + // letter indent <HL> + if (replaceFirst(outstring, "<HL>", "<milestone type=\"x-HL\" />") > -1) continue; + + // letter indent <HLL> + if (replaceFirst(outstring, "<HLL>", "<milestone type=\"line\" subType=\"x-HLL\" />") > -1) continue; + break; + } + + int strongsStart = 0; + int transChangeStart = 0; + bool strongsStartFound = false; + bool intoken = false; + bool intag = false; + bool inNote = false; + int tokenStart = 0; + string lastToken = ""; + string previousToken = ""; + int tenseChange = -1; + // strongs numbers + for (unsigned int i = 0; i < outstring.length(); ++i) { + if ((!strongsStartFound) && (!inNote) && (!intoken)) { + if (!intag) { + if ((outstring[i] != ' ') && ((isalpha(outstring[i])) || (isdigit(outstring[i])))) { + strongsStart = i; + strongsStartFound = true; + } + } + else if (!strncmp(lastToken.c_str(), "hi", 2)) { + if ((outstring[i] != ' ') && ((isalpha(outstring[i])) || (isdigit(outstring[i])))) { + strongsStart = tokenStart - 1; + strongsStartFound = true; + } + } + + } + if (outstring[i] =='*') + tenseChange = i; + if (outstring[i] == '<') { tokenStart = i+1; intoken = true; } + if (outstring[i] == '>') { + intoken = false; + previousToken = lastToken; + lastToken = outstring.substr(tokenStart, i-tokenStart); + // Not completely safe, but works for current NASB data + if (strchr(lastToken.c_str(), '/')) + intag = false; + else intag = true; + if ((intag)&&(!strncmp(lastToken.c_str(), "transChange", 11))) { + transChangeStart = i+1; + } +/* + if (!strncmp(lastToken.c_str(), "seg", 3)) { + strongsStartFound = false; + strongsStart = i+1; + } + if (!strncmp(lastToken.c_str(), "divineName", 10)) { + strongsStartFound = false; + strongsStart = i+1; + } +*/ + if (!strncmp(lastToken.c_str(), "/divineName", 10)) { + strongsStartFound = false; + strongsStart = i+1; + } + if (!strncmp(lastToken.c_str(), "note", 4)) { + strongsStartFound = false; + strongsStart = i+1; + inNote = true; + } + if (!strncmp(lastToken.c_str(), "/note", 5)) { + strongsStartFound = false; + strongsStart = i+1; + inNote = false; + } + if (!strncmp(lastToken.c_str(), "q who=\"Jesus\"", 13)) { + strongsStartFound = false; + strongsStart = i+1; + } + if (!strncmp(lastToken.c_str(), "/q", 2)) { + strongsStartFound = false; + strongsStart = i+1; + } + if (!strncmp(lastToken.c_str(), "seg type=\"otPassage\"", 19)) { + strongsStartFound = false; + strongsStart = i+1; + } + if (!strncmp(lastToken.c_str(), "/transChange", 12)) { + strongsStartFound = false; + strongsStart = i+1; + } + if (!strncmp(lastToken.c_str(), "milestone", 9)) { + strongsStartFound = false; + strongsStart = i+1; + } + if (!strncmp(lastToken.c_str(), "/seg", 4)) { + strongsStartFound = false; + strongsStart = i+1; + } + if (!strncmp(lastToken.c_str(), "verse", 5)) { + strongsStartFound = false; + strongsStart = i+1; + } + + if ((!strncmp(lastToken.c_str(), "verse", 5))) { + intag = false; + } + + if ( (!strncmp(lastToken.c_str(), "MG", 2)) || + (!strncmp(lastToken.c_str(), "MH", 2))) { + + // insert </w> + // fix tenseChange to be inside <w> so we can include a subset of the <w> content. + outstring.replace(tokenStart-1, lastToken.length()+2, ((tenseChange > -1) ? "</w></transChange>":"</w>")); + i = (tokenStart-1) + ((tenseChange > -1) ? 18:4); + + // build <w ... > tag + char lang = lastToken[1]; // H or G + lastToken.replace(0, 1, "<w lemma=\"strong:"); + while ((start = lastToken.find(", ")) > -1) { + lastToken.replace(start, 2, (string)" strong:" + lang); + } + lastToken += "\">"; + intag = false; + + + if (tenseChange > -1) { + lastToken.insert(0, "<transChange type=\"tenseChange\">"); + } + if (!strncmp(previousToken.c_str(), "transChange type=\"added", 23)) { + outstring.insert(transChangeStart, lastToken); + intag = true; + i += lastToken.length() - 1; // (-1 because we're about to i++) + } + + // insert our token + else { + outstring.insert(strongsStart, lastToken); + i += lastToken.length() - 1; // (-1 because we're about to i++) + } + strongsStart = i+1; + strongsStartFound = false; + if (tenseChange > -1) { + // relocate because position may have changed from all the token inserts + const char *buf = outstring.c_str(); + tenseChange = (strchr(buf, '*') - buf); + outstring.erase(tenseChange, 1); + tenseChange = -1; + } + } + } + } + + + // clean up stuff that didn't work quite right + while (1) { + + // divineName strongs tags misorderings + string target = "</w></divineName></seg>"; + size_t s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "</divineName></seg></w>"); + continue; + } + target = "</w>,</divineName></seg>"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "</divineName></seg></w>,"); + continue; + } + + break; + } + + + std::cout << outstring; + if (!result) std::cout << "\n"; + } + while (!result); + outstring = ""; + if (inVerse) { + outstring += "</verse>"; + inVerse = false; + } + if (inChapter) { + outstring += "</chapter>"; + inChapter = false; + } + if (inBook) { + outstring += "</div>"; + inBook = false; + } + std::cout << outstring; + + outTrailer(); + + // clean up our buffers that readline might have allocated + if (buffer) + delete [] buffer; + if (nbuffer) + delete [] nbuffer; + + close(fd); + + if (fdn > -1) + close(fdn); +} + +void outHeader() { + +std::cout << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" << "\n"; +std::cout << "<osis xmlns=\"http://www.bibletechnologies.net/2003/OSIS/namespace\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.bibletechnologies.net/2003/OSIS/namespace osisCore.1.9.xsd\">" << "\n"; +std::cout << " <osisText osisIDWork=\"nasb\" xml:lang=\"en\">" << "\n"; +std::cout << " <header>" << "\n"; +std::cout << " <work osisWork=\"nasb\">" << "\n"; +std::cout << " <title>NEW AMERICAN STANDARD BIBLE</title>" << "\n"; +std::cout << " <identifier type=\"OSIS\">Bible.en.NASB.1995</identifier>" << "\n"; +std::cout << " <rights>Copyright (C) 1960,1962,1963,1968,1971,1972,1973,1975,1977,1995 by THE LOCKMAN FOUNDATION</rights>" << "\n"; +std::cout << " <refSystem>Bible</refSystem>" << "\n"; +std::cout << " </work>" << "\n"; +std::cout << " <work osisWork=\"strongs\">" << "\n"; +std::cout << " </work>" << "\n"; +std::cout << " </header>" << "\n"; + +} + +void outTrailer() { + std::cout << "</osisText>\n"; + std::cout << "</osis>\n"; +} + +void unicodeTicks(string &outstring) { + + while (1) { + const char *outstr; + const char *found; + int start; + + outstr = outstring.c_str(); + found = strstr(outstr, "``"); + char uchar[4]; uchar[0]=0xe2; uchar[1]=0x80; uchar[2]=0x9c; uchar[3]=0; + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 2, "“"); + continue; + } + + outstr = outstring.c_str(); + found = strstr(outstr, "`"); + uchar[0]=0xe2; uchar[1]=0x80; uchar[2]=0x98; uchar[3]=0; + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 1, uchar); + continue; + } + + outstr = outstring.c_str(); + found = strstr(outstr, "'"); + uchar[0]=0xe2; uchar[1]=0x80; uchar[2]=0x99; uchar[3]=0; + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 1, uchar); + continue; + } + + outstr = outstring.c_str(); + found = strstr(outstr, "\""); + uchar[0]=0xe2; uchar[1]=0x80; uchar[2]=0x9d; uchar[3]=0; + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 1, uchar); + continue; + } + + break; + } +} + + +// return offset of occurence replace; otherwise -1 +int replaceFirst(string &haystack, string needle, string replacement) { + const char *outstr = haystack.c_str(); + const char *found = strstr(outstr, needle.c_str()); + int start = (found) ? (found - outstr) : -1; + if (start > -1) { + haystack.replace(start, needle.size(), replacement); + } + return start; +} + + +void prepLine(string &outstring, int currentTestament, bool note) { + int end = 0; + while (1) { +// ------------------------------------------ +// redundant or unneeded or unknown markers + size_t s; + + // <1EVA> + if (replaceFirst(outstring, "<1EVA>", "") > -1) continue; + + // <1EVB> + if (replaceFirst(outstring, "<1EVB>", "") > -1) continue; + + // <FA> + if (replaceFirst(outstring, "<FA>", "") > -1) continue; + + // <PR> + if (replaceFirst(outstring, "<PR>", "") > -1) continue; + + // <V> + if (replaceFirst(outstring, "<V>", "") > -1) continue; + + // <T> + if (replaceFirst(outstring, "<T>", "") > -1) continue; + + // <P> + if (replaceFirst(outstring, "<P>", "") > -1) continue; + + // <C> + if (replaceFirst(outstring, "<C>", "") > -1) continue; + + // <CC> + if (replaceFirst(outstring, "<CC>", "") > -1) continue; + + // <CP> + if (replaceFirst(outstring, "<CP>", "") > -1) continue; + + // <$F...>> + s = outstring.find("<$F"); + if (s != string::npos) { + size_t e = outstring.find(">>", s); + outstring.erase(s, e-s+2); + continue; + } + // <EOV> + s = outstring.find("<EOV>"); + if (s != string::npos) { + size_t e = outstring.find("</EOV>", s); + outstring.erase(s, e-s+6); + continue; + } +// ---------------------------------------------- + + // <A> + if (replaceFirst(outstring, "<A>", "<milestone type=\"line\" subType=\"x-A\"/>") > -1) continue; + + // ~“ + char uchar[6]; uchar[0]=0xcc; uchar[1]=0x83; uchar[2]=0xe2; uchar[3]=0x80; uchar[4]=0x9c; uchar[5]=0; +// string target = "~“"; + string target = uchar; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"“\"/>"); + continue; + } + // +« + target = "+«"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"«\"/>"); + continue; + } + // +» + target = "+»"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"»\"/>"); + continue; + } + // +“ + target = "+“"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"“\"/>"); + continue; + } + // +” + target = "+”"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"”\"/>"); + continue; + } + // +‘ + target = "+‘"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"‘\"/>"); + continue; + } + // +’ + target = "+’"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"’\"/>"); + continue; + } + // -« + target = "-«"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" subType=\"optional\" marker=\"«\"/>"); + continue; + } + // -» + target = "-»"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" subType=\"optional\" marker=\"»\"/>"); + continue; + } + // -“ + target = "-“"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" subType=\"optional\" marker=\"“\"/>"); + continue; + } + // -” + target = "-”"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" subType=\"optional\" marker=\"”\"/>"); + continue; + } + // -‘ + target = "-‘"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" subType=\"optional\" marker=\"‘\"/>"); + continue; + } + // -’ + target = "-’"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" subType=\"optional\" marker=\"’\"/>"); + continue; + } + + // ~‘ + uchar[0]=0xcc; uchar[1]=0x83; uchar[2]=0xe2; uchar[3]=0x80; uchar[4]=0x98; uchar[5]=0; +// target = "~‘"; + target = uchar; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"‘\"/>"); + continue; + } + if (replaceFirst(outstring, "<B>", "<hi type=\"bold\">") > -1) continue; + if (replaceFirst(outstring, "</B>", "</hi>") > -1) continue; + + if (replaceFirst(outstring, "L<\\>{ORD}</>'<\\>{S}</>", "<seg><divineName>Lord's</divineName></seg>") > -1) continue; + if (replaceFirst(outstring, "L<\\>{ORD’S}</>", "<seg><divineName>Lord’s</divineName></seg>") > -1) continue; + if (replaceFirst(outstring, "L<\\>{ORD}</>’<\\>{S}</>", "<seg><divineName>Lord’s</divineName></seg>") > -1) continue; + if (replaceFirst(outstring, "L<\\>{ORD}</>’<\\>{S} </>", "<seg><divineName>Lord’s</divineName></seg> ") > -1) continue; + if (replaceFirst(outstring, "L<\\>ORD</>’<\\>S<MH3068></>", "<seg><divineName>Lord’s<MH3068></divineName></seg>") > -1) continue; + if (replaceFirst(outstring, "L<\\>{ORD,}</>", "<seg><divineName>Lord</divineName></seg>,") > -1) continue; + if (replaceFirst(outstring, "L<\\>{ORD}</>", "<seg><divineName>Lord</divineName></seg>") > -1) continue; + if (replaceFirst(outstring, "L<\\>{ORD} </>", "<seg><divineName>Lord</divineName></seg> ") > -1) continue; + if (replaceFirst(outstring, "L}<\\>{ORD}</>{", "<seg><divineName>Lord</divineName></seg>") > -1) continue; + if (replaceFirst(outstring, "L}<\\>{ORD}</>", "<seg><divineName>Lord</divineName></seg>}") > -1) continue; + if (replaceFirst(outstring, "S<\\>{EN~OR}</>", "<seg><divineName>Sen~or</divineName></seg>") > -1) continue; + if (replaceFirst(outstring, "S<\\>{EÑOR}</>", "<seg><divineName>Señor</divineName></seg>") > -1) continue; + if (replaceFirst(outstring, "Y<\\>{AH,}</>", "<seg><divineName>Yah</divineName></seg>,") > -1) continue; + if (replaceFirst(outstring, "Y<\\>{AH,} </>", "<seg><divineName>Yah</divineName></seg>, ") > -1) continue; + if (replaceFirst(outstring, "Y<\\>{AH}</>", "<seg><divineName>Yah</divineName></seg>") > -1) continue; + + // Do these first before Daniel Inscriptions + // LB = add macron, only with 'a': ā + if (replaceFirst(outstring, "a<LB>", "ā") > -1) continue; + if (replaceFirst(outstring, "E<LE>", "Ē") > -1) continue; + if (replaceFirst(outstring, "e<LE>", "ē") > -1) continue; + + if (replaceFirst(outstring, "MENE", "<hi type=\"inscription\">Mene</hi>") > -1) continue; + if (replaceFirst(outstring, "M<\\>ENE</>", "<hi type=\"inscription\">Mene</hi>") > -1) continue; + if (replaceFirst(outstring, "M<\\>ENE:</>", "<hi type=\"inscription\">Mene</hi>:") > -1) continue; + if (replaceFirst(outstring, "TEKEL", "<hi type=\"inscription\">Tekel</hi>") > -1) continue; + if (replaceFirst(outstring, "T<\\>EKEL</>", "<hi type=\"inscription\">Tekel</hi>") > -1) continue; + if (replaceFirst(outstring, "T<\\>EKEL:</>", "<hi type=\"inscription\">Tekel</hi>:") > -1) continue; + if (replaceFirst(outstring, "UPHARSIN", "<hi type=\"inscription\">Upharsin</hi>") > -1) continue; + if (replaceFirst(outstring, "UFARSIN", "<hi type=\"inscription\">Ufarsin</hi>") > -1) continue; + if (replaceFirst(outstring, "U<\\>FARSIN</>", "<hi type=\"inscription\">Ufarsin</hi>") > -1) continue; + if (replaceFirst(outstring, "PERES", "<hi type=\"inscription\">Peres</hi>") > -1) continue; + if (replaceFirst(outstring, "P<\\>ERES</>", "<hi type=\"inscription\">Peres</hi>") > -1) continue; + if (replaceFirst(outstring, "P<\\>ERES:</>", "<hi type=\"inscription\">Peres</hi>:") > -1) continue; + + if (replaceFirst(outstring, "MENĒ", "<hi type=\"inscription\">Menē</hi>") > -1) continue; + if (replaceFirst(outstring, "M<\\>ENĒ</>", "<hi type=\"inscription\">Menē</hi>") > -1) continue; + if (replaceFirst(outstring, "M<\\>ENĒ:</>", "<hi type=\"inscription\">Menē</hi>:") > -1) continue; + if (replaceFirst(outstring, "TEKĒL", "<hi type=\"inscription\">Tekēl</hi>") > -1) continue; + if (replaceFirst(outstring, "T<\\>EKĒL</>", "<hi type=\"inscription\">Tekēl</hi>") > -1) continue; + if (replaceFirst(outstring, "T<\\>EKĒL:</>", "<hi type=\"inscription\">Tekēl</hi>:") > -1) continue; + if (replaceFirst(outstring, "UPHARSIN", "<hi type=\"inscription\">Upharsin</hi>") > -1) continue; + if (replaceFirst(outstring, "UFARSIN", "<hi type=\"inscription\">Ufarsin</hi>") > -1) continue; + if (replaceFirst(outstring, "U<\\>FARSIN</>", "<hi type=\"inscription\">Ufarsin</hi>") > -1) continue; + if (replaceFirst(outstring, "PERĒS", "<hi type=\"inscription\">Perēs</hi>") > -1) continue; + if (replaceFirst(outstring, "P<\\>ERĒS</>", "<hi type=\"inscription\">Perēs</hi>") > -1) continue; + if (replaceFirst(outstring, "P<\\>ERĒS:</>", "<hi type=\"inscription\">Perēs</hi>:") > -1) continue; + + const char *outstr = outstring.c_str(); + const char *found = strstr(outstr+end, "<\\>"); + int start = (found) ? (found - outstr) : -1; + + if (start > -1) { + for (--start;start;start--) { + if ((!std::isupper(outstring[start])) && + (!strchr("\\/ ~", outstring[start]))) { + break; + } + } + for (start++; outstring[start] == ' '; start++); + if (currentTestament) { + outstring.insert(start, "<seg type=\"otPassage\">"); + start += 22; + } + else { + outstring.insert(start, "<seg><divineName>"); + start += 17; + + int s = replaceFirst(outstring, "L<\\>{ORD}</>", "Lord"); + if (s > -1) end = s+4; + } + + // do small cap logic + bool lower = false; + string token = ""; + for (int charLen = 1; start < (int)outstring.length(); start += charLen) { + const unsigned char *startChar = (const unsigned char *)outstring.c_str()+start; + const unsigned char *endChar = startChar; + SW_u32 testChar = getUniCharFromUTF8(&endChar, true); + charLen = endChar - startChar; // set the size of the UTF-8 sequence + if (!token.size()) { + if (testChar == '<') { + token = "<"; + continue; + } + // what is this? It screws MENE MENE up in Daniel +// if (testChar == ':') +// break; + + if (StringMgr::getSystemStringMgr()->isAlpha(testChar)) { + if (StringMgr::getSystemStringMgr()->isLower(testChar)) + break; + if (lower) + outstring.replace(start, charLen, SWBuf((const char *)startChar, charLen).toLower()); + continue; + } + } + else { + token += testChar; + + if (testChar == '>') { + if (token == "<\\>") { + lower = true; + outstring.erase(start-2, 3); + start -= 3; + } + if (token == "</>") { + lower = false; + outstring.erase(start-2, 3); + end = start - 2; + start -= 3; + unsigned int nextStrongs = outstring.find("<M"); + unsigned int nextUp = outstring.find("</>"); + if (nextStrongs != string::npos && nextUp != string::npos && nextStrongs < nextUp) { + break; + } + } + // end divineName if we hit a PO in the middle + if (token == "<PO>") { + break; + } + unsigned int s = token.find("<N"); + if (s == string::npos || s > 0) s = token.find("<R"); + if (s == 0 && token.size() > 2) { + if (StringMgr::getSystemStringMgr()->isDigit(token[2])) { + break; + } + } + token = ""; + } + } + } + if (currentTestament) { + outstring.insert(end, "</seg>"); + end+=6; + } + else { + outstring.insert(end, "</divineName></seg>"); + end+=19; + } + continue; + } + + // these are places where we unnecessarily stop and then start otPassage + // we could make the otPassage logic work better, but these exception clean + // thing up for now. + if (replaceFirst(outstring, "</seg>’<seg type=\"otPassage\">s", "’s") > -1) continue; + if (replaceFirst(outstring, "</seg>-<seg type=\"otPassage\">", "-") > -1) continue; + if (replaceFirst(outstring, "</seg>,<seg type=\"otPassage\">", ",") > -1) continue; + if (replaceFirst(outstring, "</seg>, <seg type=\"otPassage\">", ", ") > -1) continue; + if (replaceFirst(outstring, "</seg>! <seg type=\"otPassage\">", "! ") > -1) continue; + if (replaceFirst(outstring, "</seg>; <seg type=\"otPassage\">", "; ") > -1) continue; + if (replaceFirst(outstring, "</seg> <seg type=\"otPassage\">", " ") > -1) continue; + if (replaceFirst(outstring, "</seg>, ‘<seg type=\"otPassage\">", ", ‘") > -1) continue; + if (replaceFirst(outstring, "</seg>,’ <seg type=\"otPassage\">", ",’ ") > -1) continue; + + if (note) { + outstr = outstring.c_str(); + found = strstr(outstr, "{"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 1, "<hi type=\"italic\">"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "}"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 1, "</hi>"); + continue; + } + s = outstring.find("</reference></hi>"); + if (s != string::npos) { + const size_t s2 = outstring.find("<hi type=\"italic\"><reference"); + if (s2 == string::npos) { + outstring.replace(s, 17, "</hi></reference>"); + continue; + } + } + } + +// if (replaceFirst(outstring, ")</hi>", "</hi>)") > -1) continue; + + break; + } +} + +string getNoteBody(int fdn, string ¬eLine, string osisID, int currentBookNo, int currentChapter, int currentVerse, string nStr, const char *nx) { + char *nbuffer = 0; + int start = -1; + const char *found = (const char *)-1; + const char *outstr = (const char *)-1; + + while (start == -1) { + if (!noteLine.length() && fdn > -1) { + if (readline(fdn, &nbuffer)) return ""; // eof + noteLine = nbuffer; + } + outstr = noteLine.c_str(); + found = strstr(outstr, "{{"); + start = (found) ? (found - outstr) : -1; + // be sure we have at least one of these. We've found note lines without any actual notes + if (found) found = strstr(outstr, "<R"); + if (!found) found = strstr(outstr, "<N"); + if (!found) start = -1; + if (start == -1) noteLine = ""; + } + + if (start > -1) { + found = strstr(outstr, "}}"); + int end = (found) ? (found - outstr) : -1; + end++; + int book, chap; + string bkch = noteLine.substr(start+2, end-start-2); + sscanf(bkch.c_str(), "%d::%d", &book, &chap); + int vNumEnd = noteLine.find_first_of(" ", end); + int verse = atoi(noteLine.substr(end+1, vNumEnd-end-1).c_str()); + if ((book != currentBookNo) || (chap != currentChapter) || ((verse != currentVerse) && /*kindof allow case in header before verse marker */ (verse != currentVerse+1))) { + fprintf(stderr, "Not correct note line(%s - %s - %d:%d, %d:%d, %d:%d): %s\n\n", osisID.c_str(), nStr.c_str(), currentBookNo, book, currentChapter, chap, currentVerse, verse, noteLine.c_str()); + exit(-1); + } + } + else { + fprintf(stderr, "Not a note line: %s\n\n", noteLine.c_str()); + exit(-1); + } + + + + + + outstr = noteLine.c_str(); + string tag = (string)"<"+(string)nx+nStr+(string)">"; + found = strstr(outstr, tag.c_str()); + start = (found) ? (found - outstr) : -1; + string retVal = ""; + + if (start > -1) { + start += tag.length(); + const char *nFound = strstr(outstr+start, " <N"); + const char *rFound = strstr(outstr+start, " <R"); + found = (nFound && (!rFound || nFound < rFound)) ? nFound : rFound; + int end = (found) ? (found - outstr) : -1; + if (end<0) end = noteLine.length(); + retVal = noteLine.substr(start, end-start); + } + unicodeTicks(retVal); +#ifdef HAVESWORD + if (*nx == 'R') { + // } { get's deleted. e.g. {Luke} {9:10-17} + outstr = retVal.c_str(); + found = strstr(outstr, "} {"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + retVal.replace(start, 3, " "); + } + outstr = retVal.c_str(); + found = strstr(outstr, ";}"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + retVal.replace(start, 2, "};"); + } + VerseKey key = osisID.c_str(); +//std::cerr << osisID.c_str() << ": Convert to OSISRef: " << retVal.c_str(); + retVal = VerseKey::convertToOSIS(retVal.c_str(), &key); +//std::cerr << ": " << retVal.c_str(); + } +#endif + prepLine(retVal, 0, true); + if (nbuffer) + delete [] nbuffer; +//std::cerr << ": " << retVal.c_str() << "\n"; + return retVal; +} + |