From 0fb84c645eab4f21d9d5237953f7377c51168e39 Mon Sep 17 00:00:00 2001 From: "Troy A. Griffitts" Date: Sat, 10 Sep 2022 09:47:55 +0000 Subject: Generalized NASB95 conversion a bit in preparation of NASB2020 git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@543 07627401-56e2-0310-80f4-f8cd0041bdcd --- modules/nasb95/lockosis.cpp | 1370 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1370 insertions(+) create mode 100644 modules/nasb95/lockosis.cpp (limited to 'modules/nasb95/lockosis.cpp') diff --git a/modules/nasb95/lockosis.cpp b/modules/nasb95/lockosis.cpp new file mode 100644 index 0000000..deaadbb --- /dev/null +++ b/modules/nasb95/lockosis.cpp @@ -0,0 +1,1370 @@ +#include +#include +#include +#include +#include +#include +#include +#include + + +#ifndef __GNUC__ +#include +#else +#include +#endif + +#include +#include + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +#ifdef HAVESWORD +#include +#include +#include +using namespace sword; +#endif + +using std::string; +using std::cout; +using std::endl; + +static const char *osisOTBooks[] = { + "Gen", "Exod", "Lev", "Num", "Deut", "Josh", "Judg", + "Ruth", "1Sam", "2Sam", "1Kgs", "2Kgs", "1Chr", + "2Chr", "Ezra", "Neh", "Esth", "Job", "Ps", "Prov", + "Eccl", "Song", "Isa", "Jer", "Lam", + "Ezek", "Dan", "Hos", "Joel", "Amos", "Obad", "Jonah", "Mic", + "Nah", "Hab", "Zeph", "Hag", "Zech", "Mal", +// extra-Biblical + "Bar", "PrAzar", "Bel", "Sus", "1Esd", "2Esd", "AddEsth", + "EpJer", "Jdt", "1Macc", "2Macc", "3Macc", "4Macc", "PrMan", + "Ps151", "Sir", "Tob", "Wis"}; +static const char *osisNTBooks[] = { + "Matt", "Mark", "Luke", "John", "Acts", "Rom", "1Cor", + "2Cor", "Gal", "Eph", "Phil", "Col", + "1Thess", "2Thess", "1Tim", "2Tim", "Titus", + "Phlm", "Heb", "Jas", "1Pet", "2Pet", "1John", "2John", + "3John", "Jude", "Rev"}; +static const char **osisBooks[] = { osisOTBooks, osisNTBooks }; + +const char nasbMax[2] = {39, 27}; + +char readline(int fd, char **buf) { + char ch; + if (*buf) + delete [] *buf; + *buf = 0; + int len; + + + long index = lseek(fd, 0, SEEK_CUR); + // clean up any preceding white space + while ((len = read(fd, &ch, 1)) == 1) { + if ((ch != 10) && (ch != 13) && (ch != ' ') && (ch != '\t')) + break; + else index++; + } + + + while (ch != 10) { + if ((len = read(fd, &ch, 1)) != 1) + break; + } + + int size = (lseek(fd, 0, SEEK_CUR) - index) - 1; + + *buf = new char [ (size+2) * 2 ]; + + if (size > 0) { + lseek(fd, index, SEEK_SET); + read(fd, *buf, size); + read(fd, &ch, 1); //pop terminating char + (*buf)[size] = 0; + + // clean up any trailing junk on buf + int buflen = strlen(*buf); + for (char *it = *buf+(buflen-1); it > *buf; it--) { + if ((*it != 10) && (*it != 13) && (*it != ' ') && (*it != '\t')) + break; + else *it = 0; + } + // convert all spanish characters to combined + for (unsigned char *it = (unsigned char *)(*buf)+1; *it; it++) { + switch (*it) { +/* + case 0xE2 : // ‘ + if (isalpha(it[-1]) && it[1] == 0x80 && it[2] == 0x98) { + memmove(it, it+1, buflen - (it-(unsigned char *)*buf)); + buflen--; + it[0] = 0xcc; + it[1] = 0x80; // yeah, I know it's already 0x80, but that's just a coincidence + } + else { + fprintf(stderr, "oddity: %s\n", *buf); + exit(-4); + } + break; +*/ + case 0x60 : // ` + if (isalpha(it[-1])) { + memmove(it+1, it, buflen - (it-(unsigned char *)*buf) + 1); + buflen++; + it[0] = 0xcc; + it[1] = 0x80; + } + else { +// fprintf(stderr, "oddity: %s\n", *buf); +// exit(-4); + } + break; + case 0x7E : // ~ + memmove(it+1, it, buflen - (it-(unsigned char *)*buf) + 1); + buflen++; + it[0] = 0xcc; + it[1] = 0x83; + break; + } + } + } + else **buf = 0; + return !len; +} + + +void outHeader(); +void outTrailer(); +void unicodeTicks(string &outstring); +void prepLine(string &outstring, int currentTestament, bool note); +string getNoteBody(int nfd, string ¬eLine, string osisID, int currentBookNo, int currentChapter, int currentVerse, string nStr, const char *nx); + + + + +int main(int argc, char **argv) { + +#ifdef HAVESWORD + LocaleMgr::getSystemLocaleMgr()->setDefaultLocaleName("es"); +#endif + + std::setlocale(LC_CTYPE, ""); + + // Let's test our command line arguments + if (argc < 2) { +// fprintf(stderr, "usage: %s [0|1 - file includes prepended verse references]\n", argv[0]); + fprintf(stderr, "usage: %s [notesfile]\n\n", argv[0]); + exit(-1); + } + + + // Let's see if we can open our input file + int fd = open(argv[1], O_RDONLY|O_BINARY); + if (fd < 0) { + fprintf(stderr, "error: %s: couldn't open input file: %s \n", argv[0], argv[1]); + exit(-2); + } + + int fdn = -1; + if (argc > 2) { + fdn = open(argv[2], O_RDONLY|O_BINARY); + if (fdn < 0) { + fprintf(stderr, "error: %s: couldn't open input file: %s \n", argv[0], argv[2]); + exit(-2); + } + } + + outHeader(); + + string header; + char *buffer = 0; + char *nbuffer = 0; + int result = 0; + string currentBook = ""; + int currentBookNo = 0; + int currentTestament = 0; + int currentChapter = 0; + int currentVerse = 0; + bool inBook = false; + bool inChapter = false; + bool inVerse = false; + string noteLine = ""; + string preChapNote = ""; + string outstring; + result = readline(fd, &buffer); + string lookahead = buffer; + unicodeTicks(lookahead); + prepLine(lookahead, currentTestament, false); + do { + result = readline(fd, &buffer); + if (lookahead.length()) { + string savebuf = buffer; + if (buffer) + delete [] buffer; + buffer = new char [ lookahead.length() + 1]; + strcpy(buffer, lookahead.c_str()); + lookahead = savebuf; + unicodeTicks(lookahead); + prepLine(lookahead, currentTestament, false); + result = 0; + } + else if (!result) { + string savebuf = buffer; + result = readline(fd, &buffer); + lookahead = buffer; + unicodeTicks(lookahead); + prepLine(lookahead, currentTestament, false); + + + if (buffer) + delete [] buffer; + buffer = new char [ savebuf.length() + 1]; + strcpy(buffer, savebuf.c_str()); + result = 0; + } + + outstring = buffer; + + + + // BOOK NAMES + if (!strncmp(outstring.c_str(), "", 4)) { + string book = outstring.c_str()+4; + book = book.substr(0, book.find_first_of("<")); + outstring = ""; + if (inVerse) { + outstring += ""; + inVerse = false; + } + if (inChapter) { + outstring += ""; + inChapter = false; + } + if (inBook) { + outstring += ""; + inBook = false; + } + outstring += (string)"
" + book + ""; + inBook = true; + } + + + // CHAPTERS + //PSALM + if ((!strncmp(outstring.c_str(), "", 4)) || (!strncmp(outstring.c_str(), "", 4))) { + string chapterTitle = outstring.c_str()+4; + chapterTitle = chapterTitle.substr(0, chapterTitle.find_first_of("<")); + string chapter = chapterTitle.substr(chapterTitle.find_first_of(" ")+1); + outstring = ""; + if (inVerse) { + outstring += ""; + inVerse = false; + } + if (inChapter) { + outstring += ""; + inChapter = false; + } + outstring += (string)""; + outstring += (string)"" + chapterTitle + ""; + currentChapter = atoi(chapter.c_str()); + inChapter = true; + currentVerse = 1; + } + + // + if (!strncmp(outstring.c_str(), "", 4)) { + string heading = outstring.c_str()+4; + heading = heading.substr(0, heading.find("")); + outstring = ""; + + if (!strncmp(lookahead.c_str(), "", 4)) { + lookahead.erase(0, 4); + outstring += ""; + } + if (inVerse) { + outstring += "\n"; + inVerse = false; + } + outstring += (string)"" + heading + (string)""; + } + + // + if (!strncmp(outstring.c_str(), "", 4)) { + string heading = outstring.c_str()+4; + heading = heading.substr(0, heading.find("")); + outstring = ""; + + if (!strncmp(lookahead.c_str(), "", 4)) { + lookahead.erase(0, 4); + outstring += ""; + } + if (inVerse) { + outstring += "\n"; + inVerse = false; + } + outstring += (string)"" + heading + (string)""; + } + if (!strncmp(outstring.c_str(), "", 4)) { + string heading = (outstring.c_str()+4); + heading = heading.substr(0, heading.find("")); + outstring = (string)"" + heading + (string)""; + } + if (!strncmp(outstring.c_str(), "", 4)) { + string heading = (outstring.c_str()+4); + heading = heading.substr(0, heading.find("")); + outstring = (string)"" + heading + (string)""; + } + + + + // {{x:y}} + // DUH, find_first_of looks for the first occurance of ANY single character of the supplied string +// int start = outstring.find_first_of("{{"); // this is whacked and fails on ">[{.." Try it! + const char *outstr = outstring.c_str(); + const char *found = strstr(outstr, "{{"); + int start = (found) ? (found - outstr) : -1; +// ---- end of whacked replacement + + if (start > -1) { + found = strstr(outstr, "}}"); + int end = (found) ? (found - outstr) : -1; + end++; + int testmt = 0, book = 0, chap = 0; + string bkch = outstring.substr(start+2, end-start-2); + sscanf(bkch.c_str(), "%d:%d", &book, &chap); + currentChapter = chap; + int vNumEnd = outstring.find_first_of(" ", end); + currentVerse = atoi(outstring.substr(end+1, vNumEnd-end-1).c_str()); + currentBookNo = book; + if (book > nasbMax[0]) { + testmt = 1; + book -= nasbMax[0]; + } + if (currentBook != osisBooks[testmt][book-1]) { + fprintf(stderr, "error: %s: Found a book/chapter identifier out of place: {{%d:%d}} in book %s, chapter %d; bkch = %s; outstring = %s\n", argv[0], book, chap, currentBook.c_str(), currentChapter, bkch.c_str(), outstring.c_str()); + exit(-3); + } + char chapString[20], verseString[20]; + sprintf(chapString, "%d", currentChapter); + sprintf(verseString, "%d", currentVerse); + string newstring = ""; + if (inVerse) { + newstring += ""; + inVerse = false; + } + newstring += ""; + outstring.replace(start, vNumEnd-start+1, newstring); + inVerse = true; + noteLine = preChapNote; + preChapNote = ""; + } + + + + // multiple occurances on a line stuff + while (1) { + + // NOTE + outstr = outstring.c_str(); + found = strstr(outstr, " -1) { + int end = outstring.find_first_of(">", start+1); + string nStr = outstring.substr(start+2, end-start-2); + + // NOTE + if (isdigit(nStr.c_str()[0]) || strchr("ABCDEFG", nStr.c_str()[0])) { + bool preChap = strchr("A", nStr.c_str()[0]); + if (preChap) noteLine = ""; + char chapString[20], verseString[20]; + sprintf(chapString, "%d", currentChapter); + sprintf(verseString, "%d", currentVerse); + string osisID = (currentBook + (string)"." + (string)chapString + (string)"." + (string) verseString).c_str(); + string noteBody = getNoteBody(fdn, noteLine, osisID, currentBookNo, currentChapter, currentVerse, nStr, "N"); + if (preChap) preChapNote = noteLine; + outstring.replace(start, end-start+1, (string)"" + noteBody + ""); + continue; + } + } + + + + // + outstr = outstring.c_str(); + found = strstr(outstr, ""); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 4, (string)""); + continue; + + } + + // + outstr = outstring.c_str(); + found = strstr(outstr, ""); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 5, (string)""); + continue; + + } + // + outstr = outstring.c_str(); + found = strstr(outstr, ""); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 4, (string)""); + continue; + + } + // + outstr = outstring.c_str(); + found = strstr(outstr, ""); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 3, (string)"¿"); + continue; + + } + // + outstr = outstring.c_str(); + found = strstr(outstr, ""); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 3, (string)"¡"); + continue; + + } + outstr = outstring.c_str(); + found = strstr(outstr, " -1) { + int end = outstring.find_first_of(">", start+1); + string nStr = outstring.substr(start+2, end-start-2); + + char chapString[20], verseString[20]; + sprintf(chapString, "%d", currentChapter); + sprintf(verseString, "%d", currentVerse); + string osisID = (currentBook + (string)"." + (string)chapString + (string)"." + (string) verseString).c_str(); + string noteBody = getNoteBody(fdn, noteLine, osisID, currentBookNo, currentChapter, currentVerse, nStr, "R"); + outstring.replace(start, end-start+1, (string)"" + noteBody + ""); + continue; + } + // transChange added {} + outstr = outstring.c_str(); + found = strstr(outstr, "{"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 1, (string)""); + size_t end = outstring.find_first_of("}", start+1); + if (end != string::npos) { + outstring.erase(end, 1); + } + else end = outstring.size()-1; + while ((!isalpha(outstring[end]))&&(outstring[end]!='>')) end--; + outstring.insert(end+1, ""); + + continue; + + } +/* + // transChange tenseChange * + outstr = outstring.c_str(); + found = strstr(outstr, "*"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 1, (string)""); + for (end = start + 34; (end < outstring.length()); end++) { + if ((!isalpha(outstring[end])) && + (outstring[end] != '\'')) + break; + } + outstring.replace(end, 1, ""); + continue; + + } +*/ + // <,> + outstr = outstring.c_str(); + found = strstr(outstr, "<,>"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 3, (string)""); + continue; + + } + // + outstr = outstring.c_str(); + found = strstr(outstr, ""); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 4, (string)""); + continue; + + } + // + outstr = outstring.c_str(); + found = strstr(outstr, ""); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 4, (string)""); + continue; + + } + // + outstr = outstring.c_str(); + found = strstr(outstr, ""); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 4, (string)""); + continue; + + } + // paragraph break + outstr = outstring.c_str(); + found = strstr(outstr, ""); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 4, (string)""); + continue; + + } + // poetry break + outstr = outstring.c_str(); + found = strstr(outstr, ""); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 4, (string)""); + continue; + + } + // poetry break + outstr = outstring.c_str(); + found = strstr(outstr, ""); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 4, (string)""); + continue; + + } + // poetry break + outstr = outstring.c_str(); + found = strstr(outstr, ""); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 4, (string)""); + continue; + + } + // letter indent + outstr = outstring.c_str(); + found = strstr(outstr, ""); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + outstring.replace(start, 4, (string)""); + continue; + + } + break; + } + + int strongsStart = 0; + int transChangeStart = 0; + bool strongsFound = false; + bool intoken = false; + bool intag = false; + bool inNote = false; + int tokenStart = 0; + string lastToken = ""; + string previousToken = ""; + int tenseChange = -1; + // strongs numbers + for (unsigned int i = 0; i < outstring.length(); i++) { + if ((!inNote) && (!intoken) && (!intag) && (!strongsFound)) { + if ((outstring[i] != ' ') && ((isalpha(outstring[i])) || (isdigit(outstring[i])))) { + strongsStart = i; + strongsFound = true; + } + } + if (outstring[i] =='*') + tenseChange = i; + if (outstring[i] == '<') { tokenStart = i+1; intoken = true; } + if (outstring[i] == '>') { + intoken = false; + previousToken = lastToken; + lastToken = outstring.substr(tokenStart, i-tokenStart); + // Not completely safe, but works for current NASB data + if (strchr(lastToken.c_str(), '/')) + intag = false; + else intag = true; + if ((intag)&&(!strncmp(lastToken.c_str(), "transChange", 11))) { + transChangeStart = i+1; + } +/* + if (!strncmp(lastToken.c_str(), "seg", 3)) { + strongsFound = false; + strongsStart = i+1; + } + if (!strncmp(lastToken.c_str(), "divineName", 10)) { + strongsFound = false; + strongsStart = i+1; + } +*/ + if (!strncmp(lastToken.c_str(), "/divineName", 10)) { + strongsFound = false; + strongsStart = i+1; + } + if (!strncmp(lastToken.c_str(), "note", 4)) { + strongsFound = false; + strongsStart = i+1; + inNote = true; + } + if (!strncmp(lastToken.c_str(), "/note", 5)) { + strongsFound = false; + strongsStart = i+1; + inNote = false; + } + if (!strncmp(lastToken.c_str(), "q who=\"Jesus\"", 13)) { + strongsFound = false; + strongsStart = i+1; + } + if (!strncmp(lastToken.c_str(), "seg type=\"otPassage\"", 19)) { + strongsFound = false; + strongsStart = i+1; + } + if (!strncmp(lastToken.c_str(), "/transChange", 12)) { + strongsFound = false; + strongsStart = i+1; + } + if (!strncmp(lastToken.c_str(), "milestone", 9)) { + strongsFound = false; + strongsStart = i+1; + } + if (!strncmp(lastToken.c_str(), "/seg", 4)) { + strongsFound = false; + strongsStart = i+1; + } + if ((!strncmp(lastToken.c_str(), "verse", 5))) { + intag = false; + } + + if ( (!strncmp(lastToken.c_str(), "MG", 2)) || + (!strncmp(lastToken.c_str(), "MH", 2))) { + + // insert + // fix tenseChange to be inside so we can include a subset of the content. + outstring.replace(tokenStart-1, lastToken.length()+2, ((tenseChange > -1) ? "":"")); + i = (tokenStart-1) + ((tenseChange > -1) ? 18:4); + + // build tag + char lang = lastToken[1]; // H or G + lastToken.replace(0, 1, " -1) { + lastToken.replace(start, 2, (string)" strong:" + lang); + } + lastToken += "\">"; + intag = false; + + + if (tenseChange > -1) { + lastToken.insert(0, ""); + } + if (!strncmp(previousToken.c_str(), "transChange type=\"added", 23)) { + outstring.insert(transChangeStart, lastToken); + intag = true; + i += lastToken.length() - 1; // (-1 because we're about to i++) + } + + // insert our token + else { + outstring.insert(strongsStart, lastToken); + i += lastToken.length() - 1; // (-1 because we're about to i++) + } + strongsStart = i+1; + strongsFound = false; + if (tenseChange > -1) { + // relocate because position may have changed from all the token inserts + const char *buf = outstring.c_str(); + tenseChange = (strchr(buf, '*') - buf); + outstring.erase(tenseChange, 1); + tenseChange = -1; + } + } + } + } + + + // clean up stuff that didn't work quite right + while (1) { + + // divineName strongs tags misorderings + string target = ""; + size_t s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), ""); + continue; + } + target = ","; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), ","); + continue; + } + + break; + } + + + std::cout << outstring; + if (!result) std::cout << "\n"; + } + while (!result); + outstring = ""; + if (inVerse) { + outstring += ""; + inVerse = false; + } + if (inChapter) { + outstring += ""; + inChapter = false; + } + if (inBook) { + outstring += "
"; + inBook = false; + } + std::cout << outstring; + + outTrailer(); + + // clean up our buffers that readline might have allocated + if (buffer) + delete [] buffer; + if (nbuffer) + delete [] nbuffer; + + close(fd); + + if (fdn > -1) + close(fdn); +} + +void outHeader() { + +std::cout << "" << "\n"; +std::cout << "" << "\n"; +std::cout << " " << "\n"; +std::cout << "
" << "\n"; +std::cout << " " << "\n"; +std::cout << " NEW AMERICAN STANDARD BIBLE" << "\n"; +std::cout << " Bible.en.NASB.1995" << "\n"; +std::cout << " Copyright (C) 1960,1962,1963,1968,1971,1972,1973,1975,1977,1995 by THE LOCKMAN FOUNDATION" << "\n"; +std::cout << " Bible" << "\n"; +std::cout << " " << "\n"; +std::cout << " " << "\n"; +std::cout << " " << "\n"; +std::cout << "
" << "\n"; + +} + +void outTrailer() { + std::cout << "
\n"; + std::cout << "
\n"; +} + +void unicodeTicks(string &outstring) { + + while (1) { + const char *outstr; + const char *found; + int start; + + outstr = outstring.c_str(); + found = strstr(outstr, "``"); + char uchar[4]; uchar[0]=0xe2; uchar[1]=0x80; uchar[2]=0x9c; uchar[3]=0; + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 2, "“"); + continue; + } + + outstr = outstring.c_str(); + found = strstr(outstr, "`"); + uchar[0]=0xe2; uchar[1]=0x80; uchar[2]=0x98; uchar[3]=0; + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 1, uchar); + continue; + } + + outstr = outstring.c_str(); + found = strstr(outstr, "'"); + uchar[0]=0xe2; uchar[1]=0x80; uchar[2]=0x99; uchar[3]=0; + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 1, uchar); + continue; + } + + outstr = outstring.c_str(); + found = strstr(outstr, "\""); + uchar[0]=0xe2; uchar[1]=0x80; uchar[2]=0x9d; uchar[3]=0; + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 1, uchar); + continue; + } + + break; + } +} + +void prepLine(string &outstring, int currentTestament, bool note) { + int end = 0; + while (1) { +// ------------------------------------------ +// redundant markers + size_t s; + // + s = outstring.find(""); + if (s != string::npos) { + outstring.erase(s, 3); + continue; + } + + //

+ s = outstring.find("

"); + if (s != string::npos) { + outstring.erase(s, 3); + continue; + } + + // + s = outstring.find(""); + if (s != string::npos) { + outstring.erase(s, 3); + continue; + } + + // + s = outstring.find(""); + if (s != string::npos) { + outstring.erase(s, 4); + continue; + } + + // + s = outstring.find(""); + if (s != string::npos) { + outstring.erase(s, 4); + continue; + } + + // <$F...>> + s = outstring.find("<$F"); + if (s != string::npos) { + size_t e = outstring.find(">>", s); + outstring.erase(s, e-s+2); + continue; + } +// ---------------------------------------------- + + // + s = outstring.find(""); + if (s != string::npos) { + outstring.replace(s, 3, ""); + continue; + } + + // ~“ + char uchar[6]; uchar[0]=0xcc; uchar[1]=0x83; uchar[2]=0xe2; uchar[3]=0x80; uchar[4]=0x9c; uchar[5]=0; +// string target = "~“"; + string target = uchar; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), ""); + continue; + } + // +« + target = "+«"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), ""); + continue; + } + // +» + target = "+»"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), ""); + continue; + } + // +“ + target = "+“"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), ""); + continue; + } + // +” + target = "+”"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), ""); + continue; + } + // +‘ + target = "+‘"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), ""); + continue; + } + // +’ + target = "+’"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), ""); + continue; + } + // -« + target = "-«"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), ""); + continue; + } + // -» + target = "-»"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), ""); + continue; + } + // -“ + target = "-“"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), ""); + continue; + } + // -” + target = "-”"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), ""); + continue; + } + // -‘ + target = "-‘"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), ""); + continue; + } + // -’ + target = "-’"; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), ""); + continue; + } + + // ~‘ + uchar[0]=0xcc; uchar[1]=0x83; uchar[2]=0xe2; uchar[3]=0x80; uchar[4]=0x98; uchar[5]=0; +// target = "~‘"; + target = uchar; + s = outstring.find(target); + if (s != string::npos) { + outstring.replace(s, target.length(), ""); + continue; + } + + + const char *outstr = outstring.c_str(); + const char *found = strstr(outstr, "L\\{ORD}/'\\{S}/"); + int start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 14, "Lord's"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "L\\{ORD}/’\\{S}/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 16, "Lord’s"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "L\\{ORD,}/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 9, "Lord,"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "L\\{ORD}/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 8, "Lord"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "S\\{EN~OR}/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 10, "Sen~or"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "S\\{EÑOR}/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 10, "Señor"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "Y\\{AH,}/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 8, "Yah,"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "Y\\{AH}/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 7, "Yah"); + continue; + } + // is this really valid markup? should 'also be' be in small + // caps? 3 { and only 2 } ? + outstr = outstring.c_str(); + found = strstr(outstr, "L\\{ORD {also be}/}"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 18, "Lord also be}"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "L\\{ORD {give}/}"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 15, "Lord give}"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "L\\{ORD {bless}/}"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 16, "Lord bless}"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "L\\{ORD {are my Refuge; You have made the Most High your dwelling place}/}"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 73, "Lord are my Refuge; You have made the Most High your dwelling place}"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "MENE"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 4, "Mene"); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "M\\ENE/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 6, "Mene"); + continue; + } + found = strstr(outstr, "M\\ENE:/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 7, "Mene:"); + continue; + } + found = strstr(outstr, "TEKEL"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 5, "Tekel"); + continue; + } + found = strstr(outstr, "T\\EKEL/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 7, "Tekel"); + continue; + } + found = strstr(outstr, "T\\EKEL:/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 8, "Tekel:"); + continue; + } + found = strstr(outstr, "UPHARSIN"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 8, "Upharsin"); + continue; + } + found = strstr(outstr, "UFARSIN"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 7, "Ufarsin"); + continue; + } + found = strstr(outstr, "U\\FARSIN/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 9, "Ufarsin"); + continue; + } + found = strstr(outstr, "PERES"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 5, "Peres"); + continue; + } + found = strstr(outstr, "P\\ERES/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 7, "Peres"); + continue; + } + found = strstr(outstr, "P\\ERES:/"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 8, "Peres:"); + continue; + } + // LB ??? Don't have info on this. Assuming '-' + outstr = outstring.c_str(); + found = strstr(outstr, ""); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 4, "-"); + continue; + } + + found = strstr(outstr+end, "\\"); + start = (found) ? (found - outstr) : -1; + + if (start > -1) { + for (;start;start--) { + if ((!std::isupper(outstring[start])) && + (!strchr("\\/ ~", outstring[start]))) { + break; + } + } + for (start++; outstring[start] == ' '; start++); + if (currentTestament) { + outstring.insert(start, ""); + start += 22; + } + else { + outstring.insert(start, ""); + start += 17; + const char *b = outstring.c_str(); + const char *found = strstr(b, "L\\{ORD}/"); + int s = (found) ? (found - b) : -1; + if (s > -1) + outstring.replace(s, 8, "Lord"); + end = s+4; + } + bool lower = false; + bool token = false; + for (int charLen = 1;start < (int)outstring.length(); start+=charLen) { + const unsigned char *startChar = (const unsigned char *)outstring.c_str()+start; + const unsigned char *endChar = startChar; + SW_u32 testChar = getUniCharFromUTF8(&endChar, true); + charLen = endChar - startChar; // set the size of the UTF-8 sequence + if (!token) { + if (testChar == '\\') { + lower = true; + outstring.erase(start, 1); + start--; + continue; + } + if (testChar == '/') { + lower = false; + outstring.erase(start, 1); + end = start; + start--; + continue; + } + // what is this? It screws MENE MENE up in Daniel +// if (testChar == ':') +// break; + + if (StringMgr::getSystemStringMgr()->isAlpha(testChar)) { + if (StringMgr::getSystemStringMgr()->isLower(testChar)) + break; + if (lower) + outstring.replace(start, charLen, SWBuf((const char *)startChar, charLen).toLower()); + continue; + } + } + if (testChar == '>') + token = false; + if (testChar == '<') + token = true; + } + if (currentTestament) { + outstring.insert(end, ""); + end+=6; + } + else { + outstring.insert(end, ""); + end+=19; + } + continue; + } + + if (note) { + outstr = outstring.c_str(); + found = strstr(outstr, "{"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 1, ""); + continue; + } + outstr = outstring.c_str(); + found = strstr(outstr, "}"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + outstring.replace(start, 1, ""); + continue; + } + s = outstring.find(""); + if (s != string::npos) { + const size_t s2 = outstring.find(""); + continue; + } + } + } + break; + } +} + +string getNoteBody(int fdn, string ¬eLine, string osisID, int currentBookNo, int currentChapter, int currentVerse, string nStr, const char *nx) { + char *nbuffer = 0; + int start = -1; + const char *found = (const char *)-1; + const char *outstr = (const char *)-1; + + while (start == -1) { + if (!noteLine.length() && fdn > -1) { + if (readline(fdn, &nbuffer)) return ""; // eof + noteLine = nbuffer; + } + outstr = noteLine.c_str(); + found = strstr(outstr, "{{"); + start = (found) ? (found - outstr) : -1; + // be sure we have at least one of these. We've found note lines without any actual notes + if (found) found = strstr(outstr, " -1) { + found = strstr(outstr, "}}"); + int end = (found) ? (found - outstr) : -1; + end++; + int book, chap; + string bkch = noteLine.substr(start+2, end-start-2); + sscanf(bkch.c_str(), "%d:%d", &book, &chap); + int vNumEnd = noteLine.find_first_of(" ", end); + int verse = atoi(noteLine.substr(end+1, vNumEnd-end-1).c_str()); + if ((book != currentBookNo) || (chap != currentChapter) || (verse != currentVerse)) { + fprintf(stderr, "Not correct note line(%s - %s - %d:%d, %d:%d, %d:%d): %s\n\n", osisID.c_str(), nStr.c_str(), currentBookNo, book, currentChapter, chap, currentVerse, verse, noteLine.c_str()); + exit(-1); + } + } + else { + fprintf(stderr, "Not a note line: %s\n\n", noteLine.c_str()); + exit(-1); + } + + + + + + outstr = noteLine.c_str(); + string tag = (string)"<"+(string)nx+nStr+(string)">"; + found = strstr(outstr, tag.c_str()); + start = (found) ? (found - outstr) : -1; + string retVal = ""; + + if (start > -1) { + start += tag.length(); + found = strstr(outstr+start, " <"); + int end = (found) ? (found - outstr) : -1; + if (end<0) end = noteLine.length(); + retVal = noteLine.substr(start, end-start); + } + unicodeTicks(retVal); +#ifdef HAVESWORD + if (*nx == 'R') { + // } { get's deleted. e.g. {Luke} {9:10-17} + outstr = retVal.c_str(); + found = strstr(outstr, "} {"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + retVal.replace(start, 3, " "); + } + outstr = retVal.c_str(); + found = strstr(outstr, ";}"); + start = (found) ? (found - outstr) : -1; + if (start > -1) { + retVal.replace(start, 2, "};"); + } + VerseKey key = osisID.c_str(); +//std::cerr << osisID.c_str() << ": Convert to OSISRef: " << retVal.c_str(); + retVal = VerseKey::convertToOSIS(retVal.c_str(), &key); +//std::cerr << ": " << retVal.c_str(); + } +#endif + prepLine(retVal, 0, true); + if (nbuffer) + delete [] nbuffer; +//std::cerr << ": " << retVal.c_str() << "\n"; + return retVal; +} + -- cgit