summaryrefslogtreecommitdiffstats
path: root/modules/nasb2020/lockosis.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'modules/nasb2020/lockosis.cpp')
-rw-r--r--modules/nasb2020/lockosis.cpp622
1 files changed, 248 insertions, 374 deletions
diff --git a/modules/nasb2020/lockosis.cpp b/modules/nasb2020/lockosis.cpp
index deaadbb..87904b3 100644
--- a/modules/nasb2020/lockosis.cpp
+++ b/modules/nasb2020/lockosis.cpp
@@ -140,6 +140,7 @@ void outTrailer();
void unicodeTicks(string &outstring);
void prepLine(string &outstring, int currentTestament, bool note);
string getNoteBody(int nfd, string &noteLine, string osisID, int currentBookNo, int currentChapter, int currentVerse, string nStr, const char *nx);
+int replaceFirst(string &haystack, string needle, string replacement);
@@ -302,9 +303,13 @@ int main(int argc, char **argv) {
}
//<SH>
- if (!strncmp(outstring.c_str(), "<SH>", 4)) {
- string heading = outstring.c_str()+4;
- heading = heading.substr(0, heading.find("</SH>"));
+ if ((!strncmp(outstring.c_str(), "<SH>", 4)) || (!strncmp(outstring.c_str(), "<SHI>", 5))) {
+ bool shi = outstring.c_str()[3] == 'I';
+ if (shi) {
+ fprintf(stderr, "found shi.\n");
+ }
+ string heading = outstring.c_str()+(shi ? 5 : 4);
+ heading = heading.substr(0, heading.find(shi ? "</SHI>" : "</SH>"));
outstring = "";
if (!strncmp(lookahead.c_str(), "<PM>", 4)) {
@@ -315,7 +320,9 @@ int main(int argc, char **argv) {
outstring += "</verse>\n";
inVerse = false;
}
- outstring += (string)"<title type=\"section\" subType=\"x-preverse\">" + heading + (string)"</title>";
+ outstring += (string)"<title type=\"section\"";
+ if (!shi) outstring += (string)" subType=\"x-preverse\"";
+ outstring += (string)">" + heading + (string)"</title>";
}
if (!strncmp(outstring.c_str(), "<SS>", 4)) {
string heading = (outstring.c_str()+4);
@@ -330,7 +337,7 @@ int main(int argc, char **argv) {
- // {{x:y}}
+ // {{x::y}}
// DUH, find_first_of looks for the first occurance of ANY single character of the supplied string
// int start = outstring.find_first_of("{{"); // this is whacked and fails on ">[{.." Try it!
const char *outstr = outstring.c_str();
@@ -344,7 +351,7 @@ int main(int argc, char **argv) {
end++;
int testmt = 0, book = 0, chap = 0;
string bkch = outstring.substr(start+2, end-start-2);
- sscanf(bkch.c_str(), "%d:%d", &book, &chap);
+ sscanf(bkch.c_str(), "%d::%d", &book, &chap);
currentChapter = chap;
int vNumEnd = outstring.find_first_of(" ", end);
currentVerse = atoi(outstring.substr(end+1, vNumEnd-end-1).c_str());
@@ -354,7 +361,7 @@ int main(int argc, char **argv) {
book -= nasbMax[0];
}
if (currentBook != osisBooks[testmt][book-1]) {
- fprintf(stderr, "error: %s: Found a book/chapter identifier out of place: {{%d:%d}} in book %s, chapter %d; bkch = %s; outstring = %s\n", argv[0], book, chap, currentBook.c_str(), currentChapter, bkch.c_str(), outstring.c_str());
+ fprintf(stderr, "error: %s: Found a book/chapter identifier out of place: {{%d::%d}} in book %s, chapter %d; bkch = %s; outstring = %s\n", argv[0], book, chap, currentBook.c_str(), currentChapter, bkch.c_str(), outstring.c_str());
exit(-3);
}
char chapString[20], verseString[20];
@@ -397,6 +404,7 @@ int main(int argc, char **argv) {
string noteBody = getNoteBody(fdn, noteLine, osisID, currentBookNo, currentChapter, currentVerse, nStr, "N");
if (preChap) preChapNote = noteLine;
outstring.replace(start, end-start+1, (string)"<note type=\"explanation\" osisID=\"" + osisID + (string)".note." + nStr + (string) "\" n=\"" + nStr + (string) "\">" + noteBody + "</note>");
+// outstring.replace(start, end-start+1, (string)"--note--");
continue;
}
}
@@ -404,56 +412,23 @@ int main(int argc, char **argv) {
// <RS>
- outstr = outstring.c_str();
- found = strstr(outstr, "<RS>");
- start = (found) ? (found - outstr) : -1;
-
- if (start > -1) {
- outstring.replace(start, 4, (string)"<q who=\"Jesus\">");
- continue;
-
- }
+ if (replaceFirst(outstring, "<RS>", "<q who=\"Jesus\">") > -1) continue;
// <RS>
- outstr = outstring.c_str();
- found = strstr(outstr, "</RS>");
- start = (found) ? (found - outstr) : -1;
-
- if (start > -1) {
- outstring.replace(start, 5, (string)"</q>");
- continue;
+ if (replaceFirst(outstring, "</RS>", "</q>") > -1) continue;
- }
// <RT>
- outstr = outstring.c_str();
- found = strstr(outstr, "<RT>");
- start = (found) ? (found - outstr) : -1;
+ if (replaceFirst(outstring, "<RT>", "<milestone type=\"x-RT\"/>") > -1) continue;
- if (start > -1) {
- outstring.replace(start, 4, (string)"<milestone type=\"x-RT\"/>");
- continue;
+ if (replaceFirst(outstring, "<SHI>", "<title type=\"section\">") > -1) continue;
+ if (replaceFirst(outstring, "</SHI>", "</title>") > -1) continue;
- }
// <?>
- outstr = outstring.c_str();
- found = strstr(outstr, "<?>");
- start = (found) ? (found - outstr) : -1;
-
- if (start > -1) {
- outstring.replace(start, 3, (string)"¿");
- continue;
-
- }
+ if (replaceFirst(outstring, "<?>", "¿") > -1) continue;
+
// <!>
- outstr = outstring.c_str();
- found = strstr(outstr, "<!>");
- start = (found) ? (found - outstr) : -1;
+ if (replaceFirst(outstring, "<!>", "¡") > -1) continue;
- if (start > -1) {
- outstring.replace(start, 3, (string)"¡");
- continue;
-
- }
outstr = outstring.c_str();
found = strstr(outstr, "<R");
start = (found) ? (found - outstr) : -1;
@@ -507,101 +482,40 @@ int main(int argc, char **argv) {
}
*/
// <,>
- outstr = outstring.c_str();
- found = strstr(outstr, "<,>");
- start = (found) ? (found - outstr) : -1;
+ if (replaceFirst(outstring, "<,>", "<milestone type=\"x-superiorComma\"/>") > -1) continue;
- if (start > -1) {
- outstring.replace(start, 3, (string)"<milestone type=\"x-superiorComma\"/>");
- continue;
-
- }
// <NA>
- outstr = outstring.c_str();
- found = strstr(outstr, "<NA>");
- start = (found) ? (found - outstr) : -1;
-
- if (start > -1) {
- outstring.replace(start, 4, (string)"<milestone type=\"x-NA\"/>");
- continue;
+ if (replaceFirst(outstring, "<NA>", "<milestone type=\"x-superiorOne\"/>") > -1) continue;
- }
// <NB>
- outstr = outstring.c_str();
- found = strstr(outstr, "<NB>");
- start = (found) ? (found - outstr) : -1;
+ if (replaceFirst(outstring, "<NB>", "<milestone type=\"x-superiorTwo\"/>") > -1) continue;
- if (start > -1) {
- outstring.replace(start, 4, (string)"<milestone type=\"x-NB\"/>");
- continue;
-
- }
// <NC>
- outstr = outstring.c_str();
- found = strstr(outstr, "<NC>");
- start = (found) ? (found - outstr) : -1;
+ if (replaceFirst(outstring, "<NC>", "<milestone type=\"x-superiorThree\"/>") > -1) continue;
- if (start > -1) {
- outstring.replace(start, 4, (string)"<milestone type=\"x-NC\"/>");
- continue;
-
- }
// paragraph break <PM>
- outstr = outstring.c_str();
- found = strstr(outstr, "<PM>");
- start = (found) ? (found - outstr) : -1;
-
- if (start > -1) {
- outstring.replace(start, 4, (string)"<milestone type=\"line\" subType=\"x-PM\"/>");
- continue;
+ if (replaceFirst(outstring, "<PM>", "<milestone type=\"line\" subType=\"x-PM\"/>") > -1) continue;
- }
// poetry break <PN>
- outstr = outstring.c_str();
- found = strstr(outstr, "<PN>");
- start = (found) ? (found - outstr) : -1;
-
- if (start > -1) {
- outstring.replace(start, 4, (string)"<milestone type=\"x-PN\" />");
- continue;
+ if (replaceFirst(outstring, "<PN>", "<milestone type=\"x-Poetry\" />") > -1) continue;
- }
// poetry break <PO>
- outstr = outstring.c_str();
- found = strstr(outstr, "<PO>");
- start = (found) ? (found - outstr) : -1;
-
- if (start > -1) {
- outstring.replace(start, 4, (string)"<milestone type=\"line\" subType=\"x-PO\" />");
- continue;
+ if (replaceFirst(outstring, "<PO>", "<milestone type=\"line\" subType=\"x-Poetry\" />") > -1) continue;
- }
- // poetry break <PE>
- outstr = outstring.c_str();
- found = strstr(outstr, "<PE>");
- start = (found) ? (found - outstr) : -1;
-
- if (start > -1) {
- outstring.replace(start, 4, (string)"<milestone type=\"line\" subType=\"x-PE\" />");
- continue;
+ // poetry break <PR>
+ if (replaceFirst(outstring, "<PR>", "<milestone type=\"x-PoetryEnd\" />") > -1) continue;
- }
// letter indent <HL>
- outstr = outstring.c_str();
- found = strstr(outstr, "<HL>");
- start = (found) ? (found - outstr) : -1;
-
- if (start > -1) {
- outstring.replace(start, 4, (string)"<milestone type=\"x-HL\" />");
- continue;
+ if (replaceFirst(outstring, "<HL>", "<milestone type=\"x-HL\" />") > -1) continue;
- }
+ // letter indent <HLL>
+ if (replaceFirst(outstring, "<HLL>", "<milestone type=\"line\" subType=\"x-HLL\" />") > -1) continue;
break;
}
int strongsStart = 0;
int transChangeStart = 0;
- bool strongsFound = false;
+ bool strongsStartFound = false;
bool intoken = false;
bool intag = false;
bool inNote = false;
@@ -610,12 +524,21 @@ int main(int argc, char **argv) {
string previousToken = "";
int tenseChange = -1;
// strongs numbers
- for (unsigned int i = 0; i < outstring.length(); i++) {
- if ((!inNote) && (!intoken) && (!intag) && (!strongsFound)) {
- if ((outstring[i] != ' ') && ((isalpha(outstring[i])) || (isdigit(outstring[i])))) {
- strongsStart = i;
- strongsFound = true;
+ for (unsigned int i = 0; i < outstring.length(); ++i) {
+ if ((!strongsStartFound) && (!inNote) && (!intoken)) {
+ if (!intag) {
+ if ((outstring[i] != ' ') && ((isalpha(outstring[i])) || (isdigit(outstring[i])))) {
+ strongsStart = i;
+ strongsStartFound = true;
+ }
+ }
+ else if (!strncmp(lastToken.c_str(), "hi", 2) && strncmp(lastToken.c_str(), "hi type=\"inscription\"", 21)) {
+ if ((outstring[i] != ' ') && ((isalpha(outstring[i])) || (isdigit(outstring[i])))) {
+ strongsStart = tokenStart - 1;
+ strongsStartFound = true;
+ }
}
+
}
if (outstring[i] =='*')
tenseChange = i;
@@ -633,48 +556,61 @@ int main(int argc, char **argv) {
}
/*
if (!strncmp(lastToken.c_str(), "seg", 3)) {
- strongsFound = false;
+ strongsStartFound = false;
strongsStart = i+1;
}
if (!strncmp(lastToken.c_str(), "divineName", 10)) {
- strongsFound = false;
+ strongsStartFound = false;
strongsStart = i+1;
}
*/
if (!strncmp(lastToken.c_str(), "/divineName", 10)) {
- strongsFound = false;
+ strongsStartFound = false;
strongsStart = i+1;
}
if (!strncmp(lastToken.c_str(), "note", 4)) {
- strongsFound = false;
+ strongsStartFound = false;
strongsStart = i+1;
inNote = true;
}
if (!strncmp(lastToken.c_str(), "/note", 5)) {
- strongsFound = false;
+ strongsStartFound = false;
strongsStart = i+1;
inNote = false;
}
if (!strncmp(lastToken.c_str(), "q who=\"Jesus\"", 13)) {
- strongsFound = false;
+ strongsStartFound = false;
strongsStart = i+1;
}
- if (!strncmp(lastToken.c_str(), "seg type=\"otPassage\"", 19)) {
- strongsFound = false;
+ if (!strncmp(lastToken.c_str(), "/q", 2)) {
+ strongsStartFound = false;
+ strongsStart = i+1;
+ }
+ if (!strncmp(lastToken.c_str(), "seg type=\"otPassage\"", 20)) {
+ strongsStartFound = false;
+ strongsStart = i+1;
+ }
+ if (!strncmp(lastToken.c_str(), "hi type=\"inscription\"", 21)) {
+ strongsStartFound = false;
strongsStart = i+1;
}
if (!strncmp(lastToken.c_str(), "/transChange", 12)) {
- strongsFound = false;
+ strongsStartFound = false;
strongsStart = i+1;
}
if (!strncmp(lastToken.c_str(), "milestone", 9)) {
- strongsFound = false;
+ strongsStartFound = false;
strongsStart = i+1;
}
if (!strncmp(lastToken.c_str(), "/seg", 4)) {
- strongsFound = false;
+ strongsStartFound = false;
strongsStart = i+1;
}
+ if (!strncmp(lastToken.c_str(), "verse", 5)) {
+ strongsStartFound = false;
+ strongsStart = i+1;
+ }
+
if ((!strncmp(lastToken.c_str(), "verse", 5))) {
intag = false;
}
@@ -712,7 +648,7 @@ int main(int argc, char **argv) {
i += lastToken.length() - 1; // (-1 because we're about to i++)
}
strongsStart = i+1;
- strongsFound = false;
+ strongsStartFound = false;
if (tenseChange > -1) {
// relocate because position may have changed from all the token inserts
const char *buf = outstring.c_str();
@@ -849,46 +785,55 @@ void unicodeTicks(string &outstring) {
}
}
+
+// Replace the first occurrence of needle in haystack; returns its offset, or -1 when needle is empty or not found.
+int replaceFirst(string &haystack, string needle, string replacement) {
+	// An empty needle would match at offset 0 on every call, and callers `continue` while the result is > -1.
+	if (needle.empty()) return -1;
+	// string::find honors the full std::string length instead of stopping at an embedded NUL as strstr does.
+	const string::size_type pos = haystack.find(needle);
+	if (pos == string::npos) return -1;
+	haystack.replace(pos, needle.size(), replacement);
+	return (int)pos;
+}
+
+
void prepLine(string &outstring, int currentTestament, bool note) {
int end = 0;
while (1) {
// ------------------------------------------
-// redundant markers
+// redundant or unneeded or unknown markers
size_t s;
+
+ // <1EVA>
+ if (replaceFirst(outstring, "<1EVA>", "") > -1) continue;
+
+ // <1EVB>
+ if (replaceFirst(outstring, "<1EVB>", "") > -1) continue;
+
+ // <FA>
+ if (replaceFirst(outstring, "<FA>", "") > -1) continue;
+
+ // <PR>
+ if (replaceFirst(outstring, "<PR>", "") > -1) continue;
+
// <V>
- s = outstring.find("<V>");
- if (s != string::npos) {
- outstring.erase(s, 3);
- continue;
- }
+ if (replaceFirst(outstring, "<V>", "") > -1) continue;
+
+ // <T>
+ if (replaceFirst(outstring, "<T>", "") > -1) continue;
// <P>
- s = outstring.find("<P>");
- if (s != string::npos) {
- outstring.erase(s, 3);
- continue;
- }
+ if (replaceFirst(outstring, "<P>", "") > -1) continue;
// <C>
- s = outstring.find("<C>");
- if (s != string::npos) {
- outstring.erase(s, 3);
- continue;
- }
+ if (replaceFirst(outstring, "<C>", "") > -1) continue;
// <CC>
- s = outstring.find("<CC>");
- if (s != string::npos) {
- outstring.erase(s, 4);
- continue;
- }
+ if (replaceFirst(outstring, "<CC>", "") > -1) continue;
// <CP>
- s = outstring.find("<CP>");
- if (s != string::npos) {
- outstring.erase(s, 4);
- continue;
- }
+ if (replaceFirst(outstring, "<CP>", "") > -1) continue;
// <$F...>>
s = outstring.find("<$F");
@@ -897,14 +842,17 @@ void prepLine(string &outstring, int currentTestament, bool note) {
outstring.erase(s, e-s+2);
continue;
}
-// ----------------------------------------------
-
- // <A>
- s = outstring.find("<A>");
+ // <EOV>
+ s = outstring.find("<EOV>");
if (s != string::npos) {
- outstring.replace(s, 3, "<milestone type=\"line\" subType=\"x-A\"/>");
+ size_t e = outstring.find("</EOV>", s);
+ outstring.erase(s, e-s+6);
continue;
}
+// ----------------------------------------------
+
+ // <A>
+ if (replaceFirst(outstring, "<A>", "<milestone type=\"line\" subType=\"x-A\"/>") > -1) continue;
// ~“
char uchar[6]; uchar[0]=0xcc; uchar[1]=0x83; uchar[2]=0xe2; uchar[3]=0x80; uchar[4]=0x9c; uchar[5]=0;
@@ -961,42 +909,42 @@ void prepLine(string &outstring, int currentTestament, bool note) {
target = "-«";
s = outstring.find(target);
if (s != string::npos) {
- outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"«\"/>");
+ outstring.replace(s, target.length(), "<milestone type=\"cQuote\" subType=\"optional\" marker=\"«\"/>");
continue;
}
// -»
target = "-»";
s = outstring.find(target);
if (s != string::npos) {
- outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"»\"/>");
+ outstring.replace(s, target.length(), "<milestone type=\"cQuote\" subType=\"optional\" marker=\"»\"/>");
continue;
}
// -“
target = "-“";
s = outstring.find(target);
if (s != string::npos) {
- outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"“\"/>");
+ outstring.replace(s, target.length(), "<milestone type=\"cQuote\" subType=\"optional\" marker=\"“\"/>");
continue;
}
// -”
target = "-”";
s = outstring.find(target);
if (s != string::npos) {
- outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"”\"/>");
+ outstring.replace(s, target.length(), "<milestone type=\"cQuote\" subType=\"optional\" marker=\"”\"/>");
continue;
}
// -‘
target = "-‘";
s = outstring.find(target);
if (s != string::npos) {
- outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"‘\"/>");
+ outstring.replace(s, target.length(), "<milestone type=\"cQuote\" subType=\"optional\" marker=\"‘\"/>");
continue;
}
// -’
target = "-’";
s = outstring.find(target);
if (s != string::npos) {
- outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"’\"/>");
+ outstring.replace(s, target.length(), "<milestone type=\"cQuote\" subType=\"optional\" marker=\"’\"/>");
continue;
}
@@ -1009,182 +957,70 @@ void prepLine(string &outstring, int currentTestament, bool note) {
outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"‘\"/>");
continue;
}
-
+ if (replaceFirst(outstring, "<B>", "<hi type=\"bold\">") > -1) continue;
+ if (replaceFirst(outstring, "</B>", "</hi>") > -1) continue;
+
+ if (replaceFirst(outstring, "L<\\>{ORD}</>'<\\>{S}</>", "<seg><divineName>Lord's</divineName></seg>") > -1) continue;
+ if (replaceFirst(outstring, "L<\\>{ORD’S}</>", "<seg><divineName>Lord’s</divineName></seg>") > -1) continue;
+ if (replaceFirst(outstring, "L<\\>{ORD}</>’<\\>{S}</>", "<seg><divineName>Lord’s</divineName></seg>") > -1) continue;
+ if (replaceFirst(outstring, "L<\\>{ORD}</>’<\\>{S} </>", "<seg><divineName>Lord’s</divineName></seg> ") > -1) continue;
+ if (replaceFirst(outstring, "L<\\>ORD</>’<\\>S<MH3068></>", "<seg><divineName>Lord’s<MH3068></divineName></seg>") > -1) continue;
+ if (replaceFirst(outstring, "L<\\>{ORD,}</>", "<seg><divineName>Lord</divineName></seg>,") > -1) continue;
+ if (replaceFirst(outstring, "L<\\>{ORD}</>", "<seg><divineName>Lord</divineName></seg>") > -1) continue;
+ if (replaceFirst(outstring, "L<\\>{ORD} </>", "<seg><divineName>Lord</divineName></seg> ") > -1) continue;
+ if (replaceFirst(outstring, "L}<\\>{ORD}</>{", "<seg><divineName>Lord</divineName></seg>") > -1) continue;
+ if (replaceFirst(outstring, "L}<\\>{ORD}</>", "<seg><divineName>Lord</divineName></seg>}") > -1) continue;
+ if (replaceFirst(outstring, "S<\\>{EN~OR}</>", "<seg><divineName>Sen~or</divineName></seg>") > -1) continue;
+ if (replaceFirst(outstring, "S<\\>{EÑOR}</>", "<seg><divineName>Señor</divineName></seg>") > -1) continue;
+ if (replaceFirst(outstring, "Y<\\>{AH,}</>", "<seg><divineName>Yah</divineName></seg>,") > -1) continue;
+ if (replaceFirst(outstring, "Y<\\>{AH,} </>", "<seg><divineName>Yah</divineName></seg>, ") > -1) continue;
+ if (replaceFirst(outstring, "Y<\\>{AH}</>", "<seg><divineName>Yah</divineName></seg>") > -1) continue;
+
+ // Do these first before Daniel Inscriptions
+ // LB = add macron, only with 'a': ā
+ if (replaceFirst(outstring, "a<LB>", "ā") > -1) continue;
+ if (replaceFirst(outstring, "E<LE>", "Ē") > -1) continue;
+ if (replaceFirst(outstring, "e<LE>", "ē") > -1) continue;
+
+ if (replaceFirst(outstring, "MENE", "<hi type=\"inscription\">Mene</hi>") > -1) continue;
+ if (replaceFirst(outstring, "M<\\>ENE</>", "<hi type=\"inscription\">Mene</hi>") > -1) continue;
+ if (replaceFirst(outstring, "M<\\>ENE:</>", "<hi type=\"inscription\">Mene</hi>:") > -1) continue;
+ if (replaceFirst(outstring, "TEKEL", "<hi type=\"inscription\">Tekel</hi>") > -1) continue;
+ if (replaceFirst(outstring, "T<\\>EKEL</>", "<hi type=\"inscription\">Tekel</hi>") > -1) continue;
+ if (replaceFirst(outstring, "T<\\>EKEL:</>", "<hi type=\"inscription\">Tekel</hi>:") > -1) continue;
+ if (replaceFirst(outstring, "UPHARSIN", "<hi type=\"inscription\">Upharsin</hi>") > -1) continue;
+ if (replaceFirst(outstring, "UFARSIN", "<hi type=\"inscription\">Ufarsin</hi>") > -1) continue;
+ if (replaceFirst(outstring, "U<\\>FARSIN</>", "<hi type=\"inscription\">Ufarsin</hi>") > -1) continue;
+ if (replaceFirst(outstring, "PERES", "<hi type=\"inscription\">Peres</hi>") > -1) continue;
+ if (replaceFirst(outstring, "P<\\>ERES</>", "<hi type=\"inscription\">Peres</hi>") > -1) continue;
+ if (replaceFirst(outstring, "P<\\>ERES:</>", "<hi type=\"inscription\">Peres</hi>:") > -1) continue;
+
+ if (replaceFirst(outstring, "MENĒ", "<hi type=\"inscription\">Menē</hi>") > -1) continue;
+ if (replaceFirst(outstring, "M<\\>ENĒ</>", "<hi type=\"inscription\">Menē</hi>") > -1) continue;
+ if (replaceFirst(outstring, "M<\\>ENĒ<MH4484></>", "<hi type=\"inscription\">Menē<MH4484></hi>") > -1) continue;
+ if (replaceFirst(outstring, "M<\\>ENĒ:</>", "<hi type=\"inscription\">Menē</hi>:") > -1) continue;
+ if (replaceFirst(outstring, "TEKĒL", "<hi type=\"inscription\">Tekēl</hi>") > -1) continue;
+ if (replaceFirst(outstring, "T<\\>EKĒL</>", "<hi type=\"inscription\">Tekēl</hi>") > -1) continue;
+ if (replaceFirst(outstring, "T<\\>EKĒL<MH8625b></>", "<hi type=\"inscription\">Tekēl<MH8625b></hi>") > -1) continue;
+ if (replaceFirst(outstring, "T<\\>EKĒL:</>", "<hi type=\"inscription\">Tekēl</hi>:") > -1) continue;
+ if (replaceFirst(outstring, "UPHARSIN", "<hi type=\"inscription\">Upharsin</hi>") > -1) continue;
+ if (replaceFirst(outstring, "UFARSIN", "<hi type=\"inscription\">Ufarsin</hi>") > -1) continue;
+ if (replaceFirst(outstring, "U<\\>FARSIN</>", "<hi type=\"inscription\">Ufarsin</hi>") > -1) continue;
+ if (replaceFirst(outstring, "U<\\>PHARSIN<MH6537b></>", "<hi type=\"inscription\">Ufarsin<MH6537b></hi>") > -1) continue;
+ if (replaceFirst(outstring, "PERĒS", "<hi type=\"inscription\">Perēs</hi>") > -1) continue;
+ if (replaceFirst(outstring, "P<\\>ERĒS</>", "<hi type=\"inscription\">Perēs</hi>") > -1) continue;
+ if (replaceFirst(outstring, "P<\\>ERĒS<MH6537b></>", "<hi type=\"inscription\">Perēs<MH6537b></hi>") > -1) continue;
+ if (replaceFirst(outstring, "P<\\>ERĒS:</>", "<hi type=\"inscription\">Perēs</hi>:") > -1) continue;
+
+ if (replaceFirst(outstring, "H<\\>OLY<MH6944> TO THE</> L<\\>ORD<MH3068></>",
+ "<hi type=\"inscription\">Holy<MH6944> to the L<\\>ORD<MH3068></></hi>") > -1) continue;
const char *outstr = outstring.c_str();
- const char *found = strstr(outstr, "L\\{ORD}/'\\{S}/");
+ const char *found = strstr(outstr+end, "<\\>");
int start = (found) ? (found - outstr) : -1;
- if (start > -1) {
- outstring.replace(start, 14, "<seg><divineName>Lord's</divineName></seg>");
- continue;
- }
- outstr = outstring.c_str();
- found = strstr(outstr, "L\\{ORD}/’\\{S}/");
- start = (found) ? (found - outstr) : -1;
- if (start > -1) {
- outstring.replace(start, 16, "<seg><divineName>Lord’s</divineName></seg>");
- continue;
- }
- outstr = outstring.c_str();
- found = strstr(outstr, "L\\{ORD,}/");
- start = (found) ? (found - outstr) : -1;
- if (start > -1) {
- outstring.replace(start, 9, "<seg><divineName>Lord</divineName></seg>,");
- continue;
- }
- outstr = outstring.c_str();
- found = strstr(outstr, "L\\{ORD}/");
- start = (found) ? (found - outstr) : -1;
- if (start > -1) {
- outstring.replace(start, 8, "<seg><divineName>Lord</divineName></seg>");
- continue;
- }
- outstr = outstring.c_str();
- found = strstr(outstr, "S\\{EN~OR}/");
- start = (found) ? (found - outstr) : -1;
- if (start > -1) {
- outstring.replace(start, 10, "<seg><divineName>Sen~or</divineName></seg>");
- continue;
- }
- outstr = outstring.c_str();
- found = strstr(outstr, "S\\{EÑOR}/");
- start = (found) ? (found - outstr) : -1;
- if (start > -1) {
- outstring.replace(start, 10, "<seg><divineName>Señor</divineName></seg>");
- continue;
- }
- outstr = outstring.c_str();
- found = strstr(outstr, "Y\\{AH,}/");
- start = (found) ? (found - outstr) : -1;
- if (start > -1) {
- outstring.replace(start, 8, "<seg><divineName>Yah</divineName></seg>,");
- continue;
- }
- outstr = outstring.c_str();
- found = strstr(outstr, "Y\\{AH}/");
- start = (found) ? (found - outstr) : -1;
- if (start > -1) {
- outstring.replace(start, 7, "<seg><divineName>Yah</divineName></seg>");
- continue;
- }
- // is this really valid markup? should 'also be' be in small
- // caps? 3 { and only 2 } ?
- outstr = outstring.c_str();
- found = strstr(outstr, "L\\{ORD {also be}/}");
- start = (found) ? (found - outstr) : -1;
- if (start > -1) {
- outstring.replace(start, 18, "<seg><divineName>Lord</divineName></seg> also be}");
- continue;
- }
- outstr = outstring.c_str();
- found = strstr(outstr, "L\\{ORD {give}/}");
- start = (found) ? (found - outstr) : -1;
- if (start > -1) {
- outstring.replace(start, 15, "<seg><divineName>Lord</divineName></seg> give}");
- continue;
- }
- outstr = outstring.c_str();
- found = strstr(outstr, "L\\{ORD {bless}/}");
- start = (found) ? (found - outstr) : -1;
- if (start > -1) {
- outstring.replace(start, 16, "<seg><divineName>Lord</divineName></seg> bless}");
- continue;
- }
- outstr = outstring.c_str();
- found = strstr(outstr, "L\\{ORD {are my Refuge; You have made the Most High your dwelling place}/}");
- start = (found) ? (found - outstr) : -1;
- if (start > -1) {
- outstring.replace(start, 73, "<seg><divineName>Lord</divineName></seg> are my Refuge; You have made the Most High your dwelling place}");
- continue;
- }
- outstr = outstring.c_str();
- found = strstr(outstr, "MENE");
- start = (found) ? (found - outstr) : -1;
- if (start > -1) {
- outstring.replace(start, 4, "<hi type=\"inscription\">Mene</hi>");
- continue;
- }
- outstr = outstring.c_str();
- found = strstr(outstr, "M\\ENE/");
- start = (found) ? (found - outstr) : -1;
- if (start > -1) {
- outstring.replace(start, 6, "<hi type=\"inscription\">Mene</hi>");
- continue;
- }
- found = strstr(outstr, "M\\ENE:/");
- start = (found) ? (found - outstr) : -1;
- if (start > -1) {
- outstring.replace(start, 7, "<hi type=\"inscription\">Mene</hi>:");
- continue;
- }
- found = strstr(outstr, "TEKEL");
- start = (found) ? (found - outstr) : -1;
- if (start > -1) {
- outstring.replace(start, 5, "<hi type=\"inscription\">Tekel</hi>");
- continue;
- }
- found = strstr(outstr, "T\\EKEL/");
- start = (found) ? (found - outstr) : -1;
- if (start > -1) {
- outstring.replace(start, 7, "<hi type=\"inscription\">Tekel</hi>");
- continue;
- }
- found = strstr(outstr, "T\\EKEL:/");
- start = (found) ? (found - outstr) : -1;
- if (start > -1) {
- outstring.replace(start, 8, "<hi type=\"inscription\">Tekel</hi>:");
- continue;
- }
- found = strstr(outstr, "UPHARSIN");
- start = (found) ? (found - outstr) : -1;
- if (start > -1) {
- outstring.replace(start, 8, "<hi type=\"inscription\">Upharsin</hi>");
- continue;
- }
- found = strstr(outstr, "UFARSIN");
- start = (found) ? (found - outstr) : -1;
- if (start > -1) {
- outstring.replace(start, 7, "<hi type=\"inscription\">Ufarsin</hi>");
- continue;
- }
- found = strstr(outstr, "U\\FARSIN/");
- start = (found) ? (found - outstr) : -1;
- if (start > -1) {
- outstring.replace(start, 9, "<hi type=\"inscription\">Ufarsin</hi>");
- continue;
- }
- found = strstr(outstr, "PERES");
- start = (found) ? (found - outstr) : -1;
- if (start > -1) {
- outstring.replace(start, 5, "<hi type=\"inscription\">Peres</hi>");
- continue;
- }
- found = strstr(outstr, "P\\ERES/");
- start = (found) ? (found - outstr) : -1;
- if (start > -1) {
- outstring.replace(start, 7, "<hi type=\"inscription\">Peres</hi>");
- continue;
- }
- found = strstr(outstr, "P\\ERES:/");
- start = (found) ? (found - outstr) : -1;
- if (start > -1) {
- outstring.replace(start, 8, "<hi type=\"inscription\">Peres</hi>:");
- continue;
- }
- // LB ??? Don't have info on this. Assuming '-'
- outstr = outstring.c_str();
- found = strstr(outstr, "<LB>");
- start = (found) ? (found - outstr) : -1;
- if (start > -1) {
- outstring.replace(start, 4, "-");
- continue;
- }
-
- found = strstr(outstr+end, "\\");
- start = (found) ? (found - outstr) : -1;
if (start > -1) {
- for (;start;start--) {
+ for (--start;start;start--) {
if ((!std::isupper(outstring[start])) &&
(!strchr("\\/ ~", outstring[start]))) {
break;
@@ -1198,32 +1034,22 @@ void prepLine(string &outstring, int currentTestament, bool note) {
else {
outstring.insert(start, "<seg><divineName>");
start += 17;
- const char *b = outstring.c_str();
- const char *found = strstr(b, "L\\{ORD}/");
- int s = (found) ? (found - b) : -1;
- if (s > -1)
- outstring.replace(s, 8, "Lord");
- end = s+4;
+
+ int s = replaceFirst(outstring, "L<\\>{ORD}</>", "Lord");
+ if (s > -1) end = s+4;
}
+
+ // do small cap logic
bool lower = false;
- bool token = false;
- for (int charLen = 1;start < (int)outstring.length(); start+=charLen) {
+ string token = "";
+ for (int charLen = 1; start < (int)outstring.length(); start += charLen) {
const unsigned char *startChar = (const unsigned char *)outstring.c_str()+start;
const unsigned char *endChar = startChar;
SW_u32 testChar = getUniCharFromUTF8(&endChar, true);
charLen = endChar - startChar; // set the size of the UTF-8 sequence
- if (!token) {
- if (testChar == '\\') {
- lower = true;
- outstring.erase(start, 1);
- start--;
- continue;
- }
- if (testChar == '/') {
- lower = false;
- outstring.erase(start, 1);
- end = start;
- start--;
+ if (!token.size()) {
+ if (testChar == '<') {
+ token = "<";
continue;
}
// what is this? It screws MENE MENE up in Daniel
@@ -1238,10 +1064,40 @@ void prepLine(string &outstring, int currentTestament, bool note) {
continue;
}
}
- if (testChar == '>')
- token = false;
- if (testChar == '<')
- token = true;
+ else {
+ token += testChar;
+
+ if (testChar == '>') {
+ if (token == "<\\>") {
+ lower = true;
+ outstring.erase(start-2, 3);
+ start -= 3;
+ }
+ if (token == "</>") {
+ lower = false;
+ outstring.erase(start-2, 3);
+ end = start - 2;
+ start -= 3;
+ unsigned int nextStrongs = outstring.find("<M");
+ unsigned int nextUp = outstring.find("</>");
+ if (nextStrongs != string::npos && nextUp != string::npos && nextStrongs < nextUp) {
+ break;
+ }
+ }
+ // end divineName if we hit a PO in the middle
+ if (token == "<PO>") {
+ break;
+ }
+ unsigned int s = token.find("<N");
+ if (s == string::npos || s > 0) s = token.find("<R");
+ if (s == 0 && token.size() > 2) {
+ if (StringMgr::getSystemStringMgr()->isDigit(token[2])) {
+ break;
+ }
+ }
+ token = "";
+ }
+ }
}
if (currentTestament) {
outstring.insert(end, "</seg>");
@@ -1254,6 +1110,19 @@ void prepLine(string &outstring, int currentTestament, bool note) {
continue;
}
+ // These are places where we unnecessarily stop and then restart otPassage.
+ // We could make the otPassage logic work better, but these exceptions clean
+ // things up for now.
+ if (replaceFirst(outstring, "</seg>’<seg type=\"otPassage\">s", "’s") > -1) continue;
+ if (replaceFirst(outstring, "</seg>-<seg type=\"otPassage\">", "-") > -1) continue;
+ if (replaceFirst(outstring, "</seg>,<seg type=\"otPassage\">", ",") > -1) continue;
+ if (replaceFirst(outstring, "</seg>, <seg type=\"otPassage\">", ", ") > -1) continue;
+ if (replaceFirst(outstring, "</seg>! <seg type=\"otPassage\">", "! ") > -1) continue;
+ if (replaceFirst(outstring, "</seg>; <seg type=\"otPassage\">", "; ") > -1) continue;
+ if (replaceFirst(outstring, "</seg> <seg type=\"otPassage\">", " ") > -1) continue;
+ if (replaceFirst(outstring, "</seg>, ‘<seg type=\"otPassage\">", ", ‘") > -1) continue;
+ if (replaceFirst(outstring, "</seg>,’ <seg type=\"otPassage\">", ",’ ") > -1) continue;
+
if (note) {
outstr = outstring.c_str();
found = strstr(outstr, "{");
@@ -1278,6 +1147,9 @@ void prepLine(string &outstring, int currentTestament, bool note) {
}
}
}
+
+// if (replaceFirst(outstring, ")</hi>", "</hi>)") > -1) continue;
+
break;
}
}
@@ -1309,10 +1181,10 @@ string getNoteBody(int fdn, string &noteLine, string osisID, int currentBookNo,
end++;
int book, chap;
string bkch = noteLine.substr(start+2, end-start-2);
- sscanf(bkch.c_str(), "%d:%d", &book, &chap);
+ sscanf(bkch.c_str(), "%d::%d", &book, &chap);
int vNumEnd = noteLine.find_first_of(" ", end);
int verse = atoi(noteLine.substr(end+1, vNumEnd-end-1).c_str());
- if ((book != currentBookNo) || (chap != currentChapter) || (verse != currentVerse)) {
+ if ((book != currentBookNo) || (chap != currentChapter) || ((verse != currentVerse) && /*kindof allow case in header before verse marker */ (verse != currentVerse+1))) {
fprintf(stderr, "Not correct note line(%s - %s - %d:%d, %d:%d, %d:%d): %s\n\n", osisID.c_str(), nStr.c_str(), currentBookNo, book, currentChapter, chap, currentVerse, verse, noteLine.c_str());
exit(-1);
}
@@ -1334,7 +1206,9 @@ string getNoteBody(int fdn, string &noteLine, string osisID, int currentBookNo,
if (start > -1) {
start += tag.length();
- found = strstr(outstr+start, " <");
+ const char *nFound = strstr(outstr+start, " <N");
+ const char *rFound = strstr(outstr+start, " <R");
+ found = (nFound && (!rFound || nFound < rFound)) ? nFound : rFound;
int end = (found) ? (found - outstr) : -1;
if (end<0) end = noteLine.length();
retVal = noteLine.substr(start, end-start);