#include <ctype.h>
#include <stdio.h>
#include <fcntl.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <clocale>
#include <locale>
#ifndef __GNUC__
#include <io.h>
#else
#include <unistd.h>
#endif
#include <iostream>
#include <string>
#ifndef O_BINARY
#define O_BINARY 0
#endif
#ifdef HAVESWORD
#include <versekey.h>
#include <localemgr.h>
#include <stringmgr.h>
using namespace sword;
#endif
using std::string;
using std::cout;
using std::endl;
// OSIS abbreviations for the first testament's books. main() indexes this
// table with (book number - 1), where the book number comes from a
// {{book:chapter}} marker in the input text.
static const char *osisOTBooks[] = {
"Gen", "Exod", "Lev", "Num", "Deut", "Josh", "Judg",
"Ruth", "1Sam", "2Sam", "1Kgs", "2Kgs", "1Chr",
"2Chr", "Ezra", "Neh", "Esth", "Job", "Ps", "Prov",
"Eccl", "Song", "Isa", "Jer", "Lam",
"Ezek", "Dan", "Hos", "Joel", "Amos", "Obad", "Jonah", "Mic",
"Nah", "Hab", "Zeph", "Hag", "Zech", "Mal",
// extra-Biblical entries; these sit past index nasbMax[0]-1, and main()
// treats any book number above nasbMax[0] as belonging to the second table
"Bar", "PrAzar", "Bel", "Sus", "1Esd", "2Esd", "AddEsth",
"EpJer", "Jdt", "1Macc", "2Macc", "3Macc", "4Macc", "PrMan",
"Ps151", "Sir", "Tob", "Wis"};
// OSIS abbreviations for the second testament's books, indexed by
// (book number - nasbMax[0] - 1).
static const char *osisNTBooks[] = {
"Matt", "Mark", "Luke", "John", "Acts", "Rom", "1Cor",
"2Cor", "Gal", "Eph", "Phil", "Col",
"1Thess", "2Thess", "1Tim", "2Tim", "Titus",
"Phlm", "Heb", "Jas", "1Pet", "2Pet", "1John", "2John",
"3John", "Jude", "Rev"};
// Both tables, selected by testament index (0 = first table, 1 = second).
static const char **osisBooks[] = { osisOTBooks, osisNTBooks };
// Number of sequentially numbered books per testament. main() compares the
// book number against nasbMax[0] to decide which table a book belongs to.
const char nasbMax[2] = {39, 27};
/**
 * Reads the next non-blank line from a seekable file descriptor.
 *
 * Leading whitespace (spaces, tabs, CR, LF -- and therefore blank lines) is
 * skipped, trailing whitespace is trimmed, and the ASCII accent markers used
 * by the source text are rewritten as UTF-8 combining characters:
 *   - '`' following a letter becomes U+0300 (bytes 0xCC 0x80)
 *   - '~' becomes U+0303 (bytes 0xCC 0x83)
 *
 * @param fd   open descriptor; it must support lseek() because the line is
 *             measured first and then re-read in one piece
 * @param buf  in/out: any buffer already held in *buf is delete[]'d, and *buf
 *             always receives a fresh new[]'d NUL-terminated string (possibly
 *             empty) that the caller owns
 * @return 0 when the line ended at a newline; non-zero when end of file (or a
 *         read/seek error) was reached. A final line that lacks a terminating
 *         newline is still delivered in *buf together with the non-zero
 *         result.
 */
char readline(int fd, char **buf) {
	char ch = 0;	// initialised so an immediate EOF never tests an indeterminate value
	int len = 0;

	if (*buf)
		delete [] *buf;
	*buf = 0;

	long index = lseek(fd, 0, SEEK_CUR);
	if (index < 0) {
		// Not seekable (pipe, tty, ...): the measure-then-re-read approach
		// below cannot work, so report end of input instead of computing
		// sizes from bogus offsets.
		fprintf(stderr, "error: readline: input is not seekable\n");
		*buf = new char [2];
		**buf = 0;
		return 1;
	}

	// clean up any preceding white space; index tracks the first content byte
	while ((len = read(fd, &ch, 1)) == 1) {
		if ((ch != 10) && (ch != 13) && (ch != ' ') && (ch != '\t'))
			break;
		index++;
	}

	// scan forward to the terminating newline (or EOF / error)
	while ((len == 1) && (ch != 10)) {
		len = read(fd, &ch, 1);
	}

	// len == 1 here can only mean the scan stopped on a newline. Only then is
	// there a terminator byte to exclude from the line; at EOF every byte
	// read belongs to the line.
	const bool terminated = (len == 1);
	const long endPos = lseek(fd, 0, SEEK_CUR);
	long size = (endPos - index) - (terminated ? 1 : 0);
	if (size < 0)
		size = 0;

	// Doubled allocation: each accent marker grows the text by one byte when
	// it is expanded to a two-byte combining character.
	*buf = new char [ (size+2) * 2 ];
	if (size > 0) {
		lseek(fd, index, SEEK_SET);
		long got = read(fd, *buf, size);
		if (got < 0)
			got = 0;
		(*buf)[got] = 0;
		// step past the terminating char again, independent of short reads
		lseek(fd, endPos, SEEK_SET);

		// clean up any trailing junk on buf (the first byte is never
		// whitespace because leading whitespace was skipped above)
		int buflen = strlen(*buf);
		for (char *it = *buf+(buflen-1); it > *buf; it--) {
			if ((*it != 10) && (*it != 13) && (*it != ' ') && (*it != '\t'))
				break;
			*it = 0;
		}

		// convert all spanish characters to combined
		for (unsigned char *it = (unsigned char *)(*buf)+1; *it; it++) {
			switch (*it) {
			case 0x60 : // `  (only when it follows a letter)
				if (isalpha(it[-1])) {
					// open a one-byte gap, then overwrite marker + gap
					memmove(it+1, it, buflen - (it-(unsigned char *)*buf) + 1);
					buflen++;
					it[0] = 0xcc;
					it[1] = 0x80;
				}
				break;
			case 0x7E : // ~
				memmove(it+1, it, buflen - (it-(unsigned char *)*buf) + 1);
				buflen++;
				it[0] = 0xcc;
				it[1] = 0x83;
				break;
			}
		}
	}
	else **buf = 0;

	// anything other than "stopped on a newline" ends the input for callers
	return (len != 1);
}
// Forward declarations for the helpers defined after main().
void outHeader();
void outTrailer();
void unicodeTicks(string &outstring);
void prepLine(string &outstring, int currentTestament, bool note);
// noteLine is passed by reference: getNoteBody() caches the current notes-file
// line in it between calls. (The '&' had been mangled into a stray character.)
string getNoteBody(int nfd, string &noteLine, string osisID, int currentBookNo, int currentChapter, int currentVerse, string nStr, const char *nx);
// One literal input marker and the OSIS text that replaces it.
struct MarkerSwap {
	const char *marker;
	const char *osis;
};

// Replaces the first occurrence of the first marker (in table order) that is
// present in text. Returns true when a replacement was made, so the caller can
// restart its scan from the top exactly as the hand-written stanzas did.
static bool swapFirstMarker(string &text, const MarkerSwap *swaps, size_t count) {
	for (size_t n = 0; n < count; n++) {
		const string::size_type at = text.find(swaps[n].marker);
		if (at != string::npos) {
			text.replace(at, strlen(swaps[n].marker), swaps[n].osis);
			return true;
		}
	}
	return false;
}

/**
 * Converts a marked-up Bible text file into OSIS XML on stdout.
 *
 * Usage: prog <biblefile> [notesfile]
 *
 * Each input line is read with readline(), passed through unicodeTicks() and
 * prepLine() one line ahead of where it is emitted (the "lookahead"), and then
 * rewritten in several passes:
 *   1. structural markers at the start of a line (<BN> book, <CN>/<SN>
 *      chapter, <SF>/<SH>/<SS>/<SB> titles)
 *   2. the {{book:chapter}}verse marker, which opens a <verse>
 *   3. inline markers (notes, cross references, quotes, milestones,
 *      {added words}); note bodies are pulled from the notes file through
 *      getNoteBody()
 *   4. <MG...>/<MH...> lemma markers, which are turned into <w lemma=...>
 *      wrappers around the preceding word(s)
 *
 * Exit status: -1 usage error, -2 an input file cannot be opened, -3 the book
 * data in the text is inconsistent.
 *
 * NOTE(review): VerseKey is used below without an #ifdef HAVESWORD guard, so
 * this only builds with the SWORD headers available -- confirm whether the
 * guard around the includes is still meaningful.
 */
int main(int argc, char **argv) {
#ifdef HAVESWORD
	LocaleMgr::getSystemLocaleMgr()->setDefaultLocaleName("es");
#endif
	std::setlocale(LC_CTYPE, "");

	// Let's test our command line arguments
	if (argc < 2) {
		fprintf(stderr, "usage: %s <biblefile> [notesfile]\n\n", argv[0]);
		exit(-1);
	}

	// Let's see if we can open our input file
	int fd = open(argv[1], O_RDONLY|O_BINARY);
	if (fd < 0) {
		fprintf(stderr, "error: %s: couldn't open input file: %s \n", argv[0], argv[1]);
		exit(-2);
	}

	// the notes file is optional; fdn stays -1 without it
	int fdn = -1;
	if (argc > 2) {
		fdn = open(argv[2], O_RDONLY|O_BINARY);
		if (fdn < 0) {
			fprintf(stderr, "error: %s: couldn't open input file: %s \n", argv[0], argv[2]);
			exit(-2);
		}
	}

	outHeader();

	// Inline markers handled before cross references (<R...>), in priority order.
	static const MarkerSwap inlineMarkers[] = {
		{"<RS>", "<q who=\"Jesus\">"},
		{"</RS>", "</q>"},
		{"<RT>", "<milestone type=\"x-RT\"/>"},
		{"<?>", "¿"},
		{"<!>", "¡"},
	};
	// Milestone markers handled after {added words}, in priority order.
	static const MarkerSwap milestoneMarkers[] = {
		{"<,>", "<milestone type=\"x-superiorComma\"/>"},
		{"<NA>", "<milestone type=\"x-NA\"/>"},
		{"<NB>", "<milestone type=\"x-NB\"/>"},
		{"<NC>", "<milestone type=\"x-NC\"/>"},
		// paragraph break <PM>
		{"<PM>", "<milestone type=\"line\" subType=\"x-PM\"/>"},
		// poetry breaks <PN>, <PO>, <PE>
		{"<PN>", "<milestone type=\"x-PN\" />"},
		{"<PO>", "<milestone type=\"line\" subType=\"x-PO\" />"},
		{"<PE>", "<milestone type=\"line\" subType=\"x-PE\" />"},
		// letter indent <HL>
		{"<HL>", "<milestone type=\"x-HL\" />"},
	};
	const size_t inlineMarkerCount = sizeof(inlineMarkers) / sizeof(inlineMarkers[0]);
	const size_t milestoneMarkerCount = sizeof(milestoneMarkers) / sizeof(milestoneMarkers[0]);

	char *buffer = 0;
	int result = 0;
	string currentBook = "";
	int currentBookNo = 0;
	int currentTestament = 0;
	int currentChapter = 0;
	int currentVerse = 0;
	bool inBook = false;
	bool inChapter = false;
	bool inVerse = false;
	string noteLine = "";
	string preChapNote = "";
	string outstring;

	// prime the one-line lookahead
	result = readline(fd, &buffer);
	string lookahead = buffer;
	unicodeTicks(lookahead);
	prepLine(lookahead, currentTestament, false);

	do {
		result = readline(fd, &buffer);
		if (lookahead.length()) {
			// normal case: emit the prepared lookahead line now and turn
			// the line just read into the new lookahead
			string savebuf = buffer;
			if (buffer)
				delete [] buffer;
			buffer = new char [ lookahead.length() + 1];
			strcpy(buffer, lookahead.c_str());
			lookahead = savebuf;
			unicodeTicks(lookahead);
			prepLine(lookahead, currentTestament, false);
			result = 0;
		}
		else if (!result) {
			// The lookahead came out empty: process the line just read and
			// refill the lookahead from the following one.
			// NOTE(review): savebuf reaches the passes below without having
			// gone through unicodeTicks()/prepLine() -- confirm intended.
			string savebuf = buffer;
			result = readline(fd, &buffer);
			lookahead = buffer;
			unicodeTicks(lookahead);
			prepLine(lookahead, currentTestament, false);
			if (buffer)
				delete [] buffer;
			buffer = new char [ savebuf.length() + 1];
			strcpy(buffer, savebuf.c_str());
			result = 0;
		}
		outstring = buffer;

		// BOOK NAMES <BN>
		if (!strncmp(outstring.c_str(), "<BN>", 4)) {
			string book = outstring.c_str()+4;
			book = book.substr(0, book.find_first_of("<"));
			outstring = "";
			if (inVerse) {
				outstring += "</verse>";
				inVerse = false;
			}
			if (inChapter) {
				outstring += "</chapter>";
				inChapter = false;
			}
			if (inBook) {
				outstring += "</div>";
				inBook = false;
			}
			outstring += (string)"<div type=\"book\" osisID=\"";
			VerseKey bookName(book.c_str());
			if (bookName.popError()) {
				fprintf(stderr, "error: %s: couldn't find book match for %s. Please check book array in conversion program.\n", argv[0], book.c_str());
				exit(-3);
			}
			currentBook = bookName.getOSISBookName();
			outstring += currentBook;
			currentTestament = bookName.getTestament()-1;
			outstring += (string)"\"><title type=\"main\" subType=\"x-Book\">" + book + "</title>";
			inBook = true;
		}

		// CHAPTERS
		//<SN>PSALM
		if ((!strncmp(outstring.c_str(), "<CN>", 4)) || (!strncmp(outstring.c_str(), "<SN>", 4))) {
			string chapterTitle = outstring.c_str()+4;
			chapterTitle = chapterTitle.substr(0, chapterTitle.find_first_of("<"));
			// the chapter number is whatever follows the first space of the title
			string chapter = chapterTitle.substr(chapterTitle.find_first_of(" ")+1);
			outstring = "";
			if (inVerse) {
				outstring += "</verse>";
				inVerse = false;
			}
			if (inChapter) {
				outstring += "</chapter>";
				inChapter = false;
			}
			outstring += (string)"<chapter osisID=\"" + currentBook + "." + chapter + "\">";
			outstring += (string)"<title type=\"sub\" subType=\"x-Chapter\">" + chapterTitle + "</title>";
			currentChapter = atoi(chapter.c_str());
			inChapter = true;
			currentVerse = 1;
		}

		//<SF>
		if (!strncmp(outstring.c_str(), "<SF>", 4)) {
			string heading = outstring.c_str()+4;
			heading = heading.substr(0, heading.find("</SF>"));
			outstring = "";
			// a paragraph marker opening the NEXT line is pulled in front of the title
			if (!strncmp(lookahead.c_str(), "<PM>", 4)) {
				lookahead.erase(0, 4);
				outstring += "<milestone type=\"line\" subType=\"x-PM\"/>";
			}
			if (inVerse) {
				outstring += "</verse>\n";
				inVerse = false;
			}
			outstring += (string)"<title type=\"acrostic\" canonical=\"true\" subType=\"x-preverse\">" + heading + (string)"</title>";
		}

		//<SH>
		if (!strncmp(outstring.c_str(), "<SH>", 4)) {
			string heading = outstring.c_str()+4;
			heading = heading.substr(0, heading.find("</SH>"));
			outstring = "";
			if (!strncmp(lookahead.c_str(), "<PM>", 4)) {
				lookahead.erase(0, 4);
				outstring += "<milestone type=\"line\" subType=\"x-PM\"/>";
			}
			if (inVerse) {
				outstring += "</verse>\n";
				inVerse = false;
			}
			outstring += (string)"<title type=\"section\" subType=\"x-preverse\">" + heading + (string)"</title>";
		}

		//<SS>
		if (!strncmp(outstring.c_str(), "<SS>", 4)) {
			string heading = (outstring.c_str()+4);
			heading = heading.substr(0, heading.find("</SS>"));
			outstring = (string)"<title type=\"psalm\" canonical=\"true\" subType=\"x-preverse\">" + heading + (string)"</title>";
		}

		//<SB>
		if (!strncmp(outstring.c_str(), "<SB>", 4)) {
			string heading = (outstring.c_str()+4);
			heading = heading.substr(0, heading.find("</SB>"));
			outstring = (string)"<title type=\"scope\" subType=\"x-preverse\">" + heading + (string)"</title>";
		}

		// {{x:y}}
		// strstr is used because find_first_of matches ANY single character
		// of its argument, which fails on input such as ">[{.."
		const char *outstr = outstring.c_str();
		const char *found = strstr(outstr, "{{");
		int start = (found) ? (found - outstr) : -1;
		if (start > -1) {
			found = strstr(outstr, "}}");
			int end = (found) ? (found - outstr) : -1;
			end++;	// now the index of the second '}'
			int testmt = 0, book = 0, chap = 0;
			string bkch = outstring.substr(start+2, end-start-2);
			sscanf(bkch.c_str(), "%d:%d", &book, &chap);
			currentChapter = chap;
			// the verse number runs from just after "}}" to the next space
			int vNumEnd = outstring.find_first_of(" ", end);
			currentVerse = atoi(outstring.substr(end+1, vNumEnd-end-1).c_str());
			currentBookNo = book;
			if (book > nasbMax[0]) {
				testmt = 1;
				book -= nasbMax[0];
			}
			// Validate before indexing: a malformed marker (book 0, or a
			// number past the table) used to read outside osisBooks[testmt].
			const bool bookInRange = (book >= 1) && (book <= nasbMax[testmt]);
			if (!bookInRange || (currentBook != osisBooks[testmt][book-1])) {
				fprintf(stderr, "error: %s: Found a book/chapter identifier out of place: {{%d:%d}} in book %s, chapter %d; bkch = %s; outstring = %s\n", argv[0], book, chap, currentBook.c_str(), currentChapter, bkch.c_str(), outstring.c_str());
				exit(-3);
			}
			char chapString[20], verseString[20];
			snprintf(chapString, sizeof(chapString), "%d", currentChapter);
			snprintf(verseString, sizeof(verseString), "%d", currentVerse);
			string newstring = "";
			if (inVerse) {
				newstring += "</verse>";
				inVerse = false;
			}
			newstring += "<verse osisID=\"" + currentBook + (string)"." + (string)chapString + (string)"." + (string)verseString + (string) "\">";
			outstring.replace(start, vNumEnd-start+1, newstring);
			inVerse = true;
			// a note line cached by a pre-chapter note becomes current again
			noteLine = preChapNote;
			preChapNote = "";
		}

		// multiple occurances on a line stuff: every successful rewrite
		// restarts the scan from the top until nothing matches any more
		while (1) {
			// NOTE
			outstr = outstring.c_str();
			found = strstr(outstr, "<N");
			start = (found) ? (found - outstr) : -1;
			if (start > -1) {
				int end = outstring.find_first_of(">", start+1);
				string nStr = outstring.substr(start+2, end-start-2);
				// NOTE <N#>: a digit or one of the letters A-G identifies a note
				if (isdigit((unsigned char)nStr.c_str()[0]) || strchr("ABCDEFG", nStr.c_str()[0])) {
					bool preChap = (strchr("A", nStr.c_str()[0]) != 0);
					if (preChap) noteLine = "";
					char chapString[20], verseString[20];
					snprintf(chapString, sizeof(chapString), "%d", currentChapter);
					snprintf(verseString, sizeof(verseString), "%d", currentVerse);
					string osisID = currentBook + (string)"." + (string)chapString + (string)"." + (string) verseString;
					string noteBody = getNoteBody(fdn, noteLine, osisID, currentBookNo, currentChapter, currentVerse, nStr, "N");
					if (preChap) preChapNote = noteLine;
					outstring.replace(start, end-start+1, (string)"<note type=\"explanation\" osisID=\"" + osisID + (string)".note." + nStr + (string) "\" n=\"" + nStr + (string) "\">" + noteBody + "</note>");
					continue;
				}
			}

			// <RS>, </RS>, <RT>, <?>, <!>
			if (swapFirstMarker(outstring, inlineMarkers, inlineMarkerCount))
				continue;

			// cross reference <R#>
			outstr = outstring.c_str();
			found = strstr(outstr, "<R");
			start = (found) ? (found - outstr) : -1;
			if (start > -1) {
				int end = outstring.find_first_of(">", start+1);
				string nStr = outstring.substr(start+2, end-start-2);
				char chapString[20], verseString[20];
				snprintf(chapString, sizeof(chapString), "%d", currentChapter);
				snprintf(verseString, sizeof(verseString), "%d", currentVerse);
				string osisID = currentBook + (string)"." + (string)chapString + (string)"." + (string) verseString;
				string noteBody = getNoteBody(fdn, noteLine, osisID, currentBookNo, currentChapter, currentVerse, nStr, "R");
				outstring.replace(start, end-start+1, (string)"<note type=\"crossReference\" osisID=\"" + osisID + (string)".xref." + nStr + (string) "\" n=\"" + nStr + (string)"\">" + noteBody + "</note>");
				continue;
			}

			// transChange added {}
			outstr = outstring.c_str();
			found = strstr(outstr, "{");
			start = (found) ? (found - outstr) : -1;
			if (start > -1) {
				outstring.replace(start, 1, (string)"<transChange type=\"added\">");
				size_t end = outstring.find_first_of("}", start+1);
				if (end != string::npos) {
					outstring.erase(end, 1);
				}
				else end = outstring.size()-1;
				// back up over trailing punctuation so the closing tag lands
				// right after the last letter (or after a closing '>'); the
				// '>' of the tag inserted above guarantees termination
				while ((!isalpha((unsigned char)outstring[end]))&&(outstring[end]!='>')) end--;
				outstring.insert(end+1, "</transChange>");
				continue;
			}

			// <,>, <NA>, <NB>, <NC>, <PM>, <PN>, <PO>, <PE>, <HL>
			if (swapFirstMarker(outstring, milestoneMarkers, milestoneMarkerCount))
				continue;

			break;
		}

		int strongsStart = 0;		// where the next <w ...> opening tag goes
		int transChangeStart = 0;	// just past the latest <transChange ...> tag
		bool strongsFound = false;	// strongsStart is pinned to a word start
		bool intoken = false;		// between '<' and '>'
		bool intag = false;		// inside an element opened by a non-closing tag
		bool inNote = false;
		int tokenStart = 0;
		string lastToken = "";
		string previousToken = "";
		int tenseChange = -1;		// index of a pending '*' marker, or -1

		// strongs numbers
		for (unsigned int i = 0; i < outstring.length(); i++) {
			if ((!inNote) && (!intoken) && (!intag) && (!strongsFound)) {
				if ((outstring[i] != ' ') && ((isalpha((unsigned char)outstring[i])) || (isdigit((unsigned char)outstring[i])))) {
					strongsStart = i;
					strongsFound = true;
				}
			}
			if (outstring[i] =='*')
				tenseChange = i;
			if (outstring[i] == '<') { tokenStart = i+1; intoken = true; }
			if (outstring[i] == '>') {
				intoken = false;
				previousToken = lastToken;
				lastToken = outstring.substr(tokenStart, i-tokenStart);
				// Not completely safe, but works for current NASB data
				if (strchr(lastToken.c_str(), '/'))
					intag = false;
				else intag = true;
				if ((intag)&&(!strncmp(lastToken.c_str(), "transChange", 11))) {
					transChangeStart = i+1;
				}
				// tags after which the next word starts a fresh <w> span
				if ((!strncmp(lastToken.c_str(), "/divineName", 11)) ||
				    (!strncmp(lastToken.c_str(), "q who=\"Jesus\"", 13)) ||
				    (!strncmp(lastToken.c_str(), "seg type=\"otPassage\"", 19)) ||
				    (!strncmp(lastToken.c_str(), "/transChange", 12)) ||
				    (!strncmp(lastToken.c_str(), "milestone", 9)) ||
				    (!strncmp(lastToken.c_str(), "/seg", 4))) {
					strongsFound = false;
					strongsStart = i+1;
				}
				if (!strncmp(lastToken.c_str(), "note", 4)) {
					strongsFound = false;
					strongsStart = i+1;
					inNote = true;
				}
				if (!strncmp(lastToken.c_str(), "/note", 5)) {
					strongsFound = false;
					strongsStart = i+1;
					inNote = false;
				}
				if ((!strncmp(lastToken.c_str(), "verse", 5))) {
					intag = false;
				}
				if (	(!strncmp(lastToken.c_str(), "MG", 2)) ||
					(!strncmp(lastToken.c_str(), "MH", 2))) {
					// insert </w>
					// fix tenseChange to be inside <w> so we can include a subset of the <w> content.
					outstring.replace(tokenStart-1, lastToken.length()+2, ((tenseChange > -1) ? "</w></transChange>":"</w>"));
					i = (tokenStart-1) + ((tenseChange > -1) ? 18:4);
					// build <w ... > tag
					char lang = lastToken[1]; // H or G
					lastToken.replace(0, 1, "<w lemma=\"strong:");
					while ((start = lastToken.find(", ")) > -1) {
						lastToken.replace(start, 2, (string)" strong:" + lang);
					}
					lastToken += "\">";
					intag = false;
					if (tenseChange > -1) {
						lastToken.insert(0, "<transChange type=\"tenseChange\">");
					}
					if (!strncmp(previousToken.c_str(), "transChange type=\"added", 23)) {
						outstring.insert(transChangeStart, lastToken);
						intag = true;
						i += lastToken.length() - 1;	// (-1 because we're about to i++)
					}
					// insert our token
					else {
						outstring.insert(strongsStart, lastToken);
						i += lastToken.length() - 1;	// (-1 because we're about to i++)
					}
					strongsStart = i+1;
					strongsFound = false;
					if (tenseChange > -1) {
						// relocate because position may have changed from all the token inserts
						const string::size_type star = outstring.find('*');
						if (star != string::npos)
							outstring.erase(star, 1);
						tenseChange = -1;
					}
				}
			}
		}

		// clean up stuff that didn't work quite right
		while (1) {
			// divineName strongs tags misorderings
			string target = "</w></divineName></seg>";
			size_t s = outstring.find(target);
			if (s != string::npos) {
				outstring.replace(s, target.length(), "</divineName></seg></w>");
				continue;
			}
			target = "</w>,</divineName></seg>";
			s = outstring.find(target);
			if (s != string::npos) {
				outstring.replace(s, target.length(), "</divineName></seg></w>,");
				continue;
			}
			break;
		}

		std::cout << outstring;
		if (!result) std::cout << "\n";
	}
	while (!result);

	// close whatever is still open
	outstring = "";
	if (inVerse) {
		outstring += "</verse>";
		inVerse = false;
	}
	if (inChapter) {
		outstring += "</chapter>";
		inChapter = false;
	}
	if (inBook) {
		outstring += "</div>";
		inBook = false;
	}
	std::cout << outstring;
	outTrailer();

	// clean up the buffer that readline allocated
	if (buffer)
		delete [] buffer;
	close(fd);
	if (fdn > -1)
		close(fdn);
	return 0;
}
// Writes the fixed OSIS preamble to stdout: the XML declaration, the opening
// <osis> and <osisText> elements and the complete <header>. outTrailer()
// closes the two elements left open here.
void outHeader() {
	static const char *const preamble[] = {
		"<?xml version=\"1.0\" encoding=\"UTF-8\"?>",
		"<osis xmlns=\"http://www.bibletechnologies.net/2003/OSIS/namespace\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.bibletechnologies.net/2003/OSIS/namespace osisCore.1.9.xsd\">",
		" <osisText osisIDWork=\"nasb\" xml:lang=\"en\">",
		" <header>",
		" <work osisWork=\"nasb\">",
		" <title>NEW AMERICAN STANDARD BIBLE</title>",
		" <identifier type=\"OSIS\">Bible.en.NASB.1995</identifier>",
		" <rights>Copyright (C) 1960,1962,1963,1968,1971,1972,1973,1975,1977,1995 by THE LOCKMAN FOUNDATION</rights>",
		" <refSystem>Bible</refSystem>",
		" </work>",
		" <work osisWork=\"strongs\">",
		" </work>",
		" </header>",
	};
	const size_t lineCount = sizeof(preamble) / sizeof(preamble[0]);
	// one preamble entry per output line
	for (size_t n = 0; n < lineCount; ++n)
		std::cout << preamble[n] << "\n";
}
// Writes the closing tags for the <osisText> and <osis> elements that
// outHeader() opened, one per line.
void outTrailer() {
	static const char *const closers[] = { "</osisText>\n", "</osis>\n" };
	for (int n = 0; n < 2; ++n)
		std::cout << closers[n];
}
// Rewrites ASCII quote marks in place as UTF-8 typographic quotes:
//   ``  ->  U+201C (left double)     `  ->  U+2018 (left single)
//   '   ->  U+2019 (right single)    "  ->  U+201D (right double)
// The pair `` is handled before the single ` so a doubled backtick never
// turns into two single quotes.
void unicodeTicks(string &outstring) {
	static const char *const asciiMark[] = { "``", "`", "'", "\"" };
	static const char *const curlyMark[] = { "“", "\xe2\x80\x98", "\xe2\x80\x99", "\xe2\x80\x9d" };

	for (int kind = 0; kind < 4; ++kind) {
		const size_t markLen = strlen(asciiMark[kind]);
		const size_t curlyLen = strlen(curlyMark[kind]);
		size_t from = 0;
		// Replace left to right. The replacement bytes never contain an
		// ASCII quote, so resuming after them finds the same matches as
		// rescanning from the start of the string.
		for (;;) {
			const char *base = outstring.c_str();
			const char *hit = strstr(base + from, asciiMark[kind]);
			if (!hit)
				break;
			const size_t at = hit - base;
			outstring.replace(at, markLen, curlyMark[kind]);
			from = at + curlyLen;
		}
	}
}
// One literal input sequence and the text that replaces it in prepLine().
struct PrepSwap {
	const char *from;
	const char *to;
};

// Replaces the first occurrence of the first entry (in table order) found in
// text and reports whether anything changed, so prepLine() can restart its
// scan from the top after every rewrite.
static bool prepLineSwapFirst(string &text, const PrepSwap *swaps, size_t count) {
	for (size_t n = 0; n < count; n++) {
		const string::size_type at = text.find(swaps[n].from);
		if (at != string::npos) {
			text.replace(at, strlen(swaps[n].from), swaps[n].to);
			return true;
		}
	}
	return false;
}

// True for characters that may belong to a small-caps run marked up as
// X\XXX/ : upper-case letters plus the run punctuation '\\', '/', ' ' and '~'.
static bool prepLineIsRunChar(char c) {
	return std::isupper((unsigned char)c) || (strchr("\\/ ~", c) != 0);
}

/**
 * Normalises one line of input markup in place, before main() (or
 * getNoteBody()) does its own tag rewriting.
 *
 * The line is rewritten repeatedly; after every single rewrite the rules are
 * re-applied from the top, in this priority order:
 *   1. redundant markers <V> <P> <C> <CC> <CP> are removed
 *   2. <$F ... >> spans are removed
 *   3. fixed literal sequences (continuation quotes, divine-name spellings,
 *      the Daniel inscription words, <LB>) are replaced from a table
 *   4. remaining X\XXX/ small-caps runs are lower-cased and wrapped
 *   5. for notes only: {..} becomes <hi type="italic">..</hi>
 *
 * @param outstring         the line to rewrite
 * @param currentTestament  0 wraps small-caps runs as
 *                          <seg><divineName>..</divineName></seg>; non-zero
 *                          wraps them as <seg type="otPassage">..</seg>
 * @param note              true when the text is a note body
 */
void prepLine(string &outstring, int currentTestament, bool note) {
	// redundant markers
	static const PrepSwap redundantMarkers[] = {
		{"<V>", ""},
		{"<P>", ""},
		{"<C>", ""},
		{"<CC>", ""},
		{"<CP>", ""},
	};
	static const PrepSwap literalSwaps[] = {
		// <A>
		{"<A>", "<milestone type=\"line\" subType=\"x-A\"/>"},
		// ~“ (the '~' has already become the combining tilde 0xCC 0x83)
		{"\xcc\x83\xe2\x80\x9c", "<milestone type=\"cQuote\" marker=\"“\"/>"},
		// continuation quotes marked with '+'
		{"+«", "<milestone type=\"cQuote\" marker=\"«\"/>"},
		{"+»", "<milestone type=\"cQuote\" marker=\"»\"/>"},
		{"+“", "<milestone type=\"cQuote\" marker=\"“\"/>"},
		{"+”", "<milestone type=\"cQuote\" marker=\"”\"/>"},
		{"+‘", "<milestone type=\"cQuote\" marker=\"‘\"/>"},
		{"+’", "<milestone type=\"cQuote\" marker=\"’\"/>"},
		// continuation quotes marked with '-'
		{"-«", "<milestone type=\"cQuote\" marker=\"«\"/>"},
		{"-»", "<milestone type=\"cQuote\" marker=\"»\"/>"},
		{"-“", "<milestone type=\"cQuote\" marker=\"“\"/>"},
		{"-”", "<milestone type=\"cQuote\" marker=\"”\"/>"},
		{"-‘", "<milestone type=\"cQuote\" marker=\"‘\"/>"},
		{"-’", "<milestone type=\"cQuote\" marker=\"’\"/>"},
		// ~‘
		{"\xcc\x83\xe2\x80\x98", "<milestone type=\"cQuote\" marker=\"‘\"/>"},
		// divine name spellings; longer forms first so the plain form
		// cannot match inside them
		{"L\\{ORD}/'\\{S}/", "<seg><divineName>Lord's</divineName></seg>"},
		{"L\\{ORD}/’\\{S}/", "<seg><divineName>Lord’s</divineName></seg>"},
		{"L\\{ORD,}/", "<seg><divineName>Lord</divineName></seg>,"},
		{"L\\{ORD}/", "<seg><divineName>Lord</divineName></seg>"},
		{"S\\{EN~OR}/", "<seg><divineName>Sen~or</divineName></seg>"},
		{"S\\{EÑOR}/", "<seg><divineName>Señor</divineName></seg>"},
		{"Y\\{AH,}/", "<seg><divineName>Yah</divineName></seg>,"},
		{"Y\\{AH}/", "<seg><divineName>Yah</divineName></seg>"},
		// is this really valid markup? should 'also be' be in small
		// caps? 3 { and only 2 } ?
		{"L\\{ORD {also be}/}", "<seg><divineName>Lord</divineName></seg> also be}"},
		{"L\\{ORD {give}/}", "<seg><divineName>Lord</divineName></seg> give}"},
		{"L\\{ORD {bless}/}", "<seg><divineName>Lord</divineName></seg> bless}"},
		{"L\\{ORD {are my Refuge; You have made the Most High your dwelling place}/}", "<seg><divineName>Lord</divineName></seg> are my Refuge; You have made the Most High your dwelling place}"},
		// inscription words
		{"MENE", "<hi type=\"inscription\">Mene</hi>"},
		{"M\\ENE/", "<hi type=\"inscription\">Mene</hi>"},
		{"M\\ENE:/", "<hi type=\"inscription\">Mene</hi>:"},
		{"TEKEL", "<hi type=\"inscription\">Tekel</hi>"},
		{"T\\EKEL/", "<hi type=\"inscription\">Tekel</hi>"},
		{"T\\EKEL:/", "<hi type=\"inscription\">Tekel</hi>:"},
		{"UPHARSIN", "<hi type=\"inscription\">Upharsin</hi>"},
		{"UFARSIN", "<hi type=\"inscription\">Ufarsin</hi>"},
		{"U\\FARSIN/", "<hi type=\"inscription\">Ufarsin</hi>"},
		{"PERES", "<hi type=\"inscription\">Peres</hi>"},
		{"P\\ERES/", "<hi type=\"inscription\">Peres</hi>"},
		{"P\\ERES:/", "<hi type=\"inscription\">Peres</hi>:"},
		// LB ??? Don't have info on this. Assuming '-'
		{"<LB>", "-"},
	};
	const size_t redundantCount = sizeof(redundantMarkers) / sizeof(redundantMarkers[0]);
	const size_t literalCount = sizeof(literalSwaps) / sizeof(literalSwaps[0]);

	// Offset just past the small-caps run closed most recently; the search
	// for the next '\\' starts here so finished runs are not revisited.
	int end = 0;

	while (1) {
		if (prepLineSwapFirst(outstring, redundantMarkers, redundantCount))
			continue;

		// <$F...>>
		size_t s = outstring.find("<$F");
		if (s != string::npos) {
			size_t e = outstring.find(">>", s);
			if (e == string::npos) {
				// Unterminated span: drop the rest of the line. The old
				// length computation wrapped around here and could leave
				// the marker in place, looping forever.
				outstring.erase(s);
			}
			else {
				outstring.erase(s, e-s+2);
			}
			continue;
		}

		if (prepLineSwapFirst(outstring, literalSwaps, literalCount))
			continue;

		// generic small-caps run: X\XXX/
		if (end > (int)outstring.length())
			end = (int)outstring.length();	// never search past the text
		const char *outstr = outstring.c_str();
		const char *found = strstr(outstr+end, "\\");
		int start = (found) ? (found - outstr) : -1;
		if (start > -1) {
			// Walk left to the first character of the run. When the run
			// reaches the very start of the line, begin at index 0 (this
			// used to skip the first character unconditionally).
			while ((start > 0) && prepLineIsRunChar(outstring[start]))
				start--;
			if (!prepLineIsRunChar(outstring[start]))
				start++;
			while (outstring[start] == ' ')
				start++;

			if (currentTestament) {
				outstring.insert(start, "<seg type=\"otPassage\">");
				start += 22;	// length of the tag just inserted
			}
			else {
				outstring.insert(start, "<seg><divineName>");
				start += 17;	// length of the tags just inserted
				const char *b = outstring.c_str();
				const char *lordAt = strstr(b, "L\\{ORD}/");
				int lordPos = (lordAt) ? (lordAt - b) : -1;
				if (lordPos > -1)
					outstring.replace(lordPos, 8, "Lord");
				// NOTE(review): assigned even when nothing matched
				// (giving 3) -- kept as found, confirm intent
				end = lordPos+4;
			}

			bool lower = false;	// between '\\' and '/': letters get lower-cased
			bool token = false;	// inside a <...> tag: left untouched
			for (int charLen = 1;start < (int)outstring.length(); start+=charLen) {
				const unsigned char *startChar = (const unsigned char *)outstring.c_str()+start;
				const unsigned char *endChar = startChar;
				SW_u32 testChar = getUniCharFromUTF8(&endChar, true);
				charLen = endChar - startChar;	// set the size of the UTF-8 sequence
				if (!token) {
					if (testChar == '\\') {
						lower = true;
						outstring.erase(start, 1);
						start--;	// stay on the same offset after start+=charLen
						continue;
					}
					if (testChar == '/') {
						lower = false;
						outstring.erase(start, 1);
						end = start;	// the closing tag goes where the run ended
						start--;
						continue;
					}
					// (stopping at ':' was tried here and broke MENE MENE in Daniel)
					if (StringMgr::getSystemStringMgr()->isAlpha(testChar)) {
						// the first lower-case letter ends the run
						if (StringMgr::getSystemStringMgr()->isLower(testChar))
							break;
						if (lower)
							outstring.replace(start, charLen, SWBuf((const char *)startChar, charLen).toLower());
						continue;
					}
				}
				if (testChar == '>')
					token = false;
				if (testChar == '<')
					token = true;
			}
			if (currentTestament) {
				outstring.insert(end, "</seg>");
				end+=6;
			}
			else {
				outstring.insert(end, "</divineName></seg>");
				end+=19;
			}
			continue;
		}

		if (note) {
			// in notes, braces mark italics
			outstr = outstring.c_str();
			found = strstr(outstr, "{");
			start = (found) ? (found - outstr) : -1;
			if (start > -1) {
				outstring.replace(start, 1, "<hi type=\"italic\">");
				continue;
			}
			outstr = outstring.c_str();
			found = strstr(outstr, "}");
			start = (found) ? (found - outstr) : -1;
			if (start > -1) {
				outstring.replace(start, 1, "</hi>");
				continue;
			}
			// repair </reference></hi> when no <hi> opens directly before a <reference
			s = outstring.find("</reference></hi>");
			if (s != string::npos) {
				const size_t s2 = outstring.find("<hi type=\"italic\"><reference");
				if (s2 == string::npos) {
					outstring.replace(s, 17, "</hi></reference>");
					continue;
				}
			}
		}
		break;
	}
}
/**
 * Returns the body text of one note or cross reference for the current verse.
 *
 * Lines of the notes file are expected to start with a {{book:chapter}}verse
 * marker followed by tagged entries such as <N1>...  or <R2>... . The line
 * currently in use is cached in noteLine, so successive markers of the same
 * verse are served from it; a new line is read only when noteLine is empty.
 * Lines without a {{ marker or without any <R / <N entry are skipped.
 *
 * @param fdn             descriptor of the notes file, or -1 when there is none
 * @param noteLine        in/out cache of the current notes-file line
 * @param osisID          OSIS id of the current verse (error messages, and
 *                        reference context when built with HAVESWORD)
 * @param currentBookNo   book number the note line must carry
 * @param currentChapter  chapter number the note line must carry
 * @param currentVerse    verse number the note line must carry
 * @param nStr            note identifier as it appears after <N or <R
 * @param nx              "N" for notes, "R" for cross references
 * @return the prepared note text; "" when there is no notes file, the notes
 *         file is exhausted, or the line has no entry for this identifier
 *
 * Exits the program with status -1 when the note line belongs to a different
 * verse than the one being processed.
 */
string getNoteBody(int fdn, string &noteLine, string osisID, int currentBookNo, int currentChapter, int currentVerse, string nStr, const char *nx) {
	int start = -1;
	const char *found = 0;
	const char *outstr = 0;

	while (start == -1) {
		if (!noteLine.length()) {
			// Nothing cached and nothing to read from: give up instead of
			// spinning forever (this loop never ended without a notes file).
			if (fdn < 0) return "";
			char *nbuffer = 0;
			const bool eof = readline(fdn, &nbuffer);
			noteLine = nbuffer;
			delete [] nbuffer;	// copied above; release on every path
			// a final line without a newline arrives together with eof,
			// so only stop when there was no text at all
			if (eof && noteLine.empty()) return "";	// eof
		}
		outstr = noteLine.c_str();
		found = strstr(outstr, "{{");
		start = (found) ? (found - outstr) : -1;
		// be sure we have at least one of these. We've found note lines without any actual notes
		if (found) found = strstr(outstr, "<R");
		if (!found) found = strstr(outstr, "<N");
		if (!found) start = -1;
		if (start == -1) noteLine = "";
	}

	// verify that the line belongs to the verse being processed
	found = strstr(outstr, "}}");
	int end = (found) ? (found - outstr) : -1;
	end++;	// now the index of the second '}'
	int book = 0, chap = 0;	// stay 0 (and fail the check) if the marker is unparsable
	string bkch = noteLine.substr(start+2, end-start-2);
	sscanf(bkch.c_str(), "%d:%d", &book, &chap);
	int vNumEnd = noteLine.find_first_of(" ", end);
	int verse = atoi(noteLine.substr(end+1, vNumEnd-end-1).c_str());
	if ((book != currentBookNo) || (chap != currentChapter) || (verse != currentVerse)) {
		fprintf(stderr, "Not correct note line(%s - %s - %d:%d, %d:%d, %d:%d): %s\n\n", osisID.c_str(), nStr.c_str(), currentBookNo, book, currentChapter, chap, currentVerse, verse, noteLine.c_str());
		exit(-1);
	}

	// cut out the text between this entry's tag and the next " <"
	outstr = noteLine.c_str();
	string tag = (string)"<"+(string)nx+nStr+(string)">";
	found = strstr(outstr, tag.c_str());
	start = (found) ? (found - outstr) : -1;
	string retVal = "";
	if (start > -1) {
		start += tag.length();
		found = strstr(outstr+start, " <");
		end = (found) ? (found - outstr) : -1;
		if (end<0) end = noteLine.length();
		retVal = noteLine.substr(start, end-start);
	}
	unicodeTicks(retVal);

#ifdef HAVESWORD
	if (*nx == 'R') {
		// } { get's deleted. e.g. {Luke} {9:10-17}
		outstr = retVal.c_str();
		found = strstr(outstr, "} {");
		start = (found) ? (found - outstr) : -1;
		if (start > -1) {
			retVal.replace(start, 3, " ");
		}
		// move a ';' that sits just inside a closing brace to the outside
		outstr = retVal.c_str();
		found = strstr(outstr, ";}");
		start = (found) ? (found - outstr) : -1;
		if (start > -1) {
			retVal.replace(start, 2, "};");
		}
		VerseKey key = osisID.c_str();
		retVal = VerseKey::convertToOSIS(retVal.c_str(), &key);
	}
#endif

	prepLine(retVal, 0, true);
	return retVal;
}