#include <ctype.h>
#include <stdio.h>
#include <fcntl.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#ifndef __GNUC__
#include <io.h>
#else
#include <unistd.h>
#endif
#include <iostream>
#include <string>
#ifndef O_BINARY
#define O_BINARY 0
#endif
#ifdef HAVESWORD
#include <versekey.h>
using namespace sword;
#endif
using std::string;
using std::cout;
using std::endl;
// OSIS book identifiers for the Old Testament.  The first 39 entries line up
// index-for-index with nasbOTBooks (main() maps a source book name to an OSIS
// id by sharing the index); the remaining entries are extra-Biblical books.
static const char *osisOTBooks[] = {
    "Gen",   "Exod",  "Lev",   "Num",   "Deut",
    "Josh",  "Judg",  "Ruth",  "1Sam",  "2Sam",
    "1Kgs",  "2Kgs",  "1Chr",  "2Chr",  "Ezra",
    "Neh",   "Esth",  "Job",   "Ps",    "Prov",
    "Eccl",  "Song",  "Isa",   "Jer",   "Lam",
    "Ezek",  "Dan",   "Hos",   "Joel",  "Amos",
    "Obad",  "Jonah", "Mic",   "Nah",   "Hab",
    "Zeph",  "Hag",   "Zech",  "Mal",
    // extra-Biblical
    "Bar",     "PrAzar", "Bel", "Sus",   "1Esd",  "2Esd",
    "AddEsth", "EpJer",  "Jdt", "1Macc", "2Macc", "3Macc",
    "4Macc",   "PrMan",  "Ps151", "Sir", "Tob",   "Wis"
};

// OSIS book identifiers for the New Testament, index-aligned with nasbNTBooks.
static const char *osisNTBooks[] = {
    "Matt",   "Mark",   "Luke",  "John",  "Acts",  "Rom",
    "1Cor",   "2Cor",   "Gal",   "Eph",   "Phil",  "Col",
    "1Thess", "2Thess", "1Tim",  "2Tim",  "Titus", "Phlm",
    "Heb",    "Jas",    "1Pet",  "2Pet",  "1John", "2John",
    "3John",  "Jude",   "Rev"
};

// Testament selector: index 0 is the Old Testament table, 1 the New.
static const char **osisBooks[] = { osisOTBooks, osisNTBooks };

// Number of entries in each osisBooks table (57 and 27).
const char osisMax[2] = {
    static_cast<char>(sizeof(osisOTBooks) / sizeof(osisOTBooks[0])),
    static_cast<char>(sizeof(osisNTBooks) / sizeof(osisNTBooks[0]))
};
// Book names exactly as they appear after the <BN> marker in the source text.
// The active tables hold the Spanish names; the English names that occupy the
// same positions are kept in the comments below each table for reference.
static const char *nasbOTBooks[] = {
    "GENESIS",       "EXODO",          "LEVITICO",    "NUMEROS",
    "DEUTERONOMIO",  "JOSUE",          "JUECES",      "RUT",
    "1 SAMUEL",      "2 SAMUEL",       "1 REYES",     "2 REYES",
    "1 CRONICAS",    "2 CRONICAS",     "ESDRAS",      "NEHEMIAS",
    "ESTER",         "JOB",            "LOS SALMOS",  "LOS PROVERBIOS",
    "ECLESIASTES",   "CANTARES",       "ISAIAS",      "JEREMIAS",
    "LAMENTACIONES", "EZEQUIEL",       "DANIEL",      "OSEAS",
    "JOEL",          "AMOS",           "ABDIAS",      "JONAS",
    "MIQUEAS",       "NAHUM",          "HABACUC",     "SOFONIAS",
    "HAGEO",         "ZACARIAS",       "MALAQUIAS"
    /* English equivalents, same order:
    "GENESIS", "EXODUS", "LEVITICUS", "NUMBERS", "DEUTERONOMY", "JOSHUA", "JUDGES",
    "RUTH", "1 SAMUEL", "2 SAMUEL", "1 KINGS", "2 KINGS", "1 CHRONICLES", "2 CHRONICLES",
    "EZRA", "NEHEMIAH", "ESTHER", "JOB", "PSALMS", "PROVERBS", "ECCLESIASTES",
    "SONG OF SOLOMON", "ISAIAH", "JEREMIAH", "LAMENTATIONS", "EZEKIEL", "DANIEL", "HOSEA",
    "JOEL", "AMOS", "OBADIAH", "JONAH", "MICAH", "NAHUM", "HABAKKUK",
    "ZEPHANIAH", "HAGGAI", "ZECHARIAH", "MALACHI"
    */
};

static const char *nasbNTBooks[] = {
    "SAN MATEO",        "SAN MARCOS",       "SAN LUCAS",   "SAN JUAN",
    "HECHOS",           "ROMANOS",          "1 CORINTIOS", "2 CORINTIOS",
    "GALATAS",          "EFESIOS",          "FILIPENSES",  "COLOSENSES",
    "1 TESALONICENSES", "2 TESALONICENSES", "1 TIMOTEO",   "2 TIMOTEO",
    "TITO",             "FILEMON",          "HEBREOS",     "SANTIAGO",
    "1 SAN PEDRO",      "2 SAN PEDRO",      "1 SAN JUAN",  "2 SAN JUAN",
    "3 SAN JUAN",       "SAN JUDAS",        "EL APOCALIPSIS"
    /* English equivalents, same order:
    "MATTHEW", "MARK", "LUKE", "JOHN", "ACTS", "ROMANS", "1 CORINTHIANS",
    "2 CORINTHIANS", "GALATIANS", "EPHESIANS", "PHILIPPIANS", "COLOSSIANS", "1 THESSALONIANS", "2 THESSALONIANS",
    "1 TIMOTHY", "2 TIMOTHY", "TITUS", "PHILEMON", "HEBREWS", "JAMES", "1 PETER",
    "2 PETER", "1 JOHN", "2 JOHN", "3 JOHN", "JUDE", "REVELATION"
    */
};

// Testament selector: index 0 is the Old Testament table, 1 the New.
static const char **nasbBooks[] = { nasbOTBooks, nasbNTBooks };

// Number of entries in each nasbBooks table (39 and 27).
const char nasbMax[2] = {
    static_cast<char>(sizeof(nasbOTBooks) / sizeof(nasbOTBooks[0])),
    static_cast<char>(sizeof(nasbNTBooks) / sizeof(nasbNTBooks[0]))
};
/**
 * Reads the next non-blank line from a seekable file descriptor.
 *
 * Leading whitespace (space, tab, CR, LF -- which also skips blank lines) is
 * discarded, the line is read up to the next LF or end of input, trailing
 * whitespace is trimmed, and the ASCII accent markers used by the Spanish
 * source text are rewritten as UTF-8 combining marks:
 *   letter followed by '`'  ->  letter + U+0300 (0xCC 0x80)
 *   any '~' after byte 0    ->  U+0303          (0xCC 0x83)
 *
 * @param fd   descriptor opened for reading; lseek() must work on it.
 * @param buf  in/out: any previous buffer in *buf is delete[]'d and replaced
 *             by a new NUL-terminated buffer owned by the caller (never null
 *             on return; empty string when nothing was read).
 * @return non-zero when the input is exhausted or a read failed, 0 when the
 *         line ended at an LF and more input may follow.  A final line that
 *         has no trailing LF is returned in full together with a non-zero
 *         result.
 */
char readline(int fd, char **buf) {
    char ch = 0;    // initialised: the scan below must never test an indeterminate byte
    int len;

    delete [] *buf;
    *buf = 0;

    long index = lseek(fd, 0, SEEK_CUR);

    // skip leading white space; index ends up on the first payload byte
    while ((len = read(fd, &ch, 1)) == 1) {
        if ((ch != 10) && (ch != 13) && (ch != ' ') && (ch != '\t'))
            break;
        index++;
    }

    // scan forward to the terminating LF (or to end of input / read error)
    while ((len == 1) && (ch != 10))
        len = read(fd, &ch, 1);

    // Only subtract the terminator when one was actually consumed; otherwise
    // the last byte of an unterminated final line would be lost.
    const bool terminated = (len == 1);
    const long lineEnd = lseek(fd, 0, SEEK_CUR);
    const int size = (int)(lineEnd - index) - (terminated ? 1 : 0);

    // Twice the payload: every byte after the first can grow by at most one
    // byte in the accent conversion below.
    *buf = new char [ (size+2) * 2 ];
    **buf = 0;

    if (size > 0) {
        lseek(fd, index, SEEK_SET);
        long got = read(fd, *buf, size);
        if (got < 0)
            got = 0;
        (*buf)[got] = 0;
        // leave the descriptor just past the line (and its LF, if any)
        lseek(fd, lineEnd, SEEK_SET);

        int buflen = (int)strlen(*buf);
        if (buflen > 0) {
            // clean up any trailing junk on buf (never touches the first byte)
            for (char *it = *buf + (buflen - 1); it > *buf; it--) {
                if ((*it != 10) && (*it != 13) && (*it != ' ') && (*it != '\t'))
                    break;
                *it = 0;
            }
            buflen = (int)strlen(*buf);

            // convert all spanish characters to combined
            for (unsigned char *it = (unsigned char *)(*buf) + 1; *it; it++) {
                const bool grave = (*it == 0x60) && isalpha(it[-1]);   // `
                const bool tilde = (*it == 0x7E);                      // ~
                if (!grave && !tilde)
                    continue;
                // open a one-byte gap (the move includes the NUL), then
                // overwrite marker + gap with the two-byte combining mark
                memmove(it+1, it, buflen - (it - (unsigned char *)*buf) + 1);
                buflen++;
                it[0] = 0xcc;
                it[1] = grave ? 0x80 : 0x83;
            }
        }
    }

    // EOF (0) and read errors (-1) both mean "no more input"
    return terminated ? 0 : 1;
}
// Forward declarations for the helpers defined after main().
void outHeader();
void outTrailer();
void unicodeTicks(string &outstring);
void prepLine(string &outstring, int currentTestament, bool note);
// noteLine is passed by reference: getNoteBody() stores the notes-file line it
// is currently working from in it so later calls can reuse that line.
string getNoteBody(int nfd, string &noteLine, string osisID, int currentBookNo, int currentChapter, int currentVerse, string nStr, const char *nx);
// Entry point: converts the marked-up Bible text in argv[1] to OSIS XML on
// stdout.  argv[2], when given, is opened as the notes file that
// getNoteBody() reads note and cross-reference bodies from.
// Exits with -1 on bad usage, -2 when an input file cannot be opened and -3
// when a book name or a {{book:chapter}} marker does not match the book
// tables.
int main(int argc, char **argv) {
// Let's test our command line arguments
if (argc < 2) {
// fprintf(stderr, "usage: %s <vpl_file> </path/to/mod> [0|1 - file includes prepended verse references]\n", argv[0]);
fprintf(stderr, "usage: %s <biblefile> [notesfile]\n\n", argv[0]);
exit(-1);
}
// Let's see if we can open our input file
int fd = open(argv[1], O_RDONLY|O_BINARY);
if (fd < 0) {
fprintf(stderr, "error: %s: couldn't open input file: %s \n", argv[0], argv[1]);
exit(-2);
}
// Optional notes file; fdn stays -1 when none was given.
int fdn = -1;
if (argc > 2) {
fdn = open(argv[2], O_RDONLY|O_BINARY);
if (fdn < 0) {
fprintf(stderr, "error: %s: couldn't open input file: %s \n", argv[0], argv[2]);
exit(-2);
}
}
outHeader();
string header;
char *buffer = 0;
char *nbuffer = 0;
int result = 0;
// Conversion state: which book/chapter/verse we are in and which OSIS
// container elements are currently open and still need a closing tag.
string currentBook = "";
int currentBookNo = 0;
int currentTestament = 0;
int currentChapter = 0;
int currentVerse = 0;
bool inBook = false;
bool inChapter = false;
bool inVerse = false;
string noteLine = "";
string outstring;
// Prime the one-line lookahead: the line being converted is always one
// behind the line most recently read, so heading handlers can inspect (and
// edit) the line that follows them.
result = readline(fd, &buffer);
string lookahead = buffer;
unicodeTicks(lookahead);
prepLine(lookahead, currentTestament, false);
do {
int i, j;
result = readline(fd, &buffer);
if (lookahead.length()) {
// Swap: process the prepared lookahead now, keep the freshly read line
// (after unicodeTicks/prepLine) as the new lookahead.
string savebuf = buffer;
if (buffer)
delete [] buffer;
buffer = new char [ lookahead.length() + 1];
strcpy(buffer, lookahead.c_str());
lookahead = savebuf;
unicodeTicks(lookahead);
prepLine(lookahead, currentTestament, false);
// there is still a pending line, so the loop must run again even at EOF
result = 0;
}
else if (!result) {
// Lookahead was empty: process the line just read and refill the
// lookahead from the next one.
string savebuf = buffer;
result = readline(fd, &buffer);
lookahead = buffer;
unicodeTicks(lookahead);
prepLine(lookahead, currentTestament, false);
if (buffer)
delete [] buffer;
buffer = new char [ savebuf.length() + 1];
strcpy(buffer, savebuf.c_str());
result = 0;
}
outstring = buffer;
// BOOK NAMES <BN>
// Closes any open verse/chapter/book, then opens a new book <div> whose
// osisID is the osisBooks entry at the same index as the matched
// nasbBooks name.
if (!strncmp(outstring.c_str(), "<BN>", 4)) {
string book = outstring.c_str()+4;
book = book.substr(0, book.find_first_of("<"));
outstring = "";
if (inVerse) {
outstring += "</verse>";
inVerse = false;
}
if (inChapter) {
outstring += "</chapter>";
inChapter = false;
}
if (inBook) {
outstring += "</div>";
inBook = false;
}
outstring += (string)"<div type=\"book\" osisID=\"";
for (i = 0; i < 2; i++) {
for (j = 0; j < nasbMax[i]; j++) {
if (book == nasbBooks[i][j]) {
currentBook = osisBooks[i][j];
outstring += currentBook;
currentTestament = i;
break;
}
}
if (j < nasbMax[i])
break;
}
// i ran past both testaments: the name is in neither table
if (i > 1) {
fprintf(stderr, "error: %s: couldn't find book match for %s. Please check book array in conversion program.\n", argv[0], book.c_str());
exit(-3);
}
outstring += (string)"\"><title type=\"main\" subType=\"x-Book\">" + book + "</title>";
inBook = true;
}
// CHAPTERS
//<SN>PSALM
// <CN>/<SN> lines: the text after the first space of the title is used as
// the chapter number.
if ((!strncmp(outstring.c_str(), "<CN>", 4)) || (!strncmp(outstring.c_str(), "<SN>", 4))) {
string chapterTitle = outstring.c_str()+4;
chapterTitle = chapterTitle.substr(0, chapterTitle.find_first_of("<"));
string chapter = chapterTitle.substr(chapterTitle.find_first_of(" ")+1);
outstring = "";
if (inVerse) {
outstring += "</verse>";
inVerse = false;
}
if (inChapter) {
outstring += "</chapter>";
inChapter = false;
}
outstring += (string)"<chapter osisID=\"" + currentBook + "." + chapter + "\">";
outstring += (string)"<title type=\"sub\" subType=\"x-Chapter\">" + chapterTitle + "</title>";
currentChapter = atoi(chapter.c_str());
inChapter = true;
}
//<SF>
// Acrostic heading.  A <PM> at the start of the *next* line is removed
// from the lookahead and emitted here, ahead of the title.
if (!strncmp(outstring.c_str(), "<SF>", 4)) {
string heading = outstring.c_str()+4;
heading = heading.substr(0, heading.find("</SF>"));
outstring = "";
if (!strncmp(lookahead.c_str(), "<PM>", 4)) {
lookahead.erase(0, 4);
outstring += "<milestone type=\"line\" subType=\"x-PM\"/>";
}
if (inVerse) {
outstring += "</verse>\n";
inVerse = false;
}
outstring += (string)"<title type=\"acrostic\" canonical=\"true\" subType=\"x-preverse\">" + heading + (string)"</title>";
}
//<SH>
// Section heading; same <PM> hoisting as <SF>.
if (!strncmp(outstring.c_str(), "<SH>", 4)) {
string heading = outstring.c_str()+4;
heading = heading.substr(0, heading.find("</SH>"));
outstring = "";
if (!strncmp(lookahead.c_str(), "<PM>", 4)) {
lookahead.erase(0, 4);
outstring += "<milestone type=\"line\" subType=\"x-PM\"/>";
}
if (inVerse) {
outstring += "</verse>\n";
inVerse = false;
}
outstring += (string)"<title type=\"section\" subType=\"x-preverse\">" + heading + (string)"</title>";
}
// <SS>: emitted as a canonical "psalm" title
if (!strncmp(outstring.c_str(), "<SS>", 4)) {
string heading = (outstring.c_str()+4);
heading = heading.substr(0, heading.find("</SS>"));
outstring = (string)"<title type=\"psalm\" canonical=\"true\" subType=\"x-preverse\">" + heading + (string)"</title>";
}
// <SB>: emitted as a "scope" title
if (!strncmp(outstring.c_str(), "<SB>", 4)) {
string heading = (outstring.c_str()+4);
heading = heading.substr(0, heading.find("</SB>"));
outstring = (string)"<title type=\"scope\" subType=\"x-preverse\">" + heading + (string)"</title>";
}
// {{x:y}}
// Verse start marker: {{book:chapter}} followed by the verse number and a
// space.  It is replaced by a <verse osisID="..."> start tag.
// DUH, find_first_of looks for the first occurance of ANY single character of the supplied string
// int start = outstring.find_first_of("{{"); // this is whacked and fails on ">[{.." Try it!
const char *outstr = outstring.c_str();
const char *found = strstr(outstr, "{{");
int start = (found) ? (found - outstr) : -1;
// ---- end of whacked replacement
if (start > -1) {
found = strstr(outstr, "}}");
int end = (found) ? (found - outstr) : -1;
end++;
int testmt = 0, book, chap;
string bkch = outstring.substr(start+2, end-start-2);
// NOTE(review): the sscanf result is not checked; if the marker does not
// parse, book/chap look uninitialised when used below (including as an
// index into osisBooks) -- confirm the input is always well formed.
sscanf(bkch.c_str(), "%d:%d", &book, &chap);
currentChapter = chap;
int vNumEnd = outstring.find_first_of(" ", end);
currentVerse = atoi(outstring.substr(end+1, vNumEnd-end-1).c_str());
currentBookNo = book;
// book numbers above the OT count refer to the NT table
if (book > nasbMax[0]) {
testmt = 1;
book -= nasbMax[0];
}
// sanity check: the marker must name the book opened by the last <BN>
if (currentBook != osisBooks[testmt][book-1]) {
fprintf(stderr, "error: %s: Found a book/chapter identifier out of place: {{%d:%d}} in book %s, chapter %d; bkch = %s; outstring = %s\n", argv[0], book, chap, currentBook.c_str(), currentChapter, bkch.c_str(), outstring.c_str());
exit(-3);
}
char chapString[20], verseString[20];
sprintf(chapString, "%d", currentChapter);
sprintf(verseString, "%d", currentVerse);
string newstring = "";
if (inVerse) {
newstring += "</verse>";
inVerse = false;
}
newstring += "<verse osisID=\"" + currentBook + (string)"." + (string)chapString + (string)"." + (string)verseString + (string) "\">";
outstring.replace(start, vNumEnd-start+1, newstring);
inVerse = true;
// a new verse starts with no cached notes-file line
noteLine = "";
}
// multiple occurances on a line stuff
// Each pass rewrites the first remaining marker of one kind and restarts;
// the loop ends when no handler below finds anything to do.
while (1) {
// NOTE
outstr = outstring.c_str();
found = strstr(outstr, "<N");
start = (found) ? (found - outstr) : -1;
if (start > -1) {
int end = outstring.find_first_of(">", start+1);
string nStr = outstring.substr(start+2, end-start-2);
// NOTE <N#>
// Only numbered markers are notes; <NA>/<NB>/<NC> fall through to the
// milestone handlers further down.
if (isdigit(nStr.c_str()[0])) {
char chapString[20], verseString[20];
sprintf(chapString, "%d", currentChapter);
sprintf(verseString, "%d", currentVerse);
string osisID = (currentBook + (string)"." + (string)chapString + (string)"." + (string) verseString).c_str();
string noteBody = getNoteBody(fdn, noteLine, osisID, currentBookNo, currentChapter, currentVerse, nStr, "N");
outstring.replace(start, end-start+1, (string)"<note type=\"explanation\" osisID=\"" + osisID + (string)".note." + nStr + (string) "\" n=\"" + nStr + (string) "\">" + noteBody + "</note>");
continue;
}
}
// <RS>
outstr = outstring.c_str();
found = strstr(outstr, "<RS>");
start = (found) ? (found - outstr) : -1;
if (start > -1) {
outstring.replace(start, 4, (string)"<q who=\"Jesus\">");
continue;
}
// </RS>
outstr = outstring.c_str();
found = strstr(outstr, "</RS>");
start = (found) ? (found - outstr) : -1;
if (start > -1) {
outstring.replace(start, 5, (string)"</q>");
continue;
}
// <RT>
outstr = outstring.c_str();
found = strstr(outstr, "<RT>");
start = (found) ? (found - outstr) : -1;
if (start > -1) {
outstring.replace(start, 4, (string)"<milestone type=\"x-RT\"/>");
continue;
}
// Any remaining <R...> marker (<RS> and <RT> are gone by now) is a
// cross-reference whose body comes from the notes file.
outstr = outstring.c_str();
found = strstr(outstr, "<R");
start = (found) ? (found - outstr) : -1;
if (start > -1) {
int end = outstring.find_first_of(">", start+1);
string nStr = outstring.substr(start+2, end-start-2);
char chapString[20], verseString[20];
sprintf(chapString, "%d", currentChapter);
sprintf(verseString, "%d", currentVerse);
string osisID = (currentBook + (string)"." + (string)chapString + (string)"." + (string) verseString).c_str();
string noteBody = getNoteBody(fdn, noteLine, osisID, currentBookNo, currentChapter, currentVerse, nStr, "R");
outstring.replace(start, end-start+1, (string)"<note type=\"crossReference\" osisID=\"" + osisID + (string)".xref." + nStr + (string) "\" n=\"" + nStr + (string)"\">" + noteBody + "</note>");
continue;
}
// transChange added {}
// The closing tag is placed after the last letter (or '>') found walking
// backwards from the removed '}', so trailing punctuation stays outside.
outstr = outstring.c_str();
found = strstr(outstr, "{");
start = (found) ? (found - outstr) : -1;
if (start > -1) {
outstring.replace(start, 1, (string)"<transChange type=\"added\">");
// NOTE(review): if the line has no matching '}', find_first_of yields
// npos and the erase below looks like it would throw -- confirm the
// input always balances its braces.
int end = outstring.find_first_of("}", start+1);
outstring.erase(end, 1);
while ((!isalpha(outstring[end]))&&(outstring[end]!='>')) end--;
outstring.insert(end+1, "</transChange>");
continue;
}
/*
// transChange tenseChange *
outstr = outstring.c_str();
found = strstr(outstr, "*");
start = (found) ? (found - outstr) : -1;
if (start > -1) {
outstring.replace(start, 1, (string)"<transChange type=\"tenseChanged\">");
for (end = start + 34; (end < outstring.length()); end++) {
if ((!isalpha(outstring[end])) &&
(outstring[end] != '\''))
break;
}
outstring.replace(end, 1, "</transChange>");
continue;
}
*/
// <,>
outstr = outstring.c_str();
found = strstr(outstr, "<,>");
start = (found) ? (found - outstr) : -1;
if (start > -1) {
outstring.replace(start, 3, (string)"<milestone type=\"x-superiorComma\"/>");
continue;
}
// <NA>
outstr = outstring.c_str();
found = strstr(outstr, "<NA>");
start = (found) ? (found - outstr) : -1;
if (start > -1) {
outstring.replace(start, 4, (string)"<milestone type=\"x-NA\"/>");
continue;
}
// <NB>
outstr = outstring.c_str();
found = strstr(outstr, "<NB>");
start = (found) ? (found - outstr) : -1;
if (start > -1) {
outstring.replace(start, 4, (string)"<milestone type=\"x-NB\"/>");
continue;
}
// <NC>
outstr = outstring.c_str();
found = strstr(outstr, "<NC>");
start = (found) ? (found - outstr) : -1;
if (start > -1) {
outstring.replace(start, 4, (string)"<milestone type=\"x-NC\"/>");
continue;
}
// paragraph break <PM>
outstr = outstring.c_str();
found = strstr(outstr, "<PM>");
start = (found) ? (found - outstr) : -1;
if (start > -1) {
outstring.replace(start, 4, (string)"<milestone type=\"line\" subType=\"x-PM\"/>");
continue;
}
// poetry break <PN>
outstr = outstring.c_str();
found = strstr(outstr, "<PN>");
start = (found) ? (found - outstr) : -1;
if (start > -1) {
outstring.replace(start, 4, (string)"<milestone type=\"x-PN\" />");
continue;
}
// poetry break <PO>
outstr = outstring.c_str();
found = strstr(outstr, "<PO>");
start = (found) ? (found - outstr) : -1;
if (start > -1) {
outstring.replace(start, 4, (string)"<milestone type=\"line\" subType=\"x-PO\" />");
continue;
}
// poetry break <PE>
outstr = outstring.c_str();
found = strstr(outstr, "<PE>");
start = (found) ? (found - outstr) : -1;
if (start > -1) {
outstring.replace(start, 4, (string)"<milestone type=\"line\" subType=\"x-PE\" />");
continue;
}
// letter indent <HL>
outstr = outstring.c_str();
found = strstr(outstr, "<HL>");
start = (found) ? (found - outstr) : -1;
if (start > -1) {
outstring.replace(start, 4, (string)"<milestone type=\"x-HL\" />");
continue;
}
break;
}
// Per-line scanner state for the Strong's-number pass below.
//   strongsStart     where the opening <w> for the next number is inserted
//   transChangeStart position just after the last transChange start tag
//   tenseChange      index of the last '*' seen, or -1
int strongsStart = 0;
int transChangeStart = 0;
bool strongsFound = false;
bool intoken = false;
bool intag = false;
bool inNote = false;
int tokenStart = 0;
string lastToken = "";
string previousToken = "";
int tenseChange = -1;
// strongs numbers
// Walks the line once.  Each <MG...>/<MH...> token becomes a closing </w>,
// and the matching opening <w lemma="strong:..."> is inserted back at the
// start of the text the number applies to.
for (int i = 0; i < outstring.length(); i++) {
// first letter or digit outside any tag/note marks where the next <w> opens
if ((!inNote) && (!intoken) && (!intag) && (!strongsFound)) {
if ((outstring[i] != ' ') && ((isalpha(outstring[i])) || (isdigit(outstring[i])))) {
strongsStart = i;
strongsFound = true;
}
}
if (outstring[i] =='*')
tenseChange = i;
if (outstring[i] == '<') { tokenStart = i+1; intoken = true; }
if (outstring[i] == '>') {
intoken = false;
previousToken = lastToken;
lastToken = outstring.substr(tokenStart, i-tokenStart);
// Not completely safe, but works for current NASB data
// (a token containing '/' is treated as closing or self-closing)
if (strchr(lastToken.c_str(), '/'))
intag = false;
else intag = true;
if ((intag)&&(!strncmp(lastToken.c_str(), "transChange", 11))) {
transChangeStart = i+1;
}
/*
if (!strncmp(lastToken.c_str(), "seg", 3)) {
strongsFound = false;
strongsStart = i+1;
}
if (!strncmp(lastToken.c_str(), "divineName", 10)) {
strongsFound = false;
strongsStart = i+1;
}
*/
// The tokens below reset the search so the next <w> opens after them.
if (!strncmp(lastToken.c_str(), "/divineName", 10)) {
strongsFound = false;
strongsStart = i+1;
}
if (!strncmp(lastToken.c_str(), "note", 4)) {
strongsFound = false;
strongsStart = i+1;
inNote = true;
}
if (!strncmp(lastToken.c_str(), "/note", 5)) {
strongsFound = false;
strongsStart = i+1;
inNote = false;
}
if (!strncmp(lastToken.c_str(), "q who=\"Jesus\"", 13)) {
strongsFound = false;
strongsStart = i+1;
}
if (!strncmp(lastToken.c_str(), "seg type=\"otPassage\"", 19)) {
strongsFound = false;
strongsStart = i+1;
}
if (!strncmp(lastToken.c_str(), "/transChange", 12)) {
strongsFound = false;
strongsStart = i+1;
}
if (!strncmp(lastToken.c_str(), "milestone", 9)) {
strongsFound = false;
strongsStart = i+1;
}
if (!strncmp(lastToken.c_str(), "/seg", 4)) {
strongsFound = false;
strongsStart = i+1;
}
if ((!strncmp(lastToken.c_str(), "verse", 5))) {
intag = false;
}
if ( (!strncmp(lastToken.c_str(), "MG", 2)) ||
(!strncmp(lastToken.c_str(), "MH", 2))) {
// insert </w>
// fix tenseChange to be inside <w> so we can include a subset of the <w> content.
outstring.replace(tokenStart-1, lastToken.length()+2, ((tenseChange > -1) ? "</w></transChange>":"</w>"));
// 18 and 4 are the lengths of the two replacement strings above
i = (tokenStart-1) + ((tenseChange > -1) ? 18:4);
// build <w ... > tag
char lang = lastToken[1]; // H or G
// replaces the leading 'M', leaving the H/G letter as the number prefix
lastToken.replace(0, 1, "<w lemma=\"strong:");
// further numbers in a ", "-separated list get the same prefix
while ((start = lastToken.find(", ")) > -1) {
lastToken.replace(start, 2, (string)" strong:" + lang);
}
lastToken += "\">";
intag = false;
if (tenseChange > -1) {
lastToken.insert(0, "<transChange type=\"tenseChange\">");
}
// inside an "added" transChange the <w> opens right after that tag
if (!strncmp(previousToken.c_str(), "transChange type=\"added", 23)) {
outstring.insert(transChangeStart, lastToken);
intag = true;
i += lastToken.length() - 1; // (-1 because we're about to i++)
}
// insert our token
else {
outstring.insert(strongsStart, lastToken);
i += lastToken.length() - 1; // (-1 because we're about to i++)
}
strongsStart = i+1;
strongsFound = false;
if (tenseChange > -1) {
// relocate because position may have changed from all the token inserts
// NOTE(review): assumes the '*' is still present in the line; strchr
// returning null here is not handled.
const char *buf = outstring.c_str();
tenseChange = (strchr(buf, '*') - buf);
outstring.erase(tenseChange, 1);
tenseChange = -1;
}
}
}
}
// clean up stuff that didn't work quite right
while (1) {
// divineName strongs tags misorderings
string target = "</w></divineName></seg>";
size_t s = outstring.find(target);
if (s != string::npos) {
outstring.replace(s, target.length(), "</divineName></seg></w>");
continue;
}
target = "</w>,</divineName></seg>";
s = outstring.find(target);
if (s != string::npos) {
outstring.replace(s, target.length(), "</divineName></seg></w>,");
continue;
}
break;
}
std::cout << outstring;
if (!result) std::cout << "\n";
}
while (!result);
// Close whatever is still open at end of input.
outstring = "";
if (inVerse) {
outstring += "</verse>";
inVerse = false;
}
if (inChapter) {
outstring += "</chapter>";
inChapter = false;
}
if (inBook) {
outstring += "</div>";
inBook = false;
}
std::cout << outstring;
outTrailer();
// clean up our buffers that readline might have allocated
if (buffer)
delete [] buffer;
if (nbuffer)
delete [] nbuffer;
close(fd);
if (fdn > -1)
close(fdn);
}
// Writes the XML declaration, the opening <osis>/<osisText> tags and the
// complete <header> element to stdout, one line per table entry.
void outHeader() {
    static const char *const headerLines[] = {
        "<?xml version=\"1.0\" encoding=\"UTF-8\"?>",
        "<osis xmlns=\"http://www.bibletechnologies.net/2003/OSIS/namespace\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.bibletechnologies.net/2003/OSIS/namespace osisCore.1.9.xsd\">",
        " <osisText osisIDWork=\"nasb\" xml:lang=\"en\">",
        " <header>",
        " <work osisWork=\"nasb\">",
        " <title>NEW AMERICAN STANDARD BIBLE</title>",
        " <identifier type=\"OSIS\">Bible.en.NASB.1995</identifier>",
        " <rights>Copyright (C) 1960,1962,1963,1968,1971,1972,1973,1975,1977,1995 by THE LOCKMAN FOUNDATION</rights>",
        " <refSystem>Bible</refSystem>",
        " </work>",
        " <work osisWork=\"strongs\">",
        " </work>",
        " </header>"
    };
    const size_t lineCount = sizeof(headerLines) / sizeof(headerLines[0]);
    for (size_t n = 0; n < lineCount; n++) {
        std::cout << headerLines[n] << "\n";
    }
}
// Writes the closing </osisText> and </osis> tags, each on its own line, to
// stdout.
void outTrailer() {
    static const char *const closingTags[] = { "</osisText>\n", "</osis>\n" };
    for (int n = 0; n < 2; n++) {
        std::cout << closingTags[n];
    }
}
// Replaces ASCII quote characters in outstring with UTF-8 curly quotes, in
// place.  Rules are applied in table order, each to every occurrence from
// left to right, so a doubled backtick is consumed before single backticks:
//   ``  ->  U+201C      `  ->  U+2018      '  ->  U+2019      "  ->  U+201D
void unicodeTicks(string &outstring) {
    struct QuoteRule {
        const char *ascii;  // text to look for
        const char *curly;  // UTF-8 bytes that replace it
    };
    static const QuoteRule quoteRules[] = {
        { "``", "\xe2\x80\x9c" },
        { "`",  "\xe2\x80\x98" },
        { "'",  "\xe2\x80\x99" },
        { "\"", "\xe2\x80\x9d" }
    };
    const size_t ruleCount = sizeof(quoteRules) / sizeof(quoteRules[0]);

    for (size_t r = 0; r < ruleCount; r++) {
        const size_t asciiLen = strlen(quoteRules[r].ascii);
        const size_t curlyLen = strlen(quoteRules[r].curly);
        size_t resumeAt = 0;
        for (;;) {
            const char *text = outstring.c_str();
            const char *hit = strstr(text + resumeAt, quoteRules[r].ascii);
            if (!hit)
                break;
            const size_t at = hit - text;
            outstring.replace(at, asciiLen, quoteRules[r].curly);
            // the replacement never contains a quote character, so the
            // search can resume right after it
            resumeAt = at + curlyLen;
        }
    }
}
// One fixed-text rewrite: the first occurrence of `pattern` is replaced by
// `replacement`.
struct PrepRule {
    const char *pattern;
    const char *replacement;
};

// Markers that carry no information for the OSIS output and are dropped.
static const PrepRule prepRedundantMarkers[] = {
    { "<V>",  "" },
    { "<P>",  "" },
    { "<C>",  "" },
    { "<CC>", "" },
    { "<CP>", "" }
};

// Fixed markup rewrites, tried in this order.  Longer / more specific
// divine-name spellings come before the plain "L\{ORD}/" form.
static const PrepRule prepMarkupRules[] = {
    { "<A>", "<milestone type=\"line\" subType=\"x-A\"/>" },
    { "~“", "<milestone type=\"cQuote\" marker=\"“\"/>" },
    { "~‘", "<milestone type=\"cQuote\" marker=\"‘\"/>" },
    { "L\\{ORD}/'\\{S}/", "<seg><divineName>Lord's</divineName></seg>" },
    { "L\\{ORD}/’\\{S}/", "<seg><divineName>Lord’s</divineName></seg>" },
    { "L\\{ORD,}/", "<seg><divineName>Lord</divineName></seg>," },
    { "L\\{ORD}/", "<seg><divineName>Lord</divineName></seg>" },
    { "S\\{EN~OR}/", "<seg><divineName>Sen~or</divineName></seg>" },
    { "Y\\{AH,}/", "<seg><divineName>Yah</divineName></seg>," },
    { "Y\\{AH}/", "<seg><divineName>Yah</divineName></seg>" },
    // is this really valid markup? should 'also be' be in small
    // caps? 3 { and only 2 } in the source text
    { "L\\{ORD {also be}/}", "<seg><divineName>Lord</divineName></seg> also be}" },
    { "L\\{ORD {give}/}", "<seg><divineName>Lord</divineName></seg> give}" },
    { "L\\{ORD {bless}/}", "<seg><divineName>Lord</divineName></seg> bless}" },
    { "L\\{ORD {are my Refuge; You have made the Most High your dwelling place}/}", "<seg><divineName>Lord</divineName></seg> are my Refuge; You have made the Most High your dwelling place}" },
    { "M\\ENE/", "M<hi type=\"x-smallcaps\">ene</hi>" },
    { "M\\ENE:/", "M<hi type=\"x-smallcaps\">ene</hi>:" },
    { "T\\EKEL/", "T<hi type=\"x-smallcaps\">ekel</hi>" },
    { "T\\EKEL:/", "T<hi type=\"x-smallcaps\">ekel</hi>:" },
    { "U\\FARSIN/", "U<hi type=\"x-smallcaps\">farsin</hi>" },
    { "P\\ERES:/", "P<hi type=\"x-smallcaps\">eres</hi>:" },
    // LB: no information on this marker; assumed to be '-'
    { "<LB>", "-" }
};

// Rewrites applied only to note bodies: braces become italics, and an italic
// end tag that landed outside a reference end tag is swapped back inside.
static const PrepRule prepNoteRules[] = {
    { "{", "<hi type=\"italic\">" },
    { "}", "</hi>" },
    { "</reference></hi>", "</hi></reference>" }
};

// Applies the first rule (in table order) whose pattern occurs in text, to
// the first occurrence of that pattern.  Returns true when text was changed.
static bool prepApplyFirst(string &text, const PrepRule *rules, size_t count) {
    for (size_t r = 0; r < count; r++) {
        const char *base = text.c_str();
        const char *hit = strstr(base, rules[r].pattern);
        if (hit) {
            text.replace(hit - base, strlen(rules[r].pattern), rules[r].replacement);
            return true;
        }
    }
    return false;
}

/**
 * Normalises one line of source markup in place before the main conversion.
 *
 * The line is rewritten repeatedly until nothing matches any more.  On each
 * pass the first applicable step wins and the pass restarts:
 *   1. redundant markers (<V>, <P>, <C>, <CC>, <CP>) are removed;
 *   2. a "<$F ... >>" span is removed;
 *   3. fixed markup (milestones, divine names, small caps) is rewritten;
 *   4. a remaining backslash starts a generic small-caps run "X\YYY/": the
 *      run is wrapped in <seg type="otPassage"> when currentTestament is
 *      non-zero, otherwise in <seg><divineName>, and the capitals that
 *      follow the backslash are lower-cased up to the '/';
 *   5. when note is true, braces become <hi type="italic"> ... </hi>.
 *
 * @param outstring         line to rewrite (modified in place).
 * @param currentTestament  0 selects the divine-name wrapper, non-zero the
 *                          otPassage wrapper, in step 4.
 * @param note              true when the text is a note body (enables step 5).
 */
void prepLine(string &outstring, int currentTestament, bool note) {
    const size_t redundantCount = sizeof(prepRedundantMarkers) / sizeof(prepRedundantMarkers[0]);
    const size_t markupCount = sizeof(prepMarkupRules) / sizeof(prepMarkupRules[0]);
    const size_t noteCount = sizeof(prepNoteRules) / sizeof(prepNoteRules[0]);

    // Offset from which the generic backslash search starts; it is advanced
    // past each run that has already been wrapped.
    int end = 0;

    while (1) {
        // ------------------------------------------
        // redundant markers
        if (prepApplyFirst(outstring, prepRedundantMarkers, redundantCount))
            continue;

        // <$F...>>
        {
            const size_t s = outstring.find("<$F");
            if (s != string::npos) {
                const size_t e = outstring.find(">>", s);
                if (e == string::npos) {
                    // Unterminated marker: drop the rest of the line.  The
                    // unguarded length computation wrapped around here and
                    // could erase nothing at all, looping forever.
                    outstring.erase(s);
                }
                else {
                    outstring.erase(s, e - s + 2);
                }
                continue;
            }
        }

        // ----------------------------------------------
        // fixed markup rewrites
        if (prepApplyFirst(outstring, prepMarkupRules, markupCount))
            continue;

        // generic small-caps run introduced by a backslash
        if ((size_t)end > outstring.length())
            end = (int)outstring.length();  // never search from beyond the terminator
        const char *outstr = outstring.c_str();
        const char *found = strstr(outstr + end, "\\");
        int start = (found) ? (int)(found - outstr) : -1;
        if (start > -1) {
            // Walk back over capitals and the characters "\/ ~" to the
            // character in front of the run.
            for (; start; start--) {
                if ((!isupper((unsigned char)outstring[start])) &&
                    (!strchr("\\/ ~", outstring[start]))) {
                    break;
                }
            }
            // step onto the run and skip its leading spaces
            // NOTE(review): when the walk reaches index 0 without stopping,
            // this increment skips the first character even if it belongs to
            // the run -- kept as is, confirm against real data.
            for (start++; outstring[start] == ' '; start++);

            if (currentTestament) {
                outstring.insert(start, "<seg type=\"otPassage\">");
                start += 22;    // length of the tag just inserted
            }
            else {
                outstring.insert(start, "<seg><divineName>");
                start += 17;    // length of the tags just inserted
                const char *b = outstring.c_str();
                const char *lord = strstr(b, "L\\{ORD}/");
                int lordAt = (lord) ? (int)(lord - b) : -1;
                if (lordAt > -1)
                    outstring.replace(lordAt, 8, "Lord");
                end = lordAt + 4;
            }

            bool lower = false;     // between '\' and '/': lower-case capitals
            bool token = false;     // inside <...>: leave untouched
            for (; (size_t)start < outstring.length(); start++) {
                if (!token) {
                    if (outstring[start] == '\\') {
                        lower = true;
                        outstring.erase(start, 1);
                        start--;
                        continue;
                    }
                    if (outstring[start] == '/') {
                        lower = false;
                        outstring.erase(start, 1);
                        end = start;    // closing tag goes where the '/' was
                        start--;
                        continue;
                    }
                    // what is this? It screws MENE MENE up in Daniel
                    // if (outstring[start] == ':')
                    //     break;
                    if (isalpha((unsigned char)outstring[start])) {
                        // an ordinary lower-case letter ends the run
                        if (islower((unsigned char)outstring[start]))
                            break;
                        if (lower)
                            outstring[start] = tolower((unsigned char)outstring[start]);
                        continue;
                    }
                }
                if (outstring[start] == '>')
                    token = false;
                if (outstring[start] == '<')
                    token = true;
            }

            if (currentTestament) {
                outstring.insert(end, "</seg>");
                end += 6;
            }
            else {
                outstring.insert(end, "</divineName></seg>");
                end += 19;
            }
            continue;
        }

        // note bodies only: italics
        if (note && prepApplyFirst(outstring, prepNoteRules, noteCount))
            continue;

        break;
    }
}
/**
 * Returns the body text of one note or cross-reference for the current verse.
 *
 * noteLine holds the notes-file line for the verse being processed.  When it
 * is empty, lines are read from fdn until one containing a "{{book:chapter}}"
 * marker is found; that line stays in noteLine so later calls for the same
 * verse reuse it.  The marker and the verse number after it must match the
 * current position, otherwise the program exits with -1.
 *
 * @param fdn             notes file descriptor, or -1 when there is none.
 * @param noteLine        in/out cache of the current notes-file line.
 * @param osisID          OSIS id of the current verse (used only when built
 *                        with HAVESWORD, as context for reference parsing).
 * @param currentBookNo   book number expected in the marker.
 * @param currentChapter  chapter number expected in the marker.
 * @param currentVerse    verse number expected after the marker.
 * @param nStr            note label; the body is looked up as "<" nx nStr ">".
 * @param nx              "N" for notes, "R" for cross-references.
 * @return the prepared body text; empty when there is no notes file, the
 *         notes input is exhausted, or the line has no matching tag.
 */
string getNoteBody(int fdn, string &noteLine, string osisID, int currentBookNo, int currentChapter, int currentVerse, string nStr, const char *nx) {
    char *nbuffer = 0;
    int start = -1;
    const char *found = 0;
    const char *outstr = 0;

    // find a line carrying a {{book:chapter}} marker
    while (start == -1) {
        if (!noteLine.length()) {
            // Without a notes file there is nothing to read; the loop used
            // to spin forever here.
            if (fdn < 0)
                return "";
            readline(fdn, &nbuffer);
            noteLine = nbuffer;
            // readline skips blank lines, so an empty result means the notes
            // input is exhausted
            if (!noteLine.length()) {
                delete [] nbuffer;
                return "";
            }
        }
        outstr = noteLine.c_str();
        found = strstr(outstr, "{{");
        start = (found) ? (int)(found - outstr) : -1;
        if (start == -1) noteLine = "";
    }

    // check that the line belongs to the verse being processed
    found = strstr(outstr, "}}");
    int end = (found) ? (int)(found - outstr) : -1;
    end++;  // index of the second '}'
    int book = 0, chap = 0;
    string bkch = noteLine.substr(start+2, end-start-2);
    sscanf(bkch.c_str(), "%d:%d", &book, &chap);
    int vNumEnd = noteLine.find_first_of(" ", end);
    int verse = atoi(noteLine.substr(end+1, vNumEnd-end-1).c_str());
    if ((book != currentBookNo) || (chap != currentChapter) || (verse != currentVerse)) {
        fprintf(stderr, "Not correct note line(%d:%d, %d:%d, %d:%d): %s\n\n", currentBookNo, book, currentChapter, chap, currentVerse, verse, noteLine.c_str());
        exit(-1);
    }

    // cut out the text between "<nx nStr>" and the next " <"
    outstr = noteLine.c_str();
    string tag = (string)"<"+(string)nx+nStr+(string)">";
    found = strstr(outstr, tag.c_str());
    start = (found) ? (int)(found - outstr) : -1;
    string retVal = "";
    if (start > -1) {
        start += tag.length();
        found = strstr(outstr+start, " <");
        int bodyEnd = (found) ? (int)(found - outstr) : -1;
        if (bodyEnd < 0) bodyEnd = noteLine.length();
        retVal = noteLine.substr(start, bodyEnd-start);
    }
    unicodeTicks(retVal);
#ifdef HAVESWORD
    if (*nx == 'R') {
        // } { get's deleted. e.g. {Luke} {9:10-17}
        outstr = retVal.c_str();
        found = strstr(outstr, "} {");
        start = (found) ? (found - outstr) : -1;
        if (start > -1) {
            retVal.replace(start, 3, " ");
        }
        outstr = retVal.c_str();
        found = strstr(outstr, ";}");
        start = (found) ? (found - outstr) : -1;
        if (start > -1) {
            retVal.replace(start, 2, "};");
        }
        VerseKey key = osisID.c_str();
        retVal = VerseKey::convertToOSIS(retVal.c_str(), &key);
    }
#endif
    prepLine(retVal, 0, true);
    delete [] nbuffer;
    return retVal;
}