path: root/modules/nasb2020/lockosis.cpp



#include <ctype.h>
#include <stdio.h>
#include <fcntl.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <clocale>
#include <locale>


#ifndef __GNUC__
#include <io.h>
#else
#include <unistd.h>
#endif

#include <iostream>
#include <string>

#ifndef O_BINARY
#define O_BINARY 0
#endif

#ifdef HAVESWORD
#include <versekey.h>
#include <localemgr.h>
#include <stringmgr.h>
using namespace sword;
#endif

using std::string;
using std::cout;
using std::endl;

// OSIS book identifiers for the first (Old Testament) table.  main() looks a
// book up as osisBooks[testament][bookNumber - 1], so the order of the entries
// matters.  The first 39 entries are the canonical books; the rest follow the
// "extra-Biblical" marker below.
static const char *osisOTBooks[] = {
	"Gen", "Exod", "Lev", "Num", "Deut", "Josh", "Judg",
	"Ruth", "1Sam", "2Sam", "1Kgs", "2Kgs", "1Chr",
	"2Chr", "Ezra", "Neh", "Esth", "Job", "Ps", "Prov",
	"Eccl", "Song", "Isa", "Jer", "Lam",
	"Ezek", "Dan", "Hos", "Joel", "Amos", "Obad", "Jonah", "Mic",
	"Nah", "Hab", "Zeph", "Hag", "Zech", "Mal",
// extra-Biblical
	"Bar", "PrAzar", "Bel", "Sus", "1Esd", "2Esd", "AddEsth",
	"EpJer", "Jdt", "1Macc", "2Macc", "3Macc", "4Macc", "PrMan",
	"Ps151", "Sir", "Tob", "Wis"};
// OSIS book identifiers for the second (New Testament) table, 27 entries.
static const char *osisNTBooks[] = {
	"Matt", "Mark", "Luke", "John", "Acts", "Rom", "1Cor",
	"2Cor", "Gal", "Eph", "Phil", "Col",
	"1Thess", "2Thess", "1Tim", "2Tim", "Titus",
	"Phlm", "Heb", "Jas", "1Pet", "2Pet", "1John", "2John",
	"3John", "Jude", "Rev"};
// Testament-indexed view of the two tables above (0 = OT table, 1 = NT table).
static const char **osisBooks[] = { osisOTBooks, osisNTBooks };

// Book counts per testament.  main() compares a running book number from a
// {{book::chapter}} marker against nasbMax[0] to decide whether it belongs to
// the second table, and subtracts nasbMax[0] to get the index into it.
const char nasbMax[2] = {39, 27};

// Reads the next non-blank line from the seekable descriptor fd.
//
// Leading whitespace (spaces, tabs, CR, LF -- and therefore whole blank lines)
// is skipped, the line is read up to the next LF, and trailing whitespace is
// trimmed.  Two ASCII stand-ins are then rewritten in place as UTF-8 combining
// marks: a backtick that follows a letter becomes U+0300 (0xCC 0x80) and every
// '~' after the first byte becomes U+0303 (0xCC 0x83).
//
// buf:  in/out.  Any array already held in *buf is released with delete[];
//       on return *buf always points to a new[]-allocated, NUL-terminated
//       array (possibly empty) that the caller owns.
//
// Returns non-zero once the end of input -- or a read error -- has been hit
// while fetching this line (the line itself may still carry text), and zero
// when the line was terminated by LF.
char readline(int fd, char **buf) {
	delete [] *buf;		// deleting a null pointer is a harmless no-op
	*buf = 0;

	char ch = 0;		// initialised so an immediate EOF never reads garbage
	int len;

	// index tracks the file offset of the first non-whitespace byte
	long index = lseek(fd, 0, SEEK_CUR);
	while ((len = read(fd, &ch, 1)) == 1) {
		if ((ch != 10) && (ch != 13) && (ch != ' ') && (ch != '\t'))
			break;
		index++;
	}

	// scan forward to the terminating LF, remembering whether we really saw one;
	// a last line without a newline must not lose its final character
	bool terminated = false;
	if (len == 1) {
		while ((len = read(fd, &ch, 1)) == 1) {
			if (ch == 10) {
				terminated = true;
				break;
			}
		}
	}

	const long lineEnd = lseek(fd, 0, SEEK_CUR);
	int size = (int)(lineEnd - index) - (terminated ? 1 : 0);
	if (size < 0)
		size = 0;

	// twice the line length (plus slack) leaves room for every byte to grow
	// into a two-byte sequence during the conversion below
	*buf = new char [ (size+2) * 2 ];
	**buf = 0;

	if (size > 0) {
		lseek(fd, index, SEEK_SET);
		int got = (int)read(fd, *buf, size);
		if (got < 0)
			got = 0;
		(*buf)[got] = 0;
		lseek(fd, lineEnd, SEEK_SET);	// step back over the terminator, if any

		// clean up any trailing junk on buf (the first byte is never whitespace)
		int buflen = strlen(*buf);
		for (char *it = *buf+(buflen-1); it > *buf; it--) {
			if ((*it != 10) && (*it != 13) && (*it != ' ') && (*it != '\t'))
				break;
			else *it = 0;
		}
		buflen = strlen(*buf);

		// convert all spanish characters to combined
		for (unsigned char *it = (unsigned char *)(*buf)+1; *it; it++) {
			switch (*it) {
			case 0x60 : 	// ` directly after a letter -> combining grave accent
				if (isalpha(it[-1])) {
					// shift the tail (including the NUL) right by one byte
					memmove(it+1, it, buflen - (it-(unsigned char *)*buf) + 1);
					buflen++;
					it[0] = 0xcc;
					it[1] = 0x80;
				}
				// any other backtick is left alone for later quote conversion
				break;
			case 0x7E : 	// ~ -> combining tilde
				memmove(it+1, it, buflen - (it-(unsigned char *)*buf) + 1);
				buflen++;
				it[0] = 0xcc;
				it[1] = 0x83;
				break;
			}
		}
	}
	return (len != 1);
}


// Forward declarations of the helpers that main() calls before their
// definitions appear.
void outHeader();
void outTrailer();
void unicodeTicks(string &outstring);
void prepLine(string &outstring, int currentTestament, bool note);
string getNoteBody(int nfd, string &noteLine, string osisID, int currentBookNo, int currentChapter, int currentVerse, string nStr, const char *nx);
int replaceFirst(string &haystack, string needle, string replacement);


// Program entry point.
//
// Reads the marked-up Bible text file named by argv[1] line by line, rewrites
// the markup of each line into OSIS XML and prints the result to standard
// output, bracketed by outHeader() / outTrailer().  An optional second file,
// argv[2], is opened and its descriptor handed to getNoteBody() whenever a
// note or cross-reference marker is encountered.
//
// Exit codes used below: -1 bad usage, -2 an input file could not be opened,
// -3 a book name or {{book::chapter}} marker did not match expectations.
int main(int argc, char **argv) {

#ifdef HAVESWORD
	LocaleMgr::getSystemLocaleMgr()->setDefaultLocaleName("es");
#endif

	// take the ctype locale from the environment; isalpha()/isdigit() are used below
	std::setlocale(LC_CTYPE, "");

	// Let's test our command line arguments
	if (argc < 2) {
//		fprintf(stderr, "usage: %s <vpl_file> </path/to/mod> [0|1 - file includes prepended verse references]\n", argv[0]);
		fprintf(stderr, "usage: %s <biblefile> [notesfile]\n\n", argv[0]);
		exit(-1);
	}


	// Let's see if we can open our input file
	int fd = open(argv[1], O_RDONLY|O_BINARY);
	if (fd < 0) {
		fprintf(stderr, "error: %s: couldn't open input file: %s \n", argv[0], argv[1]);
		exit(-2);
	}

	// optional notes file; fdn stays -1 when none was given
	int fdn = -1;
	if (argc > 2) {
		fdn = open(argv[2], O_RDONLY|O_BINARY);
		if (fdn < 0) {
			fprintf(stderr, "error: %s: couldn't open input file: %s \n", argv[0], argv[2]);
			exit(-2);
		}
	}

	outHeader();

	string header;
	char *buffer = 0;
	// NOTE(review): nbuffer is never assigned in this function; only deleted at the end
	char *nbuffer = 0;
	int result = 0;
	string currentBook = "";
	int currentBookNo = 0; 
	int currentTestament = 0; 
	int currentChapter = 0; 
	int currentVerse = 0; 
	// which OSIS container elements are currently open and still need closing
	bool inBook = false;
	bool inChapter = false;
	bool inVerse = false;
	string noteLine = "";
	string preChapNote = "";
	string outstring;
	// prime the one-line lookahead: lookahead holds the NEXT (already prepared)
	// line while buffer/outstring hold the line being converted
	result = readline(fd, &buffer);
	string lookahead = buffer;
	unicodeTicks(lookahead);
	prepLine(lookahead, currentTestament, false);
	do {
		result = readline(fd, &buffer);
		if (lookahead.length()) {
			// normal case: process the pending lookahead now and make the line
			// just read the new lookahead.  result is cleared so the loop keeps
			// going until the lookahead itself has been flushed.
			string savebuf = buffer;
			if (buffer)
				delete [] buffer;
			buffer = new char [ lookahead.length() + 1];
			strcpy(buffer, lookahead.c_str());
			lookahead = savebuf;
			unicodeTicks(lookahead);
			prepLine(lookahead, currentTestament, false);
			result = 0;
		}
		else if (!result) {
			// the pending lookahead was empty: process the line just read and
			// refill the lookahead from the following line.
			// NOTE(review): savebuf is the raw line here -- it has not been run
			// through unicodeTicks()/prepLine(); confirm that is intended.
			string savebuf = buffer;
			result = readline(fd, &buffer);
			lookahead = buffer;
			unicodeTicks(lookahead);
			prepLine(lookahead, currentTestament, false);


			if (buffer)
				delete [] buffer;
			buffer = new char [ savebuf.length() + 1];
			strcpy(buffer, savebuf.c_str());
			result = 0;
		}
			
		outstring = buffer;


		// BOOK NAMES  <BN>
		// closes any open verse/chapter/book and opens a new book <div>
		if (!strncmp(outstring.c_str(), "<BN>", 4)) {
			string book = outstring.c_str()+4;
			book = book.substr(0, book.find_first_of("<"));
			outstring = "";
			if (inVerse) {
				outstring += "</verse>";
				inVerse = false;
			}
			if (inChapter) {
				outstring += "</chapter>";
				inChapter = false;
			}
			if (inBook) {
				outstring += "</div>";
				inBook = false;
			}
			outstring += (string)"<div type=\"book\" osisID=\"";

			// NOTE(review): VerseKey is only brought in by the HAVESWORD-guarded
			// includes at the top of the file, but it is used unconditionally here.
			VerseKey bookName(book.c_str());
			if (bookName.popError()) {
				fprintf(stderr, "error: %s: couldn't find book match for %s.  Please check book array in conversion program.\n", argv[0], book.c_str());
				exit(-3);
			}
			currentBook = bookName.getOSISBookName();
			outstring += currentBook;
			currentTestament = bookName.getTestament()-1;

			outstring += (string)"\"><title type=\"main\" subType=\"x-Book\">" + book + "</title>";
			inBook = true;
		}


		// CHAPTERS
		//<SN>PSALM
		// the chapter number is whatever follows the first space of the title text
		if ((!strncmp(outstring.c_str(), "<CN>", 4)) || (!strncmp(outstring.c_str(), "<SN>", 4))) {
			string chapterTitle = outstring.c_str()+4;
			chapterTitle = chapterTitle.substr(0, chapterTitle.find_first_of("<"));
			string chapter = chapterTitle.substr(chapterTitle.find_first_of(" ")+1);
			outstring = "";
			if (inVerse) {
				outstring += "</verse>";
				inVerse = false;
			}
			if (inChapter) {
				outstring += "</chapter>";
				inChapter = false;
			}
			outstring += (string)"<chapter osisID=\"" + currentBook + "." + chapter + "\">";
			outstring += (string)"<title type=\"sub\" subType=\"x-Chapter\">" + chapterTitle + "</title>";
			currentChapter = atoi(chapter.c_str());
			inChapter = true;
			currentVerse = 1;
		}
		
		//<SF>
		// acrostic title; a <PM> at the start of the NEXT line is pulled forward so
		// its milestone is emitted ahead of the title
		if (!strncmp(outstring.c_str(), "<SF>", 4)) {
			string heading = outstring.c_str()+4;
			heading = heading.substr(0, heading.find("</SF>"));
			outstring = "";
				
			if (!strncmp(lookahead.c_str(), "<PM>", 4)) {
				lookahead.erase(0, 4);
				outstring += "<milestone type=\"line\" subType=\"x-PM\"/>";
			}
			if (inVerse) {
				outstring += "</verse>\n";
				inVerse = false;
			}
			outstring += (string)"<title type=\"acrostic\" canonical=\"true\" subType=\"x-preverse\">" + heading + (string)"</title>";
		}

		//<SH>
		// section title; the <SHI> variant is emitted without the x-preverse subType
		if ((!strncmp(outstring.c_str(), "<SH>", 4)) || (!strncmp(outstring.c_str(), "<SHI>", 5))) {
			// character 3 is 'I' for "<SHI>" and '>' for "<SH>"
			bool shi = outstring.c_str()[3] == 'I';
			if (shi) {
				fprintf(stderr, "found shi.\n");
			}
			string heading = outstring.c_str()+(shi ? 5 : 4);
			heading = heading.substr(0, heading.find(shi ? "</SHI>" : "</SH>"));
			outstring = "";
				
			if (!strncmp(lookahead.c_str(), "<PM>", 4)) {
				lookahead.erase(0, 4);
				outstring += "<milestone type=\"line\" subType=\"x-PM\"/>";
			}
			if (inVerse) {
				outstring += "</verse>\n";
				inVerse = false;
			}
			outstring += (string)"<title type=\"section\"";
			if (!shi) outstring += (string)" subType=\"x-preverse\"";
			outstring += (string)">" + heading + (string)"</title>";
		}
		// <SS> -> psalm title
		if (!strncmp(outstring.c_str(), "<SS>", 4)) {
			string heading = (outstring.c_str()+4);
			heading = heading.substr(0, heading.find("</SS>"));
			outstring = (string)"<title type=\"psalm\" canonical=\"true\" subType=\"x-preverse\">" + heading + (string)"</title>";
		}
		// <SB> -> scope title
		if (!strncmp(outstring.c_str(), "<SB>", 4)) {
			string heading = (outstring.c_str()+4);
			heading = heading.substr(0, heading.find("</SB>"));
			outstring = (string)"<title type=\"scope\" subType=\"x-preverse\">" + heading + (string)"</title>";
		}


		// {{x::y}}
		// verse start marker: {{book::chapter}} followed by the verse number and a space
		// DUH, find_first_of looks for the first occurrence of ANY single character of the supplied string
//		int start = outstring.find_first_of("{{");	// this is whacked and fails on ">[{.." Try it!
		const char *outstr = outstring.c_str();
		const char *found = strstr(outstr, "{{");
		int start = (found) ? (found - outstr) : -1;
// ---- end of whacked replacement

		if (start > -1) {
			found = strstr(outstr, "}}");
			// NOTE(review): a missing "}}" is not rejected; end simply becomes 0 here
			int end = (found) ? (found - outstr) : -1;
			// after the increment end indexes the second '}'
			end++;
			int testmt = 0, book = 0, chap = 0;
			string bkch = outstring.substr(start+2, end-start-2);
			sscanf(bkch.c_str(), "%d::%d", &book, &chap);
			currentChapter = chap;
			int vNumEnd = outstring.find_first_of(" ", end);
			currentVerse = atoi(outstring.substr(end+1, vNumEnd-end-1).c_str());
			currentBookNo = book;
			// book numbers run across both testaments; map onto the per-testament table
			if (book > nasbMax[0]) {
				testmt = 1;
				book -= nasbMax[0];
			}
			// NOTE(review): book is not range-checked before indexing (book == 0 reads index -1)
			if (currentBook != osisBooks[testmt][book-1]) {
				fprintf(stderr, "error: %s: Found a book/chapter identifier out of place: {{%d::%d}} in book %s, chapter %d; bkch = %s; outstring = %s\n", argv[0], book, chap, currentBook.c_str(), currentChapter, bkch.c_str(), outstring.c_str());
				exit(-3);
			}
			char chapString[20], verseString[20];
			sprintf(chapString, "%d", currentChapter);
			sprintf(verseString, "%d", currentVerse);
			string newstring = "";
			if (inVerse) {
				newstring += "</verse>";
				inVerse = false;
			}
			newstring += "<verse osisID=\"" + currentBook + (string)"." + (string)chapString + (string)"." + (string)verseString + (string) "\">";
			// replaces the marker, the verse number and the following space
			outstring.replace(start, vNumEnd-start+1, newstring);
			inVerse = true;
			// a note line remembered from a pre-chapter note becomes current again
			noteLine = preChapNote;
			preChapNote = "";
		}

		
		// multiple occurrences on a line stuff
		// each pass rewrites at most one marker and restarts; the loop ends when
		// none of the patterns below matches any more
		while (1) {

			// NOTE
			outstr = outstring.c_str();
			found = strstr(outstr, "<N");
			start = (found) ? (found - outstr) : -1;

			if (start > -1) {
				int end = outstring.find_first_of(">", start+1);
				string nStr = outstring.substr(start+2, end-start-2);

				// NOTE <N#>
				// NOTE(review): strchr() also matches the terminating NUL, so an empty
				// nStr passes this test; and <NA>/<NB>/<NC> are consumed here before the
				// superior* milestone replacements further down can see them -- confirm.
				if (isdigit(nStr.c_str()[0]) || strchr("ABCDEFG", nStr.c_str()[0])) {
					bool preChap = strchr("A", nStr.c_str()[0]);
					if (preChap) noteLine = "";
					char chapString[20], verseString[20];
					sprintf(chapString, "%d", currentChapter);
					sprintf(verseString, "%d", currentVerse);
					string osisID = (currentBook + (string)"." + (string)chapString + (string)"." + (string) verseString).c_str();
					string noteBody = getNoteBody(fdn, noteLine, osisID, currentBookNo, currentChapter, currentVerse, nStr, "N");
					if (preChap) preChapNote = noteLine;
					outstring.replace(start, end-start+1, (string)"<note type=\"explanation\" osisID=\"" + osisID + (string)".note." + nStr + (string) "\" n=\"" + nStr + (string) "\">" + noteBody + "</note>");
//					outstring.replace(start, end-start+1, (string)"--note--");
				continue;
				}
			}


			//  <RS>
			if (replaceFirst(outstring, "<RS>", "<q who=\"Jesus\">") > -1) continue;

			//  </RS>
			if (replaceFirst(outstring, "</RS>", "</q>") > -1) continue;
				
			//  <RT>
			if (replaceFirst(outstring, "<RT>", "<milestone type=\"x-RT\"/>") > -1) continue;

			if (replaceFirst(outstring, "<SHI>", "<title type=\"section\">") > -1) continue;
			if (replaceFirst(outstring, "</SHI>", "</title>") > -1) continue;
				
			//  <?>
			if (replaceFirst(outstring, "<?>", "¿") > -1) continue;
			
			//  <!>
			if (replaceFirst(outstring, "<!>", "¡") > -1) continue;

			// any "<R..." still left at this point is treated as a cross-reference marker
			outstr = outstring.c_str();
			found = strstr(outstr, "<R");
			start = (found) ? (found - outstr) : -1;

			if (start > -1) {
				int end = outstring.find_first_of(">", start+1);
				string nStr = outstring.substr(start+2, end-start-2);

				char chapString[20], verseString[20];
				sprintf(chapString, "%d", currentChapter);
				sprintf(verseString, "%d", currentVerse);
				string osisID = (currentBook + (string)"." + (string)chapString + (string)"." + (string) verseString).c_str();
				string noteBody = getNoteBody(fdn, noteLine, osisID, currentBookNo, currentChapter, currentVerse, nStr, "R");
				outstring.replace(start, end-start+1, (string)"<note type=\"crossReference\" osisID=\"" + osisID + (string)".xref." + nStr + (string) "\" n=\"" + nStr + (string)"\">" + noteBody + "</note>");
				continue;
			}
			// transChange added {}
			outstr = outstring.c_str();
			found = strstr(outstr, "{");
			start = (found) ? (found - outstr) : -1;

			if (start > -1) {
				outstring.replace(start, 1, (string)"<transChange type=\"added\">");
				size_t end = outstring.find_first_of("}", start+1);
				if (end != string::npos) {
					outstring.erase(end, 1);
				}
				else end = outstring.size()-1;
				// back up so the closing tag lands right after the last letter or tag
				while ((!isalpha(outstring[end]))&&(outstring[end]!='>')) end--;
				outstring.insert(end+1, "</transChange>");
				
				continue;
				
			}
/*
			// transChange tenseChange *
			outstr = outstring.c_str();
			found = strstr(outstr, "*");
			start = (found) ? (found - outstr) : -1;

			if (start > -1) {
				outstring.replace(start, 1, (string)"<transChange type=\"tenseChanged\">");
				for (end = start + 34; (end < outstring.length()); end++) {
					if ((!isalpha(outstring[end])) &&
							(outstring[end] != '\''))
						break;
				}
				outstring.replace(end, 1, "</transChange>");
				continue;
				
			}
*/
			//  <,>
			if (replaceFirst(outstring, "<,>", "<milestone type=\"x-superiorComma\"/>") > -1) continue;

			//  <NA>
			if (replaceFirst(outstring, "<NA>", "<milestone type=\"x-superiorOne\"/>") > -1) continue;
				
			//  <NB>
			if (replaceFirst(outstring, "<NB>", "<milestone type=\"x-superiorTwo\"/>") > -1) continue;

			//  <NC>
			if (replaceFirst(outstring, "<NC>", "<milestone type=\"x-superiorThree\"/>") > -1) continue;

			// paragraph break <PM>
			if (replaceFirst(outstring, "<PM>", "<milestone type=\"line\" subType=\"x-PM\"/>") > -1) continue;
				
			// poetry break <PN>
			if (replaceFirst(outstring, "<PN>", "<milestone type=\"x-Poetry\" />") > -1) continue;
				
			// poetry break <PO>
			if (replaceFirst(outstring, "<PO>", "<milestone type=\"line\" subType=\"x-Poetry\" />") > -1) continue;
				
			// poetry break <PR>
			if (replaceFirst(outstring, "<PR>", "<milestone type=\"x-PoetryEnd\" />") > -1) continue;
				
			// letter indent <HL>
			if (replaceFirst(outstring, "<HL>", "<milestone type=\"x-HL\" />") > -1) continue;
				
			// letter indent <HLL>
			if (replaceFirst(outstring, "<HLL>", "<milestone type=\"line\" subType=\"x-HLL\" />") > -1) continue;
			break;
		}

		// State for the scan below.  strongsStart is the offset at which the next
		// <w lemma=...> opening tag will be inserted; it is reset after each tag
		// listed in the scan.
		int strongsStart = 0;
		int transChangeStart = 0;
		bool strongsStartFound = false;
		bool intoken = false;
		bool intag = false;
		bool inNote = false;
		int tokenStart = 0;
		string lastToken = "";
		string previousToken = "";
		// offset of the most recent '*' (tense-change marker), or -1 when none is pending
		int tenseChange = -1;
		// strongs numbers
		// walks the line once; each <MG...>/<MH...> token is replaced by </w> and the
		// matching <w lemma="strong:..."> is inserted in front of the words it covers
		for (unsigned int i = 0; i < outstring.length(); ++i) {
			if ((!strongsStartFound) && (!inNote) && (!intoken)) {
				if (!intag) {
					if ((outstring[i] != ' ') && ((isalpha(outstring[i])) || (isdigit(outstring[i])))) {
						strongsStart = i;
						strongsStartFound = true;
					}
				}
				else if (!strncmp(lastToken.c_str(), "hi", 2) && strncmp(lastToken.c_str(), "hi type=\"inscription\"", 21)) {
					// inside a <hi> other than an inscription: start before the <hi> tag itself
					if ((outstring[i] != ' ') && ((isalpha(outstring[i])) || (isdigit(outstring[i])))) {
						strongsStart = tokenStart - 1;
						strongsStartFound = true;
					}
				}

			}
			if (outstring[i] =='*')
				tenseChange = i;
			if (outstring[i] == '<') { tokenStart = i+1; intoken = true; }
			if (outstring[i] == '>') {
				intoken = false;
				previousToken = lastToken;
				lastToken = outstring.substr(tokenStart, i-tokenStart);
				// Not completely safe, but works for current NASB data
				// (any token containing '/' -- closing or self-closing -- counts as "not in a tag")
				if (strchr(lastToken.c_str(), '/'))
					intag = false;
				else intag = true;
				if ((intag)&&(!strncmp(lastToken.c_str(), "transChange", 11))) {
					transChangeStart = i+1;
				}
/*
				if (!strncmp(lastToken.c_str(), "seg", 3)) {
					strongsStartFound = false;
					strongsStart = i+1;
				}
				if (!strncmp(lastToken.c_str(), "divineName", 10)) {
					strongsStartFound = false;
					strongsStart = i+1;
				}
*/
				// each of the following tags restarts the search for the next word start
				// (only the first 10 characters of "/divineName" are compared here)
				if (!strncmp(lastToken.c_str(), "/divineName", 10)) {
					strongsStartFound = false;
					strongsStart = i+1;
				}
				if (!strncmp(lastToken.c_str(), "note", 4)) {
					strongsStartFound = false;
					strongsStart = i+1;
					inNote = true;
				}
				if (!strncmp(lastToken.c_str(), "/note", 5)) {
					strongsStartFound = false;
					strongsStart = i+1;
					inNote = false;
				}
				if (!strncmp(lastToken.c_str(), "q who=\"Jesus\"", 13)) {
					strongsStartFound = false;
					strongsStart = i+1;
				}
				if (!strncmp(lastToken.c_str(), "/q", 2)) {
					strongsStartFound = false;
					strongsStart = i+1;
				}
				if (!strncmp(lastToken.c_str(), "seg type=\"otPassage\"", 20)) {
					strongsStartFound = false;
					strongsStart = i+1;
				}
				if (!strncmp(lastToken.c_str(), "hi type=\"inscription\"", 21)) {
					strongsStartFound = false;
					strongsStart = i+1;
				}
				if (!strncmp(lastToken.c_str(), "/transChange", 12)) {
					strongsStartFound = false;
					strongsStart = i+1;
				}
				if (!strncmp(lastToken.c_str(), "milestone", 9)) {
					strongsStartFound = false;
					strongsStart = i+1;
				}
				if (!strncmp(lastToken.c_str(), "/seg", 4)) {
					strongsStartFound = false;
					strongsStart = i+1;
				}
				if (!strncmp(lastToken.c_str(), "verse", 5)) {
					strongsStartFound = false;
					strongsStart = i+1;
				}

				if ((!strncmp(lastToken.c_str(), "verse", 5))) {
					intag = false;
				}

				if (            (!strncmp(lastToken.c_str(), "MG", 2)) ||
						(!strncmp(lastToken.c_str(), "MH", 2))) {

					// insert </w>
					// fix tenseChange to be inside <w> so we can include a subset of the <w> content.
					outstring.replace(tokenStart-1, lastToken.length()+2, ((tenseChange > -1) ? "</w></transChange>":"</w>"));
					// 18 and 4 are the lengths of the two replacement strings above
					i = (tokenStart-1) + ((tenseChange > -1) ? 18:4);
					
					// build <w ... > tag
					char lang = lastToken[1];	// H or G
					// drops the leading 'M', keeping the H/G letter as part of the first number
					lastToken.replace(0, 1, "<w lemma=\"strong:");
					while ((start = lastToken.find(", ")) > -1) {
						lastToken.replace(start, 2, (string)" strong:" + lang);
					}
					lastToken += "\">";
					intag = false;


					if (tenseChange > -1) {
						lastToken.insert(0, "<transChange type=\"tenseChange\">");
					}
					// directly after an opening "added" transChange the <w> goes inside it
					if (!strncmp(previousToken.c_str(), "transChange type=\"added", 23)) {
						outstring.insert(transChangeStart, lastToken);
						intag = true;
						i += lastToken.length() - 1; // (-1 because we're about to i++)
					}

					// insert our token
					else {
						outstring.insert(strongsStart, lastToken);
						i += lastToken.length() - 1; // (-1 because we're about to i++)
					}
					strongsStart = i+1;
					strongsStartFound = false;
					if (tenseChange > -1) {
						// relocate because position may have changed from all the token inserts
						const char *buf = outstring.c_str();
						tenseChange = (strchr(buf, '*') - buf);
						outstring.erase(tenseChange, 1);
						tenseChange = -1;
					}
				}
			}
		}


		// clean up stuff that didn't work quite right
		while (1) {

			// divineName strongs tags misorderings
			// move </w> outside the closing divineName/seg pair so the elements nest properly
			string target = "</w></divineName></seg>";
			size_t s = outstring.find(target);
			if (s != string::npos) {
				outstring.replace(s, target.length(), "</divineName></seg></w>");
				continue;
			}
			target = "</w>,</divineName></seg>";
			s = outstring.find(target);
			if (s != string::npos) {
				outstring.replace(s, target.length(), "</divineName></seg></w>,");
				continue;
			}

			break;
		}


		std::cout << outstring;
		if (!result) std::cout << "\n";
	}
	while (!result);
	// close whatever containers are still open at end of input
	outstring = "";
	if (inVerse) {
		outstring += "</verse>";
		inVerse = false;
	}
	if (inChapter) {
		outstring += "</chapter>";
		inChapter = false;
	}
	if (inBook) {
		outstring += "</div>";
		inBook = false;
	}
	std::cout << outstring;

	outTrailer();

	// clean up our buffers that readline might have allocated
	if (buffer)
		delete [] buffer;
	if (nbuffer)
		delete [] nbuffer;

	close(fd);

	if (fdn > -1)
		close(fdn);
}

// Prints the fixed document preamble to standard output: the XML declaration,
// the opening <osis> and <osisText> elements and the complete <header> block.
// Every entry of the table is written followed by a newline.
void outHeader() {
	static const char *const preamble[] = {
		"<?xml version=\"1.0\" encoding=\"UTF-8\"?>",
		"<osis xmlns=\"http://www.bibletechnologies.net/2003/OSIS/namespace\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.bibletechnologies.net/2003/OSIS/namespace osisCore.1.9.xsd\">",
		"     <osisText osisIDWork=\"nasb\" xml:lang=\"en\">",
		"          <header>",
		"               <work osisWork=\"nasb\">",
		"                    <title>NEW AMERICAN STANDARD BIBLE</title>",
		"                    <identifier type=\"OSIS\">Bible.en.NASB.1995</identifier>",
		"                    <rights>Copyright (C) 1960,1962,1963,1968,1971,1972,1973,1975,1977,1995 by THE LOCKMAN FOUNDATION</rights>",
		"                    <refSystem>Bible</refSystem>",
		"               </work>",
		"               <work osisWork=\"strongs\">",
		"               </work>",
		"          </header>"
	};
	const unsigned int preambleCount = sizeof(preamble) / sizeof(preamble[0]);

	for (unsigned int row = 0; row < preambleCount; ++row) {
		std::cout << preamble[row] << "\n";
	}
}

// Prints the closing </osisText> and </osis> tags, each on its own line, to
// standard output.
void outTrailer() {
	static const char closing[] =
		"</osisText>\n"
		"</osis>\n";
	std::cout << closing;
}

// Replaces ASCII quote characters in outstring with UTF-8 typographic quotes:
//
//   ``  ->  U+201C left double quotation mark   (E2 80 9C)
//   `   ->  U+2018 left single quotation mark   (E2 80 98)
//   '   ->  U+2019 right single quotation mark  (E2 80 99)
//   "   ->  U+201D right double quotation mark  (E2 80 9D)
//
// The string is converted in a single left-to-right pass, so a run of
// backticks is consumed in pairs from the left and an odd one out becomes a
// single quote.  All replacements are spelled as explicit byte sequences so
// the result does not depend on the encoding of this source file.
void unicodeTicks(string &outstring) {
	static const char leftDouble[]  = "\xe2\x80\x9c";
	static const char leftSingle[]  = "\xe2\x80\x98";
	static const char rightSingle[] = "\xe2\x80\x99";
	static const char rightDouble[] = "\xe2\x80\x9d";

	string converted;
	converted.reserve(outstring.size());

	const string::size_type total = outstring.size();
	for (string::size_type pos = 0; pos < total; ++pos) {
		const char current = outstring[pos];
		switch (current) {
		case '`':
			if (pos + 1 < total && outstring[pos + 1] == '`') {
				converted += leftDouble;
				++pos;		// the pair has been consumed
			}
			else converted += leftSingle;
			break;
		case '\'':
			converted += rightSingle;
			break;
		case '"':
			converted += rightDouble;
			break;
		default:
			converted += current;
			break;
		}
	}
	outstring.swap(converted);
}


// Replaces the first occurrence of needle in haystack with replacement.
// Returns the offset at which the replacement was made, or -1 (leaving
// haystack untouched) when needle does not occur.
int replaceFirst(string &haystack, string needle, string replacement) {
	const char *base = haystack.c_str();
	const char *hit = strstr(base, needle.c_str());
	if (!hit)
		return -1;

	int offset = hit - base;
	if (offset > -1)
		haystack.replace(offset, needle.size(), replacement);
	return offset;
}


void prepLine(string &outstring, int currentTestament, bool note) {
	int end = 0;
	while (1) {
// ------------------------------------------
// redundant or unneeded or unknown markers
			size_t s;

			//  <1EVA>
			if (replaceFirst(outstring, "<1EVA>", "") > -1) continue;

			//  <1EVB>
			if (replaceFirst(outstring, "<1EVB>", "") > -1) continue;

			//  <FA>
			if (replaceFirst(outstring, "<FA>", "") > -1) continue;

			//  <PR>
			if (replaceFirst(outstring, "<PR>", "") > -1) continue;

			//  <V>
			if (replaceFirst(outstring, "<V>", "") > -1) continue;

			//  <T>
			if (replaceFirst(outstring, "<T>", "") > -1) continue;

			//  <P>
			if (replaceFirst(outstring, "<P>", "") > -1) continue;

			//  <C>
			if (replaceFirst(outstring, "<C>", "") > -1) continue;

			//  <CC>
			if (replaceFirst(outstring, "<CC>", "") > -1) continue;

			//  <CP>
			if (replaceFirst(outstring, "<CP>", "") > -1) continue;

			//  <$F...>>
			s = outstring.find("<$F");
			if (s != string::npos) {
				size_t e = outstring.find(">>", s);
				outstring.erase(s, e-s+2);
				continue;
			}
			//  <EOV>
			s = outstring.find("<EOV>");
			if (s != string::npos) {
				size_t e = outstring.find("</EOV>", s);
				outstring.erase(s, e-s+6);
				continue;
			}
// ----------------------------------------------

			//  <A>
			if (replaceFirst(outstring, "<A>", "<milestone type=\"line\" subType=\"x-A\"/>") > -1) continue;

			// ~“
			char uchar[6]; uchar[0]=0xcc; uchar[1]=0x83; uchar[2]=0xe2; uchar[3]=0x80; uchar[4]=0x9c; uchar[5]=0;
//			string target = "~“";
			string target = uchar;
			s = outstring.find(target);
			if (s != string::npos) {
				outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"“\"/>");
				continue;
			}
			// +«
			target = "+«";
			s = outstring.find(target);
			if (s != string::npos) {
				outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"«\"/>");
				continue;
			}
			// +»
			target = "+»";
			s = outstring.find(target);
			if (s != string::npos) {
				outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"»\"/>");
				continue;
			}
			// +“
			target = "+“";
			s = outstring.find(target);
			if (s != string::npos) {
				outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"“\"/>");
				continue;
			}
			// +”
			target = "+”";
			s = outstring.find(target);
			if (s != string::npos) {
				outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"”\"/>");
				continue;
			}
			// +‘
			target = "+‘";
			s = outstring.find(target);
			if (s != string::npos) {
				outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"‘\"/>");
				continue;
			}
			// +’
			target = "+’";
			s = outstring.find(target);
			if (s != string::npos) {
				outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"’\"/>");
				continue;
			}
			// -«
			target = "-«";
			s = outstring.find(target);
			if (s != string::npos) {
				outstring.replace(s, target.length(), "<milestone type=\"cQuote\" subType=\"optional\" marker=\"«\"/>");
				continue;
			}
			// -»
			target = "-»";
			s = outstring.find(target);
			if (s != string::npos) {
				outstring.replace(s, target.length(), "<milestone type=\"cQuote\" subType=\"optional\" marker=\"»\"/>");
				continue;
			}
			// -“
			target = "-“";
			s = outstring.find(target);
			if (s != string::npos) {
				outstring.replace(s, target.length(), "<milestone type=\"cQuote\" subType=\"optional\" marker=\"“\"/>");
				continue;
			}
			// -”
			target = "-”";
			s = outstring.find(target);
			if (s != string::npos) {
				outstring.replace(s, target.length(), "<milestone type=\"cQuote\" subType=\"optional\" marker=\"”\"/>");
				continue;
			}
			// -‘
			target = "-‘";
			s = outstring.find(target);
			if (s != string::npos) {
				outstring.replace(s, target.length(), "<milestone type=\"cQuote\" subType=\"optional\" marker=\"‘\"/>");
				continue;
			}
			// -’
			target = "-’";
			s = outstring.find(target);
			if (s != string::npos) {
				outstring.replace(s, target.length(), "<milestone type=\"cQuote\" subType=\"optional\" marker=\"’\"/>");
				continue;
			}

			// ~‘
			uchar[0]=0xcc; uchar[1]=0x83; uchar[2]=0xe2; uchar[3]=0x80; uchar[4]=0x98; uchar[5]=0;
//			target = "~‘";
			target = uchar;
			s = outstring.find(target);
			if (s != string::npos) {
				outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"‘\"/>");
				continue;
			}
			if (replaceFirst(outstring, "<B>", "<hi type=\"bold\">") > -1) continue;
			if (replaceFirst(outstring, "</B>", "</hi>") > -1) continue;

			if (replaceFirst(outstring, "L<\\>{ORD}</>'<\\>{S}</>", "<seg><divineName>Lord's</divineName></seg>") > -1) continue;
			if (replaceFirst(outstring, "L<\\>{ORD’S}</>", "<seg><divineName>Lord’s</divineName></seg>") > -1) continue;
			if (replaceFirst(outstring, "L<\\>{ORD}</>’<\\>{S}</>", "<seg><divineName>Lord’s</divineName></seg>") > -1) continue;
			if (replaceFirst(outstring, "L<\\>{ORD}</>’<\\>{S} </>", "<seg><divineName>Lord’s</divineName></seg> ") > -1) continue;
			if (replaceFirst(outstring, "L<\\>ORD</>’<\\>S<MH3068></>", "<seg><divineName>Lord’s<MH3068></divineName></seg>") > -1) continue;
			if (replaceFirst(outstring, "L<\\>{ORD,}</>", "<seg><divineName>Lord</divineName></seg>,") > -1) continue;
			if (replaceFirst(outstring, "L<\\>{ORD}</>", "<seg><divineName>Lord</divineName></seg>") > -1) continue;
			if (replaceFirst(outstring, "L<\\>{ORD} </>", "<seg><divineName>Lord</divineName></seg> ") > -1) continue;
			if (replaceFirst(outstring, "L}<\\>{ORD}</>{", "<seg><divineName>Lord</divineName></seg>") > -1) continue;
			if (replaceFirst(outstring, "L}<\\>{ORD}</>", "<seg><divineName>Lord</divineName></seg>}") > -1) continue;
			if (replaceFirst(outstring, "S<\\>{EN~OR}</>", "<seg><divineName>Sen~or</divineName></seg>") > -1) continue;
			if (replaceFirst(outstring, "S<\\>{EÑOR}</>", "<seg><divineName>Señor</divineName></seg>") > -1) continue;
			if (replaceFirst(outstring, "Y<\\>{AH,}</>", "<seg><divineName>Yah</divineName></seg>,") > -1) continue;
			if (replaceFirst(outstring, "Y<\\>{AH,} </>", "<seg><divineName>Yah</divineName></seg>, ") > -1) continue;
			if (replaceFirst(outstring, "Y<\\>{AH}</>", "<seg><divineName>Yah</divineName></seg>") > -1) continue;

			// Do these first before Daniel Inscriptions
			// LB = add macron, only with 'a':  ā
			if (replaceFirst(outstring, "a<LB>", "ā") > -1) continue;
			if (replaceFirst(outstring, "E<LE>", "Ē") > -1) continue;
			if (replaceFirst(outstring, "e<LE>", "ē") > -1) continue;

			if (replaceFirst(outstring, "M<\\>ENĒ<MH4484></>", "<hi type=\"inscription\">Menē<MH4484></hi>") > -1) continue;
			if (replaceFirst(outstring, "MENĒ<MH4484>", "<hi type=\"inscription\">Menē<MH4484></hi>") > -1) continue;

			// these are probably legacy
			if (replaceFirst(outstring, "M<\\>ENĒ:</>", "<hi type=\"inscription\">Menē</hi>:") > -1) continue;
			if (replaceFirst(outstring, "M<\\>ENE</>", "<hi type=\"inscription\">Mene</hi>") > -1) continue;
			if (replaceFirst(outstring, "M<\\>ENE:</>", "<hi type=\"inscription\">Mene</hi>:") > -1) continue;
			if (replaceFirst(outstring, "M<\\>ENĒ</>", "<hi type=\"inscription\">Menē</hi>") > -1) continue;
			if (replaceFirst(outstring, "MENĒ", "<hi type=\"inscription\">Menē</hi>") > -1) continue;
			if (replaceFirst(outstring, "MENE", "<hi type=\"inscription\">Mene</hi>") > -1) continue;
			// end of probably legacy

			if (replaceFirst(outstring, "T<\\>EKĒL<MH8625b></>", "<hi type=\"inscription\">Tekēl<MH8625b></hi>") > -1) continue;
			if (replaceFirst(outstring, "TEKĒL<MH8625b>", "<hi type=\"inscription\">Tekēl<MH8625b></hi>") > -1) continue;

			// these are probably legacy
			if (replaceFirst(outstring, "TEKEL", "<hi type=\"inscription\">Tekel</hi>") > -1) continue;
			if (replaceFirst(outstring, "T<\\>EKEL</>", "<hi type=\"inscription\">Tekel</hi>") > -1) continue;
			if (replaceFirst(outstring, "T<\\>EKEL:</>", "<hi type=\"inscription\">Tekel</hi>:") > -1) continue;
			if (replaceFirst(outstring, "T<\\>EKĒL</>", "<hi type=\"inscription\">Tekēl</hi>") > -1) continue;
			if (replaceFirst(outstring, "T<\\>EKĒL:</>", "<hi type=\"inscription\">Tekēl</hi>:") > -1) continue;
			if (replaceFirst(outstring, "TEKĒL", "<hi type=\"inscription\">Tekēl</hi>") > -1) continue;
			// end of probably legacy
			
			if (replaceFirst(outstring, "U<\\>PHARSIN<MH6537b></>", "<hi type=\"inscription\">Upharsin<MH6537b></hi>") > -1) continue;
			if (replaceFirst(outstring, "UPHARSIN<MH6537b>", "<hi type=\"inscription\">Upharsin<MH6537b></hi>") > -1) continue;

			// these are probably legacy
			if (replaceFirst(outstring, "UPHARSIN", "<hi type=\"inscription\">Upharsin</hi>") > -1) continue;
			if (replaceFirst(outstring, "UFARSIN", "<hi type=\"inscription\">Ufarsin</hi>") > -1) continue;
			if (replaceFirst(outstring, "U<\\>FARSIN</>", "<hi type=\"inscription\">Ufarsin</hi>") > -1) continue;
			if (replaceFirst(outstring, "UPHARSIN", "<hi type=\"inscription\">Upharsin</hi>") > -1) continue;
			if (replaceFirst(outstring, "UFARSIN", "<hi type=\"inscription\">Ufarsin</hi>") > -1) continue;
			// end of probably legacy

			if (replaceFirst(outstring, "P<\\>ERĒS<MH6537b></>", "<hi type=\"inscription\">Perēs<MH6537b></hi>") > -1) continue;
			if (replaceFirst(outstring, "PERĒS<MH6537b>", "<hi type=\"inscription\">Perēs<MH6537b></hi>") > -1) continue;

			// these are probably legacy
			if (replaceFirst(outstring, "PERES", "<hi type=\"inscription\">Peres</hi>") > -1) continue;
			if (replaceFirst(outstring, "P<\\>ERES</>", "<hi type=\"inscription\">Peres</hi>") > -1) continue;
			if (replaceFirst(outstring, "P<\\>ERES:</>", "<hi type=\"inscription\">Peres</hi>:") > -1) continue;
			if (replaceFirst(outstring, "PERĒS", "<hi type=\"inscription\">Perēs</hi>") > -1) continue;
			if (replaceFirst(outstring, "P<\\>ERĒS</>", "<hi type=\"inscription\">Perēs</hi>") > -1) continue;
			if (replaceFirst(outstring, "P<\\>ERĒS:</>", "<hi type=\"inscription\">Perēs</hi>:") > -1) continue;
			// end of probably legacy

			if (replaceFirst(outstring, "H<\\>OLY<MH6944> TO THE</> L<\\>ORD<MH3068></>",
				"<hi type=\"inscription\">Holy<MH6944> to the L<\\>ORD<MH3068></></hi>") > -1) continue;

			const char *outstr = outstring.c_str();
			const char *found = strstr(outstr+end, "<\\>");
			int start = (found) ? (found - outstr) : -1;

			if (start > -1) {
				for (--start;start;start--) {
					if ((!std::isupper(outstring[start])) &&
							(!strchr("\\/ ~", outstring[start]))) {
						break;
					}
				}
				for (start++; outstring[start] == ' '; start++);
				if (currentTestament) {
					outstring.insert(start, "<seg type=\"otPassage\">");
					start += 22;
				}
				else {
					outstring.insert(start, "<seg><divineName>");
					start += 17;

					int s = replaceFirst(outstring, "L<\\>{ORD}</>", "Lord");
					if (s > -1) end = s+4;
				}

				// do small cap logic
				bool lower = false;
				string token = "";
				for (int charLen = 1; start < (int)outstring.length(); start += charLen) {
					const unsigned char *startChar = (const unsigned char *)outstring.c_str()+start;
					const unsigned char *endChar = startChar;
					SW_u32 testChar = getUniCharFromUTF8(&endChar, true);
					charLen = endChar - startChar;	// set the size of the UTF-8 sequence
					if (!token.size()) {
						if (testChar == '<') {
							token = "<";
							continue;
						}
						// what is this?  It screws MENE MENE up in Daniel
//						if (testChar == ':')
//							break;

						if (StringMgr::getSystemStringMgr()->isAlpha(testChar)) {
							if (StringMgr::getSystemStringMgr()->isLower(testChar))
								break;
							if (lower)
								outstring.replace(start, charLen, SWBuf((const char *)startChar, charLen).toLower());
							continue;
						}
					}
					else {
						token += testChar;

						if (testChar == '>') {
							if (token == "<\\>") {
								lower = true;
								outstring.erase(start-2, 3);
								start -= 3;
							}
							if (token == "</>") {
								lower = false;
								outstring.erase(start-2, 3);
								end = start - 2;
								start -= 3;
								unsigned int nextStrongs = outstring.find("<M");
								unsigned int nextUp = outstring.find("</>");
								if (nextStrongs != string::npos && nextUp != string::npos && nextStrongs < nextUp) {
									break;
								}
							}
							// end divineName if we hit a PO in the middle
							if (token == "<PO>") {
								break;
							}
							unsigned int s = token.find("<N");
							if (s == string::npos || s > 0) s = token.find("<R");
							if (s == 0 && token.size() > 2) {
								if (StringMgr::getSystemStringMgr()->isDigit(token[2])) {
									break;
								}
							}
							token = "";
						}
					}
				}
				if (currentTestament) {
					outstring.insert(end, "</seg>");
					end+=6;
				}
				else {
					outstring.insert(end, "</divineName></seg>");
					end+=19;
				}
				continue;
			}

			// These are places where we unnecessarily stop and then restart otPassage.
			// We could make the otPassage logic work better, but these exceptions clean
			// things up for now.
			if (replaceFirst(outstring, "</seg>’<seg type=\"otPassage\">s", "’s") > -1) continue;
			if (replaceFirst(outstring, "</seg>-<seg type=\"otPassage\">", "-") > -1) continue;
			if (replaceFirst(outstring, "</seg>,<seg type=\"otPassage\">", ",") > -1) continue;
			if (replaceFirst(outstring, "</seg>, <seg type=\"otPassage\">", ", ") > -1) continue;
			if (replaceFirst(outstring, "</seg>! <seg type=\"otPassage\">", "! ") > -1) continue;
			if (replaceFirst(outstring, "</seg>; <seg type=\"otPassage\">", "; ") > -1) continue;
			if (replaceFirst(outstring, "</seg> <seg type=\"otPassage\">", " ") > -1) continue;
			if (replaceFirst(outstring, "</seg>, ‘<seg type=\"otPassage\">", ", ‘") > -1) continue;
			if (replaceFirst(outstring, "</seg>,’ <seg type=\"otPassage\">", ",’ ") > -1) continue;

			if (note) {
				outstr = outstring.c_str();
				found = strstr(outstr, "{");
				start = (found) ? (found - outstr) : -1;
				if (start > -1) {
					outstring.replace(start, 1, "<hi type=\"italic\">");
					continue;
				}
				outstr = outstring.c_str();
				found = strstr(outstr, "}");
				start = (found) ? (found - outstr) : -1;
				if (start > -1) {
					outstring.replace(start, 1, "</hi>");
					continue;
				}
				s = outstring.find("</reference></hi>");
				if (s != string::npos) {
					const size_t s2 = outstring.find("<hi type=\"italic\"><reference");
					if (s2 == string::npos) {
						outstring.replace(s, 17, "</hi></reference>");
						continue;
					}
				}
			}

//			if (replaceFirst(outstring, ")</hi>", "</hi>)") > -1) continue;

			break;
		}
}

/**
 * Extracts the body of a single note / cross-reference entry from a note line.
 *
 * The note line is expected to contain a "{{book::chap}}verse " header and at
 * least one "<R" or "<N" tag.  When noteLine is empty and fdn is a valid
 * descriptor, lines are pulled from fdn via readline() until such a line is
 * found.  The header is checked against the current book / chapter / verse
 * (the verse may also be currentVerse+1); on mismatch, or when no usable note
 * line can be obtained, an error is printed to stderr and the process exits.
 *
 * The returned body is the text following the tag "<" + nx + nStr + ">" up to
 * the next " <N" or " <R" (or end of line), passed through unicodeTicks(),
 * optionally converted to OSIS references (HAVESWORD builds, when nx starts
 * with 'R'), and finally through prepLine().
 *
 * @param fdn            descriptor of the notes file; values < 0 mean "no file"
 * @param noteLine       in/out: current note line; refilled from fdn when empty
 * @param osisID         OSIS ID of the current verse (error output, reference context)
 * @param currentBookNo  book number the note header must match
 * @param currentChapter chapter number the note header must match
 * @param currentVerse   verse number the note header must match (or be one less than)
 * @param nStr           identifier of the note, appended to nx to build the tag
 * @param nx             tag prefix (e.g. "N" or "R")
 * @return the processed note body; "" when readline() reports end of file or
 *         the tag is not present in the line
 */
string getNoteBody(int fdn, string &noteLine, string osisID, int currentBookNo, int currentChapter, int currentVerse, string nStr, const char *nx) {
	char *nbuffer = 0;
	int start = -1;
	const char *found = 0;
	const char *outstr = 0;

	while (start == -1) {
		if (!noteLine.length() && fdn > -1) {
			if (readline(fdn, &nbuffer)) {	// eof
				// readline() allocates the buffer even at eof; don't leak it
				delete [] nbuffer;
				return "";
			}
			noteLine = nbuffer;
		}
		outstr = noteLine.c_str();
		found = strstr(outstr, "{{");
		start = (found) ? (found - outstr) : -1;
		// be sure we have at least one of these.  We've found note lines without any actual notes
		if (found) found = strstr(outstr, "<R");
		if (!found) found = strstr(outstr, "<N");
		if (!found) start = -1;
		if (start == -1) {
			// Without a file to read from there is nothing that could ever
			// replace this line; bail out to the error report below instead
			// of spinning forever.  Keep noteLine intact for the message.
			if (fdn < 0) break;
			noteLine = "";
		}
	}

	if (start > -1) {
		found = strstr(outstr, "}}");
		int end = (found) ? (found - outstr) : -1;
		end++;	// index of the second '}' of the header
		// Initialised so that a header sscanf() cannot parse fails the
		// comparison below rather than reading indeterminate values.
		int book = -1, chap = -1;
		string bkch = noteLine.substr(start+2, end-start-2);
		sscanf(bkch.c_str(), "%d::%d", &book, &chap);
		int vNumEnd = noteLine.find_first_of(" ", end);
		int verse = atoi(noteLine.substr(end+1, vNumEnd-end-1).c_str());
		if ((book != currentBookNo) || (chap != currentChapter) || ((verse != currentVerse) && /*kindof allow case in header before verse marker */ (verse != currentVerse+1))) {
			fprintf(stderr, "Not correct note line(%s - %s - %d:%d, %d:%d, %d:%d): %s\n\n", osisID.c_str(), nStr.c_str(), currentBookNo, book, currentChapter, chap, currentVerse, verse, noteLine.c_str());
			exit(-1);
		}
	}
	else {
		fprintf(stderr, "Not a note line: %s\n\n", noteLine.c_str());
		exit(-1);
	}

	// locate the requested tag, e.g. "<N1>" or "<R2>"
	outstr = noteLine.c_str();
	string tag = (string)"<"+(string)nx+nStr+(string)">";
	found = strstr(outstr, tag.c_str());
	start = (found) ? (found - outstr) : -1;
	string retVal = "";

	if (start > -1) {
		start += tag.length();
		// the body runs until the next note / reference tag, whichever comes first
		const char *nFound = strstr(outstr+start, " <N");
		const char *rFound = strstr(outstr+start, " <R");
		found = (nFound && (!rFound || nFound < rFound)) ? nFound : rFound;
		int end = (found) ? (found - outstr) : -1;
		if (end<0) end = noteLine.length();
		retVal = noteLine.substr(start, end-start);
	}
	unicodeTicks(retVal);
#ifdef HAVESWORD
	if (*nx == 'R')	 {
		// "} {" gets collapsed to a single space, e.g.  {Luke} {9:10-17}
		outstr = retVal.c_str();
		found = strstr(outstr, "} {");
		start = (found) ? (found - outstr) : -1;
		if (start > -1) {
			retVal.replace(start, 3, " ");
		}
		// move a trailing ';' outside of the closing brace
		outstr = retVal.c_str();
		found = strstr(outstr, ";}");
		start = (found) ? (found - outstr) : -1;
		if (start > -1) {
			retVal.replace(start, 2, "};");
		}
		VerseKey key = osisID.c_str();
//std::cerr << osisID.c_str() << ": Convert to OSISRef: " << retVal.c_str();
		retVal = VerseKey::convertToOSIS(retVal.c_str(), &key);
//std::cerr << ": " << retVal.c_str();
	}
#endif
	prepLine(retVal, 0, true);
	if (nbuffer)
		delete [] nbuffer;
//std::cerr << ": " << retVal.c_str() << "\n";
	return retVal;
}