// cgit navigation: summary | refs | log | blame | commit | diff | stats
// path: root/flashtools/flash.cpp
// blob: 49468bc15832258c6d31adae50c240fc82c3ddb9 (plain) (tree)


































































































































                                                                                                                                               
#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <iostream>
#include <map>
#include <sstream>
#include <vector>

#include <swmgr.h>
#include <swbuf.h>
#include <swmodule.h>
#include <utf8utf16.h>

using namespace sword;
using namespace std;

// Statistics gathered for a single lexicon entry.  main() keeps one Word per
// number parsed from a "Lemma" attribute and fills in the lookup fields
// after counting has finished.
class Word {
public:
	// Every string starts out empty and every counter at zero; kjvFreq is
	// default-constructed, i.e. an empty map.
	Word() : utf8(""), strong(0), freq(0), kjvTrans("") {}

	// value read from the "words" section of gwords.conf for this entry
	SWBuf utf8;

	// number this entry is keyed by in main()'s word list
	int strong;

	// how many times the entry was encountered while scanning the text
	int freq;

	// from Strong's lexicon: value read from the "defs" section of gdefs.conf
	SWBuf kjvTrans;

	// computed ourselves: translated text -> number of times it was seen
	map<SWBuf, int> kjvFreq;
};

// Returns the decimal text form of v, produced by streaming it into a
// string stream.
std::string itoa(int v) {
	std::ostringstream out;
	out << v;
	return out.str();
}

// Ordering predicate for sort(): true when w1 was counted more often than
// w2, so the resulting order is descending by freq.
bool compareFreq(const Word *w1, const Word *w2) {
	const int firstCount = w1->freq;
	const int secondCount = w2->freq;
	return secondCount < firstCount;
}

// Ordering predicate for sort() over iterators into a text -> count map:
// true when the entry behind i1 has the larger count (descending order).
bool compareKJVFreq(const std::map<SWBuf, int>::const_iterator &i1, const std::map<SWBuf, int>::const_iterator &i2) {
	const int firstCount = (*i1).second;
	const int secondCount = (*i2).second;
	return secondCount < firstCount;
}

// Formats the translation counts of one word as "text (count); text (count)"
// with the largest count first.
//
// An entry whose first character is unchanged by toupper() is folded into the
// entry that differs only by a lower-case first character, when such an entry
// exists (e.g. "And" is counted under "and"); the folded entry itself is then
// left out of the list.
//
// in      translated text -> number of occurrences.  The map is only read;
//         the folding is done on a private copy.
// returns the formatted list; empty when in is empty.
SWBuf prettyKJVFreq(map<SWBuf, int> &in) {
	// Work on a copy: folding straight into the caller's map would add the
	// capitalised counts to the lower-case entries again on every later call.
	map<SWBuf, int> merged(in);

	SWBuf retVal;
	vector<map<SWBuf, int>::const_iterator> sorted;
	for (map<SWBuf, int>::const_iterator it = merged.begin(); it != merged.end(); ++it) {
		const SWBuf &text = it->first;
		// An empty key has no first character to fold.
		if (text.size()) {
			// <cctype> functions require a value representable as unsigned
			// char; a plain (possibly negative) char is undefined behaviour.
			const unsigned char first = static_cast<unsigned char>(text.c_str()[0]);
			// combine cap words with lowercase, if exists
			if (std::toupper(first) == first) {
				SWBuf key = text;
				key[0] = static_cast<char>(std::tolower(first));
				if (key != text) {
					map<SWBuf, int>::iterator twin = merged.find(key);
					if (twin != merged.end()) {
						twin->second += it->second;
						// don't include us in the list because we added
						// our freq to another entry
						continue;
					}
				}
			}
		}
		sorted.push_back(it);
	}

	std::sort(sorted.begin(), sorted.end(), compareKJVFreq);

	for (vector<map<SWBuf, int>::const_iterator>::const_iterator entry = sorted.begin(); entry != sorted.end(); ++entry) {
		if (retVal.size()) retVal += "; ";
		retVal.appendFormatted("%s (%d)", (*entry)->first.c_str(), (*entry)->second);
	}
	return retVal;
}

// Returns inText with every 16-bit unit of value 128 or above written as a
// backslash, 'u' and four upper-case hex digits; smaller units are copied
// through as single chars.
//
// inText  taken by value because the filter rewrites the buffer it is given.
//
// NOTE(review): presumably UTF8UTF16::processText() turns the UTF-8 input into
// UTF-16 in place; the loop relies on the buffer then being readable as 16-bit
// units and ending in a zero unit -- confirm against the SWORD filter.
SWBuf escapedUTF8(SWBuf inText) {
	static UTF8UTF16 convert;
	convert.processText(inText);
	SWBuf retBuf;
	for (const unsigned short *unit = reinterpret_cast<const unsigned short *>(inText.getRawData()); *unit; ++unit) {
		if (*unit < 128) {
			retBuf += static_cast<char>(*unit);
		}
		else {
			// %X emits upper-case hex directly.  The previous post-processing
			// loop upper-cased only the last three of the four digits.
			retBuf.appendFormatted("\\u%.4X", static_cast<unsigned int>(*unit));
		}
	}
	return retBuf;
}


int main(int argc, char **argv)
{
	
	SWMgr manager;
	SWModule *bible;
	SWConfig utf8("gwords.conf");
	SWConfig defs("gdefs.conf");
	map<int, Word> wordList;

	bible = manager.getModule("KJV");

	for (bible->setKey("matt.1.1"); !bible->Error(); (*bible)++) {
		bible->RenderText();		// force an entry lookup to resolve key to something in the index

		AttributeList &words = bible->getEntryAttributes()["Word"];
		for (AttributeList::iterator word = words.begin(); word != words.end(); word++) {
			SWBuf strong = word->second["Lemma"];
			SWBuf text = word->second["Text"];
			text.trim();
			if (!text.size()) text = "[Untranslated]";
			strong << 1;
			wordList[atoi(strong.c_str())].freq++;
			wordList[atoi(strong.c_str())].kjvFreq[text]++;
//			cout << strong << "\n";
		}
	}
	vector<Word *> sorted;
	for (map<int, Word>::iterator it = wordList.begin(); it != wordList.end(); it++) {
		it->second.strong = it->first;
		it->second.kjvTrans = defs["defs"][itoa(it->first).c_str()];
		it->second.utf8 = utf8["words"][itoa(it->first).c_str()];
		sorted.push_back(&it->second);
	}
	
	sort(sorted.begin(), sorted.end(), compareFreq);

	for (vector<Word *>::iterator it = sorted.begin(); it != sorted.end(); it++) {
		Word *w = (*it);
//		cout << w->freq << "|" << escapedUTF8(w->utf8).c_str() << "|" << w->strong << "|" << prettyKJVFreq(w->kjvFreq).c_str() << "\n";
		cout << w->freq << "|" << w->utf8.c_str() << "|" << w->strong << "|" << prettyKJVFreq(w->kjvFreq).c_str() << "\n";
	}
	std::cout << std::endl;
	return 0;
}