#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>
#include <swmgr.h>
#include <swbuf.h>
#include <swmodule.h>
#include <utf8utf16.h>
using namespace sword;
using namespace std;
// Per-word record used to build the frequency report: one instance is kept
// for every distinct Strong's number encountered by main().
class Word {
public:
	// Spelling of the word; main() fills this from the [words] section of gwords.conf.
	SWBuf utf8;

	// Strong's number; main() copies the word-list map key in here.
	int strong;

	// Number of occurrences counted while walking the module.
	int freq;

	// Translation text taken from the Strong's lexicon data
	// (main() reads it from the [defs] section of gdefs.conf).
	SWBuf kjvTrans;

	// Computed by this program: rendered text -> number of times the word
	// was rendered that way.
	map<SWBuf, int> kjvFreq;

	// Starts out with empty strings and zeroed counters.
	Word() : utf8(""), strong(0), freq(0), kjvTrans("") {}
};
// Returns the decimal text form of v (e.g. 42 -> "42", -7 -> "-7").
string itoa(int v) {
	ostringstream formatted;
	formatted << v;
	return formatted.str();
}
// Sort predicate: orders Word pointers by descending occurrence count.
bool compareFreq(const Word *w1, const Word *w2) {
	const int left = w1->freq;
	const int right = w2->freq;
	return right < left;
}
// Sort predicate: orders (text, count) map entries by descending count.
bool compareKJVFreq(const map<SWBuf, int>::const_iterator &i1, const map<SWBuf, int>::const_iterator &i2) {
	const int left = i1->second;
	const int right = i2->second;
	return right < left;
}
// Renders a translation-frequency table as "text (count); text (count); ...",
// highest count first.
//
// An entry whose first letter is upper case is folded into the entry that is
// identical except for a lower-case first letter, when such an entry exists
// (e.g. "The" is counted under "the"); otherwise it is listed on its own.
//
// in: rendered text -> occurrence count.  The map is only read; the folding
//     is done on a private copy, so repeated calls give the same result.
// Returns the formatted list, or an empty buffer when 'in' is empty.
SWBuf prettyKJVFreq(map<SWBuf, int> &in) {
	typedef map<SWBuf, int>::const_iterator Entry;

	// Fold counts into a private copy: adding them to the caller's map would
	// inflate its counts again on every later call.
	map<SWBuf, int> counts(in);

	vector<Entry> sorted;
	for (Entry it = counts.begin(); it != counts.end(); ++it) {
		const SWBuf &text = it->first;
		if (text.size()) {
			// The <cctype> functions are only defined for values representable
			// as unsigned char, so never hand them a (possibly negative) char.
			const unsigned char first = (unsigned char)text.c_str()[0];
			const unsigned char lowered = (unsigned char)tolower(first);
			if (lowered != first) {
				// combine cap words with lowercase, if exists
				SWBuf key = text;
				key[0] = (char)lowered;
				map<SWBuf, int>::iterator lower = counts.find(key);
				if (lower != counts.end()) {
					lower->second += it->second;
					// don't list this entry; its count now lives in the lowercase one
					continue;
				}
			}
		}
		sorted.push_back(it);
	}

	// The iterators are dereferenced at sort time, so every folded count is
	// already in place.  stable_sort keeps equal counts in map (key) order,
	// which makes the output deterministic.
	stable_sort(sorted.begin(), sorted.end(), compareKJVFreq);

	SWBuf retVal;
	for (vector<Entry>::const_iterator it = sorted.begin(); it != sorted.end(); ++it) {
		if (retVal.size()) retVal += "; ";
		retVal.appendFormatted("%s (%d)", (*it)->first.c_str(), (*it)->second);
	}
	return retVal;
}
// Returns inText with every non-ASCII code unit replaced by a "\uXXXX" escape
// (four upper-case hex digits); ASCII code units are copied through unchanged.
//
// inText is taken by value because the UTF8UTF16 filter rewrites it in place.
// After filtering, the buffer is read here as a sequence of 16-bit units
// terminated by a zero unit.
// NOTE(review): assumes UTF8UTF16::processText leaves native-endian UTF-16
// with a 16-bit terminator in the buffer -- confirm against the SWORD filter.
SWBuf escapedUTF8(SWBuf inText) {
	static UTF8UTF16 convert;
	convert.processText(inText);

	SWBuf retBuf;
	for (const unsigned short *unit = (const unsigned short *)inText.getRawData(); *unit; ++unit) {
		if (*unit < 128) {
			retBuf += (char)*unit;
		}
		else {
			// %X emits upper-case hex directly.  The former post-processing loop
			// only upper-cased the last three of the four digits.
			retBuf.appendFormatted("\\u%.4X", (unsigned int)*unit);
		}
	}
	return retBuf;
}
int main(int argc, char **argv)
{
SWMgr manager;
SWModule *bible;
SWConfig utf8("gwords.conf");
SWConfig defs("gdefs.conf");
map<int, Word> wordList;
bible = manager.getModule("KJV");
for (bible->setKey("matt.1.1"); !bible->Error(); (*bible)++) {
bible->RenderText(); // force an entry lookup to resolve key to something in the index
AttributeList &words = bible->getEntryAttributes()["Word"];
for (AttributeList::iterator word = words.begin(); word != words.end(); word++) {
SWBuf strong = word->second["Lemma"];
SWBuf text = word->second["Text"];
text.trim();
if (!text.size()) text = "[Untranslated]";
strong << 1;
wordList[atoi(strong.c_str())].freq++;
wordList[atoi(strong.c_str())].kjvFreq[text]++;
// cout << strong << "\n";
}
}
vector<Word *> sorted;
for (map<int, Word>::iterator it = wordList.begin(); it != wordList.end(); it++) {
it->second.strong = it->first;
it->second.kjvTrans = defs["defs"][itoa(it->first).c_str()];
it->second.utf8 = utf8["words"][itoa(it->first).c_str()];
sorted.push_back(&it->second);
}
sort(sorted.begin(), sorted.end(), compareFreq);
for (vector<Word *>::iterator it = sorted.begin(); it != sorted.end(); it++) {
Word *w = (*it);
// cout << w->freq << "|" << escapedUTF8(w->utf8).c_str() << "|" << w->strong << "|" << prettyKJVFreq(w->kjvFreq).c_str() << "\n";
cout << w->freq << "|" << w->utf8.c_str() << "|" << w->strong << "|" << prettyKJVFreq(w->kjvFreq).c_str() << "\n";
}
std::cout << std::endl;
return 0;
}