#include #include #include #include #include #include #include #include using namespace sword; using namespace std; class Word { public: Word() : utf8("") , strong(0) , freq(0) , kjvTrans("") {} SWBuf utf8; int strong; int freq; // from stongs lex SWBuf kjvTrans; // computed ourselves map kjvFreq; }; string itoa(int v) { stringstream str; str << v; return str.str(); } bool compareFreq(const Word *w1, const Word *w2) { return w1->freq > w2->freq; } bool compareKJVFreq(const map::const_iterator &i1, const map::const_iterator &i2) { return i1->second > i2->second; } SWBuf prettyKJVFreq(map &in) { SWBuf retVal; vector::const_iterator> sorted; for (map::const_iterator it = in.begin(); it != in.end(); it++) { // combine cap words with lowercase, if exists if (toupper(it->first[0]) == it->first[0]) { SWBuf key = it->first; key[0] = tolower(key[0]); if (key != it->first) { map::iterator i = in.find(key); if (i != in.end()) { i->second += it->second; // don't include us in the list cuz we added out freq to another continue; } } } sorted.push_back(it); } sort(sorted.begin(), sorted.end(), compareKJVFreq); for (vector::const_iterator>::const_iterator it = sorted.begin(); it != sorted.end(); it++) { if (retVal.size()) retVal += "; "; retVal.appendFormatted("%s (%d)", (*it)->first.c_str(), (*it)->second); } return retVal; } SWBuf escapedUTF8(SWBuf inText) { static UTF8UTF16 convert; convert.processText(inText); SWBuf retBuf; for (unsigned short *i = (unsigned short *)inText.getRawData(); *i; i++) { if (*i < 128) { retBuf += (char)*i; } else { retBuf.appendFormatted("\\u%.4x", *i); // change hex alpha values to upper case for (int i = retBuf.size()-1; i > retBuf.size() - 4; i--) { retBuf[i] = toupper(retBuf[i]); } } } return retBuf; } int main(int argc, char **argv) { SWMgr manager; SWModule *bible; SWConfig utf8("gwords.conf"); SWConfig defs("gdefs.conf"); map wordList; bible = manager.getModule("KJV"); for (bible->setKey("matt.1.1"); !bible->Error(); (*bible)++) { bible->RenderText(); // force an entry lookup to resolve key to something in the index AttributeList &words = bible->getEntryAttributes()["Word"]; for (AttributeList::iterator word = words.begin(); word != words.end(); word++) { SWBuf strong = word->second["Lemma"]; SWBuf text = word->second["Text"]; text.trim(); if (!text.size()) text = "[Untranslated]"; strong << 1; wordList[atoi(strong.c_str())].freq++; wordList[atoi(strong.c_str())].kjvFreq[text]++; // cout << strong << "\n"; } } vector sorted; for (map::iterator it = wordList.begin(); it != wordList.end(); it++) { it->second.strong = it->first; it->second.kjvTrans = defs["defs"][itoa(it->first).c_str()]; it->second.utf8 = utf8["words"][itoa(it->first).c_str()]; sorted.push_back(&it->second); } sort(sorted.begin(), sorted.end(), compareFreq); for (vector::iterator it = sorted.begin(); it != sorted.end(); it++) { Word *w = (*it); // cout << w->freq << "|" << escapedUTF8(w->utf8).c_str() << "|" << w->strong << "|" << prettyKJVFreq(w->kjvFreq).c_str() << "\n"; cout << w->freq << "|" << w->utf8.c_str() << "|" << w->strong << "|" << prettyKJVFreq(w->kjvFreq).c_str() << "\n"; } std::cout << std::endl; return 0; }