From cc0ac51d3a7e4dd8ea61a0279ec31d429436da38 Mon Sep 17 00:00:00 2001 From: "Troy A. Griffitts" Date: Tue, 22 May 2007 18:24:38 +0000 Subject: Added facility to collate words from sword module git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@85 07627401-56e2-0310-80f4-f8cd0041bdcd --- flashtools/flash.cpp | 131 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) create mode 100644 flashtools/flash.cpp (limited to 'flashtools/flash.cpp') diff --git a/flashtools/flash.cpp b/flashtools/flash.cpp new file mode 100644 index 0000000..49468bc --- /dev/null +++ b/flashtools/flash.cpp @@ -0,0 +1,131 @@ +#include +#include +#include +#include + +#include +#include +#include +#include + +using namespace sword; +using namespace std; + +class Word { +public: +Word() + : utf8("") + , strong(0) + , freq(0) + , kjvTrans("") +{} +SWBuf utf8; +int strong; +int freq; +// from stongs lex +SWBuf kjvTrans; +// computed ourselves +map kjvFreq; +}; + +string itoa(int v) { stringstream str; str << v; return str.str(); } + +bool compareFreq(const Word *w1, const Word *w2) { + return w1->freq > w2->freq; +} + +bool compareKJVFreq(const map::const_iterator &i1, const map::const_iterator &i2) { + return i1->second > i2->second; +} + +SWBuf prettyKJVFreq(map &in) { + SWBuf retVal; + vector::const_iterator> sorted; + for (map::const_iterator it = in.begin(); it != in.end(); it++) { + // combine cap words with lowercase, if exists + if (toupper(it->first[0]) == it->first[0]) { + SWBuf key = it->first; + key[0] = tolower(key[0]); + if (key != it->first) { + map::iterator i = in.find(key); + if (i != in.end()) { + i->second += it->second; + // don't include us in the list cuz we added out freq to another + continue; + } + } + } + sorted.push_back(it); + } + sort(sorted.begin(), sorted.end(), compareKJVFreq); + for (vector::const_iterator>::const_iterator it = sorted.begin(); it != sorted.end(); it++) { + if (retVal.size()) retVal += "; "; + retVal.appendFormatted("%s (%d)", (*it)->first.c_str(), (*it)->second); + } + return retVal; +} + +SWBuf escapedUTF8(SWBuf inText) { + static UTF8UTF16 convert; + convert.processText(inText); + SWBuf retBuf; + for (unsigned short *i = (unsigned short *)inText.getRawData(); *i; i++) { + if (*i < 128) { + retBuf += (char)*i; + } + else { + retBuf.appendFormatted("\\u%.4x", *i); + // change hex alpha values to upper case + for (int i = retBuf.size()-1; i > retBuf.size() - 4; i--) { + retBuf[i] = toupper(retBuf[i]); + } + } + } + return retBuf; +} + + +int main(int argc, char **argv) +{ + + SWMgr manager; + SWModule *bible; + SWConfig utf8("gwords.conf"); + SWConfig defs("gdefs.conf"); + map wordList; + + bible = manager.getModule("KJV"); + + for (bible->setKey("matt.1.1"); !bible->Error(); (*bible)++) { + bible->RenderText(); // force an entry lookup to resolve key to something in the index + + AttributeList &words = bible->getEntryAttributes()["Word"]; + for (AttributeList::iterator word = words.begin(); word != words.end(); word++) { + SWBuf strong = word->second["Lemma"]; + SWBuf text = word->second["Text"]; + text.trim(); + if (!text.size()) text = "[Untranslated]"; + strong << 1; + wordList[atoi(strong.c_str())].freq++; + wordList[atoi(strong.c_str())].kjvFreq[text]++; +// cout << strong << "\n"; + } + } + vector sorted; + for (map::iterator it = wordList.begin(); it != wordList.end(); it++) { + it->second.strong = it->first; + it->second.kjvTrans = defs["defs"][itoa(it->first).c_str()]; + it->second.utf8 = utf8["words"][itoa(it->first).c_str()]; + sorted.push_back(&it->second); + } + + sort(sorted.begin(), sorted.end(), compareFreq); + + for (vector::iterator it = sorted.begin(); it != sorted.end(); it++) { + Word *w = (*it); +// cout << w->freq << "|" << escapedUTF8(w->utf8).c_str() << "|" << w->strong << "|" << prettyKJVFreq(w->kjvFreq).c_str() << "\n"; + cout << w->freq << "|" << w->utf8.c_str() << "|" << w->strong << "|" << prettyKJVFreq(w->kjvFreq).c_str() << "\n"; + } + std::cout << std::endl; + return 0; +} -- cgit