summaryrefslogtreecommitdiffstats
path: root/flashtools/flash.cpp
diff options
context:
space:
mode:
author: Troy A. Griffitts <scribe@crosswire.org> 2007-05-22 18:24:38 +0000
committer: Troy A. Griffitts <scribe@crosswire.org> 2007-05-22 18:24:38 +0000
commit: cc0ac51d3a7e4dd8ea61a0279ec31d429436da38 (patch)
tree: 9de56099ce641eec45e535c28225453869bebd32 /flashtools/flash.cpp
parent: ae3edd5f218a2fa6be1d104965c221076f750f48 (diff)
download: sword-tools-cc0ac51d3a7e4dd8ea61a0279ec31d429436da38.tar.gz
Added facility to collate words from sword module
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@85 07627401-56e2-0310-80f4-f8cd0041bdcd
Diffstat (limited to 'flashtools/flash.cpp')
-rw-r--r--flashtools/flash.cpp131
1 files changed, 131 insertions, 0 deletions
diff --git a/flashtools/flash.cpp b/flashtools/flash.cpp
new file mode 100644
index 0000000..49468bc
--- /dev/null
+++ b/flashtools/flash.cpp
@@ -0,0 +1,131 @@
+#include <algorithm>
+#include <cctype>
+#include <cstdlib>
+#include <iostream>
+#include <map>
+#include <sstream>
+#include <vector>
+
+#include <swmgr.h>
+#include <swbuf.h>
+#include <swmodule.h>
+#include <utf8utf16.h>
+
+using namespace sword;
+using namespace std;
+
+/**
+ * One entry of the word list built in main(): a word identified by its
+ * Strong's number, plus the usage counts collected for it.
+ */
+class Word {
+public:
+    // Word text; main() fills this from the "words" section of gwords.conf.
+    SWBuf utf8;
+    // Strong's number; main() copies the word-list map key into it.
+    int strong;
+    // Total number of occurrences counted.
+    int freq;
+    // From the Strong's lexicon; main() fills this from the "defs" section of gdefs.conf.
+    SWBuf kjvTrans;
+    // Computed ourselves: rendering text -> number of times it was seen.
+    map<SWBuf, int> kjvFreq;
+
+    // Starts with empty strings, zeroed counters and an empty rendering table.
+    Word() : utf8(""), strong(0), freq(0), kjvTrans("") {}
+};
+
+// Returns the decimal text form of v, produced by streaming it into a string stream.
+string itoa(int v) {
+    ostringstream out;
+    out << v;
+    return out.str();
+}
+
+// Ordering predicate for sort(): true when w1 has the strictly higher total
+// frequency, so the most frequent words end up first.
+bool compareFreq(const Word *w1, const Word *w2) {
+    const bool firstIsMoreFrequent = w2->freq < w1->freq;
+    return firstIsMoreFrequent;
+}
+
+// Ordering predicate for sort() over map iterators: true when the entry behind
+// i1 has the strictly higher count, so the largest counts end up first.
+bool compareKJVFreq(const map<SWBuf, int>::const_iterator &i1, const map<SWBuf, int>::const_iterator &i2) {
+    const int firstCount = i1->second;
+    const int secondCount = i2->second;
+    return secondCount < firstCount;
+}
+
+/**
+ * Builds a one-line summary of a rendering table in the form
+ * "text (count); text (count); ...", highest count first.
+ *
+ * A rendering whose first character has a distinct lower-case form is folded
+ * into its lower-case-initial twin when that twin is also a key of the map
+ * (their counts are added and only the lower-case key is listed); otherwise
+ * it is listed unchanged.
+ *
+ * @param in  rendering text -> occurrence count; it is only read, never modified
+ * @return    the items joined with "; ", or an empty buffer when in is empty
+ */
+SWBuf prettyKJVFreq(map<SWBuf, int> &in) {
+    // Merge into a private copy so the caller's counts stay untouched and
+    // calling this twice on the same map yields the same text.
+    map<SWBuf, int> merged;
+    for (map<SWBuf, int>::const_iterator it = in.begin(); it != in.end(); ++it) {
+        SWBuf key = it->first;
+        if (key.size()) {
+            // The <cctype> functions are only defined for values representable
+            // as unsigned char, so convert before calling tolower().
+            const unsigned char first = (unsigned char)key[0];
+            const char lowered = (char)tolower(first);
+            if (lowered != key[0]) {
+                SWBuf lowerKey = key;
+                lowerKey[0] = lowered;
+                // fold only when the lower-case spelling exists in its own right
+                if (in.find(lowerKey) != in.end()) key = lowerKey;
+            }
+        }
+        merged[key] += it->second;
+    }
+
+    vector<map<SWBuf, int>::const_iterator> sorted;
+    for (map<SWBuf, int>::const_iterator it = merged.begin(); it != merged.end(); ++it) {
+        sorted.push_back(it);
+    }
+    // stable_sort keeps equally frequent renderings in the map's key order,
+    // which makes the output reproducible from run to run.
+    stable_sort(sorted.begin(), sorted.end(), compareKJVFreq);
+
+    SWBuf retVal;
+    for (vector<map<SWBuf, int>::const_iterator>::const_iterator it = sorted.begin(); it != sorted.end(); ++it) {
+        if (retVal.size()) retVal += "; ";
+        retVal.appendFormatted("%s (%d)", (*it)->first.c_str(), (*it)->second);
+    }
+    return retVal;
+}
+
+/**
+ * Returns inText with every 16-bit unit of value 128 or above written as a
+ * backslash-u escape followed by four upper-case hex digits; units below 128
+ * are copied through as single chars.
+ *
+ * The text is first run through the UTF8UTF16 filter and then read back as
+ * unsigned short units until a zero unit is met.
+ * NOTE(review): this presumably relies on the filter leaving a 16-bit zero
+ * terminator in the buffer -- confirm against the UTF8UTF16 implementation.
+ *
+ * @param inText  text to escape; taken by value because the filter rewrites it in place
+ * @return        the escaped text
+ */
+SWBuf escapedUTF8(SWBuf inText) {
+    static UTF8UTF16 convert;
+    convert.processText(inText);
+    SWBuf retBuf;
+    for (unsigned short *unit = (unsigned short *)inText.getRawData(); *unit; unit++) {
+        if (*unit < 128) {
+            retBuf += (char)*unit;
+        }
+        else {
+            // %X emits upper-case hex digits directly. The earlier post-pass
+            // upper-cased only the last three digits and left the first one
+            // in lower case.
+            retBuf.appendFormatted("\\u%.4X", *unit);
+        }
+    }
+    return retBuf;
+}
+
+
+/**
+ * Collates word usage from the KJV module.
+ *
+ * Walks the module from Matthew 1:1 until it reports an error, and for every
+ * "Word" entry attribute counts the lemma number and the text it was
+ * rendered with. Each collected word is then completed from gwords.conf
+ * ("words" section) and gdefs.conf ("defs" section), and one line per word
+ * is printed, most frequent first:
+ *     freq|word|strong number|rendering (count); rendering (count); ...
+ *
+ * @return 0 on success, 1 when the KJV module cannot be found
+ */
+int main(int argc, char **argv)
+{
+    SWMgr manager;
+    SWConfig utf8("gwords.conf");
+    SWConfig defs("gdefs.conf");
+    map<int, Word> wordList;
+
+    SWModule *bible = manager.getModule("KJV");
+    if (!bible) {
+        // Without this check a missing module would be dereferenced below.
+        cerr << "Could not find module [KJV]. Is it installed?\n";
+        return 1;
+    }
+
+    for (bible->setKey("matt.1.1"); !bible->Error(); (*bible)++) {
+        bible->RenderText();    // force an entry lookup to resolve key to something in the index
+
+        AttributeList &words = bible->getEntryAttributes()["Word"];
+        for (AttributeList::iterator word = words.begin(); word != words.end(); ++word) {
+            SWBuf strong = word->second["Lemma"];
+            SWBuf text = word->second["Text"];
+            text.trim();
+            if (!text.size()) text = "[Untranslated]";
+            // NOTE(review): presumably shifts off the leading language letter
+            // (e.g. the 'G' of "G1234") so atoi() sees only digits -- confirm
+            // against SWBuf::operator<< and the module's Lemma format.
+            strong << 1;
+            // One lookup serves both counters.
+            Word &entry = wordList[atoi(strong.c_str())];
+            entry.freq++;
+            entry.kjvFreq[text]++;
+        }
+    }
+
+    // Fill in the per-word details and collect pointers for sorting; the map
+    // is not modified afterwards, so the pointers stay valid.
+    vector<Word *> sorted;
+    for (map<int, Word>::iterator it = wordList.begin(); it != wordList.end(); ++it) {
+        it->second.strong = it->first;
+        it->second.kjvTrans = defs["defs"][itoa(it->first).c_str()];
+        it->second.utf8 = utf8["words"][itoa(it->first).c_str()];
+        sorted.push_back(&it->second);
+    }
+
+    sort(sorted.begin(), sorted.end(), compareFreq);
+
+    for (vector<Word *>::iterator it = sorted.begin(); it != sorted.end(); ++it) {
+        Word *w = (*it);
+//      cout << w->freq << "|" << escapedUTF8(w->utf8).c_str() << "|" << w->strong << "|" << prettyKJVFreq(w->kjvFreq).c_str() << "\n";
+        cout << w->freq << "|" << w->utf8.c_str() << "|" << w->strong << "|" << prettyKJVFreq(w->kjvFreq).c_str() << "\n";
+    }
+    std::cout << std::endl;
+    return 0;
+}