summaryrefslogtreecommitdiffstats
path: root/flashtools/flash.cpp
blob: 48cd0789deaf8b1db8c08c945a1b114adbbd48b4 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <map>
#include <sstream>
#include <vector>

#include <swmgr.h>
#include <swbuf.h>
#include <swmodule.h>
#include <utf8utf16.h>

using namespace sword;
using namespace std;

/**
 * Per-lexicon-entry statistics gathered by main(): one instance exists for
 * each distinct numeric key found in the "Lemma" word attribute.
 */
class Word {
public:
	// original-language spelling; main() reads it from the "words" section of gwords.conf
	SWBuf utf8;

	// numeric lexicon key (the key of the map this entry is stored under)
	int strong;

	// how many tagged words carried this key
	int freq;

	// from Strong's lexicon; main() reads it from the "defs" section of gdefs.conf
	SWBuf kjvTrans;

	// computed ourselves: translated text -> number of times it was seen for this key
	map<SWBuf, int> kjvFreq;

	// starts every counter at zero and every string empty
	Word() : utf8(""), strong(0), freq(0), kjvTrans("") {}
};

/** Returns the decimal text form of v, produced by stream insertion. */
std::string itoa(int v) {
	std::ostringstream out;
	out << v;
	return out.str();
}

/**
 * Sort predicate placing the more frequent Word first (descending freq).
 * Neither pointer may be null.
 */
bool compareFreq(const Word *w1, const Word *w2) {
	const int lhs = w1->freq;
	const int rhs = w2->freq;
	return rhs < lhs;
}

/**
 * Sort predicate over map entries referenced by iterator: the entry with the
 * larger count (the mapped int) orders first.
 */
bool compareKJVFreq(const map<SWBuf, int>::const_iterator &i1, const map<SWBuf, int>::const_iterator &i2) {
	const int lhs = (*i1).second;
	const int rhs = (*i2).second;
	return rhs < lhs;
}

/**
 * Renders a translation tally as "text (count); text (count); ...", ordered
 * from the highest count to the lowest. Entries with equal counts keep the
 * map's own key order, so the output is reproducible.
 *
 * An entry whose first character changes when lowercased is folded into the
 * entry with the lowercased first character, when such an entry exists
 * (e.g. "The" is merged into "the"); the folded entry is left out of the
 * result.
 *
 * @param in  translated text -> occurrence count. NOTE: modified in place --
 *            folded counts are added to the lowercase entries, so calling
 *            this twice on the same map adds them twice.
 * @return the formatted list; empty when nothing is listed.
 */
SWBuf prettyKJVFreq(map<SWBuf, int> &in) {
	typedef map<SWBuf, int>::const_iterator EntryIt;

	SWBuf retVal;
	vector<EntryIt> sorted;
	for (EntryIt it = in.begin(); it != in.end(); ++it) {
		// <cctype> functions require a value representable as unsigned char;
		// handing them a negative plain char (any non-ASCII byte) is undefined.
		const unsigned char first = static_cast<unsigned char>(it->first[0]);

		// combine cap words with lowercase, if exists
		if (toupper(first) == first) {
			SWBuf key = it->first;
			key[0] = static_cast<char>(tolower(first));
			if (key != it->first) {
				map<SWBuf, int>::iterator lower = in.find(key);
				if (lower != in.end()) {
					lower->second += it->second;
					// don't include us in the list because we added our freq to another
					continue;
				}
			}
		}
		sorted.push_back(it);
	}

	// Sorting happens only after every fold, so the iterators already see the
	// merged counts. stable_sort keeps ties in map order.
	std::stable_sort(sorted.begin(), sorted.end(), compareKJVFreq);

	for (vector<EntryIt>::const_iterator entry = sorted.begin(); entry != sorted.end(); ++entry) {
		if (retVal.size()) retVal += "; ";
		retVal.appendFormatted("%s (%d)", (*entry)->first.c_str(), (*entry)->second);
	}
	return retVal;
}

/**
 * Builds an ASCII-only rendering of the given text. After the text has been
 * run through the UTF8UTF16 filter, each 16-bit unit below 128 is copied
 * through as a single character and every other unit is written as a
 * backslash-u escape followed by four uppercase hex digits.
 *
 * @param inText  text to escape; taken by value because the filter rewrites
 *                the buffer it is given.
 * @return the escaped text.
 */
SWBuf escapedUTF8(SWBuf inText) {
	static UTF8UTF16 convert;
	convert.processText(inText);

	SWBuf retBuf;
	// NOTE(review): this walk assumes the filtered buffer ends with a 16-bit
	// zero unit -- confirm against UTF8UTF16::processText.
	for (const unsigned short *unit = reinterpret_cast<const unsigned short *>(inText.getRawData()); *unit; ++unit) {
		if (*unit < 128) {
			retBuf += static_cast<char>(*unit);
		}
		else {
			// %X emits uppercase hex directly. The former lowercase-then-toupper
			// pass only reached the last three of the four digits.
			retBuf.appendFormatted("\\u%.4X", *unit);
		}
	}
	return retBuf;
}


int main(int argc, char **argv)
{
	
	SWMgr manager;
	SWModule *bible;
	SWConfig utf8("gwords.conf");
	SWConfig defs("gdefs.conf");
	map<int, Word> wordList;

	bible = manager.getModule("KJV");

	for (bible->setKey("matt.1.1"); !bible->Error(); (*bible)++) {
		bible->RenderText();		// force an entry lookup to resolve key to something in the index

		AttributeList &words = bible->getEntryAttributes()["Word"];
		for (AttributeList::iterator word = words.begin(); word != words.end(); word++) {
			SWBuf strong = word->second["Lemma"];
			SWBuf text = word->second["Text"];
			text.trim();
			// trim punctuation from end
			while (text.size() && (strchr(".;,?-!\"()[]{}':/\t\r\n ", text[text.size()-1]))) text.setSize(text.size()-1);
			if (!text.size()) text = "[Untranslated]";
			strong << 1;
			wordList[atoi(strong.c_str())].freq++;
			wordList[atoi(strong.c_str())].kjvFreq[text]++;
//			cout << strong << "\n";
		}
	}
	vector<Word *> sorted;
	for (map<int, Word>::iterator it = wordList.begin(); it != wordList.end(); it++) {
		it->second.strong = it->first;
		it->second.kjvTrans = defs["defs"][itoa(it->first).c_str()];
		it->second.utf8 = utf8["words"][itoa(it->first).c_str()];
		sorted.push_back(&it->second);
	}
	
	sort(sorted.begin(), sorted.end(), compareFreq);

	for (vector<Word *>::iterator it = sorted.begin(); it != sorted.end(); it++) {
		Word *w = (*it);
//		cout << w->freq << "|" << escapedUTF8(w->utf8).c_str() << "|" << w->strong << "|" << prettyKJVFreq(w->kjvFreq).c_str() << "\n";
		cout << w->freq << "|" << w->utf8.c_str() << "|" << w->strong << "|" << prettyKJVFreq(w->kjvFreq).c_str() << "\n";
	}
	std::cout << std::endl;
	return 0;
}