summary refs log tree commit diff stats
path: root/flashtools
diff options
context:
space:
mode:
author Troy A. Griffitts <scribe@crosswire.org> 2007-09-02 18:27:12 +0000
committer Troy A. Griffitts <scribe@crosswire.org> 2007-09-02 18:27:12 +0000
commit b11ba6b03dfe8ecdcaa8a71747df6da1fcf21112 (patch)
tree 52c10ba77e33db35cf7beb9bdc1e9d8d2993e268 /flashtools
parent 50905796dd888f347111ed05da4cce83efe4d315 (diff)
download sword-tools-b11ba6b03dfe8ecdcaa8a71747df6da1fcf21112.tar.gz
Started rework of flash tools to work with combined lemma of the new KJV module rev.

git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@95 07627401-56e2-0310-80f4-f8cd0041bdcd
Diffstat (limited to 'flashtools')
-rw-r--r-- flashtools/Makefile 2
-rw-r--r-- flashtools/flash.cpp 259
2 files changed, 180 insertions, 81 deletions
diff --git a/flashtools/Makefile b/flashtools/Makefile
index b8683c9..dc41e0a 100644
--- a/flashtools/Makefile
+++ b/flashtools/Makefile
@@ -1,7 +1,9 @@
TARGETS= flash
all: $(TARGETS)
+ mkdir -p hebFreq hebFreqKJV greekFreq greekFreqKJV
clean:
+ rm -rf hebFreq hebFreqKJV greekFreq greekFreqKJV
rm $(TARGETS)
.cpp:
diff --git a/flashtools/flash.cpp b/flashtools/flash.cpp
index eedc959..b4fd726 100644
--- a/flashtools/flash.cpp
+++ b/flashtools/flash.cpp
@@ -14,46 +14,55 @@
using namespace sword;
using namespace std;
+class PhraseCount {
+public:
+ PhraseCount()
+ : count(0)
+ {}
+ int count;
+ vector<SWBuf> with;
+};
+
class Word {
public:
Word()
: utf8("")
- , strong(0)
+ , strong("")
, freq(0)
- , kjvTrans("")
+ , def("")
{}
SWBuf utf8;
- int strong;
+ SWBuf strong;
int freq;
// from stongs lex
- SWBuf kjvTrans;
+ SWBuf def;
// computed ourselves
- map<SWBuf, int> kjvFreq;
+ map<SWBuf, PhraseCount> kjvFreq;
};
string itoa(int v) { stringstream str; str << v; return str.str(); }
-bool compareFreq(const Word *w1, const Word *w2) {
- return w1->freq > w2->freq;
+bool compareFreq(const Word &w1, const Word &w2) {
+ return w1.freq > w2.freq;
}
-bool compareKJVFreq(const map<SWBuf, int>::const_iterator &i1, const map<SWBuf, int>::const_iterator &i2) {
- return i1->second > i2->second;
+bool compareKJVFreq(const map<SWBuf, PhraseCount>::const_iterator &i1, const map<SWBuf, PhraseCount>::const_iterator &i2) {
+ return i1->second.count > i2->second.count;
}
-SWBuf prettyKJVFreq(map<SWBuf, int> &in) {
+SWBuf prettyKJVFreq(map<SWBuf, PhraseCount> in) {
SWBuf retVal;
- vector<map<SWBuf, int>::const_iterator> sorted;
- for (map<SWBuf, int>::const_iterator it = in.begin(); it != in.end(); it++) {
+ vector<map<SWBuf, PhraseCount>::const_iterator> sorted;
+ for (map<SWBuf, PhraseCount>::const_iterator it = in.begin(); it != in.end(); it++) {
// combine cap words with lowercase, if exists
- if (toupper(it->first[0]) == it->first[0] && it->first != "God" && it->first != "Lord") {
- SWBuf key = it->first;
- key[0] = tolower(key[0]);
- if (key != it->first) {
- map<SWBuf, int>::iterator i = in.find(key);
+ SWBuf k = it->first;
+ if (k.size() && toupper(k[0]) == k[0] && k != "God" && k != "Lord") {
+ k[0] = tolower(k[0]);
+ if (k != it->first) {
+ map<SWBuf, PhraseCount>::iterator i = in.find(k);
if (i != in.end()) {
- i->second += it->second;
- // don't include us in the list cuz we added out freq to another
+ i->second.count += it->second.count;
+ // don't include us in the list cuz we added our freq to another
continue;
}
}
@@ -61,9 +70,17 @@ SWBuf prettyKJVFreq(map<SWBuf, int> &in) {
sorted.push_back(it);
}
sort(sorted.begin(), sorted.end(), compareKJVFreq);
- for (vector<map<SWBuf, int>::const_iterator>::const_iterator it = sorted.begin(); it != sorted.end(); it++) {
+ for (vector<map<SWBuf, PhraseCount>::const_iterator>::const_iterator it = sorted.begin(); it != sorted.end(); it++) {
if (retVal.size()) retVal += "; ";
- retVal.appendFormatted("%s (%d)", (*it)->first.c_str(), (*it)->second);
+ // prepend 'with other strongs' if present
+ if ((*it)->second.with.size()) {
+ retVal += "[+";
+ for (int i = 0; i < (*it)->second.with.size(); i++) {
+ retVal.appendFormatted(" %s", (*it)->second.with[i].c_str());
+ }
+ retVal += " ] ";
+ }
+ retVal.appendFormatted("%s (%d)", (*it)->first.c_str(), (*it)->second.count);
}
return retVal;
}
@@ -88,49 +105,67 @@ SWBuf escapedUTF8(SWBuf inText) {
}
-void outputCSV(vector<Word *> &wordList) {
- for (vector<Word *>::iterator it = wordList.begin(); it != wordList.end(); it++) {
- Word *w = (*it);
+void outputCSV(vector<Word> &wordList) {
+ for (vector<Word>::iterator it = wordList.begin(); it != wordList.end(); it++) {
+ Word &w = (*it);
// cout << w->freq << "|" << escapedUTF8(w->utf8).c_str() << "|" << w->strong << "|" << prettyKJVFreq(w->kjvFreq).c_str() << "\n";
- cout << w->freq << "|" << w->utf8.c_str() << "|" << w->strong << "|" << prettyKJVFreq(w->kjvFreq).c_str() << "|" << w->kjvTrans << "\n";
+ cout << w.freq << "|" << w.utf8.c_str() << "|" << w.strong << "|" << prettyKJVFreq(w.kjvFreq).c_str() << "|" << w.def << "\n";
}
std::cout << std::endl;
}
-void outputFlash(vector<Word *> &wordList, int maxPerLesson) {
+/**
+ * output our flashcard .flash file format
+ *
+ * wordList - duh
+ * outputDir - directory path where to write files, e.g. "./hebFreq"
+ * kjvFreq - if true, process KJV translation frequencies and use these as
+ * the word answers; otherwise, use short strongs defs.
+ * maxPerLesson - maximum number of words per lesson
+ *
+ */
+void outputFlash(const vector<Word> &wordList, const char *outputDir = ".", bool kjvFreq = true, int maxPerLesson = 25) {
ThMLPlain strip;
ofstream ofile;
- int wordCount = 0;
+ int wordCount = 0;
int lessonNumber = 0;
- int startFreq = 0;
- int lastFreq = 0;
+ int startFreq = 0;
+ int lastFreq = 0;
- vector<Word *>::iterator it = wordList.begin();
+ vector<Word>::const_iterator it = wordList.begin();
while (it != wordList.end()) {
- Word *w = (*it);
+ const Word &w = (*it);
if (!wordCount) {
- SWBuf fname = "lesson";
+ SWBuf fname = outputDir;
+ fname += "/lesson";
fname.appendFormatted("%d", lessonNumber);
fname += ".flash";
ofile.open(fname);
- startFreq = w->freq;
+ startFreq = w.freq;
}
- // use if you want answers as KJV phrases
- SWBuf answers = prettyKJVFreq(w->kjvFreq);
- if (answers.size() > 200) answers.size(200);
-
- // use if you would rather have short strongs
-// SWBuf answers = w->kjvTrans;
-// strip.processText(answers); // remove html tags
-// answers.replaceBytes("\n\r", ' '); // remove newlines
+ SWBuf word = w.utf8;
+ word.trim();
+ SWBuf answers = "";
+ answers.trim();
+ // if we want answers as KJV phrases
+ if (kjvFreq) {
+ answers = prettyKJVFreq(w.kjvFreq);
+ if (answers.size() > 200) answers.size(200);
+ }
+ // if we would rather have short strongs
+ else {
+ SWBuf answers = w.def;
+ strip.processText(answers); // remove html tags
+ answers.replaceBytes("\n\r", ' '); // remove newlines
+ }
// be sure we have both a word and an answer
- if (w->utf8.trim().size() && answers.trim().size()) {
- ofile << "word" << wordCount << "=" << escapedUTF8(w->utf8) << "\n";
+ if (word.size() && answers.size()) {
+ ofile << "word" << wordCount << "=" << escapedUTF8(word) << "\n";
ofile << "answers" << wordCount << "=" << answers << "\n";
- lastFreq = w->freq;
+ lastFreq = w.freq;
wordCount++;
}
@@ -149,53 +184,115 @@ void outputFlash(vector<Word *> &wordList, int maxPerLesson) {
}
}
-
-int main(int argc, char **argv)
-{
-
+/**
+ * do the work
+ *
+ * range - the range of verses to process (e.g. "gen-mal")
+ * addAll - if we should add all words in our lexicon for the testaments
+ * included in the range even if they don't exist in the text
+ *
+ */
+vector<Word> processWords(const char *range, bool addAll = true) {
SWMgr manager;
- SWModule *bible = manager.getModule("KJV");
- map<int, Word> wordList;
+ SWModule &bible = *manager.getModule("KJV");
+ map<SWBuf, Word> wordList;
- SWConfig utf8("hwords.conf");
- SWConfig defs("hdefs.conf");
-// SWConfig utf8("gwords.conf");
-// SWConfig defs("gdefs.conf");
+ SWConfig hutf8("hwords.conf");
+ SWConfig hdefs("hdefs.conf");
+ SWConfig gutf8("gwords.conf");
+ SWConfig gdefs("gdefs.conf");
- for (bible->setKey("gen.1.1"); ((VerseKey*)bible->getKey())->Testament() == 1; (*bible)++) {
-// for (bible->setKey("mat.1.1"); !bible->Error(); (*bible)++) {
- bible->RenderText(); // force an entry lookup to resolve key to something in the index
+ VerseKey parser;
+ ListKey r = parser.ParseVerseList(range, 0, true);
+ r.Persist(true);
+ bible.setKey(r);
+ for (bible = TOP; !bible.Error(); bible++) {
+ bible.RenderText(); // force an entry lookup to resolve key to something in the index
- AttributeList &words = bible->getEntryAttributes()["Word"];
+ AttributeList &words = bible.getEntryAttributes()["Word"];
for (AttributeList::iterator word = words.begin(); word != words.end(); word++) {
- SWBuf strong = word->second["Lemma"];
- SWBuf text = word->second["Text"];
- text.trim();
- // trim punctuation from end
- while (text.size() && (strchr(".;,?-!\"()[]{}':/\t\r\n ", text[text.size()-1]))) text.setSize(text.size()-1);
- if (!text.size()) text = "[Untranslated]";
- strong << 1;
- wordList[atoi(strong.c_str())].freq++;
- wordList[atoi(strong.c_str())].kjvFreq[text]++;
-// cout << strong << "\n";
+ SWBuf partCount = word->second["PartCount"];
+ int parts = atoi(partCount.c_str());
+ if (parts < 1) parts = 1;
+
+ // build a list of all lemmas for use later in 'with'
+ // i.e. 'translated xxx with Gnnnn1, Gnnnn2'
+ list<SWBuf> lemmas;
+ for (int i = 1; i <= parts; i++) {
+ SWBuf lemKey = "Lemma";
+ if (parts > 1) lemKey.appendFormatted(".%d", i);
+ lemmas.push_back(word->second[lemKey]);
+ }
+
+ for (int i = 1; i <= parts; i++) {
+ SWBuf lemKey = "Lemma";
+ if (parts > 1) lemKey.appendFormatted(".%d", i);
+ SWBuf strong = word->second[lemKey];
+ SWBuf text = word->second["Text"];
+ if ((parts > 2) && (strong == "G3588")) {
+ text = "[article]";
+ }
+ else {
+ text.trim();
+ // trim punctuation from end
+ while (text.size() && (strchr(".;,?-!\"()[]{}':/\t\r\n ", text[text.size()-1]))) text.setSize(text.size()-1);
+ if (!text.size()) text = "[Untranslated]";
+ }
+ wordList[strong].kjvFreq[text].count++;
+ if (parts > 1) {
+ list<SWBuf> withoutMe = lemmas;
+ withoutMe.remove(strong);
+ wordList[strong].kjvFreq[text].with = vector<SWBuf>(withoutMe.begin(), withoutMe.end());
+ }
+ wordList[strong].freq++;
+ }
}
}
- // first use utf8 list to iterate and add utf8 entries.\
- // this assures we have an entry for every word, even it it is not
- // present in the module
- for (ConfigEntMap::iterator it = utf8["words"].begin(); it != utf8["words"].end(); it++) {
- wordList[atoi(it->first)].utf8 = it->second;
+
+ if (addAll) {
+ // first use utf8 list to iterate and add utf8 entries.\
+ // this assures we have an entry for every word, even if it is not
+ // present in the module
+ r = TOP;
+ if (VerseKey(r).Testament() == 1) {
+ for (ConfigEntMap::iterator it = hutf8["words"].begin(); it != hutf8["words"].end(); it++) {
+ wordList[(SWBuf)"H"+it->first].utf8 = it->second;
+ }
+ }
+ r = BOTTOM;
+ if (VerseKey(r).Testament() == 2) {
+ for (ConfigEntMap::iterator it = gutf8["words"].begin(); it != gutf8["words"].end(); it++) {
+ wordList[(SWBuf)"G"+it->first].utf8 = it->second;
+ }
+ }
}
- vector<Word *> sorted;
- for (map<int, Word>::iterator it = wordList.begin(); it != wordList.end(); it++) {
- it->second.strong = it->first;
- it->second.kjvTrans = defs["defs"][itoa(it->first).c_str()];
- sorted.push_back(&it->second);
+
+ vector<Word> sorted;
+ for (map<SWBuf, Word>::iterator it = wordList.begin(); it != wordList.end(); it++) {
+ // pull strongs key from map and populate Word
+ SWBuf s = it->first;
+ it->second.strong = s;
+ // populate lex defs
+ it->second.def = (s[0] == 'G') ?
+ gdefs["defs"][(s << 1).c_str()] :
+ hdefs["defs"][(s << 1).c_str()];
+ // put only word in sorted container
+ sorted.push_back(it->second);
}
-
sort(sorted.begin(), sorted.end(), compareFreq);
-// outputCSV(sorted);
- outputFlash(sorted, 25);
+
+ return sorted;
+}
+
+
+int main(int argc, char **argv)
+{
+ outputFlash(processWords("gen-mal"), "hebFreqKJV" , true);
+ outputFlash(processWords("gen-mal"), "hebFreq" , false);
+ outputFlash(processWords("mat-rev"), "greekFreqKJV", true);
+ outputFlash(processWords("mat-rev"), "greekFreq" , false);
+// outputCSV(processWords("mat-rev"));
+
return 0;
}