From b11ba6b03dfe8ecdcaa8a71747df6da1fcf21112 Mon Sep 17 00:00:00 2001 From: "Troy A. Griffitts" Date: Sun, 2 Sep 2007 18:27:12 +0000 Subject: Started rework of flash tools to work with combined lemma of the new KJV module rev. git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@95 07627401-56e2-0310-80f4-f8cd0041bdcd --- flashtools/Makefile | 2 + flashtools/flash.cpp | 259 +++++++++++++++++++++++++++++++++++---------------- 2 files changed, 180 insertions(+), 81 deletions(-) (limited to 'flashtools') diff --git a/flashtools/Makefile b/flashtools/Makefile index b8683c9..dc41e0a 100644 --- a/flashtools/Makefile +++ b/flashtools/Makefile @@ -1,7 +1,9 @@ TARGETS= flash all: $(TARGETS) + mkdir -p hebFreq hebFreqKJV greekFreq greekFreqKJV clean: + rm -rf hebFreq hebFreqKJV greekFreq greekFreqKJV rm $(TARGETS) .cpp: diff --git a/flashtools/flash.cpp b/flashtools/flash.cpp index eedc959..b4fd726 100644 --- a/flashtools/flash.cpp +++ b/flashtools/flash.cpp @@ -14,46 +14,55 @@ using namespace sword; using namespace std; +class PhraseCount { +public: + PhraseCount() + : count(0) + {} + int count; + vector with; +}; + class Word { public: Word() : utf8("") - , strong(0) + , strong("") , freq(0) - , kjvTrans("") + , def("") {} SWBuf utf8; - int strong; + SWBuf strong; int freq; // from stongs lex - SWBuf kjvTrans; + SWBuf def; // computed ourselves - map kjvFreq; + map kjvFreq; }; string itoa(int v) { stringstream str; str << v; return str.str(); } -bool compareFreq(const Word *w1, const Word *w2) { - return w1->freq > w2->freq; +bool compareFreq(const Word &w1, const Word &w2) { + return w1.freq > w2.freq; } -bool compareKJVFreq(const map::const_iterator &i1, const map::const_iterator &i2) { - return i1->second > i2->second; +bool compareKJVFreq(const map::const_iterator &i1, const map::const_iterator &i2) { + return i1->second.count > i2->second.count; } -SWBuf prettyKJVFreq(map &in) { +SWBuf prettyKJVFreq(map in) { SWBuf retVal; - vector::const_iterator> sorted; - for (map::const_iterator it = in.begin(); it != in.end(); it++) { + vector::const_iterator> sorted; + for (map::const_iterator it = in.begin(); it != in.end(); it++) { // combine cap words with lowercase, if exists - if (toupper(it->first[0]) == it->first[0] && it->first != "God" && it->first != "Lord") { - SWBuf key = it->first; - key[0] = tolower(key[0]); - if (key != it->first) { - map::iterator i = in.find(key); + SWBuf k = it->first; + if (k.size() && toupper(k[0]) == k[0] && k != "God" && k != "Lord") { + k[0] = tolower(k[0]); + if (k != it->first) { + map::iterator i = in.find(k); if (i != in.end()) { - i->second += it->second; - // don't include us in the list cuz we added out freq to another + i->second.count += it->second.count; + // don't include us in the list cuz we added our freq to another continue; } } @@ -61,9 +70,17 @@ SWBuf prettyKJVFreq(map &in) { sorted.push_back(it); } sort(sorted.begin(), sorted.end(), compareKJVFreq); - for (vector::const_iterator>::const_iterator it = sorted.begin(); it != sorted.end(); it++) { + for (vector::const_iterator>::const_iterator it = sorted.begin(); it != sorted.end(); it++) { if (retVal.size()) retVal += "; "; - retVal.appendFormatted("%s (%d)", (*it)->first.c_str(), (*it)->second); + // prepend 'with other strongs' if present + if ((*it)->second.with.size()) { + retVal += "[+"; + for (int i = 0; i < (*it)->second.with.size(); i++) { + retVal.appendFormatted(" %s", (*it)->second.with[i].c_str()); + } + retVal += " ] "; + } + retVal.appendFormatted("%s (%d)", (*it)->first.c_str(), (*it)->second.count); } return retVal; } @@ -88,49 +105,67 @@ SWBuf escapedUTF8(SWBuf inText) { } -void outputCSV(vector &wordList) { - for (vector::iterator it = wordList.begin(); it != wordList.end(); it++) { - Word *w = (*it); +void outputCSV(vector &wordList) { + for (vector::iterator it = wordList.begin(); it != wordList.end(); it++) { + Word &w = (*it); // cout << w->freq << "|" << escapedUTF8(w->utf8).c_str() << "|" << w->strong << "|" << prettyKJVFreq(w->kjvFreq).c_str() << "\n"; - cout << w->freq << "|" << w->utf8.c_str() << "|" << w->strong << "|" << prettyKJVFreq(w->kjvFreq).c_str() << "|" << w->kjvTrans << "\n"; + cout << w.freq << "|" << w.utf8.c_str() << "|" << w.strong << "|" << prettyKJVFreq(w.kjvFreq).c_str() << "|" << w.def << "\n"; } std::cout << std::endl; } -void outputFlash(vector &wordList, int maxPerLesson) { +/** + * output our flashcard .flash file format + * + * wordList - duh + * outputDir - directory path where to write files, e.g. "./hebFreq" + * kjvFreq - if true, process KJV translation frequencies and use these as + * the word answers; otherwise, use short strongs defs. + * maxPerLesson - maximum number of words per lesson + * + */ +void outputFlash(const vector &wordList, const char *outputDir = ".", bool kjvFreq = true, int maxPerLesson = 25) { ThMLPlain strip; ofstream ofile; - int wordCount = 0; + int wordCount = 0; int lessonNumber = 0; - int startFreq = 0; - int lastFreq = 0; + int startFreq = 0; + int lastFreq = 0; - vector::iterator it = wordList.begin(); + vector::const_iterator it = wordList.begin(); while (it != wordList.end()) { - Word *w = (*it); + const Word &w = (*it); if (!wordCount) { - SWBuf fname = "lesson"; + SWBuf fname = outputDir; + fname += "/lesson"; fname.appendFormatted("%d", lessonNumber); fname += ".flash"; ofile.open(fname); - startFreq = w->freq; + startFreq = w.freq; } - // use if you want answers as KJV phrases - SWBuf answers = prettyKJVFreq(w->kjvFreq); - if (answers.size() > 200) answers.size(200); - - // use if you would rather have short strongs -// SWBuf answers = w->kjvTrans; -// strip.processText(answers); // remove html tags -// answers.replaceBytes("\n\r", ' '); // remove newlines + SWBuf word = w.utf8; + word.trim(); + SWBuf answers = ""; + answers.trim(); + // if we want answers as KJV phrases + if (kjvFreq) { + answers = prettyKJVFreq(w.kjvFreq); + if (answers.size() > 200) answers.size(200); + } + // if we would rather have short strongs + else { + SWBuf answers = w.def; + strip.processText(answers); // remove html tags + answers.replaceBytes("\n\r", ' '); // remove newlines + } // be sure we have both a word and an answer - if (w->utf8.trim().size() && answers.trim().size()) { - ofile << "word" << wordCount << "=" << escapedUTF8(w->utf8) << "\n"; + if (word.size() && answers.size()) { + ofile << "word" << wordCount << "=" << escapedUTF8(word) << "\n"; ofile << "answers" << wordCount << "=" << answers << "\n"; - lastFreq = w->freq; + lastFreq = w.freq; wordCount++; } @@ -149,53 +184,115 @@ void outputFlash(vector &wordList, int maxPerLesson) { } } - -int main(int argc, char **argv) -{ - +/** + * do the work + * + * range - the range of verses to process (e.g. "gen-mal") + * addAll - if we should add all words in our lexicon for the testaments + * included in the range even if they don't exist in the text + * + */ +vector processWords(const char *range, bool addAll = true) { SWMgr manager; - SWModule *bible = manager.getModule("KJV"); - map wordList; + SWModule &bible = *manager.getModule("KJV"); + map wordList; - SWConfig utf8("hwords.conf"); - SWConfig defs("hdefs.conf"); -// SWConfig utf8("gwords.conf"); -// SWConfig defs("gdefs.conf"); + SWConfig hutf8("hwords.conf"); + SWConfig hdefs("hdefs.conf"); + SWConfig gutf8("gwords.conf"); + SWConfig gdefs("gdefs.conf"); - for (bible->setKey("gen.1.1"); ((VerseKey*)bible->getKey())->Testament() == 1; (*bible)++) { -// for (bible->setKey("mat.1.1"); !bible->Error(); (*bible)++) { - bible->RenderText(); // force an entry lookup to resolve key to something in the index + VerseKey parser; + ListKey r = parser.ParseVerseList(range, 0, true); + r.Persist(true); + bible.setKey(r); + for (bible = TOP; !bible.Error(); bible++) { + bible.RenderText(); // force an entry lookup to resolve key to something in the index - AttributeList &words = bible->getEntryAttributes()["Word"]; + AttributeList &words = bible.getEntryAttributes()["Word"]; for (AttributeList::iterator word = words.begin(); word != words.end(); word++) { - SWBuf strong = word->second["Lemma"]; - SWBuf text = word->second["Text"]; - text.trim(); - // trim punctuation from end - while (text.size() && (strchr(".;,?-!\"()[]{}':/\t\r\n ", text[text.size()-1]))) text.setSize(text.size()-1); - if (!text.size()) text = "[Untranslated]"; - strong << 1; - wordList[atoi(strong.c_str())].freq++; - wordList[atoi(strong.c_str())].kjvFreq[text]++; -// cout << strong << "\n"; + SWBuf partCount = word->second["PartCount"]; + int parts = atoi(partCount.c_str()); + if (parts < 1) parts = 1; + + // build a list of all lemmas for use later in 'with' + // i.e. 'translated xxx with Gnnnn1, Gnnnn2' + list lemmas; + for (int i = 1; i <= parts; i++) { + SWBuf lemKey = "Lemma"; + if (parts > 1) lemKey.appendFormatted(".%d", i); + lemmas.push_back(word->second[lemKey]); + } + + for (int i = 1; i <= parts; i++) { + SWBuf lemKey = "Lemma"; + if (parts > 1) lemKey.appendFormatted(".%d", i); + SWBuf strong = word->second[lemKey]; + SWBuf text = word->second["Text"]; + if ((parts > 2) && (strong == "G3588")) { + text = "[article]"; + } + else { + text.trim(); + // trim punctuation from end + while (text.size() && (strchr(".;,?-!\"()[]{}':/\t\r\n ", text[text.size()-1]))) text.setSize(text.size()-1); + if (!text.size()) text = "[Untranslated]"; + } + wordList[strong].kjvFreq[text].count++; + if (parts > 1) { + list withoutMe = lemmas; + withoutMe.remove(strong); + wordList[strong].kjvFreq[text].with = vector(withoutMe.begin(), withoutMe.end()); + } + wordList[strong].freq++; + } } } - // first use utf8 list to iterate and add utf8 entries.\ - // this assures we have an entry for every word, even it it is not - // present in the module - for (ConfigEntMap::iterator it = utf8["words"].begin(); it != utf8["words"].end(); it++) { - wordList[atoi(it->first)].utf8 = it->second; + + if (addAll) { + // first use utf8 list to iterate and add utf8 entries.\ + // this assures we have an entry for every word, even if it is not + // present in the module + r = TOP; + if (VerseKey(r).Testament() == 1) { + for (ConfigEntMap::iterator it = hutf8["words"].begin(); it != hutf8["words"].end(); it++) { + wordList[(SWBuf)"H"+it->first].utf8 = it->second; + } + } + r = BOTTOM; + if (VerseKey(r).Testament() == 2) { + for (ConfigEntMap::iterator it = gutf8["words"].begin(); it != gutf8["words"].end(); it++) { + wordList[(SWBuf)"G"+it->first].utf8 = it->second; + } + } } - vector sorted; - for (map::iterator it = wordList.begin(); it != wordList.end(); it++) { - it->second.strong = it->first; - it->second.kjvTrans = defs["defs"][itoa(it->first).c_str()]; - sorted.push_back(&it->second); + + vector sorted; + for (map::iterator it = wordList.begin(); it != wordList.end(); it++) { + // pull strongs key from map and populate Word + SWBuf s = it->first; + it->second.strong = s; + // populate lex defs + it->second.def = (s[0] == 'G') ? + gdefs["defs"][(s << 1).c_str()] : + hdefs["defs"][(s << 1).c_str()]; + // put only word in sorted container + sorted.push_back(it->second); } - sort(sorted.begin(), sorted.end(), compareFreq); -// outputCSV(sorted); - outputFlash(sorted, 25); + + return sorted; +} + + +int main(int argc, char **argv) +{ + outputFlash(processWords("gen-mal"), "hebFreqKJV" , true); + outputFlash(processWords("gen-mal"), "hebFreq" , false); + outputFlash(processWords("mat-rev"), "greekFreqKJV", true); + outputFlash(processWords("mat-rev"), "greekFreq" , false); +// outputCSV(processWords("mat-rev")); + return 0; } -- cgit