From b417226d898c600740b148e55d536b27e60534b7 Mon Sep 17 00:00:00 2001 From: "Troy A. Griffitts" Date: Sun, 2 Sep 2007 20:00:05 +0000 Subject: reworked 'with' vector to go with phrase, instead of count added more comments git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@96 07627401-56e2-0310-80f4-f8cd0041bdcd --- flashtools/flash.cpp | 98 +++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 74 insertions(+), 24 deletions(-) (limited to 'flashtools') diff --git a/flashtools/flash.cpp b/flashtools/flash.cpp index b4fd726..71d4e42 100644 --- a/flashtools/flash.cpp +++ b/flashtools/flash.cpp @@ -14,15 +14,43 @@ using namespace sword; using namespace std; -class PhraseCount { +// used to hold a KJV translation phrase for a greek/hebrew word +// and any greek/hebrew words combined to make this KJV phrase +// e.g. hO QEOS = QEOS: [+ hO ]: God +class Phrase { public: - PhraseCount() - : count(0) + Phrase() + : phrase("") {} - int count; + SWBuf phrase; vector with; + inline bool operator ==(const Phrase &other) const { return !compare(other); } + inline bool operator !=(const Phrase &other) const { return compare(other); } + inline bool operator > (const Phrase &other) const { return compare(other) > 0; } + inline bool operator < (const Phrase &other) const { return compare(other) < 0; } + inline bool operator <=(const Phrase &other) const { return compare(other) <= 0; } + inline bool operator >=(const Phrase &other) const { return compare(other) >= 0; } + + int compare(const Phrase &right) const { + int c = phrase.compare(right.phrase); + if (c) return c; + vector::const_iterator lit = with.begin(); + vector::const_iterator rit = right.with.begin(); + while (lit != with.end() && rit != right.with.end()) { + c = lit->compare(*rit); + if (c) return c; + lit++; rit++; + } + if (lit != with.end()) return 1; + if (rit != right.with.end()) return -1; + return 0; + } }; +// KJV phrases and their occurance frequency +typedef map KJVPhrases; + +// primary result class class Word { public: Word() @@ -31,37 +59,50 @@ public: , freq(0) , def("") {} + + // lexical form of this word in utf8 greek/hebrew SWBuf utf8; + + // strongs number for this word (e.g. G3588) SWBuf strong; + + // frequency of occurance in the iterated text int freq; - // from stongs lex + + // definition pulled from short strongs def SWBuf def; - // computed ourselves - map kjvFreq; + + // kjv translation phrases and their frequencies + KJVPhrases kjvFreq; }; + string itoa(int v) { stringstream str; str << v; return str.str(); } + bool compareFreq(const Word &w1, const Word &w2) { return w1.freq > w2.freq; } -bool compareKJVFreq(const map::const_iterator &i1, const map::const_iterator &i2) { - return i1->second.count > i2->second.count; + +bool compareKJVFreq(const KJVPhrases::const_iterator &i1, const KJVPhrases::const_iterator &i2) { + return i1->second > i2->second; } -SWBuf prettyKJVFreq(map in) { + +// sort and pretty up all the KJV phrases for a word into a nice output buffer +SWBuf prettyKJVFreq(KJVPhrases in) { SWBuf retVal; - vector::const_iterator> sorted; - for (map::const_iterator it = in.begin(); it != in.end(); it++) { + vector sorted; + for (KJVPhrases::const_iterator it = in.begin(); it != in.end(); it++) { // combine cap words with lowercase, if exists - SWBuf k = it->first; - if (k.size() && toupper(k[0]) == k[0] && k != "God" && k != "Lord") { - k[0] = tolower(k[0]); + Phrase k = it->first; + if (k.phrase.size() && toupper(k.phrase[0]) == k.phrase[0] && k.phrase != "God" && k.phrase != "Lord") { + k.phrase[0] = tolower(k.phrase[0]); if (k != it->first) { - map::iterator i = in.find(k); + KJVPhrases::iterator i = in.find(k); if (i != in.end()) { - i->second.count += it->second.count; + i->second += it->second; // don't include us in the list cuz we added our freq to another continue; } @@ -70,21 +111,24 @@ SWBuf prettyKJVFreq(map in) { sorted.push_back(it); } sort(sorted.begin(), sorted.end(), compareKJVFreq); - for (vector::const_iterator>::const_iterator it = sorted.begin(); it != sorted.end(); it++) { + for (vector::const_iterator it = sorted.begin(); it != sorted.end(); it++) { if (retVal.size()) retVal += "; "; // prepend 'with other strongs' if present - if ((*it)->second.with.size()) { + if ((*it)->first.with.size()) { retVal += "[+"; - for (int i = 0; i < (*it)->second.with.size(); i++) { - retVal.appendFormatted(" %s", (*it)->second.with[i].c_str()); + for (int i = 0; i < (*it)->first.with.size(); i++) { + retVal.appendFormatted(" %s", (*it)->first.with[i].c_str()); } retVal += " ] "; } - retVal.appendFormatted("%s (%d)", (*it)->first.c_str(), (*it)->second.count); + retVal.appendFormatted("%s (%d)", (*it)->first.phrase.c_str(), (*it)->second); } return retVal; } + +// take utf8 text and spit out equiv. text substituting escaped codes for multibyte chars +// java .properties files wants this format (flashcard .flash lessons use this format) SWBuf escapedUTF8(SWBuf inText) { static UTF8UTF16 convert; convert.processText(inText); @@ -105,6 +149,7 @@ SWBuf escapedUTF8(SWBuf inText) { } +// output a simple CSV ('|' separated really) format for importing into OOo or excel void outputCSV(vector &wordList) { for (vector::iterator it = wordList.begin(); it != wordList.end(); it++) { Word &w = (*it); @@ -184,12 +229,14 @@ void outputFlash(const vector &wordList, const char *outputDir = ".", bool } } + /** * do the work * * range - the range of verses to process (e.g. "gen-mal") * addAll - if we should add all words in our lexicon for the testaments * included in the range even if they don't exist in the text + * (useful for generating complete OT or NT strongs word lists) * */ vector processWords(const char *range, bool addAll = true) { @@ -238,12 +285,15 @@ vector processWords(const char *range, bool addAll = true) { while (text.size() && (strchr(".;,?-!\"()[]{}':/\t\r\n ", text[text.size()-1]))) text.setSize(text.size()-1); if (!text.size()) text = "[Untranslated]"; } - wordList[strong].kjvFreq[text].count++; + Phrase p; + p.phrase = text; if (parts > 1) { + // lets build our 'with' list excluding ourselves list withoutMe = lemmas; withoutMe.remove(strong); - wordList[strong].kjvFreq[text].with = vector(withoutMe.begin(), withoutMe.end()); + p.with = vector(withoutMe.begin(), withoutMe.end()); } + wordList[strong].kjvFreq[p]++; wordList[strong].freq++; } } -- cgit