Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members  

greeklexattribs.cpp

00001 /******************************************************************************
00002  *
00003  * greeklexattribs -    SWFilter descendant to set entry attributes for Greek
00004  *      lexicons
00005  */
00006 
00007 
00008 #include <stdlib.h>
00009 #include <string.h>
00010 #include <greeklexattribs.h>
00011 #include <swmodule.h>
00012 
00013 
00014 GreekLexAttribs::GreekLexAttribs() {
00015 }
00016 
00017 
00018 char GreekLexAttribs::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) {
00019 
00020         if (module->isProcessEntryAttributes()) {
00021                 char *from;
00022                 bool inAV = false;
00023                 string phrase;
00024                 string freq;
00025                 char val[128], *valto;
00026                 char wordstr[7];
00027                 char *currentPhrase = 0, *ch = 0;
00028                 char *currentPhraseEnd = 0;
00029                 int number = 0;
00030 
00031 
00032                 for (from = text; *from; from++) {
00033                         if (inAV) {
00034                                 if (currentPhrase == 0) {
00035                                         if (isalpha(*from))
00036                                                 currentPhrase = from;
00037                                 }
00038                                 else {
00039                                         if ((!isalpha(*from)) && (*from != ' ') && (*from != '+') && (*from !='(') && (*from != ')') && (*from != '\'')) {
00040                                                 if (*from == '<') {
00041                                                         if (!currentPhraseEnd)
00042                                                                 currentPhraseEnd = from - 1;
00043                                                         for (; *from && *from != '>'; from++) {
00044                                                                 if (!strncmp(from, "value=\"", 7)) {
00045                                                                         valto = val;
00046                                                                         from += 7;
00047                                                                         for (unsigned int i = 0; from[i] != '\"' && i < 127; i++)
00048                                                                                 *valto++ = from[i];
00049                                                                         *valto = 0;
00050                                                                         sprintf(wordstr, "%03d", number+1);
00051                                                                         module->getEntryAttributes()["AVPhrase"][wordstr]["CompoundedWith"] = val;
00052                                                                         from += strlen(val);
00053                                                                 }
00054                                                         }
00055                                                         continue;
00056                                                 }
00057 
00058                                                 phrase = "";
00059                                                 phrase.append(currentPhrase, (int)(((currentPhraseEnd)?currentPhraseEnd:from) - currentPhrase)-1);
00060                                                 currentPhrase = from;
00061                                                 while (*from && isdigit(*from)) from++;
00062                                                 freq = "";
00063                                                 freq.append(currentPhrase, (int)(from - currentPhrase));
00064                                                 if ((freq.length() > 0) && (phrase.length() > 0)) {
00065                                                         sprintf(wordstr, "%03d", ++number);
00066                                                         if ((strchr(phrase.c_str(), '(') > phrase.c_str()) && (strchr(phrase.c_str(), ')') > phrase.c_str() + 1)) {
00067                                                                 string tmp = phrase.substr(0, phrase.find_first_of("("));
00068                                                                 phrase.erase(phrase.find_first_of("("), 1);
00069                                                                 phrase.erase(phrase.find_first_of(")"), 1);
00070                                                                 phrase.erase(0,phrase.find_first_not_of("\r\n\v\t ")); phrase.erase(phrase.find_last_not_of("\r\n\v\t ")+1);
00071                                                                 module->getEntryAttributes()["AVPhrase"][wordstr]["Alt"] = phrase;
00072                                                                 phrase = tmp;
00073                                                         }
00074                                                         phrase.erase(0,phrase.find_first_not_of("\r\n\v\t ")); phrase.erase(phrase.find_last_not_of("\r\n\v\t ")+1);
00075                                                         freq.erase(0,freq.find_first_not_of("\r\n\v\t ")); freq.erase(freq.find_last_not_of("\r\n\v\t ")+1);
00076                                                         module->getEntryAttributes()["AVPhrase"][wordstr]["Phrase"] = phrase;
00077                                                         module->getEntryAttributes()["AVPhrase"][wordstr]["Frequency"] = freq;
00078                                                         currentPhrase = 0;
00079                                                         currentPhraseEnd = 0;
00080                                                 }
00081                                         }
00082                                 }
00083                                 if (*from == ';') inAV = false;
00084 
00085                         }
00086                         else if (!strncmp(from, "AV-", 3)) {
00087                                 inAV = true;
00088                                 from+=2;
00089                         }
00090                 }
00091         }
00092         return 0;
00093 }
00094 
00095 

Generated on Thu Jun 20 22:12:59 2002 for The Sword Project by doxygen1.2.15