/****************************************************************************** * * greeklexattribs - SWFilter decendant to set entry attributes for greek * lexicons */ #include #include #include #include #include GreekLexAttribs::GreekLexAttribs() { } char GreekLexAttribs::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) { if (module->isProcessEntryAttributes()) { char *from; bool inAV = false; string phrase; string freq; char val[128], *valto; char wordstr[7]; char *currentPhrase = 0, *ch = 0; char *currentPhraseEnd = 0; int number = 0; for (from = text; *from; from++) { if (inAV) { if (currentPhrase == 0) { if (isalpha(*from)) currentPhrase = from; } else { if ((!isalpha(*from)) && (*from != ' ') && (*from != '+') && (*from !='(') && (*from != ')') && (*from != '\'')) { if (*from == '<') { if (!currentPhraseEnd) currentPhraseEnd = from - 1; for (; *from && *from != '>'; from++) { if (!strncmp(from, "value=\"", 7)) { valto = val; from += 7; for (unsigned int i = 0; from[i] != '\"' && i < 127; i++) *valto++ = from[i]; *valto = 0; sprintf(wordstr, "%03d", number+1); module->getEntryAttributes()["AVPhrase"][wordstr]["CompoundedWith"] = val; from += strlen(val); } } continue; } phrase = ""; phrase.append(currentPhrase, (int)(((currentPhraseEnd)?currentPhraseEnd:from) - currentPhrase)-1); currentPhrase = from; while (*from && isdigit(*from)) from++; freq = ""; freq.append(currentPhrase, (int)(from - currentPhrase)); if ((freq.length() > 0) && (phrase.length() > 0)) { sprintf(wordstr, "%03d", ++number); if ((strchr(phrase.c_str(), '(') > phrase.c_str()) && (strchr(phrase.c_str(), ')') > phrase.c_str() + 1)) { string tmp = phrase.substr(0, phrase.find_first_of("(")); phrase.erase(phrase.find_first_of("("), 1); phrase.erase(phrase.find_first_of(")"), 1); phrase.erase(0,phrase.find_first_not_of("\r\n\v\t ")); phrase.erase(phrase.find_last_not_of("\r\n\v\t ")+1); module->getEntryAttributes()["AVPhrase"][wordstr]["Alt"] = phrase; phrase = tmp; } phrase.erase(0,phrase.find_first_not_of("\r\n\v\t ")); phrase.erase(phrase.find_last_not_of("\r\n\v\t ")+1); freq.erase(0,freq.find_first_not_of("\r\n\v\t ")); freq.erase(freq.find_last_not_of("\r\n\v\t ")+1); module->getEntryAttributes()["AVPhrase"][wordstr]["Phrase"] = phrase; module->getEntryAttributes()["AVPhrase"][wordstr]["Frequency"] = freq; currentPhrase = 0; currentPhraseEnd = 0; } } } if (*from == ';') inAV = false; } else if (!strncmp(from, "AV-", 3)) { inAV = true; from+=2; } } } return 0; }