00001
00002
00003
00004
00005
00006
00007
00008 #include <stdlib.h>
00009 #include <string.h>
00010 #include <greeklexattribs.h>
00011 #include <swmodule.h>
00012
00013
00014 GreekLexAttribs::GreekLexAttribs() {
00015 }
00016
00017
00018 char GreekLexAttribs::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) {
00019
00020 if (module->isProcessEntryAttributes()) {
00021 char *from;
00022 bool inAV = false;
00023 string phrase;
00024 string freq;
00025 char val[128], *valto;
00026 char wordstr[7];
00027 char *currentPhrase = 0, *ch = 0;
00028 char *currentPhraseEnd = 0;
00029 int number = 0;
00030
00031
00032 for (from = text; *from; from++) {
00033 if (inAV) {
00034 if (currentPhrase == 0) {
00035 if (isalpha(*from))
00036 currentPhrase = from;
00037 }
00038 else {
00039 if ((!isalpha(*from)) && (*from != ' ') && (*from != '+') && (*from !='(') && (*from != ')') && (*from != '\'')) {
00040 if (*from == '<') {
00041 if (!currentPhraseEnd)
00042 currentPhraseEnd = from - 1;
00043 for (; *from && *from != '>'; from++) {
00044 if (!strncmp(from, "value=\"", 7)) {
00045 valto = val;
00046 from += 7;
00047 for (unsigned int i = 0; from[i] != '\"' && i < 127; i++)
00048 *valto++ = from[i];
00049 *valto = 0;
00050 sprintf(wordstr, "%03d", number+1);
00051 module->getEntryAttributes()["AVPhrase"][wordstr]["CompoundedWith"] = val;
00052 from += strlen(val);
00053 }
00054 }
00055 continue;
00056 }
00057
00058 phrase = "";
00059 phrase.append(currentPhrase, (int)(((currentPhraseEnd)?currentPhraseEnd:from) - currentPhrase)-1);
00060 currentPhrase = from;
00061 while (*from && isdigit(*from)) from++;
00062 freq = "";
00063 freq.append(currentPhrase, (int)(from - currentPhrase));
00064 if ((freq.length() > 0) && (phrase.length() > 0)) {
00065 sprintf(wordstr, "%03d", ++number);
00066 if ((strchr(phrase.c_str(), '(') > phrase.c_str()) && (strchr(phrase.c_str(), ')') > phrase.c_str() + 1)) {
00067 string tmp = phrase.substr(0, phrase.find_first_of("("));
00068 phrase.erase(phrase.find_first_of("("), 1);
00069 phrase.erase(phrase.find_first_of(")"), 1);
00070 phrase.erase(0,phrase.find_first_not_of("\r\n\v\t ")); phrase.erase(phrase.find_last_not_of("\r\n\v\t ")+1);
00071 module->getEntryAttributes()["AVPhrase"][wordstr]["Alt"] = phrase;
00072 phrase = tmp;
00073 }
00074 phrase.erase(0,phrase.find_first_not_of("\r\n\v\t ")); phrase.erase(phrase.find_last_not_of("\r\n\v\t ")+1);
00075 freq.erase(0,freq.find_first_not_of("\r\n\v\t ")); freq.erase(freq.find_last_not_of("\r\n\v\t ")+1);
00076 module->getEntryAttributes()["AVPhrase"][wordstr]["Phrase"] = phrase;
00077 module->getEntryAttributes()["AVPhrase"][wordstr]["Frequency"] = freq;
00078 currentPhrase = 0;
00079 currentPhraseEnd = 0;
00080 }
00081 }
00082 }
00083 if (*from == ';') inAV = false;
00084
00085 }
00086 else if (!strncmp(from, "AV-", 3)) {
00087 inAV = true;
00088 from+=2;
00089 }
00090 }
00091 }
00092 return 0;
00093 }
00094
00095