thmlolb.cpp Source File

00001 /***************************************************************************
00002                      thmlolb.cpp  -  ThML to OLB filter
00003                              -------------------
00004     begin                : 2001-05-10
00005     copyright            : 2001 by CrossWire Bible Society
00006  ***************************************************************************/
00007 
00008 /***************************************************************************
00009  *                                                                         *
00010  *   This program is free software; you can redistribute it and/or modify  *
00011  *   it under the terms of the GNU General Public License as published by  *
00012  *   the Free Software Foundation; either version 2 of the License, or     *
00013  *   (at your option) any later version.                                   *
00014  *                                                                         *
00015  ***************************************************************************/
00016 
00017 #include <stdlib.h>
00018 #include <string.h>
00019 #include <thmlolb.h>
00020 
00021 
00022 ThMLOLB::ThMLOLB()
00023 {
00024 }
00025 
00026 
00027 char ThMLOLB::ProcessText(char *text, int maxlen)
00028 {
00029   char *to, *from, token[2048];
00030   int tokpos = 0;
00031   bool intoken  = false;
00032   int len;
00033   bool ampersand = false;
00034   int i;
00035   
00036   len = strlen(text) + 1;                                               // shift string to right of buffer
00037   if (len < maxlen) {
00038     memmove(&text[maxlen - len], text, len);
00039     from = &text[maxlen - len];
00040   }
00041   else  from = text;                                                    // -------------------------------
00042   for (to = text; *from; from++)
00043     {
00044       if (*from == '<') {
00045         intoken = true;
00046         tokpos = 0;
00047         memset(token, 0, 2048);
00048         ampersand = false;
00049         continue;
00050       }
00051       else if (*from == '&') {
00052         intoken = true;
00053         tokpos = 0;
00054         memset(token, 0, 2048);
00055         ampersand = true;
00056         continue;
00057       }
00058       if (*from == ';' && ampersand) {
00059         intoken = false;
00060         
00061         if (!strncmp("nbsp", token, 4)) *to++ = ' ';
00062         else if (!strncmp("quot", token, 4)) *to++ = '"';
00063         else if (!strncmp("amp", token, 3)) *to++ = '&';
00064         else if (!strncmp("lt", token, 2)) *to++ = '<';
00065         else if (!strncmp("gt", token, 2)) *to++ = '>';
00066         else if (!strncmp("brvbar", token, 6)) *to++ = '|';
00067         else if (!strncmp("sect", token, 4)) *to++ = '§';
00068         else if (!strncmp("copy", token, 4)) *to++ = '©';
00069         else if (!strncmp("laquo", token, 5)) *to++ = '«';
00070         else if (!strncmp("reg", token, 3)) *to++ = '®';
00071         else if (!strncmp("acute", token, 5)) *to++ = '´';
00072         else if (!strncmp("para", token, 4)) *to++ = '¶';
00073         else if (!strncmp("raquo", token, 5)) *to++ = '»';
00074         
00075         else if (!strncmp("Aacute", token, 6)) *to++ = 'Á';
00076         else if (!strncmp("Agrave", token, 6)) *to++ = 'À';
00077         else if (!strncmp("Acirc", token, 5)) *to++ = 'Â';
00078         else if (!strncmp("Auml", token, 4)) *to++ = 'Ä';
00079         else if (!strncmp("Atilde", token, 6)) *to++ = 'Ã';
00080         else if (!strncmp("Aring", token, 5)) *to++ = 'Å';
00081         else if (!strncmp("aacute", token, 6)) *to++ = 'á';
00082         else if (!strncmp("agrave", token, 6)) *to++ = 'à';
00083         else if (!strncmp("acirc", token, 5)) *to++ = 'â';
00084         else if (!strncmp("auml", token, 4)) *to++ = 'ä';
00085         else if (!strncmp("atilde", token, 6)) *to++ = 'ã';
00086         else if (!strncmp("aring", token, 5)) *to++ = 'å';
00087         else if (!strncmp("Eacute", token, 6)) *to++ = 'É';
00088         else if (!strncmp("Egrave", token, 6)) *to++ = 'È';
00089         else if (!strncmp("Ecirc", token, 5)) *to++ = 'Ê';
00090         else if (!strncmp("Euml", token, 4)) *to++ = 'Ë';
00091         else if (!strncmp("eacute", token, 6)) *to++ = 'é';
00092         else if (!strncmp("egrave", token, 6)) *to++ = 'è';
00093         else if (!strncmp("ecirc", token, 5)) *to++ = 'ê';
00094         else if (!strncmp("euml", token, 4)) *to++ = 'ë';
00095         else if (!strncmp("Iacute", token, 6)) *to++ = 'Í';
00096         else if (!strncmp("Igrave", token, 6)) *to++ = 'Ì';
00097         else if (!strncmp("Icirc", token, 5)) *to++ = 'Î';
00098         else if (!strncmp("Iuml", token, 4)) *to++ = 'Ï';
00099         else if (!strncmp("iacute", token, 6)) *to++ = 'í';
00100         else if (!strncmp("igrave", token, 6)) *to++ = 'ì';
00101         else if (!strncmp("icirc", token, 5)) *to++ = 'î';
00102         else if (!strncmp("iuml", token, 4)) *to++ = 'ï';
00103         else if (!strncmp("Oacute", token, 6)) *to++ = 'Ó';
00104         else if (!strncmp("Ograve", token, 6)) *to++ = 'Ò';
00105         else if (!strncmp("Ocirc", token, 5)) *to++ = 'Ô';
00106         else if (!strncmp("Ouml", token, 4)) *to++ = 'Ö';
00107         else if (!strncmp("Otilde", token, 6)) *to++ = 'Õ';
00108         else if (!strncmp("oacute", token, 6)) *to++ = 'ó';
00109         else if (!strncmp("ograve", token, 6)) *to++ = 'ò';
00110         else if (!strncmp("ocirc", token, 5)) *to++ = 'ô';
00111         else if (!strncmp("ouml", token, 4)) *to++ = 'ö';
00112         else if (!strncmp("otilde", token, 6)) *to++ = 'õ';
00113         else if (!strncmp("Uacute", token, 6)) *to++ = 'Ú';
00114         else if (!strncmp("Ugrave", token, 6)) *to++ = 'Ù';
00115         else if (!strncmp("Ucirc", token, 5)) *to++ = 'Û';
00116         else if (!strncmp("Uuml", token, 4)) *to++ = 'Ü';
00117         else if (!strncmp("uacute", token, 6)) *to++ = 'ú';
00118         else if (!strncmp("ugrave", token, 6)) *to++ = 'ù';
00119         else if (!strncmp("ucirc", token, 5)) *to++ = 'û';
00120         else if (!strncmp("uuml", token, 4)) *to++ = 'ü';
00121         else if (!strncmp("Yacute", token, 6)) *to++ = 'Ý';
00122         else if (!strncmp("yacute", token, 6)) *to++ = 'ý';
00123         else if (!strncmp("yuml", token, 4)) *to++ = 'ÿ';
00124         
00125         else if (!strncmp("deg", token, 3)) *to++ = '°';
00126         else if (!strncmp("plusmn", token, 6)) *to++ = '±';
00127         else if (!strncmp("sup2", token, 4)) *to++ = '²';
00128         else if (!strncmp("sup3", token, 4)) *to++ = '³';
00129         else if (!strncmp("sup1", token, 4)) *to++ = '¹';
00130         else if (!strncmp("nbsp", token, 4)) *to++ = 'º';
00131         else if (!strncmp("pound", token, 5)) *to++ = '£';
00132         else if (!strncmp("cent", token, 4)) *to++ = '¢';
00133         else if (!strncmp("frac14", token, 6)) *to++ = '¼';
00134         else if (!strncmp("frac12", token, 6)) *to++ = '½';
00135         else if (!strncmp("frac34", token, 6)) *to++ = '¾';
00136         else if (!strncmp("iquest", token, 6)) *to++ = '¿';
00137         else if (!strncmp("iexcl", token, 5)) *to++ = '¡';
00138         else if (!strncmp("ETH", token, 3)) *to++ = 'Ð';
00139         else if (!strncmp("eth", token, 3)) *to++ = 'ð';
00140         else if (!strncmp("THORN", token, 5)) *to++ = 'Þ';
00141         else if (!strncmp("thorn", token, 5)) *to++ = 'þ';
00142         else if (!strncmp("AElig", token, 5)) *to++ = 'Æ';
00143         else if (!strncmp("aelig", token, 5)) *to++ = 'æ';
00144         else if (!strncmp("Oslash", token, 6)) *to++ = 'Ø';
00145         else if (!strncmp("curren", token, 6)) *to++ = '¤';
00146         else if (!strncmp("Ccedil", token, 6)) *to++ = 'Ç';
00147         else if (!strncmp("ccedil", token, 6)) *to++ = 'ç';
00148         else if (!strncmp("szlig", token, 5)) *to++ = 'ß';
00149         else if (!strncmp("Ntilde", token, 6)) *to++ = 'Ñ';
00150         else if (!strncmp("ntilde", token, 6)) *to++ = 'ñ';
00151         else if (!strncmp("yen", token, 3)) *to++ = '¥';
00152         else if (!strncmp("not", token, 3)) *to++ = '¬';
00153         else if (!strncmp("ordf", token, 4)) *to++ = 'ª';
00154         else if (!strncmp("uml", token, 3)) *to++ = '¨';
00155         else if (!strncmp("shy", token, 3)) *to++ = '';
00156         else if (!strncmp("macr", token, 4)) *to++ = '¯';
00157         continue;
00158         
00159       }
00160       else if (*from == '>' && !ampersand)
00161         {
00162           intoken = false;
00163           // process desired tokens
00164           if (!strncmp(token, "sync type=\"Strongs\" value=\"G", 27)) {
00165                 *to++ = '<';
00166                 for (i = 28; token[i] != '\"'; i++)
00167                         *to++ = token[i];
00168                 *to++ = '>';
00169             continue;
00170           }
00171           else if (!strncmp(token, "sync type=\"Strongs\" value=\"H", 27)) {
00172                 *to++ = '<';
00173                 for (i = 28; token[i] != '\"'; i++)
00174                         *to++ = token[i];
00175                 *to++ = '>';
00176             continue;
00177           }
00178           else if (!strncmp(token, "scripRef", 8)) {
00179             *to++ = '#';
00180             continue;
00181           }
00182           else if (!strncmp(token, "/scripRef", 9)) {
00183             *to++ = ' ';
00184             continue;
00185           }
00186           else if (!strncmp(token, "note ", 5)) {
00187             *to++ = '{';
00188             continue;
00189           }
00190           else if (!strncmp(token, "/note", 5)) {
00191             *to++ = '}';
00192             continue;
00193           }
00194           else if (!strnicmp(token, "font", 4)) {
00195             *to++ = '\\';
00196             *to++ = '\\';
00197             continue;
00198           }
00199           else if (!strnicmp(token, "/font", 5)) {
00200             *to++ = '\\';
00201             *to++ = '\\';
00202             continue;       
00203           }
00204           else switch(*token) {
00205                   case 'I':                     // font tags
00206                   case 'i':
00207                     *to++ = '\\';
00208                     *to++ = '@';
00209                     continue;
00210                   case 'B':             // bold start
00211                   case 'b':
00212                     *to++ = '\\';
00213                     *to++ = '$';
00214                     continue;
00215                   case '/':
00216                     switch(token[1]) {
00217                     case 'I':
00218                     case 'i':           // italic end
00219                       *to++ = '\\';
00220                       *to++ = '@';
00221                       continue;
00222                     case 'B':           // bold start
00223                     case 'b':
00224                       *to++ = '\\';
00225                       *to++ = '$';
00226                       continue;
00227                     }
00228                   }
00229           continue;
00230         }
00231         if (intoken) {
00232                 if (tokpos < 2047)
00233                         token[tokpos++] = *from;
00234      }
00235         else    *to++ = *from;
00236   }
00237   *to++ = 0;
00238   *to = 0;
00239   return 0;
00240 }
00241 
00242 
00243