*** empty log message ***

author: danglassey <danglassey> 2002-08-14 09:57:17 +0000
committer: danglassey <danglassey> 2002-08-14 09:57:17 +0000
commit: c9458897ebbb739d8db83c80e06512d8a612f743 (patch)
tree: f8c5381045887e34388cc6b26cfccc254bf766dc /doc/api-documentation/html/hebrewmcim_8cpp-source.html
download: sword-sf-cvs-c9458897ebbb739d8db83c80e06512d8a612f743.tar.gz
1 files changed, 658 insertions, 0 deletions
diff --git a/doc/api-documentation/html/hebrewmcim_8cpp-source.html b/doc/api-documentation/html/hebrewmcim_8cpp-source.html
new file mode 100644
index 0000000..6ec69fa
--- /dev/null
+++ b/doc/api-documentation/html/hebrewmcim_8cpp-source.html
@@ -0,0 +1,658 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
+<html><head><meta http-equiv="Content-Type" content="text/html;charset=iso-8859-1">
+<title>hebrewmcim.cpp Source File</title>
+<link href="doxygen.css" rel="stylesheet" type="text/css">
+</head><body>
+<!-- Generated by Doxygen 1.2.15 -->
+<center>
+<a class="qindex" href="index.html">Main Page</a> &nbsp; <a class="qindex" href="namespaces.html">Namespace List</a> &nbsp; <a class="qindex" href="hierarchy.html">Class Hierarchy</a> &nbsp; <a class="qindex" href="classes.html">Alphabetical List</a> &nbsp; <a class="qindex" href="annotated.html">Compound List</a> &nbsp; <a class="qindex" href="files.html">File List</a> &nbsp; <a class="qindex" href="functions.html">Compound Members</a> &nbsp; </center>
+<hr><h1>hebrewmcim.cpp</h1><div class="fragment"><pre>00001 
+00011 <font class="preprocessor">#include &lt;hebrewmcim.h&gt;</font>
+00012 
+00013 HebrewMCIM::HebrewMCIM()
+00014                 :<a class="code" href="class_s_w_input_method.html">SWInputMethod</a>() {
+00015 
+00016    init();
+00017 }
+00018 
+00019 
+00020 <font class="keywordtype">int</font> *HebrewMCIM::translate(<font class="keywordtype">char</font> in) {
+00021         <font class="keywordtype">int</font> retVal = 0;
+00022         <font class="keyword">static</font> <font class="keywordtype">int</font> retString[5];
+00023         <font class="keywordtype">int</font> retStringIndex = 0;
+00024 
+00025         memset(retString, 0, 5);
+00026 
+00027         <font class="keywordflow">if</font> (getState() &gt; 1) {
+00028                 <font class="keywordflow">if</font> (getState() &gt;= 12) { <font class="comment">// serious issue with internal structure</font>
+00029                         setState(0);
+00030                         retString[retStringIndex++] = in;
+00031                         <font class="keywordflow">return</font> retString;
+00032                 }
+00033                 map&lt;int, int&gt;::iterator find = subst2[getState()].find(in);
+00034                 <font class="keywordflow">if</font> (find != subst2[getState()].end())
+00035                         retVal = find-&gt;second;
+00036                 <font class="keywordflow">else</font> retVal = in;
+00037 
+00038                 setState(0);
+00039                 retString[retStringIndex++] = retVal;
+00040                 <font class="keywordflow">return</font> retString;
+00041         }
+00042         <font class="keywordflow">else</font> {
+00043                 retVal = subst[in];
+00044 
+00045                 <font class="keywordflow">if</font> (retVal == 0) {
+00046                         setState(0);
+00047                         retString[retStringIndex++] = in;
+00048                         <font class="keywordflow">return</font> retString;
+00049                 }
+00050                 <font class="keywordflow">if</font> (retVal &gt; 100) {
+00051                         setState(1);
+00052                         retString[retStringIndex++] = retVal;
+00053                         <font class="keywordflow">return</font> retString;
+00054                 }
+00055                 <font class="keywordflow">if</font> (retVal == 50) {  <font class="comment">// multiChar</font>
+00056                         setState(1);
+00057                         <font class="keywordtype">int</font> *chars = multiChars[in];
+00058                         <font class="keywordflow">if</font> (chars != 0) {
+00059                                 retString[retStringIndex++] = chars[0];
+00060                                 retString[retStringIndex++] = chars[1];
+00061                                 <font class="keywordflow">return</font> retString;
+00062                         }
+00063                 }
+00064         }
+00065         setState(retVal);
+00066         <font class="keywordflow">return</font> 0;
+00067 }
+00068 
+00069 
+00070 <font class="keywordtype">void</font> HebrewMCIM::init() {
+00071         memset(subst, 0, 255);
+00072 
+00073         subst[<font class="charliteral">')'</font>] = 1488;
+00074         subst[<font class="charliteral">'B'</font>] = 1489;
+00075         subst[<font class="charliteral">'G'</font>] = 1490;
+00076         subst[<font class="charliteral">'D'</font>] = 1491;
+00077         subst[<font class="charliteral">'H'</font>] = 1492;
+00078         subst[<font class="charliteral">'W'</font>] = 1493;
+00079         subst[<font class="charliteral">'Z'</font>] = 1494;
+00080         subst[<font class="charliteral">'X'</font>] = 1495;
+00081         subst[<font class="charliteral">'+'</font>] = 1496;
+00082         subst[<font class="charliteral">'Y'</font>] = 1497;
+00083 
+00084         subst[<font class="charliteral">'k'</font>] = 1498;  <font class="comment">// finals</font>
+00085         subst[<font class="charliteral">'m'</font>] = 1501;
+00086         subst[<font class="charliteral">'n'</font>] = 1503;
+00087         subst[<font class="charliteral">'c'</font>] = 1509;
+00088 
+00089         subst[<font class="charliteral">'P'</font>] = 1508;
+00090         subst[<font class="charliteral">'K'</font>] = 1499;
+00091         subst[<font class="charliteral">'L'</font>] = 1500;
+00092         subst[<font class="charliteral">'M'</font>] = 1502;
+00093         subst[<font class="charliteral">'N'</font>] = 1504;
+00094         subst[<font class="charliteral">'S'</font>] = 1505;
+00095         subst[<font class="charliteral">'('</font>] = 1506;
+00096         subst[<font class="charliteral">'p'</font>] = 1507;
+00097         subst[<font class="charliteral">'C'</font>] = 1510;
+00098         subst[<font class="charliteral">'Q'</font>] = 1511;
+00099         subst[<font class="charliteral">'R'</font>] = 1512;
+00100         subst[<font class="charliteral">'#'</font>] = 1513;
+00101 
+00102         <font class="comment">// special multiChars</font>
+00103         subst[<font class="charliteral">'&amp;'</font>] = 50;
+00104         subst[<font class="charliteral">'$'</font>] = 50;
+00105 
+00106         <font class="keyword">static</font> <font class="keywordtype">int</font> x[] = {1513, 1474};
+00107         multiChars[<font class="charliteral">'&amp;'</font>] = x;
+00108         <font class="keyword">static</font> <font class="keywordtype">int</font> y[] = {1513, 1473};
+00109         multiChars[<font class="charliteral">'$'</font>] = y;
+00110 
+00111         subst[<font class="charliteral">'T'</font>] = 1514;
+00112 
+00113         <font class="comment">// VOWELS</font>
+00114         subst[<font class="charliteral">'A'</font>] = 1463;
+00115         subst[<font class="charliteral">'F'</font>] = 1464;
+00116         subst[<font class="charliteral">'E'</font>] = 1462;
+00117         subst[<font class="charliteral">'"'</font>] = 1461;
+00118         subst[<font class="charliteral">'I'</font>] = 1460;
+00119         subst[<font class="charliteral">'O'</font>] = 1465;
+00120         subst[<font class="charliteral">'U'</font>] = 1467;
+00121 
+00122 
+00123 
+00124         <font class="comment">// OTHER DIACRITICS</font>
+00125         subst[<font class="charliteral">'.'</font>] = 1468;
+00126         subst[<font class="charliteral">'-'</font>] = 1470;
+00127         subst[<font class="charliteral">','</font>] = 1471;
+00128 
+00129         <font class="comment">// Compound input</font>
+00130 
+00131         <font class="comment">// CANTILLATION</font>
+00132 
+00133         subst[<font class="charliteral">':'</font>] = 2;
+00134         subst2[2][<font class="charliteral">'A'</font>] = 1458;
+00135         subst2[2][<font class="charliteral">'E'</font>] = 1457;
+00136         subst2[2][<font class="charliteral">'F'</font>] = 1459;
+00137 
+00138 
+00139         <font class="comment">/* Telisha qetana is postpositive as in '04' above. However, Michigan</font>
+00140 <font class="comment"># code '24' is for a medial telisha. Graphically, there is no</font>
+00141 <font class="comment"># difference.</font>
+00142 <font class="comment">        */</font>
+00143         subst[<font class="charliteral">'2'</font>] = 5;
+00144         subst2[5][<font class="charliteral">'4'</font>] = 1449;
+00145 
+00146 
+00147         <font class="comment">/* Note Michigan encoding distinguishes between medial metheg '35' (occurring</font>
+00148 <font class="comment"># on the left of the vowel), and the ordinary meteg '95' (occurring on the</font>
+00149 <font class="comment"># right of the vowel). It is also used for silluq.</font>
+00150 <font class="comment">        */</font>
+00151         subst[<font class="charliteral">'3'</font>] = 6;
+00152         subst2[6][<font class="charliteral">'3'</font>] = 1433;
+00153         subst2[6][<font class="charliteral">'5'</font>] = 1469;
+00154 
+00155 
+00156         <font class="comment">/* The Michigan code of telisha gedola in medial position. Graphically,</font>
+00157 <font class="comment"># there is no difference.</font>
+00158 <font class="comment">        */</font>
+00159         subst[<font class="charliteral">'4'</font>] = 7;
+00160         subst2[7][<font class="charliteral">'4'</font>] = 1440;
+00161 
+00162         subst[<font class="charliteral">'6'</font>] = 8;
+00163         subst2[8][<font class="charliteral">'0'</font>] = 1451;
+00164         subst2[8][<font class="charliteral">'1'</font>] = 1436;
+00165 
+00166         subst[<font class="charliteral">'1'</font>] = 4;
+00167         subst2[4][<font class="charliteral">'0'</font>] = 1434;
+00168 
+00169         <font class="comment">/* In the poetic books, prepositive dehi occurs; it's unclear whether</font>
+00170 <font class="comment"># tipeha also occurs in the poetic books. Otherwise, we could simply</font>
+00171 <font class="comment"># check for what book in the Tanach we are in. Michigan uses the same</font>
+00172 <font class="comment"># code for each.</font>
+00173 <font class="comment">        */</font>
+00174 
+00175         subst2[4][<font class="charliteral">'3'</font>] = 1430;
+00176 
+00177         <font class="comment">/* This is the poetic accent mugrash, which also includes rebia, but is</font>
+00178 <font class="comment"># encoded separately as '81' in the Michigan text.</font>
+00179 <font class="comment">        */</font>
+00180         subst2[4][<font class="charliteral">'1'</font>] = 1437;
+00181         subst2[4][<font class="charliteral">'4'</font>] = 1440;
+00182 
+00183 
+00184         subst[<font class="charliteral">'0'</font>] = 3;
+00185         subst2[3][<font class="charliteral">'0'</font>] = 1475;
+00186         subst2[3][<font class="charliteral">'1'</font>] = 1426;
+00187 
+00188         <font class="comment">/* According to BHS, zarqa and sinnor are both postpositive. However,</font>
+00189 <font class="comment"># the Michigan encoding uses one code for both. The Unicode zarqa</font>
+00190 <font class="comment"># (0x0598) is definitely NOT postpositive. And further, the shape of</font>
+00191 <font class="comment"># the symbol is different in BHS and Unicode. This needs further</font>
+00192 <font class="comment"># research to determine what's going on here. For now, we follow BHS</font>
+00193 <font class="comment"># and use the postpositive Unicode zinor for both accents.</font>
+00194 <font class="comment">        */</font>
+00195 
+00196         subst2[3][<font class="charliteral">'2'</font>] = 1454;
+00197 
+00198         <font class="comment">/* Pashta is postpositive, and the Unicode equivalent reflects</font>
+00199 <font class="comment"># this. However, there is a poetic equivalent -- azla legarmeh --</font>
+00200 <font class="comment"># which is not postpositive, but no equivalent code point exists in</font>
+00201 <font class="comment"># Unicode. The Michigan encoding does not distinguish between the two,</font>
+00202 <font class="comment"># although it could be algorithmically determined.</font>
+00203 <font class="comment">        */</font>
+00204 
+00205         subst2[3][<font class="charliteral">'3'</font>] = 1433;
+00206         subst2[3][<font class="charliteral">'4'</font>] = 1449;
+00207         subst2[3][<font class="charliteral">'5'</font>] = 1472;
+00208 
+00209 
+00210         <font class="comment">/* This is the Unicode Hebrew *accent*; there is also another Hebrew</font>
+00211 <font class="comment"># *punctuation* called GERSHAYIM 0x05F4. I'm using the more</font>
+00212 <font class="comment"># traditional rounded marks, rather than the alternate straight</font>
+00213 <font class="comment"># marks.</font>
+00214 <font class="comment">        */</font>
+00215 
+00216         subst2[8][<font class="charliteral">'2'</font>] = 1438;
+00217 
+00218         <font class="comment">// Also known as azla</font>
+00219         subst2[8][<font class="charliteral">'3'</font>] = 1448;
+00220         subst2[8][<font class="charliteral">'4'</font>] = 1452;
+00221         subst2[8][<font class="charliteral">'5'</font>] = 1427;
+00222 
+00223 
+00224         subst[<font class="charliteral">'8'</font>] = 9;
+00225         subst2[9][<font class="charliteral">'0'</font>] = 1428;
+00226         subst2[9][<font class="charliteral">'1'</font>] = 1431;
+00227 
+00228         <font class="comment">/* Note, this accent is actually sinnorit, but it does not exist as a</font>
+00229 <font class="comment"># separate glyph in the Unicode standard. The 'ZINOR' Unicode accent</font>
+00230 <font class="comment"># is postpositive, while sinnorit is not. ZARQA is as close as I can</font>
+00231 <font class="comment"># get to this.</font>
+00232 <font class="comment">        */</font>
+00233         subst2[9][<font class="charliteral">'2'</font>] = 1432;
+00234 
+00235         <font class="comment">/* The Unicode form does not match the form used by BHS, but the names</font>
+00236 <font class="comment"># are the same.</font>
+00237 <font class="comment">        */</font>
+00238         subst2[9][<font class="charliteral">'3'</font>] = 1441;
+00239         subst2[9][<font class="charliteral">'4'</font>] = 1439;
+00240         subst2[9][<font class="charliteral">'5'</font>] = 1429;
+00241 
+00242         subst[<font class="charliteral">'7'</font>] = 10;
+00243         subst2[10][<font class="charliteral">'0'</font>] = 1444;
+00244         subst2[10][<font class="charliteral">'1'</font>] = 1445;
+00245         subst2[10][<font class="charliteral">'2'</font>] = 1446;
+00246         subst2[10][<font class="charliteral">'3'</font>] = 1430;  <font class="comment">// also '13', '73' also is used for majela</font>
+00247         subst2[10][<font class="charliteral">'4'</font>] = 1443;
+00248         subst2[10][<font class="charliteral">'5'</font>] = 1469;  <font class="comment">// this is silluq; should appear to the left of the vowel</font>
+00249 
+00250         subst[<font class="charliteral">'9'</font>] = 11;
+00251         subst2[11][<font class="charliteral">'1'</font>] = 1435;
+00252         subst2[11][<font class="charliteral">'2'</font>] = 1425;
+00253         subst2[11][<font class="charliteral">'3'</font>] = 1450;
+00254         subst2[11][<font class="charliteral">'4'</font>] = 1447;
+00255         subst2[11][<font class="charliteral">'5'</font>] = 1469;  <font class="comment">// should appear to the right of the vowel</font>
+00256 
+00257 }
+00258 
+00259         <font class="comment">/*</font>
+00260 <font class="comment"></font>
+00261 <font class="comment"></font>
+00262 <font class="comment"># CANTILLATION MARKS</font>
+00263 <font class="comment"></font>
+00264 <font class="comment">        my  $ETNAHTA =           '&amp;#1425;';</font>
+00265 <font class="comment"># officially the Unicode name for this symbol was "SEGOL." However, that is</font>
+00266 <font class="comment"># not a unique name, conflicting with the vowel of the same name. Further,</font>
+00267 <font class="comment"># the position of the symbol is different. I have changed the name of the</font>
+00268 <font class="comment"># accent to "SEGOLTA," the traditional name for this accent.</font>
+00269 <font class="comment">        my  $SEGOLTA =           '&amp;#1426;';</font>
+00270 <font class="comment">        my  $SHALSHELET =        '&amp;#1427;';</font>
+00271 <font class="comment">        my  $ZAQEF_QATAN =       '&amp;#1428;';</font>
+00272 <font class="comment">        my  $ZAQEF_GADOL =       '&amp;#1429;';</font>
+00273 <font class="comment">        my  $TIPEHA =            '&amp;#1430;';</font>
+00274 <font class="comment">        my  $REVIA =             '&amp;#1431;';</font>
+00275 <font class="comment">        my  $ZARQA =             '&amp;#1432;';</font>
+00276 <font class="comment">        my  $PASHTA =            '&amp;#1433;';</font>
+00277 <font class="comment">        my  $YETIV =             '&amp;#1434;';</font>
+00278 <font class="comment">        my  $TEVIR =             '&amp;#1435;';</font>
+00279 <font class="comment">        my  $GERESH =            '&amp;#1436;';</font>
+00280 <font class="comment">        my  $GERESH_MUQDAM =     '&amp;#1437;';</font>
+00281 <font class="comment">        my  $GERSHAYIM =         '&amp;#1438;';</font>
+00282 <font class="comment">        my  $QARNEY_PARA =       '&amp;#1439;';</font>
+00283 <font class="comment">        my  $TELISHA_GEDOLA =    '&amp;#1440;';</font>
+00284 <font class="comment">        my  $PAZER =             '&amp;#1441;';</font>
+00285 <font class="comment">        my  $MUNAH =             '&amp;#1443;';</font>
+00286 <font class="comment">        my  $MAHAPAKH =          '&amp;#1444;';</font>
+00287 <font class="comment">        my  $MERKHA =            '&amp;#1445;';</font>
+00288 <font class="comment">        my  $MERKHA_KEFULA =     '&amp;#1446;';</font>
+00289 <font class="comment">        my  $DARGA =             '&amp;#1447;';</font>
+00290 <font class="comment">        my  $QADMA =             '&amp;#1448;';</font>
+00291 <font class="comment">        my  $TELISHA_QETANA =    '&amp;#1449;';</font>
+00292 <font class="comment">        my  $YERAH_BEN_YOMO =    '&amp;#1450;';</font>
+00293 <font class="comment">        my  $OLE =               '&amp;#1451;';</font>
+00294 <font class="comment">        my  $ILUY =              '&amp;#1452;';</font>
+00295 <font class="comment">        my  $DEHI =              '&amp;#1453;';</font>
+00296 <font class="comment">        my  $ZINOR =             '&amp;#1454;';</font>
+00297 <font class="comment"># HEBREW MARK</font>
+00298 <font class="comment">        my  $MASORA_CIRCLE =     '&amp;#1455;';</font>
+00299 <font class="comment"># HEBREW EXTENDED-A  points and punctuation</font>
+00300 <font class="comment">        my  $SHEVA =             '&amp;#1456;';</font>
+00301 <font class="comment">        my  $HATAF_SEGOL =       '&amp;#1457;';</font>
+00302 <font class="comment">        my  $HATAF_PATAH =       '&amp;#1458;';</font>
+00303 <font class="comment">        my  $HATAF_QAMATS =      '&amp;#1459;';</font>
+00304 <font class="comment">        my  $HIRIQ =             '&amp;#1460;';</font>
+00305 <font class="comment">        my  $TSERE =             '&amp;#1461;';</font>
+00306 <font class="comment">        my  $SEGOL =             '&amp;#1462;';</font>
+00307 <font class="comment"># furtive Patah is not a distinct character</font>
+00308 <font class="comment">        my  $PATAH =             '&amp;#1463;';</font>
+00309 <font class="comment">        my  $QAMATS =            '&amp;#1464;';</font>
+00310 <font class="comment">        my  $HOLAM =             '&amp;#1465;';</font>
+00311 <font class="comment">        my  $QUBUTS =            '&amp;#1467;';</font>
+00312 <font class="comment"># also used as shuruq</font>
+00313 <font class="comment"># falls within the base letter</font>
+00314 <font class="comment">        my  $DAGESH_OR_MAPIQ =   '&amp;#1468;';</font>
+00315 <font class="comment"># also used as siluq</font>
+00316 <font class="comment">        my  $METAG =             '&amp;#1469;';</font>
+00317 <font class="comment">        my  $MAQAF =             '&amp;#1470;';</font>
+00318 <font class="comment">        my  $RAFE =              '&amp;#1471;';</font>
+00319 <font class="comment"># Also used for legarmeh</font>
+00320 <font class="comment">#   may be treated as spacing punctuation, not as a point</font>
+00321 <font class="comment">        my  $PASEQ =             '&amp;#1472;';</font>
+00322 <font class="comment">        my  $SHIN_DOT =          '&amp;#1473;';</font>
+00323 <font class="comment">        my  $SIN_DOT =           '&amp;#1474;';</font>
+00324 <font class="comment">        my  $SOF_PASUQ =         '&amp;#1475;';</font>
+00325 <font class="comment"># HEBREW MARK</font>
+00326 <font class="comment">        my  $UPPER_DOT =         '&amp;#1476;';</font>
+00327 <font class="comment"># HEBREW LETTERS based on ISO 8859-8</font>
+00328 <font class="comment"># aleph</font>
+00329 <font class="comment">#  x (alef symbol - 2135)</font>
+00330 <font class="comment">        my  $ALEF =              '&amp;#1488;';</font>
+00331 <font class="comment">#  x (bet symbol - 2136)</font>
+00332 <font class="comment">        my  $BET =               '&amp;#1489;';</font>
+00333 <font class="comment">#  x (gimel symbol - 2137)</font>
+00334 <font class="comment">        my  $GIMEL =             '&amp;#1490;';</font>
+00335 <font class="comment">#  x (dalet symbol - 2138)</font>
+00336 <font class="comment">        my  $DALET =             '&amp;#1491;';</font>
+00337 <font class="comment">        my  $HE =                '&amp;#1492;';</font>
+00338 <font class="comment">        my  $VAV =               '&amp;#1493;';</font>
+00339 <font class="comment">        my  $ZAYIN =             '&amp;#1494;';</font>
+00340 <font class="comment">        my  $HET =               '&amp;#1495;';</font>
+00341 <font class="comment">        my  $TET =               '&amp;#1496;';</font>
+00342 <font class="comment">        my  $YOD =               '&amp;#1497;';</font>
+00343 <font class="comment">        my  $FINAL_KAF =         '&amp;#1498;';</font>
+00344 <font class="comment">        my  $KAF =               '&amp;#1499;';</font>
+00345 <font class="comment">        my  $LAMED =             '&amp;#1500;';</font>
+00346 <font class="comment">        my  $FINAL_MEM =         '&amp;#1501;';</font>
+00347 <font class="comment">        my  $MEM =               '&amp;#1502;';</font>
+00348 <font class="comment">        my  $FINAL_NUN =         '&amp;#1503;';</font>
+00349 <font class="comment">        my  $NUN =               '&amp;#1504;';</font>
+00350 <font class="comment">        my  $SAMEKH =            '&amp;#1505;';</font>
+00351 <font class="comment">        my  $AYIN =              '&amp;#1506;';</font>
+00352 <font class="comment">        my  $FINAL_PE =          '&amp;#1507;';</font>
+00353 <font class="comment">        my  $PE =                '&amp;#1508;';</font>
+00354 <font class="comment">        my  $FINAL_TSADI =       '&amp;#1509;';</font>
+00355 <font class="comment"># also known as zade</font>
+00356 <font class="comment">        my  $TSADI =             '&amp;#1510;';</font>
+00357 <font class="comment">        my  $QOF =               '&amp;#1511;';</font>
+00358 <font class="comment">        my  $RESH =              '&amp;#1512;';</font>
+00359 <font class="comment">        my  $SHIN =              '&amp;#1513;';</font>
+00360 <font class="comment">        my  $TAV =               '&amp;#1514;';</font>
+00361 <font class="comment"># Yiddish digraphs</font>
+00362 <font class="comment">#   Hebrew Ligature</font>
+00363 <font class="comment"># tsvey vovn</font>
+00364 <font class="comment">        my  $DOUBLE_VAV =        '&amp;#1520;';</font>
+00365 <font class="comment">        my  $VAV_YOD =           '&amp;#1521;';</font>
+00366 <font class="comment"># tsvey yudn</font>
+00367 <font class="comment">        my  $DOUBLE_YOD =        '&amp;#1522;';</font>
+00368 <font class="comment"></font>
+00369 <font class="comment"># Additional punctuation</font>
+00370 <font class="comment">        my  $PUNCT_GERESH =      '&amp;#1523;';</font>
+00371 <font class="comment">        my  $PUNCT_GERSHAYIM =   '&amp;#1524;';</font>
+00372 <font class="comment"># Reserved: 0x05F5</font>
+00373 <font class="comment"># x (hebrew point judeo-spanish varika - FB1E)</font>
+00374 <font class="comment">#my  $JUDEO_SPANISH_VARIKA = pack("U",0xFB1E); # UTF-8 0xFB1E</font>
+00375 <font class="comment"></font>
+00376 <font class="comment">#############################</font>
+00377 <font class="comment"># End of Unicode 2.0 Hebrew #</font>
+00378 <font class="comment">#############################</font>
+00379 <font class="comment"></font>
+00380 <font class="comment"># A hash whose key is a Michigan code, and whose value is a Unicode</font>
+00381 <font class="comment"># equivalent</font>
+00382 <font class="comment"></font>
+00383 <font class="comment">        char subst[] = new char [255];</font>
+00384 <font class="comment">        subst[')'] = 1488;</font>
+00385 <font class="comment">        'B'  =&gt; $BET,</font>
+00386 <font class="comment">        'G'  =&gt; $GIMEL,</font>
+00387 <font class="comment">        'D'  =&gt; $DALET,</font>
+00388 <font class="comment">        'H'  =&gt; $HE,</font>
+00389 <font class="comment">        'W'  =&gt; $VAV,</font>
+00390 <font class="comment">        'Z'  =&gt; $ZAYIN,</font>
+00391 <font class="comment">        'X'  =&gt; $HET,</font>
+00392 <font class="comment">        '+'  =&gt; $TET,</font>
+00393 <font class="comment">        'Y'  =&gt; $YOD,</font>
+00394 <font class="comment">        'K'  =&gt; $KAF,</font>
+00395 <font class="comment">        'L'  =&gt; $LAMED,</font>
+00396 <font class="comment">        'M'  =&gt; $MEM,</font>
+00397 <font class="comment">        'N'  =&gt; $NUN,</font>
+00398 <font class="comment">        'S'  =&gt; $SAMEKH,</font>
+00399 <font class="comment">        '('  =&gt; $AYIN,</font>
+00400 <font class="comment">        'P'  =&gt; $PE,</font>
+00401 <font class="comment">        'C'  =&gt; $TSADI,</font>
+00402 <font class="comment">        'Q'  =&gt; $QOF,</font>
+00403 <font class="comment">        'R'  =&gt; $RESH,</font>
+00404 <font class="comment">        '#'  =&gt; $SHIN, # the letter shin without a point</font>
+00405 <font class="comment">        '&amp;'  =&gt; ($SHIN . $SIN_DOT),</font>
+00406 <font class="comment">        '$'  =&gt; ($SHIN . $SHIN_DOT), # '</font>
+00407 <font class="comment">        'T'  =&gt; $TAV,</font>
+00408 <font class="comment"># VOWELS</font>
+00409 <font class="comment">        'A'  =&gt; $PATAH,</font>
+00410 <font class="comment">        'F'  =&gt; $QAMATS,</font>
+00411 <font class="comment">        'E'  =&gt; $SEGOL,</font>
+00412 <font class="comment">        '"'  =&gt; $TSERE,</font>
+00413 <font class="comment">        'I'  =&gt; $HIRIQ,</font>
+00414 <font class="comment">        'O'  =&gt; $HOLAM,</font>
+00415 <font class="comment">        'U'  =&gt; $QUBUTS,</font>
+00416 <font class="comment">        ':'  =&gt; $SHEVA,</font>
+00417 <font class="comment">        ':A' =&gt; $HATAF_PATAH,</font>
+00418 <font class="comment">        ':E' =&gt; $HATAF_SEGOL,</font>
+00419 <font class="comment">        ':F' =&gt; $HATAF_QAMATS,</font>
+00420 <font class="comment"># OTHER DIACRITICS</font>
+00421 <font class="comment">        '.'  =&gt; $DAGESH_OR_MAPIQ,</font>
+00422 <font class="comment">        '-'  =&gt; $MAQAF,</font>
+00423 <font class="comment">        ','  =&gt; $RAFE,</font>
+00424 <font class="comment"># CANTILLATION</font>
+00425 <font class="comment">        '00' =&gt; $SOF_PASUQ,</font>
+00426 <font class="comment">        '01' =&gt; $SEGOLTA,</font>
+00427 <font class="comment"># According to BHS, zarqa and sinnor are both postpositive. However,</font>
+00428 <font class="comment"># the Michigan encoding uses one code for both. The Unicode zarqa</font>
+00429 <font class="comment"># (0x0598) is definitely NOT postpositive. And further, the shape of</font>
+00430 <font class="comment"># the symbol is different in BHS and Unicode. This needs further</font>
+00431 <font class="comment"># research to determine what's going on here. For now, we follow BHS</font>
+00432 <font class="comment"># and use the postpositive Unicode zinor for both accents.</font>
+00433 <font class="comment">        '02' =&gt; $ZINOR,</font>
+00434 <font class="comment"># Pashta is postpositive, and the Unicode equivalent reflects</font>
+00435 <font class="comment"># this. However, there is a poetic equivalent -- azla legarmeh --</font>
+00436 <font class="comment"># which is not postpositive, but no equivalent code point exists in</font>
+00437 <font class="comment"># Unicode. The Michigan encoding does not distinguish between the two,</font>
+00438 <font class="comment"># although it could be algorithmically determined.</font>
+00439 <font class="comment">        '03' =&gt; $PASHTA,</font>
+00440 <font class="comment">        '04' =&gt; $TELISHA_QETANA,</font>
+00441 <font class="comment">        '05' =&gt; $PASEQ,</font>
+00442 <font class="comment">        '10' =&gt; $YETIV,</font>
+00443 <font class="comment"># In the poetic books, prepositive dehi occurs; it's unclear whether</font>
+00444 <font class="comment"># tipeha also occurs in the poetic books. Otherwise, we could simply</font>
+00445 <font class="comment"># check for what book in the Tanach we are in. Michigan uses the same</font>
+00446 <font class="comment"># code for each.</font>
+00447 <font class="comment">        '13' =&gt; $TIPEHA, # also $DEHI</font>
+00448 <font class="comment"># This is the poetic accent mugrash, which also includes rebia, but is</font>
+00449 <font class="comment"># encoded separately as '81' in the Michigan text.</font>
+00450 <font class="comment">        '11' =&gt; $GERESH_MUQDAM,</font>
+00451 <font class="comment">        '14' =&gt; $TELISHA_GEDOLA,</font>
+00452 <font class="comment"># Telisha qetana is postpositive as in '04' above. However, Michigan</font>
+00453 <font class="comment"># code '24' is for a medial telisha. Graphically, there is no</font>
+00454 <font class="comment"># difference.</font>
+00455 <font class="comment">        '24' =&gt; $TELISHA_QETANA,</font>
+00456 <font class="comment">        '33' =&gt; $PASHTA,</font>
+00457 <font class="comment"># The Michigan code of telisha gedola in medial position. Graphically,</font>
+00458 <font class="comment"># there is no difference.</font>
+00459 <font class="comment">        '44' =&gt; $TELISHA_GEDOLA,</font>
+00460 <font class="comment">        '60' =&gt; $OLE,</font>
+00461 <font class="comment">        '61' =&gt; $GERESH,</font>
+00462 <font class="comment"># This is the Unicode Hebrew *accent*; there is also another Hebrew</font>
+00463 <font class="comment"># *punctuation* called GERSHAYIM 0x05F4. I'm using the more</font>
+00464 <font class="comment"># traditional rounded marks, rather than the alternate straight</font>
+00465 <font class="comment"># marks.</font>
+00466 <font class="comment">        '62' =&gt; $GERSHAYIM,</font>
+00467 <font class="comment"># Also known as azla</font>
+00468 <font class="comment">        '63' =&gt; $QADMA,</font>
+00469 <font class="comment">        '64' =&gt; $ILUY,</font>
+00470 <font class="comment">        '65' =&gt; $SHALSHELET,</font>
+00471 <font class="comment">        '80' =&gt; $ZAQEF_QATAN,</font>
+00472 <font class="comment">        '81' =&gt; $REVIA,</font>
+00473 <font class="comment"># Note, this accent is actually sinnorit, but it does not exist as a</font>
+00474 <font class="comment"># separate glyph in the Unicode standard. The 'ZINOR' Unicode accent</font>
+00475 <font class="comment"># is postpositive, while sinnorit is not. ZARQA is as close as I can</font>
+00476 <font class="comment"># get to this.</font>
+00477 <font class="comment">        '82' =&gt; $ZARQA,</font>
+00478 <font class="comment"># The Unicode form does not match the form used by BHS, but the names</font>
+00479 <font class="comment"># are the same.</font>
+00480 <font class="comment">        '83' =&gt; $PAZER,</font>
+00481 <font class="comment">        '84' =&gt; $QARNEY_PARA,</font>
+00482 <font class="comment">        '85' =&gt; $ZAQEF_GADOL,</font>
+00483 <font class="comment"># Note Michigan encoding distinguishes between medial meteg '35' (occurring</font>
+00484 <font class="comment"># on the left of the vowel), and the ordinary meteg '95' (occurring on the</font>
+00485 <font class="comment"># right of the vowel). It is also used for silluq.</font>
+00486 <font class="comment">        '35' =&gt; $METAG,</font>
+00487 <font class="comment">        '70' =&gt; $MAHAPAKH,</font>
+00488 <font class="comment">        '71' =&gt; $MERKHA,</font>
+00489 <font class="comment">        '72' =&gt; $MERKHA_KEFULA,</font>
+00490 <font class="comment">        '73' =&gt; $TIPEHA, # also '13', '73' also is used for majela</font>
+00491 <font class="comment">        '74' =&gt; $MUNAH,</font>
+00492 <font class="comment">        '75' =&gt; $METAG, # this is silluq; should appear to the left of the vowel</font>
+00493 <font class="comment">        '91' =&gt; $TEVIR,</font>
+00494 <font class="comment">        '92' =&gt; $ETNAHTA,</font>
+00495 <font class="comment">        '93' =&gt; $YERAH_BEN_YOMO,</font>
+00496 <font class="comment">        '94' =&gt; $DARGA,</font>
+00497 <font class="comment">        '95' =&gt; $METAG, # should appear to the right of the vowel</font>
+00498 <font class="comment"></font>
+00499 <font class="comment"># Not used by the Michigan Encoding</font>
+00500 <font class="comment"># $UPPER_DOT = '05C4';</font>
+00501 <font class="comment">        );</font>
+00502 <font class="comment"></font>
+00503 <font class="comment"># declare other variables</font>
+00504 <font class="comment">        my (@bhsLines,</font>
+00505 <font class="comment">        @bhsVerse,</font>
+00506 <font class="comment">        @entity_line) = ();</font>
+00507 <font class="comment"></font>
+00508 <font class="comment">        my ($i,</font>
+00509 <font class="comment">        $verse,</font>
+00510 <font class="comment">        $word,</font>
+00511 <font class="comment">        $character) = 0;</font>
+00512 <font class="comment"></font>
+00513 <font class="comment">        my ($element,</font>
+00514 <font class="comment">        $saveGuttural) = "";</font>
+00515 <font class="comment"></font>
+00516 <font class="comment"># read in a line</font>
+00517 <font class="comment">        while (&lt;&gt;) {</font>
+00518 <font class="comment"># Process one verse</font>
+00519 <font class="comment"># iterate over every character and change to XML decimal entity</font>
+00520 <font class="comment">        CHAR: for ( $i = 0; ($i &lt; scalar(@bhsVerse)); $i++) {</font>
+00521 <font class="comment">         # find and convert final kaf, mem, nun, pe, tsade</font>
+00522 <font class="comment">         ( # if final form</font>
+00523 <font class="comment">          $bhsVerse[$i] =~ /[KMNPC]/</font>
+00524 <font class="comment">         )</font>
+00525 <font class="comment">           &amp;&amp;</font>
+00526 <font class="comment">                (</font>
+00527 <font class="comment">                 ( # whitespace or</font>
+00528 <font class="comment">                  $bhsVerse[$i+1] =~ /[ \-?]/</font>
+00529 <font class="comment">                 )</font>
+00530 <font class="comment">                 ||</font>
+00531 <font class="comment">                 ( # EOL or</font>
+00532 <font class="comment">                  $i == ( scalar(@bhsVerse) - 1 )</font>
+00533 <font class="comment">                 )</font>
+00534 <font class="comment">                 ||</font>
+00535 <font class="comment">                 ( # sof pasuq or</font>
+00536 <font class="comment">                  ( $bhsVerse[$i+1] =~ /0/ ) &amp;&amp;</font>
+00537 <font class="comment">                  ( $bhsVerse[$i+2] =~ /0/ )</font>
+00538 <font class="comment">                 )</font>
+00539 <font class="comment">                 ||</font>
+00540 <font class="comment">                 ( # one accent followed by white, eol or</font>
+00541 <font class="comment">                  (</font>
+00542 <font class="comment">                   ( $bhsVerse[$i+1] =~ /\d/ ) &amp;&amp;</font>
+00543 <font class="comment">                   ( $bhsVerse[$i+2] =~ /\d/ )</font>
+00544 <font class="comment">                  ) &amp;&amp;</font>
+00545 <font class="comment">                  (</font>
+00546 <font class="comment">                   ( $bhsVerse[$i+3] =~ /[ \-?]/ ) ||</font>
+00547 <font class="comment">                   ( $i == ( scalar(@bhsVerse) - 1 ) )</font>
+00548 <font class="comment">                  )</font>
+00549 <font class="comment">                 )</font>
+00550 <font class="comment">                 ||</font>
+00551 <font class="comment">                 ( # two accents followed by white, eol</font>
+00552 <font class="comment">                  (</font>
+00553 <font class="comment">                   ( $bhsVerse[$i+1] =~ /\d/ ) &amp;&amp;</font>
+00554 <font class="comment">                   ( $bhsVerse[$i+2] =~ /\d/ ) &amp;&amp;</font>
+00555 <font class="comment">                   ( $bhsVerse[$i+3] =~ /\d/ ) &amp;&amp;</font>
+00556 <font class="comment">                   ( $bhsVerse[$i+4] =~ /\d/ )</font>
+00557 <font class="comment">                  ) &amp;&amp;</font>
+00558 <font class="comment">                  (</font>
+00559 <font class="comment">                   ( $bhsVerse[$i+5] =~ /[ \-?]/ ) ||</font>
+00560 <font class="comment">                   ( $i == ( scalar(@bhsVerse) - 1 ) )</font>
+00561 <font class="comment">                  )</font>
+00562 <font class="comment">                 )</font>
+00563 <font class="comment">                 ||</font>
+00564 <font class="comment">                 ( # followed by a vowel and white, eol, sof pasuq</font>
+00565 <font class="comment">                  ( $bhsVerse[$i+1] =~ /[:F]/ ) &amp;&amp;</font>
+00566 <font class="comment">                  ( # followed by</font>
+00567 <font class="comment">                   ( $bhsVerse[$i+2] =~ /[ \-?]/ ) || # whitespace or</font>
+00568 <font class="comment">                   ( $i == ( scalar(@bhsVerse) - 1 ) ) || # eol or</font>
+00569 <font class="comment">                   ( # sof pasuq</font>
+00570 <font class="comment">                    ( $bhsVerse[$i+2] =~ /0/ ) &amp;&amp;</font>
+00571 <font class="comment">                    ( $bhsVerse[$i+3] =~ /0/ )</font>
+00572 <font class="comment">                   )</font>
+00573 <font class="comment">                  )</font>
+00574 <font class="comment">                 )</font>
+00575 <font class="comment">                ) # end of what follows after final letter</font>
+00576 <font class="comment">                  &amp;&amp;</font>
+00577 <font class="comment">                    do {</font>
+00578 <font class="comment">                         $bhsVerse[$i] =~ /K/ &amp;&amp; eval { push @entity_line,$FINAL_KAF; }</font>
+00579 <font class="comment">                           &amp;&amp; next CHAR;</font>
+00580 <font class="comment">                         $bhsVerse[$i] =~ /M/ &amp;&amp; eval { push @entity_line,$FINAL_MEM; }</font>
+00581 <font class="comment">                           &amp;&amp; next CHAR;</font>
+00582 <font class="comment">                         $bhsVerse[$i] =~ /N/ &amp;&amp; eval { push @entity_line,$FINAL_NUN; }</font>
+00583 <font class="comment">                           &amp;&amp; next CHAR;</font>
+00584 <font class="comment">                         $bhsVerse[$i] =~ /P/ &amp;&amp; eval { push @entity_line,$FINAL_PE; }</font>
+00585 <font class="comment">                           &amp;&amp; next CHAR;</font>
+00586 <font class="comment">                         $bhsVerse[$i] =~ /C/ &amp;&amp; eval { push @entity_line,$FINAL_TSADI; }</font>
+00587 <font class="comment">                           &amp;&amp; next CHAR;</font>
+00588 <font class="comment">                    };</font>
+00589 <font class="comment">         # find and convert "furtive patach"</font>
+00590 <font class="comment">         ( $bhsVerse[$i] =~ /A/ ) &amp;&amp;             # If the letter is a patach</font>
+00591 <font class="comment">           ( $bhsVerse[$i-1] =~ /[)HX(]/ ) &amp;&amp;    #  and is preceded by a guttural</font>
+00592 <font class="comment">           ( ( $bhsVerse[$i-2] =~ /[AEFOU]/ ) || #  and is preceded by a vowel</font>
+00593 <font class="comment">                ( ( $bhsVerse[$i-2] =~ /\./ ) &amp;&amp;    #  or by suruq</font>
+00594 <font class="comment">                  ( $bhsVerse[$i-3] =~ /W/ ) ) ||    #</font>
+00595 <font class="comment">                ( ( $bhsVerse[$i-2] =~ /W/ ) &amp;&amp;      #  or by holem (written plene)</font>
+00596 <font class="comment">                  ( $bhsVerse[$i-3] =~ /O/ ) ) ||    #</font>
+00597 <font class="comment">                ( ( $bhsVerse[$i-2] =~ /Y/ ) &amp;&amp;      #  or by hiriq-yod</font>
+00598 <font class="comment">                  ( $bhsVerse[$i-3] =~ /I/ ) ) ) &amp;&amp;</font>
+00599 <font class="comment">                  do {</font>
+00600 <font class="comment">                         $saveGuttural = pop @entity_line; # snip off the guttural</font>
+00601 <font class="comment">                         push @entity_line,$PATAH;         # push on the patach</font>
+00602 <font class="comment">                         push @entity_line,$saveGuttural;  # push back on the guttural</font>
+00603 <font class="comment">                         next CHAR;</font>
+00604 <font class="comment">                  };</font>
+00605 <font class="comment">         # convert cantillation</font>
+00606 <font class="comment">         #   since we have previously dealt with all other cases of</font>
+00607 <font class="comment">         #   numbers, two digit patterns are all we have to search for</font>
+00608 <font class="comment">         $bhsVerse[$i] =~ /\d/ &amp;&amp; $bhsVerse[$i+1] =~ /\d/ &amp;&amp; do {</font>
+00609 <font class="comment">                push @entity_line,$Michigan2XMLentity{"$bhsVerse[$i]$bhsVerse[$i+1]"};</font>
+00610 <font class="comment">                $i++; # accents are two digits long, so advance past the 2nd digit</font>
+00611 <font class="comment">                next CHAR;</font>
+00612 <font class="comment">         };</font>
+00613 <font class="comment">         # convert katef vowels, which are two characters long</font>
+00614 <font class="comment">         $bhsVerse[$i] =~ /:/ &amp;&amp; $bhsVerse[$i+1] =~ /[AEF]/ &amp;&amp; do {</font>
+00615 <font class="comment">                push @entity_line,$Michigan2XMLentity{"$bhsVerse[$i]$bhsVerse[$i+1]"};</font>
+00616 <font class="comment">                $i++;</font>
+00617 <font class="comment">                next CHAR;</font>
+00618 <font class="comment">         };</font>
+00619 <font class="comment">         # convert everything else</font>
+00620 <font class="comment">         push @entity_line,$Michigan2XMLentity{"$bhsVerse[$i]"};</font>
+00621 <font class="comment">        } # end CHAR</font>
+00622 <font class="comment"># print the line to standard output with XML character-level encoding</font>
+00623 <font class="comment"># each character has the following format:</font>
+00624 <font class="comment"># &lt;c id="1kg1.verse#.word#.character#"&gt;&amp;#1234;&lt;/c&gt;</font>
+00625 <font class="comment"></font>
+00626 <font class="comment"># set up the verse element</font>
+00627 <font class="comment">        $word = 1;</font>
+00628 <font class="comment">        $character = 1;</font>
+00629 <font class="comment">        print "&lt;verse&gt;\n&lt;word&gt;\n";</font>
+00630 <font class="comment"># print each character element</font>
+00631 <font class="comment"># if there is a space, then close the word element, open a new word</font>
+00632 <font class="comment"># element, increment the word number, reset the character number to</font>
+00633 <font class="comment"># one.</font>
+00634 <font class="comment">        foreach $element (@entity_line) {</font>
+00635 <font class="comment">         if ( $element =~ " " ) {</font>
+00636 <font class="comment">           $word++;</font>
+00637 <font class="comment">           $character = 1;</font>
+00638 <font class="comment">           print "&lt;/word&gt;\n&lt;word&gt;\n";</font>
+00639 <font class="comment">           next;</font>
+00640 <font class="comment">         }</font>
+00641 <font class="comment">         print "&lt;c id=\"1kg1.$verse.$word.$character\"&gt;$element&lt;/c&gt;\n";</font>
+00642 <font class="comment">         $character++;</font>
+00643 <font class="comment">        }</font>
+00644 <font class="comment"># close the verse element</font>
+00645 <font class="comment">        print "&lt;/word&gt;&lt;/verse&gt;\n";</font>
+00646 <font class="comment"># reinitialize variables</font>
+00647 <font class="comment">        @bhsVerse = ();</font>
+00648 <font class="comment">        @entity_line = ();</font>
+00649 <font class="comment">        @bhsLines = ();</font>
+00650 <font class="comment">        } # end while</font>
+00651 <font class="comment"># close the XML document</font>
+00652 <font class="comment">        print "&lt;/body&gt;\n";</font>
+00653 <font class="comment">        */</font>
+</pre></div><hr><address align="right"><small>Generated on Thu Jun 20 22:12:59 2002 for The Sword Project by
+<a href="http://www.doxygen.org/index.html">
+<img src="doxygen.png" alt="doxygen" align="middle" border=0 
+width=110 height=53></a>1.2.15 </small></address>
+</body>
+</html>
author	danglassey <danglassey>	2002-08-14 09:57:17 +0000
committer	danglassey <danglassey>	2002-08-14 09:57:17 +0000
commit	c9458897ebbb739d8db83c80e06512d8a612f743 (patch)
tree	f8c5381045887e34388cc6b26cfccc254bf766dc /doc/api-documentation/html/hebrewmcim_8cpp-source.html
download	sword-sf-cvs-c9458897ebbb739d8db83c80e06512d8a612f743.tar.gz