summary refs log tree commit diff stats
path: root/modules/hebrew-wlc
diff options
context:
space:
mode:
author Martin Gruner <mg.pub@gmx.net> 2006-08-03 19:24:27 +0000
committer Martin Gruner <mg.pub@gmx.net> 2006-08-03 19:24:27 +0000
commit e8116f598e722b6ce99927cb676d41575fd4737f (patch)
tree 668c2b384aee9a31bc4865154902b932e1b350b5 /modules/hebrew-wlc
parent f0c1b110a75b26825856c250216a1be35d92570f (diff)
download sword-tools-e8116f598e722b6ce99927cb676d41575fd4737f.tar.gz
pretty cool now
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@76 07627401-56e2-0310-80f4-f8cd0041bdcd
Diffstat (limited to 'modules/hebrew-wlc')
-rw-r--r-- modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java 81
1 files changed, 61 insertions, 20 deletions
diff --git a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
index 1caee6c..840182d 100644
--- a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
+++ b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
@@ -52,7 +52,7 @@ public void parse(){
int newSubWordNumber = 0;
- java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("(\\w\\w)(\\d+):(\\d+),(\\d+)\\.(\\d+)(\\]\\S)?\\s(\\S+)\\s(\\S+)(@|%)(\\S+)");
+ java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("(\\w\\w)(\\d+):(\\d+),(\\d+)\\.(\\d+)(\\]\\S)?\\s(\\S+)\\s([^_]+)(_\\d+)?(@|%)(\\S+)");
while ( true ){
s="";
@@ -83,6 +83,13 @@ public void parse(){
System.exit(1);
}
+ //remember old values
+ oldBookCode = newBookCode;
+ oldChapter = newChapter;
+ oldVerse = newVerse;
+ oldWordNumber = newWordNumber;
+ oldSubWordNumber = newSubWordNumber;
+
// Parse the identifier
newBookCode = match.group(1);
newChapter = Integer.parseInt( match.group(2) );
@@ -92,53 +99,81 @@ public void parse(){
String note = match.group(6);
String word = match.group(7);
String lemma = match.group(8);
- String separator = match.group(9);
- String morph = match.group(10);
+ String homonym = match.group(9);
+ if (homonym != null){
+ homonym = homonym.substring(1); //"_1" to "1"
+ }
+ String separator = match.group(10);
+ String morph = match.group(11);
// Verse changed, close old and open new
- if ((newBookCode != oldBookCode) || (newChapter != oldChapter) || (newVerse != oldVerse))
+ if ((!newBookCode.equals(oldBookCode)) || (newChapter != oldChapter) || (newVerse != oldVerse))
{
- if (oldVerse >= 1) A.writer.closeTag("verse", 2);
+ if (oldVerse > 0) {
+ A.writer.appendText("</w></verse>");
+ }
A.writer.openTag(
"verse osisID=\""+
BookName.getBookName(bookNames, newBookCode).abbrev+"."+
newChapter+"."+
newVerse+"\"", 2);
+
+ if (separator.equals("@")){
+ A.writer.appendText("<w xml:lang=\"he\">");
+ }
+ else if (separator.equals("%")){
+ A.writer.appendText("<w xml:lang=\"ah+\">");
+ }
+ else {
+ System.out.println("unknown separator: "+s);
+ System.exit(1);
+ }
}
//same verse, another word, add space
if ((oldVerse == newVerse) && (oldWordNumber != newWordNumber)){
- A.writer.appendText(" ");
+ if (separator.equals("@")){
+ A.writer.appendText("</w> <w xml:lang=\"he\">");
+ }
+ else if (separator.equals("%")){
+ A.writer.appendText("</w> <w xml:lang=\"ah+\">");
+ }
+ else {
+ System.out.println("unknown separator: "+s);
+ System.exit(1);
+ }
+ }
+
+ //special case: nonprinting article, leave out for now
+ // TODO: FIX
+ if (word.equals("_")){
+ continue;
}
- //System.out.println("Expression: " + word);
+// System.out.println("s: " + s);
// Paragraph marker found
if (morph.compareTo("x") == 0){
if (word.compareTo("P") == 0){ //
- A.writer.appendText(" "+constructWord(word, lemma, morph)+"<p/>");
+ A.writer.appendText(" "+constructWord(word, lemma, homonym, morph)+"<p/>");
}
else if (word.compareTo("S") == 0){ //
- A.writer.appendText(" "+constructWord(word, lemma, morph)+" ");
+ A.writer.appendText(" "+constructWord(word, lemma, homonym, morph)+" ");
}
else if (word.compareTo("N") == 0){ //inverted nun
- A.writer.appendText(" "+constructWord(word, lemma, morph)+" ");
+ A.writer.appendText(" "+constructWord(word, lemma, homonym, morph)+" ");
}
else {System.out.println("Unknown paragraph marker: " + s); System.exit(1);}
}
+ //now the text itself
+ A.writer.appendText( constructWord(word, lemma, homonym, morph) );
+
//Note found
if (note != null && note.length() > 0){
- System.out.println("Note: "+s);
+ //System.out.println("Note: "+s);
A.writer.appendText("<note type=\"textual\" xml:lang=\"en\">"+Note.Notes.get(note)+"</note>" );
}
-
- //remember
- oldBookCode = newBookCode;
- oldChapter = newChapter;
- oldVerse = newVerse;
- oldWordNumber = newWordNumber;
- oldSubWordNumber = newSubWordNumber;
}
A.writer.closeTag("verse", 2);
@@ -147,8 +182,14 @@ public void parse(){
}
//----------------------------------------------------------------------------
-public String constructWord(String word, String lemma, String morph){
- return "<seg type=\"x-morph\" lemma=\""+T.translate(lemma)+"\" morph=\""+morph+"\">"+T.translate(word)+"</seg>";
+public String constructWord(String word, String lemma, String homonym, String morph){
+ String result = "<seg type=\"x-morph\" lemma=\""+T.translate(lemma) + "\" ";
+ if (homonym != null) {
+ result += "homonym=\""+homonym + "\" ";
+ }
+ result += "morph=\""+morph+"\">";
+ result += T.translate(word)+"</seg>";
+ return result;
}
}