diff options
author | Martin Gruner <mg.pub@gmx.net> | 2006-08-03 19:24:27 +0000 |
---|---|---|
committer | Martin Gruner <mg.pub@gmx.net> | 2006-08-03 19:24:27 +0000 |
commit | e8116f598e722b6ce99927cb676d41575fd4737f (patch) | |
tree | 668c2b384aee9a31bc4865154902b932e1b350b5 /modules/hebrew-wlc | |
parent | f0c1b110a75b26825856c250216a1be35d92570f (diff) | |
download | sword-tools-e8116f598e722b6ce99927cb676d41575fd4737f.tar.gz |
pretty cool now
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@76 07627401-56e2-0310-80f4-f8cd0041bdcd
Diffstat (limited to 'modules/hebrew-wlc')
-rw-r--r-- | modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java | 81 |
1 files changed, 61 insertions, 20 deletions
diff --git a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java index 1caee6c..840182d 100644 --- a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java +++ b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java @@ -52,7 +52,7 @@ public void parse(){ int newSubWordNumber = 0; - java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("(\\w\\w)(\\d+):(\\d+),(\\d+)\\.(\\d+)(\\]\\S)?\\s(\\S+)\\s(\\S+)(@|%)(\\S+)"); + java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("(\\w\\w)(\\d+):(\\d+),(\\d+)\\.(\\d+)(\\]\\S)?\\s(\\S+)\\s([^_]+)(_\\d+)?(@|%)(\\S+)"); while ( true ){ s=""; @@ -83,6 +83,13 @@ public void parse(){ System.exit(1); } + //remember old values + oldBookCode = newBookCode; + oldChapter = newChapter; + oldVerse = newVerse; + oldWordNumber = newWordNumber; + oldSubWordNumber = newSubWordNumber; + // Parse the identifier newBookCode = match.group(1); newChapter = Integer.parseInt( match.group(2) ); @@ -92,53 +99,81 @@ public void parse(){ String note = match.group(6); String word = match.group(7); String lemma = match.group(8); - String separator = match.group(9); - String morph = match.group(10); + String homonym = match.group(9); + if (homonym != null){ + homonym = homonym.substring(1); //"_1" to "1" + } + String separator = match.group(10); + String morph = match.group(11); // Verse changed, close old and open new - if ((newBookCode != oldBookCode) || (newChapter != oldChapter) || (newVerse != oldVerse)) + if ((!newBookCode.equals(oldBookCode)) || (newChapter != oldChapter) || (newVerse != oldVerse)) { - if (oldVerse >= 1) A.writer.closeTag("verse", 2); + if (oldVerse > 0) { + A.writer.appendText("</w></verse>"); + } A.writer.openTag( "verse osisID=\""+ BookName.getBookName(bookNames, newBookCode).abbrev+"."+ newChapter+"."+ newVerse+"\"", 2); + + if (separator.equals("@")){ + A.writer.appendText("<w xml:lang=\"he\">"); + } + else if (separator.equals("%")){ + A.writer.appendText("<w xml:lang=\"ah+\">"); + } + else { + System.out.println("unknown separator: "+s); + System.exit(1); + } } //same verse, another word, add space if ((oldVerse == newVerse) && (oldWordNumber != newWordNumber)){ - A.writer.appendText(" "); + if (separator.equals("@")){ + A.writer.appendText("</w> <w xml:lang=\"he\">"); + } + else if (separator.equals("%")){ + A.writer.appendText("</w> <w xml:lang=\"ah+\">"); + } + else { + System.out.println("unknown separator: "+s); + System.exit(1); + } + } + + //special case: nonprinting article, leave out for now + // TODO: FIX + if (word.equals("_")){ + continue; } - //System.out.println("Expression: " + word); +// System.out.println("s: " + s); // Paragraph marker found if (morph.compareTo("x") == 0){ if (word.compareTo("P") == 0){ // - A.writer.appendText(" "+constructWord(word, lemma, morph)+"<p/>"); + A.writer.appendText(" "+constructWord(word, lemma, homonym, morph)+"<p/>"); } else if (word.compareTo("S") == 0){ // - A.writer.appendText(" "+constructWord(word, lemma, morph)+" "); + A.writer.appendText(" "+constructWord(word, lemma, homonym, morph)+" "); } else if (word.compareTo("N") == 0){ //inverted nun - A.writer.appendText(" "+constructWord(word, lemma, morph)+" "); + A.writer.appendText(" "+constructWord(word, lemma, homonym, morph)+" "); } else {System.out.println("Unknown paragraph marker: " + s); System.exit(1);} } + //now the text itself + A.writer.appendText( constructWord(word, lemma, homonym, morph) ); + //Note found if (note != null && note.length() > 0){ - System.out.println("Note: "+s); + //System.out.println("Note: "+s); A.writer.appendText("<note type=\"textual\" xml:lang=\"en\">"+Note.Notes.get(note)+"</note>" ); } - - //remember - oldBookCode = newBookCode; - oldChapter = newChapter; - oldVerse = newVerse; - oldWordNumber = newWordNumber; - oldSubWordNumber = newSubWordNumber; } A.writer.closeTag("verse", 2); @@ -147,8 +182,14 @@ public void parse(){ } //---------------------------------------------------------------------------- -public String constructWord(String word, String lemma, String morph){ - return "<seg type=\"x-morph\" lemma=\""+T.translate(lemma)+"\" morph=\""+morph+"\">"+T.translate(word)+"</seg>"; +public String constructWord(String word, String lemma, String homonym, String morph){ + String result = "<seg type=\"x-morph\" lemma=\""+T.translate(lemma) + "\" "; + if (homonym != null) { + result += "homonym=\""+homonym + "\" "; + } + result += "morph=\""+morph+"\">"; + result += T.translate(word)+"</seg>"; + return result; } } |