diff options
author | Martin Gruner <mg.pub@gmx.net> | 2006-09-16 21:25:27 +0000 |
---|---|---|
committer | Martin Gruner <mg.pub@gmx.net> | 2006-09-16 21:25:27 +0000 |
commit | 16599a8398ef2a2071b4ffc5dbf4b2d5ad75921b (patch) | |
tree | f62533f2e6a241a99ce4ab7666ebd34eb7ff76f9 /modules/hebrew-wlc | |
parent | 01cd6b45313fe4ef924a4d2cdb38cfe95de8c8c2 (diff) | |
download | sword-tools-16599a8398ef2a2071b4ffc5dbf4b2d5ad75921b.tar.gz |
finalized
sent preview version to Kirk for first check
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@80 07627401-56e2-0310-80f4-f8cd0041bdcd
Diffstat (limited to 'modules/hebrew-wlc')
7 files changed, 465 insertions, 412 deletions
diff --git a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java index a4236d6..eed8318 100644 --- a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java +++ b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java @@ -27,14 +27,14 @@ public Markers(WLC2OSIS A, Parser P ) { // Samek public void samek(){ //parasah setumah, closed paragraph == small space in line - A.writer.appendText(" " + H.samekh + " ") ; + WLC2OSIS.writer.appendText(" " + H.samekh + " ") ; // P.MarkerWritten = true ; } //------------------------------------------------------------------------------ // Pe public void pe(){ // parasah petuhah, open paragraph == new line - A.writer.appendText(" " + H.pe + " " + "<p/>") ; + WLC2OSIS.writer.appendText(" " + H.pe + " " + "<p/>") ; // P.MarkerWritten = true ; } //------------------------------------------------------------------------------ diff --git a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java index b12f00d..baaacb6 100644 --- a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java +++ b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java @@ -6,32 +6,26 @@ import WLC2OSIS.Translate.*; public class Parser{ -WLC2OSIS A ; Translate T ; -public final String MorphologicalSegmentStart = "<seg type=\"morph\">" ; -public final String MorphologicalSegmentEnd = "</seg>" ; -public final String MorphologicalDivisionMarker = MorphologicalSegmentEnd + MorphologicalSegmentStart; +public static final String MorphologicalSegmentStart = "<seg type=\"morph\">" ; +public static final String MorphologicalSegmentEnd = "</seg>" ; +public static final String MorphologicalDivisionMarker = MorphologicalSegmentEnd + MorphologicalSegmentStart; +public static String currentLine; +public static String nextLine; //----------------------------------------------------------------------------- -public Parser(WLC2OSIS A, boolean wlc_only) { - this.A = A ; - - T = new Translate(A, this) ; - +public Parser() { + T = new Translate() ; new MC() ; Note.setNotes(); - } - - -public void parse(){ - String s ; - System.out.println("\n"); +} +public void parse(String in_file, boolean wlc_only){ BufferedReader file; try{ - file = new BufferedReader( new FileReader( A.InputFilename )); + file = new BufferedReader( new FileReader( in_file )); } catch (IOException e) { file = null; @@ -42,27 +36,47 @@ public void parse(){ String oldBookCode = ""; String newBookCode = ""; + String nextBookCode = ""; int oldChapter = 0; int newChapter = 0; + int nextChapter = 0; int oldVerse = 0; int newVerse = 0; + int nextVerse = 0; int oldWordNumber = 0; int newWordNumber = 0; + int nextWordNumber = 0; + + String newWord = ""; + String oldWord = ""; + + currentLine = ""; + try{ + nextLine = file.readLine(); //skip first line, book intro + nextLine = file.readLine(); + } + catch (IOException e) { + System.out.println("Read error: " + e) ; + System.exit(1); + } // book chap vs word# subword# note word lemma homonym lang morph java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("(\\w\\w)(\\d+):(\\d+),(\\d+)\\.(\\d+)(\\]\\S)?\\s(\\S+)\\s([^_]+)(_\\d+)?(@|%)(\\S+)"); while ( true ){ - s=""; + currentLine = nextLine; try{ - s = file.readLine(); - System.out.println(s); + nextLine = file.readLine(); + if ( (nextLine != null) && nextLine.startsWith(">") ){ //skip this line, book intro + nextLine = file.readLine(); + } +// System.out.println("CurrentLine: "+currentLine); +// System.out.println("NextLine: "+nextLine); } catch (IOException e) { - System.out.println("Read error: " + e) ; - break; } - if ( s == null ){ + + if ( currentLine == null ){ try{ file.close(); } @@ -72,16 +86,34 @@ public void parse(){ break; } - if ( s.startsWith(">") ){ //ignore this line + else if ( !currentLine.startsWith("gn") ){ //limit to Genesis for presentation purposes continue; } - java.util.regex.Matcher match = pattern.matcher( s ); + + java.util.regex.Matcher match = pattern.matcher( currentLine ); if (!match.matches()){ - System.out.println("No match found: " +s); + System.out.println("No match found at line: " + currentLine); System.exit(1); } - + + java.util.regex.Matcher match_nextLine = null; + if (nextLine != null){ + match_nextLine = pattern.matcher( nextLine ); + if (match_nextLine.matches()){ + nextBookCode = match_nextLine.group(1); + nextChapter = Integer.parseInt( match_nextLine.group(2) ); + nextVerse = Integer.parseInt( match_nextLine.group(3) ); + nextWordNumber = Integer.parseInt( match_nextLine.group(4) ); + } + } + if ((match_nextLine == null) || !match_nextLine.matches()){ + nextBookCode = ""; + nextChapter = 0; + nextVerse = 0; + nextWordNumber = 0; + } + //remember old values oldBookCode = newBookCode; oldChapter = newChapter; @@ -93,9 +125,12 @@ public void parse(){ newChapter = Integer.parseInt( match.group(2) ); newVerse = Integer.parseInt( match.group(3) ); newWordNumber = Integer.parseInt( match.group(4) ); + //newSubWordNumber = Integer.parseInt( match.group(5) ); not used String note = match.group(6); - String word = match.group(7); + //remember pevious word value + oldWord = newWord; + newWord = match.group(7); String lemma = match.group(8); String homonym = match.group(9); if (homonym != null){ @@ -104,97 +139,116 @@ public void parse(){ String separator = match.group(10); String morph = match.group(11); + if ((newBookCode.equals(nextBookCode)) && (newChapter == nextChapter) && (newVerse == nextVerse) && (newWordNumber == nextWordNumber)) + Translate.do_setFinal = false; + else + Translate.do_setFinal = true; + // Verse changed, close old and open new if ((!newBookCode.equals(oldBookCode)) || (newChapter != oldChapter) || (newVerse != oldVerse)) { if (oldVerse > 0) { - A.writer.appendText("</w></verse>"); + WLC2OSIS.writer.appendText("</w></verse>"); } - A.writer.openTag( + WLC2OSIS.writer.openTag( "verse osisID=\""+ BookName.getBookName(bookNames, newBookCode).abbrev+"."+ newChapter+"."+ newVerse+"\"", 2); - if (separator.equals("@")){ - A.writer.appendText("<w xml:lang=\"he\">"); + if (wlc_only){ + WLC2OSIS.writer.appendText("<w>"); + } + else if (separator.equals("@")){ + WLC2OSIS.writer.appendText("<w xml:lang=\"he\">"); } else if (separator.equals("%")){ - A.writer.appendText("<w xml:lang=\"ah+\">"); + WLC2OSIS.writer.appendText("<w xml:lang=\"ah+\">"); } else { - System.out.println("unknown separator: "+s); + System.err.println("unknown separator: " + currentLine); System.exit(1); } } //same verse, another word, add space if ((oldVerse == newVerse) && (oldWordNumber != newWordNumber)){ - if (separator.equals("@")){ - A.writer.appendText("</w> <w xml:lang=\"he\">"); + //Last word had a maqqef, no space between words + if (oldWord.endsWith("-")) + WLC2OSIS.writer.appendText("</w>"); + //normal case, space between words + else + WLC2OSIS.writer.appendText("</w> "); + + if (wlc_only){ + WLC2OSIS.writer.appendText("<w>"); + } + else if (separator.equals("@")){ + WLC2OSIS.writer.appendText("<w xml:lang=\"he\">"); } else if (separator.equals("%")){ - A.writer.appendText("</w> <w xml:lang=\"ah+\">"); + WLC2OSIS.writer.appendText("<w xml:lang=\"ah+\">"); } else { - System.out.println("unknown separator: "+s); + System.err.println("unknown separator: " + currentLine); System.exit(1); } } //special case: nonprinting article, leave out for now // TODO: FIX - if (word.equals("_") || word.equals("*_") || word.equals("**_")){ + if (newWord.equals("_") || newWord.equals("*_") || newWord.equals("**_")){ continue; } //Qere / Ketiv only - else if (word.equals("**qq")){ - A.writer.appendText("**<note type=\"textual\" xml:lang=\"en\">Ketiv without Quere.</note>"); + else if (newWord.equals("**qq")){ + WLC2OSIS.writer.appendText("**<note type=\"textual\" xml:lang=\"en\">Ketiv without Quere.</note>"); continue; } - else if (word.equals("*kk")){ - A.writer.appendText("*<note type=\"textual\" xml:lang=\"en\">Qere without Ketiv.</note>"); + else if (newWord.equals("*kk")){ + WLC2OSIS.writer.appendText("*<note type=\"textual\" xml:lang=\"en\">Qere without Ketiv.</note>"); continue; } - // Paragraph marker found - if (morph.compareTo("x") == 0){ - if (word.compareTo("P") == 0){ // - A.writer.appendText(" "+constructWord(word, lemma, homonym, morph)+"<p/>"); + else if (morph.compareTo("x") == 0){ + if (newWord.compareTo("P") == 0){ // + WLC2OSIS.writer.appendText(" "+constructSegment(newWord, lemma, homonym, morph, wlc_only)+"<p/>"); } - else if (word.compareTo("S") == 0){ // - A.writer.appendText(" "+constructWord(word, lemma, homonym, morph)+" "); + else if (newWord.compareTo("S") == 0){ // + WLC2OSIS.writer.appendText(" "+constructSegment(newWord, lemma, homonym, morph, wlc_only)+" "); } - else if (word.compareTo("N") == 0){ //inverted nun - A.writer.appendText(" "+constructWord(word, lemma, homonym, morph)+" "); + else if (newWord.compareTo("N") == 0){ //inverted nun + WLC2OSIS.writer.appendText(" "+constructSegment(newWord, lemma, homonym, morph, wlc_only)+" "); + } + else { + System.err.println("Unknown paragraph marker: " + currentLine); + System.exit(1); } - else {System.out.println("Unknown paragraph marker: " + s); System.exit(1);} } - - - - //now the text itself - A.writer.appendText( constructWord(word, lemma, homonym, morph) ); + else{ + //now the text itself + WLC2OSIS.writer.appendText( constructSegment(newWord, lemma, homonym, morph, wlc_only) ); + } //Note found if (note != null && note.length() > 0){ //System.out.println("Note: "+s); - A.writer.appendText("<note type=\"textual\" xml:lang=\"en\">"+Note.Notes.get(note)+"</note>" ); + WLC2OSIS.writer.appendText("<note type=\"textual\" xml:lang=\"en\">"+Note.Notes.get(note)+"</note>" ); } } - A.writer.closeTag("verse", 2); - - return ; + WLC2OSIS.writer.appendText("</w></verse>"); } -//---------------------------------------------------------------------------- -public String constructWord(String word, String lemma, String homonym, String morph){ - String result = "<seg type=\"x-morph\" lemma=\""+T.convertCompoundWord(lemma) + "\" "; - if (homonym != null) { - result += "homonym=\""+homonym + "\" "; +public String constructSegment(String word, String lemma, String homonym, String morph, boolean wlc_only){ + String result = "<seg type=\"morph\""; + if (!wlc_only){ + result += " lemma=\""+T.convertCompoundWord(lemma)+"\""; + if (homonym != null) + result += " homonym=\""+homonym +"\""; + result += " morph=\""+morph+"\""; } - result += "morph=\""+morph+"\">"; + result += ">"; result += T.convertCompoundWord(word)+"</seg>"; return result; } diff --git a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/MCO.java b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/MCO.java index 1ba2f8f..a8cf5c1 100644 --- a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/MCO.java +++ b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/MCO.java @@ -94,6 +94,7 @@ public MCO( String Name, int Type, char Value, char FinalValue, int Group ) { public void print(){ System.out.print(Name + " + ") ; } + //----------------------------------------------------------------------------- public Object clone(){ diff --git a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Note.java b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Note.java index 0c9fcd5..e0717c5 100644 --- a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Note.java +++ b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Note.java @@ -9,7 +9,7 @@ import java.util.Hashtable ; //============================================================================== public class Note{ -public static Hashtable Notes = new Hashtable(); +public static Hashtable<String, String> Notes = new Hashtable<String, String>(); public static void setNotes(){ diff --git a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java index 0ec8944..968e7f7 100644 --- a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java +++ b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java @@ -1,360 +1,355 @@ -package WLC2OSIS.Translate ; +package WLC2OSIS.Translate; -import WLC2OSIS.* ; -import WLC2OSIS.Parse.* ; -import WLC2OSIS.Translate.Note ; +import WLC2OSIS.Parse.*; +import WLC2OSIS.Translate.Note; + +import java.util.Vector; -import java.util.Vector ; //============================================================================== /** * <b>Critical translation of KQ-free MC words into Unicode characters.</b><p> */ //============================================================================== -public class Translate{ - -private - Parser P ; - WLC2OSIS A ; - - MCO M; - MCO Mark ; - char c, c1 ; - int Type, I, k1, len ; - Vector MCOs, OrderedMCOs ; - int[] ConsonantPositions = new int[100] ; +public class Translate { + +private static MCO M; +private static MCO Mark; +private static char c, c1; +private static int Type, I, k1, len; +private static Vector<MCO> MCOs, OrderedMCOs; +private static int[] ConsonantPositions = new int[100]; + +//This controls if the last consonant may be changed into its final form +//will be set to true by Parser only if we are at the end of one word +public static boolean do_setFinal = true; + //----------------------------------------------------------------------------- -public Translate(WLC2OSIS A, Parser P) { - this.A = A ; - this.P = P ; - } +public Translate() { +} + //------------------------------------------------------------------------------ -public String convertCompoundWord(String W){ - if (W.contains("~")){ //compound word without maqqef +public String convertCompoundWord(String W) { + if (W.contains("~")) { //compound word without maqqef String[] tmp = W.split("~"); return convertWord(tmp[0]) + " " + convertWord(tmp[1]); - } - else if (W.contains("-") && !W.endsWith("-")){ //compound word with maqqef + } else if (W.contains("-") && !W.endsWith("-")) { //compound word with maqqef String[] tmp = W.split("-"); return convertWord(tmp[0]) + convertWord("-") + convertWord(tmp[1]); - } - else{ + } else { return convertWord(W); } } -public String convertWord(String W){ +public String convertWord(String W) { if (W.startsWith("**")) - return "**<note type=\"textual\" xml:lang=\"en\">Quere.</note>"+convertChars(W.substring(2)); + return "**<note type=\"textual\" xml:lang=\"en\">Quere.</note>" + + convertChars(W.substring(2)); else if (W.startsWith("*")) - return "*<note type=\"textual\" xml:lang=\"en\">Ketiv.</note>"+convertChars(W.substring(1)); - else return convertChars(W); + return "*<note type=\"textual\" xml:lang=\"en\">Ketiv.</note>" + + convertChars(W.substring(1)); + else + return convertChars(W); } // Translates an MC word (not qere or ketib) to a Unicode String. // Notes are included as <note type="textual">text of note</note>. -public String convertChars(String W){ - - len = W.length() ; - -// Convert characters in String to MCO objects, expanding -// ConsonantMarks, Numbers, and Notes as necessary. -// Move PrepositiveMarks to after their consonants. - - MCOs = new Vector() ; - for (int k = 0; k < len; k++) { - c = W.charAt(k) ; - - M = (MCO) (MC.getMCO(c)).clone() ; - Type = M.Type ; - -// Unknown - if(Type == MCO.Unknown){ - message("MC: MC Object is of type Unknown." - + "\nWord: " + W - + "\nCharacter: " + c - + "\n " ) ; - System.exit(1); - } - else if(Type <= 5){ // These types need no expansion. - MCOs.add(M) ; - } - else if(Type == MCO.Sheva ){ -// Might be a Hatef Vowel - k1 = k + 1 ; - if(k1 < len){ - c1 = W.charAt(k1) ; - Mark = (MCO) (MC.getMCO(c1)).clone() ; - if (Mark.Type == MCO.Vowel){ -// It is a Hatef vowel - MCO Hatef = new MCO("hataf"+Mark.Name, MCO.Vowel, - Mark.FinalValue, Mark.FinalValue, Mark.Group) ; - MCOs.add(Hatef) ; - k++ ; - } -// Not a Hatef Vowel - else{ - MCOs.add(M) ; - } - } -// Could only be a Sheva - else{ - MCOs.add(M) ; - } - } - else if(Type == MCO.ConsonantMark){ - Mark = (MCO) (M.Object).clone() ; - M.Type = MCO.Consonant ; - MCOs.add(M) ; - MCOs.add(Mark) ; - } -//----------------------------------------------------------------------------- - else if(Type == MCO.Number){ - k++ ; - String StringInt = "" ; - StringInt = StringInt + c ; - StringInt = StringInt + W.charAt(k) ; - - I = new Integer(StringInt).intValue() ; - if ((I > 99) | (I < 0) ){ - message("MC: Reconstructed int is out of range." - + "\nWord: " + W - + "\nCharacters: " + c + W.charAt(k) - + "\nint: " + I ) ; - } - else{ -// Found a Mark of some sort - MCO Found = MC.getMCO(I) ; - - if(Found.Type == MCO.PrepositiveMark){ -// System.out.println("***** Found a prepositive mark. " + I) ; - k++ ; - c1 = W.charAt(k) ; - M = (MCO) (MC.getMCO(c1)).clone() ; - if( M.Type == MCO.Consonant){ -// System.out.println("Swapping prepositive mark and consonant.") ; - MCOs.add(M) ; - MCOs.add(Found) ; - } -// *** Special section for MCO.ConsonantMark added 17 June 2004 *** - else if( M.Type == MCO.ConsonantMark ){ -// System.out.println("Swapping prepositive mark and consonant-mark.") ; - MCOs.add(M) ; - MCOs.add(M.Object) ; - MCOs.add(Found) ; - } - else{ - System.out.println("MC: PrepositiveMark not followed by a Consonant." - + "\nWord: " + W + "\nType: " + M.Type ) ; - MCOs.add(M) ; - System.exit(0) ; - } - } - else{ - MCOs.add(MC.getMCO(I)) ; - } - } - } -//----------------------------------------------------------------------------- - - else if(Type == MCO.Note){ - k++ ; - c1 = W.charAt(k) ; -// Check for an inverted nun. Assumes the note immediately follows a nun. - if (c1=='8'){ - MCO Mtemp = (MCO) MCOs.lastElement() ; - if( (Mtemp.Name).compareTo("nun") == 0) { - String in = H.Invertednun ; - Mtemp.Name = "invertednun" ; - Mtemp.Value = in ; - Mtemp.FinalValue = in ; -// The masoranumberdot fails to work in IE. Ezra SIL already provides a dot. -// Providing no accent. -// MCO ud = (MCO) (MC.getMCO(81)).clone() ; // revia 81, masora 96 -// MCOs.add(ud) ; - } - else{ - System.out.println("Prior MCO isn't a nun!") ; - } - } -// Leave a note - M.Value = Character.toString(c1) ; - MCOs.add(M) ; - } - else{ - System.out.println("MC: Unknown type for an MCO Object.") ; - } - } -//----------------------------------------------------------------------------- - -// At this point the Consonants are where they should be. -// Locating the consonants and FinalConsonants. - -// An incoming word a final consonant before a maqaf -// as well as at the end . - - int LastConsonant = -1 ; - int ConsonantIndex = 0 ; - int ConsonantCount = 0 ; - for (int k = 0; k < MCOs.size(); k++){ - M = (MCO) MCOs.elementAt(k) ; - -// *** Test for MCO.ConsonantMark added 17 June 2004 *** - if( M.Type == MCO.Consonant | M.Type == MCO.ConsonantMark ){ - - ConsonantPositions[ConsonantIndex] = k ; - LastConsonant = k ; - ConsonantIndex++ ; - ConsonantCount++ ; - } -// Look for a Maqef, if found, set the LastConsonant final. - if( (M.Name).compareTo("maqef") == 0){ - setFinal(LastConsonant) ; - } - } - - if (ConsonantCount > 0){ - setFinal(LastConsonant) ; - OrderedMCOs = new Vector() ; - int Limit = 0 ; - for( int ConsonantNumber = 0; ConsonantNumber < ConsonantCount; ConsonantNumber++) { - if (ConsonantNumber+1 == ConsonantCount){ - Limit = MCOs.size() ; - } - else{ - Limit = ConsonantPositions[ConsonantNumber+1] ; - } - Order(MCOs, ConsonantPositions[ConsonantNumber], Limit, OrderedMCOs ) ; - } - } - else{ - OrderedMCOs = MCOs ; - } - -// Output the ordered Vector - - String S = "" ; - for (int k = 0; k < OrderedMCOs.size(); k++){ - M = (MCO) OrderedMCOs.elementAt(k) ; - Type = M.Type ; -// Only Notes require special treatment - if (Type == MCO.Note){ - S = S + "<note type=\"textual\" xml:lang=\"en\">"+ Note.Notes.get( M.Value)+ "</note>"; - } -// //Mark morph segments when a maqef is present -// else if ( (M.Name).compareTo("maqef") == 0 ){ -// S = S + P.MorphologicalSegmentEnd + M.Value + P.MorphologicalSegmentStart; -// } - - else if ((Type == MCO.MorphologicalDivision)){ - S = S + P.MorphologicalDivisionMarker ; - } - else{ - S = S + M.Value ; - } - } - - return S ; - } +public String convertChars(String W) { + + len = W.length(); + + // Convert characters in String to MCO objects, expanding + // ConsonantMarks, Numbers, and Notes as necessary. + // Move PrepositiveMarks to after their consonants. + + MCOs = new Vector<MCO>(); + for (int k = 0; k < len; k++) { + c = W.charAt(k); + + M = (MCO) (MC.getMCO(c)).clone(); + Type = M.Type; + + // Unknown + if (Type == MCO.Unknown) { + errorMessage("MC: MC Object is of type Unknown." + "\nWord: " + W + + "\nCharacter: " + c + + "\n "); + } else if (Type <= 5) { // These types need no expansion. + MCOs.addElement(M); + } else if (Type == MCO.Sheva) { + // Might be a Hatef Vowel + k1 = k + 1; + if (k1 < len) { + c1 = W.charAt(k1); + Mark = (MCO) (MC.getMCO(c1)).clone(); + if (Mark.Type == MCO.Vowel) { + // It is a Hatef vowel + MCO Hatef = new MCO("hataf" + Mark.Name, MCO.Vowel, + Mark.FinalValue, Mark.FinalValue, Mark.Group); + MCOs.add(Hatef); + k++; + } + // Not a Hatef Vowel + else { + MCOs.add(M); + } + } + // Could only be a Sheva + else { + MCOs.add(M); + } + } else if (Type == MCO.ConsonantMark) { + Mark = (MCO) (M.Object).clone(); + M.Type = MCO.Consonant; + MCOs.add(M); + MCOs.add(Mark); + } + //----------------------------------------------------------------------------- + else if (Type == MCO.Number) { + k++; + String StringInt = ""; + StringInt = StringInt + c; + StringInt = StringInt + W.charAt(k); + + I = new Integer(StringInt).intValue(); + if ((I > 99) | (I < 0)) { + errorMessage("MC: Reconstructed int is out of range." + + "\nWord: " + W + "\nCharacters: " + c + W.charAt(k) + + "\nint: " + I); + } else { + // Found a Mark of some sort + MCO Found = MC.getMCO(I); + + if (Found.Type == MCO.PrepositiveMark) { + // System.out.println("***** Found a prepositive mark. " + I) ; + k++; + c1 = W.charAt(k); + M = (MCO) (MC.getMCO(c1)).clone(); + if (M.Type == MCO.Consonant) { + // System.out.println("Swapping prepositive mark and consonant.") ; + MCOs.add(M); + MCOs.add(Found); + } + // *** Special section for MCO.ConsonantMark added 17 June 2004 *** + else if (M.Type == MCO.ConsonantMark) { + // System.out.println("Swapping prepositive mark and consonant-mark.") ; + MCOs.add(M); + MCOs.add(M.Object); + MCOs.add(Found); + } else { + System.out + .println("MC: PrepositiveMark not followed by a Consonant." + + "\nWord: " + W + "\nType: " + M.Type); + MCOs.add(M); + System.exit(0); + } + } else { + MCOs.add(MC.getMCO(I)); + } + } + } + //----------------------------------------------------------------------------- + + else if (Type == MCO.Note) { + k++; + c1 = W.charAt(k); + // Check for an inverted nun. Assumes the note immediately follows a nun. + if (c1 == '8') { + MCO Mtemp = (MCO) MCOs.lastElement(); + if ((Mtemp.Name).compareTo("nun") == 0) { + String in = H.Invertednun; + Mtemp.Name = "invertednun"; + Mtemp.Value = in; + Mtemp.FinalValue = in; + // The masoranumberdot fails to work in IE. Ezra SIL already provides a dot. + // Providing no accent. + // MCO ud = (MCO) (MC.getMCO(81)).clone() ; // revia 81, masora 96 + // MCOs.add(ud) ; + } else { + System.out.println("Prior MCO isn't a nun!"); + } + } + // Leave a note + M.Value = Character.toString(c1); + MCOs.add(M); + } else { + System.out.println("MC: Unknown type for an MCO Object."); + } + } + //----------------------------------------------------------------------------- + + // At this point the Consonants are where they should be. + // Locating the consonants and FinalConsonants. + + // An incoming word a final consonant before a maqaf + // as well as at the end . + + int LastConsonant = -1; + int ConsonantIndex = 0; + int ConsonantCount = 0; + for (int k = 0; k < MCOs.size(); k++) { + M = (MCO) MCOs.elementAt(k); + + if ((M.Type == MCO.Consonant) || (M.Type == MCO.ConsonantMark)) { + ConsonantPositions[ConsonantIndex] = k; + LastConsonant = k; + ConsonantIndex++; + ConsonantCount++; + } + // Look for a Maqef, if found, set the LastConsonant final. + if ((M.Name).compareTo("maqef") == 0) { + setFinal(LastConsonant); + } + } + + if (ConsonantCount > 0) { + setFinal(LastConsonant); + OrderedMCOs = new Vector<MCO>(); + int Limit = 0; + for (int ConsonantNumber = 0; ConsonantNumber < ConsonantCount; ConsonantNumber++) { + if (ConsonantNumber + 1 == ConsonantCount) { + Limit = MCOs.size(); + } else { + Limit = ConsonantPositions[ConsonantNumber + 1]; + } + Order(MCOs, ConsonantPositions[ConsonantNumber], Limit, OrderedMCOs); + } + } else { + OrderedMCOs = MCOs; + } + + // Output the ordered Vector + + String S = ""; + for (int k = 0; k < OrderedMCOs.size(); k++) { + M = (MCO) OrderedMCOs.elementAt(k); + Type = M.Type; + // Only Notes require special treatment + if (Type == MCO.Note) { + S = S + "<note type=\"textual\" xml:lang=\"en\">" + + Note.Notes.get(M.Value) + "</note>"; + } + // //Mark morph segments when a maqef is present + // else if ( (M.Name).compareTo("maqef") == 0 ){ + // S = S + P.MorphologicalSegmentEnd + M.Value + P.MorphologicalSegmentStart; + // } + + else if ((Type == MCO.MorphologicalDivision)) { + S = S + Parser.MorphologicalDivisionMarker; + } else { + S = S + M.Value; + } + } + + return S; +} + //----------------------------------------------------------------------------- // Order the Marks following a Consonant. -void Order( Vector MCOs, int StartIndex, int Limit, Vector OrderedMCOs) { - MCO m ; - -// Check for no Marks - if(StartIndex+1==Limit){ - m = (MCO) MCOs.elementAt(StartIndex) ; - OrderedMCOs.add(m) ; - return ; - } -// Check for one Mark - if(StartIndex+2==Limit){ - m = (MCO) MCOs.elementAt(StartIndex) ; - OrderedMCOs.add(m) ; - m = (MCO) MCOs.elementAt(StartIndex+1) ; - OrderedMCOs.add(m) ; - return ; - } - -// Two or more Marks - -// Save the Consonant - m = (MCO) MCOs.elementAt(StartIndex) ; - OrderedMCOs.add(m) ; - -// Order the marks - int MarkCount = (Limit-StartIndex)-1 ; - boolean[] Written = new boolean[MarkCount] ; - for (int k =0; k < MarkCount; k++){ - Written[k] = false ; - } - - int WrittenCount = 0 ; - do{ -// Find the MCO with the smallest possible Group value and write it. - int GroupTest = 1000 ; - int MCOMin = -1 ; - for( int k = StartIndex+1; k < Limit; k++){ - if(!Written[ k -(StartIndex+1) ]){ - m = (MCO) MCOs.elementAt(k) ; - int g = m.Group ; - if ( g < GroupTest ){ - GroupTest = g ; - MCOMin = k ; - } - } - } - m = (MCO) MCOs.elementAt(MCOMin) ; - OrderedMCOs.add(m) ; - Written[MCOMin -(StartIndex+1) ] = true ; - WrittenCount++ ; - }while(WrittenCount < MarkCount) ; - - } +void Order(Vector<MCO> MCOs, int StartIndex, int Limit, Vector<MCO> OrderedMCOs) { + MCO m; + + // Check for no Marks + if (StartIndex + 1 == Limit) { + m = (MCO) MCOs.elementAt(StartIndex); + OrderedMCOs.add(m); + return; + } + // Check for one Mark + if (StartIndex + 2 == Limit) { + m = (MCO) MCOs.elementAt(StartIndex); + OrderedMCOs.add(m); + m = (MCO) MCOs.elementAt(StartIndex + 1); + OrderedMCOs.add(m); + return; + } + + // Two or more Marks + + // Save the Consonant + m = (MCO) MCOs.elementAt(StartIndex); + OrderedMCOs.add(m); + + // Order the marks + int MarkCount = (Limit - StartIndex) - 1; + boolean[] Written = new boolean[MarkCount]; + for (int k = 0; k < MarkCount; k++) { + Written[k] = false; + } + + int WrittenCount = 0; + do { + // Find the MCO with the smallest possible Group value and write it. + int GroupTest = 1000; + int MCOMin = -1; + for (int k = StartIndex + 1; k < Limit; k++) { + if (!Written[k - (StartIndex + 1)]) { + m = (MCO) MCOs.elementAt(k); + int g = m.Group; + if (g < GroupTest) { + GroupTest = g; + MCOMin = k; + } + } + } + m = (MCO) MCOs.elementAt(MCOMin); + OrderedMCOs.add(m); + Written[MCOMin - (StartIndex + 1)] = true; + WrittenCount++; + } while (WrittenCount < MarkCount); + +} + //----------------------------------------------------------------------------- // Output a message plus position. -public void message(String m){ - System.out.print(m) ; -// P.printPosition() ; - } +public void errorMessage(String m) { + System.err.println(m); + System.err.println("Current line: " + Parser.currentLine); + System.exit(1); +} + //----------------------------------------------------------------------------- // Output a message plus position. -public void print(Vector v){ - System.out.println("\n") ; - for (int k = 0; k < v.size(); k++){ - MCO m = (MCO) v.elementAt(k) ; - m.print() ; - } - System.out.println("\n") ; - } +public void print(Vector v) { + System.out.println("\n"); + for (int k = 0; k < v.size(); k++) { + MCO m = (MCO) v.elementAt(k); + m.print(); + } + System.out.println("\n"); +} + //----------------------------------------------------------------------------- // Set a consonant as final. -public void setFinal(int Position){ -// Do nothing if there's no Position to set - if (Position < 0) return ; - - M = (MCO) MCOs.elementAt(Position) ; - if (M.Type == MCO.Consonant){ - MCO Final = new MCO("final" + M.Name, MCO.Consonant, M.FinalValue, M.FinalValue, - M.Group) ; - MCOs.setElementAt(Final, Position) ; - } - else{ - message("LastConsonant is not a consonant!") ; - } - return ; - } -//----------------------------------------------------------------------------- +public void setFinal(int Position) { + // Do nothing if setFinal is not set; we are in the midst of a word, but not at its end + if (!do_setFinal) + return; + // Do nothing if there's no Position to set + if (Position < 0) + return; + + M = (MCO) MCOs.elementAt(Position); + if ((M.Type == MCO.Consonant) || (M.Type == MCO.ConsonantMark)) { + MCO Final = new MCO("final" + M.Name, MCO.Consonant, M.FinalValue, + M.FinalValue, M.Group); + MCOs.setElementAt(Final, Position); + } else { + String err = "LastConsonant is not a consonant!\n"; + for (int i = 0; i < MCOs.size(); i++) + err += "MCOs[" + i + "] == " + MCOs.elementAt(i).Name + "\n"; + err += "Position " + Position + "\n"; + errorMessage(err); + } } -//============================================================================== -//============================================================================== +}
\ No newline at end of file diff --git a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java index fcdbea0..3cc11f4 100644 --- a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java +++ b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java @@ -13,12 +13,12 @@ import WLC2OSIS.Utilities.XMLWriter ; public class WLC2OSIS{ // Define the title and descriptions. -public String Title = "The Westminster Leningrad Codex (WLC)" ; +private String Title = "The Westminster Leningrad Codex (WLC)" ; -public String ShortDescription = "from the electronic version of the Leningrad Codex " +private String ShortDescription = "from the electronic version of the Leningrad Codex " + "maintained by the Westminster Hebrew Institute." ; -public String[] Description = { +private String[] Description = { "This text began as an electronic transcription by Whitaker and Parunak of the 1983 " + "printed edition of Biblia Hebraica Stuttgartensia (BHS). The " + "transcription is called the Michigan-Claremont electronic text and was archived " @@ -51,25 +51,27 @@ public String[] Description = { "The book names in English and Hebrew of the Jewish Publication Society " + "(JPS) Tanach have been added."} ; +public static XMLWriter writer; +public static String OutputDirectory ; -public Parser p ; -public XMLWriter writer; -public String InputFilename; -public String OutputDirectory ; - -public WLC2OSIS( String file, String directory ){ +public WLC2OSIS( String in_file, String out_directory, String out_file, boolean wlc_only ){ - InputFilename = file ; - OutputDirectory = directory ; + OutputDirectory = out_directory ; - System.out.println("\nWLC2OSIS: " + Title + " " + ShortDescription ) ; - System.out.println("\nInput file: " + InputFilename ) ; - System.out.println("\nOutput directory: " + OutputDirectory ) ; + System.out.println("WLC2OSIS: " + Title + " " + ShortDescription ); + if (wlc_only){ + System.out.println("Not including MORPH data."); + } + else{ + System.out.println("Including full MORPH data."); + } + System.out.println("Input file: " + in_file ); + System.out.println("Output file: " + OutputDirectory + "/" + out_file ); // Read, parse, and write the book files. - writer = new XMLWriter(OutputDirectory, "wlc_morph") ; + writer = new XMLWriter(OutputDirectory, out_file) ; writer.openTag("osisText osisIDWork=\"writer\" osisRefWork=\"bible\" xml:lang=\"he\"", 0) ; writer.openTag("header", 0) ; @@ -81,20 +83,20 @@ public WLC2OSIS( String file, String directory ){ writer.writeAttributedString("type", 2, "type=\"OSIS\"", "Bible"); writer.writeAttributedString("identifier", 2, "type=\"OSIS\"", "Bible.he.writer.2004"); writer.writeAttributedString("rights", 2, "type=\"x-copyright\"", - "The writer is maintained by the Westminster Hebrew Institute, Philadelphia, PA (http://whi.wts.edu/WHI)"); + "The WLC is maintained by the Westminster Hebrew Institute, Philadelphia, PA (http://whi.wts.edu/WHI)"); writer.writeString("scope", 2, "Hebrew Bible, Old Testament"); writer.writeString("refSystem", 2, "MT"); writer.closeTag("work", 1); writer.closeTag("header", 0); - p = new Parser(this, false) ; - p.parse() ; + Parser p = new Parser() ; + p.parse(in_file, wlc_only) ; writer.closeTag("osisText", 0); writer.close(); - System.out.println("\nWLC2OSIS: Normal end.") ; + System.out.println("Finished.") ; } }
\ No newline at end of file diff --git a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSISMain.java b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSISMain.java index ece32c7..7e563ff 100644 --- a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSISMain.java +++ b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSISMain.java @@ -1,17 +1,18 @@ package WLC2OSIS ; public class WLC2OSISMain{ - -static WLC2OSIS A ; - /** - * Static main() method - * + * Static main() method * @param args String[] */ public static void main( String[] args) { - A = new WLC2OSIS( args[0], args[1] ) ; - System.exit(0) ; + { + WLC2OSIS A = new WLC2OSIS( args[0], args[1], "wlc_morph", false ) ; + } + { + WLC2OSIS A = new WLC2OSIS( args[0], args[1], "wlc", true ) ; + } + System.exit(0); } } //class |