From ee9a6e0ba97aca3d7e49322641f364c4d079786f Mon Sep 17 00:00:00 2001 From: Martin Gruner Date: Thu, 27 Jul 2006 19:53:15 +0000 Subject: some work, still not functional git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@74 07627401-56e2-0310-80f4-f8cd0041bdcd --- .../WLC2OSIS/WLC2OSIS/Parse/Markers.java | 30 +---- .../hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java | 99 +++++++--------- .../hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java | 127 --------------------- .../WLC2OSIS/WLC2OSIS/Translate/Translate.java | 2 +- 4 files changed, 43 insertions(+), 215 deletions(-) delete mode 100644 modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java (limited to 'modules/hebrew-wlc/WLC2OSIS') diff --git a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java index 2485d0d..a4236d6 100644 --- a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java +++ b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java @@ -27,44 +27,18 @@ public Markers(WLC2OSIS A, Parser P ) { // Samek public void samek(){ //parasah setumah, closed paragraph == small space in line - testMaqafWord() ; -// A.wlc.writeMarker("samekh", 4) ; A.writer.appendText(" " + H.samekh + " ") ; - P.MarkerWritten = true ; +// P.MarkerWritten = true ; } //------------------------------------------------------------------------------ // Pe public void pe(){ // parasah petuhah, open paragraph == new line - testMaqafWord() ; -// A.writer.writeMarker("pe", 4) ; A.writer.appendText(" " + H.pe + " " + "

") ; - P.MarkerWritten = true ; +// P.MarkerWritten = true ; } //------------------------------------------------------------------------------ -// Line -public void line(){ - System.out.println("Markers: End-of-line encountered!") ; - } -//---------------------------------------------------------------------------- -/** - * Tests for a preceding trailing maqaf word and writes it. - * - * Before any marker is written, the TrailingMaqaf flag must be - * tested. If a trailing maqaf word precedes the mark, it must be - * written before the marker. - * - * Apparemtly ONLY EOLs cause this test to be activated. - */ -void testMaqafWord(){ - if (P.w.TrailingMaqaf){ -// System.out.print("Markers: Marker follows trailing maqaf at ") ; -// P.printPosition() ; - P.w.writeWord(P.w.MaqafWord, P.w.MaqafWordType) ; - P.w.TrailingMaqaf = false ; - } - } } //============================================================================== //============================================================================== diff --git a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java index d3ac78e..3f29b9c 100644 --- a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java +++ b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java @@ -3,41 +3,15 @@ package WLC2OSIS.Parse ; import java.io.*; import WLC2OSIS.* ; import WLC2OSIS.Translate.* ; -//============================================================================== -/** - * Parser dispatches tokens to Books, Chapters, Markers, Tanach, - * Verses, and Words start/end methods, special to WLC. - * - * Extensively modified for WLC. - */ -//============================================================================== + public class Parser{ WLC2OSIS A ; - -// Working classes - -public Words w ; Translate T ; -public WKQ wkq ; - -// Current state - -public boolean MarkerWritten ; // Indicates a marker has been written - // between two words. - -// Assorted counts -public int ChapterVerseCount ; -public int BookVerseCount ; -public int BookChapterCount ; - -public String MorphologicalSegmentStart = "" ; -public String MorphologicalSegmentEnd = "" ; -public String MorphologicalDivisionMarker = MorphologicalSegmentEnd + MorphologicalSegmentStart; - - -BufferedReader file; +public final String MorphologicalSegmentStart = "" ; +public final String MorphologicalSegmentEnd = "" ; +public final String MorphologicalDivisionMarker = MorphologicalSegmentEnd + MorphologicalSegmentStart; //----------------------------------------------------------------------------- public Parser(WLC2OSIS A, boolean wlc_only) { @@ -45,8 +19,6 @@ public Parser(WLC2OSIS A, boolean wlc_only) { T = new Translate(A, this) ; - w = new Words(A, this) ; - wkq = new WKQ(this) ; new MC() ; Note.setNotes(); } @@ -55,21 +27,29 @@ public Parser(WLC2OSIS A, boolean wlc_only) { public void parse(){ String s ; System.out.println("\n"); - + + BufferedReader file; + try{ file = new BufferedReader( new FileReader( A.InputFilename )); } catch (IOException e) { + file = null; System.out.println("File not found: " + e) ; } BookName[] bookNames = BookName.setBookNames(); String oldBookCode = ""; + String newBookCode = ""; int oldChapter = 0; + int newChapter = 0; int oldVerse = 0; + int newVerse = 0; int oldWordNumber = 0; + int newWordNumber = 0; int oldSubWordNumber = 0; + int newSubWordNumber = 0; java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("(\\w\\w)(\\d+):(\\d+),(\\d+)\\.(\\d+)(\\]\\S)?\\s(\\S+)\\s(\\S+)(@|%)(\\S+)"); @@ -103,18 +83,19 @@ public void parse(){ System.exit(1); } -// Parse the identifier - String newBookCode = match.group(1); - int newChapter = Integer.parseInt( match.group(2) ); - int newVerse = Integer.parseInt( match.group(3) ); - int newWordNumber = Integer.parseInt( match.group(4) ); - int newSubWordNumber = Integer.parseInt( match.group(5) ); + // Parse the identifier + newBookCode = match.group(1); + newChapter = Integer.parseInt( match.group(2) ); + newVerse = Integer.parseInt( match.group(3) ); + newWordNumber = Integer.parseInt( match.group(4) ); + newSubWordNumber = Integer.parseInt( match.group(5) ); String note = match.group(6); - String expression = match.group(7); + String word = match.group(7); String lemma = match.group(8); String separator = match.group(9); - String grammar = match.group(10); + String morph = match.group(10); + // Verse changed, close old and open new if ((newBookCode != oldBookCode) || (newChapter != oldChapter) || (newVerse != oldVerse)) { if (oldVerse >= 1) A.writer.closeTag("verse", 2); @@ -130,16 +111,26 @@ public void parse(){ A.writer.appendText(" "); } - System.out.println("Expression: " + expression); + //System.out.println("Expression: " + word); - w.process(expression); - - oldBookCode = newBookCode; + // Paragraph marker found + if (morph == "x"){ + System.out.println("paragraph marker found!"); + if (word == "P"){ + A.writer.appendText(" "+constructWord(word, lemma, morph)+"

"); + } + else if (word == "S"){ + A.writer.appendText(" "+constructWord(word, lemma, morph)+" "); + } + else {System.out.println("Unknown marker."); System.exit(1);} + } + + //remember + oldBookCode = newBookCode; oldChapter = newChapter; oldVerse = newVerse; oldWordNumber = newWordNumber; oldSubWordNumber = newSubWordNumber; - } A.writer.closeTag("verse", 2); @@ -148,18 +139,8 @@ public void parse(){ } //---------------------------------------------------------------------------- -// Counts the number of occurences of a character in a String. - -public int countChar(String W, char c){ - int Count = 0 ; - for(int k=0; k < W.length(); k++){ - if(W.charAt(k)==c){ - Count++ ; - } - } - return Count ; - } -//---------------------------------------------------------------------------- - +public String constructWord(String word, String lemma, String morph){ + return ""+word+""; +} } diff --git a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java deleted file mode 100644 index 66877d8..0000000 --- a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java +++ /dev/null @@ -1,127 +0,0 @@ -package WLC2OSIS.Parse ; - -import WLC2OSIS.* ; -import WLC2OSIS.Translate.H ; -//============================================================================== -/** - * Processes words, sending them to the Translate class - * after their word, qere, ketiv properties have been determined. - */ -//============================================================================== -public class Words{ - -WLC2OSIS A ; -Parser P ; - -boolean TrailingMaqaf ; -String MaqafWord ; -String MaqafWordType ; -Markers m ; - - -//----------------------------------------------------------------------------- - -public Words(WLC2OSIS A, Parser P ) { - this.A = A ; - this.P = P ; - m = new Markers(A, P) ; - - } -//------------------------------------------------------------------------------ - -// Processes a word, -public void process(String W){ - if( W.compareTo("P")==0){ - m.pe(); - } - else if( W.compareTo("S")==0){ - m.samek() ; - } - else{ - P.wkq.process(W); - } -} - - -// Translates and writes a word (simple, ketib, qere) with exception markers. -// All returns leave P.MarkerWritten = false ; -public void write(String W, String Type) { - - String Word = P.T.translate(W) ; - -// Check for any KQ markers which should NOT be here! - - int asteriskcount = P.countChar(W, '*') ; - - if (asteriskcount > 0){ - System.out.print("Words.write: Unexpected KQ character * ") ; - System.out.println("Word: " + W) ; - } - -// Look for a case in which there's been trailing maqaf -// without an intervening marker. - - if(TrailingMaqaf & !P.MarkerWritten ){ - if(MaqafWordType.charAt(0)== Type.charAt(0) ){ - Word = MaqafWord+Word ; // Combine them. - } - else{ - writeWord(MaqafWord, MaqafWordType) ; - TrailingMaqaf = false ; - } - } - -// Check for a trailing maqaf. -// Don't write the word here. - - TrailingMaqaf = false ; - if( Word.charAt(Word.length()-1) == H.maqaf){ - TrailingMaqaf = true ; - MaqafWord = Word ; - MaqafWordType = Type ; - P.MarkerWritten = false ; - return ; - } - - writeWord(Word, Type) ; - } -//---------------------------------------------------------------------------------- - -public void writeWord(String Word, String Type) { - -// Check for any exception markers ]x - - String Out = "" ; - for (int k=0 ; k < Word.length() ; k++){ - char c = Word.charAt(k) ; - if(c == ']'){ - k++ ; - char ExceptionValue = Word.charAt(k) ; - Out = Out + "" + ExceptionValue +"" ; - System.out.println("Exception occured"); - } - else{ - Out = Out + c ; - } - } - - if (Type.charAt(0) == 'w') { - A.writer.appendText(P.MorphologicalSegmentStart + Out + P.MorphologicalSegmentEnd) ; - } - else if (Type.charAt(0) == 'k') { - A.writer.appendText("[" + P.MorphologicalSegmentStart + Out + P.MorphologicalSegmentEnd + " " + H.kaf + "]") ; - } - else if (Type.charAt(0) == 'q') { - A.writer.appendText("("+P.MorphologicalSegmentStart + Out + P.MorphologicalSegmentEnd + " " + H.qof+ ")") ; - } - else { - System.out.println("Warning: unknown word type!"); -// P.printPosition(); - System.exit(0); - } - P.MarkerWritten = false ; - } -} - -//============================================================================== -//============================================================================== diff --git a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java index 8c70c30..13aba3c 100644 --- a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java +++ b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java @@ -30,7 +30,7 @@ public Translate(WLC2OSIS A, Parser P) { //------------------------------------------------------------------------------ // Translates an MC word (not qere or ketib) to a Unicode String. -// Notes are included as text of note. +// Notes are included as text of note. public String translate(String W){ len = W.length() ; -- cgit