From ee9a6e0ba97aca3d7e49322641f364c4d079786f Mon Sep 17 00:00:00 2001
From: Martin Gruner <mg.pub@gmx.net>
Date: Thu, 27 Jul 2006 19:53:15 +0000
Subject: some work, still not functional

git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@74 07627401-56e2-0310-80f4-f8cd0041bdcd
---
 .../WLC2OSIS/WLC2OSIS/Parse/Markers.java           |  30 +----
 .../hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java |  99 +++++++---------
 .../hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java  | 127 ---------------------
 .../WLC2OSIS/WLC2OSIS/Translate/Translate.java     |   2 +-
 4 files changed, 43 insertions(+), 215 deletions(-)
 delete mode 100644 modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java

(limited to 'modules/hebrew-wlc/WLC2OSIS')
diff --git a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java
index 2485d0d..a4236d6 100644
--- a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java
+++ b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java
@@ -27,44 +27,18 @@ public Markers(WLC2OSIS A, Parser P ) {
 
 // Samek
 public void samek(){ //parasah setumah, closed paragraph == small space in line
-    testMaqafWord() ;
-//     A.wlc.writeMarker("samekh", 4) ;
     A.writer.appendText("   " + H.samekh + "   ") ;
-    P.MarkerWritten = true ;
+//    P.MarkerWritten = true ;
     }
 //------------------------------------------------------------------------------
 
 // Pe
 public void pe(){ // parasah petuhah, open paragraph == new line
-    testMaqafWord() ;
-//     A.writer.writeMarker("pe", 4) ;
     A.writer.appendText(" " + H.pe + " " + "<p/>") ;
-    P.MarkerWritten = true ;
+//    P.MarkerWritten = true ;
     }
 //------------------------------------------------------------------------------
 
-// Line
-public void line(){
-    System.out.println("Markers: End-of-line encountered!") ;
-    }
-//----------------------------------------------------------------------------
-/**
- *  Tests for a preceding trailing maqaf word and writes it. 
- *
- *  Before any marker is written, the TrailingMaqaf flag must be
- *  tested.  If a trailing maqaf word precedes the mark, it must be
- *  written before the marker.
- *
- *  Apparemtly ONLY EOLs cause this test to be activated.
- */
-void testMaqafWord(){
-   if (P.w.TrailingMaqaf){
-//       System.out.print("Markers: Marker follows trailing maqaf at ") ;
-//           P.printPosition() ;
-       P.w.writeWord(P.w.MaqafWord, P.w.MaqafWordType) ;
-       P.w.TrailingMaqaf = false ;
-       }
-   }
 }
 //==============================================================================
 //==============================================================================
diff --git a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
index d3ac78e..3f29b9c 100644
--- a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
+++ b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
@@ -3,41 +3,15 @@ package WLC2OSIS.Parse ;
 import java.io.*;
 import WLC2OSIS.* ;
 import WLC2OSIS.Translate.* ;
-//==============================================================================
-/**
- *  <b>Parser dispatches tokens to  Books, Chapters, Markers, Tanach,
- *     Verses, and Words start/end methods, special to WLC. </b>
- *
- *  Extensively modified for WLC.
- */
-//==============================================================================
+
 public class Parser{
 
 WLC2OSIS A ;
-
-//  Working classes
-
-public Words w ;
 Translate T ;
-public WKQ wkq ;
-
-// Current state
-
-public boolean MarkerWritten ;  // Indicates a marker has been written
-                                // between two words.
-
-// Assorted counts
-public int ChapterVerseCount ;
-public int BookVerseCount ;
-public int BookChapterCount ;
-
-public String MorphologicalSegmentStart  = "<seg type=\"morph\">" ;
-public String MorphologicalSegmentEnd    = "</seg>" ;
-public String MorphologicalDivisionMarker = MorphologicalSegmentEnd + MorphologicalSegmentStart;
-
-
-BufferedReader file;
 
+public final String MorphologicalSegmentStart  = "<seg type=\"morph\">" ;
+public final String MorphologicalSegmentEnd    = "</seg>" ;
+public final String MorphologicalDivisionMarker = MorphologicalSegmentEnd + MorphologicalSegmentStart;
 
 //-----------------------------------------------------------------------------
 public Parser(WLC2OSIS A, boolean wlc_only) {
@@ -45,8 +19,6 @@ public Parser(WLC2OSIS A, boolean wlc_only) {
 
     T = new Translate(A, this) ;
   
-    w = new Words(A, this) ;
-    wkq = new WKQ(this) ;
     new MC() ;
     Note.setNotes();
     }    
@@ -55,21 +27,29 @@ public Parser(WLC2OSIS A, boolean wlc_only) {
 public void parse(){
     String s ;
     System.out.println("\n");
-    
+
+    BufferedReader file;
+
    	try{
 	    file = new BufferedReader( new FileReader( A.InputFilename ));
 	}
 	catch (IOException e) {
+		file = null;
 		System.out.println("File not found: " + e) ;
     }
 	
 	BookName[] bookNames = BookName.setBookNames();
 	
 	String oldBookCode = "";
+	String newBookCode = "";
 	int oldChapter = 0;
+	int newChapter = 0;
 	int oldVerse = 0;
+	int newVerse = 0;
 	int oldWordNumber = 0;
+	int newWordNumber = 0;
 	int oldSubWordNumber = 0;
+	int newSubWordNumber = 0;
 
 
     java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("(\\w\\w)(\\d+):(\\d+),(\\d+)\\.(\\d+)(\\]\\S)?\\s(\\S+)\\s(\\S+)(@|%)(\\S+)");
@@ -103,18 +83,19 @@ public void parse(){
 			System.exit(1);
 		}
 		
-// Parse the identifier
-		String newBookCode = match.group(1);
-		int newChapter = Integer.parseInt( match.group(2) );
-		int newVerse   = Integer.parseInt( match.group(3) );
-		int newWordNumber	= Integer.parseInt( match.group(4) );
-		int newSubWordNumber = Integer.parseInt( match.group(5) );
+		// Parse the identifier
+		newBookCode = match.group(1);
+		newChapter = Integer.parseInt( match.group(2) );
+		newVerse   = Integer.parseInt( match.group(3) );
+		newWordNumber	= Integer.parseInt( match.group(4) );
+		newSubWordNumber = Integer.parseInt( match.group(5) );
 		String note = match.group(6);
-		String expression = match.group(7);
+		String word = match.group(7);
 		String lemma	= match.group(8);
 		String separator = match.group(9);
-		String grammar  = match.group(10);
+		String morph  = match.group(10);
 		
+		// Verse changed, close old and open new
 		if ((newBookCode != oldBookCode) || (newChapter != oldChapter) || (newVerse != oldVerse))
 		{
 			if (oldVerse >= 1) A.writer.closeTag("verse", 2);
@@ -130,16 +111,26 @@ public void parse(){
 			A.writer.appendText(" ");
 		}
 		
-		System.out.println("Expression: " + expression);
+		//System.out.println("Expression: " + word);
 		
-		w.process(expression);
-  
-	    oldBookCode = newBookCode;
+		// Paragraph marker found (morph code "x"); compare with equals(), since == on Strings tests object identity, not content
+		if ("x".equals(morph)){
+			System.out.println("paragraph marker found!");
+			if ("P".equals(word)){ // "P" marker: segment is followed by <p/>
+				A.writer.appendText("  "+constructWord(word, lemma, morph)+"<p/>");
+			}
+			else if ("S".equals(word)){ // "S" marker: segment is padded with spaces, no <p/>
+				A.writer.appendText("  "+constructWord(word, lemma, morph)+"  ");
+			}
+			else {System.out.println("Unknown marker."); System.exit(1);} // any other marker word aborts the conversion
+		}
+		
+		//remember
+		oldBookCode = newBookCode;
 	    oldChapter = newChapter;
 	    oldVerse = newVerse;
 	    oldWordNumber = newWordNumber;
 	    oldSubWordNumber = newSubWordNumber;
-
     }
     
     A.writer.closeTag("verse", 2);
@@ -148,18 +139,8 @@ public void parse(){
 }
 //----------------------------------------------------------------------------
 
-// Counts the number of occurences of a character in a String.
-
-public int countChar(String W, char c){
-   int Count = 0 ;
-   for(int k=0; k < W.length(); k++){
-       if(W.charAt(k)==c){
-           Count++ ;
-           }
-       }
-   return Count ;
-   }
-//----------------------------------------------------------------------------
-
+public String constructWord(String word, String lemma, String morph){ // builds a <seg type="x-morph"> element around word; lemma and morph become attributes, all inserted verbatim (no XML escaping)
+	return "<seg type=\"x-morph\" lemma=\""+lemma+"\" morph=\""+morph+"\">"+word+"</seg>";
+}
 
 }
diff --git a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java
deleted file mode 100644
index 66877d8..0000000
--- a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java
+++ /dev/null
@@ -1,127 +0,0 @@
-package WLC2OSIS.Parse ;
-
-import WLC2OSIS.* ;
-import WLC2OSIS.Translate.H ;
-//==============================================================================
-/**
- *  <b>Processes words, sending them to the Translate class
- *  after their word, qere, ketiv properties have been determined.</b>
- */
-//==============================================================================
-public class Words{
-
-WLC2OSIS A ;
-Parser P ;
-
-boolean TrailingMaqaf ;
-String MaqafWord ;
-String MaqafWordType ;
-Markers m ;
-
-
-//-----------------------------------------------------------------------------
-
-public Words(WLC2OSIS A, Parser P ) {
-    this.A = A ;
-    this.P = P ;
-    m = new Markers(A, P) ;
-
-    }    
-//------------------------------------------------------------------------------
-
-// Processes a word,
-public void process(String W){
-	if( W.compareTo("P")==0){
-        m.pe();
-	}
-	else if( W.compareTo("S")==0){
-    	m.samek() ;
-		}
-	else{
-		P.wkq.process(W);
-	}
-}
-
-
-// Translates and writes a word (simple, ketib, qere) with exception markers.
-// All returns leave P.MarkerWritten = false ;
-public void write(String W, String Type) {
-
-   String Word = P.T.translate(W) ;
-   
-// Check for any KQ markers which should NOT be here!
-
-    int asteriskcount = P.countChar(W, '*') ;
-
-    if (asteriskcount > 0){
-        System.out.print("Words.write: Unexpected KQ character * ") ;
-        System.out.println("Word: " + W) ;
-        }
-
-// Look for a case in which there's been trailing maqaf
-// without an intervening marker.
-
-   if(TrailingMaqaf & !P.MarkerWritten ){
-       if(MaqafWordType.charAt(0)== Type.charAt(0) ){
-           Word = MaqafWord+Word ;  // Combine them.
-           }
-       else{
-           writeWord(MaqafWord, MaqafWordType) ;
-           TrailingMaqaf = false ;
-           }
-       }
-   
-// Check for a trailing maqaf.
-// Don't write the word here.
-
-   TrailingMaqaf = false ;
-   if( Word.charAt(Word.length()-1) == H.maqaf){
-       TrailingMaqaf = true ;
-       MaqafWord = Word ;
-       MaqafWordType = Type ;
-       P.MarkerWritten = false ;
-       return ;
-       }
-
-   writeWord(Word, Type) ;
-   } 
-//----------------------------------------------------------------------------------
-
-public void writeWord(String Word, String Type) {
-
-// Check for any exception markers ]x 
-   
-   String Out = "" ;
-   for (int k=0 ; k < Word.length() ; k++){
-       char c = Word.charAt(k) ;
-       if(c == ']'){
-           k++ ;
-           char ExceptionValue = Word.charAt(k) ;
-           Out = Out + "<x>" + ExceptionValue +"</x>" ;
-	   System.out.println("Exception occured");
-           }
-       else{
-           Out = Out + c ;
-           }
-       }
-       
-    if (Type.charAt(0) == 'w') {
-        A.writer.appendText(P.MorphologicalSegmentStart + Out + P.MorphologicalSegmentEnd) ;
-    }
-    else if (Type.charAt(0) == 'k') {
-        A.writer.appendText("[" + P.MorphologicalSegmentStart + Out + P.MorphologicalSegmentEnd + " " + H.kaf + "]") ;
-    }
-    else if (Type.charAt(0) == 'q') {
-        A.writer.appendText("("+P.MorphologicalSegmentStart + Out + P.MorphologicalSegmentEnd + " " + H.qof+ ")") ;
-    }
-    else {
-        System.out.println("Warning: unknown word type!");
-//		P.printPosition();
-        System.exit(0);
-    }
-    P.MarkerWritten = false ;
-    }
-}
-
-//==============================================================================
-//==============================================================================
diff --git a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java
index 8c70c30..13aba3c 100644
--- a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java
+++ b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java
@@ -30,7 +30,7 @@ public Translate(WLC2OSIS A, Parser P) {
 //------------------------------------------------------------------------------
 
 // Translates an MC word (not qere or ketib) to a Unicode String.
-// Notes are included as <note type="textual">text of note</x>.
+// Notes are included as <note type="textual">text of note</note>.
 
 public String translate(String W){
     len = W.length() ;
-- 
cgit