summaryrefslogtreecommitdiffstats
path: root/modules/hebrew-wlc/WLC2OSIS
diff options
context:
space:
mode:
author Martin Gruner <mg.pub@gmx.net> 2006-07-27 19:53:15 +0000
committer Martin Gruner <mg.pub@gmx.net> 2006-07-27 19:53:15 +0000
commit ee9a6e0ba97aca3d7e49322641f364c4d079786f (patch)
tree d002aba9c6eec5ac63ba3d4d999797b17ab83cb3 /modules/hebrew-wlc/WLC2OSIS
parent bbc9701b175697d709c1e79e304f55d22ef28510 (diff)
download sword-tools-ee9a6e0ba97aca3d7e49322641f364c4d079786f.tar.gz
some work, still not functional
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@74 07627401-56e2-0310-80f4-f8cd0041bdcd
Diffstat (limited to 'modules/hebrew-wlc/WLC2OSIS')
-rw-r--r-- modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java 30
-rw-r--r-- modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java 99
-rw-r--r-- modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java 127
-rw-r--r-- modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java 2
4 files changed, 43 insertions, 215 deletions
diff --git a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java
index 2485d0d..a4236d6 100644
--- a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java
+++ b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java
@@ -27,44 +27,18 @@ public Markers(WLC2OSIS A, Parser P ) {
// Samek
public void samek(){ //parasah setumah, closed paragraph == small space in line
- testMaqafWord() ;
-// A.wlc.writeMarker("samekh", 4) ;
A.writer.appendText(" " + H.samekh + " ") ;
- P.MarkerWritten = true ;
+// P.MarkerWritten = true ;
}
//------------------------------------------------------------------------------
// Pe
public void pe(){ // parasah petuhah, open paragraph == new line
- testMaqafWord() ;
-// A.writer.writeMarker("pe", 4) ;
A.writer.appendText(" " + H.pe + " " + "<p/>") ;
- P.MarkerWritten = true ;
+// P.MarkerWritten = true ;
}
//------------------------------------------------------------------------------
-// Line
-public void line(){
- System.out.println("Markers: End-of-line encountered!") ;
- }
-//----------------------------------------------------------------------------
-/**
- * Tests for a preceding trailing maqaf word and writes it.
- *
- * Before any marker is written, the TrailingMaqaf flag must be
- * tested. If a trailing maqaf word precedes the mark, it must be
- * written before the marker.
- *
- * Apparemtly ONLY EOLs cause this test to be activated.
- */
-void testMaqafWord(){
- if (P.w.TrailingMaqaf){
-// System.out.print("Markers: Marker follows trailing maqaf at ") ;
-// P.printPosition() ;
- P.w.writeWord(P.w.MaqafWord, P.w.MaqafWordType) ;
- P.w.TrailingMaqaf = false ;
- }
- }
}
//==============================================================================
//==============================================================================
diff --git a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
index d3ac78e..3f29b9c 100644
--- a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
+++ b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
@@ -3,41 +3,15 @@ package WLC2OSIS.Parse ;
import java.io.*;
import WLC2OSIS.* ;
import WLC2OSIS.Translate.* ;
-//==============================================================================
-/**
- * <b>Parser dispatches tokens to Books, Chapters, Markers, Tanach,
- * Verses, and Words start/end methods, special to WLC. </b>
- *
- * Extensively modified for WLC.
- */
-//==============================================================================
+
public class Parser{
WLC2OSIS A ;
-
-// Working classes
-
-public Words w ;
Translate T ;
-public WKQ wkq ;
-
-// Current state
-
-public boolean MarkerWritten ; // Indicates a marker has been written
- // between two words.
-
-// Assorted counts
-public int ChapterVerseCount ;
-public int BookVerseCount ;
-public int BookChapterCount ;
-
-public String MorphologicalSegmentStart = "<seg type=\"morph\">" ;
-public String MorphologicalSegmentEnd = "</seg>" ;
-public String MorphologicalDivisionMarker = MorphologicalSegmentEnd + MorphologicalSegmentStart;
-
-
-BufferedReader file;
+public final String MorphologicalSegmentStart = "<seg type=\"morph\">" ;
+public final String MorphologicalSegmentEnd = "</seg>" ;
+public final String MorphologicalDivisionMarker = MorphologicalSegmentEnd + MorphologicalSegmentStart;
//-----------------------------------------------------------------------------
public Parser(WLC2OSIS A, boolean wlc_only) {
@@ -45,8 +19,6 @@ public Parser(WLC2OSIS A, boolean wlc_only) {
T = new Translate(A, this) ;
- w = new Words(A, this) ;
- wkq = new WKQ(this) ;
new MC() ;
Note.setNotes();
}
@@ -55,21 +27,29 @@ public Parser(WLC2OSIS A, boolean wlc_only) {
public void parse(){
String s ;
System.out.println("\n");
-
+
+ BufferedReader file;
+
try{
file = new BufferedReader( new FileReader( A.InputFilename ));
}
catch (IOException e) {
+ file = null;
System.out.println("File not found: " + e) ;
}
BookName[] bookNames = BookName.setBookNames();
String oldBookCode = "";
+ String newBookCode = "";
int oldChapter = 0;
+ int newChapter = 0;
int oldVerse = 0;
+ int newVerse = 0;
int oldWordNumber = 0;
+ int newWordNumber = 0;
int oldSubWordNumber = 0;
+ int newSubWordNumber = 0;
java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("(\\w\\w)(\\d+):(\\d+),(\\d+)\\.(\\d+)(\\]\\S)?\\s(\\S+)\\s(\\S+)(@|%)(\\S+)");
@@ -103,18 +83,19 @@ public void parse(){
System.exit(1);
}
-// Parse the identifier
- String newBookCode = match.group(1);
- int newChapter = Integer.parseInt( match.group(2) );
- int newVerse = Integer.parseInt( match.group(3) );
- int newWordNumber = Integer.parseInt( match.group(4) );
- int newSubWordNumber = Integer.parseInt( match.group(5) );
+ // Parse the identifier
+ newBookCode = match.group(1);
+ newChapter = Integer.parseInt( match.group(2) );
+ newVerse = Integer.parseInt( match.group(3) );
+ newWordNumber = Integer.parseInt( match.group(4) );
+ newSubWordNumber = Integer.parseInt( match.group(5) );
String note = match.group(6);
- String expression = match.group(7);
+ String word = match.group(7);
String lemma = match.group(8);
String separator = match.group(9);
- String grammar = match.group(10);
+ String morph = match.group(10);
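+ // Verse changed, close old and open new. NOTE(review): the != on the String book codes below compares references, not contents; it should probably be !newBookCode.equals(oldBookCode).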
if ((newBookCode != oldBookCode) || (newChapter != oldChapter) || (newVerse != oldVerse))
{
if (oldVerse >= 1) A.writer.closeTag("verse", 2);
@@ -130,16 +111,26 @@ public void parse(){
A.writer.appendText(" ");
}
- System.out.println("Expression: " + expression);
+ //System.out.println("Expression: " + word);
- w.process(expression);
-
- oldBookCode = newBookCode;
+ // Paragraph marker found. Compare contents with equals(): == on Strings tests reference identity and never matches values returned by match.group().
+ if ("x".equals(morph)){
+ System.out.println("paragraph marker found!");
+ if ("P".equals(word)){ // open paragraph: marker followed by a paragraph break
+ A.writer.appendText(" "+constructWord(word, lemma, morph)+"<p/>");
+ }
+ else if ("S".equals(word)){ // closed paragraph: marker followed by a space
+ A.writer.appendText(" "+constructWord(word, lemma, morph)+" ");
+ }
+ else {System.out.println("Unknown marker."); System.exit(1);}
+ }
+
+ //remember
+ oldBookCode = newBookCode;
oldChapter = newChapter;
oldVerse = newVerse;
oldWordNumber = newWordNumber;
oldSubWordNumber = newSubWordNumber;
-
}
A.writer.closeTag("verse", 2);
@@ -148,18 +139,8 @@ public void parse(){
}
//----------------------------------------------------------------------------
-// Counts the number of occurences of a character in a String.
-
-public int countChar(String W, char c){
- int Count = 0 ;
- for(int k=0; k < W.length(); k++){
- if(W.charAt(k)==c){
- Count++ ;
- }
- }
- return Count ;
- }
-//----------------------------------------------------------------------------
-
+public String constructWord(String word, String lemma, String morph){
+ return "<seg type=\"x-morph\" lemma=\""+lemma+"\" morph=\""+morph+"\">"+word+"</seg>";
+}
}
diff --git a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java
deleted file mode 100644
index 66877d8..0000000
--- a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java
+++ /dev/null
@@ -1,127 +0,0 @@
-package WLC2OSIS.Parse ;
-
-import WLC2OSIS.* ;
-import WLC2OSIS.Translate.H ;
-//==============================================================================
-/**
- * <b>Processes words, sending them to the Translate class
- * after their word, qere, ketiv properties have been determined.</b>
- */
-//==============================================================================
-public class Words{
-
-WLC2OSIS A ;
-Parser P ;
-
-boolean TrailingMaqaf ;
-String MaqafWord ;
-String MaqafWordType ;
-Markers m ;
-
-
-//-----------------------------------------------------------------------------
-
-public Words(WLC2OSIS A, Parser P ) {
- this.A = A ;
- this.P = P ;
- m = new Markers(A, P) ;
-
- }
-//------------------------------------------------------------------------------
-
-// Processes a word,
-public void process(String W){
- if( W.compareTo("P")==0){
- m.pe();
- }
- else if( W.compareTo("S")==0){
- m.samek() ;
- }
- else{
- P.wkq.process(W);
- }
-}
-
-
-// Translates and writes a word (simple, ketib, qere) with exception markers.
-// All returns leave P.MarkerWritten = false ;
-public void write(String W, String Type) {
-
- String Word = P.T.translate(W) ;
-
-// Check for any KQ markers which should NOT be here!
-
- int asteriskcount = P.countChar(W, '*') ;
-
- if (asteriskcount > 0){
- System.out.print("Words.write: Unexpected KQ character * ") ;
- System.out.println("Word: " + W) ;
- }
-
-// Look for a case in which there's been trailing maqaf
-// without an intervening marker.
-
- if(TrailingMaqaf & !P.MarkerWritten ){
- if(MaqafWordType.charAt(0)== Type.charAt(0) ){
- Word = MaqafWord+Word ; // Combine them.
- }
- else{
- writeWord(MaqafWord, MaqafWordType) ;
- TrailingMaqaf = false ;
- }
- }
-
-// Check for a trailing maqaf.
-// Don't write the word here.
-
- TrailingMaqaf = false ;
- if( Word.charAt(Word.length()-1) == H.maqaf){
- TrailingMaqaf = true ;
- MaqafWord = Word ;
- MaqafWordType = Type ;
- P.MarkerWritten = false ;
- return ;
- }
-
- writeWord(Word, Type) ;
- }
-//----------------------------------------------------------------------------------
-
-public void writeWord(String Word, String Type) {
-
-// Check for any exception markers ]x
-
- String Out = "" ;
- for (int k=0 ; k < Word.length() ; k++){
- char c = Word.charAt(k) ;
- if(c == ']'){
- k++ ;
- char ExceptionValue = Word.charAt(k) ;
- Out = Out + "<x>" + ExceptionValue +"</x>" ;
- System.out.println("Exception occured");
- }
- else{
- Out = Out + c ;
- }
- }
-
- if (Type.charAt(0) == 'w') {
- A.writer.appendText(P.MorphologicalSegmentStart + Out + P.MorphologicalSegmentEnd) ;
- }
- else if (Type.charAt(0) == 'k') {
- A.writer.appendText("[" + P.MorphologicalSegmentStart + Out + P.MorphologicalSegmentEnd + " " + H.kaf + "]") ;
- }
- else if (Type.charAt(0) == 'q') {
- A.writer.appendText("("+P.MorphologicalSegmentStart + Out + P.MorphologicalSegmentEnd + " " + H.qof+ ")") ;
- }
- else {
- System.out.println("Warning: unknown word type!");
-// P.printPosition();
- System.exit(0);
- }
- P.MarkerWritten = false ;
- }
-}
-
-//==============================================================================
-//==============================================================================
diff --git a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java
index 8c70c30..13aba3c 100644
--- a/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java
+++ b/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java
@@ -30,7 +30,7 @@ public Translate(WLC2OSIS A, Parser P) {
//------------------------------------------------------------------------------
// Translates an MC word (not qere or ketib) to a Unicode String.
-// Notes are included as <note type="textual">text of note</x>.
+// Notes are included as <note type="textual">text of note</note>.
public String translate(String W){
len = W.length() ;