removed a bunch of non-USFM stuff, including quotation handling

made \s handling more robust with respect to sections without titles

git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@354 07627401-56e2-0310-80f4-f8cd0041bdcd
author: Chris Little <chrislit@crosswire.org> 2012-03-09 09:04:37 +0000
committer: Chris Little <chrislit@crosswire.org> 2012-03-09 09:04:37 +0000
commit: a38a09e6e9ab88e23e38c9110a090927040e1c54 (patch)
tree: 07f91a41e84db8fbac90a7ad4a9a7265e0db2516 /modules/perlconverters
parent: 083308f932fd8ca7b83d9966df67e8e005a20bb3 (diff)
download: sword-tools-a38a09e6e9ab88e23e38c9110a090927040e1c54.tar.gz
1 file changed, 34 insertions, 34 deletions
diff --git a/modules/perlconverters/usfm2osis.pl b/modules/perlconverters/usfm2osis.pl
index 2ea7a4c..fca3542 100644
--- a/modules/perlconverters/usfm2osis.pl
+++ b/modules/perlconverters/usfm2osis.pl
@@ -42,7 +42,7 @@
 use utf8;
 
 # Stores the script version and date
-$version = "1.7.2";
+$version = "1.7.3";
 
 $date = '$Date$';
 $rev = '$Rev$';
@@ -201,7 +201,7 @@ foreach $file (@files) {
 #	$line =~ s/[\r\n]+//g;
 	$line =~ s/\s*$//;
 	$line =~ s/^\s*//;
-	
+
 	if ($line !~ /^\s*$/) {
 	    if ($line !~ /^\\/) {	
 		@filedata[$i-1] .= " $line";
@@ -211,10 +211,15 @@ foreach $file (@files) {
 	    else {
 		@filedata[$i] = $line;
 	    }
+	}	
+    }
+    for ($i = 0; $i < scalar(@filedata);$i++) {
+	if (@filedata[$i] =~ /(.+) (\\v\b\s*.*)/) {
+	    splice(@filedata, $i+1, 0, $2);
+	    @filedata[$i] = $1;
 	}
     }
     
-    
     $ollevel = 0;
     $vers = 0;
     $chap = 0;
@@ -236,10 +241,10 @@ foreach $file (@files) {
 
 	### Basic XML entity encoding
 	$line =~ s/&(?![a-zA-Z0-9])/&amp;/g;
-	$line =~ s/<< ?/\@/g;
-	$line =~ s/>>/\#/g;
-	$line =~ s/</\$/g;
-	$line =~ s/>/\%/g;
+#	$line =~ s/<< ?/\@/g;
+#	$line =~ s/>>/\#/g;
+#	$line =~ s/</\$/g;
+#	$line =~ s/>/\%/g;
 
 	# $line =~ s/(\w)\'(\w)/"$1" . chr(0x2019) . "$2"/eg;
 	$line =~ s/\\fr 1\/2 \\fr\*/chr(0xBD)/eg;
@@ -318,12 +323,6 @@ foreach $file (@files) {
 	### Introduction--Markers Supported: \imt#, \is#, \iot, \io#, \ip \ie
 	#### Markers Not Yet Supported: \ipi, \im, \imi, \ipq, \imq, \ipr, \iq#, \ib, \ili, \ior...\ior*, \iex, \imte
 
-	# \it title (DCO: Commented out because \it is for italics not introduction titles in USFM 2.1)
-#	if ($line =~ /^\\it\b\s*(.*)/) {
-#	    $line = "<div type=\"introduction\">\n<title>$1<\/title>";
-#	    openTag("<\/div>");
-#	}
-
 	# \imt major title
 	if ($line =~ /^\\imt\b\s*(.+)/) {
 	    $line = "<div type=\"introduction\">\n<title>$1<\/title>";
@@ -427,9 +426,6 @@ foreach $file (@files) {
 		}
 	    }
 	}
-
-	
-	
 	
         # \ie introduction end (discard)
 	if ($line =~ /^\\ie\b/) {
@@ -441,12 +437,13 @@ foreach $file (@files) {
 	#### Markers Not Yet Supported: \mte#, \mr, \sr 
 
 	# \ms majorSection
-	if ($line =~ /^\\ms\d?\b\s*(.+)/) {
+	if ($line =~ /^\\ms\d?\b\s*(.*)/) {
 	    push (@outdata, closeTag("<\/p>"));
 	    push (@outdata, closeTag("<\/div type=\"majorSection\">"));
 	    push (@outdata, "<div type=\"majorSection\">\n");
 	    openTag("<\/div type=\"majorSection\">");
 	    $line =~ s/\\ms\d?\b\s*(.+)/<title>$1<\/title>/;
+	    $line =~ s/\\ms\d?\b\s*//;
 	}
 	# \d canonical title
 	if ($line =~ /^\\d\b\s*(.+)?(\\d\*)?/) {
@@ -455,29 +452,31 @@ foreach $file (@files) {
 	}
 
 	# \s \s1 section (From Chapters and Verses)
-	if ($line =~ /^\\s1?\b\s*(.+)/) {
+	if ($line =~ /^\\s1?\b\s*(.*)/) {
 	    push (@outdata, closeTag("<\/p>"));
 	    push (@outdata, closeTag("<\/div type=\"section\">"));
 	    push (@outdata, "<div type=\"section\">\n");
 	    openTag("<\/div type=\"section\">");
 	    $line =~ s/\\s1?\b\s*(.+)/<title>$1<\/title>/;
+	    $line =~ s/\\s1?\b\s*//;
 	    if ($line =~ /HEBREW TITLE/) {
 		$line =~ s/<title>/<title type=\"psalm\" canonical=\"true\">/;
 	    }
 	}
 
 	# \ss \s2 subSection (From Chapters and Verses)
-	if ($line =~ /^\\s[s2]\b\s*(.+)/) {
+	if ($line =~ /^\\s[s2]\b\s*(.*)/) {
 	    push (@outdata, closeTag("<\/p>"));
 	    push (@outdata, closeTag("<\/div type=\"subSection\">"));
 	    push (@outdata, "<div type=\"subSection\">\n");
 	    openTag("<\/div type=\"subSection\">");
 	    $line =~ s/\\s[s2]\b\s*(.+)/<title>$1<\/title>/;
+	    $line =~ s/\\s[s2]\b\s*//;
 	}
 
 	# \sss \s3 x-subSubSection (From Chapters and Verses)
 	# This will also handle deeper levels (4+) of subsections.
-	if ($line =~ /^\\s(ss|\d+)\b\s*(.+)/) {
+	if ($line =~ /^\\s(ss|\d+)\b\s*(.*)/) {
 	    my $ssLvl = $1;
 	    my $ssType = "";
 	    while ($ssLvl > 2) {
@@ -490,6 +489,7 @@ foreach $file (@files) {
 	    push (@outdata, "<div type=\"$ssType\">\n");
 	    openTag("<\/div type=\"$ssType\">");
 	    $line =~ s/\\s(ss|\d+)\b\s*(.+)/<title>$2<\/title>/;
+	    $line =~ s/\\s(ss|\d+)\b\s*//;
 	}
 
 	# \mt\mt1 title
@@ -1020,27 +1020,27 @@ for ($i = 0; $i < scalar(@filedata); $i++) {
 $fullfile =~ s/<\/osisText>\n<\/osis>\n(<chapter eID[^>]+>)/$1\n<\/osisText>\n<\/osis>/mg; #swap the chapter back up one before the osisText closer
 $fullfile =~ s/<\/div>\n(<chapter eID[^>]+>)/$1\n<\/div>/mg; #swap the chapter back up one before the book closer
 
-print "Tagging quotations.\n";
+#print "Tagging quotations.\n";
 
-$q = 1;
+#$q = 1;
 
-$fullfile =~ s/\$([^\%]+?)\%/"<q level=\"2\" sID=\"q2." . $q . "\"\/>" . $1 . "<q level=\"2\" eID=\"q2." . $q++ . "\"\/>"/eg;
+#$fullfile =~ s/\$([^\%]+?)\%/"<q level=\"2\" sID=\"q2." . $q . "\"\/>" . $1 . "<q level=\"2\" eID=\"q2." . $q++ . "\"\/>"/eg;
 
-$fullfile =~ s/\$/"<milestone type=\"cQuote\" subType=\"x-level-2\"\/>"/eg;
+#$fullfile =~ s/\$/"<milestone type=\"cQuote\" subType=\"x-level-2\"\/>"/eg;
 
-$q = 1;
+#$q = 1;
 
-while ($fullfile =~ /(\@[^\@\#]+?)\@([^\@\#]+?)\#(([^\@\#]+?\@[^\@\#]+?\#)+[^\@\#]+?\#)/) {
-    $fullfile =~ s/(\@[^\@\#]+?)\@([^\@\#]+?)\#(([^\@\#]+?\@[^\@\#]+?\#)+[^\@\#]+?\#)/$1 . "<q level=\"1\" sID=\"q1." . $q . "\"\/>" . $2 . "<q level=\"1\" eID=\"q1." . $q++ . "\"\/>" . $3/eg;
-}
-while ($fullfile =~ /(\@[^\@\#]+?)\@([^\@\#]+?)\#([^\@\#]+?\#)/) {
-    $fullfile =~ s/(\@[^\@\#]+?)\@([^\@\#]+?)\#([^\@\#]+?\#)/$1 . "<q level=\"1\" sID=\"q1." . $q . "\"\/>" . $2 . "<q level=\"1\" eID=\"q1." . $q++ . "\"\/>" . $3/eg;
-}
+#while ($fullfile =~ /(\@[^\@\#]+?)\@([^\@\#]+?)\#(([^\@\#]+?\@[^\@\#]+?\#)+[^\@\#]+?\#)/) {
+#    $fullfile =~ s/(\@[^\@\#]+?)\@([^\@\#]+?)\#(([^\@\#]+?\@[^\@\#]+?\#)+[^\@\#]+?\#)/$1 . "<q level=\"1\" sID=\"q1." . $q . "\"\/>" . $2 . "<q level=\"1\" eID=\"q1." . $q++ . "\"\/>" . $3/eg;
+#}
+#while ($fullfile =~ /(\@[^\@\#]+?)\@([^\@\#]+?)\#([^\@\#]+?\#)/) {
+#    $fullfile =~ s/(\@[^\@\#]+?)\@([^\@\#]+?)\#([^\@\#]+?\#)/$1 . "<q level=\"1\" sID=\"q1." . $q . "\"\/>" . $2 . "<q level=\"1\" eID=\"q1." . $q++ . "\"\/>" . $3/eg;
+#}
 
-$fullfile =~ s/\@([^\#]+?)\#/"<q level=\"1\" sID=\"q1." . $q . "\"\/>" . $1 . "<q level=\"1\" eID=\"q1." . $q++ . "\"\/>"/eg;
-$fullfile =~ s/\@/"<milestone type=\"cQuote\" subType=\"x-level-1\"\/>"/eg;
+#$fullfile =~ s/\@([^\#]+?)\#/"<q level=\"1\" sID=\"q1." . $q . "\"\/>" . $1 . "<q level=\"1\" eID=\"q1." . $q++ . "\"\/>"/eg;
+#$fullfile =~ s/\@/"<milestone type=\"cQuote\" subType=\"x-level-1\"\/>"/eg;
 
-$fullfile =~ s/\^/"<q level=\"1\" eID=\"q1." . $q++ . ".false\"\/>"/eg;
+#$fullfile =~ s/\^/"<q level=\"1\" eID=\"q1." . $q++ . ".false\"\/>"/eg;
 
 print OUTF $fullfile;
 close (OUTF);
author	Chris Little <chrislit@crosswire.org>	2012-03-09 09:04:37 +0000
committer	Chris Little <chrislit@crosswire.org>	2012-03-09 09:04:37 +0000
commit	a38a09e6e9ab88e23e38c9110a090927040e1c54 (patch)
tree	07f91a41e84db8fbac90a7ad4a9a7265e0db2516 /modules/perlconverters
parent	083308f932fd8ca7b83d9966df67e8e005a20bb3 (diff)
download	sword-tools-a38a09e6e9ab88e23e38c9110a090927040e1c54.tar.gz