$1<\/title>"; # openTag("<\/div>"); # } # \imt major title if ($line =~ /^\\imt\b\s*(.+)/) { $line = "<div type=\"introduction\">\n<title>$1<\/title>"; openTag("<\/div>"); } # \is introduction section title if ($line =~ /^\\is(\d*)\b\s*(.*)/) { $level = $1; if ($level eq "") { $level = "1"; } $line = "<div type=\"section\"><title>$2<\/title>"; openTag("<\/div>"); } # \iot introduction outline title if ($line =~ /^\\iot\b\s*(.*)/) { $line = "<div type=\"outline\">\n<title>$1<\/title>"; } # \io\d+ introduction outline item if ($line =~ /^\\io(\d+)\b\s*(.*)/) { if ($ollevel == $1) { $line = "<item>$2<\/item>"; } elsif ($ollevel > $1) { $line = ""; while ($ollevel > $1) { $line .= "<\/list><\/item>\n"; $ollevel--; } $line .= "<item>$2<\/item>"; } elsif ($ollevel < $1) { $line = ""; if ($ollevel != 0) { $line .= "<item>"; } while ($ollevel < $1) { $line .= "<list>\n"; $ollevel++; } $line .= "<item>$2<\/item>\n"; } if (@filedata[$i+1] !~ /^\\io/) { while ($ollevel > 0) { $line .= "\n<\/list>"; if ($ollevel > 1) {$line .= "<\/item>";} $ollevel--; } if ($ollevel == 0) { $line .= "\n<\/div>"; } } } # \ip introduction paragraph if ($line =~ /^\\ip\b\s*(.*)/) { $line = "<p>$1<\/p>"; } # \im introduction paragraph ('left flush' - NOT IMPLENTED FLUSH) if ($line =~ /^\\im\b\s*(.*)/) { $line = "<p>$1<\/p>"; } # \im introduction quotation (implemented as ordinary quotation) if ($line =~ /^\\imq\b\s*(.*)/) { $line = "<q>$1<\/q>"; } # \iq line (including \iq#), adapted from \q (see below), needs more clean-up if ($line =~ /^\\iq/) { if ($l != 1) { push (@outdata, "<lg>\n"); $l = 1; } if ($line =~ /\\iq(\d*)$/) { if ($1 eq "") { $line = "<l>\n"; } else { $line = "<l level=\"$1\">\n"; } @filedata[$i+1] .= "<\/l>"; if (@filedata[$i+2] !~ /\\iq(?!t)/) { @filedata[$i+1] .= "\n<\/lg>"; $l = 0; } } else { $line =~ s/\\iq\b\s*(.+)/<l>$1<\/l>/; $line =~ s/\\iq(\d+)\b\s*(.+)/<l level=\"$1\">$2<\/l>/; if (@filedata[$i+1] !~ /\\iq(?![ta])/) { $line .= "\n<\/lg>"; $l = 0; } } } # \ie 
introduction end (discard) if ($line =~ /^\\ie\b/) { $line = ""; } ### Titles, Headings, and Labels (elsewhere?)--Markers Supported: \d, \ms#, \s#, \mt#, \r, \sp, \rq..\rq* #### Markers Not Yet Supported: \mte#, \mr, \sr # \ms majorSection if ($line =~ /^\\ms\b\s*(.+)/) { push (@outdata, closeTag("<\/p>")); push (@outdata, closeTag("<\/div type=\"majorSection\">")); push (@outdata, "<div type=\"majorSection\">\n"); openTag("<\/div type=\"majorSection\">"); $line =~ s/\\ms\b\s*(.+)/<title>$1<\/title>/; } # \d canonical title if ($line =~ /^\\(ms|d)\b\s*(.+)/) { push (@outdata, closeTag("<\/p>")); $line =~ s/\\d\b\s*(.+)/<title type=\"canonical\">$1<\/title>/; } # \s \s1 section (From Chapters and Verses) if ($line =~ /^\\s1?\b\s*(.+)/) { push (@outdata, closeTag("<\/p>")); push (@outdata, closeTag("<\/div type=\"section\">")); push (@outdata, "<div type=\"section\">\n"); openTag("<\/div type=\"section\">"); $line =~ s/\\s1?\b\s*(.+)/<title>$1<\/title>/; if ($line =~ /HEBREW TITLE/) { $line =~ s/<title>/<title type=\"psalm\" canonical=\"true\">/; } } # \ss \s2 subSection (From Chapters and Verses) if ($line =~ /^\\s[s2]\b\s*(.+)/) { push (@outdata, closeTag("<\/p>")); push (@outdata, closeTag("<\/div type=\"subSection\">")); push (@outdata, "<div type=\"subSection\">\n"); openTag("<\/div type=\"subSection\">"); $line =~ s/\\s[s2]\b\s*(.+)/<title>$1<\/title>/; } # \sss \s3 x-subSubSection (From Chapters and Verses) # This will also handle deeper levels (4+) of subsections. if ($line =~ /^\\s(ss|\d+)\b\s*(.+)/) { my $ssLvl = $1; my $ssType = ""; while ($ssLvl > 2) { $ssLvl--; $ssType .= "Sub"; } $ssType = "x-sub" . $ssType . 
"Section"; push (@outdata, closeTag("<\/p>")); push (@outdata, closeTag("<\/div type=\"$ssType\">")); push (@outdata, "<div type=\"$ssType\">\n"); openTag("<\/div type=\"$ssType\">"); $line =~ s/\\s(ss|\d+)\b\s*(.+)/<title>$2<\/title>/; } # \mt\mt1 title if ($line =~ /^\\mt[1234]?\b\s*(.+)/) { $line = "<title type=\"main\">$1<\/title>"; } # \mt2 title if ($line =~ /^\\mt2\b\s*(.+)/) { $line = "<title type=\"continued\">$1<\/title>"; } # \st,\st2 title if ($line =~ /^\\st2?\b\s*(.+)/) { $line = "<title type=\"continued\">$1<\/title>"; } # \st3 title if ($line =~ /^\\st3\b\s*(.+)/) { $line = "<title type=\"sub\">$1<\/title>"; } # \r sub title if ($line =~ /^\\mr\b\s*(.+)/) { $line = "<title type=\"sub\">$1<\/title>"; } # \r parallel title if ($line =~ /^\\r\b\s*(.+)/) { $line = "<title type=\"parallel\">$1<\/title>"; } # \sp speaker if ($line =~ /^\\sp\b\s*(.+)/) { $line = "<speaker>$1<\/speaker>"; } # \rq..\rq* inline reference $line =~ s/\\rq( .*?)\\rq\*/<reference>$1<\/reference>/g; ### Chapters and Verses--Markers Supported: \c, \v, \vp...\vp*, \cl #### Markers Not Yet Supported: \ca...\ca*, \cp, \cd, \va...\va* # \c chapter if ($line =~ /^\\c\b\s*([^ ]*)/) { if ($1 ne "") { $chap = $1; } else { $chap++; } push (@outdata, $versClose); $versClose = ""; if ($moduleType eq "bible") { push (@outdata, closeTag("<\/p>")) } if ($chapClose =~ /<chapter/) { push (@outdata, $chapClose); # close previous chapter $chapClose = ""; } else { push (@outdata, closeTag("<\/div>")); # close introduction div } push (@outdata, "<chapter sID=\"$book.$chap\" osisID=\"$book.$chap\"\/>\n"); $chapClose = "<chapter eID=\"$book.$chap\"\/>\n"; $line =~ s/\\c\b\s*([^ ]*)//; } # \cl chapter label if ($line =~ /^\\cl\b\s*(.*)/) { $line = "<title>$1<\/title>"; } # \v verse if ($line =~ /^\\v\b\s*(\d[^\\ ]*)?/) { if ($1 ne "") { $vers = $1; } else { $vers++; } push (@outdata, $versClose); $divOpen=false; $versClose = ""; if ($vers =~ /(\d+[^\\\- ]*)\-(\d+[^\\ ]*)/) { $vF = $1; $vT = $2; $vF =~ 
/^(\d+)/; $vFn = scalar($1); $vT =~ /^(\d+)/; $vTn = scalar($1); $osisID = "$book.$chap.$vF"; if ($vTn > $vFn && $vFn > 0) { for ($j = $vFn + 1; $j < $vTn; $j++) { $osisID .=" $book.$chap.$j"; } } $osisID .= " $book.$chap.$vT"; } else { $osisID = "$book.$chap.$vers"; } if ($moduleType eq "bible") { push (@outdata, "<verse sID=\"$osisID\" osisID=\"$osisID\"\/>\n"); $versClose = "<verse eID=\"$osisID\"\/>\n"; $line =~ s/\\v\b\s*(\d[^\\ ]*)? *//; } elsif ($moduleType eq "comment") { closeTag("<\/p>"); push (@outdata, "<div type=\"section\" annotateType=\"commentary\" annotateRef=\"$osisID\">\n"); $versClose = "<\/p>\n<\/div>\n"; $line =~ s/\\v\b\s*(\d[^\\ ]*)? *//; $divOpen=true; } else { print "usfm2osis.pl supports only the module types \"bible\" and \"comment\" \n"; exit 1; } } # \vp...\vp# published verse numbers (just delete for now) $line =~ s/\\vp\*\s*//g; $line =~ s/\\vp\b\s*(\d+[a-z]?|[a-z])\s*//g; ### Paragraphs--Markers Supported: \p, \b, \m, \nb, \cls #### Markers Not Yet Supported: \m, \pmo, \pm, \pmc, \pmr, \pi#, \mi, \li#, \pc, \pr, \ph#, \b # Hack to solve an issue in a module that used <R> for linebreaks in the usfm files--may be commented out (not USFM 2.1) $line =~ s/\\lb\*/<lb \/>/g; # \p paragraph (From Chapters and Verses) if ($line =~ /^\\p\b\s*/) { if ((($moduleType eq "comment") && ($divOpen))||($moduleType eq "bible")){ push (@outdata, closeTag("<\/p>")); } push (@outdata, "<p>\n"); openTag("<\/p>"); $line =~ s/\\p\b\s*//; } # \pc paragraph centered (From Chapters and Verses) if ($line =~ /^\\pc\b\s*/) { if ((($moduleType eq "comment") && ($divOpen))||($moduleType eq "bible")){ push (@outdata, closeTag("<\/p>")); } push (@outdata, "<p type=\"x-center\">\n"); openTag("<\/p>"); $line =~ s/\\pc\b\s*//; } # \mi2 paragraph flush left, no indentation if ($line =~ /^\\mi2\b\s*/) { if ((($moduleType eq "comment") && ($divOpen))||($moduleType eq "bible")){ push (@outdata, closeTag("<\/p>")); } push (@outdata, "<p type=\"x-noindent\">\n"); 
openTag("<\/p>"); $line =~ s/\\mi2\b\s*//; } # \cls paragraph (From Chapters and Verses) if ($line =~ /^\\cls\b\s*/) { if ((($moduleType eq "comment") && ($divOpen))||($moduleType eq "bible")){ push (@outdata, closeTag("<\/closer>")); } push (@outdata, "<closer>\n"); openTag("<\/closer>"); $line =~ s/\\cls\b\s*//; } # \b $line =~ s/\\b\b//; # \m $line =~ s/\\m\b//; # \nb $line =~ s/\\nb\b//; ### Poetry--Markers Supported: \q#, \qs...\qs*, \qr, \qc, \qac...\qac*, \qa, \qm# #### Markers Not Yet Supported: \b # \qt...\qt*, OT quotation (handle early) $line =~ s/\\qt\b\s*(.*?)\\qt\*/<seg type="otPassage">$1<\/seg>/g; # \qa, acrostic heading $line =~ s/^\\qa\b\s*(.*)/<title type=\"acrostic\">$1<\/title>/g; # \qac...\qac*, acrostic character style (used within a line) $line =~ s/\\qac\b\s*(.*?)\\qac\*/<hi type="acrostic">$1<\/hi>/g; # \q line (including \q#, \qr, \qc, and \qs...\qs*) if ($line =~ /^\\q/) { if ($l != 1) { push (@outdata, "<lg>\n"); $l = 1; } if ($line =~ /\\qm?(c|r|\d*)$/) { if ($1 eq "") { $line = "<l>\n"; } elsif ($1 eq "c") { $line = "<l type=\"x-centered\">"; } elsif ($1 eq "r") { $line = "<l type=\"x-right\">"; } else { $line = "<l level=\"$1\">\n"; } @filedata[$i+1] .= "<\/l>"; if (@filedata[$i+2] !~ /\\q(?!t)/) { @filedata[$i+1] .= "\n<\/lg>"; $l = 0; } } else { $line =~ s/\\q\b\s*(.+)/<l>$1<\/l>/; $line =~ s/\\qm?(\d+)\b\s*(.+)/<l level=\"$1\">$2<\/l>/; $line =~ s/\\qc\b\s*(.+)/<l type=\"x-centered\">$1<\/l>/; $line =~ s/\\qr\b\s*(.+)/<l type=\"x-right\">$1<\/l>/; $line =~ s/\\qs\b\s*(.+?)\s*\\qs\*/<l type="selah">$1<\/l>/; if (@filedata[$i+1] !~ /\\q(?![ta])/) { $line .= "\n<\/lg>"; $l = 0; } } $line =~ s/\s*\\qs\b\s*(.+?)\s*\\qs\*/<\/l>\n<l type="selah">$1/; } $line =~ s/\s*\\qs\b\s*(.+?)\s*\\qs\*\s*/<lg><l type="selah">$1<\/l><\/lg>/; ### Tables--Markers Supported: \tr, \th#, \tc#, \tcr# ####Markers Not Yet Supported: \thr# # \th table heading if ($line =~ /^\\t/) { if ($line =~ /^\\tr\b\s*(\\th.*)/) { $line = "$1"; if ($table != 1) { push 
(@outdata, "<table>\n");    # argument list of the "push" that ends the previous line
                    $table = 1;
                }
                # Each \th# heading cell runs up to the next \th marker or the end of the line.
                $line =~ s/\\th\d?\b\s*(.+?)\s*(?=(\\th|$))/<cell role=\"label\">$1<\/cell>/g;
                $line = "<row>$line<\/row>";
            }

            # \tr row made of \tc# / \tcr# data cells
            if ($line =~ /^\\tr\b\s*(\\tc.*)/) {
                $line = $1;
                if ($table != 1) {
                    push (@outdata, "<table>\n");
                    $table = 1;
                }
                $line =~ s/\\tcr?\d?\b\s*(.+?)\s*(?=(\\tc|$))/<cell>$1<\/cell>/g;
                $line = "<row>$line<\/row>";
                # Close the table when the next input line is not another \tr row.
                if ($filedata[$i+1] !~ /\\tr/) {
                    $line .= "<\/table>\n";
                    $table = 0;
                }
            }

            # \th1 starts a heading row; any other \th# adds a cell to it.
            if ($line =~ /^\\th1\b\s*(.*)/) {
                if ($table != 1) {
                    push (@outdata, "<table>\n");
                    $table = 1;
                }
                $line = "<row><cell role=\"label\">$1<\/cell>\n";
            }
            elsif ($line =~ /^\\th\d+\b\s*(.*)/) {
                $line = "<cell role=\"label\">$1<\/cell>\n";
            }

            # \tb1 starts a body row (closing the previous row when a table is
            # already open); any other \tb# adds a cell.  The row and table are
            # closed as soon as the next input line has no \tb marker.
            if ($line =~ /^\\tb1\b\s*(.*)/) {
                if ($table != 1) {
                    push (@outdata, "<table>\n");
                    $table = 1;
                }
                else {
                    push (@outdata, "<\/row>");
                }
                $line = "<row><cell>$1<\/cell>\n";
                if ($filedata[$i+1] !~ /\\tb/) {
                    $line .= "<\/row><\/table>\n";
                    $table = 0;
                }
            }
            elsif ($line =~ /^\\tb\d+\b\s*(.*)/) {
                $line = "<cell>$1<\/cell>\n";
                if ($filedata[$i+1] !~ /\\tb/) {
                    $line .= "<\/row><\/table>\n";
                    $table = 0;
                }
            }
        }

        # parseRef(REF)
        # Rewrites a "chapter:verse" style reference into a dotted reference
        # prefixed with the global $book and returns the result.
        #   1. a trailing ":" or "." (plus any whitespace after it) is dropped;
        #   2. every remaining ":" becomes ".";
        #   3. "$book." is prepended;
        #   4. two range patterns repeat "$book.<chapter>." after the hyphen.
        # NOTE(review): both range patterns need at least two characters in the
        # starting verse, so a range such as "1:1-3" is returned as
        # "$book.1.1-3" without the second half being expanded -- confirm
        # whether that is intended.
        sub parseRef {
            my ($ref) = @_;
            $ref =~ s/[:\.]\s*$//;
            $ref =~ s/:/\./g;
            $ref = "$book.$ref";
            $ref =~ s/(\d+)\.(\d[^\,]+)\-(\d+)/$1.$2-$book.$1.$3/;
            $ref =~ s/(\d+)\.(\d[^\-]+)\-+\s*(\d.+)/$1.$2\-$book.$1.$3/;
            return $ref;
        }

        ### Footnotes--Markers Supported: \fk, \fq, \f...\f*, \fv, \ft, \fqa
        ####Markers Not Yet Supported: \fe...\fe*, \fr, \fl, \fp, \fdc...\fdc*, \fm...\fm*

        # footnoteHandler(TEXT)
        # TEXT is one complete "\f ... \f*" span.  Returns it wrapped in
        # <note>...</note> with the supported footnote markers converted to
        # markup or removed.  Reads the globals $book and $chap, increments the
        # global counter $nFN once per \fr marker and writes $sourceVal.
        # While converting, "\fX" is left where a marker used to be so that the
        # "(?=\\f)" look-aheads of the later patterns still find a terminator;
        # all "\fX" place-holders are removed at the end.
        sub footnoteHandler {
            my ($text) = @_;
            my $note = "<note>$text</note>";

            # \fk Catch Words
            $note =~ s/\\fk\s(.+?)(\s*)\\fk\*/\\fX<catchWord>$1<\/catchWord>\\fX$2/g;
            $note =~ s/\\fk\s(.+?)(\s*)(?=\\f)/\\fX<catchWord>$1<\/catchWord>$2\\fX/g;
            $note =~ s/\\fk\*/\\fX/g;

            # \fq Quotations in Footnotes
            # CCL--I don't know the difference, aside from length, between catch words and quotations in footnotes. It may vary by document.
            $note =~ s/\\fq\s(.+?)(\s*)\\fq\*/\\fX<catchWord>$1<\/catchWord>\\fX$2/g;
            $note =~ s/\\fq\s(.+?)(\s*)(?=\\f)/\\fX<catchWord>$1<\/catchWord>$2\\fX/g;
            $note =~ s/\\fq\*/\\fX/g;

            # \fqa Alternate translations in Footnotes
            $note =~ s/\\fqa\s(.+?)\\fqa\*/\\fX<rdg type=\"alternate\">$1<\/rdg>\\fX/g;
            $note =~ s/\\fqa\s(.+?)(?=\\f)/\\fX<rdg type=\"alternate\">$1<\/rdg>\\fX/g;
            $note =~ s/\\fqa\*/\\fX/g;

            # \fv Footnote verse number
            $note =~ s/\\fv\s(.+?)\\fv\*/\\fX<reference osisID=\"$book.$chap.$1\">$1<\/reference>\\fX/g;
            $note =~ s/\\fv\s*(\d+)\b\s*(?=\\f)/\\fX<reference osisID=\"$book.$chap.$1\">$1<\/reference>\\fX/g;
            $note =~ s/\\fv\*/\\fX/g;

            # \fr Footnote origin reference (the verse where the fn appears)
            # Only the marker itself is removed; the reference text stays in the
            # note.  The first bare <note> tag receives the running number.
            while ($note =~ /\\fr\s*(.+?)\s*(?=\\f)/) {
                $sourceVal = parseRef($1);
                $nFN++;
                # $note =~ s/\\fr\s*(.+?)\s*(?=\\f)//;
                $note =~ s/\\fr\s*//;
                $note =~ s/<note>/<note n="$nFN">/;
            }

            # \ft Footnote text
            $note =~ s/\\ft\s//g;
            $note =~ s/\\ft\*//g;

            # \f* Footnote closer
            $note =~ s/\s*\\f\*//;

            # \f Footnote opener (also swallows the one-character caller after it)
            $note =~ s/\\f\b\s*([^\s]\s*)?//;

            # \fX was inserted above to mark former locations of various already-handled markers, which can now be removed
            $note =~ s/\\fX//g;

            return $note;
        }

        $line =~ s/(\\f\b.+?\\f\*)/footnoteHandler($1)/eg;

        ### Crossreferences--Markers Supported: \x + \xo...\x*, \xk, \xq, \xt
        #### Markers Not Yet Supported: \xdc...\xdc*

        # xrefHandler(TEXT)
        # TEXT is one complete "\x ... \x*" span.  Returns it wrapped in
        # <note type="crossReference">...</note> with the supported \x markers
        # converted to markup or removed.  Increments the global counter $xFN
        # once per \xo marker and writes $sourceVal.
        sub xrefHandler {
            my ($text) = @_;
            my $xref = "<note type=\"crossReference\">$text</note>";

            # \xk Catch Words
            $xref =~ s/\\xk\s(.+?)(\s*)\\xk\*/<catchWord>$1<\/catchWord>$2/g;
            $xref =~ s/\\xk\s(.+?)(\s*)(?=\\x)/<catchWord>$1<\/catchWord>$2/g;
            $xref =~ s/\\xk\*//g;

            # \xq Quotations in Footnotes
            # CCL--I don't know the difference, aside from length, between catch words and quotations in footnotes. It may vary by document.
            $xref =~ s/\\xq\s(.+?)(\s*)\\xq\*/<catchWord>$1<\/catchWord>$2/g;
            $xref =~ s/\\xq\s(.+?)(\s*)(?=\\x)/<catchWord>$1<\/catchWord>$2/g;
            $xref =~ s/\\xq\*//g;

            # \xo Footnote origin reference (the verse where the fn appears)
            # Only the marker itself is removed; the reference text stays in the note.
            while ($xref =~ /\\xo\s*(.+?)\s*(?=\\x)/) {
                $sourceVal = parseRef($1);
                $xFN++;
                # $xref =~ s/\\xo\s*(.+?)\s*(?=\\x)//;
                $xref =~ s/\\xo\s*//;
                $xref =~ s/<note type=\"crossReference\">/<note type=\"crossReference\" n="$xFN">/;
            }

            # \xt Crossref itself
            $xref =~ s/\\xt\s(.+?)\\xt\*/<reference>$1<\/reference>/g;
            $xref =~ s/\\xt\s(.+?)(?=\\x)/<reference>$1<\/reference>/g;
            $xref =~ s/\\xt\*//g;

            # \x* Footnote closer
            $xref =~ s/\\x\*//;

            # \x Footnote opener (also swallows the one-character caller after it)
            $xref =~ s/\\x\b\s*([^\s]\s*)?//;

            return $xref;
        }

        $line =~ s/(\\x\b.+?\\x\*)/xrefHandler($1)/eg;

        # crossReference osisRef=""
        # Copy the visible reference text into an empty osisRef attribute, then
        # normalise that attribute step by step.
        $line =~ s/<reference osisRef="">([^<]+)<\/reference>/<reference osisRef="$1">$1<\/reference>/g;
        # Strip whitespace right after the opening quote.  (The replacement used
        # to contain "\s", which in a replacement string is just the letter "s",
        # so the whitespace was turned into an "s" instead of being removed.)
        $line =~ s/osisRef="\s+/osisRef="/g;
        $line =~ s/\s">/">/g;
        $line =~ s/<reference osisRef="([^\s\"]+)\s/<reference osisRef="$1\./g; # Changes space after book name to a period
        # The first group is greedy, so only the last colon of each attribute is rewritten here.
        $line =~ s/<reference osisRef="([^\"]+):([^\"]+)"/<reference osisRef="$1\.$2"/g; # Gen 1:1
        $line =~ s/<reference osisRef="([^\.\"]+)\.(\d+)\.(\d+)-(\d+)"/<reference osisRef="$1\.$2\.$3-$1\.$2\.$4"/g; # Gen 1:1-2
        $line =~ s/<reference osisRef="([^\.\"]+).(\d+):(\d+)-(\d+).(\d+)"/<reference osisRef="$1\.$2\.$3-$1\.$4\.$5"/g; # Gen 1:1-2:3
        $line =~ s/<reference osisRef="([^\.\"]+)\.(\d+)\.([^\"]+)">([^<]+)<\/reference>; <reference osisRef="(\d+)\.(\d+)"/<reference osisRef="$1\.$2\.$3">$4<\/reference>; <reference osisRef="$1\.$5\.$6"/g; # Gen. 1:1, 2:3
        $line =~ s/<reference osisRef="([^\.\"]+)\.(\d+)\.([^\"]+)">([^<]+)<\/reference>, <reference osisRef="(\d+)"/<reference osisRef="$1\.$2\.$3">$4<\/reference>, <reference osisRef="$1\.$2\.$5"/g; # Gen.
# 1:1, 3    (end of the "# Gen. 1:1, 3" example comment that begins on the previous line)
        $line =~ s/<reference osisRef="([^\"\.]+)\.(\d+)"/<reference osisRef="$1\.1\.$2"/g; # Jude 1

        ### Special Text and Character Styles--Markers Supported: \it...\it*, \nd...\nd*, \pn...\pn*, \tl...\tl*, \qt...\qt*, \add...\add*, \pb, \bk...\bk*, \sc..\sc*, \bd...\bd*
        #### Markers Not Yet Supported: Special Text: \k...\k*, \lit, \ord...\ord*, \sig...\sig*, \sls...\sls*, \wj...\wj*; Character Styling: \em...\em*, \bdit...\bdit*, \no...\no*; Spacing and Breaks: !$, //; Special Features: \fig...\fig*, \ndx...\ndx*, \pro...\pro*, \w...\w*, \wg...\wg*, \wh...\wh*

        # \dc...\dc*, inserted deuterocanonical text
        $line =~ s/\\dc\b\s*(.*?)\\dc\*/<transChange type=\"added\" editions=\"dc\">$1<\/transChange>/g;

        # \it...\it*, italic text
        $line =~ s/\\it\b\s*(.*?)\\it\*/<hi type=\"italic\">$1<\/hi>/g;

        # \bd...\bd*, bold text
        $line =~ s/\\bd\b\s*(.*?)\\bd\*/<hi type=\"bold\">$1<\/hi>/g;

        # \bk...\bk*, book name in text
        $line =~ s/\\bk\b\s*(.*?)\\bk\*/<hi type=\"italic\">$1<\/hi>/g;

        # \sc...\sc*, small-caps character style (used within a line)
        $line =~ s/\\sc\b\s*(.*?)\\sc\*/<hi type="small-caps">$1<\/hi>/g;

        # \nd...\nd*, Divine Name
        $line =~ s/\\nd\b\s*(.*?)\\nd\*/<divineName>$1<\/divineName>/g;

        # \pn...\pn*, Proper name
        $line =~ s/\\pn\b\s*(.*?)\\pn\*/<name>$1<\/name>/g;

        # \tl...\tl*, Foreign Language (treated here merely as transliterated text)
        $line =~ s/\\tl\b\s*(.*?)\\tl\*/<hi type="italic">$1<\/hi>/g;

        # \add...\add*, text added for translation purposes
        $line =~ s/\\add\b\s*(.*?)\\add\*/<transChange type=\"added\">$1<\/transChange>/g;

        # \pb, page break
        $line =~ s/\\pb\b/<milestone type=\"pb\"\/>/g;

        ### Other (probably non-standard) items
        ### Markers Supported: \zelastic

        # \zelastic, elastic height marker for typesetting
        $line =~ s/\\zelastic\b//g;

        # Every underscore left in the converted line becomes a space.
        $line =~ s/_/ /g;

        ### End USFM 2.1 Items

        # Lines that ended up empty (discarded markers) are not emitted.
        if ($line !~ /^\s*$/) {
            push (@outdata, "$line\n");
        }
    }   # closes a block opened earlier in the file
}       # closes a block opened earlier in the file

# Emit whatever verse / chapter end milestones are still pending.
if ($versClose =~ /<verse/) {
    push (@outdata, $versClose); # close verse
    $versClose = "";
}
if ($chapClose =~ /<chapter/) {
    push (@outdata, $chapClose); # close chapter
    $chapClose = "";
}
push (@outdata, closeTag("<\/osis>"));

# When an sID/eID attribute holds several space-separated IDs, keep only the first.
for my $entry (@outdata) {
    # (the two replacement characters below appear mis-encoded in this copy of the file)
    #@outdata[$i] =~ s/---/â€•/g; # m-dash
    #@outdata[$i] =~ s/--/â€”/g; # n-dash
    $entry =~ s/([es]ID=\"[^\" ]+) [^\"]*\"/$1\"/;
}

# Write every non-blank entry, with runs of CR/LF collapsed and exactly one
# trailing newline.
for my $entry (@outdata) {
    if ($entry !~ /^\s*$/) {
        $entry =~ s/[\r\n]+/\n/g;
        $entry =~ s/\n?$/\n/;
        print OUTF $entry;
    }
}
close (OUTF) or die "Error closing $outputFilename: $!\n";

print "Doing some cleanup.\n";

# Re-read the file just written.  Both opens are checked: if the read failed
# silently, the write-open below would truncate the output to nothing.
open (INF, "<:utf8", "$outputFilename") or die "Cannot reopen $outputFilename for reading: $!\n";
@filedata = <INF>;
close (INF);
open (OUTF, ">:utf8", "$outputFilename") or die "Cannot reopen $outputFilename for writing: $!\n";

# bubble chapter down
# A closing tag directly after a chapter milestone is swapped in front of it.
# After a swap the index steps back so the moved line is compared with its new
# predecessor.  "$idx > 0" keeps $filedata[$idx-1] from wrapping round to the
# last line of the file.
for (my $idx = 0; $idx < scalar(@filedata); $idx++) {
    if ($idx > 0 && $filedata[$idx] =~ /^<\// && $filedata[$idx-1] =~ /^<chapter.+\/>/) {
        @filedata[$idx-1, $idx] = @filedata[$idx, $idx-1];
        $idx -= 2;
    }
}

# bubble verse end up
# CCL--this may require further attention, but works for the present
for (my $idx = 0; $idx < scalar(@filedata); $idx++) {
    if ($idx > 0 && $filedata[$idx-1] =~ /^(<title|<\/?div|<\/?p)/ && $filedata[$idx] =~ /^<verse eID.+\/>/) {
        @filedata[$idx-1, $idx] = @filedata[$idx, $idx-1];
        $idx -= 2;
    }
}

$fullfile .= join('', @filedata);

$fullfile =~ s/<\/osisText>\n<\/osis>\n(<chapter eID[^>]+>)/$1\n<\/osisText>\n<\/osis>/mg; #swap the chapter back up one before the osisText closer
$fullfile =~ s/<\/div>\n(<chapter eID[^>]+>)/$1\n<\/div>/mg; #swap the chapter back up one before the book closer

print "Tagging quotations.\n";

# Level-2 quotations: text between "$" and "%" gets a numbered pair of <q>
# milestones; any "$" left over becomes a continuation-quote milestone.
$q = 1;
$fullfile =~ s/\$([^\%]+?)\%/"<q level=\"2\" sID=\"q2." . $q . "\"\/>" . $1 . "<q level=\"2\" eID=\"q2." . $q++ . "\"\/>"/eg;
$fullfile =~ s/\$/"<milestone type=\"cQuote\" subType=\"x-level-2\"\/>"/eg;

# Level-1 quotations use "@" (open) and "#" (close).  The two loops below keep
# rewriting the "@...@...#...#" shapes their patterns describe until none is
# left; the remaining simple "@...#" pairs are handled afterwards.
$q = 1;
while ($fullfile =~ /(\@[^\@\#]+?)\@([^\@\#]+?)\#(([^\@\#]+?\@[^\@\#]+?\#)+[^\@\#]+?\#)/) {
    $fullfile =~ s/(\@[^\@\#]+?)\@([^\@\#]+?)\#(([^\@\#]+?\@[^\@\#]+?\#)+[^\@\#]+?\#)/$1 . "<q level=\"1\" sID=\"q1." . $q . "\"\/>" . $2 . "<q level=\"1\" eID=\"q1." . $q++ . "\"\/>" . $3/eg;
}
while ($fullfile =~ /(\@[^\@\#]+?)\@([^\@\#]+?)\#([^\@\#]+?\#)/) {
    $fullfile =~ s/(\@[^\@\#]+?)\@([^\@\#]+?)\#([^\@\#]+?\#)/$1 . "<q level=\"1\" sID=\"q1." . $q . "\"\/>" . $2 . "<q level=\"1\" eID=\"q1." . $q++ . "\"\/>" . $3/eg;
}
$fullfile =~ s/\@([^\#]+?)\#/"<q level=\"1\" sID=\"q1." . $q . "\"\/>" . $1 . "<q level=\"1\" eID=\"q1." . $q++ . "\"\/>"/eg;
# Left-over "@" becomes a continuation-quote milestone; every "^" becomes a
# numbered level-1 end milestone whose eID ends in ".false".
$fullfile =~ s/\@/"<milestone type=\"cQuote\" subType=\"x-level-1\"\/>"/eg;
$fullfile =~ s/\^/"<q level=\"1\" eID=\"q1." . $q++ . ".false\"\/>"/eg;

print OUTF $fullfile;
close (OUTF) or die "Error closing $outputFilename: $!\n";
print "All done! OSIS file: $outputFilename\n";

\n"); # open current book openTag("<\/div type=\"book\">"); $line = ""; } # \h (running header--discard) if ($line =~ /^\\h\b/) { $line = ""; } # \ide Encoding (discard) if ($line =~ /^\\ide\b/) { $line = ""; } # \sts Status (discard) if ($line =~ /^\\sts\b/) { $line = ""; } # \rem Comments from translator (discard) if ($line =~ /^\\rem\b/) { $line = ""; } # \restore Version control comments from translator, not a formal USFM marker but used by paratext (discard) if ($line =~ /^\\restore\b/) { $line = ""; } # \toc1 Table of Contents (discard) if ($line =~ /^\\toc\d\b/) { $line = ""; } ### Introduction--Markers Supported: \imt#, \is#, \iot, \io#, \ip \ie #### Markers Not Yet Supported: \ipi, \im, \imi, \ipq, \imq, \ipr, \iq#, \ib, \ili, \ior...\ior*, \iex, \imte # \it title (DCO: Commented out because \it is for italics not introduction titles in USFM 2.1) # if ($line =~ /^\\it\b\s*(.*)/) { # $line = "

\n$1<\/title>"; # openTag("<\/div>"); # } # \imt major title if ($line =~ /^\\imt\b\s*(.+)/) { $line = "<div type=\"introduction\">\n<title>$1<\/title>"; openTag("<\/div>"); } # \is introduction section title if ($line =~ /^\\is(\d*)\b\s*(.*)/) { $level = $1; if ($level eq "") { $level = "1"; } $line = "<div type=\"section\"><title>$2<\/title>"; openTag("<\/div>"); } # \iot introduction outline title if ($line =~ /^\\iot\b\s*(.*)/) { $line = "<div type=\"outline\">\n<title>$1<\/title>"; } # \io\d+ introduction outline item if ($line =~ /^\\io(\d+)\b\s*(.*)/) { if ($ollevel == $1) { $line = "<item>$2<\/item>"; } elsif ($ollevel > $1) { $line = ""; while ($ollevel > $1) { $line .= "<\/list><\/item>\n"; $ollevel--; } $line .= "<item>$2<\/item>"; } elsif ($ollevel < $1) { $line = ""; if ($ollevel != 0) { $line .= "<item>"; } while ($ollevel < $1) { $line .= "<list>\n"; $ollevel++; } $line .= "<item>$2<\/item>\n"; } if (@filedata[$i+1] !~ /^\\io/) { while ($ollevel > 0) { $line .= "\n<\/list>"; if ($ollevel > 1) {$line .= "<\/item>";} $ollevel--; } if ($ollevel == 0) { $line .= "\n<\/div>"; } } } # \ip introduction paragraph if ($line =~ /^\\ip\b\s*(.*)/) { $line = "<p>$1<\/p>"; } # \im introduction paragraph ('left flush' - NOT IMPLENTED FLUSH) if ($line =~ /^\\im\b\s*(.*)/) { $line = "<p>$1<\/p>"; } # \im introduction quotation (implemented as ordinary quotation) if ($line =~ /^\\imq\b\s*(.*)/) { $line = "<q>$1<\/q>"; } # \iq line (including \iq#), adapted from \q (see below), needs more clean-up if ($line =~ /^\\iq/) { if ($l != 1) { push (@outdata, "<lg>\n"); $l = 1; } if ($line =~ /\\iq(\d*)$/) { if ($1 eq "") { $line = "<l>\n"; } else { $line = "<l level=\"$1\">\n"; } @filedata[$i+1] .= "<\/l>"; if (@filedata[$i+2] !~ /\\iq(?!t)/) { @filedata[$i+1] .= "\n<\/lg>"; $l = 0; } } else { $line =~ s/\\iq\b\s*(.+)/<l>$1<\/l>/; $line =~ s/\\iq(\d+)\b\s*(.+)/<l level=\"$1\">$2<\/l>/; if (@filedata[$i+1] !~ /\\iq(?![ta])/) { $line .= "\n<\/lg>"; $l = 0; } } } # \ie 
introduction end (discard) if ($line =~ /^\\ie\b/) { $line = ""; } ### Titles, Headings, and Labels (elsewhere?)--Markers Supported: \d, \ms#, \s#, \mt#, \r, \sp, \rq..\rq* #### Markers Not Yet Supported: \mte#, \mr, \sr # \ms majorSection if ($line =~ /^\\ms\b\s*(.+)/) { push (@outdata, closeTag("<\/p>")); push (@outdata, closeTag("<\/div type=\"majorSection\">")); push (@outdata, "<div type=\"majorSection\">\n"); openTag("<\/div type=\"majorSection\">"); $line =~ s/\\ms\b\s*(.+)/<title>$1<\/title>/; } # \d canonical title if ($line =~ /^\\(ms|d)\b\s*(.+)/) { push (@outdata, closeTag("<\/p>")); $line =~ s/\\d\b\s*(.+)/<title type=\"canonical\">$1<\/title>/; } # \s \s1 section (From Chapters and Verses) if ($line =~ /^\\s1?\b\s*(.+)/) { push (@outdata, closeTag("<\/p>")); push (@outdata, closeTag("<\/div type=\"section\">")); push (@outdata, "<div type=\"section\">\n"); openTag("<\/div type=\"section\">"); $line =~ s/\\s1?\b\s*(.+)/<title>$1<\/title>/; if ($line =~ /HEBREW TITLE/) { $line =~ s/<title>/<title type=\"psalm\" canonical=\"true\">/; } } # \ss \s2 subSection (From Chapters and Verses) if ($line =~ /^\\s[s2]\b\s*(.+)/) { push (@outdata, closeTag("<\/p>")); push (@outdata, closeTag("<\/div type=\"subSection\">")); push (@outdata, "<div type=\"subSection\">\n"); openTag("<\/div type=\"subSection\">"); $line =~ s/\\s[s2]\b\s*(.+)/<title>$1<\/title>/; } # \sss \s3 x-subSubSection (From Chapters and Verses) # This will also handle deeper levels (4+) of subsections. if ($line =~ /^\\s(ss|\d+)\b\s*(.+)/) { my $ssLvl = $1; my $ssType = ""; while ($ssLvl > 2) { $ssLvl--; $ssType .= "Sub"; } $ssType = "x-sub" . $ssType . 
"Section"; push (@outdata, closeTag("<\/p>")); push (@outdata, closeTag("<\/div type=\"$ssType\">")); push (@outdata, "<div type=\"$ssType\">\n"); openTag("<\/div type=\"$ssType\">"); $line =~ s/\\s(ss|\d+)\b\s*(.+)/<title>$2<\/title>/; } # \mt\mt1 title if ($line =~ /^\\mt[1234]?\b\s*(.+)/) { $line = "<title type=\"main\">$1<\/title>"; } # \mt2 title if ($line =~ /^\\mt2\b\s*(.+)/) { $line = "<title type=\"continued\">$1<\/title>"; } # \st,\st2 title if ($line =~ /^\\st2?\b\s*(.+)/) { $line = "<title type=\"continued\">$1<\/title>"; } # \st3 title if ($line =~ /^\\st3\b\s*(.+)/) { $line = "<title type=\"sub\">$1<\/title>"; } # \r sub title if ($line =~ /^\\mr\b\s*(.+)/) { $line = "<title type=\"sub\">$1<\/title>"; } # \r parallel title if ($line =~ /^\\r\b\s*(.+)/) { $line = "<title type=\"parallel\">$1<\/title>"; } # \sp speaker if ($line =~ /^\\sp\b\s*(.+)/) { $line = "<speaker>$1<\/speaker>"; } # \rq..\rq* inline reference $line =~ s/\\rq( .*?)\\rq\*/<reference>$1<\/reference>/g; ### Chapters and Verses--Markers Supported: \c, \v, \vp...\vp*, \cl #### Markers Not Yet Supported: \ca...\ca*, \cp, \cd, \va...\va* # \c chapter if ($line =~ /^\\c\b\s*([^ ]*)/) { if ($1 ne "") { $chap = $1; } else { $chap++; } push (@outdata, $versClose); $versClose = ""; if ($moduleType eq "bible") { push (@outdata, closeTag("<\/p>")) } if ($chapClose =~ /<chapter/) { push (@outdata, $chapClose); # close previous chapter $chapClose = ""; } else { push (@outdata, closeTag("<\/div>")); # close introduction div } push (@outdata, "<chapter sID=\"$book.$chap\" osisID=\"$book.$chap\"\/>\n"); $chapClose = "<chapter eID=\"$book.$chap\"\/>\n"; $line =~ s/\\c\b\s*([^ ]*)//; } # \cl chapter label if ($line =~ /^\\cl\b\s*(.*)/) { $line = "<title>$1<\/title>"; } # \v verse if ($line =~ /^\\v\b\s*(\d[^\\ ]*)?/) { if ($1 ne "") { $vers = $1; } else { $vers++; } push (@outdata, $versClose); $divOpen=false; $versClose = ""; if ($vers =~ /(\d+[^\\\- ]*)\-(\d+[^\\ ]*)/) { $vF = $1; $vT = $2; $vF =~ 
/^(\d+)/; $vFn = scalar($1); $vT =~ /^(\d+)/; $vTn = scalar($1); $osisID = "$book.$chap.$vF"; if ($vTn > $vFn && $vFn > 0) { for ($j = $vFn + 1; $j < $vTn; $j++) { $osisID .=" $book.$chap.$j"; } } $osisID .= " $book.$chap.$vT"; } else { $osisID = "$book.$chap.$vers"; } if ($moduleType eq "bible") { push (@outdata, "<verse sID=\"$osisID\" osisID=\"$osisID\"\/>\n"); $versClose = "<verse eID=\"$osisID\"\/>\n"; $line =~ s/\\v\b\s*(\d[^\\ ]*)? *//; } elsif ($moduleType eq "comment") { closeTag("<\/p>"); push (@outdata, "<div type=\"section\" annotateType=\"commentary\" annotateRef=\"$osisID\">\n"); $versClose = "<\/p>\n<\/div>\n"; $line =~ s/\\v\b\s*(\d[^\\ ]*)? *//; $divOpen=true; } else { print "usfm2osis.pl supports only the module types \"bible\" and \"comment\" \n"; exit 1; } } # \vp...\vp# published verse numbers (just delete for now) $line =~ s/\\vp\*\s*//g; $line =~ s/\\vp\b\s*(\d+[a-z]?|[a-z])\s*//g; ### Paragraphs--Markers Supported: \p, \b, \m, \nb, \cls #### Markers Not Yet Supported: \m, \pmo, \pm, \pmc, \pmr, \pi#, \mi, \li#, \pc, \pr, \ph#, \b # Hack to solve an issue in a module that used <R> for linebreaks in the usfm files--may be commented out (not USFM 2.1) $line =~ s/\\lb\*/<lb \/>/g; # \p paragraph (From Chapters and Verses) if ($line =~ /^\\p\b\s*/) { if ((($moduleType eq "comment") && ($divOpen))||($moduleType eq "bible")){ push (@outdata, closeTag("<\/p>")); } push (@outdata, "<p>\n"); openTag("<\/p>"); $line =~ s/\\p\b\s*//; } # \pc paragraph centered (From Chapters and Verses) if ($line =~ /^\\pc\b\s*/) { if ((($moduleType eq "comment") && ($divOpen))||($moduleType eq "bible")){ push (@outdata, closeTag("<\/p>")); } push (@outdata, "<p type=\"x-center\">\n"); openTag("<\/p>"); $line =~ s/\\pc\b\s*//; } # \mi2 paragraph flush left, no indentation if ($line =~ /^\\mi2\b\s*/) { if ((($moduleType eq "comment") && ($divOpen))||($moduleType eq "bible")){ push (@outdata, closeTag("<\/p>")); } push (@outdata, "<p type=\"x-noindent\">\n"); 
openTag("<\/p>"); $line =~ s/\\mi2\b\s*//; } # \cls paragraph (From Chapters and Verses) if ($line =~ /^\\cls\b\s*/) { if ((($moduleType eq "comment") && ($divOpen))||($moduleType eq "bible")){ push (@outdata, closeTag("<\/closer>")); } push (@outdata, "<closer>\n"); openTag("<\/closer>"); $line =~ s/\\cls\b\s*//; } # \b $line =~ s/\\b\b//; # \m $line =~ s/\\m\b//; # \nb $line =~ s/\\nb\b//; ### Poetry--Markers Supported: \q#, \qs...\qs*, \qr, \qc, \qac...\qac*, \qa, \qm# #### Markers Not Yet Supported: \b # \qt...\qt*, OT quotation (handle early) $line =~ s/\\qt\b\s*(.*?)\\qt\*/<seg type="otPassage">$1<\/seg>/g; # \qa, acrostic heading $line =~ s/^\\qa\b\s*(.*)/<title type=\"acrostic\">$1<\/title>/g; # \qac...\qac*, acrostic character style (used within a line) $line =~ s/\\qac\b\s*(.*?)\\qac\*/<hi type="acrostic">$1<\/hi>/g; # \q line (including \q#, \qr, \qc, and \qs...\qs*) if ($line =~ /^\\q/) { if ($l != 1) { push (@outdata, "<lg>\n"); $l = 1; } if ($line =~ /\\qm?(c|r|\d*)$/) { if ($1 eq "") { $line = "<l>\n"; } elsif ($1 eq "c") { $line = "<l type=\"x-centered\">"; } elsif ($1 eq "r") { $line = "<l type=\"x-right\">"; } else { $line = "<l level=\"$1\">\n"; } @filedata[$i+1] .= "<\/l>"; if (@filedata[$i+2] !~ /\\q(?!t)/) { @filedata[$i+1] .= "\n<\/lg>"; $l = 0; } } else { $line =~ s/\\q\b\s*(.+)/<l>$1<\/l>/; $line =~ s/\\qm?(\d+)\b\s*(.+)/<l level=\"$1\">$2<\/l>/; $line =~ s/\\qc\b\s*(.+)/<l type=\"x-centered\">$1<\/l>/; $line =~ s/\\qr\b\s*(.+)/<l type=\"x-right\">$1<\/l>/; $line =~ s/\\qs\b\s*(.+?)\s*\\qs\*/<l type="selah">$1<\/l>/; if (@filedata[$i+1] !~ /\\q(?![ta])/) { $line .= "\n<\/lg>"; $l = 0; } } $line =~ s/\s*\\qs\b\s*(.+?)\s*\\qs\*/<\/l>\n<l type="selah">$1/; } $line =~ s/\s*\\qs\b\s*(.+?)\s*\\qs\*\s*/<lg><l type="selah">$1<\/l><\/lg>/; ### Tables--Markers Supported: \tr, \th#, \tc#, \tcr# ####Markers Not Yet Supported: \thr# # \th table heading if ($line =~ /^\\t/) { if ($line =~ /^\\tr\b\s*(\\th.*)/) { $line = "$1"; if ($table != 1) { push 
(@outdata, "<table>\n");    # argument list of the "push" that ends the previous line
                    $table = 1;
                }
                # Each \th# heading cell runs up to the next \th marker or the end of the line.
                $line =~ s/\\th\d?\b\s*(.+?)\s*(?=(\\th|$))/<cell role=\"label\">$1<\/cell>/g;
                $line = "<row>$line<\/row>";
            }

            # \tr row made of \tc# / \tcr# data cells
            if ($line =~ /^\\tr\b\s*(\\tc.*)/) {
                $line = $1;
                if ($table != 1) {
                    push (@outdata, "<table>\n");
                    $table = 1;
                }
                $line =~ s/\\tcr?\d?\b\s*(.+?)\s*(?=(\\tc|$))/<cell>$1<\/cell>/g;
                $line = "<row>$line<\/row>";
                # Close the table when the next input line is not another \tr row.
                if ($filedata[$i+1] !~ /\\tr/) {
                    $line .= "<\/table>\n";
                    $table = 0;
                }
            }

            # \th1 starts a heading row; any other \th# adds a cell to it.
            if ($line =~ /^\\th1\b\s*(.*)/) {
                if ($table != 1) {
                    push (@outdata, "<table>\n");
                    $table = 1;
                }
                $line = "<row><cell role=\"label\">$1<\/cell>\n";
            }
            elsif ($line =~ /^\\th\d+\b\s*(.*)/) {
                $line = "<cell role=\"label\">$1<\/cell>\n";
            }

            # \tb1 starts a body row (closing the previous row when a table is
            # already open); any other \tb# adds a cell.  The row and table are
            # closed as soon as the next input line has no \tb marker.
            if ($line =~ /^\\tb1\b\s*(.*)/) {
                if ($table != 1) {
                    push (@outdata, "<table>\n");
                    $table = 1;
                }
                else {
                    push (@outdata, "<\/row>");
                }
                $line = "<row><cell>$1<\/cell>\n";
                if ($filedata[$i+1] !~ /\\tb/) {
                    $line .= "<\/row><\/table>\n";
                    $table = 0;
                }
            }
            elsif ($line =~ /^\\tb\d+\b\s*(.*)/) {
                $line = "<cell>$1<\/cell>\n";
                if ($filedata[$i+1] !~ /\\tb/) {
                    $line .= "<\/row><\/table>\n";
                    $table = 0;
                }
            }
        }

        # parseRef(REF)
        # Rewrites a "chapter:verse" style reference into a dotted reference
        # prefixed with the global $book and returns the result.
        #   1. a trailing ":" or "." (plus any whitespace after it) is dropped;
        #   2. every remaining ":" becomes ".";
        #   3. "$book." is prepended;
        #   4. two range patterns repeat "$book.<chapter>." after the hyphen.
        # NOTE(review): both range patterns need at least two characters in the
        # starting verse, so a range such as "1:1-3" is returned as
        # "$book.1.1-3" without the second half being expanded -- confirm
        # whether that is intended.
        sub parseRef {
            my ($ref) = @_;
            $ref =~ s/[:\.]\s*$//;
            $ref =~ s/:/\./g;
            $ref = "$book.$ref";
            $ref =~ s/(\d+)\.(\d[^\,]+)\-(\d+)/$1.$2-$book.$1.$3/;
            $ref =~ s/(\d+)\.(\d[^\-]+)\-+\s*(\d.+)/$1.$2\-$book.$1.$3/;
            return $ref;
        }

        ### Footnotes--Markers Supported: \fk, \fq, \f...\f*, \fv, \ft, \fqa
        ####Markers Not Yet Supported: \fe...\fe*, \fr, \fl, \fp, \fdc...\fdc*, \fm...\fm*

        # footnoteHandler(TEXT)
        # TEXT is one complete "\f ... \f*" span.  Returns it wrapped in
        # <note>...</note> with the supported footnote markers converted to
        # markup or removed.  Reads the globals $book and $chap, increments the
        # global counter $nFN once per \fr marker and writes $sourceVal.
        # While converting, "\fX" is left where a marker used to be so that the
        # "(?=\\f)" look-aheads of the later patterns still find a terminator;
        # all "\fX" place-holders are removed at the end.
        sub footnoteHandler {
            my ($text) = @_;
            my $note = "<note>$text</note>";

            # \fk Catch Words
            $note =~ s/\\fk\s(.+?)(\s*)\\fk\*/\\fX<catchWord>$1<\/catchWord>\\fX$2/g;
            $note =~ s/\\fk\s(.+?)(\s*)(?=\\f)/\\fX<catchWord>$1<\/catchWord>$2\\fX/g;
            $note =~ s/\\fk\*/\\fX/g;

            # \fq Quotations in Footnotes
            # CCL--I don't know the difference, aside from length, between catch words and quotations in footnotes. It may vary by document.
            $note =~ s/\\fq\s(.+?)(\s*)\\fq\*/\\fX<catchWord>$1<\/catchWord>\\fX$2/g;
            $note =~ s/\\fq\s(.+?)(\s*)(?=\\f)/\\fX<catchWord>$1<\/catchWord>$2\\fX/g;
            $note =~ s/\\fq\*/\\fX/g;

            # \fqa Alternate translations in Footnotes
            $note =~ s/\\fqa\s(.+?)\\fqa\*/\\fX<rdg type=\"alternate\">$1<\/rdg>\\fX/g;
            $note =~ s/\\fqa\s(.+?)(?=\\f)/\\fX<rdg type=\"alternate\">$1<\/rdg>\\fX/g;
            $note =~ s/\\fqa\*/\\fX/g;

            # \fv Footnote verse number
            $note =~ s/\\fv\s(.+?)\\fv\*/\\fX<reference osisID=\"$book.$chap.$1\">$1<\/reference>\\fX/g;
            $note =~ s/\\fv\s*(\d+)\b\s*(?=\\f)/\\fX<reference osisID=\"$book.$chap.$1\">$1<\/reference>\\fX/g;
            $note =~ s/\\fv\*/\\fX/g;

            # \fr Footnote origin reference (the verse where the fn appears)
            # Only the marker itself is removed; the reference text stays in the
            # note.  The first bare <note> tag receives the running number.
            while ($note =~ /\\fr\s*(.+?)\s*(?=\\f)/) {
                $sourceVal = parseRef($1);
                $nFN++;
                # $note =~ s/\\fr\s*(.+?)\s*(?=\\f)//;
                $note =~ s/\\fr\s*//;
                $note =~ s/<note>/<note n="$nFN">/;
            }

            # \ft Footnote text
            $note =~ s/\\ft\s//g;
            $note =~ s/\\ft\*//g;

            # \f* Footnote closer
            $note =~ s/\s*\\f\*//;

            # \f Footnote opener (also swallows the one-character caller after it)
            $note =~ s/\\f\b\s*([^\s]\s*)?//;

            # \fX was inserted above to mark former locations of various already-handled markers, which can now be removed
            $note =~ s/\\fX//g;

            return $note;
        }

        $line =~ s/(\\f\b.+?\\f\*)/footnoteHandler($1)/eg;

        ### Crossreferences--Markers Supported: \x + \xo...\x*, \xk, \xq, \xt
        #### Markers Not Yet Supported: \xdc...\xdc*

        # xrefHandler(TEXT)
        # TEXT is one complete "\x ... \x*" span.  Returns it wrapped in
        # <note type="crossReference">...</note> with the supported \x markers
        # converted to markup or removed.  Increments the global counter $xFN
        # once per \xo marker and writes $sourceVal.
        sub xrefHandler {
            my ($text) = @_;
            my $xref = "<note type=\"crossReference\">$text</note>";

            # \xk Catch Words
            $xref =~ s/\\xk\s(.+?)(\s*)\\xk\*/<catchWord>$1<\/catchWord>$2/g;
            $xref =~ s/\\xk\s(.+?)(\s*)(?=\\x)/<catchWord>$1<\/catchWord>$2/g;
            $xref =~ s/\\xk\*//g;

            # \xq Quotations in Footnotes
            # CCL--I don't know the difference, aside from length, between catch words and quotations in footnotes. It may vary by document.
            $xref =~ s/\\xq\s(.+?)(\s*)\\xq\*/<catchWord>$1<\/catchWord>$2/g;
            $xref =~ s/\\xq\s(.+?)(\s*)(?=\\x)/<catchWord>$1<\/catchWord>$2/g;
            $xref =~ s/\\xq\*//g;

            # \xo Footnote origin reference (the verse where the fn appears)
            # Only the marker itself is removed; the reference text stays in the note.
            while ($xref =~ /\\xo\s*(.+?)\s*(?=\\x)/) {
                $sourceVal = parseRef($1);
                $xFN++;
                # $xref =~ s/\\xo\s*(.+?)\s*(?=\\x)//;
                $xref =~ s/\\xo\s*//;
                $xref =~ s/<note type=\"crossReference\">/<note type=\"crossReference\" n="$xFN">/;
            }

            # \xt Crossref itself
            $xref =~ s/\\xt\s(.+?)\\xt\*/<reference>$1<\/reference>/g;
            $xref =~ s/\\xt\s(.+?)(?=\\x)/<reference>$1<\/reference>/g;
            $xref =~ s/\\xt\*//g;

            # \x* Footnote closer
            $xref =~ s/\\x\*//;

            # \x Footnote opener (also swallows the one-character caller after it)
            $xref =~ s/\\x\b\s*([^\s]\s*)?//;

            return $xref;
        }

        $line =~ s/(\\x\b.+?\\x\*)/xrefHandler($1)/eg;

        # crossReference osisRef=""
        # Copy the visible reference text into an empty osisRef attribute, then
        # normalise that attribute step by step.
        $line =~ s/<reference osisRef="">([^<]+)<\/reference>/<reference osisRef="$1">$1<\/reference>/g;
        # Strip whitespace right after the opening quote.  (The replacement used
        # to contain "\s", which in a replacement string is just the letter "s",
        # so the whitespace was turned into an "s" instead of being removed.)
        $line =~ s/osisRef="\s+/osisRef="/g;
        $line =~ s/\s">/">/g;
        $line =~ s/<reference osisRef="([^\s\"]+)\s/<reference osisRef="$1\./g; # Changes space after book name to a period
        # The first group is greedy, so only the last colon of each attribute is rewritten here.
        $line =~ s/<reference osisRef="([^\"]+):([^\"]+)"/<reference osisRef="$1\.$2"/g; # Gen 1:1
        $line =~ s/<reference osisRef="([^\.\"]+)\.(\d+)\.(\d+)-(\d+)"/<reference osisRef="$1\.$2\.$3-$1\.$2\.$4"/g; # Gen 1:1-2
        $line =~ s/<reference osisRef="([^\.\"]+).(\d+):(\d+)-(\d+).(\d+)"/<reference osisRef="$1\.$2\.$3-$1\.$4\.$5"/g; # Gen 1:1-2:3
        $line =~ s/<reference osisRef="([^\.\"]+)\.(\d+)\.([^\"]+)">([^<]+)<\/reference>; <reference osisRef="(\d+)\.(\d+)"/<reference osisRef="$1\.$2\.$3">$4<\/reference>; <reference osisRef="$1\.$5\.$6"/g; # Gen. 1:1, 2:3
        $line =~ s/<reference osisRef="([^\.\"]+)\.(\d+)\.([^\"]+)">([^<]+)<\/reference>, <reference osisRef="(\d+)"/<reference osisRef="$1\.$2\.$3">$4<\/reference>, <reference osisRef="$1\.$2\.$5"/g; # Gen.
1:1, 3 $line =~ s/<reference osisRef="([^\"\.]+)\.(\d+)"/<reference osisRef="$1\.1\.$2"/g; # Jude 1 ### Special Text and Character Styles--Markers Supported: \it...\it*, \nd...\nd*, \pn...\pn*, \tl...\tl*, \qt...\qt*, \add...\add*, \pb, \bk...\bk*, \sc..\sc*, \bd...\bd* #### Markers Not Yet Supported: Special Text: \k...\k*, \lit, \ord...\ord*, \sig...\sig*, \sls...\sls*, \wj...\wj*; Character Styling: \em...\em*, \bdit...\bdit*, \no...\no*; Spacing and Breaks: !$, //; Special Features: \fig...\fig*, \ndx...\ndx*, \pro...\pro*, \w...\w*, \wg...\wg*, \wh...\wh* # \dc...\dc*, inserted deuterocanonical text $line =~ s/\\dc\b\s*(.*?)\\dc\*/<transChange type=\"added\" editions=\"dc\">$1<\/transChange>/g; # \it...\it*, italic text $line =~ s/\\it\b\s*(.*?)\\it\*/<hi type=\"italic\">$1<\/hi>/g; # \bd...\bd*, bold text $line =~ s/\\bd\b\s*(.*?)\\bd\*/<hi type=\"bold\">$1<\/hi>/g; # \bk...\bk*, book name in text $line =~ s/\\bk\b\s*(.*?)\\bk\*/<hi type=\"italic\">$1<\/hi>/g; # \sc...\sc*, small-caps character style (used within a line) $line =~ s/\\sc\b\s*(.*?)\\sc\*/<hi type="small-caps">$1<\/hi>/g; # \nd...\nd*, Divine Name $line =~ s/\\nd\b\s*(.*?)\\nd\*/<divineName>$1<\/divineName>/g; # \pn...\pn*, Proper name $line =~ s/\\pn\b\s*(.*?)\\pn\*/<name>$1<\/name>/g; # \tl...\tl*, Foreign Langauge (treated here merely as transliterated text) $line =~ s/\\tl\b\s*(.*?)\\tl\*/<hi type="italic">$1<\/hi>/g; # \add...\add*, text added for translation purposes $line =~ s/\\add\b\s*(.*?)\\add\*/<transChange type=\"added\">$1<\/transChange>/g; # \pb, page break $line =~ s/\\pb\b/<milestone type=\"pb\"\/>/g; ### Other (probably non-standard) items ### Markers Supported: \zelastic # \zelastic, elastic height marker for typesetting $line =~ s/\\zelastic\b//g; $line =~ s/_/ /g; ### End USFM 2.1 Items if ($line !~ /^\s*$/) { push (@outdata, "$line\n"); } } } if ($versClose =~ /<verse/) { push (@outdata, $versClose); # close verse $versClose = ""; } if ($chapClose =~ /<chapter/) { push 
(@outdata, $chapClose); # close chapter $chapClose = ""; } push (@outdata, closeTag("<\/osis>")); for ($i = 0; $i < scalar(@outdata); $i++) { #@outdata[$i] =~ s/---/â€•/g; # m-dash #@outdata[$i] =~ s/--/â€”/g; # n-dash @outdata[$i] =~ s/([es]ID=\"[^\" ]+) [^\"]*\"/$1\"/; } for ($i = 0; $i < scalar(@outdata); $i++) { if (@outdata[$i] !~ /^\s*$/) { @outdata[$i] =~ s/[\r\n]+/\n/g; @outdata[$i] =~ s/\n?$/\n/; print OUTF @outdata[$i]; } } close (OUTF); print "Doing some cleanup.\n"; open (INF, "<:utf8", "$outputFilename"); @filedata = <INF>; close (INF); open (OUTF, ">:utf8", "$outputFilename"); # bubble chapter down for ($i = 0; $i < scalar(@filedata); $i++) { if (@filedata[$i] =~ /^<\// && @filedata[$i-1] =~ /^<chapter.+\/>/) { $temp = @filedata[$i]; @filedata[$i] = @filedata[$i-1]; @filedata[$i-1] = $temp; $i -= 2; } } # bubble verse end up # CCL--this may require further attention, but works for the present for ($i = 0; $i < scalar(@filedata); $i++) { if (@filedata[$i-1] =~ /^(<title|<\/?div|<\/?p)/ && @filedata[$i] =~ /^<verse eID.+\/>/) { $temp = @filedata[$i]; @filedata[$i] = @filedata[$i-1]; @filedata[$i-1] = $temp; $i -= 2; } } for ($i = 0; $i < scalar(@filedata); $i++) { $fullfile .= @filedata[$i]; } $fullfile =~ s/<\/osisText>\n<\/osis>\n(<chapter eID[^>]+>)/$1\n<\/osisText>\n<\/osis>/mg; #swap the chapter back up one before the osisText closer $fullfile =~ s/<\/div>\n(<chapter eID[^>]+>)/$1\n<\/div>/mg; #swap the chapter back up one before the book closer print "Tagging quotations.\n"; $q = 1; $fullfile =~ s/\$([^\%]+?)\%/"<q level=\"2\" sID=\"q2." . $q . "\"\/>" . $1 . "<q level=\"2\" eID=\"q2." . $q++ . "\"\/>"/eg; $fullfile =~ s/\$/"<milestone type=\"cQuote\" subType=\"x-level-2\"\/>"/eg; $q = 1; while ($fullfile =~ /(\@[^\@\#]+?)\@([^\@\#]+?)\#(([^\@\#]+?\@[^\@\#]+?\#)+[^\@\#]+?\#)/) { $fullfile =~ s/(\@[^\@\#]+?)\@([^\@\#]+?)\#(([^\@\#]+?\@[^\@\#]+?\#)+[^\@\#]+?\#)/$1 . "<q level=\"1\" sID=\"q1." . $q . "\"\/>" . $2 . "<q level=\"1\" eID=\"q1." . $q++ . 
"\"\/>" . $3/eg; } while ($fullfile =~ /(\@[^\@\#]+?)\@([^\@\#]+?)\#([^\@\#]+?\#)/) { $fullfile =~ s/(\@[^\@\#]+?)\@([^\@\#]+?)\#([^\@\#]+?\#)/$1 . "<q level=\"1\" sID=\"q1." . $q . "\"\/>" . $2 . "<q level=\"1\" eID=\"q1." . $q++ . "\"\/>" . $3/eg; } $fullfile =~ s/\@([^\#]+?)\#/"<q level=\"1\" sID=\"q1." . $q . "\"\/>" . $1 . "<q level=\"1\" eID=\"q1." . $q++ . "\"\/>"/eg; $fullfile =~ s/\@/"<milestone type=\"cQuote\" subType=\"x-level-1\"\/>"/eg; $fullfile =~ s/\^/"<q level=\"1\" eID=\"q1." . $q++ . ".false\"\/>"/eg; print OUTF $fullfile; close (OUTF); print "All done! OSIS file: $outputFilename\n";