summaryrefslogtreecommitdiffstats
path: root/modules/perlconverters/usfm2osis.pl
diff options
context:
space:
mode:
authorChris Little <chrislit@crosswire.org>2009-02-08 07:57:17 +0000
committerChris Little <chrislit@crosswire.org>2009-02-08 07:57:17 +0000
commitd36cf1a52887a13cecb0c4bb064d0c142094c348 (patch)
treec9633709baa95fa6ae555272fe7669f5b051ddb5 /modules/perlconverters/usfm2osis.pl
parent839e9b99dee4fc1697b44e512083f002bb57e796 (diff)
downloadsword-tools-d36cf1a52887a13cecb0c4bb064d0c142094c348.tar.gz
some changes to handle Welsh Bible
notably, handles footnotes/crossrefs more flexibly git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@149 07627401-56e2-0310-80f4-f8cd0041bdcd
Diffstat (limited to 'modules/perlconverters/usfm2osis.pl')
-rw-r--r--modules/perlconverters/usfm2osis.pl1567
1 files changed, 802 insertions, 765 deletions
diff --git a/modules/perlconverters/usfm2osis.pl b/modules/perlconverters/usfm2osis.pl
index d140f7c..5747e18 100644
--- a/modules/perlconverters/usfm2osis.pl
+++ b/modules/perlconverters/usfm2osis.pl
@@ -1,765 +1,802 @@
-#!/usr/bin/perl
-
-## USFM to OSIS (2.1.1) converter
-
-## Licensed under the standard BSD license:
-
-# Copyright (c) 2002-2008 CrossWire Bible Society <http://www.crosswire.org/>
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in
-# the documentation and/or other materials provided with the
-# distribution.
-# * Neither the name of the CrossWire Bible Society nor the names of
-# its contributors may be used to endorse or promote products
-# derived from this software without specific prior written
-# permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
-# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
-# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-## For general inquiries, comments, suggestions, bug reports, etc. email:
-## sword-support@crosswire.org
-
-#########################################################################
-
-# Stores the script version and date
-$version = "1.4";
-$date = "2008-07-04";
-# Sets the version of OSIS used in the OSIS header
-$osisVersion = "2.1.1";
-# Stores the USFM Version
-$usfmVersion = "2.1"; # The USFM reference document can be found at http://confluence.ubs-icap.org/display/USFM/Home;jsessionid=97071C5C1E562036A1CAF4FF77147565 (as of 2008-07-07)
-
-# This is the hash which maps the conversion of USFM book abbreviations to OSIS book abbreviations. ***I would like to add the ability to access an external file to provide options for other languages. In other words, in preparing a USFM file for conversion, a separate file could be created which could be used to map the conversion of abbreviated book names in other languages to OSIS. This would be especially useful for cross-references, but I haven't figured out how to do it yet.
-%OSISbook = (
-# Theoretically, these are laid out according to <BooksPresent>, but I can really only guess without a spec ***Need to check
-"" => "", "GEN" => "Gen", "EXO" => "Exod", "LEV" => "Lev", "NUM" => "Num",
- "DEU" => "Deut", "JOS" => "Josh", "JDG" => "Judg", "RUT" => "Ruth",
- "1SA" => "1Sam", "2SA" => "2Sam", "1KI" => "1Kgs", "2KI" => "2Kgs",
- "1CH" => "1Chr", "2CH" => "2Chr", "EZR" => "Ezra", "NEH" => "Neh",
- "EST" => "Esth", "JOB" => "Job", "PSA" => "Ps", "PRO" => "Prov",
- "ECC" => "Eccl", "SNG" => "Song", "ISA" => "Isa", "JER" => "Jer",
- "LAM" => "Lam", "EZK" => "Ezek", "DAN" => "Dan", "HOS" => "Hos",
- "JOL" => "Joel", "AMO" => "Amos", "OBA" => "Obad", "JON" => "Jonah",
- "MIC" => "Mic", "NAM" => "Nah", "HAB" => "Hab", "ZEP" => "Zeph",
- "HAG" => "Hag", "ZEC" => "Zech", "MAL" => "Mal", "MAT" => "Matt",
- "MRK" => "Mark", "LUK" => "Luke", "JHN" => "John", "ACT" => "Acts",
- "ROM" => "Rom", "1CO" => "1Cor", "2CO" => "2Cor", "GAL" => "Gal",
- "EPH" => "Eph", "PHP" => "Phil", "COL" => "Col", "1TH" => "1Thess",
- "2TH" => "2Thess", "1TI" => "1Tim", "2TI" => "2Tim", "TIT" => "Titus",
- "PHM" => "Phlm", "HEB" => "Heb", "JAS" => "Jas", "1PE" => "1Pet",
- "2PE" => "2Pet", "1JN" => "1John", "2JN" => "2John", "3JN" => "3John",
- "JUD" => "Jude", "REV" => "Rev", "TOB" => "Tob", "JDT" => "Jdt",
- "ESG" => "Esth", "WIS" => "Wis", "SIR" => "Sir", "BAR" => "Bar",
- "LJE" => "EpJer", "S3Y" => "PrAzar", "SUS" => "Sus", "BEL" => "Bel",
- "1MA" => "1Macc", "2MA" => "2Macc", "3MA" => "3Macc", "4MA" => "4Macc",
- "1ES" => "1Esd", "2ES" => "2Esd", "MAN" => "PrMan",
-# Following this is just an uneducated guess
- "PS2" => "Ps151", "ODA" => "Odes", "PSS" => "PssSol", "JSA" => "Josh",
- "JSB" => "Josh", "TBS" => "Tob", "SST" => "Sus", "DNT" => "Dan",
- "BLT" => "Bel", "ADE" => "AddEsth"
- );
-
-# Generates a list of available encodings.
-use Encode;
-@encodingList = Encode->encodings(":all");
-foreach $enc (@encodingList) {
- $encodings .= "$enc, ";
-}
-$encodings =~ s/\, $//;
-
-# Syntax instructions
-if (scalar(@ARGV) < 2) {
- print "\nusfm2osis.pl -- USFM $usfmVersion to OSIS $osisVersion converter version $version ($date)\n\nSyntax: usfm2osis.pl <osisWork> [-o OSIS-file] [-e USFM encoding] <USFM filenames|wildcard>\n";
- print "- Arguments in braces < > are required. Arguments in brackets [ ] are optional.\n";
- print "- The osisWork is a short name with no spaces which will identify your module.\n";
- print "- If no -o option is specified for the output filename, the default output file is: \n\tosisWork.osis.xml.\n";
- print "- Supported encodings include:\n\t$encodings\n";
- print "- If the encoding is omitted, utf8 is the default value.\n";
- print "- USFM filenames with the SFM extension can be accessed using a wildcard: \n\t*.SFM\n";
- print "As an example, if you want to generate the osisWork <bible> and your USFM files are encoded in utf8, located in the /Bible folder relative to this script with the file extension SFM, enter:\n\tperl usfm2osis.pl bible Bible/*.SFM\n\n";
- exit (-1);
-}
-
-$osisWork = $ARGV[0];
-
-$nextarg = 1;
-
-if ($ARGV[$nextarg] eq "-o") {
- $outputFilename = "$ARGV[$nextarg+1]";
- $nextarg += 2;
-}
-else {
- $outputFilename = "$osisWork.osis.xml";
-}
-open (OUTF, , ">:utf8", "$outputFilename") or die "Could not open file $ARGV[2] for writing.";
-
-if ($ARGV[$nextarg] eq "-e") {
- $inputEncoding = "$ARGV[$nextarg+1]";
- $nextarg += 2;
-}
-else {
- $inputEncoding = "utf8";
-}
-$encFound = 0;
-foreach $enc (@encodingList) {
- if ($enc eq $inputEncoding) {
- $encFound = 1;
- }
-}
-if ($encFound == 0) {
- die "Encoding $inputEncoding not supported.\nSupported encodings include:\n\t$encodings\n";
-}
-else {
- print "Encoding \"$inputEncoding\" is supported.\n"
-}
-
-for (; $nextarg < scalar(@ARGV); $nextarg++) {
- push(@files, $ARGV[$nextarg]);
-}
-
-push (@outdata, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<osis xmlns=\"http://www.bibletechnologies.net/2003/OSIS/namespace\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.bibletechnologies.net/2003/OSIS/namespace http://www.bibletechnologies.net/osisCore.$osisVersion.xsd\">\n<osisText osisRefWork=\"Bible\" xml:lang=\"en\" osisIDWork=\"$osisWork\">\n<header>\n<work osisWork=\"$osisWork\"\/>\n<\/header>\n");
-
-$tagStack = "<\/osisText><\/osis>";
-$chapClose = "";
-$versClose = "";
-
-sub closeTag {
- $tag = @_[0];
-
- if ($tagStack =~ /$tag/) {
- $tagStack =~ s/^(.*?$tag)//;
- $taglist = $1;
- $taglist =~ s/>/>\n/g;
- $taglist =~ s/(<\/\w+)\s+[^>]+>/$1>/g;
- return $taglist;
- }
- else {
- return:
- }
-}
-
-sub openTag {
- $tag = @_[0];
- $tagStack = $tag . $tagStack;
- return;
-}
-
-foreach $file (@files) {
- print "Processing $file.\n";
- open (SFM, "$file");
- my @filedata = "";
- while (<SFM>) {
- my $sfline;
- $sfline = decode($inputEncoding, $_);
- push (@filedata, $sfline);
- }
- close (SFM);
-
- $ollevel = 0;
- $vers = 0;
- $chap = 0;
- $book = "";
- # Sets the initial value for the attribute "n" in footnotes.
- $nFN = 0;
- # Creates array for the attribute "n" in cross-references
- @nCR = (a .. z);
- # Sets the initial value for the attribute "n" in cross-references.
- $nCR = @nCR [0];
-
- #encoding stuff
- for ($i = 0; $i < scalar(@filedata); $i++) {
- $line = @filedata[$i];
- $line =~ s/[\r\n]//g;
-
- ### Basic XML entity encoding
- $line =~ s/&(?![a-zA-Z0-9])/&amp;/g;
- $line =~ s/<< ?/\@/g;
- $line =~ s/>>/\#/g;
- $line =~ s/</\$/g;
- $line =~ s/>/\%/g;
-
- $line =~ s/(\w)\'(\w)/$1ʼ$2/g;
- $line =~ s/\\fr 1\/2 \\fr\*/½/g;
-
- @filedata[$i] = $line;
- }
-
- for ($i = 0; $i < scalar(@filedata); $i++) {
- $line = @filedata[$i];
-
- ### File Identification--Markers Supported: \id, \h, \ide, \sts, \rem, \toc1, \toc2, \toc3
-
- $line =~ s/\\v\b\s+(\d+)(\-\d+|\s*\\v\b\s+\d+)\s*\\v\b\s+(\d+)/\\v $1\-$3/;
- $line =~ s/\\v\b\s+(\d+)\s*\\v\b\s+(\d+\-)?(\d+)/\\v $1\-$3/;
- $line =~ s/^\\(p[is]|mi)\b/\\p/;
- $line =~ s/^\\li\b/\\p/; #\li isn't part of USFM, so we'll make it \p
-
- # \id (book marker)
- if ($line =~ /^\\id\b\s*([^ ]*)/) {
- $book = $OSISbook{$1};
- $chap = 0;
- if ($versClose =~ /<verse/) {
- push (@outdata, $versClose); # close verse
- $versClose = "";
- }
-# push (@outdata, closeTag("<\/div[^>]*?>")); # close section
- if ($chapClose =~ /<chapter/) {
- push (@outdata, $chapClose); # close chapter
- $chapClose = "";
- }
-
- push (@outdata, closeTag("<\/div type=\"book\">")); #close book
- if ($book eq "") {
- $book = "UnknownUSFMBook";
- }
- push (@outdata, "<div type=\"book\" osisID=\"$book\">\n"); # open current book
- openTag("<\/div type=\"book\">");
- $line = "";
- }
-
- # \h (running header--discard)
- if ($line =~ /^\\h\b/) {
- $line = "";
- }
-
- # \ide Encoding (discard)
- if ($line =~ /^\\ide\b/) {
- $line = "";
- }
-
- # \sts Status (discard)
- if ($line =~ /^\\sts\b/) {
- $line = "";
- }
-
- # \rem Comments from translator (discard)
- if ($line =~ /^\\rem\b/) {
- $line = "";
- }
-
- # \toc1 Table of Contents (discard)
- if ($line =~ /^\\toc\d\b/) {
- $line = "";
- }
-
- ### Introduction--Markers Supported: \imt#, \is#, \iot, \io#, \ip
- #### Markers Not Yet Supported: \ipi, \im, \imi, \ipq, \imq, \ipr, \iq#, \ib, \ili, \ior...\ior*, \iex, \imte, \ie
-
- # \it title (DCO: Commented out because \it is for italics not introduction titles in USFM 2.1)
-# if ($line =~ /^\\it\b\s*(.*)/) {
-# $line = "<div type=\"introduction\">\n<title>$1<\/title>";
-# openTag("<\/div>");
-# }
-
- # \imt major title
- if ($line =~ /^\\imt\b\s*(.+)/) {
- $line = "<div type=\"introduction\">\n<title>$1<\/title>";
- openTag("<\/div>");
- }
-
-
- # \is introduction section title
- if ($line =~ /^\\is(\d*)\b\s*(.*)/) {
- $level = $1;
- if ($level eq "") {
- $level = "1";
- }
- $line = "<div type=\"section\"><title>$2<\/title>";
- openTag("<\/div>");
- }
-
- # \iot introduction outline title
- if ($line =~ /^\\iot\b\s*(.*)/) {
- $line = "<div type=\"outline\">\n<title>$1<\/title>";
- }
-
- # \io\d+ introduction outline item
- if ($line =~ /^\\io(\d+)\b\s*(.*)/) {
- if ($ollevel == $1) {
- $line = "<item>$2<\/item>";
- }
- elsif ($ollevel > $1) {
- $line = "";
- while ($ollevel > $1) {
- $line .= "<\/list><\/item>\n";
- $ollevel--;
- }
- $line .= "<item>$2<\/item>";
- }
- elsif ($ollevel < $1) {
- $line = "";
- if ($ollevel != 0) {
- $line .= "<item>";
- }
- while ($ollevel < $1) {
- $line .= "<list>\n";
- $ollevel++;
- }
- $line .= "<item>$2<\/item>\n";
- }
-
- if (@filedata[$i+1] !~ /^\\io/) {
- while ($ollevel > 0) {
- $line .= "\n<\/list>";
- if ($ollevel > 1) {$line .= "<\/item>";}
- $ollevel--;
- }
- if ($ollevel == 0) {
- $line .= "\n<\/div>";
- }
- }
- }
-
- # \ip introduction paragraph
- if ($line =~ /^\\ip\b\s*(.*)/) {
- $line = "<p>$1<\/p>";
- }
-
- ### Titles, Headings, and Labels (elsewhere?)--Markers Supported: \d, \ms#, \s#, \mt#, \r, \sp
- #### Markers Not Yet Supported: \mte#, \mr, \sr, \rq...\rq*
-
- # \d \ms majorSection
- if ($line =~ /^\\(ms|d)\b\s*(.+)/) {
- push (@outdata, closeTag("<\/p>"));
- push (@outdata, closeTag("<\/div type=\"majorSection\">"));
- push (@outdata, "<div type=\"majorSection\">\n");
- openTag("<\/div type=\"majorSection\">");
- $line =~ s/\\(ms|d)\b\s*(.+)/<title>$2<\/title>/;
- }
-
- # \s section (From Chapters and Verses)
- if ($line =~ /^\\s\b\s*(.+)/) {
- push (@outdata, closeTag("<\/p>"));
- push (@outdata, closeTag("<\/div type=\"section\">"));
- push (@outdata, "<div type=\"section\">\n");
- openTag("<\/div type=\"section\">");
- $line =~ s/\\s\b\s*(.+)/<title>$1<\/title>/;
- if ($line =~ /HEBREW TITLE/) {
- $line =~ s/<title>/<title type=\"psalm\">/;
- }
- }
-
- # \ss \s2 subSection (From Chapters and Verses)
- if ($line =~ /^\\s[s2]\b\s*(.+)/) {
- $line =~ s/\\s[s2]\b\s*(.+)/<title>$1<\/title>/;
- }
-
- # \sss \s3 x-subsubSection (From Chapters and Verses)
- if ($line =~ /^\\s(ss|3)\b\s*(.+)/) {
- push (@outdata, closeTag("<\/p>"));
- push (@outdata, closeTag("<\/div type=\"x=subSubSection\">"));
- push (@outdata, "<div type=\"x-subSubSection\">\n");
- openTag("<\/div type=\"x-subSubSection\">");
- $line =~ s/\\s(ss|3)\b\s*(.+)/<title>$2<\/title>/;
- }
- # \mt\mt1 title
- if ($line =~ /^\\mt[1234]?\b\s*(.+)/) {
- $line = "<title type=\"main\">$1<\/title>";
- }
-
- # \mt2 title
- if ($line =~ /^\\mt2\b\s*(.+)/) {
- $line = "<title type=\"continued\">$1<\/title>";
- }
-
- # \st,\st2 title
- if ($line =~ /^\\st2?\b\s*(.+)/) {
- $line = "<title type=\"continued\">$1<\/title>";
- }
-
- # \st3 title
- if ($line =~ /^\\st3\b\s*(.+)/) {
- $line = "<title type=\"sub\">$1<\/title>";
- }
-
- # \r sub title
- if ($line =~ /^\\mr\b\s*(.+)/) {
- $line = "<title type=\"sub\">$1<\/title>";
- }
-
- # \r parallel title
- if ($line =~ /^\\r\b\s*(.+)/) {
- $line = "<title type=\"parallel\">$1<\/title>";
- }
-
- # \sp speaker
- if ($line =~ /^\\sp\b\s*(.+)/) {
- $line = "<speaker>$1<\/speaker>";
- }
-
-
- ### Chapters and Verses--Markers Supported: \c, \v
- #### Markers Not Yet Supported: \ca...\ca*, \cl, \cp, \cd, \va...\va*, \vp...\vp*
-
- # \c chapter
- if ($line =~ /^\\c\b\s*([^ ]*)/) {
- if ($1 ne "") {
- $chap = $1;
- }
- else {
- $chap++;
- }
-
- push (@outdata, $versClose);
- $versClose = "";
- push (@outdata, closeTag("<\/p>"));
- if ($chapClose =~ /<chapter/) {
- push (@outdata, $chapClose); # close previous chapter
- $chapClose = "";
- } else {
- push (@outdata, closeTag("<\/div>")); # close introduction div
- }
-
- push (@outdata, "<chapter sID=\"$book.$chap\" osisID=\"$book.$chap\"\/>\n");
- $chapClose = "<chapter eID=\"$book.$chap\"\/>\n";
- $line =~ s/\\c\b\s*([^ ]*)//;
- }
-
- # \v verse
- if ($line =~ /^\\v\b\s*(\d[^\\ ]*)?/) {
- if ($1 ne "") {
- $vers = $1;
- }
- else {
- $vers++;
- }
-
- push (@outdata, $versClose);
- $versClose = "";
-
- if ($vers =~ /(\d+[^\\\- ]*)\-(\d+[^\\ ]*)/) {
- $vF = $1;
- $vT = $2;
- $vF =~ /^(\d+)/;
- $vFn = scalar($1);
- $vT =~ /^(\d+)/;
- $vTn = scalar($1);
- $osisID = "$book.$chap.$vF";
- if ($vTn > $vFn && $vFn > 0) {
- for ($j = $vFn + 1; $j < $vTn; $j++) {
- $osisID .=" $book.$chap.$j";
- }
- }
- $osisID .= " $book.$chap.$vT";
- }
- else {
- $osisID = "$book.$chap.$vers";
- }
- push (@outdata, "<verse sID=\"$osisID\" osisID=\"$osisID\"\/>\n");
- $versClose = "<verse eID=\"$osisID\"\/>\n";
- $line =~ s/\\v\b\s*(\d[^\\ ]*)? *//;
- }
-
- ### Paragraphs--Markers Supported: \p, \b, \m
- #### Markers Not Yet Supported: \m, \pmo, \pm, \pmc, \pmr, \pi#, \mi, \nb, \cls, \li#, \pc, \pr, \ph#, \b
-
- # Hack to solve an issue in a module that used <R> for linebreaks in the usfm files--may be commented out (not USFM 2.1)
- $line =~ s/\\lb\*/<lb \/>/g;
-
- # \p paragraph (From Chapters and Verses)
- if ($line =~ /^\\p\b\s*/) {
- push (@outdata, closeTag("<\/p>"));
- push (@outdata, "<p>\n");
- openTag("<\/p>");
- $line =~ s/\\p\b\s*//;
- }
-
- # \b
- $line =~ s/\\b\b//;
- # \m
- $line =~ s/\\m\b//;
-
- ### Poetry--Markers Supported: \q#, \qs...\qs*
- #### Markers Not Yet Supported: \qr, \qc, \qa, \qac...\qac*, \qm#, \b
-
- # \q line
- if ($line =~ /^\\q/) {
- if ($l != 1) {
- push (@outdata, "<lg>\n");
- $l = 1;
- }
- if ($line =~ /\\q(c|\d*)$/) {
- if ($1 eq "") {
- $line = "<l>\n";
- }
- elsif ($1 eq "c") {
- $line = "<l type=\"x-centered\">";
- }
- else {
- $line = "<l level=\"$1\">\n";
- }
- @filedata[$i+1] .= "<\/l>";
- if (@filedata[$i+2] !~ /\\q/) {
- @filedata[$i+1] .= "\n<\/lg>";
- $l = 0;
- }
- }
- else {
- $line =~ s/\\q\b\s*(.+)/<l>$1<\/l>/;
- $line =~ s/\\q(\d+)\b\s*(.+)/<l level=\"$1\">$2<\/l>/;
- $line =~ s/\\qc\b\s*(.+)/<l type=\"x-centered\">$1<\/l>/;
- if (@filedata[$i+1] !~ /\\q/) {
- $line .= "\n<\/lg>";
- $l = 0;
- }
- }
- }
-
- # \qs...\qs*, Selah
- $line =~ s/\\qs\b\s*([^\\]+)\\qs\*/<l type="selah"> $1<\/l>/;
-
- ### Tables--Markers Supported: \tr, \th#, \tc#, \tcr#
- ####Markers Not Yet Supported: \thr#
-
- # \th table heading
- if ($line =~ /^\\t/) {
- if ($line =~ /^\\tr\b\s*(\\th.*)/) {
- $line = "$1";
- if ($table != 1) {
- push (@outdata, "<table>\n");
- $table = 1;
- }
- $line =~ s/\\th\d?\b\s*(.+?)\s*(?=(\\th|$))/<cell role=\"label\">$1<\/cell>/g;
- $line = "<row>$line<\/row>";
- }
-
- if ($line =~ /^\\tr\b\s*(\\tc.*)/) {
- $line = $1;
- if ($table != 1) {
- push (@outdata, "<table>\n");
- $table = 1;
- }
- $line =~ s/\\tcr?\d?\b\s*(.+?)\s*(?=(\\tc|$))/<cell>$1<\/cell>/g;
- $line = "<row>$line<\/row>";
- if (@filedata[$i+1] !~ /\\tr/) {
- $line .= "<\/table>\n";
- $table = 0;
- }
- }
-
- if ($line =~ /^\\th1\b\s*(.*)/) {
- if ($table != 1) {
- push (@outdata, "<table>\n");
- $table = 1;
- }
- $line = "<row><cell role=\"label\">$1<\/cell>\n";
- }
- elsif ($line =~ /^\\th\d+\b\s*(.*)/) {
- $line = "<cell role=\"label\">$1<\/cell>\n";
- }
-
- if ($line =~ /^\\tb1\b\s*(.*)/) {
- if ($table != 1) {
- push (@outdata, "<table>\n");
- $table = 1;
- }
- else {
- push (@outdata, "<\/row>");
- }
- $line = "<row><cell>$1<\/cell>\n";
- if (@filedata[$i+1] !~ /\\tb/) {
- $line .= "<\/row><\/table>\n";
- $table = 0;
- }
- }
- elsif ($line =~ /^\\tb\d+\b\s*(.*)/) {
- $line = "<cell>$1<\/cell>\n";
- if (@filedata[$i+1] !~ /\\tb/) {
- $line .= "<\/row><\/table>\n";
- $table = 0;
- }
- }
- }
-
- ### Footnotes--Markers Supported: \fk, \fq, \f...\f*, \fv
- ####Markers Not Yet Supported: \fe...\fe*, \fr, \fqa, \fl, \fp, \ft, \fdc...\fdc*, \fm...\fm*
-
- #\fk Catch Words (must precede \f)
- $line =~ s/\\fk\s/\<catchWord\>/g;
- $line =~ s/\\fk\*/\<\/catchWord\>/g;
- #\fq Quotations in Footnotes (must precede \f)
- $line =~ s/\\fq\s/\<q\>/g;
- $line =~ s/\\fq\*/\<\/q\>/g;
- #\fv Quotations in Footnotes (must precede \f)
- $line =~ s/\\fv\s*(\d+)\b/<seg type="verseNumber">$1<\/seg>/g;
-
- # \f note DCO--Made changes to match this: \f + \fr 3:20 \ft \fk catchWord\fk* plain text \fq text in quotes\fq*\f* (This works.)
- for ($j = 2; $j > 0; $j--) {
- if ($line =~ /\\f\b\s*(.)\s\\fr\s*([^\s]+)\s*\\ft\s*([^\\]+)\\f\*\s*/) {
- $nVal = $1;
- $sourceVal = $2;
- $noteText = $3;
-
- $nFN ++;
- $sourceVal =~ s/:/\./g;
- $sourceVal = "$book.$sourceVal";
- $sourceVal =~ s/(\d+)\.(\d[^\,]+)\-(\d+)/$1.$2-$book.$1.$3/;
- $sourceVal =~ s/(\d+)\.(\d[^\-]+)\-+\s*(\d.+)/$1.$2\-$book.$1.$3/;
-
- $line =~ s/\\f\s(.)\s\\fr\s([^\s]+)\s*\\ft\s*([^\\]+)\\f\*\s*/<note osisRef="$sourceVal" osisID="$sourceVal\!footnote.$nFN" n="$nFN">$3<\/note>/g;
- }
- }
-
- # \f if we STILL have notes, just change them to <note>
- if ($line =~ /\\f\b\s*/) {
- $line =~ s/\\f\b\s*/<note>/;
- }
- # \f* Footnote closers
- if ($line =~ /\\f\*/) {
- $line =~ s/\\f\*/<\/note>/g;
- }
-
- ### Crossreferences--Markers Supported: \x + \xo...\x*
- #### Markers Not Yet Supported: \xk, \xq, \xt, \xdc...\xdc*
-
- # \x crossReference (note element with source attribute only) \x + \xo...\x*
- if ($line =~ /\\x\s(.)\s\\xo\s([^\s]+)\s*\\xt\s*([^\\]+)\\x\*\s*/) {
- $nVal = $1;
- $sourceVal = $2;
- $noteText = $3;
-
- $sourceVal =~ s/:/\./g;
- $sourceVal = "$book.$sourceVal";
- $sourceVal =~ s/(\d+)\.(\d+)-(\d+)/$1.$2-$book.$1.$3/;
- $sourceVal =~ s/(\d+)\.(\d[^\-]+)-+\s*(\d+)/$1.$2\-$book.$1.$3/;
-
- $line =~ s/\\x\s(.)\s\\xo\s([^\s]+)\s*\\xt\s*([^\\]+)\\x\*\s*/<note type="crossReference" n="$nCR" osisID="$sourceVal\!crossReference.$nCR" osisRef="$sourceVal">\\xt $3<\/note>/g;
-
- #osisID="Gen.6.5-Gen.6.8!crossReference.
- $nCR = $nCR++;
- $nCR = 'a' if $nCR++ eq 'z';
- }
- $line =~ s/osisID="([^\!\-"]+)\-([^\!"]+)\!crossReference./osisID="$1!crossReference./g; # Corrects the osisID of cross-references when the source reference has multiple verses; leaves the osisRef as-is
-
- # \xt crossReference target
- if ($line =~ /\\xt\s*([^<]+)<\/note>/) {
- $crText = $1;
-
- $crText =~ s/\.//g;
- $crText =~ s/;\s/<\/reference>; <reference osisRef="">/g;
- $crText =~ s/\,\s*/<\/reference>\, <reference osisRef="">/g;
-
- $line =~ s/\\xt\s*([^<]+)<\/note>/<reference osisRef="">$crText<\/reference><\/note>/g;
- }
-
- # crossReference osisRef=""
- $line =~ s/<reference osisRef="">([^<]+)<\/reference>/<reference osisRef="$1">$1<\/reference>/g;
- $line =~ s/osisRef="\s/osisRef="\s/g;
- $line =~ s/\s">/">/g;
- $line =~ s/<reference osisRef="([^\s\"]+)\s/<reference osisRef="$1\./g; # Changes space after book name to a period
-
- $line =~ s/<reference osisRef="([^\"]+):([^\"]+)"/<reference osisRef="$1\.$2"/g; # Gen 1:1
- $line =~ s/<reference osisRef="([^\.\"]+)\.(\d+)\.(\d+)-(\d+)"/<reference osisRef="$1\.$2\.$3-$1\.$2\.$4"/g; # Gen 1:1-2
- $line =~ s/<reference osisRef="([^\.\"]+).(\d+):(\d+)-(\d+).(\d+)"/<reference osisRef="$1\.$2\.$3-$1\.$4\.$5"/g; # Gen 1:1-2:3
- $line =~ s/<reference osisRef="([^\.\"]+)\.(\d+)\.([^\"]+)">([^<]+)<\/reference>; <reference osisRef="(\d+)\.(\d+)"/<reference osisRef="$1\.$2\.$3">$4<\/reference>; <reference osisRef="$1\.$5\.$6"/g; # Gen. 1:1, 2:3
- $line =~ s/<reference osisRef="([^\.\"]+)\.(\d+)\.([^\"]+)">([^<]+)<\/reference>, <reference osisRef="(\d+)"/<reference osisRef="$1\.$2\.$3">$4<\/reference>, <reference osisRef="$1\.$2\.$5"/g; # Gen. 1:1, 3
- $line =~ s/<reference osisRef="([^\"\.]+)\.(\d+)"/<reference osisRef="$1\.1\.$2"/g; # Jude 1
-
- ### Special Text and Character Styles--Markers Supported: \it...\it*, \nd...\nd*, \pn...\pn*, \tl...\tl*
- #### Markers Not Yet Supported: Special Text: \add...\add*, \bk...\bk*, \dc...\dc*, \k...\k*, \lit, \ord...\ord*, \qt...\qt*, \sig...\sig*, \sls...\sls*, \wj...\wj*; Character Styling: \em...\em*, \bd...\bd*, \bdit...\bdit*, \no...\no*, \sc...\sc*; Spacing and Breaks: !$, //, \pb; Special Features: \fig...\fig*, \ndx...\ndx*, \pro...\pro*, \w...\w*, \wg...\wg*, \wh...\wh*
-
- # \it...\it*, italic text
- $line =~ s/\\it\b\s*(.*?)\\it\*/<hi type=\"italic\">$1<\/hi>/g;
-
- # \nd...\nd*, Divine Name
- $line =~ s/\\nd\b\s*(.*?)\\nd\*/<divineName>$1<\/divineName>/g;
-
- # \pn...\pn*, Proper name
- $line =~ s/\\pn\b\s*(.*?)\\pn\*/<name>$1<\/name>/g;
-
- # \tl...\tl*, Foreign Langauge (treated here merely as transliterated text)
- $line =~ s/\\tl\b\s*(.*?)\\tl\*/<hi type="italic">$1<\/hi>/g;
-
- $line =~ s/_/ /g;
-
-
-### End USFM 2.1 Items
-
- if ($line !~ /^\s*$/) {
- push (@outdata, "$line\n");
- }
- }
-}
-
-push (@outdata, closeTag("<\/osis>"));
-
-for ($i = 0; $i < scalar(@outdata); $i++) {
- #@outdata[$i] =~ s/---/―/g; # m-dash
- #@outdata[$i] =~ s/--/—/g; # n-dash
- @outdata[$i] =~ s/([es]ID=\"[^\" ]+) [^\"]*\"/$1\"/;
-}
-
-for ($i = 0; $i < scalar(@outdata); $i++) {
- if (@outdata[$i] !~ /^\s*$/) {
- @outdata[$i] =~ s/[\r\n]+/\n/g;
- @outdata[$i] =~ s/\n?$/\n/;
- print OUTF @outdata[$i];
- }
-}
-close (OUTF);
-
-print "Doing some cleanup.\n";
-
-open (INF, "$outputFilename");
-@filedata = <INF>;
-close (INF);
-open (OUTF, ">$outputFilename");
-
-#bubble chapter down
-for ($i = 0; $i < scalar(@filedata); $i++) {
- if (@filedata[$i] =~ /^<\// && @filedata[$i-1] =~ /^<chapter.+\/>/) {
- $temp = @filedata[$i];
- @filedata[$i] = @filedata[$i-1];
- @filedata[$i-1] = $temp;
- $i -= 2;
- }
-}
-for ($i = 0; $i < scalar(@filedata); $i++) {
- $fullfile .= @filedata[$i];
-}
-$fullfile =~ s/<\/div>\n(<chapter eID[^>]+>)/$1\n<\/div>/mg; #swap the chapter back up one before the book closer
-
-print "Tagging quotations.\n";
-
-$q = 1;
-
-$fullfile =~ s/\$([^\%]+?)\%/"<q level=\"2\" sID=\"q2." . $q . "\"\/>" . $1 . "<q level=\"2\" eID=\"q2." . $q++ . "\"\/>"/eg;
-
-$fullfile =~ s/\$/"<milestone type=\"cQuote\" subType=\"x-level-2\"\/>"/eg;
-
-$q = 1;
-
-while ($fullfile =~ /(\@[^\@\#]+?)\@([^\@\#]+?)\#(([^\@\#]+?\@[^\@\#]+?\#)+[^\@\#]+?\#)/) {
- $fullfile =~ s/(\@[^\@\#]+?)\@([^\@\#]+?)\#(([^\@\#]+?\@[^\@\#]+?\#)+[^\@\#]+?\#)/$1 . "<q level=\"1\" sID=\"q1." . $q . "\"\/>" . $2 . "<q level=\"1\" eID=\"q1." . $q++ . "\"\/>" . $3/eg;
-}
-while ($fullfile =~ /(\@[^\@\#]+?)\@([^\@\#]+?)\#([^\@\#]+?\#)/) {
- $fullfile =~ s/(\@[^\@\#]+?)\@([^\@\#]+?)\#([^\@\#]+?\#)/$1 . "<q level=\"1\" sID=\"q1." . $q . "\"\/>" . $2 . "<q level=\"1\" eID=\"q1." . $q++ . "\"\/>" . $3/eg;
-}
-
-$fullfile =~ s/\@([^\#]+?)\#/"<q level=\"1\" sID=\"q1." . $q . "\"\/>" . $1 . "<q level=\"1\" eID=\"q1." . $q++ . "\"\/>"/eg;
-$fullfile =~ s/\@/"<milestone type=\"cQuote\" subType=\"x-level-1\"\/>"/eg;
-
-$fullfile =~ s/\^/"<q level=\"1\" eID=\"q1." . $q++ . ".false\"\/>"/eg;
-
-print OUTF $fullfile;
-close (OUTF);
-
-print "All done! OSIS file: $outputFilename\n";
+#!/usr/bin/perl
+
+## USFM to OSIS (2.1.1) converter
+
+## Licensed under the standard BSD license:
+
+# Copyright (c) 2002-2008 CrossWire Bible Society <http://www.crosswire.org/>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of the CrossWire Bible Society nor the names of
+# its contributors may be used to endorse or promote products
+# derived from this software without specific prior written
+# permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+## For general inquiries, comments, suggestions, bug reports, etc. email:
+## sword-support@crosswire.org
+
+#########################################################################
+
+# Release identification for this script; both appear in the usage banner.
+($version, $date) = ("1.5", "2009-02-08");
+# $osisVersion selects the osisCore.<version>.xsd schema named in the OSIS
+# header; $usfmVersion is the USFM release this converter targets and is
+# reported in the usage banner. USFM reference (as of 2008-07-07):
+# http://confluence.ubs-icap.org/display/USFM/Home
+($osisVersion, $usfmVersion) = ("2.1.1", "2.1");
+
+# Maps USFM book abbreviations to OSIS book abbreviations. TODO: support an external mapping file so that abbreviated book names in other languages (especially in cross-references) can be mapped to OSIS as well; no mechanism for this exists yet.
+%OSISbook = (
+# The entries are believed to follow the <BooksPresent> order, but this was guessed without a spec. TODO: verify.
+"" => "", "GEN" => "Gen", "EXO" => "Exod", "LEV" => "Lev", "NUM" => "Num",
+ "DEU" => "Deut", "JOS" => "Josh", "JDG" => "Judg", "RUT" => "Ruth",
+ "1SA" => "1Sam", "2SA" => "2Sam", "1KI" => "1Kgs", "2KI" => "2Kgs",
+ "1CH" => "1Chr", "2CH" => "2Chr", "EZR" => "Ezra", "NEH" => "Neh",
+ "EST" => "Esth", "JOB" => "Job", "PSA" => "Ps", "PRO" => "Prov",
+ "ECC" => "Eccl", "SNG" => "Song", "ISA" => "Isa", "JER" => "Jer",
+ "LAM" => "Lam", "EZK" => "Ezek", "DAN" => "Dan", "HOS" => "Hos",
+ "JOL" => "Joel", "AMO" => "Amos", "OBA" => "Obad", "JON" => "Jonah",
+ "MIC" => "Mic", "NAM" => "Nah", "HAB" => "Hab", "ZEP" => "Zeph",
+ "HAG" => "Hag", "ZEC" => "Zech", "MAL" => "Mal", "MAT" => "Matt",
+ "MRK" => "Mark", "LUK" => "Luke", "JHN" => "John", "ACT" => "Acts",
+ "ROM" => "Rom", "1CO" => "1Cor", "2CO" => "2Cor", "GAL" => "Gal",
+ "EPH" => "Eph", "PHP" => "Phil", "COL" => "Col", "1TH" => "1Thess",
+ "2TH" => "2Thess", "1TI" => "1Tim", "2TI" => "2Tim", "TIT" => "Titus",
+ "PHM" => "Phlm", "HEB" => "Heb", "JAS" => "Jas", "1PE" => "1Pet",
+ "2PE" => "2Pet", "1JN" => "1John", "2JN" => "2John", "3JN" => "3John",
+ "JUD" => "Jude", "REV" => "Rev", "TOB" => "Tob", "JDT" => "Jdt",
+ "ESG" => "Esth", "WIS" => "Wis", "SIR" => "Sir", "BAR" => "Bar",
+ "LJE" => "EpJer", "S3Y" => "PrAzar", "SUS" => "Sus", "BEL" => "Bel",
+ "1MA" => "1Macc", "2MA" => "2Macc", "3MA" => "3Macc", "4MA" => "4Macc",
+ "1ES" => "1Esd", "2ES" => "2Esd", "MAN" => "PrMan",
+# NOTE: the remaining mappings are unverified guesses; several of them reuse an OSIS ID that is already mapped above (e.g. "JSA"/"JSB" => "Josh").
+ "PS2" => "Ps151", "ODA" => "Odes", "PSS" => "PssSol", "JSA" => "Josh",
+ "JSB" => "Josh", "TBS" => "Tob", "SST" => "Sus", "DNT" => "Dan",
+ "BLT" => "Bel", "ADE" => "AddEsth"
+ );
+
+# Generates a list of available encodings.
+use Encode;
+# Every encoding name Encode knows about (":all" includes ones not yet loaded).
+@encodingList = Encode->encodings(":all");
+# Human-readable, comma-separated form of the same list, used in the usage
+# text and in the "not supported" error message.
+$encodings = join(", ", @encodingList);
+
+# Usage: with fewer than two arguments, print the help text and exit with -1.
+if (@ARGV < 2) {
+    print "\nusfm2osis.pl -- USFM $usfmVersion to OSIS $osisVersion converter version $version ($date)\n\nSyntax: usfm2osis.pl <osisWork> [-o OSIS-file] [-e USFM encoding] <USFM filenames|wildcard>\n",
+        "- Arguments in braces < > are required. Arguments in brackets [ ] are optional.\n",
+        "- The osisWork is a short name with no spaces which will identify your module.\n",
+        "- If no -o option is specified for the output filename, the default output file is: \n\tosisWork.osis.xml.\n",
+        "- Supported encodings include:\n\t$encodings\n",
+        "- If the encoding is omitted, utf8 is the default value.\n",
+        "- USFM filenames with the SFM extension can be accessed using a wildcard: \n\t*.SFM\n",
+        "As an example, if you want to generate the osisWork <bible> and your USFM files are encoded in utf8, located in the /Bible folder relative to this script with the file extension SFM, enter:\n\tperl usfm2osis.pl bible Bible/*.SFM\n\n";
+    exit(-1);
+}
+
+# --- Command line: <osisWork> [-o OSIS-file] [-e USFM encoding] <files...> ---
+$osisWork = $ARGV[0];
+$nextarg = 1;
+
+# Optional "-o <file>": output file name (default: <osisWork>.osis.xml).
+if ($ARGV[$nextarg] eq "-o") {
+    $outputFilename = "$ARGV[$nextarg+1]";
+    $nextarg += 2;
+}
+else {
+    $outputFilename = "$osisWork.osis.xml";
+}
+
+# Optional "-e <encoding>": encoding of the USFM input (default: utf8).
+if ($ARGV[$nextarg] eq "-e") {
+    $inputEncoding = "$ARGV[$nextarg+1]";
+    $nextarg += 2;
+}
+else {
+    $inputEncoding = "utf8";
+}
+
+# Validate the encoding BEFORE creating the output file, so a bad -e value
+# cannot leave an empty or truncated file behind. find_encoding also accepts
+# aliases (e.g. "UTF-8"), which decode() understands just as well.
+$encFound = Encode::find_encoding($inputEncoding) ? 1 : 0;
+if ($encFound == 0) {
+    die "Encoding $inputEncoding not supported.\nSupported encodings include:\n\t$encodings\n";
+}
+print "Encoding \"$inputEncoding\" is supported.\n";
+
+# Name the file actually being opened (not $ARGV[2]) and include the OS error.
+open (OUTF, ">:utf8", "$outputFilename") or die "Could not open file $outputFilename for writing: $!";
+for (; $nextarg < scalar(@ARGV); $nextarg++) {
+    push(@files, $ARGV[$nextarg]);    # everything left is a USFM input file
+}
+
+# Seed the output buffer with the XML declaration, the <osis> and <osisText>
+# openers and a minimal <header> naming this work.
+push (@outdata, join("\n",
+    qq{<?xml version="1.0" encoding="UTF-8"?>},
+    qq{<osis xmlns="http://www.bibletechnologies.net/2003/OSIS/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.bibletechnologies.net/2003/OSIS/namespace http://www.bibletechnologies.net/osisCore.$osisVersion.xsd">},
+    qq{<osisText osisRefWork="Bible" xml:lang="en" osisIDWork="$osisWork">},
+    qq{<header>},
+    qq{<work osisWork="$osisWork"/>},
+    qq{</header>},
+    ""));
+
+# Closing tags still owed to the output; openTag() prepends to this string
+# and closeTag() removes from its front.
+$tagStack = '</osisText></osis>';
+# Pending chapter / verse end milestones (empty when none is open).
+$chapClose = '';
+$versClose = '';
+
+# closeTag(TAG)
+# Removes pending closing tags from the front of $tagStack, up to and
+# including the first one that matches TAG (TAG is used as a regular
+# expression). Returns the removed closers as text, one per line, with any
+# bookkeeping attributes (e.g. ' type="book"') stripped so that they are
+# plain XML end tags. Returns nothing when TAG is not on the stack.
+sub closeTag {
+    my $tag = $_[0];
+
+    # Nothing to close: bare return, i.e. an empty list in list context.
+    return unless $tagStack =~ /$tag/;
+
+    $tagStack =~ s/^(.*?$tag)//;
+    my $taglist = $1;
+    $taglist =~ s/>/>\n/g;                      # one closer per line
+    $taglist =~ s/(<\/\w+)\s+[^>]+>/$1>/g;      # drop the attributes
+    return $taglist;
+}
+
+# openTag(TAG)
+# Registers TAG as the most recently opened element by putting its closer at
+# the front of $tagStack.
+sub openTag {
+    ($tag) = @_;
+    $tagStack = "$tag$tagStack";
+    return;
+}
+
+foreach $file (@files) {
+    print "Processing $file.\n";
+    # Three-argument open so that the filename can never be read as a mode;
+    # an unreadable file is reported and skipped instead of being treated as
+    # an empty one.
+    unless (open (SFM, "<", $file)) {
+        warn "Could not open $file for reading: $!\n";
+        next;
+    }
+    # Element 0 is an empty placeholder; the decoded input lines follow it.
+    my @filedata = "";
+    while (<SFM>) {
+        push (@filedata, decode($inputEncoding, $_));
+    }
+    close (SFM);
+
+    # Conversion state, reset for every input file.
+    $ollevel = 0;   # nesting depth of the introduction outline list
+    $vers = 0;
+    $chap = 0;
+    $book = "";
+    # Sets the initial value for the attribute "n" in footnotes.
+    $nFN = 0;
+    $xFN = 0;
+    # Creates array for the attribute "n" in cross-references
+    @nCR = ('a' .. 'z');
+    # Sets the initial value for the attribute "n" in cross-references.
+    $nCR = $nCR[0];
+    # Encoding stuff: normalise every input line in place before any marker
+    # is interpreted.
+    for (@filedata) {
+        s/[\r\n]//g;
+
+        ### Basic XML entity encoding
+        s/&(?![a-zA-Z0-9])/&amp;/g;
+        # Angle brackets are parked on placeholder characters; the
+        # quotation-tagging step at the end of the script turns them into
+        # <q> milestones.
+        s/<< ?/\@/g;
+        s/>>/\#/g;
+        s/</\$/g;
+        s/>/\%/g;
+
+        # An apostrophe between two word characters becomes U+2019.
+        s/(\w)\'(\w)/"$1" . chr(0x2019) . "$2"/eg;
+        # The literal sequence "\fr 1/2 \fr*" becomes U+00BD.
+        s/\\fr 1\/2 \\fr\*/chr(0xBD)/eg;
+    }
+
+ for ($i = 0; $i < scalar(@filedata); $i++) { # main pass: one USFM line per iteration
+ $line = @filedata[$i];
+ # Verse and paragraph markers are normalised first; the handlers for the identification markers follow further down.
+ ### File Identification--Markers Supported: \id, \h, \ide, \sts, \rem, \toc1, \toc2, \toc3
+ # "\v N,M" with M == N+1 is rewritten as the range "\v N-M"
+ if ($line =~ /\\v\b\s*(\d+)\,(\d+)/) {
+ if ($1 + 1 == $2) {
+ $line =~ s/\\v\b\s*(\d+)\,(\d+)/\\v $1\-$2/;
+ }
+ }
+ $line =~ s/\\v\b\s+(\d+)(\-\d+|\s*\\v\b\s+\d+)\s*\\v\b\s+(\d+)/\\v $1\-$3/; # "\v A-B \v C" or "\v A \v B \v C" -> "\v A-C"
+ $line =~ s/\\v\b\s+(\d+)\s*\\v\b\s+(\d+\-)?(\d+)/\\v $1\-$3/; # "\v A \v C" or "\v A \v B-C" -> "\v A-C"
+ $line =~ s/^\\(p[is]|mi)\b/\\p/; # \pi, \ps and \mi are treated as plain \p
+ $line =~ s/^\\li\b/\\p/; # \li is not handled separately here, so it is treated as \p
+
+        # \id (book marker): finish the previous book and open the next one.
+        if ($line =~ /^\\id\b\s*([^ ]*)/) {
+            # Map the USFM book code to its OSIS name; an unknown code gets a
+            # recognisable placeholder.
+            $book = $OSISbook{$1};
+            if ($book eq "") {
+                $book = "UnknownUSFMBook";
+            }
+            $chap = 0;
+
+            # Emit the pending verse and chapter end milestones, if any.
+            if ($versClose =~ /<verse/) {
+                push (@outdata, $versClose);
+                $versClose = "";
+            }
+#           push (@outdata, closeTag("<\/div[^>]*?>")); # close section
+            if ($chapClose =~ /<chapter/) {
+                push (@outdata, $chapClose);
+                $chapClose = "";
+            }
+
+            # Close the previous book div (when one is open), open the new one.
+            push (@outdata, closeTag(qq{</div type="book">}), qq{<div type="book" osisID="$book">\n});
+            openTag(qq{</div type="book">});
+            $line = "";
+        }
+
+        # \h (running header), \ide (encoding), \sts (status), \rem (translator
+        # comments) and \toc# (table of contents) are discarded.
+        if ($line =~ /^\\(?:h|ide|sts|rem|toc\d)\b/) {
+            $line = "";
+        }
+
+        ### Introduction--Markers Supported: \imt#, \is#, \iot, \io#, \ip
+        #### Markers Not Yet Supported: \ipi, \im, \imi, \ipq, \imq, \ipr, \iq#, \ib, \ili, \ior...\ior*, \iex, \imte, \ie
+
+        # \it title (DCO: Commented out because \it is for italics not introduction titles in USFM 2.1)
+#       if ($line =~ /^\\it\b\s*(.*)/) {
+#           $line = "<div type=\"introduction\">\n<title>$1<\/title>";
+#           openTag("<\/div>");
+#       }
+
+        # \imt major title: starts the introduction div.
+        if (my ($majorTitle) = $line =~ /^\\imt\b\s*(.+)/) {
+            $line = qq{<div type="introduction">\n<title>$majorTitle</title>};
+            openTag("</div>");
+        }
+
+        # \is introduction section title: starts a section div. The optional
+        # digit is stored in $level (defaulting to "1").
+        if (my ($depth, $heading) = $line =~ /^\\is(\d*)\b\s*(.*)/) {
+            $level = ($depth eq "") ? "1" : $depth;
+            $line = qq{<div type="section"><title>$heading</title>};
+            openTag("</div>");
+        }
+
+        # \iot introduction outline title: starts the outline div; the \io
+        # handler appends its closing tag after the last outline item.
+        if (my ($outlineTitle) = $line =~ /^\\iot\b\s*(.*)/) {
+            $line = qq{<div type="outline">\n<title>$outlineTitle</title>};
+        }
+
+ # \io\d+ introduction outline item: emits nested <list>/<item> markup, with $ollevel holding the current nesting depth
+ if ($line =~ /^\\io(\d+)\b\s*(.*)/) {
+ if ($ollevel == $1) { # same depth: one more item
+ $line = "<item>$2<\/item>";
+ }
+ elsif ($ollevel > $1) { # shallower: close the deeper lists first
+ $line = "";
+ while ($ollevel > $1) {
+ $line .= "<\/list><\/item>\n";
+ $ollevel--;
+ }
+ $line .= "<item>$2<\/item>";
+ }
+ elsif ($ollevel < $1) { # deeper: open one <list> per missing level
+ $line = "";
+ if ($ollevel != 0) { # a nested list is wrapped in an <item> of its parent list
+ $line .= "<item>";
+ }
+ while ($ollevel < $1) {
+ $line .= "<list>\n";
+ $ollevel++;
+ }
+ $line .= "<item>$2<\/item>\n";
+ }
+ # When the next input line is not an \io line, close every open list and then the outline div
+ if (@filedata[$i+1] !~ /^\\io/) {
+ while ($ollevel > 0) {
+ $line .= "\n<\/list>";
+ if ($ollevel > 1) {$line .= "<\/item>";}
+ $ollevel--;
+ }
+ if ($ollevel == 0) {
+ $line .= "\n<\/div>";
+ }
+ }
+ }
+
+ # \ip introduction paragraph: the rest of the line becomes a <p> element
+ if ($line =~ /^\\ip\b\s*(.*)/) {
+ $line = "<p>$1<\/p>";
+ }
+
+        ### Titles, Headings, and Labels (elsewhere?)--Markers Supported: \d, \ms#, \s#, \mt#, \r, \sp
+        #### Markers Not Yet Supported: \mte#, \mr, \sr, \rq...\rq*
+
+        # \d \ms majorSection: close the open paragraph and the previous
+        # majorSection div, then open a new div titled with the marker text.
+        if ($line =~ /^\\(ms|d)\b\s*(.+)/) {
+            push (@outdata, closeTag("<\/p>"));
+            push (@outdata, closeTag("<\/div type=\"majorSection\">"));
+            push (@outdata, "<div type=\"majorSection\">\n");
+            openTag("<\/div type=\"majorSection\">");
+            $line =~ s/\\(ms|d)\b\s*(.+)/<title>$2<\/title>/;
+        }
+
+        # \s \s1 section (From Chapters and Verses): same pattern with a
+        # section div.
+        if ($line =~ /^\\s1?\b\s*(.+)/) {
+            push (@outdata, closeTag("<\/p>"));
+            push (@outdata, closeTag("<\/div type=\"section\">"));
+            push (@outdata, "<div type=\"section\">\n");
+            openTag("<\/div type=\"section\">");
+            $line =~ s/\\s1?\b\s*(.+)/<title>$1<\/title>/;
+            # A title containing "HEBREW TITLE" is marked as a psalm title.
+            if ($line =~ /HEBREW TITLE/) {
+                $line =~ s/<title>/<title type=\"psalm\">/;
+            }
+        }
+
+        # \ss \s2 subSection (From Chapters and Verses): only a <title> is
+        # produced, no div.
+        if ($line =~ /^\\s[s2]\b\s*(.+)/) {
+            $line =~ s/\\s[s2]\b\s*(.+)/<title>$1<\/title>/;
+        }
+
+        # \sss \s3 x-subSubSection (From Chapters and Verses). The tag given to
+        # closeTag() must be spelled exactly like the one given to openTag(),
+        # otherwise the previous x-subSubSection div is never closed here.
+        if ($line =~ /^\\s(ss|3)\b\s*(.+)/) {
+            push (@outdata, closeTag("<\/p>"));
+            push (@outdata, closeTag("<\/div type=\"x-subSubSection\">"));
+            push (@outdata, "<div type=\"x-subSubSection\">\n");
+            openTag("<\/div type=\"x-subSubSection\">");
+            $line =~ s/\\s(ss|3)\b\s*(.+)/<title>$2<\/title>/;
+        }
+        # Title-like markers. Each rule replaces the whole line, so at most one
+        # of them can apply to a given line.
+        if ($line =~ /^\\mt[1234]?\b\s*(.+)/) {
+            # \mt, \mt1 .. \mt4: main title
+            $line = qq{<title type="main">$1</title>};
+        }
+        elsif ($line =~ /^\\mt2\b\s*(.+)/) {
+            # \mt2 title
+            # NOTE(review): never reached, because the rule above already
+            # accepts \mt2 -- confirm which title type \mt2 should get.
+            $line = qq{<title type="continued">$1</title>};
+        }
+        elsif ($line =~ /^\\st2?\b\s*(.+)/) {
+            # \st, \st2 title
+            $line = qq{<title type="continued">$1</title>};
+        }
+        elsif ($line =~ /^\\st3\b\s*(.+)/) {
+            # \st3 title
+            $line = qq{<title type="sub">$1</title>};
+        }
+        elsif ($line =~ /^\\mr\b\s*(.+)/) {
+            # \mr sub title
+            $line = qq{<title type="sub">$1</title>};
+        }
+        elsif ($line =~ /^\\r\b\s*(.+)/) {
+            # \r parallel title
+            $line = qq{<title type="parallel">$1</title>};
+        }
+        elsif ($line =~ /^\\sp\b\s*(.+)/) {
+            # \sp speaker
+            $line = qq{<speaker>$1</speaker>};
+        }
+
+
+ ### Chapters and Verses--Markers Supported: \c, \v
+ #### Markers Not Yet Supported: \ca...\ca*, \cl, \cp, \cd, \va...\va*, \vp...\vp*
+
+ # \c chapter: closes the open verse, paragraph and chapter, then emits a <chapter sID> milestone
+ if ($line =~ /^\\c\b\s*([^ ]*)/) {
+ if ($1 ne "") { # explicit chapter label
+ $chap = $1;
+ }
+ else { # no label given: count up from the previous chapter
+ $chap++;
+ }
+
+ push (@outdata, $versClose);
+ $versClose = "";
+ push (@outdata, closeTag("<\/p>"));
+ if ($chapClose =~ /<chapter/) {
+ push (@outdata, $chapClose); # close previous chapter
+ $chapClose = "";
+ } else {
+ push (@outdata, closeTag("<\/div>")); # close introduction div
+ }
+
+ push (@outdata, "<chapter sID=\"$book.$chap\" osisID=\"$book.$chap\"\/>\n");
+ $chapClose = "<chapter eID=\"$book.$chap\"\/>\n"; # emitted when the next chapter or book starts
+ $line =~ s/\\c\b\s*([^ ]*)//;
+ }
+
+ # \v verse: closes the open verse and emits a <verse sID> milestone; the verse text stays in $line
+ if ($line =~ /^\\v\b\s*(\d[^\\ ]*)?/) {
+ if ($1 ne "") { # explicit verse label, possibly a range such as 3-5
+ $vers = $1;
+ }
+ else { # no label given: count up from the previous verse
+ $vers++;
+ }
+
+ push (@outdata, $versClose);
+ $versClose = "";
+ # A range "from-to" yields a space-separated osisID list covering every verse in the range
+ if ($vers =~ /(\d+[^\\\- ]*)\-(\d+[^\\ ]*)/) {
+ $vF = $1;
+ $vT = $2;
+ $vF =~ /^(\d+)/;
+ $vFn = scalar($1); # numeric part of the first verse label
+ $vT =~ /^(\d+)/;
+ $vTn = scalar($1); # numeric part of the last verse label
+ $osisID = "$book.$chap.$vF";
+ if ($vTn > $vFn && $vFn > 0) {
+ for ($j = $vFn + 1; $j < $vTn; $j++) { # verses strictly between the two ends
+ $osisID .=" $book.$chap.$j";
+ }
+ }
+ $osisID .= " $book.$chap.$vT";
+ }
+ else {
+ $osisID = "$book.$chap.$vers";
+ }
+ push (@outdata, "<verse sID=\"$osisID\" osisID=\"$osisID\"\/>\n"); # the final output loop trims a multi-ID sID/eID down to its first ID
+ $versClose = "<verse eID=\"$osisID\"\/>\n";
+ $line =~ s/\\v\b\s*(\d[^\\ ]*)? *//;
+ }
+
+        ### Paragraphs--Markers Supported: \p, \b, \m
+        #### Markers Not Yet Supported: \m, \pmo, \pm, \pmc, \pmr, \pi#, \mi, \nb, \cls, \li#, \pc, \pr, \ph#, \b
+
+        # Hack for a module whose USFM files marked line breaks with \lb*
+        # (not USFM 2.1); may be commented out.
+        $line =~ s{\\lb\*}{<lb />}g;
+
+        # \p paragraph (From Chapters and Verses): strip the marker, close the
+        # paragraph that is still open and start a new one.
+        if ($line =~ s/^\\p\b\s*//) {
+            push (@outdata, closeTag("</p>"), "<p>\n");
+            openTag("</p>");
+        }
+
+        # \b and \m: the first occurrence of each marker is simply dropped.
+        foreach my $dropped (qr/\\b\b/, qr/\\m\b/) {
+            $line =~ s/$dropped//;
+        }
+
+ ### Poetry--Markers Supported: \q#, \qs...\qs*, \qc
+ #### Markers Not Yet Supported: \qr, \qa, \qac...\qac*, \qm#, \b
+
+ # \qt...\qt*, OT quotation (handle early)
+ $line =~ s/\\qt\b\s*(.*?)\\qt\*/<seg type="otPassage">$1<\/seg>/g;
+
+ # \q line: poetry lines become <l> elements grouped in <lg>; $l is 1 while a line group is open
+ if ($line =~ /^\\q/) {
+ if ($l != 1) { # first poetry line of a group: open the <lg>
+ push (@outdata, "<lg>\n");
+ $l = 1;
+ }
+ if ($line =~ /\\q(c|\d*)$/) { # marker at the end of the line: the text is taken from the next input line
+ if ($1 eq "") {
+ $line = "<l>\n";
+ }
+ elsif ($1 eq "c") {
+ $line = "<l type=\"x-centered\">";
+ }
+ else {
+ $line = "<l level=\"$1\">\n";
+ }
+ @filedata[$i+1] .= "<\/l>"; # the closing tag is appended to the next input line
+ if (@filedata[$i+2] !~ /\\q(?!t)/) { # no poetry marker two lines ahead: the group ends after the next line
+ @filedata[$i+1] .= "\n<\/lg>";
+ $l = 0;
+ }
+ }
+ else { # marker and text on the same line
+ $line =~ s/\\q\b\s*(.+)/<l>$1<\/l>/;
+ $line =~ s/\\q(\d+)\b\s*(.+)/<l level=\"$1\">$2<\/l>/;
+ $line =~ s/\\qc\b\s*(.+)/<l type=\"x-centered\">$1<\/l>/;
+ if (@filedata[$i+1] !~ /\\q(?!t)/) { # next line has no poetry marker: close the group
+ $line .= "\n<\/lg>";
+ $l = 0;
+ }
+ }
+ }
+
+ # \qs...\qs*, Selah (first occurrence on the line only)
+ $line =~ s/\\qs\b\s*([^\\]+)\\qs\*/<l type="selah"> $1<\/l>/;
+
+ ### Tables--Markers Supported: \tr, \th#, \tc#, \tcr#
+ ####Markers Not Yet Supported: \thr#
+
+ # Table markers; $table is 1 while a <table> element is open
+ if ($line =~ /^\\t/) {
+ if ($line =~ /^\\tr\b\s*(\\th.*)/) { # \tr row made of \th heading cells
+ $line = "$1";
+ if ($table != 1) {
+ push (@outdata, "<table>\n");
+ $table = 1;
+ }
+ $line =~ s/\\th\d?\b\s*(.+?)\s*(?=(\\th|$))/<cell role=\"label\">$1<\/cell>/g;
+ $line = "<row>$line<\/row>";
+ }
+
+ if ($line =~ /^\\tr\b\s*(\\tc.*)/) { # \tr row made of \tc / \tcr data cells
+ $line = $1;
+ if ($table != 1) {
+ push (@outdata, "<table>\n");
+ $table = 1;
+ }
+ $line =~ s/\\tcr?\d?\b\s*(.+?)\s*(?=(\\tc|$))/<cell>$1<\/cell>/g;
+ $line = "<row>$line<\/row>";
+ if (@filedata[$i+1] !~ /\\tr/) { # next line is not a table row: close the table
+ $line .= "<\/table>\n";
+ $table = 0;
+ }
+ }
+
+ if ($line =~ /^\\th1\b\s*(.*)/) { # \th1 on its own line: opens a row (and the table if needed)
+ if ($table != 1) {
+ push (@outdata, "<table>\n");
+ $table = 1;
+ }
+ $line = "<row><cell role=\"label\">$1<\/cell>\n";
+ }
+ elsif ($line =~ /^\\th\d+\b\s*(.*)/) { # further heading cell of the current row
+ $line = "<cell role=\"label\">$1<\/cell>\n";
+ }
+
+ if ($line =~ /^\\tb1\b\s*(.*)/) { # \tb1: first cell of a new row
+ if ($table != 1) {
+ push (@outdata, "<table>\n");
+ $table = 1;
+ }
+ else { # a table is already open: close its previous row
+ push (@outdata, "<\/row>");
+ }
+ $line = "<row><cell>$1<\/cell>\n";
+ if (@filedata[$i+1] !~ /\\tb/) { # no \tb on the next line: close row and table
+ $line .= "<\/row><\/table>\n";
+ $table = 0;
+ }
+ }
+ elsif ($line =~ /^\\tb\d+\b\s*(.*)/) { # further cell of the current row
+ $line = "<cell>$1<\/cell>\n";
+ if (@filedata[$i+1] !~ /\\tb/) { # no \tb on the next line: close row and table
+ $line .= "<\/row><\/table>\n";
+ $table = 0;
+ }
+ }
+ }
+
+        # parseRef(REF)
+        # Rewrites a "chapter:verse" style reference in dotted form, prefixed
+        # with the current $book, and returns it. When one of the two range
+        # patterns matches, the end of the range is expanded to
+        # book.chapter.verse as well.
+        sub parseRef {
+            ($ref) = @_;
+
+            for ($ref) {
+                s/[:\.]\s*$//;          # trailing colon or period
+                s/:/\./g;               # chapter:verse -> chapter.verse
+                $_ = "$book.$_";
+                s/(\d+)\.(\d[^\,]+)\-(\d+)/$1.$2-$book.$1.$3/;
+                s/(\d+)\.(\d[^\-]+)\-+\s*(\d.+)/$1.$2\-$book.$1.$3/;
+            }
+
+            return $ref;
+        }
+
+        ### Footnotes--Markers Supported: \fk, \fq, \f...\f*, \fv
+        ####Markers Not Yet Supported: \fe...\fe*, \fr, \fqa, \fl, \fp, \ft, \fdc...\fdc*, \fm...\fm*
+
+        # footnoteHandler(TEXT)
+        # TEXT is one complete footnote, from its "\f" opener to its "\f*"
+        # closer. Returns the footnote as an OSIS <note> element with the USFM
+        # footnote markers converted or removed. Reads $book and $chap and
+        # increments the footnote counter $nFN.
+        sub footnoteHandler {
+            my $note = "<note>$_[0]</note>";
+
+            # \fk Catch Words
+            $note =~ s/\\fk\s(.+?)\\fk\*/<catchWord>$1<\/catchWord>/g;
+            $note =~ s/\\fk\s(.+?)(?=\\f)/<catchWord>$1<\/catchWord>/g;
+            $note =~ s/\\fk\*//g;
+
+            # \fq Quotations in Footnotes
+            # CCL--I don't know the difference, aside from length, between catch words and quotations in footnotes. It may vary by document.
+            $note =~ s/\\fq\s(.+?)\\fq\*/<catchWord>$1<\/catchWord>/g;
+            $note =~ s/\\fq\s(.+?)(?=\\f)/<catchWord>$1<\/catchWord>/g;
+            $note =~ s/\\fq\*//g;
+
+            # \fv Footnote verse number: the verse belongs to the current
+            # chapter, so the ID is book.chapter.verse.
+            $note =~ s/\\fv\s*(\d+)\b\s*(?=\\f)/<reference osisID=\"$book.$chap.$1\">$1<\/reference>/g;
+
+            # \fr Footnote origin reference (the verse where the fn appears).
+            # Inside a footnote the reference is followed by another \f...
+            # marker (at the latest the \f* closer), so that is what the
+            # look-ahead has to search for. Only the marker is removed; the
+            # reference text stays in the note, and the first \fr found gives
+            # the note its n attribute.
+            while ($note =~ /\\fr\s*(.+?)\s*(?=\\f)/) {
+                $sourceVal = parseRef($1);
+                $nFN++;
+                $note =~ s/\\fr\s*//;
+                $note =~ s/<note>/<note n="$nFN">/;
+            }
+
+            # \ft Footnote text
+            $note =~ s/\\ft\s//g;
+
+            # \f* Footnote closer
+            $note =~ s/\\f\*//;
+
+            # \f Footnote opener, together with the caller character after it
+            $note =~ s/\\f\b\s*([^\s]\s*)?//;
+
+            return $note;
+        }
+
+        # Convert every \f ... \f* span on the line.
+        $line =~ s/(\\f\b.+?\\f\*)/footnoteHandler($1)/eg;
+
+        ### Crossreferences--Markers Supported: \x + \xo...\x*, \xk, \xq, \xt
+        #### Markers Not Yet Supported: \xdc...\xdc*
+
+        # xrefHandler(TEXT)
+        # TEXT is one complete cross-reference, from its "\x" opener to its
+        # "\x*" closer. Returns it as an OSIS crossReference <note> with the
+        # USFM markers converted or removed. Increments the counter $xFN.
+        sub xrefHandler {
+            ($xref) = @_;
+            $xref = qq{<note type="crossReference">$xref</note>};
+
+            for ($xref) {
+                # \xk Catch Words
+                s/\\xk\s(.+?)\\xk\*/<catchWord>$1<\/catchWord>/g;
+                s/\\xk\s(.+?)(?=\\x)/<catchWord>$1<\/catchWord>/g;
+                s/\\xk\*//g;
+
+                # \xq Quotations in Footnotes
+                # CCL--I don't know the difference, aside from length, between catch words and quotations in footnotes. It may vary by document.
+                s/\\xq\s(.+?)\\xq\*/<catchWord>$1<\/catchWord>/g;
+                s/\\xq\s(.+?)(?=\\x)/<catchWord>$1<\/catchWord>/g;
+                s/\\xq\*//g;
+
+                # \xo Origin reference (the verse where the cross-reference
+                # appears): only the marker is removed, and the first \xo
+                # found gives the note its n attribute.
+                while (/\\xo\s*(.+?)\s*(?=\\x)/) {
+                    $sourceVal = parseRef($1);
+                    $xFN++;
+#                   s/\\xo\s*(.+?)\s*(?=\\x)//;
+                    s/\\xo\s*//;
+                    s/<note type=\"crossReference\">/<note type=\"crossReference\" n="$xFN">/;
+                }
+
+                # \xt Crossref itself
+                s/\\xt\s(.+?)\\xt\*/<reference>$1<\/reference>/g;
+                s/\\xt\s(.+?)(?=\\x)/<reference>$1<\/reference>/g;
+                s/\\xt\*//g;
+
+                # \x* Cross-reference closer
+                s/\\x\*//;
+
+                # \x Cross-reference opener, with the caller character after it
+                s/\\x\b\s*([^\s]\s*)?//;
+            }
+
+            return $xref;
+        }
+
+        # Convert every \x ... \x* span on the line.
+        $line =~ s/(\\x\b.+?\\x\*)/xrefHandler($1)/eg;
+
+
+        # crossReference osisRef="": fill empty osisRef attributes from the
+        # reference text, then normalise the value step by step.
+        # NOTE(review): xrefHandler in this file emits <reference> without an
+        # osisRef attribute, so these rules only affect references that already
+        # carry one -- confirm whether that is intended.
+        $line =~ s/<reference osisRef="">([^<]+)<\/reference>/<reference osisRef="$1">$1<\/reference>/g;
+        # Drop whitespace right after the opening quote. (In the replacement
+        # part of s///, "\s" is not a whitespace class; it would insert a
+        # literal "s".)
+        $line =~ s/osisRef="\s+/osisRef="/g;
+        $line =~ s/\s">/">/g;
+        $line =~ s/<reference osisRef="([^\s\"]+)\s/<reference osisRef="$1\./g; # Changes space after book name to a period
+
+        $line =~ s/<reference osisRef="([^\"]+):([^\"]+)"/<reference osisRef="$1\.$2"/g; # Gen 1:1
+        $line =~ s/<reference osisRef="([^\.\"]+)\.(\d+)\.(\d+)-(\d+)"/<reference osisRef="$1\.$2\.$3-$1\.$2\.$4"/g; # Gen 1:1-2
+        $line =~ s/<reference osisRef="([^\.\"]+).(\d+):(\d+)-(\d+).(\d+)"/<reference osisRef="$1\.$2\.$3-$1\.$4\.$5"/g; # Gen 1:1-2:3
+        $line =~ s/<reference osisRef="([^\.\"]+)\.(\d+)\.([^\"]+)">([^<]+)<\/reference>; <reference osisRef="(\d+)\.(\d+)"/<reference osisRef="$1\.$2\.$3">$4<\/reference>; <reference osisRef="$1\.$5\.$6"/g; # Gen. 1:1, 2:3
+        $line =~ s/<reference osisRef="([^\.\"]+)\.(\d+)\.([^\"]+)">([^<]+)<\/reference>, <reference osisRef="(\d+)"/<reference osisRef="$1\.$2\.$3">$4<\/reference>, <reference osisRef="$1\.$2\.$5"/g; # Gen. 1:1, 3
+        $line =~ s/<reference osisRef="([^\"\.]+)\.(\d+)"/<reference osisRef="$1\.1\.$2"/g; # Jude 1
+
+        ### Special Text and Character Styles--Markers Supported: \it...\it*, \nd...\nd*, \pn...\pn*, \tl...\tl*, \qt...\qt*
+        #### Markers Not Yet Supported: Special Text: \add...\add*, \bk...\bk*, \dc...\dc*, \k...\k*, \lit, \ord...\ord*, \sig...\sig*, \sls...\sls*, \wj...\wj*; Character Styling: \em...\em*, \bd...\bd*, \bdit...\bdit*, \no...\no*, \sc...\sc*; Spacing and Breaks: !$, //, \pb; Special Features: \fig...\fig*, \ndx...\ndx*, \pro...\pro*, \w...\w*, \wg...\wg*, \wh...\wh*
+
+        # Paired character markers and the OSIS element each one turns into:
+        #   \it...\it*  italic text
+        #   \nd...\nd*  Divine Name
+        #   \pn...\pn*  Proper name
+        #   \tl...\tl*  Foreign Language (treated here merely as transliterated text)
+        foreach my $style (
+            [ "it", qq{<hi type="italic">}, "</hi>" ],
+            [ "nd", "<divineName>",         "</divineName>" ],
+            [ "pn", "<name>",               "</name>" ],
+            [ "tl", qq{<hi type="italic">}, "</hi>" ],
+        ) {
+            my ($marker, $opening, $closing) = @$style;
+            $line =~ s/\\$marker\b\s*(.*?)\\$marker\*/$opening$1$closing/g;
+        }
+
+        # Every underscore becomes a space.
+        $line =~ tr/_/ /;
+
+
+### End USFM 2.1 Items
+
+        # Keep the converted line unless nothing but whitespace is left.
+        push (@outdata, "$line\n") unless $line =~ /^\s*$/;
+    }
+}
+
+# Close everything that is still open, up to and including </osis>.
+push (@outdata, closeTag("</osis>"));
+
+# Write the buffered output. Each chunk first has the first sID/eID value
+# that holds a space-separated ID list cut down to its first ID; chunks
+# that are only whitespace are skipped, runs of line breaks are collapsed and
+# every written chunk ends in exactly one newline.
+foreach my $chunk (@outdata) {
+    #$chunk =~ s/---/―/g; # m-dash
+    #$chunk =~ s/--/—/g; # n-dash
+    $chunk =~ s/([es]ID=\"[^\" ]+) [^\"]*\"/$1\"/;
+
+    next if $chunk =~ /^\s*$/;
+
+    $chunk =~ s/[\r\n]+/\n/g;
+    $chunk =~ s/\n?$/\n/;
+    print OUTF $chunk;
+}
+close (OUTF);
+
+print "Doing some cleanup.\n";
+
+# Read back the file that was just written and reopen it for writing. Both
+# opens are checked: reopening for write truncates the file, so carrying on
+# after a failed read would replace the converted text with nothing.
+open (INF, "<:utf8", "$outputFilename") or die "Could not open file $outputFilename for reading: $!\n";
+@filedata = <INF>;
+close (INF);
+open (OUTF, ">:utf8", "$outputFilename") or die "Could not open file $outputFilename for writing: $!\n";
+
+#bubble chapter down: a chapter milestone directly followed by a closing tag
+#is swapped with that tag, repeatedly, until it sits after the closers.
+for ($i = 0; $i < scalar(@filedata); $i++) {
+    # $i > 0 keeps $i-1 from wrapping around to the last line of the file.
+    if ($i > 0 && $filedata[$i] =~ /^<\// && $filedata[$i-1] =~ /^<chapter.+\/>/) {
+        $temp = $filedata[$i];
+        $filedata[$i] = $filedata[$i-1];
+        $filedata[$i-1] = $temp;
+        $i -= 2;    # step back so the line before the swapped pair is re-examined
+    }
+}
+$fullfile .= join("", @filedata);
+$fullfile =~ s/<\/div>\n(<chapter eID[^>]+>)/$1\n<\/div>/mg; #swap the chapter back up one before the book closer
+
+print "Tagging quotations.\n";
+ # The encoding pass replaced "<<", ">>", "<" and ">" in the input with the placeholders "@", "#", "$" and "%"; they are turned into <q> milestones here.
+$q = 1; # running number used in the sID/eID of the level-2 quotes
+ # "$ ... %" pairs become level-2 quote start/end milestones
+$fullfile =~ s/\$([^\%]+?)\%/"<q level=\"2\" sID=\"q2." . $q . "\"\/>" . $1 . "<q level=\"2\" eID=\"q2." . $q++ . "\"\/>"/eg;
+ # any "$" that is left has no matching "%" and becomes a cQuote milestone
+$fullfile =~ s/\$/"<milestone type=\"cQuote\" subType=\"x-level-2\"\/>"/eg;
+
+$q = 1; # numbering restarts for the level-1 quotes
+ # NOTE(review): the two loops below pair an inner "@ ... #" that follows an unclosed "@" -- presumably quotes continued over several paragraphs; confirm
+while ($fullfile =~ /(\@[^\@\#]+?)\@([^\@\#]+?)\#(([^\@\#]+?\@[^\@\#]+?\#)+[^\@\#]+?\#)/) {
+ $fullfile =~ s/(\@[^\@\#]+?)\@([^\@\#]+?)\#(([^\@\#]+?\@[^\@\#]+?\#)+[^\@\#]+?\#)/$1 . "<q level=\"1\" sID=\"q1." . $q . "\"\/>" . $2 . "<q level=\"1\" eID=\"q1." . $q++ . "\"\/>" . $3/eg;
+}
+while ($fullfile =~ /(\@[^\@\#]+?)\@([^\@\#]+?)\#([^\@\#]+?\#)/) {
+ $fullfile =~ s/(\@[^\@\#]+?)\@([^\@\#]+?)\#([^\@\#]+?\#)/$1 . "<q level=\"1\" sID=\"q1." . $q . "\"\/>" . $2 . "<q level=\"1\" eID=\"q1." . $q++ . "\"\/>" . $3/eg;
+}
+ # remaining "@ ... #" pairs become level-1 quote start/end milestones; a leftover "@" becomes a cQuote milestone
+$fullfile =~ s/\@([^\#]+?)\#/"<q level=\"1\" sID=\"q1." . $q . "\"\/>" . $1 . "<q level=\"1\" eID=\"q1." . $q++ . "\"\/>"/eg;
+$fullfile =~ s/\@/"<milestone type=\"cQuote\" subType=\"x-level-1\"\/>"/eg;
+ # every "^" becomes a level-1 quote end milestone whose eID ends in ".false"
+$fullfile =~ s/\^/"<q level=\"1\" eID=\"q1." . $q++ . ".false\"\/>"/eg;
+ # write the finished document back to the output file
+print OUTF $fullfile;
+close (OUTF);
+
+print "All done! OSIS file: $outputFilename\n";