summaryrefslogtreecommitdiffstats
path: root/modules
diff options
context:
space:
mode:
authorChris Little <chrislit@crosswire.org>2012-08-10 11:30:42 +0000
committerChris Little <chrislit@crosswire.org>2012-08-10 11:30:42 +0000
commit88e5189197b7390003a9aeea9def9e41e7279978 (patch)
tree90b88b36822b31bc805f42597590534a30afc773 /modules
parentd4c721752ed56983e40400cd61b6255ba0619a66 (diff)
downloadsword-tools-88e5189197b7390003a9aeea9def9e41e7279978.tar.gz
started organizing & implementing deprecated/obsolete/private-use USFM from stylesheet
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@373 07627401-56e2-0310-80f4-f8cd0041bdcd
Diffstat (limited to 'modules')
-rwxr-xr-xmodules/python/usfm2osis.py108
1 files changed, 71 insertions, 37 deletions
diff --git a/modules/python/usfm2osis.py b/modules/python/usfm2osis.py
index bccf020..8434d73 100755
--- a/modules/python/usfm2osis.py
+++ b/modules/python/usfm2osis.py
@@ -268,6 +268,41 @@ def convertToOSIS(sFile):
return osis
def cvtRelaxedConformanceRemaps(osis, relaxedConformance):
    """
    Relaxed-conformance remaps
    Rewrites deprecated markers and USFM 2.0 peripheral markers into the
    markers the later conversion passes already understand.

    osis               -- the USFM text being converted
    relaxedConformance -- when false, the text is returned untouched

    Returns the text with the remaps applied.
    """
    if not relaxedConformance:
        return osis

    # Each entry is (pattern, replacement template), applied in order.
    # The templates are raw strings with an escaped backslash: a template that
    # reaches re.sub as '\periph' or '\id' contains an unknown escape ('\p',
    # '\i'), which newer Python versions reject with "bad escape". Written as
    # r'\\periph ...\n' the template yields a literal backslash and a newline.
    remaps = (
        # \tr#: DEP: map to \tr
        (r'\\tr\d\b', r'\\tr'),

        # remapped 2.0 periphs
        # \pub
        (r'\\pub\b\s', r'\\periph Publication Data\n'),
        # \toc : \periph Table of Contents
        (r'\\toc\b\s', r'\\periph Table of Contents\n'),
        # \pref
        (r'\\pref\b\s', r'\\periph Preface\n'),
        # \maps
        (r'\\maps\b\s', r'\\periph Map Index\n'),
        # \cov
        (r'\\cov\b\s', r'\\periph Cover\n'),
        # \spine
        (r'\\spine\b\s', r'\\periph Spine\n'),
        # \pubinfo
        (r'\\pubinfo\b\s', r'\\periph Publication Information\n'),

        # 2.0 markers that are remapped to \id lines rather than \periph
        # \intro
        (r'\\intro\b\s', r'\\id INT\n'),
        # \conc
        (r'\\conc\b\s', r'\\id CNC\n'),
        # \glo
        (r'\\glo\b\s', r'\\id GLO\n'),
        # \idx
        (r'\\idx\b\s', r'\\id TDX\n'),
    )

    for pattern, replacement in remaps:
        osis = re.sub(pattern, replacement, osis)

    return osis
+
+
def cvtIdentification(osis, relaxedConformance):
"""
Identification
@@ -289,6 +324,10 @@ def convertToOSIS(sFile):
# \rem_text...
osis = re.sub(r'\\rem\b\s+(.+)', r'<!-- rem - \1 -->', osis)
+
+ # \restore: unpublished, seek example
+ if relaxedConformance:
+ osis = re.sub(r'\\restore\b\s+(.+)', r'<!-- restore - \1 -->', osis)
# \h#_text...
osis = re.sub(r'\\h\b\s+(.+)\s*\n', r'<title type="runningHead">\1</title>\n', osis)
@@ -533,6 +572,13 @@ def convertToOSIS(sFile):
# \b
osis = re.sub(r'\\b\b\s?', r'<lb type="x-p"/>', osis)
+ if relaxedConformance:
+ # TODO: \phi: DEP: Paragraph text, indented with hanging indent
+ # TODO: \ps: DEP: Paragraph text, no break with next paragraph text at chapter boundary
+ # TODO: \psi: DEP: Paragraph text, indented, with no break with next paragraph text (at chapter boundary)
+ # TODO: \p#: Front or back matter text paragraph, level # (if multiple levels)
+ pass
+
return osis
@@ -669,6 +715,12 @@ def convertToOSIS(sFile):
# \xt_
note = re.sub(r'\\xt\s', r'', note)
+ if relaxedConformance:
+ # TODO: \xtSee..\xtSee*: Concordance and Names Index markup for an alternate entry target reference.
+ # TODO: \xtSeeAlso...\xtSeeAlso: Concordance and Names Index markup for an additional entry target reference.
+ pass
+
+
# \xo_##SEP##
note = re.sub(r'\\xo\b\s(.+?)(?=(\\x|'+u'﷟))', u'﷟'+r'<reference>\1</reference>', note)
@@ -745,6 +797,11 @@ def convertToOSIS(sFile):
# \sls_...\sls*
osis = re.sub(r'\\sls\b\s*(.+?)\\sls\*', r'<foreign>/1</foreign>', osis, flags=re.DOTALL) # find a better mapping than <foreign>?
+ if relaxedConformance:
+ # TODO: \addpn...\addpn*: For chinese words to be dot underline & underline
+ # TODO: \k#: Concordance main entry text or keyword, level #
+ pass
+
return osis
@@ -838,6 +895,10 @@ def convertToOSIS(sFile):
# \wh_...\wh*
osis = re.sub(r'\\wh\s+(.+?)(\s*)\\wh\*', r'\1<index index="Hebrew" level1="\1"/>\2', osis, flags=re.DOTALL)
+ if relaxedConformance:
+ # TODO: \wr...\wr*: OBS: Auxiliary - Wordlist/Glossary Reference
+ pass
+
return osis
@@ -858,6 +919,7 @@ def convertToOSIS(sFile):
periph += 'x-unknown'
periph += '">\n' + contents + '</div>\n'
return periph
+
osis = re.sub(r'\\periph\s+([^\n]+)\s*\n(.+?)(?=(</div type="book">|\\periph\s+))', tagPeriph, osis, flags=re.DOTALL)
return osis
@@ -891,6 +953,14 @@ def convertToOSIS(sFile):
supported: \z<Extension>
We can't really know what these mean, but will preserve them as <milestone/> elements.
"""
+ if relaxedConformance:
+ # publishing assistant markers
+ # \zpa-xb...\zpa-xb* : \periph Book
+ # \zpa-xc...\zpa-xc* : \periph Chapter
+ # \zpa-xv...\zpa-xv* : \periph Verse
+ # \zpa-xd...\zpa-xd* : \periph Description
+ pass
+
# \z
osis = re.sub(r'\\z([^\s]+)', r'<milestone type="x-usfm-z-\1"/>', osis)
@@ -983,6 +1053,7 @@ def convertToOSIS(sFile):
# call individual conversion processors in series
osis = cvtPreprocess(osis, relaxedConformance)
+ osis = cvtRelaxedConformanceRemaps(osis, relaxedConformance)
osis = cvtIdentification(osis, relaxedConformance)
osis = cvtIntroductions(osis, relaxedConformance)
osis = cvtTitles(osis, relaxedConformance)
@@ -1010,7 +1081,6 @@ def convertToOSIS(sFile):
return osis
-
def writeOSISHeader(oFile, workID, lang='en'):
oFile.write('<?xml version="1.0" encoding="UTF-8"?>\n<osis xmlns="http://www.bibletechnologies.net/2003/OSIS/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.bibletechnologies.net/2003/OSIS/namespace http://www.bibletechnologies.net/osisCore.'+OSISversion+'.xsd">\n<osisText osisRefWork="Bible" xml:lang="' + lang + '" osisIDWork="' + workID + '">\n<header>\n<work osisWork="' + workID + '"/>\n</header>\n')
@@ -1187,39 +1257,3 @@ if __name__ == "__main__":
print('Unhandled USFM tags: ' + ', '.join(sorted(unhandledTags)) + ' (' + str(len(unhandledTags)) + ' total)')
if not relaxedConformance:
print('Consider using the -r option for relaxed markup processing.')
-
-
-# TOOD: relaxed tags to add:
-# \restore: unpublished, seek example
-# \addpn...\addpn*: For chinese words to be dot underline & underline
-# \p#: Front or back matter text paragraph, level # (if multiple levels)
-# \k#: Concordance main entry text or keyword, level #
-# \xtSee..\xtSee*: Concordance and Names Index markup for an alternate entry target reference.
-# \xtSeeAlso...\xtSeeAlso: Concordance and Names Index markup for an additional entry target reference.
-# \tr#: DEP: map to \tr
-# \phi: DEP: Paragraph text, indented with hanging indent
-# \ps: DEP: Paragraph text, no break with next paragraph text at chapter boundary
-# \psi: DEP: Paragraph text, indented, with no break with next paragraph text (at chapter boundary)
-# \wr...\wr*: OBS: Auxiliary - Wordlist/Glossary Reference
-
-# 2.0 periphs to remap
-# FRONT MATTER
-# \pub : \periph Publication Data
-# \toc : \periph Table of Contents
-# \pref : \periph Preface
-# \intro : \periph Introduction
-# BACK MATTER
-# \conc : \periph Concordance
-# \glo : \periph Glossary
-# \idx : \periph Index
-# \maps : \periph Map Index
-# OTHER
-# \cov : \periph Cover
-# \spine : \periph Spine
-# \pubinfo: \periph Publication Information
-
-# publishing assistant markers
-# \zpa-xb...\zpa-xb* : \periph Book
-# \zpa-xc...\zpa-xc* : \periph Chapter
-# \zpa-xv...\zpa-xv* : \periph Verse
-# \zpa-xd...\zpa-xd* : \periph Description