diff options
author | Chris Little <chrislit@crosswire.org> | 2012-08-10 11:30:42 +0000 |
---|---|---|
committer | Chris Little <chrislit@crosswire.org> | 2012-08-10 11:30:42 +0000 |
commit | 88e5189197b7390003a9aeea9def9e41e7279978 (patch) | |
tree | 90b88b36822b31bc805f42597590534a30afc773 /modules | |
parent | d4c721752ed56983e40400cd61b6255ba0619a66 (diff) | |
download | sword-tools-88e5189197b7390003a9aeea9def9e41e7279978.tar.gz |
Started organizing and implementing deprecated/obsolete/private-use USFM markers from the stylesheet
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@373 07627401-56e2-0310-80f4-f8cd0041bdcd
Diffstat (limited to 'modules')
-rwxr-xr-x | modules/python/usfm2osis.py | 108 |
1 files changed, 71 insertions, 37 deletions
diff --git a/modules/python/usfm2osis.py b/modules/python/usfm2osis.py index bccf020..8434d73 100755 --- a/modules/python/usfm2osis.py +++ b/modules/python/usfm2osis.py @@ -268,6 +268,41 @@ def convertToOSIS(sFile): return osis + def cvtRelaxedConformanceRemaps(osis, relaxedConformance): + if not relaxedConformance: + return osis + + # \tr#: DEP: map to \tr + osis = re.sub(r'\\tr\d\b', r'\\tr', osis) + + # remapped 2.0 periphs + # \pub + osis = re.sub(r'\\pub\b\s', '\\periph Publication Data\n', osis) + # \toc : \periph Table of Contents + osis = re.sub(r'\\toc\b\s', '\\periph Table of Contents\n', osis) + # \pref + osis = re.sub(r'\\pref\b\s', '\\periph Preface\n', osis) + # \maps + osis = re.sub(r'\\maps\b\s', '\\periph Map Index\n', osis) + # \cov + osis = re.sub(r'\\cov\b\s', '\\periph Cover\n', osis) + # \spine + osis = re.sub(r'\\spine\b\s', '\\periph Spine\n', osis) + # \pubinfo + osis = re.sub(r'\\pubinfo\b\s', '\\periph Publication Information\n', osis) + + # \intro + osis = re.sub(r'\\intro\b\s', '\\id INT\n', osis) + # \conc + osis = re.sub(r'\\conc\b\s', '\\id CNC\n', osis) + # \glo + osis = re.sub(r'\\glo\b\s', '\\id GLO\n', osis) + # \idx + osis = re.sub(r'\\idx\b\s', '\\id TDX\n', osis) + + return osis + + def cvtIdentification(osis, relaxedConformance): """ Identification @@ -289,6 +324,10 @@ def convertToOSIS(sFile): # \rem_text... osis = re.sub(r'\\rem\b\s+(.+)', r'<!-- rem - \1 -->', osis) + + # \restore: unpublished, seek example + if relaxedConformance: + osis = re.sub(r'\\restore\b\s+(.+)', r'<!-- restore - \1 -->', osis) # \h#_text... 
osis = re.sub(r'\\h\b\s+(.+)\s*\n', r'<title type="runningHead">\1</title>\n', osis) @@ -533,6 +572,13 @@ def convertToOSIS(sFile): # \b osis = re.sub(r'\\b\b\s?', r'<lb type="x-p"/>', osis) + if relaxedConformance: + # TODO: \phi: DEP: Paragraph text, indented with hanging indent + # TODO: \ps: DEP: Paragraph text, no break with next paragraph text at chapter boundary + # TODO: \psi: DEP: Paragraph text, indented, with no break with next paragraph text (at chapter boundary) + # TODO: \p#: Front or back matter text paragraph, level # (if multiple levels) + pass + return osis @@ -669,6 +715,12 @@ def convertToOSIS(sFile): # \xt_ note = re.sub(r'\\xt\s', r'', note) + if relaxedConformance: + # TODO: \xtSee..\xtSee*: Concordance and Names Index markup for an alternate entry target reference. + # TODO: \xtSeeAlso...\xtSeeAlso: Concordance and Names Index markup for an additional entry target reference. + pass + + # \xo_##SEP## note = re.sub(r'\\xo\b\s(.+?)(?=(\\x|'+u'))', u''+r'<reference>\1</reference>', note) @@ -745,6 +797,11 @@ def convertToOSIS(sFile): # \sls_...\sls* osis = re.sub(r'\\sls\b\s*(.+?)\\sls\*', r'<foreign>/1</foreign>', osis, flags=re.DOTALL) # find a better mapping than <foreign>? 
+ if relaxedConformance: + # TODO: \addpn...\addpn*: For chinese words to be dot underline & underline + # TODO: \k#: Concordance main entry text or keyword, level # + pass + return osis @@ -838,6 +895,10 @@ def convertToOSIS(sFile): # \wh_...\wh* osis = re.sub(r'\\wh\s+(.+?)(\s*)\\wh\*', r'\1<index index="Hebrew" level1="\1"/>\2', osis, flags=re.DOTALL) + if relaxedConformance: + # TODO: \wr...\wr*: OBS: Auxiliary - Wordlist/Glossary Reference + pass + return osis @@ -858,6 +919,7 @@ def convertToOSIS(sFile): periph += 'x-unknown' periph += '">\n' + contents + '</div>\n' return periph + osis = re.sub(r'\\periph\s+([^\n]+)\s*\n(.+?)(?=(</div type="book">|\\periph\s+))', tagPeriph, osis, flags=re.DOTALL) return osis @@ -891,6 +953,14 @@ def convertToOSIS(sFile): supported: \z<Extension> We can't really know what these mean, but will preserve them as <milestone/> elements. """ + if relaxedConformance: + # publishing assistant markers + # \zpa-xb...\zpa-xb* : \periph Book + # \zpa-xc...\zpa-xc* : \periph Chapter + # \zpa-xv...\zpa-xv* : \periph Verse + # \zpa-xd...\zpa-xd* : \periph Description + pass + # \z osis = re.sub(r'\\z([^\s]+)', r'<milestone type="x-usfm-z-\1"/>', osis) @@ -983,6 +1053,7 @@ def convertToOSIS(sFile): # call individual conversion processors in series osis = cvtPreprocess(osis, relaxedConformance) + osis = cvtRelaxedConformanceRemaps(osis, relaxedConformance) osis = cvtIdentification(osis, relaxedConformance) osis = cvtIntroductions(osis, relaxedConformance) osis = cvtTitles(osis, relaxedConformance) @@ -1010,7 +1081,6 @@ def convertToOSIS(sFile): return osis - def writeOSISHeader(oFile, workID, lang='en'): oFile.write('<?xml version="1.0" encoding="UTF-8"?>\n<osis xmlns="http://www.bibletechnologies.net/2003/OSIS/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.bibletechnologies.net/2003/OSIS/namespace http://www.bibletechnologies.net/osisCore.'+OSISversion+'.xsd">\n<osisText osisRefWork="Bible" 
xml:lang="' + lang + '" osisIDWork="' + workID + '">\n<header>\n<work osisWork="' + workID + '"/>\n</header>\n') @@ -1187,39 +1257,3 @@ if __name__ == "__main__": print('Unhandled USFM tags: ' + ', '.join(sorted(unhandledTags)) + ' (' + str(len(unhandledTags)) + ' total)') if not relaxedConformance: print('Consider using the -r option for relaxed markup processing.') - - -# TOOD: relaxed tags to add: -# \restore: unpublished, seek example -# \addpn...\addpn*: For chinese words to be dot underline & underline -# \p#: Front or back matter text paragraph, level # (if multiple levels) -# \k#: Concordance main entry text or keyword, level # -# \xtSee..\xtSee*: Concordance and Names Index markup for an alternate entry target reference. -# \xtSeeAlso...\xtSeeAlso: Concordance and Names Index markup for an additional entry target reference. -# \tr#: DEP: map to \tr -# \phi: DEP: Paragraph text, indented with hanging indent -# \ps: DEP: Paragraph text, no break with next paragraph text at chapter boundary -# \psi: DEP: Paragraph text, indented, with no break with next paragraph text (at chapter boundary) -# \wr...\wr*: OBS: Auxiliary - Wordlist/Glossary Reference - -# 2.0 periphs to remap -# FRONT MATTER -# \pub : \periph Publication Data -# \toc : \periph Table of Contents -# \pref : \periph Preface -# \intro : \periph Introduction -# BACK MATTER -# \conc : \periph Concordance -# \glo : \periph Glossary -# \idx : \periph Index -# \maps : \periph Map Index -# OTHER -# \cov : \periph Cover -# \spine : \periph Spine -# \pubinfo: \periph Publication Information - -# publishing assistant markers -# \zpa-xb...\zpa-xb* : \periph Book -# \zpa-xc...\zpa-xc* : \periph Chapter -# \zpa-xv...\zpa-xv* : \periph Verse -# \zpa-xd...\zpa-xd* : \periph Description |