summary | refs | log | tree | commit | diff | stats
path: root/modules
diff options
context:
space:
mode:
author: Chris Little <chrislit@crosswire.org> 2012-08-10 17:09:14 +0000
committer: Chris Little <chrislit@crosswire.org> 2012-08-10 17:09:14 +0000
commit: 0b7afc544455c9cdc31e129ee603bf8f021e8001 (patch)
tree: e6a8a146494748d10440eb08a6539119103598c0 /modules
parent: 901e20627dce8fe68bd9d6adba3173db06d64d7a (diff)
download: sword-tools-0b7afc544455c9cdc31e129ee603bf8f021e8001.tar.gz
cleaned up spacing in output
fixed output validation errors due to addition of intro tags

git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@375 07627401-56e2-0310-80f4-f8cd0041bdcd
Diffstat (limited to 'modules')
-rwxr-xr-x modules/python/usfm2osis.py 120
1 files changed, 63 insertions, 57 deletions
diff --git a/modules/python/usfm2osis.py b/modules/python/usfm2osis.py
index e18e6da..698292c 100755
--- a/modules/python/usfm2osis.py
+++ b/modules/python/usfm2osis.py
@@ -35,7 +35,7 @@ scriptVersion = '0.5'
# Employ best-practice conformant OSIS
# Employ modularity (functions rather than a big long script)
# Employ the same command-line syntax as usfm2osis.pl
-# Use & abuse Unicode tags (http://unicode.org/charts/PDF/UE0000.pdf) to simplify Regex processing
+# Use non-characters for milestoning
### Roadmap:
# 0.5 initial commit, including full coverage of core USFM tags
@@ -49,8 +49,8 @@ scriptVersion = '0.5'
# 1.x SWORD module output?, requiring SWORD bindings
### Key to non-characters:
-# Used : ﷐﷑﷒﷓﷔﷕﷖﷗﷘﷙﷚﷛﷜﷝﷞﷟
-# Unused : ﷠﷡﷢﷣﷤﷥﷦﷧﷨﷩﷪﷫﷬﷭﷮﷯
+# Used : ﷐﷑﷒﷓﷔﷕﷖﷗﷘﷙﷚﷛﷜﷝﷞﷟﷠﷡
+# Unused : ﷧﷨﷩﷪﷫﷬﷭﷮﷯
# ﷐ book
# ﷑ chapter
# ﷒ verse
@@ -67,6 +67,13 @@ scriptVersion = '0.5'
# ﷝ s4
# ﷞ s5
# ﷟ notes
+# ﷠ intro-list
+# ﷡ intro-outline
+# ﷢ is1
+# ﷣ is2
+# ﷤ is3
+# ﷥ is4
+# ﷦ is5
import sys, codecs, re
from encodings.aliases import aliases
@@ -325,7 +332,7 @@ def convertToOSIS(sFile):
# \rem_text...
osis = re.sub(r'\\rem\b\s+(.+)', r'<!-- rem - \1 -->', osis)
- # \restore: unpublished, seek example
+ # \restore_text...
if relaxedConformance:
osis = re.sub(r'\\restore\b\s+(.+)', r'<!-- restore - \1 -->', osis)
@@ -359,20 +366,23 @@ def convertToOSIS(sFile):
# \imt#_text...
osis = re.sub(r'\\imt(\d?)\s+(.+)', lambda m: '<title ' + ('level="'+m.group(1)+'" ' if m.group(1) else '') + 'type="main" subType="x-introduction">' + m.group(2) + '</title>', osis)
+ # \imte#_text...
+ osis = re.sub(r'\\imte(\d?)\b\s+(.+)', lambda m: '<title ' + ('level="'+m.group(1)+'" ' if m.group(1) else '') + 'type="main" subType="x-introduction-end">' + m.group(2) + '</title>', osis)
+
# \is#_text...
osis = re.sub(r'\\is1?\s+(.+)', lambda m: u'﷚<div type="section" subType="x-introduction"><title>' + m.group(1) + '</title>', osis)
- osis = re.sub(u'(﷚[^﷕﷐﷖﷗﷘﷙﷚]+)', r'\1'+u'</div>﷚\n', osis, re.DOTALL)
- osis = re.sub(r'\\is2\s+(.+)', lambda m: u'﷛<div type="subsection" subType="x-introduction"><title>' + m.group(1) + '</title>', osis)
- osis = re.sub(u'(﷛[^﷕﷐﷖﷗﷘﷙﷚﷛]+)', r'\1'+u'</div>﷛\n', osis, re.DOTALL)
+ osis = re.sub(u'(﷚[^﷕﷐﷖﷗﷘﷙﷚]+)', r'\1'+u'</div>﷚\n', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\is2\s+(.+)', lambda m: u'﷛<div type="subSection" subType="x-introduction"><title>' + m.group(1) + '</title>', osis)
+ osis = re.sub(u'(﷛[^﷕﷐﷖﷗﷘﷙﷚﷛]+)', r'\1'+u'</div>﷛\n', osis, flags=re.DOTALL)
osis = re.sub(r'\\is3\s+(.+)', lambda m: u'﷜<div type="x-subSubSection" subType="x-introduction"><title>' + m.group(1) + '</title>', osis)
- osis = re.sub(u'(﷜[^﷕﷐﷖﷗﷘﷙﷚﷛﷜]+)', r'\1'+u'</div>﷜\n', osis, re.DOTALL)
+ osis = re.sub(u'(﷜[^﷕﷐﷖﷗﷘﷙﷚﷛﷜]+)', r'\1'+u'</div>﷜\n', osis, flags=re.DOTALL)
osis = re.sub(r'\\is4\s+(.+)', lambda m: u'﷝<div type="x-subSubSubSection" subType="x-introduction"><title>' + m.group(1) + '</title>', osis)
- osis = re.sub(u'(﷝[^﷕﷐﷖﷗﷘﷙﷚﷛﷜﷝]+)', r'\1'+u'</div>﷝\n', osis, re.DOTALL)
+ osis = re.sub(u'(﷝[^﷕﷐﷖﷗﷘﷙﷚﷛﷜﷝]+)', r'\1'+u'</div>﷝\n', osis, flags=re.DOTALL)
osis = re.sub(r'\\is5\s+(.+)', lambda m: u'﷞<div type="x-subSubSubSubSection" subType="x-introduction"><title>' + m.group(1) + '</title>', osis)
- osis = re.sub(u'(﷞[^﷕﷐﷖﷗﷘﷙﷚﷛﷜﷝﷞]+)', r'\1'+u'</div>﷞\n', osis, re.DOTALL)
+ osis = re.sub(u'(﷞[^﷕﷐﷖﷗﷘﷙﷚﷛﷜﷝﷞]+)', r'\1'+u'</div>﷞\n', osis, flags=re.DOTALL)
# \ip_text...
- osis = re.sub(r'\\ip\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'﷓<p subType="x-introduction">\n' + m.group(1) + u'﷓</p>\n', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\ip\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr|io|iq|i?li|iex?|s)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'﷓<p subType="x-introduction">\n' + m.group(1) + u'﷓</p>\n', osis, flags=re.DOTALL)
# \ipi_text...
# \im_text...
@@ -381,11 +391,11 @@ def convertToOSIS(sFile):
# \imq_text...
# \ipr_text...
pType = {'ipi':'x-indented', 'im':'x-noindent', 'imi':'x-noindent-indented', 'ipq':'x-quote', 'imq':'x-noindent-quote', 'ipr':'x-right'}
- osis = re.sub(r'\\(ipi|im|ipq|imq|ipr)\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'﷓<p type="' + pType[m.group(1)] + '" subType="x-introduction">\n' + m.group(2) + u'﷓</p>\n', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\(ipi|im|ipq|imq|ipr)\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr|io|iq|i?li|iex?|s)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'﷓<p type="' + pType[m.group(1)] + '" subType="x-introduction">\n' + m.group(2) + u'﷓</p>\n', osis, flags=re.DOTALL)
# \iq#_text...
- osis = re.sub(r'\\iq\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\i?q[\d\s]|<l\b|<lb\b|<title\b))', r'<l level="1" subType="x-introduction">\1</l>', osis, flags=re.DOTALL)
- osis = re.sub(r'\\iq(\d)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\i?q[\d\s]|<l\b|<lb\b|<title\b))', r'<l level="\1" subType="x-introduction">\2</l>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\iq\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\i?q[\d\s]|\\fig|<l\b|<lb\b|<title\b))', r'<l level="1" subType="x-introduction">\1</l>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\iq(\d)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\i?q[\d\s]|\\fig|<l\b|<lb\b|<title\b))', r'<l level="\1" subType="x-introduction">\2</l>', osis, flags=re.DOTALL)
# \ib
osis = re.sub(r'\\ib\b\s?', '<lb type="x-p"/>', osis)
@@ -394,18 +404,18 @@ def convertToOSIS(sFile):
osis = re.sub('(<lg>.+?</lg>)', lambda m: m.group(1).replace('<lb type="x-p"/>', '</lg><lg>'), osis, flags=re.DOTALL) # re-handle \b that occurs within <lg>
# \ili#_text...
- osis = re.sub(r'\\ili\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\ili[\d\s]|<lb\b|<title\b))', r'<item type="x-indent-1" subType="x-introduction">\1</item>', osis, flags=re.DOTALL)
- osis = re.sub(r'\\ili(\d)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\ili[\d\s]|<lb\b|<title\b))', r'<item type="x-indent-\1" subType="x-introduction">\2</item>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\ili\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\ili[\d\s]|<lb\b|<title\b|<item\b))', ur'<item type="x-indent-1" subType="x-introduction">﷠\1﷠</item>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\ili(\d)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\ili[\d\s]|<lb\b|<title\b|<item\b))', ur'<item type="x-indent-\1" subType="x-introduction">﷠\2﷠</item>', osis, flags=re.DOTALL)
osis = osis.replace('\n</item>', '</item>\n')
- osis = re.sub(u'(<item [^﷐﷑﷓﷔]+</item>)', r'<list>\1</list>', osis, flags=re.DOTALL)
+ osis = re.sub(u'(<item [^﷐﷑﷓﷔]+</item>)', ur'﷓<list>\1</list>﷓', osis, flags=re.DOTALL)
# \iot_text...
# \io#_text...(references range)
- osis = re.sub(r'\\io\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\io[t\d\s]|<lb\b|<title\b))', r'<item type="x-indent-1" subType="x-introduction">\1</item>', osis, flags=re.DOTALL)
- osis = re.sub(r'\\io(\d)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\io[t\d\s]|<lb\b|<title\b))', r'<item type="x-indent-\1" subType="x-introduction">\2</item>', osis, flags=re.DOTALL)
- osis = re.sub(r'\\iot\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\io[t\d\s]|<lb\b|<title\b))', r'<item type="head">\1</item type="head">', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\io\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\io[t\d\s]|\\iex?|<lb\b|<title\b|<item\b))', ur'<item type="x-indent-1" subType="x-introduction">﷡\1﷡</item>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\io(\d)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\io[t\d\s]|\\iex?|<lb\b|<title\b|<item\b))', ur'<item type="x-indent-\1" subType="x-introduction">﷡\2﷡</item>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\iot\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\io[t\d\s]|\\iex?|<lb\b|<title\b|<item\b))', ur'<item type="head">﷡\1﷡</item type="head">', osis, flags=re.DOTALL)
osis = osis.replace('\n</item>', '</item>\n')
- osis = re.sub(u'(<item [^﷐﷑﷓﷔]+</item>)', r'<div type="outline"><list>\1</list></div>', osis, flags=re.DOTALL)
+ osis = re.sub(u'(<item [^﷐﷑﷓﷔﷠]+</item>)', ur'﷓<div type="outline"><list>\1</list></div>﷓', osis, flags=re.DOTALL)
osis = re.sub('item type="head"', 'head', osis)
# \ior_text...\ior*
@@ -417,9 +427,6 @@ def convertToOSIS(sFile):
# \iqt_text...\iqt*
osis = re.sub(r'\\iqt\s+(.+?)\\iqt\*', r'<q subType="x-introduction">\1</q>', osis, flags=re.DOTALL)
- # \imte#_text...
- osis = re.sub(r'\\imte(\d?)\b\s+(.+)', lambda m: '<title ' + ('level="'+m.group(1)+'" ' if m.group(1) else '') + 'type="main" subType="x-introduction-end">' + m.group(2) + '</title>', osis)
-
# \ie
osis = re.sub(r'\\ie\b\s*', '<milestone type="x-usfm-ie"/>', osis)
@@ -433,33 +440,33 @@ def convertToOSIS(sFile):
"""
# \ms#_text...
osis = re.sub(r'\\ms1?\s+(.+)', lambda m: u'﷕<div type="majorSection"><title>' + m.group(1) + '</title>', osis)
- osis = re.sub(u'(﷕[^﷕﷐]+)', r'\1'+u'</div>﷕\n', osis, re.DOTALL)
+ osis = re.sub(u'(﷕[^﷕﷐]+)', r'\1'+u'</div>﷕\n', osis, flags=re.DOTALL)
osis = re.sub(r'\\ms2\s+(.+)', lambda m: u'﷖<div type="majorSection" n="2"><title>' + m.group(1) + '</title>', osis)
- osis = re.sub(u'(﷖[^﷕﷐﷖]+)', r'\1'+u'</div>﷖\n', osis, re.DOTALL)
+ osis = re.sub(u'(﷖[^﷕﷐﷖]+)', r'\1'+u'</div>﷖\n', osis, flags=re.DOTALL)
osis = re.sub(r'\\ms3\s+(.+)', lambda m: u'﷗<div type="majorSection" n="3"><title>' + m.group(1) + '</title>', osis)
- osis = re.sub(u'(﷗[^﷕﷐﷖﷗]+)', r'\1'+u'</div>﷗\n', osis, re.DOTALL)
+ osis = re.sub(u'(﷗[^﷕﷐﷖﷗]+)', r'\1'+u'</div>﷗\n', osis, flags=re.DOTALL)
osis = re.sub(r'\\ms4\s+(.+)', lambda m: u'﷘<div type="majorSection" n="4"><title>' + m.group(1) + '</title>', osis)
- osis = re.sub(u'(﷘[^﷕﷐﷖﷗﷘]+)', r'\1'+u'</div>﷘\n', osis, re.DOTALL)
+ osis = re.sub(u'(﷘[^﷕﷐﷖﷗﷘]+)', r'\1'+u'</div>﷘\n', osis, flags=re.DOTALL)
osis = re.sub(r'\\ms5\s+(.+)', lambda m: u'﷙<div type="majorSection" n="5"><title>' + m.group(1) + '</title>', osis)
- osis = re.sub(u'(﷙[^﷕﷐﷖﷗﷘﷙]+)', r'\1'+u'</div>﷙\n', osis, re.DOTALL)
+ osis = re.sub(u'(﷙[^﷕﷐﷖﷗﷘﷙]+)', r'\1'+u'</div>﷙\n', osis, flags=re.DOTALL)
# \mr_text...
osis = re.sub(r'\\mr\s+(.+)', u'﷔<title type="scope"><reference>'+r'\1</reference></title>', osis)
# \s#_text...
osis = re.sub(r'\\s1?\s+(.+)', lambda m: u'﷚<div type="section"><title>' + m.group(1) + '</title>', osis)
- osis = re.sub(u'(﷚[^﷕﷐﷖﷗﷘﷙﷚]+)', r'\1'+u'</div>﷚\n', osis, re.DOTALL)
+ osis = re.sub(u'(﷚<div type="section">[^﷕﷐﷖﷗﷘﷙﷚]+)', r'\1'+u'</div>﷚\n', osis, flags=re.DOTALL)
if relaxedConformance:
osis = re.sub(r'\\ss\s+', r'\\s2 ', osis)
osis = re.sub(r'\\sss\s+', r'\\s3 ', osis)
- osis = re.sub(r'\\s2\s+(.+)', lambda m: u'﷛<div type="subsection"><title>' + m.group(1) + '</title>', osis)
- osis = re.sub(u'(﷛[^﷕﷐﷖﷗﷘﷙﷚﷛]+)', r'\1'+u'</div>﷛\n', osis, re.DOTALL)
+ osis = re.sub(r'\\s2\s+(.+)', lambda m: u'﷛<div type="subSection"><title>' + m.group(1) + '</title>', osis)
+ osis = re.sub(u'(﷛<div type="subSection">[^﷕﷐﷖﷗﷘﷙﷚﷛]+)', r'\1'+u'</div>﷛\n', osis, flags=re.DOTALL)
osis = re.sub(r'\\s3\s+(.+)', lambda m: u'﷜<div type="x-subSubSection"><title>' + m.group(1) + '</title>', osis)
- osis = re.sub(u'(﷜[^﷕﷐﷖﷗﷘﷙﷚﷛﷜]+)', r'\1'+u'</div>﷜\n', osis, re.DOTALL)
+ osis = re.sub(u'(﷜<div type="x-subSubSection">[^﷕﷐﷖﷗﷘﷙﷚﷛﷜]+)', r'\1'+u'</div>﷜\n', osis, flags=re.DOTALL)
osis = re.sub(r'\\s4\s+(.+)', lambda m: u'﷝<div type="x-subSubSubSection"><title>' + m.group(1) + '</title>', osis)
- osis = re.sub(u'(﷝[^﷕﷐﷖﷗﷘﷙﷚﷛﷜﷝]+)', r'\1'+u'</div>﷝\n', osis, re.DOTALL)
+ osis = re.sub(u'(﷝<div type="x-subSubSubSection">[^﷕﷐﷖﷗﷘﷙﷚﷛﷜﷝]+)', r'\1'+u'</div>﷝\n', osis, flags=re.DOTALL)
osis = re.sub(r'\\s5\s+(.+)', lambda m: u'﷞<div type="x-subSubSubSubSection"><title>' + m.group(1) + '</title>', osis)
- osis = re.sub(u'(﷞[^﷕﷐﷖﷗﷘﷙﷚﷛﷜﷝﷞]+)', r'\1'+u'</div>﷞\n', osis, re.DOTALL)
+ osis = re.sub(u'(﷞<div type="x-subSubSubSubSection">[^﷕﷐﷖﷗﷘﷙﷚﷛﷜﷝﷞]+)', r'\1'+u'</div>﷞\n', osis, flags=re.DOTALL)
# \sr_text...
osis = re.sub(r'\\sr\s+(.+)', ur'﷔<title type="scope"><reference>\1</reference></title>', osis)
@@ -564,10 +571,10 @@ def convertToOSIS(sFile):
# \li#(_text...)
osis = re.sub(r'\\ph\b\s*', r'\\li ', osis)
osis = re.sub(r'\\ph(\d)\b\s*', r'\\li\1 ', osis)
- osis = re.sub(r'\\li\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\li[\d\s]|<lb\b|<title\b))', r'<item type="x-indent-1">\1</item>', osis, flags=re.DOTALL)
- osis = re.sub(r'\\li(\d)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\li[\d\s]|<lb\b|<title\b))', r'<item type="x-indent-\1">\2</item>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\li\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\li[\d\s]|<lb\b|<title\b|<item\b))', r'<item type="x-indent-1">\1</item>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\li(\d)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\li[\d\s]|<lb\b|<title\b|<item\b))', r'<item type="x-indent-\1">\2</item>', osis, flags=re.DOTALL)
osis = osis.replace('\n</item>', '</item>\n')
- osis = re.sub(u'(<item [^﷐﷑﷓﷔]+</item>)', r'<list>\1</list>', osis, flags=re.DOTALL)
+ osis = re.sub(u'(<item [^﷐﷑﷓﷔﷠﷡]+</item>)', ur'﷓<list>\1</list>﷓', osis, flags=re.DOTALL)
# \b
osis = re.sub(r'\\b\b\s?', '<lb type="x-p"/>', osis)
@@ -591,14 +598,14 @@ def convertToOSIS(sFile):
osis = re.sub(r'\\qs\b\s(.+?)\\qs\*', r'<l type="selah">\1</l>', osis, flags=re.DOTALL)
# \q#(_text...)
- osis = re.sub(r'\\q\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', r'<l level="1">\1</l>', osis, flags=re.DOTALL)
- osis = re.sub(r'\\q(\d)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', r'<l level="\1">\2</l>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\q\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\q[\d\s]|\\fig|<l\b|<lb\b|<title\b))', r'<l level="1">\1</l>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\q(\d)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\q[\d\s]|\\fig|<l\b|<lb\b|<title\b))', r'<l level="\1">\2</l>', osis, flags=re.DOTALL)
# \qr_text...
# \qc_text...
# \qm#(_text...)
qType = {'qr':'x-right', 'qc':'x-center', 'qm':'x-embedded" level="1', 'qm1':'x-embedded" level="1', 'qm2':'x-embedded" level="2', 'qm3':'x-embedded" level="3', 'qm4':'x-embedded" level="4', 'qm5':'x-embedded" level="5'}
- osis = re.sub(r'\\(qr|qc|qm\d)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', lambda m: '<l type="' + qType[m.group(1)] + '">' + m.group(2) + '</l>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\(qr|qc|qm\d)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\q[\d\s]|\\fig|<l\b|<lb\b|<title\b))', lambda m: '<l type="' + qType[m.group(1)] + '">' + m.group(2) + '</l>', osis, flags=re.DOTALL)
osis = osis.replace('\n</l>', '</l>\n')
osis = re.sub(u'(<l [^﷐﷑﷓﷔]+</l>)', r'<lg>\1</lg>', osis, flags=re.DOTALL)
@@ -649,7 +656,7 @@ def convertToOSIS(sFile):
note = re.sub(r'\\ft\s', '', note)
# \fr_##SEP##
- note = re.sub(r'\\fr\b\s(.+?)(?=(\\f|'+u'﷟))', u'﷟'+r'<reference>\1</reference>', note)
+ note = re.sub(r'\\fr\b\s(.+?)(?=(\\f|'+u'﷟))', u'﷟'+r'<reference type="annotateRef">\1</reference>', note)
# \fk_
note = re.sub(r'\\fk\b\s(.+?)(?=(\\f|'+u'﷟))', u'﷟'+r'<catchWord>\1</catchWord>', note)
@@ -712,21 +719,20 @@ def convertToOSIS(sFile):
# \xq_
note = re.sub(r'\\xq\b\s(.+?)(?=(\\x|'+u'﷟))', u'﷟'+r'<catchWord>\1</catchWord>', note)
- # \xt_
- note = re.sub(r'\\xt\s', '', note)
+ # \xo_##SEP##
+ note = re.sub(r'\\xo\b\s(.+?)(?=(\\x|'+u'﷟))', u'﷟'+r'<reference type="annotateRef">\1</reference>', note)
+ # \xk_
+ note = re.sub(r'\\xk\b\s(.+?)(?=(\\x|'+u'﷟))', u'﷟'+r'<catchWord>\1</catchWord>', note)
+
+ # \xt_ # This isn't guaranteed to be *the* reference, but it's a good guess.
+ note = re.sub(r'\\xt\b\s(.+?)(?=(\\x|'+u'﷟))', u'﷟'+r'<reference>\1</reference>', note)
+
if relaxedConformance:
# TODO: \xtSee..\xtSee*: Concordance and Names Index markup for an alternate entry target reference.
# TODO: \xtSeeAlso...\xtSeeAlso: Concordance and Names Index markup for an additional entry target reference.
pass
-
- # \xo_##SEP##
- note = re.sub(r'\\xo\b\s(.+?)(?=(\\x|'+u'﷟))', u'﷟'+r'<reference>\1</reference>', note)
-
- # \xk_
- note = re.sub(r'\\xk\b\s(.+?)(?=(\\x|'+u'﷟))', u'﷟'+r'<catchWord>\1</catchWord>', note)
-
if relaxedConformance:
note = note.replace(r'\xq*', '')
note = note.replace(r'\xt*', '')
@@ -743,7 +749,7 @@ def convertToOSIS(sFile):
supported: \\x...\\x*, \\xo, \\xk, \\xq, \\xt, \\xot...\\xot*, \\xnt...\\xnt*, \\xdc...\\xdc*
"""
# \x_+_...\x*
- osis = re.sub(r'\\x\s+([^\s]+?)\s+(.+?)\s*\\x\*', lambda m: '<note' + ((' n=""') if (m.group(1) == u'-') else ('' if (m.group(1) == '+') else (' n="' + m.group(1) + '"'))) + ' type="crossReference"><reference>' + m.group(2) + u'</reference>﷟</note>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\x\s+([^\s]+?)\s+(.+?)\s*\\x\*', lambda m: '<note' + ((' n=""') if (m.group(1) == u'-') else ('' if (m.group(1) == '+') else (' n="' + m.group(1) + '"'))) + ' type="crossReference">' + m.group(2) + u'﷟</note>', osis, flags=re.DOTALL)
osis = re.sub(r'(<note [^>]*?type="crossReference"[^>]*>.*?</note>)', lambda m: processXref(m.group(1)), osis, flags=re.DOTALL)
@@ -871,7 +877,7 @@ def convertToOSIS(sFile):
if fig_cap:
figure += '<caption>' + fig_cap + '</caption>\n'
if fig_ref:
- figure += '<reference>' + fig_ref + '</reference>\n'
+ figure += '<reference type="annotateRef">' + fig_ref + '</reference>\n'
if fig_desc:
figure += '<!-- fig DESC - ' + fig_desc + ' -->\n'
if fig_loc:
@@ -1019,12 +1025,12 @@ def convertToOSIS(sFile):
osis = re.sub(r'(</[^\s>]+) [^>]*>', r'\1>', osis)
osis = osis.replace('<lb type="x-p"/>', '<lb/>')
# delete Unicode tags
- for c in u'﷐﷑﷒﷓﷔﷕﷖﷗﷘﷙﷚﷛﷜﷝﷞﷟':
+ for c in u'﷐﷑﷒﷓﷔﷕﷖﷗﷘﷙﷚﷛﷜﷝﷞﷟﷠﷡﷢﷣﷤﷥﷦﷧﷨﷩﷪﷫﷬﷭﷮﷯':
osis = osis.replace(c, '')
- for endBlock in ['p', 'div', 'note', 'l', 'lg', 'chapter', 'verse']:
- osis = re.sub(' +</'+endBlock+'>', '</'+endBlock+r'>', osis)
- osis = re.sub(' +<'+endBlock+'( eID=[^/>]+/>)', '</'+endBlock+r'\1', osis)
+ for endBlock in ['p', 'div', 'note', 'l', 'lg', 'chapter', 'verse', 'head', 'title', 'item', 'list']:
+ osis = re.sub('\s+</'+endBlock+'>', '</'+endBlock+r'>\n', osis)
+ osis = re.sub('\s+<'+endBlock+'( eID=[^/>]+/>)', '<'+endBlock+r'\1'+'\n', osis)
osis = re.sub(' +((</[^>]+>)+) *', r'\1 ', osis)
# strip extra spaces & newlines