summary | refs | log | tree | commit | diff | stats
path: root/modules
diff options
context:
space:
mode:
author: Chris Little <chrislit@crosswire.org> 2012-08-10 10:46:41 +0000
committer: Chris Little <chrislit@crosswire.org> 2012-08-10 10:46:41 +0000
commit: d4c721752ed56983e40400cd61b6255ba0619a66 (patch)
tree: 83351faa9b279723248be8da08fc0545243bfcac /modules
parent: c2c9a64c7c469f3d5a75dae3397dd164476f95a5 (diff)
download: sword-tools-d4c721752ed56983e40400cd61b6255ba0619a66.tar.gz
re-implemented \i- introduction tags, independent of non-intro versions

cleaned up extraneous spaces
removed ###TESTED### markers

git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@372 07627401-56e2-0310-80f4-f8cd0041bdcd
Diffstat (limited to 'modules')
-rwxr-xr-x modules/python/usfm2osis.py | 202
1 files changed, 122 insertions, 80 deletions
diff --git a/modules/python/usfm2osis.py b/modules/python/usfm2osis.py
index a4812e6..bccf020 100755
--- a/modules/python/usfm2osis.py
+++ b/modules/python/usfm2osis.py
@@ -54,7 +54,7 @@ scriptVersion = '0.5'
# ﷐ book
# ﷑ chapter
# ﷒ verse
-# ﷓ paragraph
+# ﷓ paragraph
# ﷔ title
# ﷕ ms1
# ﷖ ms2
@@ -95,7 +95,7 @@ bookDict = {
# DC - Eastern Orthodox
'3MA':'3Macc', '4MA':'4Macc', '1ES':'1Esd', '2ES':'2Esd', 'MAN':'PrMan', 'PS2':'Ps151',
# Rahlfs' LXX
- 'ODA':'Odes', 'PSS':'PssSol',
+ 'ODA':'Odes', 'PSS':'PssSol',
# Esdrae
'EZA':'4Ezra', '5EZ':'5Ezra', '6EZ':'6Ezra',
# Inconsistency with Esther
@@ -134,8 +134,8 @@ addBookDict = {
'1CL':'1Clem', '2CL':'2Clem', 'SHE':'Herm', 'LBA':'Barn', 'DID':'Did',
###
# Proposed replacements <http://lc.bfbs.org.uk/e107_files/downloads/canonicalissuesinparatext.pdf>
- 'ODE':'Odes',
-
+ 'ODE':'Odes',
+
# Additional biblical books
'ADE':'AddEsth'
}
@@ -148,7 +148,7 @@ canonicalOrder = (
# DC - Catholic
'TOB', 'JDT', 'ESG', 'ADE', 'WIS', 'SIR', 'PSS', 'BAR', 'LJE', 'DAG', 'S3Y', 'SUS', 'BEL', '1MA', '2MA',
# DC - Eastern Orthodox
- '1ES', 'MAN', 'PS2', '3MA', '2ES', '4MA',
+ '1ES', 'MAN', 'PS2', '3MA', '2ES', '4MA',
# NT
'MAT', 'MRK', 'LUK', 'JHN', 'ACT', 'ROM', '1CO', '2CO', 'GAL', 'EPH', 'PHP', 'COL', '1TH', '2TH', '1TI', '2TI',
'TIT', 'PHM', 'HEB', 'JAS', '1PE', '2PE', '1JN', '2JN', '3JN', 'JUD', 'REV',
@@ -157,7 +157,7 @@ canonicalOrder = (
# Esdrae
'EZA', '5EZ', '6EZ',
# Inconsistency with Esther
-
+
# Syriac
'PS3', '2BA', 'LBA',
# Ethiopic
@@ -274,31 +274,31 @@ def convertToOSIS(sFile):
supported: \id, \ide, \sts, \rem, \h, \toc1, \toc2, \toc3
"""
global loc2osisBk, osis2locBk
- # \id_<CODE>_(Name of file, Book name, Language, Last edited, Date etc.) ###TESTED###
+ # \id_<CODE>_(Name of file, Book name, Language, Last edited, Date etc.)
osis = re.sub(r'\\id\s+([A-Z0-9]{3})\b\s*([^\\\n]*?)\n(.*)(?=\\id|$)', lambda m: u'﷐<div type="book" osisID="' + bookDict[m.group(1)] + '">\n' + (('<!-- id comment - ' + m.group(2) + ' -->\n') if m.group(2) else '') + m.group(3) + u'</div type="book">﷐\n' , osis, flags=re.DOTALL)
# keep a copy of the OSIS book abbreviation for below (\toc3 processing) to store for mapping localized book names to/from OSIS
osisBook = re.search(r'\\id\s+([A-Z0-9]{3})', osis)
if osisBook:
osisBook = bookDict[osisBook.group(1)]
- # \ide_<ENCODING> ###TESTED###
+ # \ide_<ENCODING>
osis = re.sub(r'\\ide\b.*\n', r'', osis) # delete, since this was handled above
# \sts_<STATUS CODE>
- osis = re.sub(r'\\sts\b\s+(.+)\s*\n', r'<milestone type="x-sts" n="\1"/>\n', osis)
+ osis = re.sub(r'\\sts\b\s+(.+)\s*\n', r'<milestone type="x-usfm-sts" n="\1"/>\n', osis)
- # \rem_text... ###TESTED###
+ # \rem_text...
osis = re.sub(r'\\rem\b\s+(.+)', r'<!-- rem - \1 -->', osis)
- # \h#_text... ###TESTED###
+ # \h#_text...
osis = re.sub(r'\\h\b\s+(.+)\s*\n', r'<title type="runningHead">\1</title>\n', osis)
- # TODO: \h1-5
+ osis = re.sub(r'\\h(\d)\b\s+(.+)\s*\n', r'<title type="runningHead" n="\1">\2</title>\n', osis)
# \toc1_text...
- osis = re.sub(r'\\toc1\b\s+(.+)\s*\n', r'<milestone type="x-toc1" n="\1"/>\n', osis)
+ osis = re.sub(r'\\toc1\b\s+(.+)\s*\n', r'<milestone type="x-usfm-toc1" n="\1"/>\n', osis)
# \toc2_text...
- osis = re.sub(r'\\toc2\b\s+(.+)\s*\n', r'<milestone type="x-toc2" n="\1"/>\n', osis)
+ osis = re.sub(r'\\toc2\b\s+(.+)\s*\n', r'<milestone type="x-utfm-toc2" n="\1"/>\n', osis)
# \toc3_text...
locBook = re.search(r'\\toc3\b\s+(.+)\s*\n', osis)
@@ -307,7 +307,7 @@ def convertToOSIS(sFile):
if osisBook:
osis2locBk[osisBook]=locBook
loc2osisBk[locBook]=osisBook
- osis = re.sub(r'\\toc3\b\s+(.+)\s*\n', lambda m: r'<milestone type="x-toc3" n="\1"/>\n', osis)
+ osis = re.sub(r'\\toc3\b\s+(.+)\s*\n', lambda m: r'<milestone type="x-usfm-toc3" n="\1"/>\n', osis)
return osis
@@ -315,33 +315,75 @@ def convertToOSIS(sFile):
def cvtIntroductions(osis, relaxedConformance):
"""
Introductions
- supported:
- unsupported: \imt#, \is#, \ip, \ipi, \im, \imi, \ipq, \imq, \ipr, \iq#, \ib, \ili, \iot, \io#, \ior...\ior*, \iex, \iqt...\iqt*, \imte, \ie
+ supported: \imt#, \is#, \ip, \ipi, \im, \imi, \ipq, \imq, \ipr, \iq#, \ib, \ili#, \iot, \io#, \ior...\ior*, \iex, \iqt...\iqt*, \imte, \ie
"""
- # \imt#
- # \is#
- # \ip
- # \ipi
- # \im
- # \imi
- # \ipq
- # \imq
- # \ipr
- # \iq#
+ # \imt#_text...
+ osis = re.sub(r'\\imt(\d?)\s+(.+)', lambda m: r'<title ' + (r'level="'+m.group(1)+r'" ' if m.group(1) else r'') + r'type="main" subType="x-introduction">' + m.group(2) + r'</title>', osis)
+
+ # \is#_text...
+ osis = re.sub(r'\\is1?\s+(.+)', lambda m: u'﷚<div type="section" subType="x-introduction"><title>' + m.group(1) + '</title>', osis)
+ osis = re.sub(u'(﷚[^﷕﷐﷖﷗﷘﷙﷚]+)', r'\1'+u'</div>﷚\n', osis, re.DOTALL)
+ osis = re.sub(r'\\is2\s+(.+)', lambda m: u'﷛<div type="subsection" subType="x-introduction"><title>' + m.group(1) + '</title>', osis)
+ osis = re.sub(u'(﷛[^﷕﷐﷖﷗﷘﷙﷚﷛]+)', r'\1'+u'</div>﷛\n', osis, re.DOTALL)
+ osis = re.sub(r'\\is3\s+(.+)', lambda m: u'﷜<div type="x-subSubSection" subType="x-introduction"><title>' + m.group(1) + '</title>', osis)
+ osis = re.sub(u'(﷜[^﷕﷐﷖﷗﷘﷙﷚﷛﷜]+)', r'\1'+u'</div>﷜\n', osis, re.DOTALL)
+ osis = re.sub(r'\\is4\s+(.+)', lambda m: u'﷝<div type="x-subSubSubSection" subType="x-introduction"><title>' + m.group(1) + '</title>', osis)
+ osis = re.sub(u'(﷝[^﷕﷐﷖﷗﷘﷙﷚﷛﷜﷝]+)', r'\1'+u'</div>﷝\n', osis, re.DOTALL)
+ osis = re.sub(r'\\is5\s+(.+)', lambda m: u'﷞<div type="x-subSubSubSubSection" subType="x-introduction"><title>' + m.group(1) + '</title>', osis)
+ osis = re.sub(u'(﷞[^﷕﷐﷖﷗﷘﷙﷚﷛﷜﷝﷞]+)', r'\1'+u'</div>﷞\n', osis, re.DOTALL)
+
+ # \ip_text...
+ osis = re.sub(r'\\ip\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'﷓<p subType="x-introduction">\n' + m.group(1) + u'﷓</p>\n', osis, flags=re.DOTALL)
+
+ # \ipi_text...
+ # \im_text...
+ # \imi_text...
+ # \ipq_text...
+ # \imq_text...
+ # \ipr_text...
+ pType = {'ipi':'x-indented', 'im':'x-noindent', 'imi':'x-noindent-indented', 'ipq':'x-quote', 'imq':'x-noindent-quote', 'ipr':'x-right'}
+ osis = re.sub(r'\\(ipi|im|ipq|imq|ipr)\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'﷓<p type="' + pType[m.group(1)] + '" subType="x-introduction">\n' + m.group(2) + u'﷓</p>\n', osis, flags=re.DOTALL)
+
+ # \iq#_text...
+ osis = re.sub(r'\\iq\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\i?q[\d\s]|<l\b|<lb\b|<title\b))', r'<l level="1" subType="x-introduction">\1</l>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\iq(\d)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\i?q[\d\s]|<l\b|<lb\b|<title\b))', r'<l level="\1" subType="x-introduction">\2</l>', osis, flags=re.DOTALL)
+
# \ib
- # \ili#
- # \iot
- # \io#
- # \ior...\ior*
- # \iex
- # \iqt...\iqt*
- # \imte#
+ osis = re.sub(r'\\ib\b\s?', r'<lb type="x-p"/>', osis)
+ osis = osis.replace('\n</l>', '</l>\n')
+ osis = re.sub(u'(<l [^﷐﷑﷓﷔]+</l>)', r'<lg>\1</lg>', osis, flags=re.DOTALL)
+ osis = re.sub('(<lg>.+?</lg>)', lambda m: m.group(1).replace(r'<lb type="x-p"/>', r'</lg><lg>'), osis, flags=re.DOTALL) # re-handle \b that occurs within <lg>
+
+ # \ili#_text...
+ osis = re.sub(r'\\ili\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\ili[\d\s]|<lb\b|<title\b))', r'<item type="x-indent-1" subType="x-introduction">\1</item>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\ili(\d)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\ili[\d\s]|<lb\b|<title\b))', r'<item type="x-indent-\1" subType="x-introduction">\2</item>', osis, flags=re.DOTALL)
+ osis = osis.replace('\n</item>', '</item>\n')
+ osis = re.sub(u'(<item [^﷐﷑﷓﷔]+</item>)', r'<list>\1</list>', osis, flags=re.DOTALL)
+
+ # \iot_text...
+ # \io#_text...(references range)
+ osis = re.sub(r'\\io\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\io[t\d\s]|<lb\b|<title\b))', r'<item type="x-indent-1" subType="x-introduction">\1</item>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\io(\d)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\io[t\d\s]|<lb\b|<title\b))', r'<item type="x-indent-\1" subType="x-introduction">\2</item>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\iot\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\io[t\d\s]|<lb\b|<title\b))', r'<item type="head">\1</item type="head">', osis, flags=re.DOTALL)
+ osis = osis.replace('\n</item>', '</item>\n')
+ osis = re.sub(u'(<item [^﷐﷑﷓﷔]+</item>)', r'<div type="outline"><list>\1</list></div>', osis, flags=re.DOTALL)
+ osis = re.sub(r'item type="head"', r'head', osis)
+
+ # \ior_text...\ior*
+ osis = re.sub(r'\\ior\b\s+(.+?)\\ior\*', r'<reference>\1</reference>', osis, flags=re.DOTALL)
+
+ # \iex # TODO: look for example; I have no idea what this would look like in context
+ osis = re.sub(r'\\iex\b\s*(.+?)'+u'?=(\s*(\\c|</div type="book">﷐))', r'<div type="bridge">\1</div>', osis, flags=re.DOTALL)
+
+ # \iqt_text...\iqt*
+ osis = re.sub(r'\\iqt\s+(.+?)\\iqt\*', r'<q subType="x-introduction">\1</q>', osis, flags=re.DOTALL)
+
+ # \imte#_text...
+ osis = re.sub(r'\\imte(\d?)\b\s+(.+)', lambda m: r'<title ' + (r'level="'+m.group(1)+r'" ' if m.group(1) else r'') + r'type="main" subType="x-introduction-end">' + m.group(2) + r'</title>', osis)
+
# \ie
- # encapsulate introduction elements in a <div>
- #osis = re.sub(r'(\\i(mt|mt\d+|s|d\d+|p|pi|m|mi|pq|mq|pr|q|q\d+|b|li|ot|o|o\d+|or|or*|ex|qt|qt*|mte|e)\b.+?)(?=\n\\(c|s|m|p|d))', u'<div type="introduction">'+r'\1'+u'</div>\n', osis, flags=re.DOTALL)
- # map all introduction elements to their non-introduction equivalents
- #for e in [r'mt', r'mt\d+', r's', r'd\d+', r'p', r'pi', r'm', r'mi', r'pq', r'mq', r'pr', r'q', r'q\d+', r'b', r'li', r'ot', r'o', r'o\d+', r'or', r'or*', r'ex', r'qt', r'qt*', r'mte', r'e']:
- # osis = re.sub(r'\\i('+e+r')\b', r'\\\1', osis)
+ osis = re.sub(r'\\ie\b\s*', r'<milestone type="x-usfm-ie"/>', osis)
+
return osis
@@ -350,7 +392,7 @@ def convertToOSIS(sFile):
Titles, Headings, and Labels
supported: \mt#, \mte#, \ms#, \mr, \s#, \sr, \r, \rq...\rq*, \d, \sp
"""
- # \ms#_text... ###TESTED###
+ # \ms#_text...
osis = re.sub(r'\\ms1?\s+(.+)', lambda m: u'﷕<div type="majorSection"><title>' + m.group(1) + '</title>', osis)
osis = re.sub(u'(﷕[^﷕﷐]+)', r'\1'+u'</div>﷕\n', osis, re.DOTALL)
osis = re.sub(r'\\ms2\s+(.+)', lambda m: u'﷖<div type="majorSection" n="2"><title>' + m.group(1) + '</title>', osis)
@@ -365,7 +407,7 @@ def convertToOSIS(sFile):
# \mr_text...
osis = re.sub(r'\\mr\s+(.+)', u'﷔<title type="scope"><reference>'+r'\1</reference></title>', osis)
- # \s#_text... ###TESTED###
+ # \s#_text...
osis = re.sub(r'\\s1?\s+(.+)', lambda m: u'﷚<div type="section"><title>' + m.group(1) + '</title>', osis)
osis = re.sub(u'(﷚[^﷕﷐﷖﷗﷘﷙﷚]+)', r'\1'+u'</div>﷚\n', osis, re.DOTALL)
if relaxedConformance:
@@ -387,13 +429,13 @@ def convertToOSIS(sFile):
# \rq_text...\rq*
osis = re.sub(r'\\rq\s+(.+?)\\rq\*', u'<reference type="source">'+r'\1</reference>', osis, flags=re.DOTALL)
- # \d_text... ###TESTED###
+ # \d_text...
osis = re.sub(r'\\d\s+(.+)', u'﷔<title canonical="true" type="psalm">'+r'\1</title>', osis)
- # \sp_text... ###TESTED###
+ # \sp_text...
osis = re.sub(r'\\sp\s+(.+)', r'<speaker>\1</speaker>', osis)
- # \mt#_text... ###TESTED###
+ # \mt#_text...
osis = re.sub(r'\\mt(\d?)\s+(.+)', lambda m: r'<title ' + (r'level="'+m.group(1)+r'" ' if m.group(1) else r'') + r'type="main">' + m.group(2) + r'</title>', osis)
# \mte#_text...
osis = re.sub(r'\\mte(\d?)\s+(.+)', lambda m: r'<title ' + (r'level="'+m.group(1)+r'" ' if m.group(1) else r'') + r'type="main" subType="x-end">' + m.group(2) + r'</title>', osis)
@@ -406,7 +448,7 @@ def convertToOSIS(sFile):
Chapters and Verses
supported: \c, \ca...\ca*, \cl, \cp, \cd, \v, \va...\va*, \vp...\vp*
"""
- # \c_# ###TESTED###
+ # \c_#
osis = re.sub(r'\\c\s+([^\s]+)\b(.+?)(?=(\\c\s+|</div type="book"))', lambda m: u'﷑<chapter osisID="$BOOK$.' + m.group(1) + r'" sID="$BOOK$.' + m.group(1) + '"/>' + m.group(2) + u'<chapter eID="$BOOK$.' + m.group(1) + u'"/>﷓\n', osis, flags=re.DOTALL)
# \cp_#
@@ -432,7 +474,7 @@ def convertToOSIS(sFile):
# \cd_# <--This # seems to be an error
osis = re.sub(r'\\cd\b\s+(.+)', u'﷔<title type="x-description">'+r'\1</title>', osis)
- # \v_# ###TESTED###
+ # \v_#
osis = re.sub(r'\\v\s+([^\s]+)\b\s*(.+?)(?=(\\v\s+|</div type="book"|<chapter eID))', lambda m: u'﷒<verse osisID="$BOOK$.$CHAP$.' + m.group(1) + r'" sID="$BOOK$.$CHAP$.' + m.group(1) + r'"/>' + m.group(2) + r'<verse eID="$BOOK$.$CHAP$.' + m.group(1) + u'"/>﷒\n', osis, flags=re.DOTALL)
# \vp_#\vp*
@@ -460,36 +502,36 @@ def convertToOSIS(sFile):
Paragraphs
supported: \p, \m, \pmo, \pm, \pmc, \pmr, \pi#, \mi, \nb, \cls, \li#, \pc, \pr, \ph#, \b
"""
- # \p(_text...) ###TESTED###
- osis = re.sub(r'\\p\s+(.*?)(?=(\\(m|p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'﷓<p>\n' + m.group(1) + u'﷓</p>\n', osis, flags=re.DOTALL)
+ # \p(_text...)
+ osis = re.sub(r'\\p\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'﷓<p>\n' + m.group(1) + u'﷓</p>\n', osis, flags=re.DOTALL)
# \pc(_text...)
# \pr(_text...)
- # \m(_text...) ###TESTED###
+ # \m(_text...)
# \pmo(_text...)
# \pm(_text...)
# \pmc(_text...)
# \pmr_text... # deprecated: map to same as \pr
# \pi#(_Sample text...)
# \mi(_text...)
- # \nb ###TESTED###
+ # \nb
pType = {'pc':'x-center', 'pr':'x-right', 'm':'x-noindent', 'pmo':'x-embedded-opening', 'pm':'x-embedded', 'pmc':'x-embedded-closing', 'pmr':'x-right', 'pi':'x-indented-1', 'pi1':'x-indented-1', 'pi2':'x-indented-2', 'pi3':'x-indented-3', 'pi4':'x-indented-4', 'pi5':'x-indented-5', 'mi':'x-noindent-indented', 'nb':'x-nobreak'}
- osis = re.sub(r'\\(pc|pr|m|pmo|pm|pmc|pmr|pi|pi1|pi2|pi3|pi4|pi5|mi|nb)\s+(.*?)(?=(\\(m|p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'﷓<p type="' + pType[m.group(1)] + '">\n' + m.group(2) + u'﷓</p>\n', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\(pc|pr|m|pmo|pm|pmc|pmr|pi|pi1|pi2|pi3|pi4|pi5|mi|nb)\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'﷓<p type="' + pType[m.group(1)] + '">\n' + m.group(2) + u'﷓</p>\n', osis, flags=re.DOTALL)
# \cls_text...
- osis = re.sub(r'\\m\s+(.+?)(?=(\\(m|p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'﷓<closer>' + m.group(1) + u'﷓</closer>\n', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\m\s+(.+?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'﷓<closer>' + m.group(1) + u'﷓</closer>\n', osis, flags=re.DOTALL)
# \ph#(_text...)
- # \li#(_text...) ###TESTED###
+ # \li#(_text...)
osis = re.sub(r'\\ph\b\s*', r'\\li ', osis)
- osis = re.sub(r'\\ph(\d+)\b\s*', r'\\li\1 ', osis)
+ osis = re.sub(r'\\ph(\d)\b\s*', r'\\li\1 ', osis)
osis = re.sub(r'\\li\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\li[\d\s]|<lb\b|<title\b))', r'<item type="x-indent-1">\1</item>', osis, flags=re.DOTALL)
- osis = re.sub(r'\\li(\d+)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\li[\d\s]|<lb\b|<title\b))', r'<item type="x-indent-\1">\2</item>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\li(\d)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\li[\d\s]|<lb\b|<title\b))', r'<item type="x-indent-\1">\2</item>', osis, flags=re.DOTALL)
osis = osis.replace('\n</item>', '</item>\n')
osis = re.sub(u'(<item [^﷐﷑﷓﷔]+</item>)', r'<list>\1</list>', osis, flags=re.DOTALL)
- # \b ###TESTED###
- osis = re.sub(r'\\b\b\s?', r'<lb type="p"/>', osis)
+ # \b
+ osis = re.sub(r'\\b\b\s?', r'<lb type="x-p"/>', osis)
return osis
@@ -502,21 +544,21 @@ def convertToOSIS(sFile):
# \qs_(Selah)\qs*
osis = re.sub(r'\\qs\b\s(.+?)\\qs\*', r'<l type="selah">\1</l>', osis, flags=re.DOTALL)
- # \q#(_text...) ###TESTED###
+ # \q#(_text...)
osis = re.sub(r'\\q\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', r'<l level="1">\1</l>', osis, flags=re.DOTALL)
- osis = re.sub(r'\\q(\d+)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', r'<l level="\1">\2</l>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\q(\d)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', r'<l level="\1">\2</l>', osis, flags=re.DOTALL)
# \qr_text...
# \qc_text...
# \qm#(_text...)
qType = {'qr':'x-right', 'qc':'x-center', 'qm':'x-embedded" level="1', 'qm1':'x-embedded" level="1', 'qm2':'x-embedded" level="2', 'qm3':'x-embedded" level="3', 'qm4':'x-embedded" level="4', 'qm5':'x-embedded" level="5'}
- osis = re.sub(r'\\(qr|qc|qm\d+)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', lambda m: r'<l type="' + qType[m.group(1)] + '">' + m.group(2) + '</l>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\(qr|qc|qm\d)\b\s*(.*?)(?=(['+u'﷐﷑﷓﷔'+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', lambda m: r'<l type="' + qType[m.group(1)] + '">' + m.group(2) + '</l>', osis, flags=re.DOTALL)
osis = osis.replace('\n</l>', '</l>\n')
osis = re.sub(u'(<l [^﷐﷑﷓﷔]+</l>)', r'<lg>\1</lg>', osis, flags=re.DOTALL)
- # \b ###TESTED###
- osis = re.sub('(<lg>.+?</lg>)', lambda m: m.group(1).replace(r'<lb type="p"/>', r'</lg><lg>'), osis, flags=re.DOTALL) # re-handle \b that occurs within <lg>
+ # \b
+ osis = re.sub('(<lg>.+?</lg>)', lambda m: m.group(1).replace(r'<lb type="x-p"/>', r'</lg><lg>'), osis, flags=re.DOTALL) # re-handle \b that occurs within <lg>
# \qa_text...
osis = re.sub(r'\\qa\s+(.+)', u'﷔<title type="acrostic">'+r'\1</title>', osis)
@@ -551,13 +593,13 @@ def convertToOSIS(sFile):
# \fdc_refs...\fdc*
note = re.sub(r'\\fdc\b\s(.+?)\\fdc\b\*', r'<seg editions="dc">\1</seg>', note)
- # \fq_ ###TESTED###
+ # \fq_
note = re.sub(r'\\fq\b\s(.+?)(?=(\\f|'+u'﷟))', u'﷟'+r'<catchWord>\1</catchWord>', note)
- # \fqa_ ###TESTED###
+ # \fqa_
note = re.sub(r'\\fqa\b\s(.+?)(?=(\\f|'+u'﷟))', u'﷟'+r'<rdg type="alternate">\1</rdg>', note)
- # \ft_ ###TESTED###
+ # \ft_
note = re.sub(r'\\ft\s', r'', note)
# \fr_##SEP##
@@ -595,7 +637,7 @@ def convertToOSIS(sFile):
Footnotes
supported:\f...\f*, \fe...\fe*, \fr, \fk, \fq, \fqa, \fl, \fp, \fv, \ft, \fdc...\fdc*, \fm...\fm*
"""
- # \f_+_...\f* ###TESTED###
+ # \f_+_...\f*
osis = re.sub(r'\\f\s+([^\s\\]+)?\s*(.+?)\s*\\f\*', lambda m: r'<note' + ((' n=""') if (m.group(1) == u'-') else ('' if (m.group(1) == '+') else (' n="' + m.group(1) + '"'))) + ' placement="foot">' + m.group(2) + u'﷟</note>', osis, flags=re.DOTALL)
# \fe_+_...\fe*
@@ -624,7 +666,7 @@ def convertToOSIS(sFile):
# \xq_
note = re.sub(r'\\xq\b\s(.+?)(?=(\\x|'+u'﷟))', u'﷟'+r'<catchWord>\1</catchWord>', note)
- # \xt_ ###TESTED###
+ # \xt_
note = re.sub(r'\\xt\s', r'', note)
# \xo_##SEP##
@@ -648,7 +690,7 @@ def convertToOSIS(sFile):
Cross References
supported: \\x...\\x*, \\xo, \\xk, \\xq, \\xt, \\xot...\\xot*, \\xnt...\\xnt*, \\xdc...\\xdc*
"""
- # \x_+_...\x* ###TESTED###
+ # \x_+_...\x*
osis = re.sub(r'\\x\s+([^\s]+?)\s+(.+?)\s*\\x\*', lambda m: r'<note' + ((' n=""') if (m.group(1) == u'-') else ('' if (m.group(1) == '+') else (' n="' + m.group(1) + '"'))) + ' type="crossReference"><reference>' + m.group(2) + u'</reference>﷟</note>', osis, flags=re.DOTALL)
osis = re.sub(r'(<note [^>]*?type="crossReference"[^>]*>.*?</note>)', lambda m: processXref(m.group(1)), osis, flags=re.DOTALL)
@@ -664,10 +706,10 @@ def convertToOSIS(sFile):
Special Text
supported: \add...\add*, \bk...\bk*, \dc...\dc*, \k...\k*, \lit, \nd...\nd*, \ord...\ord*, \pn...\pn*, \qt...\qt*, \sig...\sig*, \sls...\sls*, \tl...\tl*, \wj...\wj*
"""
- # \add_...\add* ###TESTED###
+ # \add_...\add*
osis = re.sub(r'\\add\s+(.+?)\\add\*', r'<transChange type="added">\1</transChange>', osis, flags=re.DOTALL)
- # \wj_...\wj* ###TESTED###
+ # \wj_...\wj*
osis = re.sub(r'\\wj\s+(.+?)\\wj\*', r'<q who="Jesus" marker="">\1</q>', osis, flags=re.DOTALL)
# \nd_...\nd*
@@ -676,7 +718,7 @@ def convertToOSIS(sFile):
# \pn_...\pn*
osis = re.sub(r'\\pn\s+(.+?)\\pn\*', r'<name>\1</name>', osis, flags=re.DOTALL)
- # \qt_...\qt*
+ # \qt_...\qt* # TODO:should this be <q>?
osis = re.sub(r'\\qt\s+(.+?)\\qt\*', r'<seg type="otPassage">\1</seg>', osis, flags=re.DOTALL)
# \sig_...\sig*
@@ -688,16 +730,16 @@ def convertToOSIS(sFile):
# \tl_...\tl*
osis = re.sub(r'\\tl\s+(.+?)\\tl\*', r'<foreign>\1</foreign>', osis, flags=re.DOTALL)
- # \bk_...\bk* ###TESTED###
+ # \bk_...\bk*
osis = re.sub(r'\\bk\s+(.+?)\\bk\*', r'<name type="x-workTitle">\1</name>', osis, flags=re.DOTALL)
- # \k_...\k* ###TESTED###
+ # \k_...\k*
osis = re.sub(r'\\k\s+(.+?)\\k\*', r'<seg type="keyword">\1</seg>', osis, flags=re.DOTALL)
# \lit
- osis = re.sub(r'\\lit\s+(.*?)(?=(\\(m|p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'﷓<p type="x-liturgical">\n' + m.group(1) + u'﷓</p>\n', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\lit\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'﷓<p type="x-liturgical">\n' + m.group(1) + u'﷓</p>\n', osis, flags=re.DOTALL)
- # \dc_...\dc* #### TODO: Find an example---should this really be transChange?
+ # \dc_...\dc* # TODO: Find an example---should this really be transChange?
osis = re.sub(r'\\dc\b\s*(.+?)\\dc\*', r'<transChange type="added" editions="dc">\1</transChange>', osis, flags=re.DOTALL)
# \sls_...\sls*
@@ -717,7 +759,7 @@ def convertToOSIS(sFile):
# \bd_...\bd*
osis = re.sub(r'\\bd\s+(.+?)\\bd\*', r'<hi type="bold">\1</hi>', osis, flags=re.DOTALL)
- # \it_...\it* ###TESTED###
+ # \it_...\it*
osis = re.sub(r'\\it\s+(.+?)\\it\*', r'<hi type="italic">\1</hi>', osis, flags=re.DOTALL)
# \bdit_...\bdit*
@@ -781,7 +823,7 @@ def convertToOSIS(sFile):
return figure
osis = re.sub(r'\\fig\b\s+([^\|]*)\s*\|([^\|]*)\s*\|([^\|]*)\s*\|([^\|]*)\s*\|([^\|]*)\s*\|([^\|]*)\s*\|([^\\]*)\s*\\fig\*', makeFigure, osis)
- # \ndx_...\ndx* #TODO tag with x-glossary instead of <index/>? Is <index/> containerable?
+ # \ndx_...\ndx* # TODO tag with x-glossary instead of <index/>? Is <index/> containerable?
osis = re.sub(r'\\ndx\s+(.+?)(\s*)\\ndx\*', r'\1<index index="Index" level1="\1"/>\2', osis, flags=re.DOTALL)
# \pro_...\pro*
@@ -834,7 +876,7 @@ def convertToOSIS(sFile):
osis = re.sub(r'\\ex\s+([^\s]+?)\s+(.+?)\s*\\ex\*', lambda m: r'<note' + ((' n=""') if (m.group(1) == u'-') else ('' if (m.group(1) == '+') else (' n="' + m.group(1) + '"'))) + ' type="crossReference" subType="x-study"><reference>' + m.group(2) + u'</reference>﷟</note>', osis, flags=re.DOTALL)
osis = re.sub(r'(<note [^>]*?type="crossReference"[^>]*>.*?</note>)', lambda m: processXref(m.group(1)), osis, flags=re.DOTALL)
- # \esb...\esbex ### TODO: this likely needs to go much earlier in the process
+ # \esb...\esbex # TODO: this likely needs to go much earlier in the process
osis = re.sub(r'\\esb\b\s*(.+?)\\esbe\b\s*', '﷕<div type="x-sidebar">\1</div>﷕\n', osis, flags=re.DOTALL)
# \cat_<TAG>\cat*
@@ -850,7 +892,7 @@ def convertToOSIS(sFile):
We can't really know what these mean, but will preserve them as <milestone/> elements.
"""
# \z
- osis = re.sub(r'\\z([^\s]+)', r'<milestone type="x-z-\1"/>', osis)
+ osis = re.sub(r'\\z([^\s]+)', r'<milestone type="x-usfm-z-\1"/>', osis)
return osis
@@ -905,7 +947,7 @@ def convertToOSIS(sFile):
# delete attributes from end tags (since they are invalid)
osis = re.sub(r'(</[^\s>]+) [^>]*>', r'\1>', osis)
- osis = osis.replace(r'<lb type="p"/>', r'<lb/>')
+ osis = osis.replace(r'<lb type="x-p"/>', r'<lb/>')
# delete Unicode tags
for c in u'﷐﷑﷒﷓﷔﷕﷖﷗﷘﷙﷚﷛﷜﷝﷞﷟':
osis = osis.replace(c, '')