diff options
author | Chris Little <chrislit@crosswire.org> | 2012-08-10 10:46:41 +0000 |
---|---|---|
committer | Chris Little <chrislit@crosswire.org> | 2012-08-10 10:46:41 +0000 |
commit | d4c721752ed56983e40400cd61b6255ba0619a66 (patch) | |
tree | 83351faa9b279723248be8da08fc0545243bfcac /modules | |
parent | c2c9a64c7c469f3d5a75dae3397dd164476f95a5 (diff) | |
download | sword-tools-d4c721752ed56983e40400cd61b6255ba0619a66.tar.gz |
re-implemented \i- introduction tags, independent of non-intro versions
cleaned up extraneous spaces
removed ###TESTED### markers
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@372 07627401-56e2-0310-80f4-f8cd0041bdcd
Diffstat (limited to 'modules')
-rwxr-xr-x | modules/python/usfm2osis.py | 202 |
1 files changed, 122 insertions, 80 deletions
diff --git a/modules/python/usfm2osis.py b/modules/python/usfm2osis.py index a4812e6..bccf020 100755 --- a/modules/python/usfm2osis.py +++ b/modules/python/usfm2osis.py @@ -54,7 +54,7 @@ scriptVersion = '0.5' # book # chapter # verse -# paragraph +# paragraph # title # ms1 # ms2 @@ -95,7 +95,7 @@ bookDict = { # DC - Eastern Orthodox '3MA':'3Macc', '4MA':'4Macc', '1ES':'1Esd', '2ES':'2Esd', 'MAN':'PrMan', 'PS2':'Ps151', # Rahlfs' LXX - 'ODA':'Odes', 'PSS':'PssSol', + 'ODA':'Odes', 'PSS':'PssSol', # Esdrae 'EZA':'4Ezra', '5EZ':'5Ezra', '6EZ':'6Ezra', # Inconsistency with Esther @@ -134,8 +134,8 @@ addBookDict = { '1CL':'1Clem', '2CL':'2Clem', 'SHE':'Herm', 'LBA':'Barn', 'DID':'Did', ### # Proposed replacements <http://lc.bfbs.org.uk/e107_files/downloads/canonicalissuesinparatext.pdf> - 'ODE':'Odes', - + 'ODE':'Odes', + # Additional biblical books 'ADE':'AddEsth' } @@ -148,7 +148,7 @@ canonicalOrder = ( # DC - Catholic 'TOB', 'JDT', 'ESG', 'ADE', 'WIS', 'SIR', 'PSS', 'BAR', 'LJE', 'DAG', 'S3Y', 'SUS', 'BEL', '1MA', '2MA', # DC - Eastern Orthodox - '1ES', 'MAN', 'PS2', '3MA', '2ES', '4MA', + '1ES', 'MAN', 'PS2', '3MA', '2ES', '4MA', # NT 'MAT', 'MRK', 'LUK', 'JHN', 'ACT', 'ROM', '1CO', '2CO', 'GAL', 'EPH', 'PHP', 'COL', '1TH', '2TH', '1TI', '2TI', 'TIT', 'PHM', 'HEB', 'JAS', '1PE', '2PE', '1JN', '2JN', '3JN', 'JUD', 'REV', @@ -157,7 +157,7 @@ canonicalOrder = ( # Esdrae 'EZA', '5EZ', '6EZ', # Inconsistency with Esther - + # Syriac 'PS3', '2BA', 'LBA', # Ethiopic @@ -274,31 +274,31 @@ def convertToOSIS(sFile): supported: \id, \ide, \sts, \rem, \h, \toc1, \toc2, \toc3 """ global loc2osisBk, osis2locBk - # \id_<CODE>_(Name of file, Book name, Language, Last edited, Date etc.) ###TESTED### + # \id_<CODE>_(Name of file, Book name, Language, Last edited, Date etc.) osis = re.sub(r'\\id\s+([A-Z0-9]{3})\b\s*([^\\\n]*?)\n(.*)(?=\\id|$)', lambda m: u'<div type="book" osisID="' + bookDict[m.group(1)] + '">\n' + (('<!-- id comment - ' + m.group(2) + ' -->\n') if m.group(2) else '') + m.group(3) + u'</div type="book">\n' , osis, flags=re.DOTALL) # keep a copy of the OSIS book abbreviation for below (\toc3 processing) to store for mapping localized book names to/from OSIS osisBook = re.search(r'\\id\s+([A-Z0-9]{3})', osis) if osisBook: osisBook = bookDict[osisBook.group(1)] - # \ide_<ENCODING> ###TESTED### + # \ide_<ENCODING> osis = re.sub(r'\\ide\b.*\n', r'', osis) # delete, since this was handled above # \sts_<STATUS CODE> - osis = re.sub(r'\\sts\b\s+(.+)\s*\n', r'<milestone type="x-sts" n="\1"/>\n', osis) + osis = re.sub(r'\\sts\b\s+(.+)\s*\n', r'<milestone type="x-usfm-sts" n="\1"/>\n', osis) - # \rem_text... ###TESTED### + # \rem_text... osis = re.sub(r'\\rem\b\s+(.+)', r'<!-- rem - \1 -->', osis) - # \h#_text... ###TESTED### + # \h#_text... osis = re.sub(r'\\h\b\s+(.+)\s*\n', r'<title type="runningHead">\1</title>\n', osis) - # TODO: \h1-5 + osis = re.sub(r'\\h(\d)\b\s+(.+)\s*\n', r'<title type="runningHead" n="\1">\2</title>\n', osis) # \toc1_text... - osis = re.sub(r'\\toc1\b\s+(.+)\s*\n', r'<milestone type="x-toc1" n="\1"/>\n', osis) + osis = re.sub(r'\\toc1\b\s+(.+)\s*\n', r'<milestone type="x-usfm-toc1" n="\1"/>\n', osis) # \toc2_text... - osis = re.sub(r'\\toc2\b\s+(.+)\s*\n', r'<milestone type="x-toc2" n="\1"/>\n', osis) + osis = re.sub(r'\\toc2\b\s+(.+)\s*\n', r'<milestone type="x-utfm-toc2" n="\1"/>\n', osis) # \toc3_text... locBook = re.search(r'\\toc3\b\s+(.+)\s*\n', osis) @@ -307,7 +307,7 @@ def convertToOSIS(sFile): if osisBook: osis2locBk[osisBook]=locBook loc2osisBk[locBook]=osisBook - osis = re.sub(r'\\toc3\b\s+(.+)\s*\n', lambda m: r'<milestone type="x-toc3" n="\1"/>\n', osis) + osis = re.sub(r'\\toc3\b\s+(.+)\s*\n', lambda m: r'<milestone type="x-usfm-toc3" n="\1"/>\n', osis) return osis @@ -315,33 +315,75 @@ def convertToOSIS(sFile): def cvtIntroductions(osis, relaxedConformance): """ Introductions - supported: - unsupported: \imt#, \is#, \ip, \ipi, \im, \imi, \ipq, \imq, \ipr, \iq#, \ib, \ili, \iot, \io#, \ior...\ior*, \iex, \iqt...\iqt*, \imte, \ie + supported: \imt#, \is#, \ip, \ipi, \im, \imi, \ipq, \imq, \ipr, \iq#, \ib, \ili#, \iot, \io#, \ior...\ior*, \iex, \iqt...\iqt*, \imte, \ie """ - # \imt# - # \is# - # \ip - # \ipi - # \im - # \imi - # \ipq - # \imq - # \ipr - # \iq# + # \imt#_text... + osis = re.sub(r'\\imt(\d?)\s+(.+)', lambda m: r'<title ' + (r'level="'+m.group(1)+r'" ' if m.group(1) else r'') + r'type="main" subType="x-introduction">' + m.group(2) + r'</title>', osis) + + # \is#_text... + osis = re.sub(r'\\is1?\s+(.+)', lambda m: u'<div type="section" subType="x-introduction"><title>' + m.group(1) + '</title>', osis) + osis = re.sub(u'([^]+)', r'\1'+u'</div>\n', osis, re.DOTALL) + osis = re.sub(r'\\is2\s+(.+)', lambda m: u'<div type="subsection" subType="x-introduction"><title>' + m.group(1) + '</title>', osis) + osis = re.sub(u'([^]+)', r'\1'+u'</div>\n', osis, re.DOTALL) + osis = re.sub(r'\\is3\s+(.+)', lambda m: u'<div type="x-subSubSection" subType="x-introduction"><title>' + m.group(1) + '</title>', osis) + osis = re.sub(u'([^]+)', r'\1'+u'</div>\n', osis, re.DOTALL) + osis = re.sub(r'\\is4\s+(.+)', lambda m: u'<div type="x-subSubSubSection" subType="x-introduction"><title>' + m.group(1) + '</title>', osis) + osis = re.sub(u'([^]+)', r'\1'+u'</div>\n', osis, re.DOTALL) + osis = re.sub(r'\\is5\s+(.+)', lambda m: u'<div type="x-subSubSubSubSection" subType="x-introduction"><title>' + m.group(1) + '</title>', osis) + osis = re.sub(u'([^]+)', r'\1'+u'</div>\n', osis, re.DOTALL) + + # \ip_text... + osis = re.sub(r'\\ip\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'<p subType="x-introduction">\n' + m.group(1) + u'</p>\n', osis, flags=re.DOTALL) + + # \ipi_text... + # \im_text... + # \imi_text... + # \ipq_text... + # \imq_text... + # \ipr_text... + pType = {'ipi':'x-indented', 'im':'x-noindent', 'imi':'x-noindent-indented', 'ipq':'x-quote', 'imq':'x-noindent-quote', 'ipr':'x-right'} + osis = re.sub(r'\\(ipi|im|ipq|imq|ipr)\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'<p type="' + pType[m.group(1)] + '" subType="x-introduction">\n' + m.group(2) + u'</p>\n', osis, flags=re.DOTALL) + + # \iq#_text... + osis = re.sub(r'\\iq\b\s*(.*?)(?=(['+u''+r']|\\i?q[\d\s]|<l\b|<lb\b|<title\b))', r'<l level="1" subType="x-introduction">\1</l>', osis, flags=re.DOTALL) + osis = re.sub(r'\\iq(\d)\b\s*(.*?)(?=(['+u''+r']|\\i?q[\d\s]|<l\b|<lb\b|<title\b))', r'<l level="\1" subType="x-introduction">\2</l>', osis, flags=re.DOTALL) + # \ib - # \ili# - # \iot - # \io# - # \ior...\ior* - # \iex - # \iqt...\iqt* - # \imte# + osis = re.sub(r'\\ib\b\s?', r'<lb type="x-p"/>', osis) + osis = osis.replace('\n</l>', '</l>\n') + osis = re.sub(u'(<l [^]+</l>)', r'<lg>\1</lg>', osis, flags=re.DOTALL) + osis = re.sub('(<lg>.+?</lg>)', lambda m: m.group(1).replace(r'<lb type="x-p"/>', r'</lg><lg>'), osis, flags=re.DOTALL) # re-handle \b that occurs within <lg> + + # \ili#_text... + osis = re.sub(r'\\ili\b\s*(.*?)(?=(['+u''+r']|\\ili[\d\s]|<lb\b|<title\b))', r'<item type="x-indent-1" subType="x-introduction">\1</item>', osis, flags=re.DOTALL) + osis = re.sub(r'\\ili(\d)\b\s*(.*?)(?=(['+u''+r']|\\ili[\d\s]|<lb\b|<title\b))', r'<item type="x-indent-\1" subType="x-introduction">\2</item>', osis, flags=re.DOTALL) + osis = osis.replace('\n</item>', '</item>\n') + osis = re.sub(u'(<item [^]+</item>)', r'<list>\1</list>', osis, flags=re.DOTALL) + + # \iot_text... + # \io#_text...(references range) + osis = re.sub(r'\\io\b\s*(.*?)(?=(['+u''+r']|\\io[t\d\s]|<lb\b|<title\b))', r'<item type="x-indent-1" subType="x-introduction">\1</item>', osis, flags=re.DOTALL) + osis = re.sub(r'\\io(\d)\b\s*(.*?)(?=(['+u''+r']|\\io[t\d\s]|<lb\b|<title\b))', r'<item type="x-indent-\1" subType="x-introduction">\2</item>', osis, flags=re.DOTALL) + osis = re.sub(r'\\iot\b\s*(.*?)(?=(['+u''+r']|\\io[t\d\s]|<lb\b|<title\b))', r'<item type="head">\1</item type="head">', osis, flags=re.DOTALL) + osis = osis.replace('\n</item>', '</item>\n') + osis = re.sub(u'(<item [^]+</item>)', r'<div type="outline"><list>\1</list></div>', osis, flags=re.DOTALL) + osis = re.sub(r'item type="head"', r'head', osis) + + # \ior_text...\ior* + osis = re.sub(r'\\ior\b\s+(.+?)\\ior\*', r'<reference>\1</reference>', osis, flags=re.DOTALL) + + # \iex # TODO: look for example; I have no idea what this would look like in context + osis = re.sub(r'\\iex\b\s*(.+?)'+u'?=(\s*(\\c|</div type="book">))', r'<div type="bridge">\1</div>', osis, flags=re.DOTALL) + + # \iqt_text...\iqt* + osis = re.sub(r'\\iqt\s+(.+?)\\iqt\*', r'<q subType="x-introduction">\1</q>', osis, flags=re.DOTALL) + + # \imte#_text... + osis = re.sub(r'\\imte(\d?)\b\s+(.+)', lambda m: r'<title ' + (r'level="'+m.group(1)+r'" ' if m.group(1) else r'') + r'type="main" subType="x-introduction-end">' + m.group(2) + r'</title>', osis) + # \ie - # encapsulate introduction elements in a <div> - #osis = re.sub(r'(\\i(mt|mt\d+|s|d\d+|p|pi|m|mi|pq|mq|pr|q|q\d+|b|li|ot|o|o\d+|or|or*|ex|qt|qt*|mte|e)\b.+?)(?=\n\\(c|s|m|p|d))', u'<div type="introduction">'+r'\1'+u'</div>\n', osis, flags=re.DOTALL) - # map all introduction elements to their non-introduction equivalents - #for e in [r'mt', r'mt\d+', r's', r'd\d+', r'p', r'pi', r'm', r'mi', r'pq', r'mq', r'pr', r'q', r'q\d+', r'b', r'li', r'ot', r'o', r'o\d+', r'or', r'or*', r'ex', r'qt', r'qt*', r'mte', r'e']: - # osis = re.sub(r'\\i('+e+r')\b', r'\\\1', osis) + osis = re.sub(r'\\ie\b\s*', r'<milestone type="x-usfm-ie"/>', osis) + return osis @@ -350,7 +392,7 @@ def convertToOSIS(sFile): Titles, Headings, and Labels supported: \mt#, \mte#, \ms#, \mr, \s#, \sr, \r, \rq...\rq*, \d, \sp """ - # \ms#_text... ###TESTED### + # \ms#_text... osis = re.sub(r'\\ms1?\s+(.+)', lambda m: u'<div type="majorSection"><title>' + m.group(1) + '</title>', osis) osis = re.sub(u'([^]+)', r'\1'+u'</div>\n', osis, re.DOTALL) osis = re.sub(r'\\ms2\s+(.+)', lambda m: u'<div type="majorSection" n="2"><title>' + m.group(1) + '</title>', osis) @@ -365,7 +407,7 @@ def convertToOSIS(sFile): # \mr_text... osis = re.sub(r'\\mr\s+(.+)', u'<title type="scope"><reference>'+r'\1</reference></title>', osis) - # \s#_text... ###TESTED### + # \s#_text... osis = re.sub(r'\\s1?\s+(.+)', lambda m: u'<div type="section"><title>' + m.group(1) + '</title>', osis) osis = re.sub(u'([^]+)', r'\1'+u'</div>\n', osis, re.DOTALL) if relaxedConformance: @@ -387,13 +429,13 @@ def convertToOSIS(sFile): # \rq_text...\rq* osis = re.sub(r'\\rq\s+(.+?)\\rq\*', u'<reference type="source">'+r'\1</reference>', osis, flags=re.DOTALL) - # \d_text... ###TESTED### + # \d_text... osis = re.sub(r'\\d\s+(.+)', u'<title canonical="true" type="psalm">'+r'\1</title>', osis) - # \sp_text... ###TESTED### + # \sp_text... osis = re.sub(r'\\sp\s+(.+)', r'<speaker>\1</speaker>', osis) - # \mt#_text... ###TESTED### + # \mt#_text... osis = re.sub(r'\\mt(\d?)\s+(.+)', lambda m: r'<title ' + (r'level="'+m.group(1)+r'" ' if m.group(1) else r'') + r'type="main">' + m.group(2) + r'</title>', osis) # \mte#_text... osis = re.sub(r'\\mte(\d?)\s+(.+)', lambda m: r'<title ' + (r'level="'+m.group(1)+r'" ' if m.group(1) else r'') + r'type="main" subType="x-end">' + m.group(2) + r'</title>', osis) @@ -406,7 +448,7 @@ def convertToOSIS(sFile): Chapters and Verses supported: \c, \ca...\ca*, \cl, \cp, \cd, \v, \va...\va*, \vp...\vp* """ - # \c_# ###TESTED### + # \c_# osis = re.sub(r'\\c\s+([^\s]+)\b(.+?)(?=(\\c\s+|</div type="book"))', lambda m: u'<chapter osisID="$BOOK$.' + m.group(1) + r'" sID="$BOOK$.' + m.group(1) + '"/>' + m.group(2) + u'<chapter eID="$BOOK$.' + m.group(1) + u'"/>\n', osis, flags=re.DOTALL) # \cp_# @@ -432,7 +474,7 @@ def convertToOSIS(sFile): # \cd_# <--This # seems to be an error osis = re.sub(r'\\cd\b\s+(.+)', u'<title type="x-description">'+r'\1</title>', osis) - # \v_# ###TESTED### + # \v_# osis = re.sub(r'\\v\s+([^\s]+)\b\s*(.+?)(?=(\\v\s+|</div type="book"|<chapter eID))', lambda m: u'<verse osisID="$BOOK$.$CHAP$.' + m.group(1) + r'" sID="$BOOK$.$CHAP$.' + m.group(1) + r'"/>' + m.group(2) + r'<verse eID="$BOOK$.$CHAP$.' + m.group(1) + u'"/>\n', osis, flags=re.DOTALL) # \vp_#\vp* @@ -460,36 +502,36 @@ def convertToOSIS(sFile): Paragraphs supported: \p, \m, \pmo, \pm, \pmc, \pmr, \pi#, \mi, \nb, \cls, \li#, \pc, \pr, \ph#, \b """ - # \p(_text...) ###TESTED### - osis = re.sub(r'\\p\s+(.*?)(?=(\\(m|p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'<p>\n' + m.group(1) + u'</p>\n', osis, flags=re.DOTALL) + # \p(_text...) + osis = re.sub(r'\\p\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'<p>\n' + m.group(1) + u'</p>\n', osis, flags=re.DOTALL) # \pc(_text...) # \pr(_text...) - # \m(_text...) ###TESTED### + # \m(_text...) # \pmo(_text...) # \pm(_text...) # \pmc(_text...) # \pmr_text... # deprecated: map to same as \pr # \pi#(_Sample text...) # \mi(_text...) - # \nb ###TESTED### + # \nb pType = {'pc':'x-center', 'pr':'x-right', 'm':'x-noindent', 'pmo':'x-embedded-opening', 'pm':'x-embedded', 'pmc':'x-embedded-closing', 'pmr':'x-right', 'pi':'x-indented-1', 'pi1':'x-indented-1', 'pi2':'x-indented-2', 'pi3':'x-indented-3', 'pi4':'x-indented-4', 'pi5':'x-indented-5', 'mi':'x-noindent-indented', 'nb':'x-nobreak'} - osis = re.sub(r'\\(pc|pr|m|pmo|pm|pmc|pmr|pi|pi1|pi2|pi3|pi4|pi5|mi|nb)\s+(.*?)(?=(\\(m|p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'<p type="' + pType[m.group(1)] + '">\n' + m.group(2) + u'</p>\n', osis, flags=re.DOTALL) + osis = re.sub(r'\\(pc|pr|m|pmo|pm|pmc|pmr|pi|pi1|pi2|pi3|pi4|pi5|mi|nb)\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'<p type="' + pType[m.group(1)] + '">\n' + m.group(2) + u'</p>\n', osis, flags=re.DOTALL) # \cls_text... - osis = re.sub(r'\\m\s+(.+?)(?=(\\(m|p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'<closer>' + m.group(1) + u'</closer>\n', osis, flags=re.DOTALL) + osis = re.sub(r'\\m\s+(.+?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'<closer>' + m.group(1) + u'</closer>\n', osis, flags=re.DOTALL) # \ph#(_text...) - # \li#(_text...) ###TESTED### + # \li#(_text...) osis = re.sub(r'\\ph\b\s*', r'\\li ', osis) - osis = re.sub(r'\\ph(\d+)\b\s*', r'\\li\1 ', osis) + osis = re.sub(r'\\ph(\d)\b\s*', r'\\li\1 ', osis) osis = re.sub(r'\\li\b\s*(.*?)(?=(['+u''+r']|\\li[\d\s]|<lb\b|<title\b))', r'<item type="x-indent-1">\1</item>', osis, flags=re.DOTALL) - osis = re.sub(r'\\li(\d+)\b\s*(.*?)(?=(['+u''+r']|\\li[\d\s]|<lb\b|<title\b))', r'<item type="x-indent-\1">\2</item>', osis, flags=re.DOTALL) + osis = re.sub(r'\\li(\d)\b\s*(.*?)(?=(['+u''+r']|\\li[\d\s]|<lb\b|<title\b))', r'<item type="x-indent-\1">\2</item>', osis, flags=re.DOTALL) osis = osis.replace('\n</item>', '</item>\n') osis = re.sub(u'(<item [^]+</item>)', r'<list>\1</list>', osis, flags=re.DOTALL) - # \b ###TESTED### - osis = re.sub(r'\\b\b\s?', r'<lb type="p"/>', osis) + # \b + osis = re.sub(r'\\b\b\s?', r'<lb type="x-p"/>', osis) return osis @@ -502,21 +544,21 @@ def convertToOSIS(sFile): # \qs_(Selah)\qs* osis = re.sub(r'\\qs\b\s(.+?)\\qs\*', r'<l type="selah">\1</l>', osis, flags=re.DOTALL) - # \q#(_text...) ###TESTED### + # \q#(_text...) osis = re.sub(r'\\q\b\s*(.*?)(?=(['+u''+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', r'<l level="1">\1</l>', osis, flags=re.DOTALL) - osis = re.sub(r'\\q(\d+)\b\s*(.*?)(?=(['+u''+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', r'<l level="\1">\2</l>', osis, flags=re.DOTALL) + osis = re.sub(r'\\q(\d)\b\s*(.*?)(?=(['+u''+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', r'<l level="\1">\2</l>', osis, flags=re.DOTALL) # \qr_text... # \qc_text... # \qm#(_text...) qType = {'qr':'x-right', 'qc':'x-center', 'qm':'x-embedded" level="1', 'qm1':'x-embedded" level="1', 'qm2':'x-embedded" level="2', 'qm3':'x-embedded" level="3', 'qm4':'x-embedded" level="4', 'qm5':'x-embedded" level="5'} - osis = re.sub(r'\\(qr|qc|qm\d+)\b\s*(.*?)(?=(['+u''+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', lambda m: r'<l type="' + qType[m.group(1)] + '">' + m.group(2) + '</l>', osis, flags=re.DOTALL) + osis = re.sub(r'\\(qr|qc|qm\d)\b\s*(.*?)(?=(['+u''+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', lambda m: r'<l type="' + qType[m.group(1)] + '">' + m.group(2) + '</l>', osis, flags=re.DOTALL) osis = osis.replace('\n</l>', '</l>\n') osis = re.sub(u'(<l [^]+</l>)', r'<lg>\1</lg>', osis, flags=re.DOTALL) - # \b ###TESTED### - osis = re.sub('(<lg>.+?</lg>)', lambda m: m.group(1).replace(r'<lb type="p"/>', r'</lg><lg>'), osis, flags=re.DOTALL) # re-handle \b that occurs within <lg> + # \b + osis = re.sub('(<lg>.+?</lg>)', lambda m: m.group(1).replace(r'<lb type="x-p"/>', r'</lg><lg>'), osis, flags=re.DOTALL) # re-handle \b that occurs within <lg> # \qa_text... osis = re.sub(r'\\qa\s+(.+)', u'<title type="acrostic">'+r'\1</title>', osis) @@ -551,13 +593,13 @@ def convertToOSIS(sFile): # \fdc_refs...\fdc* note = re.sub(r'\\fdc\b\s(.+?)\\fdc\b\*', r'<seg editions="dc">\1</seg>', note) - # \fq_ ###TESTED### + # \fq_ note = re.sub(r'\\fq\b\s(.+?)(?=(\\f|'+u'))', u''+r'<catchWord>\1</catchWord>', note) - # \fqa_ ###TESTED### + # \fqa_ note = re.sub(r'\\fqa\b\s(.+?)(?=(\\f|'+u'))', u''+r'<rdg type="alternate">\1</rdg>', note) - # \ft_ ###TESTED### + # \ft_ note = re.sub(r'\\ft\s', r'', note) # \fr_##SEP## @@ -595,7 +637,7 @@ def convertToOSIS(sFile): Footnotes supported:\f...\f*, \fe...\fe*, \fr, \fk, \fq, \fqa, \fl, \fp, \fv, \ft, \fdc...\fdc*, \fm...\fm* """ - # \f_+_...\f* ###TESTED### + # \f_+_...\f* osis = re.sub(r'\\f\s+([^\s\\]+)?\s*(.+?)\s*\\f\*', lambda m: r'<note' + ((' n=""') if (m.group(1) == u'-') else ('' if (m.group(1) == '+') else (' n="' + m.group(1) + '"'))) + ' placement="foot">' + m.group(2) + u'</note>', osis, flags=re.DOTALL) # \fe_+_...\fe* @@ -624,7 +666,7 @@ def convertToOSIS(sFile): # \xq_ note = re.sub(r'\\xq\b\s(.+?)(?=(\\x|'+u'))', u''+r'<catchWord>\1</catchWord>', note) - # \xt_ ###TESTED### + # \xt_ note = re.sub(r'\\xt\s', r'', note) # \xo_##SEP## @@ -648,7 +690,7 @@ def convertToOSIS(sFile): Cross References supported: \\x...\\x*, \\xo, \\xk, \\xq, \\xt, \\xot...\\xot*, \\xnt...\\xnt*, \\xdc...\\xdc* """ - # \x_+_...\x* ###TESTED### + # \x_+_...\x* osis = re.sub(r'\\x\s+([^\s]+?)\s+(.+?)\s*\\x\*', lambda m: r'<note' + ((' n=""') if (m.group(1) == u'-') else ('' if (m.group(1) == '+') else (' n="' + m.group(1) + '"'))) + ' type="crossReference"><reference>' + m.group(2) + u'</reference></note>', osis, flags=re.DOTALL) osis = re.sub(r'(<note [^>]*?type="crossReference"[^>]*>.*?</note>)', lambda m: processXref(m.group(1)), osis, flags=re.DOTALL) @@ -664,10 +706,10 @@ def convertToOSIS(sFile): Special Text supported: \add...\add*, \bk...\bk*, \dc...\dc*, \k...\k*, \lit, \nd...\nd*, \ord...\ord*, \pn...\pn*, \qt...\qt*, \sig...\sig*, \sls...\sls*, \tl...\tl*, \wj...\wj* """ - # \add_...\add* ###TESTED### + # \add_...\add* osis = re.sub(r'\\add\s+(.+?)\\add\*', r'<transChange type="added">\1</transChange>', osis, flags=re.DOTALL) - # \wj_...\wj* ###TESTED### + # \wj_...\wj* osis = re.sub(r'\\wj\s+(.+?)\\wj\*', r'<q who="Jesus" marker="">\1</q>', osis, flags=re.DOTALL) # \nd_...\nd* @@ -676,7 +718,7 @@ def convertToOSIS(sFile): # \pn_...\pn* osis = re.sub(r'\\pn\s+(.+?)\\pn\*', r'<name>\1</name>', osis, flags=re.DOTALL) - # \qt_...\qt* + # \qt_...\qt* # TODO:should this be <q>? osis = re.sub(r'\\qt\s+(.+?)\\qt\*', r'<seg type="otPassage">\1</seg>', osis, flags=re.DOTALL) # \sig_...\sig* @@ -688,16 +730,16 @@ def convertToOSIS(sFile): # \tl_...\tl* osis = re.sub(r'\\tl\s+(.+?)\\tl\*', r'<foreign>\1</foreign>', osis, flags=re.DOTALL) - # \bk_...\bk* ###TESTED### + # \bk_...\bk* osis = re.sub(r'\\bk\s+(.+?)\\bk\*', r'<name type="x-workTitle">\1</name>', osis, flags=re.DOTALL) - # \k_...\k* ###TESTED### + # \k_...\k* osis = re.sub(r'\\k\s+(.+?)\\k\*', r'<seg type="keyword">\1</seg>', osis, flags=re.DOTALL) # \lit - osis = re.sub(r'\\lit\s+(.*?)(?=(\\(m|p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'<p type="x-liturgical">\n' + m.group(1) + u'</p>\n', osis, flags=re.DOTALL) + osis = re.sub(r'\\lit\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'<p type="x-liturgical">\n' + m.group(1) + u'</p>\n', osis, flags=re.DOTALL) - # \dc_...\dc* #### TODO: Find an example---should this really be transChange? + # \dc_...\dc* # TODO: Find an example---should this really be transChange? osis = re.sub(r'\\dc\b\s*(.+?)\\dc\*', r'<transChange type="added" editions="dc">\1</transChange>', osis, flags=re.DOTALL) # \sls_...\sls* @@ -717,7 +759,7 @@ def convertToOSIS(sFile): # \bd_...\bd* osis = re.sub(r'\\bd\s+(.+?)\\bd\*', r'<hi type="bold">\1</hi>', osis, flags=re.DOTALL) - # \it_...\it* ###TESTED### + # \it_...\it* osis = re.sub(r'\\it\s+(.+?)\\it\*', r'<hi type="italic">\1</hi>', osis, flags=re.DOTALL) # \bdit_...\bdit* @@ -781,7 +823,7 @@ def convertToOSIS(sFile): return figure osis = re.sub(r'\\fig\b\s+([^\|]*)\s*\|([^\|]*)\s*\|([^\|]*)\s*\|([^\|]*)\s*\|([^\|]*)\s*\|([^\|]*)\s*\|([^\\]*)\s*\\fig\*', makeFigure, osis) - # \ndx_...\ndx* #TODO tag with x-glossary instead of <index/>? Is <index/> containerable? + # \ndx_...\ndx* # TODO tag with x-glossary instead of <index/>? Is <index/> containerable? osis = re.sub(r'\\ndx\s+(.+?)(\s*)\\ndx\*', r'\1<index index="Index" level1="\1"/>\2', osis, flags=re.DOTALL) # \pro_...\pro* @@ -834,7 +876,7 @@ def convertToOSIS(sFile): osis = re.sub(r'\\ex\s+([^\s]+?)\s+(.+?)\s*\\ex\*', lambda m: r'<note' + ((' n=""') if (m.group(1) == u'-') else ('' if (m.group(1) == '+') else (' n="' + m.group(1) + '"'))) + ' type="crossReference" subType="x-study"><reference>' + m.group(2) + u'</reference></note>', osis, flags=re.DOTALL) osis = re.sub(r'(<note [^>]*?type="crossReference"[^>]*>.*?</note>)', lambda m: processXref(m.group(1)), osis, flags=re.DOTALL) - # \esb...\esbex ### TODO: this likely needs to go much earlier in the process + # \esb...\esbex # TODO: this likely needs to go much earlier in the process osis = re.sub(r'\\esb\b\s*(.+?)\\esbe\b\s*', '<div type="x-sidebar">\1</div>\n', osis, flags=re.DOTALL) # \cat_<TAG>\cat* @@ -850,7 +892,7 @@ def convertToOSIS(sFile): We can't really know what these mean, but will preserve them as <milestone/> elements. """ # \z - osis = re.sub(r'\\z([^\s]+)', r'<milestone type="x-z-\1"/>', osis) + osis = re.sub(r'\\z([^\s]+)', r'<milestone type="x-usfm-z-\1"/>', osis) return osis @@ -905,7 +947,7 @@ def convertToOSIS(sFile): # delete attributes from end tags (since they are invalid) osis = re.sub(r'(</[^\s>]+) [^>]*>', r'\1>', osis) - osis = osis.replace(r'<lb type="p"/>', r'<lb/>') + osis = osis.replace(r'<lb type="x-p"/>', r'<lb/>') # delete Unicode tags for c in u'': osis = osis.replace(c, '') |