summaryrefslogtreecommitdiffstats
path: root/modules
diff options
context:
space:
mode:
authorChris Little <chrislit@crosswire.org>2012-08-27 07:48:06 +0000
committerChris Little <chrislit@crosswire.org>2012-08-27 07:48:06 +0000
commitcc7f674fae7feff6206b2ce03c44562fedeec47c (patch)
tree4f14ff8f84cf68271e04e2f257bfb7bb68ba705d /modules
parent50cd7a24860a44e186591d1d6fafffe09f431fee (diff)
downloadsword-tools-cc7f674fae7feff6206b2ce03c44562fedeec47c.tar.gz
cleaned up excess spaces
completed Python3 compatibility implementation (still works with (C)Python2 & PyPy, but not Jython due to 2.6+ features (multiprocessing))

git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@400 07627401-56e2-0310-80f4-f8cd0041bdcd
Diffstat (limited to 'modules')
-rwxr-xr-xmodules/python/usfm2osis.py103
1 files changed, 51 insertions, 52 deletions
diff --git a/modules/python/usfm2osis.py b/modules/python/usfm2osis.py
index 618c932..ab6e6a8 100755
--- a/modules/python/usfm2osis.py
+++ b/modules/python/usfm2osis.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
-from __future__ import print_function, unicode_literals
+#from __future__ import print_function, unicode_literals
date = '$Date$'
rev = '$Rev$'
@@ -20,7 +20,7 @@ scriptVersion = '0.5'
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# The full text of the GNU General Public License is available at:
@@ -40,7 +40,7 @@ scriptVersion = '0.5'
### Roadmap:
# 0.5 initial commit, including full coverage of core USFM tags
-# 0.6 file sorting options (natural/alphabetic/canonical/none); expand sub-verses with ! in osisIDs; Python3 compatability; add optional schema validator (lxml probably); docstrings; unittest; make fully OO; PyDev project?
+# 0.6 file sorting options (natural/alphabetic/canonical/none); expand sub-verses with ! in osisIDs; Python3 compatibility; add optional schema validator (lxml probably); docstrings; unittest; make fully OO; PyDev project?
# 0.7 test suite incorporating all USFM examples from UBS ICAP and other complex cases
# 0.8 more clean-up & re-ordering to correctly encapsulate milestones within appropriate containers; clear remaining TODO items, to the extent possible
# 1.0 feature complete for release & production use
@@ -51,11 +51,11 @@ scriptVersion = '0.5'
### TODO for 0.6:
# expand sub-verses with ! in osisIDs
-# Python3 compatability
# document functions (docstrings)
# unittest
# make fully OO
-# PyDev project?
+# PyDev project?
+# check Python2/3 compatibility
### Key to non-characters:
# Used : \uFDD0\uFDD1\uFDD2\uFDD3\uFDD4\uFDD5\uFDD6\uFDD7\uFDD8\uFDD9\uFDDA\uFDDB\uFDDC\uFDDD\uFDDE\uFDDF\uFDE0\uFDE1
@@ -88,7 +88,11 @@ scriptVersion = '0.5'
import sys, codecs, re
from encodings.aliases import aliases
-import multiprocessing, Queue
+import multiprocessing
+if sys.version_info[0] < 3:
+ import Queue
+else:
+ import queue as Queue
import random
date = date.replace('$', '').strip()[6:16]
@@ -170,26 +174,26 @@ canonicalOrder = [
# 1) Book representing parts of protocanonical books follow the primary book
# 2) Variants follow primary forms
# 3) Books that appear in only one tradition or Bible appear following their traditional/attested antecedent
-
+
# There's no fool-proof way to order books without knowing the tradition ahead of time,
# but this ordering should get it right often for many common real Bibles.
-
+
# Front Matter
- 'FRONT', 'INTRODUCTION',
+ 'FRONT', 'INTRODUCTION',
# OT
'Gen', 'Exod', 'Lev', 'Num', 'Deut', 'Josh', 'JoshA', 'Judg', 'JudgB', 'Ruth',
'1Sam', '2Sam', '1Kgs', '2Kgs', '1Chr', '2Chr', 'PrMan', 'Jub', '1En',
'Ezra', 'Neh', 'Tob', 'TobS', 'Jdt', 'Esth', 'EsthGr', 'AddEsth', '1Meq', '2Meq', '3Meq',
'Job', 'Ps', 'AddPs', '5ApocSyrPss', 'PsMet', 'Odes', 'Prov', 'Reproof', 'Eccl', 'Song',
- 'Wis', 'Sir', 'WSir', 'PrSol', 'PssSol',
- 'Isa', 'Jer', 'Lam', 'PrJer', 'Bar', 'EpJer', '2Bar', 'EpBar', '4Bar',
- 'Ezek', 'Dan', 'DanGr', 'DanTh', 'PrAzar', 'Sus', 'SusTh', 'Bel', 'BelTh',
+ 'Wis', 'Sir', 'WSir', 'PrSol', 'PssSol',
+ 'Isa', 'Jer', 'Lam', 'PrJer', 'Bar', 'EpJer', '2Bar', 'EpBar', '4Bar',
+ 'Ezek', 'Dan', 'DanGr', 'DanTh', 'PrAzar', 'Sus', 'SusTh', 'Bel', 'BelTh',
'Hos', 'Joel', 'Amos', 'Obad', 'Jonah', 'Mic', 'Nah', 'Hab', 'Zeph', 'Hag', 'Zech', 'Mal',
# Intertestamentals
'1Esd', '2Esd', '4Ezra', '5Ezra', '6Ezra',
- '1Macc', '2Macc', '3Macc', '4Macc',
+ '1Macc', '2Macc', '3Macc', '4Macc',
# NT
'Matt', 'Mark', 'Luke', 'John', 'Acts', 'Rom', '1Cor', '2Cor',
@@ -203,7 +207,7 @@ canonicalOrder = [
# Private-Use Extensions
'XXA', 'XXB', 'XXC', 'XXD', 'XXE', 'XXF', 'XXG',
-
+
# Back Matter
'BACK', 'CONCORDANCE', 'GLOSSARY',
'INDEX', 'GAZETTEER', 'X-OTHER'
@@ -211,12 +215,12 @@ canonicalOrder = [
usfmNumericOrder = [
# Front Matter
- 'FRONT', 'INTRODUCTION',
+ 'FRONT', 'INTRODUCTION',
# OT 01-39
'Gen', 'Exod', 'Lev', 'Num', 'Deut', 'Josh', 'Judg', 'Ruth',
- '1Sam', '2Sam', '1Kgs', '2Kgs', '1Chr', '2Chr', 'Ezra', 'Neh',
- 'Esth', 'Job', 'Ps', 'Prov', 'Eccl', 'Song', 'Isa', 'Jer',
+ '1Sam', '2Sam', '1Kgs', '2Kgs', '1Chr', '2Chr', 'Ezra', 'Neh',
+ 'Esth', 'Job', 'Ps', 'Prov', 'Eccl', 'Song', 'Isa', 'Jer',
'Lam', 'Ezek', 'Dan', 'Hos', 'Joel', 'Amos', 'Obad', 'Jonah',
'Mic', 'Nah', 'Hab', 'Zeph', 'Hag', 'Zech', 'Mal',
@@ -250,10 +254,10 @@ usfmNumericOrder = [
# Books not currently adopted into USFM, recommended for removal by BFBS
'JoshA', 'JudgB', 'TobS', 'DanTh', 'SusTh', 'BelTh',
-
+
# Private-Use Extensions
'XXA', 'XXB', 'XXC', 'XXD', 'XXE', 'XXF', 'XXG',
-
+
# Back Matter
'BACK', 'CONCORDANCE', 'GLOSSARY',
'INDEX', 'GAZETTEER', 'X-OTHER'
@@ -396,7 +400,7 @@ def convertToOsis(sFile):
"""
# \id_<CODE>_(Name of file, Book name, Language, Last edited, Date etc.)
- osis = re.sub(r'\\id\s+([A-Z0-9]{3})\b\s*([^\\'+'\n'+']*?)'+'\n'+r'(.*)(?=\\id|$)', lambda m: '\uFDD0<div type="book" osisID="' + bookDict[m.group(1)] + '">\n' + (('<!-- id comment - ' + m.group(2) + ' -->\n') if m.group(2) else '') + m.group(3) + '</div type="book">\uFDD0\n' , osis, flags=re.DOTALL)
+ osis = re.sub(r'\\id\s+([A-Z0-9]{3})\b\s*([^\\'+'\n]*?)\n'+r'(.*)(?=\\id|$)', lambda m: '\uFDD0<div type="book" osisID="' + bookDict[m.group(1)] + '">\n' + (('<!-- id comment - ' + m.group(2) + ' -->\n') if m.group(2) else '') + m.group(3) + '</div type="book">\uFDD0\n' , osis, flags=re.DOTALL)
# \ide_<ENCODING>
osis = re.sub(r'\\ide\b.*'+'\n', '', osis) # delete, since this was handled above
@@ -406,7 +410,7 @@ def convertToOsis(sFile):
# \rem_text...
osis = re.sub(r'\\rem\b\s+(.+)', r'<!-- rem - \1 -->', osis)
-
+
# \restore_text...
if relaxedConformance:
osis = re.sub(r'\\restore\b\s+(.+)', r'<!-- restore - \1 -->', osis)
@@ -460,7 +464,7 @@ def convertToOsis(sFile):
# \imq_text...
# \ipr_text...
pType = {'ipi':'x-indented', 'im':'x-noindent', 'imi':'x-noindent-indented', 'ipq':'x-quote', 'imq':'x-noindent-quote', 'ipr':'x-right'}
- osis = re.sub(r'\\(ipi|im|ipq|imq|ipr)\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr|io|iq|i?li|iex?|s)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: '\uFDD3<p type="' + pType[m.group(1)] + '" subType="x-introduction">\n' + m.group(2) + '\uFDD3</p>\n', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\(ipi|im|ipq|imq|ipr)\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr|io|iq|i?li|iex?|s)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: '\uFDD3<p type="' + pType[m.group(1)] + '" subType="x-introduction">\n' + m.group(2) + '\uFDD3</p>\n', osis, flags=re.DOTALL)
# \iq#_text...
osis = re.sub(r'\\iq\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\i?q[\d\s]|\\fig|<l\b|<lb\b|<title\b))', r'<l level="1" subType="x-introduction">\1</l>', osis, flags=re.DOTALL)
@@ -473,24 +477,24 @@ def convertToOsis(sFile):
osis = re.sub('(<lg>.+?</lg>)', lambda m: m.group(1).replace('<lb type="x-p"/>', '</lg><lg>'), osis, flags=re.DOTALL) # re-handle \b that occurs within <lg>
# \ili#_text...
- osis = re.sub(r'\\ili\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\ili[\d\s]|<lb\b|<title\b|<item\b))', r'<item type="x-indent-1" subType="x-introduction">\uFDE0\1\uFDE0</item>', osis, flags=re.DOTALL)
- osis = re.sub(r'\\ili(\d)\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\ili[\d\s]|<lb\b|<title\b|<item\b))', r'<item type="x-indent-\1" subType="x-introduction">\uFDE0\2\uFDE0</item>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\ili\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\ili[\d\s]|<lb\b|<title\b|<item\b))', '<item type="x-indent-1" subType="x-introduction">\uFDE0'+r'\1'+'\uFDE0</item>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\ili(\d)\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\ili[\d\s]|<lb\b|<title\b|<item\b))', '<item type="x-indent-\1" subType="x-introduction">\uFDE0'+r'\2'+'\uFDE0</item>', osis, flags=re.DOTALL)
osis = osis.replace('\n</item>', '</item>\n')
- osis = re.sub('(<item [^\uFDD0\uFDD1\uFDD3\uFDD4]+</item>)', r'\uFDD3<list>\1</list>\uFDD3', osis, flags=re.DOTALL)
+ osis = re.sub('(<item [^\uFDD0\uFDD1\uFDD3\uFDD4]+</item>)', '\uFDD3<list>'+r'\1'+'</list>\uFDD3', osis, flags=re.DOTALL)
# \iot_text...
# \io#_text...(references range)
- osis = re.sub(r'\\io\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\io[t\d\s]|\\iex?|<lb\b|<title\b|<item\b))', r'<item type="x-indent-1" subType="x-introduction">\uFDE1\1\uFDE1</item>', osis, flags=re.DOTALL)
- osis = re.sub(r'\\io(\d)\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\io[t\d\s]|\\iex?|<lb\b|<title\b|<item\b))', r'<item type="x-indent-\1" subType="x-introduction">\uFDE1\2\uFDE1</item>', osis, flags=re.DOTALL)
- osis = re.sub(r'\\iot\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\io[t\d\s]|\\iex?|<lb\b|<title\b|<item\b))', r'<item type="head">\uFDE1\1\uFDE1</item type="head">', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\io\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\io[t\d\s]|\\iex?|<lb\b|<title\b|<item\b))', '<item type="x-indent-1" subType="x-introduction">\uFDE1'+r'\1'+'\uFDE1</item>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\io(\d)\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\io[t\d\s]|\\iex?|<lb\b|<title\b|<item\b))', '<item type="x-indent-\1" subType="x-introduction">\uFDE1'+r'\2'+'\uFDE1</item>', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\iot\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\io[t\d\s]|\\iex?|<lb\b|<title\b|<item\b))', '<item type="head">\uFDE1'+r'\1'+'\uFDE1</item type="head">', osis, flags=re.DOTALL)
osis = osis.replace('\n</item>', '</item>\n')
- osis = re.sub('(<item [^\uFDD0\uFDD1\uFDD3\uFDD4\uFDE0]+</item>)', r'\uFDD3<div type="outline"><list>\1</list></div>\uFDD3', osis, flags=re.DOTALL)
+ osis = re.sub('(<item [^\uFDD0\uFDD1\uFDD3\uFDD4\uFDE0]+</item>)', '\uFDD3<div type="outline"><list>'+r'\1'+'</list></div>\uFDD3', osis, flags=re.DOTALL)
osis = re.sub('item type="head"', 'head', osis)
# \ior_text...\ior*
osis = re.sub(r'\\ior\b\s+(.+?)\\ior\*', r'<reference>\1</reference>', osis, flags=re.DOTALL)
-
- # \iex # TODO: look for example; I have no idea what this would look like in context
+
+ # \iex # TODO: look for example; I have no idea what this would look like in context
osis = re.sub(r'\\iex\b\s*(.+?)'+'?=(\s*(\\c|</div type="book">\uFDD0))', r'<div type="bridge">\1</div>', osis, flags=re.DOTALL)
# \iqt_text...\iqt*
@@ -538,14 +542,14 @@ def convertToOsis(sFile):
osis = re.sub('(\uFDDE<div type="x-subSubSubSubSection">[^\uFDD5\uFDD0\uFDD6\uFDD7\uFDD8\uFDD9\uFDDA\uFDDB\uFDDC\uFDDD\uFDDE]+)', r'\1'+'</div>\uFDDE\n', osis, flags=re.DOTALL)
# \sr_text...
- osis = re.sub(r'\\sr\s+(.+)', r'\uFDD4<title type="scope"><reference>\1</reference></title>', osis)
+ osis = re.sub(r'\\sr\s+(.+)', '\uFDD4<title type="scope"><reference>'+r'\1</reference></title>', osis)
# \r_text...
- osis = re.sub(r'\\r\s+(.+)', r'\uFDD4<title type="parallel"><reference type="parallel">\1</reference></title>', osis)
+ osis = re.sub(r'\\r\s+(.+)', '\uFDD4<title type="parallel"><reference type="parallel">'+r'\1</reference></title>', osis)
# \rq_text...\rq*
osis = re.sub(r'\\rq\s+(.+?)\\rq\*', r'<reference type="source">\1</reference>', osis, flags=re.DOTALL)
# \d_text...
- osis = re.sub(r'\\d\s+(.+)', r'\uFDD4<title canonical="true" type="psalm">\1</title>', osis)
+ osis = re.sub(r'\\d\s+(.+)', '\uFDD4<title canonical="true" type="psalm">'+r'\1</title>', osis)
# \sp_text...
osis = re.sub(r'\\sp\s+(.+)', r'<speaker>\1</speaker>', osis)
@@ -564,7 +568,7 @@ def convertToOsis(sFile):
supported: \c, \ca...\ca*, \cl, \cp, \cd, \v, \va...\va*, \vp...\vp*
"""
# \c_#
- osis = re.sub(r'\\c\s+([^\s]+)\b(.+?)(?=(\\c\s+|</div type="book"))', lambda m: '\uFDD1<chapter osisID="$BOOK$.' + m.group(1) + r'" sID="$BOOK$.' + m.group(1) + '"/>' + m.group(2) + '<chapter eID="$BOOK$.' + m.group(1) + '"/>\uFDD3\n', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\c\s+([^\s]+)\b(.+?)(?=(\\c\s+|</div type="book"))', lambda m: '\uFDD1<chapter osisID="$BOOK$.' + m.group(1) + r'" sID="$BOOK$.' + m.group(1) + '"/>' + m.group(2) + '<chapter eID="$BOOK$.' + m.group(1) + '"/>\uFDD3\n', osis, flags=re.DOTALL)
# \cp_#
# \ca_#\ca*
@@ -590,7 +594,7 @@ def convertToOsis(sFile):
osis = re.sub(r'\\cd\b\s+(.+)', '\uFDD4<title type="x-description">'+r'\1</title>', osis)
# \v_#
- osis = re.sub(r'\\v\s+([^\s]+)\b\s*(.+?)(?=(\\v\s+|</div type="book"|<chapter eID))', lambda m: '\uFDD2<verse osisID="$BOOK$.$CHAP$.' + m.group(1) + '" sID="$BOOK$.$CHAP$.' + m.group(1) + '"/>' + m.group(2) + '<verse eID="$BOOK$.$CHAP$.' + m.group(1) + '"/>\uFDD2\n', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\v\s+([^\s]+)\b\s*(.+?)(?=(\\v\s+|</div type="book"|<chapter eID))', lambda m: '\uFDD2<verse osisID="$BOOK$.$CHAP$.' + m.group(1) + '" sID="$BOOK$.$CHAP$.' + m.group(1) + '"/>' + m.group(2) + '<verse eID="$BOOK$.$CHAP$.' + m.group(1) + '"/>\uFDD2\n', osis, flags=re.DOTALL)
# \vp_#\vp*
# \va_#\va*
@@ -626,7 +630,7 @@ def convertToOsis(sFile):
# \pmo(_text...)
# \pm(_text...)
# \pmc(_text...)
- # \pmr_text... # deprecated: map to same as \pr
+ # \pmr_text... # deprecated: map to same as \pr
# \pi#(_Sample text...)
# \mi(_text...)
# \nb
@@ -638,7 +642,7 @@ def convertToOsis(sFile):
paragraphregex = 'pc|pr|m|pmo|pm|pmc|pmr|pi|pi1|pi2|pi3|pi4|pi5|mi|nb'
if relaxedConformance:
paragraphregex += '|phi|ps|psi|p1|p2|p3|p4|p5'
- osis = re.sub(r'\\('+paragraphregex+r')\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: '\uFDD3<p type="' + pType[m.group(1)] + '">\n' + m.group(2) + '\uFDD3</p>\n', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\('+paragraphregex+r')\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: '\uFDD3<p type="' + pType[m.group(1)] + '">\n' + m.group(2) + '\uFDD3</p>\n', osis, flags=re.DOTALL)
# \cls_text...
osis = re.sub(r'\\m\s+(.+?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: '\uFDD3<closer>' + m.group(1) + '\uFDD3</closer>\n', osis, flags=re.DOTALL)
@@ -650,7 +654,7 @@ def convertToOsis(sFile):
osis = re.sub(r'\\li\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\li[\d\s]|<lb\b|<title\b|<item\b))', r'<item type="x-indent-1">\1</item>', osis, flags=re.DOTALL)
osis = re.sub(r'\\li(\d)\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\li[\d\s]|<lb\b|<title\b|<item\b))', r'<item type="x-indent-\1">\2</item>', osis, flags=re.DOTALL)
osis = osis.replace('\n</item>', '</item>\n')
- osis = re.sub('(<item [^\uFDD0\uFDD1\uFDD3\uFDD4\uFDE0\uFDE1]+</item>)', r'\uFDD3<list>\1</list>\uFDD3', osis, flags=re.DOTALL)
+ osis = re.sub('(<item [^\uFDD0\uFDD1\uFDD3\uFDD4\uFDE0\uFDE1]+</item>)', '\uFDD3<list>'+r'\1'+'</list>\uFDD3', osis, flags=re.DOTALL)
# \b
osis = re.sub(r'\\b\b\s?', '<lb type="x-p"/>', osis)
@@ -789,7 +793,7 @@ def convertToOsis(sFile):
# \xt_ # This isn't guaranteed to be *the* reference, but it's a good guess.
note = re.sub(r'\\xt\b\s(.+?)(?=(\\x|'+'\uFDDF))', '\uFDDF'+r'<reference>\1</reference>', note)
-
+
if relaxedConformance:
# TODO: move this to a concordance/index-specific section?
# \xtSee..\xtSee*: Concordance and Names Index markup for an alternate entry target reference.
@@ -862,7 +866,7 @@ def convertToOsis(sFile):
osis = re.sub(r'\\dc\b\s*(.+?)\\dc\*', r'<transChange type="added" editions="dc">\1</transChange>', osis, flags=re.DOTALL)
# \sls_...\sls*
- osis = re.sub(r'\\sls\b\s*(.+?)\\sls\*', r'<foreign>/1</foreign>', osis, flags=re.DOTALL) # find a better mapping than <foreign>?
+ osis = re.sub(r'\\sls\b\s*(.+?)\\sls\*', r'<foreign>/1</foreign>', osis, flags=re.DOTALL) # TODO: find a better mapping than <foreign>?
if relaxedConformance:
# \addpn...\addpn*
@@ -873,7 +877,6 @@ def convertToOsis(sFile):
osis = re.sub(r'\\k3\s+(.+?)\\k3\*', r'<seg type="keyword" n="3">\1</seg>', osis, flags=re.DOTALL)
osis = re.sub(r'\\k4\s+(.+?)\\k4\*', r'<seg type="keyword" n="4">\1</seg>', osis, flags=re.DOTALL)
osis = re.sub(r'\\k5\s+(.+?)\\k5\*', r'<seg type="keyword" n="5">\1</seg>', osis, flags=re.DOTALL)
-
return osis
@@ -930,7 +933,7 @@ def convertToOsis(sFile):
def makeFigure(matchObject):
fig_desc,fig_file,fig_size,fig_loc,fig_copy,fig_cap,fig_ref = matchObject.groups()
figure = '<figure'
- if fig_file:
+ if fig_file:
figure += ' src="' + fig_file + '"'
if fig_size:
figure += ' size="' + fig_size + '"'
@@ -990,7 +993,7 @@ def convertToOsis(sFile):
periph += 'introduction" subType="x-' + introPeripherals[periphType]
else:
periph += 'x-unknown'
- periph += '">\n' + contents + '</div>\n'
+ periph += '">\n' + contents + '</div>\n'
return periph
osis = re.sub(r'\\periph\s+([^'+'\n'+r']+)\s*'+'\n'+r'(.+?)(?=(</div type="book">|\\periph\s+))', tagPeriph, osis, flags=re.DOTALL)
@@ -1012,7 +1015,7 @@ def convertToOsis(sFile):
osis = re.sub(r'(<note [^>]*?type="crossReference"[^>]*>.*?</note>)', lambda m: processXref(m.group(1)), osis, flags=re.DOTALL)
# \esb...\esbex # TODO: this likely needs to go much earlier in the process
- osis = re.sub(r'\\esb\b\s*(.+?)\\esbe\b\s*', r'\uFDD5<div type="x-sidebar">\1</div>\uFDD5'+'\n', osis, flags=re.DOTALL)
+ osis = re.sub(r'\\esb\b\s*(.+?)\\esbe\b\s*', '\uFDD5<div type="x-sidebar">'+r'\1'+'</div>\uFDD5\n', osis, flags=re.DOTALL)
# \cat_<TAG>\cat*
osis = re.sub(r'\\cat\b\s+(.+?)\\cat\*', r'<index index="category" level1="\1"/>', osis)
@@ -1036,7 +1039,7 @@ def convertToOsis(sFile):
# \z{X}...\z{X}*
osis = re.sub(r'\z([^\s]+)\s(.+?)(\z\1\*)', r'<seg type="x-\1">\2</seg>', osis, flags=re.DOTALL)
-
+
# \z{X}
osis = re.sub(r'\\z([^\s]+)', r'<milestone type="x-usfm-z-\1"/>', osis)
@@ -1063,7 +1066,6 @@ def convertToOsis(sFile):
return ' '.join(osisID)
osis = re.sub(r'\$BOOK\$\.\$CHAP\$\.(\d+(,\d+)+)"', lambda m: expandSeries(m.group(1))+'"', osis)
-
# fill in book & chapter values
bookChunks = osis.split('\uFDD0')
osis = ''
@@ -1110,7 +1112,6 @@ def convertToOsis(sFile):
osis = re.sub(' ?\n\n+', '\n', osis)
return osis
-
### Processing starts here
if encoding:
osis = codecs.open(sFile, 'r', encoding).read().strip() + '\n'
@@ -1128,7 +1129,6 @@ def convertToOsis(sFile):
print(('WARNING: Encoding "' + encoding + '" unknown, processing ' + sFile + ' as UTF-8.'))
encoding = 'utf-8'
-
# call individual conversion processors in series
osis = cvtPreprocess(osis, relaxedConformance)
osis = cvtRelaxedConformanceRemaps(osis, relaxedConformance)
@@ -1154,7 +1154,7 @@ def convertToOsis(sFile):
# change type on special books
for sb in specialBooks:
- osis = osis.replace('<div type="book" osisID="' + sb + '">', '<div type="' + sb.lower() + '">')
+ osis = osis.replace('<div type="book" osisID="' + sb + '">', '<div type="' + sb.lower() + '">')
if DEBUG:
localUnhandledTags = set(re.findall(r'(\\[^\s\*]+?\b\*?)', osis))
@@ -1366,7 +1366,6 @@ if __name__ == "__main__":
k,v=result_queue.get()
osisSegment[k]=v
-
verbosePrint('Assembling OSIS document...')
osisDoc = '<osis xmlns="http://www.bibletechnologies.net/2003/OSIS/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.bibletechnologies.net/2003/OSIS/namespace http://www.bibletechnologies.net/osisCore.'+osisVersion+'.xsd">\n<osisText osisRefWork="Bible" xml:lang="und" osisIDWork="' + osisWork + '">\n<header>\n<work osisWork="' + osisWork + '"/>\n</header>\n'
@@ -1389,7 +1388,7 @@ if __name__ == "__main__":
except ImportError:
verbosePrint('For schema validation, install lxml')
except etree.XMLSyntaxError as eVal:
- print('XML Validation error: ' + eVal)
+ print('XML Validation error: ' + str(eVal))
osisFile = codecs.open(osisFileName, 'w', 'utf-8')
osisFile.write('<?xml version="1.0" encoding="UTF-8"?>\n')