1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
|
#!/usr/bin/env python2.5
# Currently using Python 2.5, since PyXML has not been updated
# for Python 2.6
# Currently, the Sword importer, osis2mod, expects the OSIS file to be
# structured like a Bible - i.e. <book>, <chapter>, <verse>.
# In general, a commentary might instead be structured using
# <div osisID="..."> elements. For importing, we convert to the format
# osis2mod expects.
import sys
import codecs
from xml.dom import minidom as dom
from swordutils.xml.utils import getFileWriter
def usage():
    """Write the command-line usage summary to standard error.

    main() sends the converted document to sys.stdout, so the help text
    is written to stderr to keep it out of a redirected output file.
    """
    sys.stderr.write("Usage: genbookOsis2Commentary.py <osisFile>\n")
    sys.stderr.write("Output is OSIS converted for use by osis2mod.\n")
def isRoot(n):
    """Return True if n is an element node named 'osis', else False."""
    if n.nodeType != dom.Document.ELEMENT_NODE:
        # Documents, text, comments etc. can never be the OSIS root.
        return False
    return n.nodeName == u'osis'
def isVerseDiv(n):
    """Return True if n is a <div> whose osisID names a single verse.

    Only three-part identifiers such as "Ps.1.1" qualify; a chapter-level
    identifier such as "Ps.1" does not.  Every part must be non-empty, so
    malformed values like "Ps.1." or ".." are rejected as well.
    """
    # nodeName is tested first so that hasAttribute() is only reached for
    # nodes named 'div'; text nodes and the like have no such method.
    if n.nodeName != u'div' or not n.hasAttribute(u'osisID'):
        return False
    parts = n.getAttribute(u'osisID').split('.')
    return len(parts) == 3 and all(parts)
class VerseRef(object):
    """A dotted reference such as "Ps.1.1" split into its components.

    The attributes book, chapter and verse hold the first, second and
    third dot-separated fields of the reference.  A field that is absent
    is None; fields beyond the third are ignored.  An empty reference
    yields book == '' because splitting '' gives a single empty field.
    """

    def __init__(self, ref):
        fields = ref.split('.')
        # Pad with None so short references unpack cleanly, then keep
        # only the first three fields.
        fields = (fields + [None, None, None])[:3]
        self.book, self.chapter, self.verse = fields
def add_book_node(doc, node, verseRef):
    """Wrap node in a new <div type="book"> and return that div.

    The div is created from doc and inserted under node's parent, just
    before node; node is then moved inside it.  verseRef is accepted but
    not used: no <head> title is generated for the book.
    """
    wrapper = doc.createElement(u'div')
    wrapper.setAttribute(u'type', u'book')
    parent = node.parentNode
    parent.insertBefore(wrapper, node)
    # Re-home the verse inside the freshly inserted wrapper.
    wrapper.appendChild(node)
    return wrapper
def add_chapter_node(doc, node, verseRef):
    """Wrap node in a new <div type="chapter"> and return that div.

    The div is created from doc and inserted under node's parent, just
    before node; node is then moved inside it.  verseRef is accepted but
    not used: no <head> title is generated for the chapter.
    """
    wrapper = doc.createElement(u'div')
    wrapper.setAttribute(u'type', u'chapter')
    parent = node.parentNode
    parent.insertBefore(wrapper, node)
    # Re-home the verse inside the freshly inserted wrapper.
    wrapper.appendChild(node)
    return wrapper
def versify_tree(doc, node, curRef=None, curBookNode=None, curChapterNode=None):
    """Rewrite verse divs under node into book / chapter / verse nesting.

    Walks the tree rooted at node in document order.  Every div accepted
    by isVerseDiv() is renamed to <verse>, loses its type attribute, and
    is placed inside a <div type="chapter"> that in turn sits inside a
    <div type="book">.  Children of a verse div are not visited.

    Parameters:
        doc            -- owning document, used to create new elements
        node           -- node to process (recursively)
        curRef         -- VerseRef of the last verse seen, or None at the
                          start of a traversal
        curBookNode    -- book div currently being filled, or None
        curChapterNode -- chapter div currently being filled, or None

    Returns the updated (curRef, curBookNode, curChapterNode) tuple so
    the state can be threaded through sibling and recursive calls.
    """
    if curRef is None:
        # Built per call rather than once in the signature.
        curRef = VerseRef('')
    if isVerseDiv(node):
        # Assigning tagName alone changes what gets serialized but leaves
        # nodeName as 'div'; set both so name checks see a <verse>.
        node.tagName = node.nodeName = u'verse'
        # Not every verse div carries a type attribute, and removing a
        # missing attribute raises NotFoundErr.
        if node.hasAttribute(u'type'):
            node.removeAttribute(u'type')
        vr = VerseRef(node.getAttribute(u'osisID'))
        # The "is None" tests start a fresh container even when a
        # malformed reference happens to equal the initial empty curRef.
        if curBookNode is None or vr.book != curRef.book:
            curBookNode = add_book_node(doc, node, vr)
            curChapterNode = add_chapter_node(doc, node, vr)
        elif curChapterNode is None or vr.chapter != curRef.chapter:
            curChapterNode = add_chapter_node(doc, node, vr)
            # Keep later chapters inside the book they belong to instead
            # of leaving them beside it.
            curBookNode.appendChild(curChapterNode)
        else:
            # Same book and chapter: move the verse into the open chapter.
            curChapterNode.appendChild(node)
        curRef = vr
    elif node.childNodes.length > 0:
        # Iterate over a snapshot: the recursive calls move nodes around.
        for n in list(node.childNodes):
            curRef, curBookNode, curChapterNode = versify_tree(
                doc, n, curRef=curRef, curBookNode=curBookNode,
                curChapterNode=curChapterNode)
    return (curRef, curBookNode, curChapterNode)
def remove_non_commentary(node):
    """Recursively remove body text that is not commentary on a verse.

    A node counts as commentary when it is a <div> whose type is 'book'
    or 'chapter', or when it is a <verse>; such a node is kept together
    with everything below it.  <osis> and <osisText> are structural
    containers: they keep their own place and only their children are
    filtered.  Any other node has its direct text children removed and
    survives only if some descendant is commentary.

    Returns True if node is, or contains, commentary; False otherwise.
    Children that return False are removed from node.
    """
    # The preamble before the actual commentary is otherwise included by
    # osis2mod; this function strips it from the end product.
    assert node is not None
    isElement = node.nodeType == node.ELEMENT_NODE
    # An element renamed by assigning tagName keeps its old nodeName, so
    # look at both when recognizing a verse.
    isVerse = u'verse' in (node.nodeName, getattr(node, 'tagName', None))
    isBookOrChapter = (
        isElement
        and node.nodeName == u'div'
        and node.getAttribute('type') in ('book', 'chapter'))
    inCommentary = isBookOrChapter or isVerse
    inHeader = node.nodeName in (u'osis', u'osisText')

    if inCommentary:
        # Keep node and everything that is below node.
        return True

    if not inHeader:
        # Drop the text directly inside this node; if nothing else is
        # left, there is definitely no commentary here.
        for n in list(node.childNodes):
            if n.nodeType == n.TEXT_NODE:
                node.removeChild(n)
        if node.childNodes.length == 0:
            return False

    childrenHaveCommentary = False
    # Snapshot the children because the loop removes some of them.
    for n in list(node.childNodes):
        if remove_non_commentary(n):
            childrenHaveCommentary = True
        else:
            node.removeChild(n)
    return childrenHaveCommentary
def main(filename):
    """Convert the OSIS file at filename and write the result to stdout.

    The document is parsed, its verse divs are regrouped by
    versify_tree(), non-commentary content is stripped by
    remove_non_commentary(), and the result is serialized as UTF-8.

    Raises ValueError if the document element is not <osis>.
    """
    d = dom.parse(filename)
    # documentElement is the document's single top-level element; check
    # it explicitly so a non-OSIS file yields a clear error rather than
    # an IndexError from an empty filter result.
    rootNode = d.documentElement
    if rootNode is None or not isRoot(rootNode):
        raise ValueError("%s: document element is not <osis>" % filename)
    versify_tree(d, rootNode)
    remove_non_commentary(rootNode)
    d.writexml(getFileWriter(sys.stdout), encoding="UTF-8")
if __name__ == "__main__":
    # Exactly one argument (the OSIS file) is expected after the script
    # name; anything else prints the usage text and exits with status 1.
    cli_args = sys.argv[1:]
    if len(cli_args) == 1:
        main(cli_args[0])
    else:
        usage()
        sys.exit(1)
|