#!/usr/bin/env python2.5
# Currently using Python 2.5, since PyXML has not been updated
# for Python 2.6

# Currently, the Sword importer, osis2mod, expects the OSIS file to be
# structured like a Bible - i.e. <div type="book">, <div type="chapter">,
# <verse>.  In general, a commentary might be structured using plain <div>
# elements instead.  For importing, we convert to the format osis2mod expects.

import sys
import codecs
from xml.dom import minidom as dom


def usage():
    """Print a short command-line usage message to stdout."""
    print("Usage: genbookOsis2Commentary.py <filename>")
    print("Output is OSIS converted for use by osis2mod.")


def isRoot(n):
    """Return True if *n* is an element node named ``osis``."""
    return (n.nodeType == dom.Document.ELEMENT_NODE
            and n.nodeName == u'osis')


def isVerseDiv(n):
    """Return True if *n* is a ``div`` whose osisID names a single verse.

    We must only match things like "Ps.1.1" and not "Ps.1", i.e. the
    osisID must have exactly three dot-separated parts.
    """
    # getAttribute() returns '' for a missing attribute, and ''.split('.')
    # has length 1, so a div without an osisID is rejected as well.
    return (n.nodeName == u'div'
            and len(n.getAttribute(u'osisID').split('.')) == 3)


class VerseRef(object):
    """A dotted reference such as "Ps.1.1" split into its components.

    Attributes:
        book: first component (the empty string for an empty reference).
        chapter: second component, or None if absent.
        verse: third component, or None if absent.
    """

    def __init__(self, ref):
        parts = ref.split('.')
        # str.split always yields at least one item, so only chapter and
        # verse can be missing; pad them with None.
        padded = parts + [None, None]
        self.book, self.chapter, self.verse = padded[:3]


def _wrap_in_div(doc, node, div_type):
    """Insert a new ``<div type=div_type>`` in place of *node* and move
    *node* inside it.  Returns the new div."""
    div = doc.createElement(u'div')
    div.setAttribute(u'type', div_type)
    node.parentNode.insertBefore(div, node)
    # appendChild() detaches node from its old parent first.
    div.appendChild(node)
    return div


def add_book_node(doc, node, verseRef):
    """Wrap *node* in a new ``<div type="book">`` and return that div.

    *verseRef* is accepted for interface compatibility; it is not used.
    """
    return _wrap_in_div(doc, node, u'book')


def add_chapter_node(doc, node, verseRef):
    """Wrap *node* in a new ``<div type="chapter">`` and return that div.

    *verseRef* is accepted for interface compatibility; it is not used.
    """
    return _wrap_in_div(doc, node, u'chapter')


def versify_tree(doc, node, curRef=None, curBookNode=None,
                 curChapterNode=None):
    """Traverse the tree below *node*, modifying divisions to include
    ``<div type="book">``, ``<div type="chapter">`` and ``<verse>``.

    Every verse div (see isVerseDiv) is renamed to ``verse``, loses its
    ``type`` attribute and is placed inside a chapter div, which in turn
    lives inside a book div.

    Args:
        doc: the owning document, used to create new elements.
        node: the node to process (recursively).
        curRef: VerseRef of the last verse seen; defaults to VerseRef('').
        curBookNode: book div of the last verse seen, if any.
        curChapterNode: chapter div of the last verse seen, if any.

    Returns:
        The updated ``(curRef, curBookNode, curChapterNode)`` tuple.
    """
    if curRef is None:
        curRef = VerseRef('')

    if isVerseDiv(node):
        node.tagName = u'verse'
        # Not every verse div carries a type attribute; removing a missing
        # attribute would raise NotFoundErr.
        if node.hasAttribute(u'type'):
            node.removeAttribute(u'type')
        vr = VerseRef(node.getAttribute(u'osisID'))

        if vr.book != curRef.book:
            curBookNode = add_book_node(doc, node, vr)
            curChapterNode = add_chapter_node(doc, node, vr)
        elif vr.chapter != curRef.chapter:
            curChapterNode = add_chapter_node(doc, node, vr)
            # The chapter div was created where the verse used to be;
            # move it into the current book so that chapters are nested
            # in their book, just as verses are nested in their chapter.
            if curBookNode is not None:
                curBookNode.appendChild(curChapterNode)
        else:
            # Move the verse into the current chapter.
            # We know that curChapterNode != None here,
            # because VerseRef('') never matches VerseRef(anythingelse)
            curChapterNode.appendChild(node)
        curRef = vr
    elif node.childNodes.length > 0:
        # Iterate over a snapshot: the recursion moves nodes around.
        for n in list(node.childNodes):
            curRef, curBookNode, curChapterNode = versify_tree(
                doc, n,
                curRef=curRef,
                curBookNode=curBookNode,
                curChapterNode=curChapterNode)

    return (curRef, curBookNode, curChapterNode)


def remove_non_commentary(node):
    """Recursively removes any body text which is not part of commentary
    on a verse, returns True if the current node contains any commentary"""
    # The preamble before actual commentary is currently included by
    # osis2mod.  This is fairly annoying, and this function will
    # remove such text so that it is not included in the end product
    assert node is not None

    inCommentary = (
        (node.nodeName == u'div'
         and node.getAttribute(u'type') in (u'book', u'chapter'))
        or node.nodeName == u'verse')
    inHeader = node.nodeName in (u'osis', u'osisText') or isRoot(node)

    if not inHeader and not inCommentary:
        # remove text contents of this item, or entire node
        # if it has no child elements
        for n in list(node.childNodes):
            if n.nodeType == n.TEXT_NODE:
                node.removeChild(n)
        if node.childNodes.length == 0:
            # Nothing more to do now, there is definitely
            # no commentary here.
            return False

    if inCommentary:
        # keep node and everything that is below node
        return True

    childrenHaveCommentary = False
    for n in list(node.childNodes):
        if remove_non_commentary(n):
            childrenHaveCommentary = True
        else:
            node.removeChild(n)
    return childrenHaveCommentary


def main(filename):
    """Convert the OSIS file *filename* and write the result to stdout."""
    # Imported here rather than at module level so that the helper
    # functions above can be imported without the swordutils package.
    from swordutils.xml.utils import getFileWriter

    d = dom.parse(filename)
    roots = [n for n in d.childNodes if isRoot(n)]
    if not roots:
        sys.exit("genbookOsis2Commentary.py: no <osis> root element in %s"
                 % filename)
    rootNode = roots[0]
    versify_tree(d, rootNode)
    remove_non_commentary(rootNode)
    d.writexml(getFileWriter(sys.stdout), encoding="UTF-8")


if __name__ == "__main__":
    if len(sys.argv) != 2:
        usage()
        sys.exit(1)
    main(sys.argv[1])