diff options
author | Luke S. <luke@motimail.com> | 2007-07-19 22:51:32 +0000 |
---|---|---|
committer | Luke S. <luke@motimail.com> | 2007-07-19 22:51:32 +0000 |
commit | 21cf3069a438459f141f256b12ccb5e9d05b21d0 (patch) | |
tree | 8c1cac6bdf2684c6d4e16da37c7d5d392ce63a1f /python | |
parent | 088d12c8c9eba9dd3c8e1e30163b5a00fba7528a (diff) | |
download | sword-tools-21cf3069a438459f141f256b12ccb5e9d05b21d0.tar.gz |
Added a Python library of various tools for making modules, and
a specific script for creating a combined Calvin's Commentaries module
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@89 07627401-56e2-0310-80f4-f8cd0041bdcd
Diffstat (limited to 'python')
-rw-r--r-- | python/swordutils/__init__.py | 0 | ||||
-rw-r--r-- | python/swordutils/xml/__init__.py | 0 | ||||
-rw-r--r-- | python/swordutils/xml/combine.py | 29 | ||||
-rw-r--r-- | python/swordutils/xml/thml.py | 87 | ||||
-rw-r--r-- | python/swordutils/xml/utils.py | 65 |
5 files changed, 181 insertions, 0 deletions
diff --git a/python/swordutils/__init__.py b/python/swordutils/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/python/swordutils/__init__.py diff --git a/python/swordutils/xml/__init__.py b/python/swordutils/xml/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/python/swordutils/xml/__init__.py diff --git a/python/swordutils/xml/combine.py b/python/swordutils/xml/combine.py new file mode 100644 index 0000000..1fa7647 --- /dev/null +++ b/python/swordutils/xml/combine.py @@ -0,0 +1,29 @@ +# Utilities for combining multiple module source files +# into one. + +from xml.dom import minidom +from swordutils.xml import utils + +class LazyNodes(object): + # Pulling all the documents in at once uses up too much memory. + # This class is responsible for acting as a replacement + # 'childNodes' which loads documents one at a time, + # does corrections on them and spews out the body nodes + def __init__(self, maindoc, files, alterationfunc, nodepath): + self.maindoc = maindoc # Don't actually need this + self.files = files + self.iterated_count = 0 + self.nodepath = nodepath + self.alterationfunc = alterationfunc + + def __iter__(self): + self.iterated_count += 1 + if self.iterated_count == 2: + # We've got a big performance bug if this happens. 
+ raise Exception('Performance bug') + for f in self.files: + doc = minidom.parse(f) + self.alterationfunc(doc) + body = utils.getNodesFromXPath(doc, self.nodepath)[0] + for n in body.childNodes: + yield n diff --git a/python/swordutils/xml/thml.py b/python/swordutils/xml/thml.py new file mode 100644 index 0000000..16a1956 --- /dev/null +++ b/python/swordutils/xml/thml.py @@ -0,0 +1,87 @@ +# Utility functions for manipulating ThML + +from xml.dom import minidom +from swordutils.xml import utils + + +def isScripCom(node): + return node.nodeName == u'scripCom' + +def findParentDiv(node): + pnode = node.parentNode + if pnode is None: + raise Exception("Cannot find parent div for node %r" % node) + if pnode.nodeType == minidom.Document.ELEMENT_NODE \ + and pnode.nodeName.startswith(u'div'): + return pnode + else: + return findParentDiv(pnode) + +def moveToParent(node, destParent): + if node.parentNode is destParent: + return + else: + pnode = node.parentNode + pnode.removeChild(node) + pnode.parentNode.insertBefore(node, pnode) + return moveToParent(node, destParent) + +def _findNextScripComNode(node, return_parent): + if node is None: + return None + if isScripCom(node): + if return_parent: + return node.parentNode + else: + return node + + else: + # Search deeper, but return node that is on the + # same level as our original node + descendent = _findNextScripComNode(node.firstChild, True) + if descendent is not None: + if return_parent: + return descendent.parentNode + else: + return descendent + else: + return _findNextScripComNode(node.nextSibling, False) + +def _expandScripComNode(scNode): + nextSCN = _findNextScripComNode(scNode.nextSibling, False) + collection = [] + n = scNode.nextSibling + while (n is not None and n is not nextSCN): + collection.append(n) + n = n.nextSibling + for n in collection: + n.parentNode.removeChild(n) + scNode.appendChild(n) + +def expandScripComNodes(node): + """Expands all empty <scripCom> nodes so that they contain + the nodes that 
they refer to, using neighboring <scripCom> + nodes and the structure of the XML as a guide, + starting at the supplied node""" + + if isScripCom(node): + # Often placed as markers instead of enclosing + # the nodes to which they apply. + if node.nodeValue is None or node.nodeValue == "": + # Try to find scope over which the <scripCom> element + # should actually be placed. + # Rules: + # - move the scripCom element 'up' the tree until is + # a descendent of a `divX' node, placing it before + # any of its parent nodes along the way + # - make all its sibling nodes that are below it + # into child nodes, up to the point where there + # is another <scripCom> element + div = findParentDiv(node) + moveToParent(node, div) + _expandScripComNode(node) + + if node.childNodes.length > 0: + for n in node.childNodes: + expandScripComNodes(n) + diff --git a/python/swordutils/xml/utils.py b/python/swordutils/xml/utils.py new file mode 100644 index 0000000..af726ba --- /dev/null +++ b/python/swordutils/xml/utils.py @@ -0,0 +1,65 @@ +# General XML utilities + +from xml.dom import minidom +from xml import xpath +import codecs + +def getFileWriter(fileHandle): + """Gets a 'writer' for a file object that encodes + as UTF-8""" + return codecs.lookup("UTF-8").streamwriter(fileHandle) + +def writexml(doc, fileHandle): + """Writes an XML document to a file handle""" + doc.writexml(getFileWriter(fileHandle), encoding="UTF-8") + +def getNodesFromXPath(document, path): + """Selects nodes specified by 'path' from 'document', + where path is a string or a compiled xpath object""" + if isinstance(path, basestring): + path = xpath.Compile(path) + return path.select(xpath.CreateContext(document)) + +_rootxpath = xpath.Compile('/') +def getRoot(doc): + """Returns the root node of a document""" + return getNodesFromXPath(doc, _rootxpath)[0] + + +# Classes to help us with modifications +class RemoveNode: + def act(self, node): + if isinstance(node, minidom.Attr): + 
node.ownerElement.removeAttribute(node.name) + else: + node.parentNode.removeChild(node) + +class GeneralReplaceContents: + """Replace the contents of a node, + with user providable function for calculating replacement text + """ + def __init__(self, replacefunc): + self.replacefunc = replacefunc + def act(self, node): + origText = u''.join(c.toxml() for c in node.childNodes) + + # Usually replacefunc will just return text, + # but we allow it to return xml as well + newNodes = minidom.parseString(u'<dummy>' + self.replacefunc(origText) + u'</dummy>' ) + # newNodes is a DOM instance, and it is has a dummy + # element wrapping the nodes we actually want. + node.childNodes = newNodes.childNodes[0].childNodes + +class ReplaceContents(GeneralReplaceContents): + def __init__(self, replacementtext): + assert isinstance(replacementtext, unicode) + def _replacefunc(text): + return replacementtext + self.replacefunc = _replacefunc + +def do_replacements(doc, replacements): + ctx = xpath.CreateContext(doc) + for path, action in replacements.items(): + xp = xpath.Compile(path) + for n in xp.select(ctx): + action.act(n) |