summaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
authorLuke S. <luke@motimail.com>2007-07-19 22:51:32 +0000
committerLuke S. <luke@motimail.com>2007-07-19 22:51:32 +0000
commit21cf3069a438459f141f256b12ccb5e9d05b21d0 (patch)
tree8c1cac6bdf2684c6d4e16da37c7d5d392ce63a1f /python
parent088d12c8c9eba9dd3c8e1e30163b5a00fba7528a (diff)
downloadsword-tools-21cf3069a438459f141f256b12ccb5e9d05b21d0.tar.gz
Added a Python library of various tools for making modules, and a
specific script for creating a combined Calvin's Commentaries module.

git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@89 07627401-56e2-0310-80f4-f8cd0041bdcd
Diffstat (limited to 'python')
-rw-r--r--python/swordutils/__init__.py0
-rw-r--r--python/swordutils/xml/__init__.py0
-rw-r--r--python/swordutils/xml/combine.py29
-rw-r--r--python/swordutils/xml/thml.py87
-rw-r--r--python/swordutils/xml/utils.py65
5 files changed, 181 insertions, 0 deletions
diff --git a/python/swordutils/__init__.py b/python/swordutils/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/python/swordutils/__init__.py
diff --git a/python/swordutils/xml/__init__.py b/python/swordutils/xml/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/python/swordutils/xml/__init__.py
diff --git a/python/swordutils/xml/combine.py b/python/swordutils/xml/combine.py
new file mode 100644
index 0000000..1fa7647
--- /dev/null
+++ b/python/swordutils/xml/combine.py
@@ -0,0 +1,29 @@
+# Utilities for combining multiple module source files
+# into one.
+
+from xml.dom import minidom
+from swordutils.xml import utils
+
class LazyNodes(object):
    """Lazy stand-in for a 'childNodes' list that spans several files.

    Pulling all the documents in at once uses up too much memory, so
    iterating over an instance parses the files one at a time, runs the
    alteration function on each parsed document and yields the children
    of the node selected by 'nodepath'.

    Parameters:
      maindoc        -- stored on the instance; not used by this class.
      files          -- iterable of items accepted by minidom.parse().
      alterationfunc -- called as alterationfunc(doc) on every freshly
                        parsed document, before any node is yielded.
      nodepath       -- XPath handed to utils.getNodesFromXPath(); the
                        children of its first match are yielded.
    """

    def __init__(self, maindoc, files, alterationfunc, nodepath):
        self.maindoc = maindoc  # Don't actually need this
        self.files = files
        self.iterated_count = 0
        self.nodepath = nodepath
        self.alterationfunc = alterationfunc

    def __iter__(self):
        """Yield the body nodes of every file, parsing one file at a time.

        Raises Exception('Performance bug') on every pass after the
        first, because a repeated pass re-parses every file.
        Raises IndexError when 'nodepath' selects nothing in a file.
        """
        self.iterated_count += 1
        if self.iterated_count > 1:
            # We've got a big performance bug if this happens.  Test with
            # '>' rather than '== 2' so that a third or later pass is
            # reported as well.
            raise Exception('Performance bug')
        for f in self.files:
            doc = minidom.parse(f)
            self.alterationfunc(doc)
            matches = utils.getNodesFromXPath(doc, self.nodepath)
            if not matches:
                raise IndexError("XPath %r matched no node in %r"
                                 % (self.nodepath, f))
            # Iterate over a snapshot: if the consumer re-parents a
            # yielded node, the live childNodes list shrinks under us and
            # siblings would be skipped.
            for n in list(matches[0].childNodes):
                yield n
diff --git a/python/swordutils/xml/thml.py b/python/swordutils/xml/thml.py
new file mode 100644
index 0000000..16a1956
--- /dev/null
+++ b/python/swordutils/xml/thml.py
@@ -0,0 +1,87 @@
+# Utility functions for manipulating ThML
+
+from xml.dom import minidom
+from swordutils.xml import utils
+
+
def isScripCom(node):
    """Return True if the node's name is 'scripCom'."""
    name = node.nodeName
    return name == u'scripCom'
+
def findParentDiv(node):
    """Return the nearest ancestor element whose name starts with 'div'.

    The node itself is never returned, only one of its ancestors.

    Raises Exception when no ancestor qualifies.  The message names the
    node that was passed in, not the top-most ancestor that was reached.
    """
    # Walk upwards with a loop instead of recursing, so that the original
    # node is still at hand for the error message.
    ancestor = node.parentNode
    while ancestor is not None:
        if ancestor.nodeType == minidom.Node.ELEMENT_NODE \
                and ancestor.nodeName.startswith(u'div'):
            return ancestor
        ancestor = ancestor.parentNode
    raise Exception("Cannot find parent div for node %r" % node)
+
def moveToParent(node, destParent):
    """Hoist 'node' up the tree until it is a direct child of 'destParent'.

    At every step the node is taken out of its current parent and
    re-inserted in the grandparent, immediately before that parent.
    Nothing happens if 'node' is already a child of 'destParent'.

    Raises ValueError, before the tree is touched, when 'destParent' is
    not an ancestor of 'node'.
    """
    # Check ancestry first; otherwise a wrong 'destParent' is only
    # discovered at the top of the tree, after the node has already been
    # pulled out of its place.
    ancestor = node.parentNode
    while ancestor is not destParent:
        if ancestor is None:
            raise ValueError("%r is not an ancestor of %r"
                             % (destParent, node))
        ancestor = ancestor.parentNode

    while node.parentNode is not destParent:
        pnode = node.parentNode
        pnode.removeChild(node)
        pnode.parentNode.insertBefore(node, pnode)
+
def _containsScripCom(node):
    """Return True if 'node' is a <scripCom> or has one as a descendant."""
    pending = [node]
    while pending:
        current = pending.pop()
        if isScripCom(current):
            return True
        pending.extend(current.childNodes)
    return False


def _findNextScripComNode(node, return_parent):
    """Find the first of 'node' and its following siblings that is, or
    contains, a <scripCom> element.

    The search looks below each sibling, but the result is always on the
    same level as 'node'.  Passing 'return_parent' as true returns the
    parent of that sibling instead of the sibling.

    Returns None if 'node' is None or nothing was found.
    """
    sibling = node
    while sibling is not None:
        # Report the sibling itself even when the <scripCom> sits deeper
        # inside it, so that callers walking the sibling chain can
        # actually meet the returned node.
        if _containsScripCom(sibling):
            if return_parent:
                return sibling.parentNode
            return sibling
        sibling = sibling.nextSibling
    return None
+
def _expandScripComNode(scNode):
    """Turn the siblings that follow 'scNode' into its children.

    The run of siblings ends at the node that _findNextScripComNode()
    reports for the first following sibling, or at the end of the parent
    if that node is never met.
    """
    stopAt = _findNextScripComNode(scNode.nextSibling, False)

    # Gather first, move afterwards: moving a node changes nextSibling.
    toMove = []
    sibling = scNode.nextSibling
    while not (sibling is None or sibling is stopAt):
        toMove.append(sibling)
        sibling = sibling.nextSibling

    for child in toMove:
        child.parentNode.removeChild(child)
        scNode.appendChild(child)
+
def expandScripComNodes(node):
    """Expands all empty <scripCom> nodes so that they contain
    the nodes that they refer to, using neighboring <scripCom>
    nodes and the structure of the XML as a guide,
    starting at the supplied node.

    The tree below 'node' is modified in place; nothing is returned.
    Raises whatever findParentDiv() raises when a <scripCom> has no
    enclosing 'divX' element.
    """
    # <scripCom> elements are often placed as markers instead of
    # enclosing the nodes to which they apply.  Only those empty markers
    # are expanded.  Emptiness is tested on the children: 'nodeValue' is
    # None for every element, so it cannot tell empty from filled.
    if isScripCom(node) and not node.hasChildNodes():
        # Try to find the scope over which the <scripCom> element
        # should actually be placed.
        # Rules:
        #  - move the scripCom element 'up' the tree until it is a
        #    direct child of a `divX' node, placing it before any of
        #    its parent nodes along the way
        #  - make all its sibling nodes that are below it into child
        #    nodes, up to the point where there is another <scripCom>
        #    element
        div = findParentDiv(node)
        moveToParent(node, div)
        _expandScripComNode(node)

    # Recurse over a snapshot of the children: the calls above, made for
    # a descendant, take nodes out of and put nodes into the lists that
    # are being walked here.
    for child in list(node.childNodes):
        expandScripComNodes(child)
+
diff --git a/python/swordutils/xml/utils.py b/python/swordutils/xml/utils.py
new file mode 100644
index 0000000..af726ba
--- /dev/null
+++ b/python/swordutils/xml/utils.py
@@ -0,0 +1,65 @@
+# General XML utilities
+
+from xml.dom import minidom
+from xml import xpath
+import codecs
+
def getFileWriter(fileHandle):
    """Wraps a file object in a stream writer that encodes
    everything written to it as UTF-8"""
    makeWriter = codecs.getwriter("UTF-8")
    return makeWriter(fileHandle)
+
def writexml(doc, fileHandle):
    """Serialises an XML document to a file handle through the
    UTF-8 writer from getFileWriter(), declaring the encoding
    as UTF-8"""
    utf8Writer = getFileWriter(fileHandle)
    doc.writexml(utf8Writer, encoding="UTF-8")
+
def getNodesFromXPath(document, path):
    """Returns the nodes of 'document' selected by 'path'.
    'path' is either a string, which is compiled first,
    or an already compiled xpath object"""
    if isinstance(path, basestring):
        compiled = xpath.Compile(path)
    else:
        compiled = path
    return compiled.select(xpath.CreateContext(document))
+
_rootxpath = xpath.Compile('/')


def getRoot(doc):
    """Returns the first node that the XPath '/' selects in 'doc',
    i.e. the root node of the document"""
    rootNodes = getNodesFromXPath(doc, _rootxpath)
    return rootNodes[0]
+
+
+# Classes to help us with modifications
class RemoveNode:
    """Action that deletes the node it is applied to."""

    def act(self, node):
        """Detach 'node': an attribute is removed from its owner
        element, any other node from its parent."""
        if not isinstance(node, minidom.Attr):
            node.parentNode.removeChild(node)
            return
        owner = node.ownerElement
        owner.removeAttribute(node.name)
+
class GeneralReplaceContents:
    """Replace the contents of a node,
    with user providable function for calculating replacement text.

    'replacefunc' is called with the serialised XML of the node's current
    children and returns the replacement.  Usually that is just text, but
    it may contain markup as well; either way it is parsed as XML, so it
    has to be well-formed once wrapped in a single element.
    """

    def __init__(self, replacefunc):
        self.replacefunc = replacefunc

    def act(self, node):
        """Swap the children of 'node' for the parsed replacement.

        The replacement is parsed before the old children are removed,
        so 'node' is left untouched when parsing fails.
        """
        origText = u''.join(c.toxml() for c in node.childNodes)
        newText = self.replacefunc(origText)

        # Wrap the replacement in a dummy element so that plain text and
        # several top-level nodes parse too.  The parser is handed UTF-8
        # bytes so that non-ASCII text is accepted as well.
        wrapped = (u'<dummy>' + newText + u'</dummy>').encode('utf-8')
        dummy = minidom.parseString(wrapped).documentElement

        # Go through the DOM methods rather than assigning 'childNodes':
        # they keep parentNode and the sibling links of old and new
        # children correct.
        for old in list(node.childNodes):
            node.removeChild(old)
        owner = node.ownerDocument
        for new in list(dummy.childNodes):
            if owner is not None:
                # Copy into the target document so that ownerDocument
                # is right for the whole inserted subtree.
                new = owner.importNode(new, True)
            node.appendChild(new)
+
class ReplaceContents(GeneralReplaceContents):
    """Replace the contents of a node with a fixed piece of text,
    regardless of what the node contained before.
    """

    def __init__(self, replacementtext):
        """'replacementtext' must be a unicode string.

        Raises TypeError otherwise.  An explicit raise is used because
        an 'assert' is stripped when Python runs with -O.
        """
        try:
            textType = unicode
        except NameError:
            # No separate 'unicode' type: plain strings are text.
            textType = str
        if not isinstance(replacementtext, textType):
            raise TypeError("replacementtext must be a unicode string, "
                            "got %r" % (replacementtext,))

        def _replacefunc(text):
            return replacementtext

        GeneralReplaceContents.__init__(self, _replacefunc)
+
def do_replacements(doc, replacements):
    """Applies a set of modifications to 'doc'.

    'replacements' maps XPath strings to action objects.  Each path is
    compiled and evaluated against one shared context for 'doc', and
    the action's act() method is called on every selected node.
    """
    context = xpath.CreateContext(doc)
    for expression, action in replacements.items():
        compiled = xpath.Compile(expression)
        selected = compiled.select(context)
        for match in selected:
            action.act(match)