#!/usr/bin/env python2.5
# Converts the source calcom??.xml files into a single
# ThML file, with corrections made to allow it to be
# used as a Sword module
#------------------------------------------------------------
# CONFIG
# Publisher identifier; do_head_replacements uses it as the replacement for
# the //electronicEdInfo/publisher element of the combined document.
PUBLISHERID = u"lukeplant.me.uk"
#------------------------------------------------------------
from xml.dom import minidom
from xml import xpath
from datetime import datetime
from swordutils.xml import thml, utils
from swordutils.xml.utils import RemoveNode, GeneralReplaceContents, ReplaceContents, do_replacements
from swordutils.xml.combine import LazyNodes
import sys
# Captured once at import time (naive local time); do_head_replacements
# formats it as YYYY-MM-DD for the revision history entry.
now = datetime.now() # for general timestamping purposes
# Text of the XML comments that do_body_corrections places at the very start
# and very end of each ThML.body, so the boundaries of each source file's
# contribution can be located afterwards.
MAGIC_SEPARATOR_START = "%%% combine_calcom.py START %%%"
MAGIC_SEPARATOR_END = "%%% combine_calcom.py END %%%"
def do_head_replacements(doc):
    """
    Apply the header/metadata corrections to the template document.

    Builds a mapping from XPath expressions to the action objects imported
    from swordutils.xml.utils (ReplaceContents, GeneralReplaceContents,
    RemoveNode) and hands it, together with `doc`, to do_replacements.

    doc: the parsed template document whose head section is to be rewritten.
         It is passed straight through to do_replacements.
    """
    def prefix_editorial_comments(t):
        # Keep the original comments, prefixed by a note about the combination.
        return u"Multiple ThML files combined into single ThML file by a script. Original editorial comments: " + t

    def prefix_revision_history(t):
        # Prepend a dated entry (date taken from the module-level `now`).
        return unicode(now.strftime('%Y-%m-%d')) + u": Multiple ThML files combined into single ThML file by a script. Original revision history:" + t

    corrections = {
        "//DC.Title[@sub='Main']": ReplaceContents(u"Calvin's Combined Commentaries"),
        "//DC.Title[@sub='authTitle']": RemoveNode(),
        "//DC.Title[@sub='Alternative']": RemoveNode(),
        "//printSourceInfo": ReplaceContents(u"Multiple printed works, Baker"),
        "//electronicEdInfo/bookID": ReplaceContents(u"calvincommentaries"),
        "//DC.Identifier": RemoveNode(), # TODO - new identifier?
        "//electronicEdInfo/editorialComments":
            GeneralReplaceContents(prefix_editorial_comments),
        "//electronicEdInfo/revisionHistory":
            GeneralReplaceContents(prefix_revision_history),
        "//electronicEdInfo/publisher": ReplaceContents(PUBLISHERID),
    }
    do_replacements(doc, corrections)
def do_body_corrections(doc):
    """
    Apply per-file corrections to a source document before it is combined.

    Steps, in order:
      1. Run thml.expandScripComNodes on the document root.
      2. Wrap the contents of the first ThML.body element in marker
         comments (MAGIC_SEPARATOR_START / MAGIC_SEPARATOR_END), each
         followed by a newline text node.
      3. Remove every id attribute via do_replacements.

    doc: a parsed source document; it is modified in place.

    Raises IndexError if the document contains no ThML.body element.
    """
    # Correct
    rootNode = utils.getRoot(doc)
    thml.expandScripComNodes(rootNode)

    # Add a comment that we are going to use later...
    body = utils.getNodesFromXPath(doc, '//ThML.body')[0]
    # Use the DOM API rather than editing body.childNodes directly, so that
    # parentNode and the sibling links of the new nodes are maintained.
    # insertBefore with a reference node of None appends, which covers an
    # empty body.
    original_first = body.firstChild
    body.insertBefore(doc.createComment(MAGIC_SEPARATOR_START), original_first)
    body.insertBefore(doc.createTextNode("\n"), original_first)
    body.appendChild(doc.createComment(MAGIC_SEPARATOR_END))
    body.appendChild(doc.createTextNode("\n"))

    # Other corrections
    corrections = {
        # id attributes can now contain duplicates due to combination
        # of multiple files, so we remove them all.
        "//@id": RemoveNode(),
    }
    do_replacements(doc, corrections)
def combine(templatefile, allfiles):
    """
    Write one combined ThML document to standard output.

    templatefile: file parsed with minidom to provide the outer document
                  (head section and the ThML.body element that is refilled).
    allfiles:     the source files handed to LazyNodes as the origin of the
                  body content.
    """
    body_xpath = '//ThML.body'

    # Parse the template and empty its body; only the surrounding structure
    # of the template is kept.
    template_doc = minidom.parse(templatefile)
    template_body = utils.getNodesFromXPath(template_doc, body_xpath)[0]
    template_body.childNodes = []

    do_head_replacements(template_doc)

    # The body's children are supplied by LazyNodes, so they are evaluated
    # lazily while the document is being written out.
    template_body.childNodes = LazyNodes(template_doc, allfiles, do_body_corrections, body_xpath)

    utils.writexml(template_doc, sys.stdout)
def main(filenames):
    """
    Combine the given files, using the first one as the template.

    filenames: non-empty sequence of input file names; the first entry is
               passed to combine both as the template and as part of the
               full list.
    """
    template = filenames[0]
    combine(template, filenames)
if __name__ == "__main__":
    input_files = sys.argv[1:]
    if not input_files:
        # Standard output carries the generated XML, so the usage message is
        # sent to standard error to keep it out of any redirected output file.
        sys.stderr.write("Usage: ./combine_and_correct.py filename.xml [filename2.xml ...]\n")
        sys.exit(1)
    main(input_files)