#!/usr/bin/env python

# Converts the source calcom??.xml files into a single
# ThML file, with corrections made to allow it to be
# used as a Sword module.
#
# The combined document is written to standard output; diagnostics
# (such as the usage message) go to standard error so that they can
# never end up inside the generated XML.

#------------------------------------------------------------
# CONFIG

PUBLISHERID = u"lukeplant.me.uk"

#------------------------------------------------------------

from xml.dom import minidom
from xml import xpath
from datetime import datetime
from swordutils.xml import thml, utils
from swordutils.xml.utils import RemoveNode, GeneralReplaceContents, ReplaceContents, do_replacements
from swordutils.xml.combine import LazyNodes
import sys

now = datetime.now()  # for general timestamping purposes

# Comment markers inserted around the body of every source document
# (see do_body_corrections).
MAGIC_SEPARATOR_START = "%%% combine_calcom.py START %%%"
MAGIC_SEPARATOR_END = "%%% combine_calcom.py END %%%"


def do_head_replacements(doc):
    """Apply the header corrections for the combined work to ``doc``.

    Builds a mapping of XPath expression -> correction object and hands
    it, together with ``doc``, to ``do_replacements``.

    NOTE(review): the exact behaviour of ``ReplaceContents``,
    ``GeneralReplaceContents`` and ``RemoveNode`` is defined in
    ``swordutils.xml.utils``; presumably they replace the node text,
    transform the node text through the given callable, and delete the
    node respectively -- confirm against that module.
    """
    corrections = {
        "//DC.Title[@sub='Main']": ReplaceContents(u"Calvin's Combined Commentaries"),
        "//DC.Title[@sub='authTitle']": RemoveNode(),
        "//DC.Title[@sub='Alternative']": RemoveNode(),
        "//printSourceInfo": ReplaceContents(u"Multiple printed works, Baker"),
        "//electronicEdInfo/bookID": ReplaceContents(u"calvincommentaries"),
        "//DC.Identifier": RemoveNode(),  # TODO - new identifier?
        # The two entries below prepend a note to the existing text
        # (passed in as ``t``) instead of discarding it.
        "//electronicEdInfo/editorialComments":
            GeneralReplaceContents(lambda t: u"Multiple ThML files combined into single ThML file by a script.  Original editoral comments: " + t),
        "//electronicEdInfo/revisionHistory":
            GeneralReplaceContents(lambda t: unicode(now.strftime('%Y-%m-%d')) + u": Multiple ThML files combined into single ThML file by a script.  Original revision history:" + t),
        "//electronicEdInfo/publisher": ReplaceContents(PUBLISHERID),
    }
    do_replacements(doc, corrections)


def do_body_corrections(doc):
    """Correct the body of a single source document, in place.

    Steps, in order:

    1. Call ``thml.expandScripComNodes`` on the document root.
    2. Wrap the children of the first ``ThML.body`` element in a
       START / END pair of marker comments, each followed by a newline
       text node.
    3. Remove every ``id`` attribute from the document.

    Raises ``IndexError`` if ``doc`` contains no ``ThML.body`` element.
    """
    # Correct
    rootNode = utils.getRoot(doc)
    thml.expandScripComNodes(rootNode)

    # Add a comment that we are going to use later...
    body = utils.getNodesFromXPath(doc, '//ThML.body')[0]
    # The leading markers are spliced straight into the childNodes list
    # (rather than via insertBefore), exactly as the original script did.
    body.childNodes.insert(0, doc.createComment(MAGIC_SEPARATOR_START))
    body.childNodes.insert(1, doc.createTextNode("\n"))
    body.appendChild(doc.createComment(MAGIC_SEPARATOR_END))
    body.appendChild(doc.createTextNode("\n"))

    # Other corrections
    corrections = {
        # id attributes can now contain duplicates due to combination
        # of multiple files, so we remove them all.
        "//@id": RemoveNode(),
    }
    do_replacements(doc, corrections)


def combine(templatefile, allfiles):
    """Write one combined ThML document to standard output.

    ``templatefile`` is parsed with minidom and supplies everything
    outside ``ThML.body``; its header is rewritten by
    ``do_head_replacements``.  Its body children are discarded and
    replaced by a ``LazyNodes`` object built from ``allfiles``,
    ``do_body_corrections`` and the ``'//ThML.body'`` XPath.

    Raises ``IndexError`` if the template has no ``ThML.body`` element.
    """
    # Get the main one
    templatexml = minidom.parse(templatefile)
    mainBody = utils.getNodesFromXPath(templatexml, '//ThML.body')[0]
    mainBody.childNodes = []
    do_head_replacements(templatexml)

    # The following childNodes will be lazily evaluated as
    # templatexml.writexml iterates over them
    mainBody.childNodes = LazyNodes(templatexml, allfiles, do_body_corrections, '//ThML.body')

    utils.writexml(templatexml, sys.stdout)


def main(filenames):
    """Combine ``filenames``, using the first one as the template.

    The first file is passed both as the template and as part of the
    list of files to combine.
    """
    combine(filenames[0], filenames)


if __name__ == "__main__":
    if len(sys.argv) < 2:
        # stdout carries the generated XML, so the usage text must go to
        # stderr; sys.stderr.write also avoids the Python 2 print statement.
        sys.stderr.write("Usage: ./combine_and_correct.py filename.xml [filename2.xml ...]\n")
        sys.exit(1)
    main(sys.argv[1:])