summaryrefslogtreecommitdiffstats
path: root/modules/calvinscommentaries/combine_calcom.py
blob: bdb410cbf1746e40c5311a2baf3f7ca6e22d9809 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/usr/bin/env python2.5

# Converts the source calcom??.xml files into a single
# ThML file, with corrections made to allow it to be
# used as a Sword module

#------------------------------------------------------------
# CONFIG

# Publisher identifier; do_head_replacements() writes it into the
# //electronicEdInfo/publisher element of the combined document.
PUBLISHERID = u"lukeplant.me.uk"

#------------------------------------------------------------

from xml.dom import minidom
from xml import xpath
from datetime import datetime
from swordutils.xml import thml, utils
from swordutils.xml.utils import RemoveNode, GeneralReplaceContents, ReplaceContents, do_replacements
from swordutils.xml.combine import LazyNodes
import sys


# Captured once when the module is loaded, so every use within a run sees
# the same value.  datetime.now() without a tz argument is naive local time.
now = datetime.now() # for general timestamping purposes

# Text of the XML comments that do_body_corrections() places at the start
# and end of each file's ThML.body.  Whatever consumes these markers is not
# part of this script -- presumably a later processing step.
MAGIC_SEPARATOR_START = "%%% combine_calcom.py START %%%"
MAGIC_SEPARATOR_END = "%%% combine_calcom.py END %%%"


def do_head_replacements(doc):
    """Adjust the header metadata of ``doc`` for the combined work.

    Builds a mapping from XPath expressions to replacement actions and
    hands it, together with ``doc``, to ``do_replacements``.  Returns
    nothing.
    """
    # Date stamp for the revision history, taken from the module-level
    # ``now`` so it is the same for the whole run.
    stamp = unicode(now.strftime('%Y-%m-%d'))

    corrections = {
        "//DC.Title[@sub='Main']":        ReplaceContents(u"Calvin's Combined Commentaries"),
        "//DC.Title[@sub='authTitle']":   RemoveNode(),
        "//DC.Title[@sub='Alternative']": RemoveNode(),
        "//printSourceInfo":              ReplaceContents(u"<published>Multiple printed works, Baker</published>"),
        "//electronicEdInfo/bookID":      ReplaceContents(u"calvincommentaries"),
        "//DC.Identifier":                RemoveNode(), # TODO - new identifier?
        # The two entries below keep the original text and prefix it with
        # a note about the combination.
        "//electronicEdInfo/editorialComments":
          GeneralReplaceContents(lambda t: u"Multiple ThML files combined into single ThML file by a script.  Original editorial comments: " + t),
        "//electronicEdInfo/revisionHistory":
          GeneralReplaceContents(lambda t: stamp + u": Multiple ThML files combined into single ThML file by a script. Original revision history:" + t),
        "//electronicEdInfo/publisher": ReplaceContents(PUBLISHERID),

    }
    do_replacements(doc, corrections)

def do_body_corrections(doc):
    """Apply the per-file corrections to ``doc`` before it is combined.

    Expands the <scripCom> nodes, brackets the contents of the first
    ``ThML.body`` element with marker comments, and removes every ``id``
    attribute.  Returns nothing.

    Raises IndexError if ``doc`` contains no ``ThML.body`` element.
    """
    # Correct <scripCom>
    thml.expandScripComNodes(utils.getRoot(doc))

    # Add marker comments (each followed by a newline) around the body's
    # existing children; they are intended for use at a later stage.
    body = utils.getNodesFromXPath(doc, '//ThML.body')[0]
    # Go through the DOM API rather than editing body.childNodes directly,
    # so the new nodes are attached by the DOM implementation itself.
    # When the body is empty firstChild is None and insertBefore appends,
    # which yields the same order.
    first = body.firstChild
    body.insertBefore(doc.createComment(MAGIC_SEPARATOR_START), first)
    body.insertBefore(doc.createTextNode("\n"), first)
    body.appendChild(doc.createComment(MAGIC_SEPARATOR_END))
    body.appendChild(doc.createTextNode("\n"))

    # Other corrections
    corrections = {
        # id attributes can now contain duplicates due to combination
        # of multiple files, so we remove them all.
        "//@id": RemoveNode(),

    }
    do_replacements(doc, corrections)

def combine(templatefile, allfiles):
    """Write a single combined ThML document to standard output.

    ``templatefile`` is parsed to provide the surrounding document and its
    header; ``allfiles`` is handed to ``LazyNodes``, which supplies the
    children of the output's ``ThML.body``.
    """
    skeleton = minidom.parse(templatefile)

    # Empty the template's own body before its header is rewritten.
    body = utils.getNodesFromXPath(skeleton, '//ThML.body')[0]
    body.childNodes = []
    do_head_replacements(skeleton)

    # Stand-in child list: its nodes are produced lazily, while the
    # document is being written out and the writer iterates over them.
    body.childNodes = LazyNodes(skeleton, allfiles, do_body_corrections, '//ThML.body')

    utils.writexml(skeleton, sys.stdout)

def main(filenames):
    """Combine ``filenames``; the first entry also serves as the template."""
    template = filenames[0]
    combine(template, filenames)

if __name__ == "__main__":
    # At least one input file is required; main() uses the first one as
    # the template as well.
    if len(sys.argv) < 2:
        # Usage goes to stderr because stdout carries the combined XML.
        # sys.argv[0] keeps the message correct whatever the script is
        # called.
        sys.stderr.write(
            "Usage: %s filename.xml [filename2.xml ...]\n" % sys.argv[0])
        sys.exit(1)
    main(sys.argv[1:])