diff options
author | Luke S. <luke@motimail.com> | 2007-07-19 22:51:32 +0000 |
---|---|---|
committer | Luke S. <luke@motimail.com> | 2007-07-19 22:51:32 +0000 |
commit | 21cf3069a438459f141f256b12ccb5e9d05b21d0 (patch) | |
tree | 8c1cac6bdf2684c6d4e16da37c7d5d392ce63a1f /modules/calvinscommentaries | |
parent | 088d12c8c9eba9dd3c8e1e30163b5a00fba7528a (diff) | |
download | sword-tools-21cf3069a438459f141f256b12ccb5e9d05b21d0.tar.gz |
Added a Python library of various tools for making modules, and
a specific script for creating a combined Calvin's Commentaries module
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@89 07627401-56e2-0310-80f4-f8cd0041bdcd
Diffstat (limited to 'modules/calvinscommentaries')
-rw-r--r-- | modules/calvinscommentaries/README | 45 | ||||
-rw-r--r-- | modules/calvinscommentaries/calvinscommentaries.conf | 17 | ||||
-rwxr-xr-x | modules/calvinscommentaries/combine_calcom.py | 78 |
3 files changed, 140 insertions, 0 deletions
diff --git a/modules/calvinscommentaries/README b/modules/calvinscommentaries/README new file mode 100644 index 0000000..9f86798 --- /dev/null +++ b/modules/calvinscommentaries/README @@ -0,0 +1,45 @@ + +Conversion of Calvin's commentaries into OSIS format and a Sword module + +Requirements: +------------- +- ThML sources: calcom??.xml files, as downloaded from CCEL. + For convenience, a recent version of the files can be downloaded here: + http://lukeplant.me.uk/misc/sword/calcom_sources.tar.bz2 + Extract this file. +- thml2osis.xslt from + http://crosswire.org/svn/sword-tools/trunk/thml2osis/xslt/ +- xsltproc for processing the above +- Python, for the script that combines calcom??.xml files +- Python swordutils library: + http://crosswire.org/svn/sword-tools/trunk/python + A checkout of this directory should be in your PYTHONPATH + +Make the module +--------------- + +$ ./combine_calcom.py calcom_sources/calcom??.xml +(output stored in calvinscommentaries.thml) +$ xsltproc --novalid path/to/thml2osis.xslt calvinscommentaries.thml > calvinscommentaries.osis + +TODO +- convert OSIS commentary to Sword module + +Explanation of these steps +-------------------------- +1) 'Correct' some of the ThML files. In particular, change the + 'scripCom' tags so that they enclose the text they refer to, + rather than just coming at the beginning of it. + This is done as part of combine_calcom.py. + +2) Combine all the ThML files into one big one, and at the same time: + - modify the header information, using one of the calcom??.xml files + as a template + - make any corrections necessary to the ThML for the new context + + Output: calvinscommentaries.thml + +3) Convert to OSIS, using thml2osis.xslt + +4) TODO - convert to Sword module. The current osis2mod utility expects + commentaries to be marked up like Bibles. 
diff --git a/modules/calvinscommentaries/calvinscommentaries.conf b/modules/calvinscommentaries/calvinscommentaries.conf new file mode 100644 index 0000000..b201b45 --- /dev/null +++ b/modules/calvinscommentaries/calvinscommentaries.conf @@ -0,0 +1,17 @@ +[CalvinsCommentaries] +DataPath=./modules/comments/zcom/calvinscommentaries/ +ModDrv=zCom +BlockType=CHAPTER +SourceType=OSIS +CompressType=ZIP +Lang=en +Description=Calvin's Collected Commentaries +About=John Calvin's commentaries on many books of the Bible, collected \ +into a single volume from material found at Christian Classics Ethereal Library \par \ +Converted to Sword module format by Luke Plant <L.Plant.98@cantab.net> +Version=1.0 +Encoding=UTF-8 +LCSH=Bible--Commentaries. +DistributionLicense=Public Domain +TextSource=http://www.ccel.org/ +MinimumVersion=1.5.2 diff --git a/modules/calvinscommentaries/combine_calcom.py b/modules/calvinscommentaries/combine_calcom.py new file mode 100755 index 0000000..48be2eb --- /dev/null +++ b/modules/calvinscommentaries/combine_calcom.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python + +# Converts the source calcom??.xml files into a single +# ThML file, with corrections made to allow it to be +# used as a Sword module + +#------------------------------------------------------------ +# CONFIG + +PUBLISHERID = u"lukeplant.me.uk" + +#------------------------------------------------------------ + +from xml.dom import minidom +from xml import xpath +from datetime import datetime +from swordutils.xml import thml, utils +from swordutils.xml.utils import RemoveNode, GeneralReplaceContents, ReplaceContents, do_replacements +from swordutils.xml.combine import LazyNodes +import sys + + +now = datetime.now() # for general timestamping purposes + + +def do_head_replacements(doc): + + corrections = { + "//DC.Title[@sub='Main']": ReplaceContents(u"Calvin's Combined Commentaries"), + "//DC.Title[@sub='authTitle']": RemoveNode(), + "//DC.Title[@sub='Alternative']": RemoveNode(), + 
"//printSourceInfo": ReplaceContents(u"<published>Multiple printed works, Baker</published>"), + "//electronicEdInfo/bookID": ReplaceContents(u"calvincommentaries"), + "//DC.Identifier": RemoveNode(), # TODO - new identifier? + "//electronicEdInfo/editorialComments": + GeneralReplaceContents(lambda t: u"Multiple ThML files combined into single ThML file by a script. Original editoral comments: " + t), + "//electronicEdInfo/revisionHistory": + GeneralReplaceContents(lambda t: unicode(now.strftime('%Y-%m-%d')) + u": Multiple ThML files combined into single ThML file by a script. Original revision history:" + t), + "//electronicEdInfo/publisher": ReplaceContents(PUBLISHERID), + + } + do_replacements(doc, corrections) + +def do_body_corrections(doc): + # Correct <scripCom> + rootNode = utils.getRoot(doc) + thml.expandScripComNodes(rootNode) + # Other corrections + corrections = { + # id attributes can now contain duplicates due to combination + # of multiple files, so we remove them all. + "//@id": RemoveNode(), + + } + do_replacements(doc, corrections) + +def combine(templatefile, allfiles): + # Get the main one + templatexml = minidom.parse(templatefile) + mainBody = utils.getNodesFromXPath(templatexml, '//ThML.body')[0] + mainBody.childNodes = [] + do_head_replacements(templatexml) + # The following childNodes will be lazily evaluated as + # templatexml.writexml iterates over them + mainBody.childNodes = LazyNodes(templatexml, allfiles, do_body_corrections, '//ThML.body') + + fh = open('calvinscommentaries.thml', 'wb') + utils.writexml(templatexml, fh) + fh.close() + +def main(filenames): + combine(filenames[0], filenames) + +if __name__ == "__main__": + if len(sys.argv) < 2: + print "Usage: ./combine_and_correct.py filename.xml [filename2.xml ...]" + sys.exit(1) + main(sys.argv[1:]) |