diff options
-rw-r--r-- | modules/calvinscommentaries/README | 38 | ||||
-rwxr-xr-x | modules/calvinscommentaries/bundle_and_install.sh | 132 | ||||
-rwxr-xr-x | modules/calvinscommentaries/combine_calcom.py | 15 |
3 files changed, 172 insertions, 13 deletions
diff --git a/modules/calvinscommentaries/README b/modules/calvinscommentaries/README index 9f86798..134a1bf 100644 --- a/modules/calvinscommentaries/README +++ b/modules/calvinscommentaries/README @@ -17,29 +17,47 @@ Requirements: Make the module --------------- +First edit 'bundle_and_install', setting directories as chosen. If you +don't want the module installed at the end, comment out the last line which +unzips it into place. -$ ./combine_calcom.py calcom_sources/calcom??.xml -(output stored in calvinscommentaries.thml) -$ xsltproc --novalid path/to/thml2osis.xslt calvinscommentaries.thml > calvinscommentaries.osis +TODO: +- get osis2mod to handle commentaries properly (instead of requiring + them to be marked up Bibles as currently). Once this is done, most of + the ugliness in 'bundle_and_install' will go away, and it gets a whole + lot simpler. +- Check the OSIS actually validates -TODO -- convert OSIS commentary to Sword module -Explanation of these steps --------------------------- +Explanation of steps +-------------------- 1) 'Correct' some of the ThML files. In particular, change the 'scripCom' tags so that they enclose the text they refer to, - rather than just come at the beginning of it. + rather than just coming at the beginning of it. This is done as part of combine_calcom.py 2) Combine all the ThML files into one big one, and at the same time: - modify the header information, using one of the calcom??.xml files as a template - make any corrections necessary to the ThML for the new context + + This is the second task of combine_calcom.py. Output: calvinscommentaries.thml 3) Convert to OSIS, using thml2osis.xslt + + Output: calvinscommentaries.osis + +4) Convert to format required by osis2mod. This uses the + 'genbookOsis2Commentary.py' script. Since this script is DOM based, + it uses up too much memory if all of calvinscommentaries.osis is loaded. 
+ To get round this, the OSIS file is split into lots of bits (using + markers inserted earlier), then run through genbookOsis2Commentary. + + Also genbookOsis2Commentary gets rid of some other bits of + 'non-commentary' text that otherwise ends up in the module, and probably + isn't wanted. + +5) Run osis2mod, create the zip file etc. -4) TODO - convert to Sword module. The current osis2mod utility expects - commentaries to be marked up like Bibles. diff --git a/modules/calvinscommentaries/bundle_and_install.sh b/modules/calvinscommentaries/bundle_and_install.sh new file mode 100755 index 0000000..15a67f1 --- /dev/null +++ b/modules/calvinscommentaries/bundle_and_install.sh @@ -0,0 +1,132 @@ +#!/bin/bash + + +echo "Please edit this file first." +exit 1 +## Must modify these: +SWORDTOOLS="$HOME/devel/sword-tools" +CALCOMSOURCES="$HOME/christian/books/John Calvin/Commentaries/calcom_sources" + + +## Leave these to build in subdir 'build' +BUILDDIR="`pwd`/build" +OSIS2MODOUTPUT="$BUILDDIR/modules/comments/zcom/calvinscommentaries" +CONFDIR="$BUILDDIR/mods.d" +THISDIR=`pwd` + +############################################## + + +which csplit > /dev/null || { echo "Cannot find required tool 'csplit'. Exiting."; exit 1;} +which replace > /dev/null || { echo "Cannot find required tool 'replace'. Exiting."; exit 1;} + + +mkdir -p $BUILDDIR +mkdir -p $OSIS2MODOUTPUT +mkdir -p $CONFDIR + + +echo "Running combine_calcom.py..." +./combine_calcom.py "$CALCOMSOURCES"/calcom??.xml > "$BUILDDIR/calvinscommentaries.thml" || exit 1 + +echo "Converting to OSIS..." 
+xsltproc --novalid "$SWORDTOOLS/thml2osis/xslt/thml2osis.xslt" "$BUILDDIR/calvinscommentaries.thml" > "$BUILDDIR/calvinscommentaries.osis" || exit 1 + + +cd "$BUILDDIR" + + +############################################################################## +# Splitting +# We currently have to use genbookOsis2Commentary (since +# osis2mod doesn't accept format unless it is marked up like a Bible), +# genbookOsis2Commentary is a quick hack, and doesn't work well +# with big files, since it is DOM based. So we split the file +# into lots of small ones, using markers inserted before. +# Then recombine again. This is hacky, should go away once +# osis2mod is fixed. + +# Split +echo "Splitting..." + +rm part* + +COUNT=$(csplit -f 'part' -b '%03d' calvinscommentaries.osis "/combine_calcom.py START/" '{*}' | nl | tail -n 1 | cut -c 1-7 ) + +# $COUNT now contains the number of parts we split into + +FIRSTFILE="part000" +FIRSTFILEALT="firstpart" +LASTFILE="part`printf '%03d' $((COUNT-1))`" +mv $FIRSTFILE $FIRSTFILEALT + +# $LASTFILE is special -- it will have trailing stuff +TMP=`mktemp` +replace '</osis>' '' '</osisText>' '' < $LASTFILE > $TMP || exit 1 +mv $TMP $LASTFILE + + +# Fix individual files +for F in part*; +do + # prepend and append some stuff + TMP=`mktemp` + echo '<?xml version="1.0" encoding="UTF-8"?>' > $TMP + echo '<osis>' >> $TMP + echo '<osisText>' >> $TMP + cat $F >> $TMP + echo '</osisText>' >> $TMP + echo '</osis>' >> $TMP + mv $TMP $F + + echo "re-versifying $F ..." 
+ "$SWORDTOOLS/python/swordutils/osis/genbookOsis2Commentary.py" $F > "$F.versified" || exit 1 + + # Now strip stuff we added + TMP2=`mktemp` + cat "$F.versified" | egrep -v 'xml version' | replace '<osis>' '' '<osisText>' '' '</osis>' '' '</osisText>' '' > $TMP2 + mv $TMP2 "$F.versified" + +done + +# Now combine again +COMBINED="calvinscommentaries.versified.osis" +# Use this cleared up XML instead of the uncleaned stuff in $FIRSTFILEALT +echo '<?xml version="1.0" encoding="UTF-8"?>' > $COMBINED +echo '<osis xmlns="http://www.bibletechnologies.net/2003/OSIS/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.bibletechnologies.net/2003/OSIS/namespace http://www.bibletechnologies.net/osisCore.2.1.1.xsd">' >> $COMBINED +echo '<osisText osisRefWork="bible" canonical="true" osisIDWork="calvincommentaries" xml:lang="en">' >> $COMBINED + +for F in part*.versified; +do + cat $F >> $COMBINED +done + +echo '</osisText>' >> $COMBINED +echo '</osis>' >> $COMBINED + +####################################################################### + +# clean out old stuff +rm "$OSIS2MODOUTPUT/*" + +# xml2gbs +#xml2gbs -fO calvinscommentaries.osis CalvinsCommentaries +#mv CalvinsCommentaries.{bdt,dat,idx} modules/comments/zcom/calvinscommentaries/ + +# osis2mod +echo "Running osis2mod..." +osis2mod "$OSIS2MODOUTPUT" "$BUILDDIR/$COMBINED" 0 2 3 || exit 1 + + +echo "Zipping..." +cp "$THISDIR/calvinscommentaries.conf" "$CONFDIR" + + +cd "$BUILDDIR" + +zip -r CalvinsCommentaries.zip mods.d/ modules/ + +echo "Installing..." 
+## Install +unzip -o -d $HOME/.sword CalvinsCommentaries.zip + diff --git a/modules/calvinscommentaries/combine_calcom.py b/modules/calvinscommentaries/combine_calcom.py index 48be2eb..37d8e18 100755 --- a/modules/calvinscommentaries/combine_calcom.py +++ b/modules/calvinscommentaries/combine_calcom.py @@ -22,6 +22,9 @@ import sys now = datetime.now() # for general timestamping purposes +MAGIC_SEPARATOR_START = "%%% combine_calcom.py START %%%" +MAGIC_SEPARATOR_END = "%%% combine_calcom.py END %%%" + def do_head_replacements(doc): @@ -45,6 +48,14 @@ def do_body_corrections(doc): # Correct <scripCom> rootNode = utils.getRoot(doc) thml.expandScripComNodes(rootNode) + # Add a comment that we are going to use later... + body = utils.getNodesFromXPath(doc, '//ThML.body')[0] + body.childNodes.insert(0, doc.createComment(MAGIC_SEPARATOR_START)) + body.childNodes.insert(1, doc.createTextNode("\n")) + body.appendChild(doc.createComment(MAGIC_SEPARATOR_END)) + body.appendChild(doc.createTextNode("\n")) + + # Other corrections corrections = { # id attributes can now contain duplicates due to combination @@ -64,9 +75,7 @@ def combine(templatefile, allfiles): # templatexml.writexml iterates over them mainBody.childNodes = LazyNodes(templatexml, allfiles, do_body_corrections, '//ThML.body') - fh = open('calvinscommentaries.thml', 'wb') - utils.writexml(templatexml, fh) - fh.close() + utils.writexml(templatexml, sys.stdout) def main(filenames): combine(filenames[0], filenames) |