#!/usr/bin/perl

###############################################################################
#
# When run without any arguments, this script reads the CCEL files:
# Bible.*.xml.
#
# Otherwise, supply the script with a list (or wildcard) of files and it
# will attempt to create a v11n system definition on that basis.
#
# The --vpl switch instructs the script to interpret files as VPL files.
# The --imp switch instructs the script to interpret files as IMP files.
# The --xml switch selects the XML interpretation again (it is the default).
#
###############################################################################

# buildBooksArrays - append one row for the book that has just been finished.
#
# Takes no arguments (any that are passed are ignored) and communicates
# entirely through package variables:
#
#   reads:   $lastBook  name/abbreviation of the book to emit
#            %osis      lower-cased name/abbreviation -> osisID
#            %idmap     osisID -> English book name
#            $cCount    number of chapters counted for the book
#            $v11n      versification name (used in the error message only)
#   updates: $otnt      switched from 0 (OT) to 1 (NT) once "Matt" is emitted
#            $otbooks / $ntbooks   the row is appended to one of these,
#                       depending on $otnt
#
# An unknown abbreviation is reported on STDOUT; a row with empty names is
# still appended for it.  When $lastBook is empty (no verse has been read)
# nothing is appended.
sub buildBooksArrays {
    # Guard for the call made after the read loop: with an input that
    # contained no verses there is no book to emit.
    return if !defined $lastBook || $lastBook eq "";

    # Resolve the osisID once instead of repeating the hash lookup.
    my $osisBook = $osis{lc($lastBook)};
    if (!defined $osisBook || $osisBook eq "") {
        print "ERROR: Unknown book abbreviation: $lastBook in $v11n\n";
        $osisBook = "";
    }

    # Matthew is treated as the first NT book; the flag is not reset here,
    # so every later book also goes to the NT array.
    if ($osisBook eq "Matt") {
        $otnt = 1;
    }

    my $bookName = defined $idmap{$osisBook} ? $idmap{$osisBook} : "";
    my $row = "\t{\"$bookName\", \"$osisBook\", \"$osisBook\", $cCount},\n";

    if ($otnt == 0) {
        $otbooks .= $row;
    }
    else {
        $ntbooks .= $row;
    }
    return;
}

# @canons will contain this list of files, these are in a basic XML format.
# Each file lists osisIDs along with the English names associated with the
# osisID. These aren't exhaustive, and may or may not overlap (but hopefully
# don't). We are only using these to load mappings from osisIDs.
use strict;
use warnings;

# State shared with buildBooksArrays(), which reads and updates these package
# variables directly; they therefore have to remain package globals.
#   %osis     lower-cased id/abbreviation/name -> osisID
#   %idmap    osisID -> first English name seen for it
#   $v11n     name of the versification system being generated
#   $lastBook book of the previously processed verse
#   $cCount   chapters counted so far in the current book
#   $otnt     0 while in the OT, 1 once the NT has started
#   $otbooks / $ntbooks   C source text of the two sbook arrays
our (%osis, %idmap, $v11n, $lastBook, $cCount, $otnt, $otbooks, $ntbooks);

# Canon description files used only to load the id/abbreviation/name mappings.
my @canons = (
    "canon.bible.xml",       # the Bible, broadly defined
    "canon.af.xml",          # Apostolic Fathers
    "canon.otp.xml",         # OT pseudepigrapha
    "canon.nta.xml",         # NT apocrypha
    "canon.lds.xml",         # Mormon books
    "canon.naghammadi.xml",  # Nag Hammadi Library
    "canon.qumran.xml",      # Qumran mss
    "canon.classical.xml",   # intended for classical works, currently just Josephus
);

# lower-cased osisID -> "id;name;name;..." (each entry ';'-terminated).
my %abbrevs;

# Most recent <id> seen; kept across files exactly as the original global was.
my $id = "";

foreach my $mapfile (@canons) {
    my $map;
    unless (open $map, '<', $mapfile) {
        warn "WARNING: Cannot open canon file $mapfile: $!\n";
        next;
    }

    while (my $line = <$map>) {
        # NOTE(review): the two substitutions and the opening tags of the
        # three patterns below were lost when this file was extracted
        # (they appeared as s///g, s/\&/\&/g and "(.+?)<\/id>").  They are
        # restored here as XML-comment removal, &amp; decoding and plain
        # <id>/<abbr>/<name> elements (attributes tolerated) - confirm
        # against the canon.*.xml format.
        $line =~ s/<!--.+?-->//g;
        $line =~ s/&amp;/&/g;

        if ($line =~ /<id\b[^>]*>(.+?)<\/id>/) {
            $id = $1;
            $osis{lc $id}    = $id;
            $abbrevs{lc $id} = "$id;";
        }
        elsif ($line =~ /<abbr\b[^>]*>(.+?)<\/abbr>/) {
            my $key = lc $1;
            # First mapping wins; duplicates are silently ignored.
            unless (defined $osis{$key} && $osis{$key} ne "") {
                $osis{$key} = $id;
            }
        }
        elsif ($line =~ /<name\b[^>]*>(.+?)<\/name>/) {
            my $name = $1;
            my $key  = lc $name;
            unless (defined $osis{$key} && $osis{$key} ne "") {
                $osis{$key} = $id;
                $abbrevs{lc $id} .= "$name;";
            }
            # Duplicates most likely indicate alternate names, so only the
            # first name is kept as the display name.
            unless (defined $idmap{$id} && $idmap{$id} ne "") {
                $idmap{$id} = $name;
            }
        }
    }
    close $map;
}

# Command line: mode switches may be mixed with file names; the last switch
# given applies to every file.  Without arguments, every Bible.*.xml in the
# current directory is processed in XML mode.
my @srcfiles;
my $mode = "xml";

if (@ARGV && $ARGV[0] ne "") {
    foreach my $arg (@ARGV) {
        last if $arg eq "";    # an empty argument ends the list, as before
        if    ($arg eq "--vpl") { $mode = "vpl"; }
        elsif ($arg eq "--imp") { $mode = "imp"; }
        elsif ($arg eq "--xml") { $mode = "xml"; }
        else                    { push @srcfiles, $arg; }
    }
}
else {
    opendir(my $dir, ".") or die "ERROR: Cannot read current directory: $!\n";
    @srcfiles = grep { /Bible\.[^\.]+\.xml$/ } readdir $dir;
    closedir $dir;
}

foreach my $infile (@srcfiles) {
    # Derive the versification name: "Bible.<name>.xml" gives <name>;
    # anything else is the file name without directory and extension.
    if ($infile =~ /^Bible.+xml$/ && $infile =~ /^Bible\.([^\.]+)\.xml/) {
        $v11n = $1;
    }
    else {
        $v11n = $infile;
        $v11n =~ s/\.[^\.]*$//;
        $v11n =~ s/^.+\///;
    }
    my $outfile = lc("v11n$v11n.h");

    my ($in, $out);
    unless (open $in, '<', $infile) {
        warn "ERROR: Cannot read $infile: $!\n";
        next;
    }
    unless (open $out, '>', $outfile) {
        warn "ERROR: Cannot write $outfile: $!\n";
        close $in;
        next;
    }

    # File header.  $outfile already carries the ".h" suffix, and the
    # comment has to be closed before the include guard, otherwise the
    # guard and SWORD_NAMESPACE_START end up inside the comment.
    my $guard = "V11N" . uc($v11n) . "_H";
    print {$out}
        "/******************************************************************************\n",
        " * $outfile - Versification data for the $v11n system\n",
        " *\n",
        " * Copyright 1998-2005 CrossWire Bible Society (http://www.crosswire.org)\n",
        " * CrossWire Bible Society\n",
        " * P. O. Box 2528\n",
        " * Tempe, AZ 85280-2528\n",
        " *\n",
        " * This program is free software; you can redistribute it and/or modify it\n",
        " * under the terms of the GNU General Public License as published by the\n",
        " * Free Software Foundation version 2.\n",
        " *\n",
        " * This program is distributed in the hope that it will be useful, but\n",
        " * WITHOUT ANY WARRANTY; without even the implied warranty of\n",
        " * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n",
        " * General Public License for more details.\n",
        " *\n",
        " */\n",
        "\n#ifndef $guard\n#define $guard\n\nSWORD_NAMESPACE_START\n\n\n";

    # Per-file parsing state.
    my ($thisBook, $thisChap, $thisVers) = ("", "", "");
    my ($lastChap, $lastVers)            = ("", "");
    $lastBook = "";
    $cCount   = 0;
    my $vCount = 0;
    $otnt = 0;    # 0 = ot, 1 = nt

    # C source fragments that are filled in while reading.
    my $abbrevsOut = "/******************************************************************************\n * Abbreviations - MUST be in alphabetical order & by PRIORITY\n * RULE: first match of entire key\n * (e.g. key: \"1CH\"; match: \"1CHRONICLES\")\n */\n\nconst struct abbrev builtin_abbrevs$v11n\[\] = {\n";
    my $abbrevsCloser = "\t{\"\", \"\"}\n};\n\n\n";
    my $abbrevsList   = "";
    my $vm = "/******************************************************************************\n * Maximum verses per chapter\n */\n\nint vm$v11n\[\] = {";
    $otbooks = "/******************************************************************************\n * [on]tbooks$v11n - initialize static instance for all canonical text names\n * and chapmax\n */\nstruct sbook otbooks$v11n\[\] = {\n";
    $ntbooks = "struct sbook ntbooks$v11n\[\] = {\n";
    my $booksCloser = "\t{\"\", \"\", \"\", 0}\n};\n\n";

    while (my $line = <$in>) {
        my $osisID = "";

        # NOTE(review): the XML pattern was lost in extraction (it appeared
        # as "=~ //" followed by a use of $1).  This replacement takes the
        # first quoted attribute value that follows the word osisID inside
        # a tag, which covers both <verse osisID="Gen.1.1"> and
        # <osisID code="Gen.1.1"/> - confirm against the real input format.
        if ($line =~ /<[^>]*\bosisID\b[^>"]*"([^"]+)"/) {
            $osisID = $1;
        }
        elsif ($mode eq "vpl" && $line =~ /^(.+?) ([0-9A-Za-z]+):([0-9A-Za-z]+)/) {
            $osisID = "$1.$2.$3";
        }
        elsif ($mode eq "imp" && $line =~ /^\$\$\$(.+?) ([0-9A-Za-z]+):([0-9A-Za-z]+)/) {
            $osisID = "$1.$2.$3";
        }
        next if $osisID eq "";

        # Skip ids that are not Book.Chapter.Verse instead of continuing
        # with stale capture variables.
        next unless $osisID =~ /([^\.]+)\.([a-zA-Z\d]+)\.([a-zA-Z\d]+)/;
        my ($book, $chap, $vers) = ($1, $2, $3);

        ($lastBook, $lastChap, $lastVers) = ($thisBook, $thisChap, $thisVers);
        ($thisBook, $thisChap, $thisVers) = ($book, $chap, $vers);

        my $newBook = $thisBook ne $lastBook;
        my $newChap = $thisChap ne $lastChap;

        # A chapter has just ended: record its verse count.
        if (($newBook || $newChap) && $lastVers ne "") {
            $vm .= "$vCount, ";
        }

        if ($newBook) {
            my $osisBook = $osis{lc $thisBook};
            my $bookName = "";
            if (defined $osisBook && defined $idmap{$osisBook}) {
                $bookName = $idmap{$osisBook};
            }
            $vm .= "\n\t// $bookName\n\t";

            # %abbrevs is keyed by the canonical osisID, so resolve the
            # book through %osis first; this also finds the entry when the
            # input uses a full name instead of the osisID.
            if (defined $osisBook && defined $abbrevs{lc $osisBook}) {
                $abbrevsList .= $abbrevs{lc $osisBook};
            }

            # Emit the row for the book that has just ended.
            if ($lastBook ne "") {
                buildBooksArrays();
            }
            $cCount = 1;
            $vCount = 1;
        }
        elsif ($newChap) {
            $cCount++;
            $vCount = 1;
        }
        elsif ($thisVers ne $lastVers) {
            $vCount++;
        }
    }

    $vm .= "$vCount\n};\n";

    # Emit the final book.  buildBooksArrays() works on $lastBook, which
    # still names the book of the second-to-last verse, so point it at the
    # book that is actually being closed.
    if ($thisBook ne "") {
        $lastBook = $thisBook;
        buildBooksArrays();
    }
    $otbooks .= $booksCloser;
    $ntbooks .= $booksCloser;

    # Build the abbreviation table from the collected ';'-terminated names.
    my @abbrevsQueue = sort grep { $_ ne "" } split /;/, lc $abbrevsList;
    foreach my $abbr (@abbrevsQueue) {
        my $osisBook = defined $osis{$abbr} ? $osis{$abbr} : "";
        my $bookName = defined $idmap{$osisBook} ? $idmap{$osisBook} : "";
        $abbrevsOut .= "\t{\"" . uc($abbr) . "\", \"" . $osisBook . "\"},\t\t//" . $bookName . "\n";
    }
    $abbrevsOut .= $abbrevsCloser;

    print {$out} $otbooks, $ntbooks, $abbrevsOut, $vm,
        "\n\nSWORD_NAMESPACE_END\n\n\n#endif\n";

    close $in;
    close $out or warn "ERROR: Failed to finish writing $outfile: $!\n";
}