generate a builtin_abbrevs from XML canon definitions

git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@175 07627401-56e2-0310-80f4-f8cd0041bdcd
author: Chris Little <chrislit@crosswire.org> 2009-03-15 02:23:28 +0000
committer: Chris Little <chrislit@crosswire.org> 2009-03-15 02:23:28 +0000
commit: c22a0e81ca1c65425ee45cdcc2eea68eea955590 (patch)
tree: 9cf08be5b04d9ada4a01372bc8eae4aa7b9af447 /versification/makeabbrevs.pl
parent: c676c58279cbd4d7e1f54e0f11dbd1763ff871d2 (diff)
download: sword-tools-c22a0e81ca1c65425ee45cdcc2eea68eea955590.tar.gz
1 files changed, 97 insertions, 0 deletions
diff --git a/versification/makeabbrevs.pl b/versification/makeabbrevs.pl
new file mode 100644
index 0000000..7789ce7
--- /dev/null
+++ b/versification/makeabbrevs.pl
@@ -0,0 +1,97 @@
+#!/usr/bin/perl
+#
+# makeabbrevs.pl - generate builtin_abbrevs.h from the XML canon definitions.
+#
+# Reads every file listed in @canons (relative to the current directory),
+# collects the <id> (osisID) and <name> elements found in them, and writes a C
+# array "builtin_abbrevs" that maps each upper-cased id/name to its osisID.
+
+use strict;
+use warnings;
+
+# @canons will contain this list of files, these are in a basic XML format.
+# Each file lists osisIDs along with the English names associated with the
+# osisID. These aren't exhaustive, and may or may not overlap (but hopefully
+# don't). We are only using these to load mappings from osisIDs.
+my @canons = (
+    "canon.bible.xml",      # the Bible, broadly defined
+#    "canon.af.xml",         # Apostolic Fathers
+#    "canon.otp.xml",        # OT pseudepigrapha
+#    "canon.nta.xml",        # NT apocrypha
+#    "canon.lds.xml",        # Mormon books
+#    "canon.naghammadi.xml", # Nag Hammadi Library
+#    "canon.qumran.xml",     # Qumran mss
+#    "canon.classical.xml",  # intended for classical works, currently just Josephus
+);
+
+my $warn = 0;        # set to a true value to report duplicate mappings
+my @abbrevsQueue;    # every distinct id/name that gets a table row
+my %osis;            # lc(id or name) => osisID it resolves to
+my %idmap;           # osisID => first <name> listed for it
+
+foreach my $mapfile (@canons) {
+    # Refuse to continue without the input: silently skipping it would
+    # overwrite builtin_abbrevs.h with an incomplete table.
+    open(my $map, '<', $mapfile) or die "Cannot open $mapfile: $!\n";
+
+    my $id;          # most recent <id>; the <name>s that follow belong to it
+    while (my $line = <$map>) {
+        $line =~ s/<!\-\-.+?\-\->//g;    # drop XML comments contained on one line
+        $line =~ s/\&amp;/\&/g;
+
+        if ($line =~ /<id>(.+?)<\/id>/) {
+            $id = $1;
+            # Queue each key only once so the table has no duplicate rows.
+            push @abbrevsQueue, $id unless exists $osis{lc($id)};
+            $osis{lc($id)} = $id;
+        }
+        elsif ($line =~ /<name>(.+?)<\/name>/) {
+            my $name = $1;
+            if (!defined $id) {
+                # A name can only be mapped to the <id> that precedes it.
+                warn "$mapfile: <name>$name</name> has no preceding <id>, skipped\n";
+                next;
+            }
+
+            if (!exists $osis{lc($name)}) {
+                $osis{lc($name)} = $id;
+                push @abbrevsQueue, $name;
+            }
+            elsif ($warn) {
+                print "ERROR: Duplicate mapping from $name (for $id) found in $mapfile.\n";
+            }
+
+            if (!exists $idmap{$id}) {
+                $idmap{$id} = $name;
+            }
+            elsif ($warn) {
+                # Duplicates most likely indicate alternate names, so ignore them.
+                print "ERROR: Duplicate mapping from $id found in $mapfile.\n";
+            }
+        }
+    }
+    close($map);
+}
+
+my $abbrevs = "/******************************************************************************\n *	Abbreviations - MUST be in alphabetical order & by PRIORITY\n *		RULE: first match of entire key\n *			(e.g. key: \"1CH\"; match: \"1CHRONICLES\")\n */\n\nconst struct abbrev builtin_abbrevs\[\] = {\n";
+
+# The generated header requires the keys in alphabetical order, and the keys
+# are written upper-cased, so compare upper-cased: a plain sort would put
+# e.g. "EpJer" before "Eph", although "EPH" has to precede "EPJER".
+foreach my $abbrev (sort { uc($a) cmp uc($b) } @abbrevsQueue) {
+    my $osisID = $osis{lc($abbrev)};
+    # An osisID without any <name> has no entry in %idmap.
+    my $comment = defined $idmap{$osisID} ? $idmap{$osisID} : "";
+
+    # Keys with a digit anywhere after their first character are written
+    # commented out (the reason is not recorded in this script).
+    if ($abbrev =~ /^.+\d/) {
+        $abbrevs .= "//";
+    }
+    $abbrevs .= "  {\"" . uc($abbrev) . "\", \"" . $osisID . "\"},\t\t//" . $comment . "\n";
+}
+$abbrevs .= "  {\"\", \"\"}\n};\n\n\n";
+
+my $outfile = "builtin_abbrevs.h";
+open(my $out, '>', $outfile) or die "Cannot write $outfile: $!\n";
+print {$out} $abbrevs;
+close($out) or die "Cannot close $outfile: $!\n";
author	Chris Little <chrislit@crosswire.org>	2009-03-15 02:23:28 +0000
committer	Chris Little <chrislit@crosswire.org>	2009-03-15 02:23:28 +0000
commit	c22a0e81ca1c65425ee45cdcc2eea68eea955590 (patch)
tree	9cf08be5b04d9ada4a01372bc8eae4aa7b9af447 /versification/makeabbrevs.pl
parent	c676c58279cbd4d7e1f54e0f11dbd1763ff871d2 (diff)
download	sword-tools-c22a0e81ca1c65425ee45cdcc2eea68eea955590.tar.gz