versification/v11nsys.pl


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145

#!/usr/bin/perl


sub cleanupLine {
    # Regularize one line of input before it is parsed.
    #
    # Takes:   the raw input line as its first argument.
    # Returns: a copy of the line in which every occurrence of ">Pss." has
    #          been rewritten to ">Ps/". The caller's variable is not modified.
    #
    # The line is unpacked into a lexical so that no package-level variable
    # is touched by this sub.
    my ($line) = @_;

    $line =~ s/>Pss\./>Ps\//g;

    return $line;
}

sub buildBooks {
    # Append one sbook initializer row for the book named in $lastBook to
    # either the OT or the NT table text.
    #
    # Reads the package variables $lastBook, $cCount and %idmap; updates
    # $otnt and one of $otbooks / $ntbooks. Takes no arguments.

    # Once "Matt" is reached, this and every later book goes to the NT table.
    $otnt = 1 if $lastBook eq "Matt";

    # Row layout: mapped name from %idmap, the book id twice, chapter count.
    my $entry = "\t{\"$idmap{$lastBook}\", \"$lastBook\", \"$lastBook\", $cCount},\n";

    if ($otnt != 0) {
        $ntbooks .= $entry;
    }
    else {
        $otbooks .= $entry;
    }
}

# @canons contains the list of mapping files, which are in a basic XML format.

# Each file lists osisIDs along with the English names associated with the

# osisID. These aren't exhaustive, and may or may not overlap (but hopefully

# don't). We are only using these to load mappings from osisIDs.

# Mapping files to read, in order. Because the first name seen for an osisID
# wins (see below), earlier files take precedence over later ones.
@canons = (
    "bible.xml",      # the Bible, broadly defined
    "otp.xml",        # OT pseudepigrapha
    "nta.xml",        # NT apocrypha
    "lds.xml",        # Mormon books
    "naghammadi.xml", # Nag Hammadi codices
    "qumran.xml",     # Qumran mss
    "classical.xml",  # intended for classical works, currently just Josephus
);

# Populate %idmap (osisID => name) from the <id>...</id> / <name>...</name>
# lines of each mapping file. %idmap stays a package variable because
# buildBooks and the header generation below read it.
foreach my $mapfile (@canons) {
    my $map;
    unless (open $map, '<', $mapfile) {
        # A missing mapping file is not fatal: report it and carry on with
        # whatever mappings the remaining files provide.
        warn "Cannot open mapping file $mapfile: $!; skipping\n";
        next;
    }

    # Most recent <id> seen in this file; scoped per file so that an id can
    # never carry over from a previous mapping file.
    my $id;

    while (my $line = <$map>) {
        if ($line =~ /<id>(.+?)<\/id>/) {
            $id = $1;
        }
        elsif ($line =~ /<name>(.+?)<\/name>/) {
            my $name = $1;

            # A name with no preceding id in this file cannot be mapped.
            next unless defined $id;

            # Only the first name for an id is recorded. Duplicates most
            # likely indicate alternate names, so they are ignored rather
            # than reported as errors.
            if (!defined $idmap{$id} || $idmap{$id} eq "") {
                $idmap{$id} = $name;
            }
        }
    }
    close($map);
}

# Generate one C header (v11n<system>.h, lower-cased) for every
# Bible.<system>.xml file found in the current directory. Each header holds
# the OT and NT sbook tables followed by the max-verses-per-chapter array.
#
# The variables $lastBook, $otnt, $otbooks, $ntbooks and $cCount (and %idmap)
# are deliberately left as package variables: buildBooks reads and updates
# them. Everything else is local to the loop.
opendir(my $dir, ".") or die "Cannot read current directory: $!\n";
my @srcfiles = grep { /Bible\.[^\.]+\.xml$/ } readdir $dir;
closedir($dir);

foreach my $infile (@srcfiles) {

    # The versification system name is the middle component of the file name.
    next unless $infile =~ /Bible\.([^\.]+)\.xml/;
    my $v11n    = $1;
    my $outfile = lc("v11n$v11n.h");

    my ($inf, $outf);
    unless (open $inf, '<', $infile) {
        warn "Cannot open $infile for reading: $!; skipping\n";
        next;
    }
    unless (open $outf, '>', $outfile) {
        warn "Cannot open $outfile for writing: $!; skipping $infile\n";
        close($inf);
        next;
    }

    # File banner. $outfile already carries its ".h" extension, so it is
    # printed as-is.
    print $outf
        "/******************************************************************************\n",
        " * $outfile - Versification data for the $v11n system\n",
        " *\n",
        " * Copyright 1998-2005 CrossWire Bible Society (http://www.crosswire.org)\n",
        " *\tCrossWire Bible Society\n",
        " *\tP. O. Box 2528\n",
        " *\tTempe, AZ  85280-2528\n",
        " *\n",
        " * This program is free software; you can redistribute it and/or modify it\n",
        " * under the terms of the GNU General Public License as published by the\n",
        " * Free Software Foundation version 2.\n",
        " *\n",
        " * This program is distributed in the hope that it will be useful, but\n",
        " * WITHOUT ANY WARRANTY; without even the implied warranty of\n",
        " * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n",
        " * General Public License for more details.\n",
        " *\n",
        " * \n",
        " * This file is based on data from SIL, further processed by CCEL.\n",
        " */\n";

    # Include guard and namespace opener.
    print $outf "\n#ifndef V11N" . uc($v11n) . "_H\n#define V11N" . uc($v11n) . "_H\n\nSWORD_NAMESPACE_START\n\n";

    # Book/chapter/verse of the osisID currently being processed, and of the
    # one before it ($lastBook is the shared package variable).
    my ($thisBook, $thisChap, $thisVers) = ("", "", "");
    my ($lastChap, $lastVers);

    $bCount = 0;    # books seen so far (counted but not emitted)
    $cCount = 0;    # chapters seen so far in the current book
    my $vCount = 0; # verses seen so far in the current chapter

    # Text of the three generated tables, built up while reading the input.
    my $vm = "/******************************************************************************\n *\tMaximum verses per chapter\n */\n\nint vm$v11n\[\] = {";
    $otbooks = "/******************************************************************************\n * [on]tbooks$v11n - initialize static instance for all canonical text names\n *\t\tand chapmax\n */\nstruct sbook otbooks$v11n\[\] = {\n";
    $ntbooks = "struct sbook ntbooks$v11n\[\] = {\n";
    my $booksCloser = "\t{\"\", \"\", \"\", 0}\n};\n";

    $otnt = 0; # 0 = ot, 1 = nt

    while (my $line = <$inf>) {
        $line = cleanupLine($line);

        next unless $line =~ /<osisID.+?code=\"(.+?)\"\/>/;
        my $osisID = $1;

        # Only IDs of the form Book.Chapter.Verse are counted; anything else
        # is skipped instead of being parsed from stale capture groups.
        next unless $osisID =~ /([^\.]+)\.([a-zA-Z\d]+)\.([a-zA-Z\d]+)/;
        my ($book, $chap, $vers) = ($1, $2, $3);

        $lastBook = $thisBook;
        $lastChap = $thisChap;
        $lastVers = $thisVers;

        ($thisBook, $thisChap, $thisVers) = ($book, $chap, $vers);

        # A chapter just ended: record its verse count. The $lastVers check
        # suppresses this for the very first osisID of the file.
        if ((($thisBook ne $lastBook) || ($thisChap ne $lastChap)) && $lastVers ne "") {
            $vm .= "$vCount, ";
        }

        if ($thisBook ne $lastBook) {
            $bCount++;
            $vm .= "\n\t// $idmap{$thisBook}\n\t";
            if ($lastBook ne "") {
                # Emit the row for the book that just ended; $cCount still
                # holds its chapter count at this point.
                buildBooks();
            }
            $cCount = 1;
            $vCount = 1;
        }
        elsif ($thisChap ne $lastChap) {
            $cCount++;
            $vCount = 1;
        }
        elsif ($thisVers ne $lastVers) {
            $vCount++;
        }
    }

    # Close the verse table with the count for the final chapter.
    $vm .= "$vCount\n};\n";

    # Emit the row for the final book. buildBooks works on $lastBook, which
    # lags one osisID behind, so point it at the book actually being closed.
    # Nothing is emitted when the input contained no osisIDs at all.
    if ($thisBook ne "") {
        $lastBook = $thisBook;
        buildBooks();
    }
    $otbooks .= $booksCloser;
    $ntbooks .= $booksCloser;

    print $outf $otbooks;
    print $outf $ntbooks;
    print $outf $vm;

    print $outf "\nSWORD_NAMESPACE_END\n\n#endif\n";

    close($inf);
    # Buffered write errors only surface at close, so report them.
    close($outf) or warn "Error closing $outfile: $!\n";
}