summaryrefslogtreecommitdiffstats
path: root/versification/mapper/create_db.pl
blob: 8f22f9d5922a5ca0f6e61f6842c624206a0da5f3 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#!/usr/bin/perl -w

#
# This script generates two SQLite databases: v11n-schema.db with the OSIS
# versification scheme information, and v11n-mapper.db with the mapping data
# from CCEL/SIL/OSIS.
#
# Author: Martin Gruner, mgruner@crosswire.org
# License: GPL
#

use strict;
use utf8;
use DBI;
use DBD::SQLite; #make sure it is installed, we won't use it directly

# File-scope line buffers filled by loadFromFile: @mappings receives the lines
# of data/refsysmap.unl, and each @bible* array receives the lines of one
# data/Bible*.xml versification definition file.
my (@mappings, @bible, @bible_nab, @bible_njb, @bible_org, @bible_vul, @bible_lxx);

##########################
#LOAD DATA
##########################
# Read every line of a text file into a caller-supplied array.
#
# Parameters:
#   $filename - path of the file to read
#   $arrayref - reference to the array that receives the lines; its previous
#               contents are replaced and every line keeps its line terminator
#
# Dies with the operating system's error text if the file cannot be opened.
sub loadFromFile
{
	my $filename = shift;
	my $arrayref = shift;

	# Three-argument open with a lexical handle: the file name can never be
	# interpreted as an open mode or a pipe, and no global handle is clobbered.
	open(my $fh, '<', $filename) or die "can't open $filename: $!\n";
	@{$arrayref} = <$fh>;
	close($fh);
}

##########################
#INIT DBs
##########################
# Every run starts from scratch: a database file left over from an earlier
# invocation is removed (unlink's result is deliberately ignored, the file may
# simply not exist yet) before a fresh SQLite database is opened.
unlink "v11n-mapper.db";
my $dbh_mapper = DBI->connect("dbi:SQLite:dbname=v11n-mapper.db","","")
	|| die "can't connect to SQLite database v11n-mapper.db: $DBI::errstr\n";
# NOTE(review): recent DBD::SQLite releases name this attribute
# "sqlite_unicode" - confirm against the installed driver version.
$dbh_mapper->{unicode} = 1;
$dbh_mapper->{AutoCommit} = 0;  # enable transactions
# Report failures through DBI's own exception, exactly like the schema handle
# below; every statement on this handle is treated as fatal on failure anyway.
$dbh_mapper->{RaiseError} = 1;
unlink "v11n-schema.db";
my $dbh_schema = DBI->connect("dbi:SQLite:dbname=v11n-schema.db","","")
	|| die "can't connect to SQLite database v11n-schema.db: $DBI::errstr\n";
$dbh_schema->{unicode} = 1;
$dbh_schema->{AutoCommit} = 0;  # enable transactions
$dbh_schema->{RaiseError} = 1;

# Create one table and fill it with the osisIDs of a versification
# definition file.
#
# Parameters:
#   $dbh        - database handle to write to
#   $table_name - name of the table to create; it is interpolated into the SQL
#                 text and therefore has to be a plain identifier
#   $array_ref  - reference to the lines of the definition file
#
# Only lines containing "<osisID code" are considered. Each distinct osisID is
# inserted once, later repetitions are skipped. The work is committed before
# the sub returns.
#
# Dies if no osisID line is present or if a database operation fails.
sub feedSchema
{
	my $dbh = shift;
	my $table_name = shift;
	my $array_ref = shift;

	print "Feeding schema $table_name into DB.\n";

	$dbh->do("CREATE TABLE $table_name (osisID TEXT NOT NULL UNIQUE)")
		or die "can't create table $table_name: " . $dbh->errstr . "\n";

	my @lines = grep(m/<osisID code/, @{$array_ref});
	die "no lines available!\n" unless @lines;

	# The value is bound through a placeholder, so it never becomes part of
	# the SQL text, whatever characters it contains.
	my $insert = $dbh->prepare("INSERT INTO $table_name VALUES (?)")
		or die "can't prepare insert into $table_name: " . $dbh->errstr . "\n";

	my %osisIDs_in_DB;
	foreach my $osisID_line (@lines)
	{
		# [^"]+ stops at the closing quote of the code attribute itself, even
		# when further quoted attributes follow on the same line.
		my ($osisID) = $osisID_line =~ m/code="([^"]+)"/;
		unless (defined $osisID)
		{
			warn "skipping osisID line without a double-quoted code: $osisID_line";
			next;
		}
		next if (exists($osisIDs_in_DB{$osisID})); #keep osisID column unique
		$insert->execute($osisID)
			or die "can't insert $osisID into $table_name: " . $dbh->errstr . "\n";
		$osisIDs_in_DB{$osisID} = 1;
	}
	$dbh->commit();
}

# Create the table "<source>_to_<target>" and fill it with the osisID pairs
# that map one versification scheme onto another.
#
# Parameters:
#   $dbh       - database handle to write to
#   $scheme1   - source scheme name as it appears in the mapping lines
#   $source    - source scheme name in the form used for the table name
#   $scheme2   - target scheme name as it appears in the mapping lines
#   $target    - target scheme name in the form used for the table name
#   $array_ref - reference to the lines of the mapping file
#
# A line is used when it contains "<scheme1>:<id>:<scheme2>:<id>". Pairs whose
# two ids are identical are not stored, and only the first pair for a given
# source id is stored. The work is committed before the sub returns.
#
# Dies if no line matches the scheme pair or if a database operation fails.
sub feedMapping
{
	my $dbh = shift;
	my $scheme1 = shift;
	my $source = shift;
	my $scheme2 = shift;
	my $target = shift;
	my $array_ref = shift;

	print "Feeding mapping \"$scheme1 to $scheme2\" into DB.\n";

	my $table_name = $source."_to_".$target;
	$dbh->do("CREATE TABLE $table_name (source TEXT NOT NULL UNIQUE, target TEXT NOT NULL)")
		or die "can't create table $table_name: " . $dbh->errstr . "\n";

	# Scheme names such as "Bible.NAB" contain a dot; \Q...\E keeps it a
	# literal dot instead of a "match any character" wildcard.
	my $mapping_re = qr/\Q$scheme1\E:(.+):\Q$scheme2\E:(.+)/;

	# One pass over the input: keep the captured id pair of every matching line.
	my @pairs = map { $_ =~ $mapping_re ? [ $1, $2 ] : () } @{$array_ref};
	die "no lines available!\n" unless @pairs;

	# Both ids are bound through placeholders, so they never become part of
	# the SQL text, whatever characters they contain.
	my $insert = $dbh->prepare("INSERT INTO $table_name VALUES (?, ?)")
		or die "can't prepare insert into $table_name: " . $dbh->errstr . "\n";

	my %source_osisIDs_in_DB;
	foreach my $pair (@pairs)
	{
		my ($source_osisID, $target_osisID) = @{$pair};
		next if ($source_osisID eq $target_osisID); #don't record something that does not need mapping
		next if ( exists($source_osisIDs_in_DB{$source_osisID}) ); #keep source column unique

		$insert->execute($source_osisID, $target_osisID)
			or die "can't insert $source_osisID into $table_name: " . $dbh->errstr . "\n";
		$source_osisIDs_in_DB{$source_osisID} = 1;
	}
	$dbh->commit();
}

# Schema pass: each versification definition file becomes one table in the
# schema database. A buffer is emptied as soon as its table has been written.
my @schema_jobs = (
	[ "data/Bible.xml",     "bible",     \@bible     ],
	[ "data/Bible.NAB.xml", "bible_nab", \@bible_nab ],
	[ "data/Bible.NJB.xml", "bible_njb", \@bible_njb ],
	[ "data/Bible.ORG.xml", "bible_org", \@bible_org ],
	[ "data/Bible.Vul.xml", "bible_vul", \@bible_vul ],
	[ "data/Bible.LXX.xml", "bible_lxx", \@bible_lxx ],
);
foreach my $job (@schema_jobs)
{
	my ($xml_file, $table, $buffer) = @{$job};

	loadFromFile( $xml_file, $buffer );
	feedSchema( $dbh_schema, $table, $buffer );
	@{$buffer} = ();
}

# Mapping pass: one table for every ordered pair of schemes in which exactly
# one side is the reference scheme "Bible". Identical schemes need no mapping,
# and no data is available for pairs that do not involve "Bible".
loadFromFile( "data/refsysmap.unl", \@mappings );

my @schemes = qw(Bible Bible.NAB Bible.NJB Bible.ORG Bible.Vul Bible.LXX);
foreach my $scheme1 (@schemes)
{
	foreach my $scheme2 (@schemes)
	{
		next unless ( ($scheme1 eq "Bible") xor ($scheme2 eq "Bible") );

		# Table names are the lower-cased scheme names with the dot
		# replaced by an underscore, e.g. "Bible.NAB" becomes "bible_nab".
		my $source = lc($scheme1);
		$source =~ s/\./_/;
		my $target = lc($scheme2);
		$target =~ s/\./_/;

		feedMapping( $dbh_mapper, $scheme1, $source, $scheme2, $target, \@mappings );
	}
}

$dbh_schema->disconnect();
$dbh_mapper->disconnect();

print "Done.\n";