summary refs log tree commit diff stats
path: root/versification/mapper/create_db.pl
diff options
context:
space:
mode:
Diffstat (limited to 'versification/mapper/create_db.pl')
-rw-r--r-- versification/mapper/create_db.pl 118
1 files changed, 91 insertions, 27 deletions
diff --git a/versification/mapper/create_db.pl b/versification/mapper/create_db.pl
index b5fec2e..8f22f9d 100644
--- a/versification/mapper/create_db.pl
+++ b/versification/mapper/create_db.pl
@@ -27,49 +27,113 @@ sub loadFromFile
close(FH);
}
-loadFromFile( "data/refsysmap.unl", \@mappings );
-loadFromFile( "data/Bible.xml", \@bible );
-loadFromFile( "data/Bible.NAB.xml", \@bible_nab );
-loadFromFile( "data/Bible.NJB.xml", \@bible_njb );
-loadFromFile( "data/Bible.ORG.xml", \@bible_org );
-loadFromFile( "data/Bible.Vul.xml", \@bible_vul );
-loadFromFile( "data/Bible.LXX.xml", \@bible_lxx );
-
##########################
-#INIT DB
+#INIT DBs
##########################
unlink "v11n-mapper.db";
-my $dbh = DBI->connect("dbi:SQLite:dbname=v11n-mapper.db","","") || die "can't connect to SQLite database\n";
-$dbh->{unicode} = 1;
-$dbh->{AutoCommit} = 0; # enable transactions
-$dbh->{RaiseError} = 1;
+my $dbh_mapper = DBI->connect("dbi:SQLite:dbname=v11n-mapper.db","","") || die "can't connect to SQLite database\n"; # handle for v11n-mapper.db, passed to feedMapping below
+$dbh_mapper->{unicode} = 1; # NOTE(review): current DBD::SQLite documents this attribute as sqlite_unicode - verify against the installed driver
+$dbh_mapper->{AutoCommit} = 0; # enable transactions
+$dbh_mapper->{RaiseError} = 0; # NOTE(review): the old handle and the schema handle use RaiseError = 1; with 0 only the explicit "|| die" checks catch failures - confirm this is intended
+unlink "v11n-schema.db"; # remove any previous schema DB file before connecting, as done for v11n-mapper.db
+my $dbh_schema = DBI->connect("dbi:SQLite:dbname=v11n-schema.db","","") || die "can't connect to SQLite database\n"; # handle for v11n-schema.db, passed to feedSchema below
+$dbh_schema->{unicode} = 1; # NOTE(review): same attribute-name question as for the mapper handle
+$dbh_schema->{AutoCommit} = 0; # enable transactions
+$dbh_schema->{RaiseError} = 1; # DBI raises an exception on errors for this handle
-sub feedScheme
+sub feedSchema # args: DB handle, table name, array ref of lines; creates the table and inserts each distinct osisID code once
{
my $dbh = shift;
my $table_name = shift;
my $array_ref = shift;
- $dbh->do("CREATE TABLE $table_name (osisID TEXT NOT NULL)") || die $!;
- #TODO: ADD INDEX!!
+ print "Feeding schema $table_name into DB.\n";
+
+ $dbh->do("CREATE TABLE $table_name (osisID TEXT NOT NULL UNIQUE)") || die $!; # NOTE(review): $! is the OS error variable; DBI reports SQL failures via errstr - confirm the intended message
- foreach my $osisID_line (grep(m/<osisID code/, @{$array_ref}))
+ my ($osisID, %osisIDs_in_DB); # %osisIDs_in_DB records which codes were already inserted
+ my @lines = grep(m/<osisID code/, @{$array_ref}); # keep only the lines that carry an osisID element
+ die "no lines available!\n" unless @lines; # abort when the input holds no osisID line at all
+
+ foreach my $osisID_line (@lines)
{
- my ($osisID) = $osisID_line =~ m/code="(.+)"/;
- $dbh->do("INSERT INTO $table_name VALUES (\'$osisID\')") || die $!;
+ ($osisID) = $osisID_line =~ m/code="(.+)"/; # NOTE(review): (.+) is greedy and runs to the last double quote on the line - assumes code is the only quoted attribute, TODO confirm
+ next if (exists($osisIDs_in_DB{$osisID})); #keep osisID column unique
+ $dbh->do("INSERT INTO $table_name VALUES (\'$osisID\')") || die "$!\n"; # value is interpolated into the SQL text rather than bound as a placeholder
+ $osisIDs_in_DB{$osisID} = 1; # mark as stored so later duplicates are skipped
}
$dbh->commit();
}
-&feedScheme($dbh, "scheme_bible", \@bible);
-&feedScheme($dbh, "scheme_bible_nab", \@bible_nab);
-&feedScheme($dbh, "scheme_bible_njb", \@bible_njb);
-&feedScheme($dbh, "scheme_bible_org", \@bible_org);
-&feedScheme($dbh, "scheme_bible_vul", \@bible_vul);
-&feedScheme($dbh, "scheme_bible_lxx", \@bible_lxx);
+sub feedMapping # creates table <source>_to_<target> in the given DB and stores the scheme1 -> scheme2 osisID pairs found in the given lines
+{
+ my $dbh = shift; # DB handle that receives the table
+ my $scheme1 = shift; # source scheme name as it appears in the mapping lines
+ my $source = shift; # table-name part for the source scheme
+ my $scheme2 = shift; # target scheme name as it appears in the mapping lines
+ my $target = shift; # table-name part for the target scheme
+ my $array_ref = shift; # array ref holding the mapping lines
+
+ print "Feeding mapping \"$scheme1 to $scheme2\" into DB.\n";
+
+ $dbh->do("CREATE TABLE ".$source."_to_".$target." (source TEXT NOT NULL UNIQUE, target TEXT NOT NULL)") || die $!; # NOTE(review): $! is the OS error variable; DBI reports SQL failures via errstr - confirm the intended message
+
+ my ($source_osisID, $target_osisID, %source_osisIDs_in_DB); # %source_osisIDs_in_DB records which sources were already inserted
-#print @{$dbh->selectcol_arrayref("SELECT osisID FROM scheme_bible")};
+ my @lines = grep(m/$scheme1:.+:$scheme2:.+/, @{$array_ref}); # NOTE(review): scheme names are interpolated unescaped and unanchored, so a "." in a name matches any character - confirm this is acceptable
+ die "no lines available!\n" unless @lines; # abort when no line matches this scheme pair
+
+ foreach my $mapping_line (@lines)
+ {
+ ($source_osisID, $target_osisID) = $mapping_line =~ m/$scheme1:(.+):$scheme2:(.+)/; # first capture is the source osisID, second the target osisID
+ next if ($source_osisID eq $target_osisID); #don't record something that does not need mapping
+ next if ( exists($source_osisIDs_in_DB{$source_osisID}) ); #keep source column unique
+
+ $dbh->do("INSERT INTO ".$source."_to_".$target." VALUES (\'$source_osisID\', \'$target_osisID\')") || die "$!\n"; # values are interpolated into the SQL text rather than bound as placeholders
+ $source_osisIDs_in_DB{$source_osisID} = 1; # only the first target seen for a source is kept
+ }
+ $dbh->commit(); # write the inserted rows of this table
+}
+
+loadFromFile( "data/Bible.xml", \@bible ); # each schema file is loaded, fed into the schema DB, then its array is emptied before the next file
+&feedSchema($dbh_schema, "bible", \@bible); # second argument is the table name created in v11n-schema.db
+@bible=(); # drop the loaded lines (presumably to limit memory use - TODO confirm)
+
+loadFromFile( "data/Bible.NAB.xml", \@bible_nab );
+&feedSchema($dbh_schema, "bible_nab", \@bible_nab);
+@bible_nab=();
+
+loadFromFile( "data/Bible.NJB.xml", \@bible_njb );
+&feedSchema($dbh_schema, "bible_njb", \@bible_njb);
+@bible_njb=();
+
+loadFromFile( "data/Bible.ORG.xml", \@bible_org );
+&feedSchema($dbh_schema, "bible_org", \@bible_org);
+@bible_org=();
+
+loadFromFile( "data/Bible.Vul.xml", \@bible_vul );
+&feedSchema($dbh_schema, "bible_vul", \@bible_vul);
+@bible_vul=();
+
+loadFromFile( "data/Bible.LXX.xml", \@bible_lxx );
+&feedSchema($dbh_schema, "bible_lxx", \@bible_lxx);
+@bible_lxx=();
+
+loadFromFile( "data/refsysmap.unl", \@mappings ); # this one file supplies the lines for every scheme pair handled below
+foreach my $scheme1 ( qw(Bible Bible.NAB Bible.NJB Bible.ORG Bible.Vul Bible.LXX) ) # source scheme
+{
+ foreach my $scheme2 ( qw(Bible Bible.NAB Bible.NJB Bible.ORG Bible.Vul Bible.LXX) ) # target scheme
+ {
+ next if ($scheme1 eq $scheme2); #no mapping necessary
+ next if (($scheme1 ne "Bible") && ($scheme2 ne "Bible")); #no data available unless one side is "Bible"
+ (my $source = $scheme1) =~ s/(.*)\.(.*)/$1_$2/; # copy the name and turn its last dot into an underscore, e.g. Bible.NAB becomes Bible_NAB
+ (my $target = $scheme2) =~ s/(.*)\.(.*)/$1_$2/; # same conversion for the target name
+
+ &feedMapping( $dbh_mapper, $scheme1, lc($source), $scheme2, lc($target), \@mappings ); # table name becomes e.g. bible_to_bible_nab
+ }
+}
-$dbh->disconnect();
+$dbh_schema->disconnect(); # feedSchema commits after each table, so no commit is issued here
+$dbh_mapper->disconnect(); # feedMapping commits after each table, so no commit is issued here
print "Done.\n" \ No newline at end of file