diff options
Diffstat (limited to 'versification/mapper/create_db.pl')
-rw-r--r-- | versification/mapper/create_db.pl | 118 |
1 files changed, 91 insertions, 27 deletions
diff --git a/versification/mapper/create_db.pl b/versification/mapper/create_db.pl index b5fec2e..8f22f9d 100644 --- a/versification/mapper/create_db.pl +++ b/versification/mapper/create_db.pl @@ -27,49 +27,113 @@ sub loadFromFile close(FH); } -loadFromFile( "data/refsysmap.unl", \@mappings ); -loadFromFile( "data/Bible.xml", \@bible ); -loadFromFile( "data/Bible.NAB.xml", \@bible_nab ); -loadFromFile( "data/Bible.NJB.xml", \@bible_njb ); -loadFromFile( "data/Bible.ORG.xml", \@bible_org ); -loadFromFile( "data/Bible.Vul.xml", \@bible_vul ); -loadFromFile( "data/Bible.LXX.xml", \@bible_lxx ); - ########################## -#INIT DB +#INIT DBs ########################## unlink "v11n-mapper.db"; -my $dbh = DBI->connect("dbi:SQLite:dbname=v11n-mapper.db","","") || die "can't connect to SQLite database\n"; -$dbh->{unicode} = 1; -$dbh->{AutoCommit} = 0; # enable transactions -$dbh->{RaiseError} = 1; +my $dbh_mapper = DBI->connect("dbi:SQLite:dbname=v11n-mapper.db","","") || die "can't connect to SQLite database\n"; +$dbh_mapper->{unicode} = 1; +$dbh_mapper->{AutoCommit} = 0; # enable transactions +$dbh_mapper->{RaiseError} = 0; +unlink "v11n-schema.db"; +my $dbh_schema = DBI->connect("dbi:SQLite:dbname=v11n-schema.db","","") || die "can't connect to SQLite database\n"; +$dbh_schema->{unicode} = 1; +$dbh_schema->{AutoCommit} = 0; # enable transactions +$dbh_schema->{RaiseError} = 1; -sub feedScheme +sub feedSchema { my $dbh = shift; my $table_name = shift; my $array_ref = shift; - $dbh->do("CREATE TABLE $table_name (osisID TEXT NOT NULL)") || die $!; - #TODO: ADD INDEX!! + print "Feeding schema $table_name into DB.\n"; + + $dbh->do("CREATE TABLE $table_name (osisID TEXT NOT NULL UNIQUE)") || die $!; - foreach my $osisID_line (grep(m/<osisID code/, @{$array_ref})) + my ($osisID, %osisIDs_in_DB); + my @lines = grep(m/<osisID code/, @{$array_ref}); + die "no lines available!\n" unless @lines; + + foreach my $osisID_line (@lines) { - my ($osisID) = $osisID_line =~ m/code="(.+)"/; - $dbh->do("INSERT INTO $table_name VALUES (\'$osisID\')") || die $!; + ($osisID) = $osisID_line =~ m/code="(.+)"/; + next if (exists($osisIDs_in_DB{$osisID})); #keep osisID column unique + $dbh->do("INSERT INTO $table_name VALUES (\'$osisID\')") || die "$!\n"; + $osisIDs_in_DB{$osisID} = 1; } $dbh->commit(); } -&feedScheme($dbh, "scheme_bible", \@bible); -&feedScheme($dbh, "scheme_bible_nab", \@bible_nab); -&feedScheme($dbh, "scheme_bible_njb", \@bible_njb); -&feedScheme($dbh, "scheme_bible_org", \@bible_org); -&feedScheme($dbh, "scheme_bible_vul", \@bible_vul); -&feedScheme($dbh, "scheme_bible_lxx", \@bible_lxx); +sub feedMapping +{ + my $dbh = shift; + my $scheme1 = shift; + my $source = shift; + my $scheme2 = shift; + my $target = shift; + my $array_ref = shift; + + print "Feeding mapping \"$scheme1 to $scheme2\" into DB.\n"; + + $dbh->do("CREATE TABLE ".$source."_to_".$target." (source TEXT NOT NULL UNIQUE, target TEXT NOT NULL)") || die $!; + + my ($source_osisID, $target_osisID, %source_osisIDs_in_DB); -#print @{$dbh->selectcol_arrayref("SELECT osisID FROM scheme_bible")}; + my @lines = grep(m/$scheme1:.+:$scheme2:.+/, @{$array_ref}); + die "no lines available!\n" unless @lines; + + foreach my $mapping_line (@lines) + { + ($source_osisID, $target_osisID) = $mapping_line =~ m/$scheme1:(.+):$scheme2:(.+)/; + next if ($source_osisID eq $target_osisID); #don't record something that does not need mapping + next if ( exists($source_osisIDs_in_DB{$source_osisID}) ); #keep source column unique + + $dbh->do("INSERT INTO ".$source."_to_".$target." VALUES (\'$source_osisID\', \'$target_osisID\')") || die "$!\n"; + $source_osisIDs_in_DB{$source_osisID} = 1; + } + $dbh->commit(); +} + +loadFromFile( "data/Bible.xml", \@bible ); +&feedSchema($dbh_schema, "bible", \@bible); +@bible=(); + +loadFromFile( "data/Bible.NAB.xml", \@bible_nab ); +&feedSchema($dbh_schema, "bible_nab", \@bible_nab); +@bible_nab=(); + +loadFromFile( "data/Bible.NJB.xml", \@bible_njb ); +&feedSchema($dbh_schema, "bible_njb", \@bible_njb); +@bible_njb=(); + +loadFromFile( "data/Bible.ORG.xml", \@bible_org ); +&feedSchema($dbh_schema, "bible_org", \@bible_org); +@bible_org=(); + +loadFromFile( "data/Bible.Vul.xml", \@bible_vul ); +&feedSchema($dbh_schema, "bible_vul", \@bible_vul); +@bible_vul=(); + +loadFromFile( "data/Bible.LXX.xml", \@bible_lxx ); +&feedSchema($dbh_schema, "bible_lxx", \@bible_lxx); +@bible_lxx=(); + +loadFromFile( "data/refsysmap.unl", \@mappings ); +foreach my $scheme1 ( qw(Bible Bible.NAB Bible.NJB Bible.ORG Bible.Vul Bible.LXX) ) +{ + foreach my $scheme2 ( qw(Bible Bible.NAB Bible.NJB Bible.ORG Bible.Vul Bible.LXX) ) + { + next if ($scheme1 eq $scheme2); #no mapping neccessary + next if (($scheme1 ne "Bible") && ($scheme2 ne "Bible")); #no data available + (my $source = $scheme1) =~ s/(.*)\.(.*)/$1_$2/; + (my $target = $scheme2) =~ s/(.*)\.(.*)/$1_$2/; + + &feedMapping( $dbh_mapper, $scheme1, lc($source), $scheme2, lc($target), \@mappings ); + } +} -$dbh->disconnect(); +$dbh_schema->disconnect(); +$dbh_mapper->disconnect(); print "Done.\n"
\ No newline at end of file |