#!/usr/bin/perl
# blob: 2b28e7d4bc68327311aa3c3695f27195e3180691
use strict;
use warnings;
# Hand a Bible reference written with "," between chapter and verse
# (e.g. "Mt 5,3; 6,7") to the external `vs2osisref` program and return
# what that program prints.
#
# Parameter:
#   $_[0]  the reference text.  Everything up to the first space after the
#          leading book abbreviation is also passed to the tool as its
#          second (context) argument; the third argument is always "de".
#
# Returns the tool's standard output with one trailing newline removed.
# Returns the empty string when the argument is undefined, when the tool
# cannot be started, or when it prints nothing.
sub vs2osis {
    my ($verse_text) = @_;
    return '' unless defined $verse_text;

    # Reduce a copy of the reference to its book abbreviation.
    my $context = $verse_text;
    $context =~ s/([12345]?[A-Z][a-z]+)\ .*/$1/;

    # The tool is given ":" between chapter and verse.  A reference may list
    # several chapter,verse pairs, so every comma has to be rewritten, not
    # just the first one.
    my $ref = $verse_text;
    $ref =~ s/,/:/g;

    # List-form pipe open: the arguments reach the program verbatim, without
    # a shell in between that could re-split or interpret them.
    my $tool;
    if (!open($tool, '-|', 'vs2osisref', $ref, $context, 'de')) {
        warn "vs2osis: cannot run vs2osisref: $!\n";
        return '';
    }
    my $osis = do { local $/; <$tool> };
    close $tool;

    $osis = '' unless defined $osis;
    $osis =~ s/\n$//;
    return $osis;
}
# ---------------------------------------------------------------------------
# Main script: for every input file listed in @files, rewrite its markup line
# by line into TEI dictionary markup and write the result, wrapped in a TEI
# header and footer, to "<input>.tei".
#
# NOTE(review): every headword substitution emits "</entryFree>" *before* the
# new "<entryFree ...>", so the first entry is preceded by an unmatched
# closing tag and the last entry is never closed.  Lines that match none of
# the patterns are copied into <body> as they are.  The generated file
# therefore presumably still needs manual post-editing - confirm with the
# workflow this script belongs to.
# ---------------------------------------------------------------------------

# Input documents to convert.
my @files = ('wb.abw');

# Everything that precedes the converted lines in the output file.  The "&"
# characters in the sourceforge URL are written as "&amp;": a bare "&" is
# not allowed in XML character data and would make the file ill-formed.
my $tei_header = <<'END_HEADER';
<?xml version="1.0" encoding="utf-8"?>

<TEI xmlns="http://www.crosswire.org/2008/TEIOSIS/namespace"

xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.crosswire.org/2008/TEIOSIS/namespace
http://www.crosswire.org/OSIS/teiP5osis.1.4.xsd">
<teiHeader>
<fileDesc>
<titleStmt>
<title>Worterklaerung Schlachter</title>
<author></author>
</titleStmt>
<editionStmt>
<edition></edition>
</editionStmt>
<publicationStmt>
<publisher></publisher>
<date></date>
</publicationStmt>
<sourceDesc><p>http://sourceforge.net/project/showfiles.php?group_id=89078&amp;package_id=93370&amp;release_id=278981</p></sourceDesc>
</fileDesc>
<revisionDesc>
<change when="2008-11-25">initial conversion to TEI</change>
</revisionDesc>
</teiHeader>
<text>
<body>
END_HEADER
# The first converted line follows "<body>" directly, without a line break.
chomp $tei_header;

# A Bible reference inside running text: a book abbreviation followed by one
# or more " chapter", " chapter,verse" or " chapter,verse-verse" items, and
# then the character that ends the reference.
# NOTE(review): the abbreviation only admits ASCII letters, so abbreviations
# containing other letters (umlauts, for instance) are not recognised.
my $reference_re = qr{
    (                                     # whole reference, given to vs2osis
        [12345]?[A-Z][a-z]+               # book abbreviation, maybe numbered
        (?:
            ;?[ ][0-9]+                   # chapter, optionally after ";"
            (?: ,[0-9]+ (?: -[0-9]+ )? )? # ",verse" or ",verse-verse"
        )+
    )
    ( [ ] | \) | ; )                      # delimiter ending the reference
}x;

FILE:
foreach my $file (@files) {
    # Read the whole input first, so that an unreadable input never leaves
    # an empty output file behind.
    my $in;
    if (!open($in, '<', $file)) {
        warn "cannot read $file: $!\n";
        next FILE;
    }
    my @lines = <$in>;
    close $in;

    foreach my $line (@lines) {
        # Drop all props="..." attributes.
        $line =~ s/props=".*?"//g;

        # entry (ordinary): a "T3" paragraph holding the headword in its
        # first <c> element, followed by one empty <c> element
        $line =~ s{<p style="T3".*?><c.*?>(.*?)</c><c.*?></c></p>}
                  {</entryFree>\n<entryFree sortKey="$1" split="$1"><form type="headword"><orth rend="bold">$1</orth></form>}g;

        # entry (referring somewhere else): as above, plus a third <c>
        # element of the form "s.<target>"
        $line =~ s{<p style="T3".*?><c.*?>(.*?)</c><c.*?></c><c.*?>s\.(.*?)</c></p>}
                  {</entryFree>\n<entryFree sortKey="$1" split="$1"><form type="headword"><orth rend="bold">$1</orth></form>\n\t<ref target="$2">s. $2</ref>}g;

        # content: the first remaining paragraph on the line becomes a <def>
        $line =~ s{<p.*?>(.*?)</p>}{\t<def>\n\t\t$1\n\t</def>};
        $line =~ s{<c style="Kursiv ZF" >(.*?)</c>}{<hi rend="italic">$1</hi>}g;

        # references: replace each one by the output of vs2osis and keep the
        # delimiter.  The captures are copied into lexicals first, because
        # the regexes run inside vs2osis overwrite $1 and $2 while it works.
        $line =~ s{$reference_re}{
            my ($reference, $delimiter) = ($1, $2);
            vs2osis($reference) . $delimiter
        }eg;

        # clean up: remove whatever <c ...> and </c> tags are left
        $line =~ s{<c.*?>}{}g;
        $line =~ s{</c\s*>}{}g;
    }

    # Truncate rather than append: the file receives a complete XML document
    # including its declaration, so appending to the output of an earlier
    # run could never yield a well-formed file.
    my $target = "$file.tei";
    open(my $out, '>', $target) or die "cannot write $target: $!\n";
    print {$out} $tei_header;
    print {$out} @lines, "\n";
    print {$out} "</body>\n</text>\n</TEI>";
    close($out) or die "cannot finish writing $target: $!\n";
}