blob: fd71f69d5aaaaba76a59d409cb89b21e2763ecfc (
plain) (
tree)
|
|
#!/usr/bin/perl
# Convert a human-readable verse reference into whatever the external
# `vs2osisref` program prints for it.
#
# Argument:
#   $_[0]  reference text that starts with a book name, e.g. "Mat 5,3".
#
# Returns:
#   The program's standard output with the trailing newline removed, or an
#   empty string when the argument is empty or the program cannot be started.
#
# The program is invoked as `vs2osisref <ref> <context> de`
# (presumably <verse ref> [verse context] [locale] -- confirm against the
# installed SWORD utility).  It is started without a shell so that spaces,
# semicolons or parentheses inside the reference cannot split or terminate
# the command line.
sub vs2osisref {
    my ($verse) = @_;
    return '' unless defined $verse && length $verse;

    # Context: everything after the leading book name is dropped.
    (my $context = $verse) =~ s/([12345]?[A-Z][a-z]+).*/$1/;

    # Reference: the blank between book name and the rest becomes a colon.
    (my $ref = $verse) =~ s/([12345]?[A-Z][a-z]+)\ (.*)/$1:$2/;

    # List-form pipe open: arguments are passed verbatim, no shell involved.
    my $pid = open(my $pipe, '-|', 'vs2osisref', $ref, $context, 'de');
    unless ($pid) {
        warn "cannot run vs2osisref for '$verse': $!\n";
        return '';
    }

    my $osis = do { local $/; <$pipe> };
    close($pipe)
        or warn "vs2osisref did not finish cleanly for '$verse' (status $?)\n";

    $osis = '' unless defined $osis;
    chomp $osis;    # chomp the captured output, not the caller's $_
    return $osis;
}
use strict;
use warnings;

# Return the fixed TEI preamble that is written in front of the converted
# lines: XML declaration, <TEI> root with its schema location, <teiHeader>,
# and the opening <text><body>.  The string ends directly after "<body>"
# (no trailing newline).
sub _tei_header {
    # The ampersands in the source URL are written as &amp; because a raw
    # "&" is not allowed in XML character data.
    my $header = <<'END_TEI_HEADER';
<?xml version="1.0" encoding="utf-8"?>
<TEI xmlns="http://www.crosswire.org/2008/TEIOSIS/namespace"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.crosswire.org/2008/TEIOSIS/namespace
http://www.crosswire.org/OSIS/teiP5osis.1.4.xsd">
<teiHeader>
<fileDesc>
<titleStmt>
<title>Worterklaerung Schlachter</title>
<author></author>
</titleStmt>
<editionStmt>
<edition></edition>
</editionStmt>
<publicationStmt>
<publisher></publisher>
<date></date>
</publicationStmt>
<sourceDesc><p>http://sourceforge.net/project/showfiles.php?group_id=89078&amp;package_id=93370&amp;release_id=278981</p></sourceDesc>
</fileDesc>
<revisionDesc>
<change when="2008-11-25">initial conversion to TEI</change>
</revisionDesc>
</teiHeader>
<text>
<body>
END_TEI_HEADER
    chomp $header;
    return $header;
}

# Rewrite one line of AbiWord markup into TEI markup and return the result.
# The substitutions are order-dependent: entry headings must be recognised
# before the generic <p> rule, and the <c> clean-up has to run last.
#
# NOTE(review): every heading emits "</entryFree>" before opening its own
# <entryFree>, so the first entry is preceded by an unmatched closing tag and
# the last entry is never closed; the output still needs manual fix-up.
sub _abw_line_to_tei {
    my ($line) = @_;

    for ($line) {
        # Drop AbiWord formatting properties.
        s/props\=\".*?\"//g;
        # entry (ordinary)
        s/<p\ style=\"T3\".*?><c.*?>(.*?)<\/c><c.*?><\/c><\/p>/<\/entryFree>\n<entryFree\ sortKey=\"$1\"\ split=\"$1\"><form\ type=\"headword\"><orth\ rend\=\"bold\">$1<\/orth><\/form>/g;
        # entry (referring somewhere else)
        s/<p\ style=\"T3\".*?><c.*?>(.*?)<\/c><c.*?><\/c><c.*?>s\.(.*?)<\/c><\/p>/<\/entryFree>\n<entryFree\ sortKey=\"$1\"\ split=\"$1\"><form\ type=\"headword\"><orth\ rend\=\"bold\">$1<\/orth><\/form>\n\t<ref target=\"$2\">s\.\ $2<\/ref>/g;
        # content: only the first remaining paragraph of the line is wrapped
        s/<p.*?>(.*?)<\/p>/\t<def>\n\t\t$1\n\t<\/def>/;
        s/<c\ style=\"Kursiv ZF\"\ >(.*?)<\/c>/<hi rend=\"italic\">$1<\/hi>/g;
        # references: wrap "Book chapter,verse" runs in <r>; $^N puts back
        # the terminator (blank, ")" or ";") matched by the last group
        s/(([12345]?[A-Z][a-z]+)(;?\ [0-9]+(,[0-9]+(\-[0-9]+)?)?)+)(\ |\)|;)/<r>$1<\/r>$^N/g;
        # Disabled: resolving the <r> references through vs2osisref.
        # s/<r>(.*?)<\/r>/&vs2osisref($1)/eg;
        # clean up leftover character-run tags
        s/<c.*?>//g;
        s/<\/c\s*>//g;
    }

    return $line;
}

# Convert one AbiWord file and write the result next to it as "<name>.tei".
#
# Argument:  path of the .abw file.
# Returns:   1 on success, 0 (after a warning) if reading or writing failed.
#
# The output file is truncated rather than appended to: every run writes a
# complete document including the XML declaration, so appending to an
# earlier result could never produce a usable file.
sub _convert_abw_file {
    my ($abw_path) = @_;
    my $tei_path = "$abw_path.tei";

    my $in;
    unless (open($in, '<', $abw_path)) {
        warn "cannot read $abw_path: $!\n";
        return 0;
    }
    my @lines = map { _abw_line_to_tei($_) } <$in>;
    close $in;

    my $out;
    unless (open($out, '>', $tei_path)) {
        warn "cannot write $tei_path: $!\n";
        return 0;
    }
    print {$out} _tei_header();
    print {$out} @lines, "\n";
    print {$out} "</body>\n</text>\n</TEI>";
    unless (close $out) {
        warn "error while writing $tei_path: $!\n";
        return 0;
    }

    return 1;
}

# Convert every AbiWord file in the current directory.  glob() is used
# instead of `ls` so file names never pass through a shell.
_convert_abw_file($_) for glob('*.abw');
|