summaryrefslogtreecommitdiffstats
path: root/modules
diff options
context:
space:
mode:
authorPeter von Kaehne <refdoc@gmx.net>2015-09-04 01:46:46 +0000
committerPeter von Kaehne <refdoc@gmx.net>2015-09-04 01:46:46 +0000
commitb65ed23c2dd5798c0088205d29dd2bbb3a6dfa5a (patch)
tree9f1773336a825d1d1bd8953dc314aabe9f995fec /modules
parentcd6e034126b245124eb4fd50b5fda955338d14a5 (diff)
downloadsword-tools-b65ed23c2dd5798c0088205d29dd2bbb3a6dfa5a.tar.gz
added handling of UTF8 stripfilters.
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@503 07627401-56e2-0310-80f4-f8cd0041bdcd
Diffstat (limited to 'modules')
-rwxr-xr-xmodules/conf/confmaker.pl49
1 files changed, 45 insertions, 4 deletions
diff --git a/modules/conf/confmaker.pl b/modules/conf/confmaker.pl
index 2662ae1..6367ec5 100755
--- a/modules/conf/confmaker.pl
+++ b/modules/conf/confmaker.pl
@@ -41,8 +41,11 @@
use XML::LibXML;
use I18N::LangTags::List;
use Unicode::UCD 'charinfo';
-use open ':std', ':encoding(UTF-8)';
-
+#use open ':std', ':encoding(UTF-8)';
+#use open qw/:std :utf8/;
+use utf8;
+use Sword;
+use HTML::Strip;
## Obtain arguments
if (scalar(@ARGV) < 1) {
@@ -90,6 +93,17 @@ if (@ARGV[$nextarg] eq "-l") {
my $parser = XML::LibXML->new();
my $doc = $parser->parse_file($file);
+# SWORD manager whose filterText() is applied to the document text later on.
+my $manager = Sword::SWMgr->new();
+# NOTE(review): "Off" presumably makes each filter strip its marks - confirm.
+$manager->setGlobalOption("Hebrew Vowel Points", "Off");
+$manager->setGlobalOption("Hebrew Cantillation", "Off");
+$manager->setGlobalOption("Arabic Vowel Points", "Off");
+$manager->setGlobalOption("Greek Accents", "Off");
+
+# Markup-stripped copy of the document, kept as the unfiltered baseline text.
+my $hs = HTML::Strip->new();
+my $doc_text = Sword::SWBuf->new($hs->parse($doc->toString()));
## obtain name, type and language
@@ -100,7 +114,9 @@ my $doc_name = @elements[0]->getAttribute('osisIDWork');
my $doc_type = @elements[0]->getAttribute('osisRefWork');
my $doc_lang = @elements[0]->getAttribute('xml:lang');
my $doc_lang_name=I18N::LangTags::List::name($doc_lang);
+;
+
if ((length($language)==0) && (length($doc_lang)==0)) {
print STDERR $language."\n", $doc_lang."\n", $doc_lang_name."\n";
@@ -123,7 +139,7 @@ if ((length($language)>0) && (length($doc_lang)==0)) {
my @doc_features = ('title', 'note', 'reference', 'q', 'figure', 'rdg');
my @word_features = ('lemma', 'gloss', 'morph',);
-
+my @char_features = ('Hebrew Vowel Points', 'Arabic Vowel Points', 'Hebrew Cantillation', 'Greek Accents'); # names passed to $manager->filterText(); also the keys of %diacritics
my %doc_filters = ( 'title' => "OSISHeadings",
'note' => "OSISFootnotes",
@@ -134,12 +150,20 @@ my %doc_filters = ( 'title' => "OSISHeadings",
'q' => "OSISRedLetterWords",
'rdg' => 'OSISVariants',
);
+
my %doc_feature = ( 'lemma' => 'StrongsNumbers',
'figure' => 'Images',
'p' => 'NoParagraphs',
);
+# Option name => filter name printed in the matching GlobalOptionFilter line.
+my %diacritics = (
+    'Hebrew Vowel Points' => 'UTF8HebrewPoints',
+    'Arabic Vowel Points' => 'UTF8ArabicPoints',
+    'Hebrew Cantillation' => 'UTF8Cantillation',
+    'Greek Accents'       => 'UTF8GreekAccents',
+);
my %doc_has_feature;
@@ -165,6 +189,8 @@ foreach my $f(@word_features) {
my @paragraphs = $doc->getElementsByTagName('p');
if (@paragraphs==0) {$doc_has_feature{'p'}=true};
+
+
# Assemble and print out
@@ -189,6 +215,21 @@ foreach (@word_features) {
print "GlobalOptionFilter=".$doc_filters{$_}."\n"
}
}
+
+# Print a GlobalOptionFilter line for every diacritic filter that alters the text.
+foreach my $filter (@char_features) {
+    # Work on a fresh copy of the baseline: filterText() rewrites its buffer in place.
+    my $tmp = Sword::SWBuf->new($doc_text->c_str());
+    $manager->filterText($filter, $tmp);
+
+    # A difference from the baseline means the filter found marks to strip.
+    if ($tmp->c_str() ne $doc_text->c_str()) {
+        print "GlobalOptionFilter=".$diacritics{$filter}."\n";
+    }
+}
+
+
+
foreach (@doc_features) {
if ($doc_has_feature{$_} && exists $doc_feature{$_}) {
print "Feature=".$doc_feature{$_}."\n"
@@ -206,7 +247,7 @@ if ($doc_has_feature{'p'}) {
print "Encoding=UTF-8\n";
print "SourceType=OSIS\n";
print "LCSH=".$doc_type.".".I18N::LangTags::List::name($doc_lang)."\n";
-print "SwordVersionDate=".`date +"%F"`."\n";
+print "SwordVersionDate=".`date +"%F"`;
if (@inputFile>0) {
foreach(@inputFile) {