summary | refs | log | tree | commit | diff | stats
path: root/modules/misc_cleanup
diff options
context:
space:
mode:
author: Peter von Kaehne <refdoc@gmx.net> 2011-04-16 22:50:09 +0000
committer: Peter von Kaehne <refdoc@gmx.net> 2011-04-16 22:50:09 +0000
commit: 8829e802e6dc5c77d1b32d253365ea628a5ac5a1 (patch)
tree: 488faae6db43caeb1b7822221aae910576dbe5b8 /modules/misc_cleanup
parent: ad82002718a1300b639f31d90d6c10883f70d15b (diff)
download: sword-tools-8829e802e6dc5c77d1b32d253365ea628a5ac5a1.tar.gz
improved title encoding
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@321 07627401-56e2-0310-80f4-f8cd0041bdcd
Diffstat (limited to 'modules/misc_cleanup')
-rwxr-xr-x modules/misc_cleanup/title_cleanup.pl 50
1 files changed, 50 insertions, 0 deletions
diff --git a/modules/misc_cleanup/title_cleanup.pl b/modules/misc_cleanup/title_cleanup.pl
new file mode 100755
index 0000000..0877175
--- /dev/null
+++ b/modules/misc_cleanup/title_cleanup.pl
@@ -0,0 +1,50 @@
+#!/usr/bin/perl
+# title_cleanup.pl - post-process OSIS XML written by usfm2osis.pl: every <title>
+# lacking a 'type' inherits its parent's, and each 'section' title without a
+# 'subType' gets subType="x-preverse". The result goes to <osisfile>.new or -o.
+use XML::LibXML;
+use utf8;
+use strict;
+use warnings;
+
+## Obtain arguments
+if (scalar(@ARGV) < 1) {
+    print "\ntitle_cleanup.pl <osisfile> [-o outputfile]-- - fix output of usfm2osis.pl \n";
+    print "- Arguments in braces < > are required. Arguments in brackets [ ] are optional.\n";
+    print "- If no -o option is specified for the output filename, the default output file is: \n\t<osisfile>.new\n";
+    print "- The script will improve the encoding of title tag.\n";
+    exit (-1);
+}
+
+my $file = $ARGV[0];
+my $outputFilename = "$file.new";    # default when no -o option is given
+if (defined($ARGV[1]) && $ARGV[1] eq "-o") {
+    # A bare -o previously fell through to a failed open of an empty filename.
+    die "Option -o requires an output filename.\n" unless (defined($ARGV[2]) && length($ARGV[2]));
+    $outputFilename = $ARGV[2];
+}
+
+## Initialise OSIS file
+my $parser = XML::LibXML->new();
+my $doc = $parser->parse_file($file);
+
+## Search for titles without 'type' info and give parental type. Also add 'subType=x-preverse'
+foreach my $title ($doc->getElementsByTagName('title')) {
+    my $parent = $title->parentNode;
+    # Only element parents carry attributes; a root <title> has the document as parent.
+    if ($parent && $parent->isa('XML::LibXML::Element')
+        && $parent->hasAttribute('type') && !$title->hasAttribute('type')) {
+        $title->setAttribute('type', $parent->getAttribute('type'));
+    }
+    my $type = $title->getAttribute('type');    # undef when the title is still untyped
+    if (defined($type) && $type eq 'section' && !$title->hasAttribute('subType')) {
+        $title->setAttribute('subType', 'x-preverse');
+    }
+}
+
+## Write the result. The output is opened only after parsing so that a bad input
+## (or -o naming the input itself) cannot truncate a file before it has been read.
+open(my $out, ">", $outputFilename) or die "Could not open file $outputFilename for writing: $!\n";
+binmode($out);    # toString() on a document returns bytes in the document's encoding
+print {$out} $doc->toString(0) or die "Could not write to $outputFilename: $!\n";
+close($out) or die "Could not close $outputFilename: $!\n";