diff options
author | Peter von Kaehne <refdoc@gmx.net> | 2011-04-16 22:50:09 +0000 |
---|---|---|
committer | Peter von Kaehne <refdoc@gmx.net> | 2011-04-16 22:50:09 +0000 |
commit | 8829e802e6dc5c77d1b32d253365ea628a5ac5a1 (patch) | |
tree | 488faae6db43caeb1b7822221aae910576dbe5b8 /modules/misc_cleanup | |
parent | ad82002718a1300b639f31d90d6c10883f70d15b (diff) | |
download | sword-tools-8829e802e6dc5c77d1b32d253365ea628a5ac5a1.tar.gz |
improved title encoding
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@321 07627401-56e2-0310-80f4-f8cd0041bdcd
Diffstat (limited to 'modules/misc_cleanup')
-rwxr-xr-x | modules/misc_cleanup/title_cleanup.pl | 50 |
1 files changed, 50 insertions, 0 deletions
#!/usr/bin/perl

# title_cleanup.pl (modules/misc_cleanup/title_cleanup.pl)
#
# Post-processes an OSIS XML file (as produced by usfm2osis.pl):
#   * every <title> element without a 'type' attribute inherits the 'type'
#     attribute of its parent element, when the parent has one;
#   * every <title> whose type is 'section' and which has no 'subType'
#     attribute gets subType="x-preverse".
#
# Usage: title_cleanup.pl <osisfile> [-o outputfile]
# Without -o the result is written to <osisfile>.new.

use strict;
use warnings;
use utf8;
use XML::LibXML;

## Obtain arguments
if (scalar(@ARGV) < 1) {
    print "\ntitle_cleanup.pl <osisfile> [-o outputfile]-- - fix output of usfm2osis.pl \n";
    print "- Arguments in braces < > are required. Arguments in brackets [ ] are optional.\n";
    print "- If no -o option is specified for the output filename, the default output file is: \n\t<osisfile>.new\n";
    print "- The script will improve the encoding of title tag.\n";
    exit (-1);
}

my $file           = $ARGV[0];
my $outputFilename = "$file.new";

if (defined $ARGV[1] && $ARGV[1] eq "-o") {
    # -o is only meaningful when followed by a filename; fail early with a
    # clear message instead of trying to open an empty path later.
    die "Option -o requires an output filename.\n"
        unless defined $ARGV[2] && length $ARGV[2];
    $outputFilename = $ARGV[2];
}

## Initialise OSIS file
# Parse before touching the output file, so that a parse failure (parse_file
# dies on error) does not leave a truncated output file behind.

my $parser = XML::LibXML->new();
my $doc    = $parser->parse_file($file);

## Search for titles without 'type' info and give parental type. Also add 'subType=x-preverse'

foreach my $title ($doc->getElementsByTagName('title')) {
    my $parent = $title->parentNode;

    # Only element nodes carry attributes; a <title> that is the document's
    # root element has the document node as its parent.
    if (   !$title->hasAttribute('type')
        && defined $parent
        && $parent->nodeType == XML_ELEMENT_NODE
        && $parent->hasAttribute('type'))
    {
        $title->setAttribute('type', $parent->getAttribute('type'));
    }

    # getAttribute returns undef when the attribute is absent, so guard the
    # string comparison.
    my $type = $title->getAttribute('type');
    if (!$title->hasAttribute('subType') && defined $type && $type eq 'section') {
        $title->setAttribute('subType', 'x-preverse');
    }
}

## Write the result
# toString() returns the serialised document as bytes in the document's own
# encoding, so the handle is put in binary mode and no encoding layer is added.
# Its optional argument is a numeric format level, not an encoding name.

open(my $out_fh, '>', $outputFilename)
    or die "Could not open file $outputFilename for writing: $!";
binmode $out_fh;

print {$out_fh} $doc->toString()
    or die "Could not write to file $outputFilename: $!";

# Buffered write errors only surface when the handle is closed.
close($out_fh)
    or die "Could not close file $outputFilename: $!";