diff options
author | Peter von Kaehne <refdoc@gmx.net> | 2011-07-07 20:44:20 +0000 |
---|---|---|
committer | Peter von Kaehne <refdoc@gmx.net> | 2011-07-07 20:44:20 +0000 |
commit | a886a9646b152c9d4f7de88f19daa02ac0dbb017 (patch) | |
tree | 2661e8d5c3e65a6984c76f39c93e5bdd0944c3b8 /modules/misc_cleanup/numbers.pl | |
parent | 90f1e14bb7379c3c0d524a92facca1e84b1da7b9 (diff) | |
download | sword-tools-a886a9646b152c9d4f7de88f19daa02ac0dbb017.tar.gz |
some small utilities to clean up OSIS files
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@331 07627401-56e2-0310-80f4-f8cd0041bdcd
Diffstat (limited to 'modules/misc_cleanup/numbers.pl')
-rwxr-xr-x | modules/misc_cleanup/numbers.pl | 86 |
1 files changed, 86 insertions, 0 deletions
diff --git a/modules/misc_cleanup/numbers.pl b/modules/misc_cleanup/numbers.pl new file mode 100755 index 0000000..567494f --- /dev/null +++ b/modules/misc_cleanup/numbers.pl @@ -0,0 +1,86 @@ +#!/usr/bin/perl + +# numbers.pl translates Western numbers into Arabic-Indic numbers in the textnodes of XML files + +## Licensed under the standard BSD license: + +# Copyright (c) 2002-2011 CrossWire Bible Society <http://www.crosswire.org/> +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of the CrossWire Bible Society nor the names of +# its contributors may be used to endorse or promote products +# derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
## For general inquiries, comments, suggestions, bug reports, etc. email:
## sword-support@crosswire.org

#########################################################################
use strict;
use warnings;
use utf8;    # the digit table in delatinize() is written with UTF-8 literals
use XML::LibXML;

## Obtain arguments
if (scalar(@ARGV) < 1) {
    print "\nnumbers.pl <osisfile> [-o outputfile]-- - fix Latin numbers in Arabic script text \n";
    print "- Arguments in braces < > are required. Arguments in brackets [ ] are optional.\n";
    # The script has never created <osisfile>.new; without -o it prints the
    # converted document to standard output, so the help text says so.
    print "- If no -o option is specified for the output filename, the result is written to standard output.\n";
    exit (-1);
}

my $file    = $ARGV[0];
my $nextarg = 1;
my $outputFilename;
my $out;

if (defined $ARGV[$nextarg] && $ARGV[$nextarg] eq "-o") {
    $outputFilename = $ARGV[$nextarg + 1];
    die "Option -o requires an output filename.\n"
        unless defined $outputFilename && length $outputFilename;
    open($out, ">", $outputFilename)
        or die "Could not open file $outputFilename for writing: $!\n";
}
else {
    $out = \*STDOUT;
}

## Initialise OSIS file

my $parser = XML::LibXML->new();
my $doc    = $parser->parse_file($file);

delatinize($doc);

# Document->toString() yields the serialized document; write it to the
# selected destination and make sure buffered data really reached the file.
print {$out} $doc->toString();
if (defined $outputFilename) {
    close($out)
        or die "Could not finish writing $outputFilename: $!\n";
}

# delatinize($node)
#
# Walks the DOM subtree rooted at $node and, in every text node, replaces
# the Western digits 0-9 by the corresponding digits of the replacement
# list in the tr/// below. Nodes of any other type are only descended
# into; their own content (element names, attributes, ...) is not touched.
#
#   $node - an XML::LibXML node (the script passes the whole document)
#
# Returns nothing; the tree is modified in place.
sub delatinize {
    my ($node) = @_;

    if ($node->nodeType == XML_TEXT_NODE) {
        # Work on the raw character data. toString() would return the
        # XML-escaped form (e.g. "&amp;"), which no longer matches the node
        # data, so text containing &, < or > used to be left unconverted.
        my $text = $node->data;

        # tr/// returns the number of characters translated; only write
        # the node back when something actually changed.
        if ($text =~ tr/0123456789/۰۱۲۳۴۵۶۷۸۹/) {
            $node->setData($text);
        }
    }
    else {
        for my $child ($node->childNodes()) {
            delatinize($child);
        }
    }

    return;
}
\ No newline at end of file |