summaryrefslogtreecommitdiffstats
path: root/modules/misc_cleanup/numbers.pl
diff options
context:
space:
mode:
authorPeter von Kaehne <refdoc@gmx.net>2011-07-07 20:44:20 +0000
committerPeter von Kaehne <refdoc@gmx.net>2011-07-07 20:44:20 +0000
commita886a9646b152c9d4f7de88f19daa02ac0dbb017 (patch)
tree2661e8d5c3e65a6984c76f39c93e5bdd0944c3b8 /modules/misc_cleanup/numbers.pl
parent90f1e14bb7379c3c0d524a92facca1e84b1da7b9 (diff)
downloadsword-tools-a886a9646b152c9d4f7de88f19daa02ac0dbb017.tar.gz
some small utilities to clean up OSIS files
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@331 07627401-56e2-0310-80f4-f8cd0041bdcd
Diffstat (limited to 'modules/misc_cleanup/numbers.pl')
-rwxr-xr-xmodules/misc_cleanup/numbers.pl86
1 files changed, 86 insertions, 0 deletions
diff --git a/modules/misc_cleanup/numbers.pl b/modules/misc_cleanup/numbers.pl
new file mode 100755
index 0000000..567494f
--- /dev/null
+++ b/modules/misc_cleanup/numbers.pl
@@ -0,0 +1,86 @@
+#!/usr/bin/perl
+
+# numbers.pl translates Western (ASCII) digits into Extended Arabic-Indic digits (U+06F0-U+06F9) in the text nodes of XML files
+
+## Licensed under the standard BSD license:
+
+# Copyright (c) 2002-2011 CrossWire Bible Society <http://www.crosswire.org/>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of the CrossWire Bible Society nor the names of
+# its contributors may be used to endorse or promote products
+# derived from this software without specific prior written
+# permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+## For general inquiries, comments, suggestions, bug reports, etc. email:
+## sword-support@crosswire.org
+
+#########################################################################
+use XML::LibXML;
+use utf8;
+use strict;
+
+## Obtain arguments
+#
+# Usage: numbers.pl <osisfile> [-o outputfile]
+# The converted document is written to <outputfile> when -o is given,
+# otherwise to standard output.
+if (scalar(@ARGV) < 1) {
+    print "\nnumbers.pl <osisfile> [-o outputfile]-- - fix Latin numbers in Arabic script text \n";
+    print "- Arguments in braces < > are required. Arguments in brackets [ ] are optional.\n";
+    # The script has no ".new" default file; without -o it prints to STDOUT.
+    print "- If no -o option is specified for the output filename, the result is written to standard output.\n";
+    exit (-1);
+}
+
+# Scalar element access ($ARGV[0]) rather than a one-element slice (@ARGV[0]).
+my $file = $ARGV[0];
+my $nextarg = 1;
+my $outputFilename;
+
+# Destination handle: STDOUT unless "-o <outputfile>" follows the input file.
+my $out = \*STDOUT;
+
+if (defined $ARGV[$nextarg] && $ARGV[$nextarg] eq "-o") {
+    $outputFilename = $ARGV[$nextarg + 1];
+    die "Option -o requires an output file name.\n"
+        unless defined $outputFilename && length $outputFilename;
+
+    # Three-argument open on a lexical handle; report the OS error on failure.
+    open(my $fh, ">", $outputFilename)
+        or die "Could not open file $outputFilename for writing: $!";
+    $out = $fh;
+}
+
+## Initialise OSIS file
+
+my $parser = XML::LibXML->new();
+my $doc = $parser->parse_file($file);
+
+# Convert the digits in every text node of the parsed document in place.
+delatinize($doc);
+
+print {$out} $doc->toString();
+
+# Buffered write errors only surface at close, so check it for a real file.
+if (defined $outputFilename) {
+    close($out) or die "Could not close file $outputFilename: $!";
+}
+
+# delatinize($node)
+#
+# Recursively walks the tree rooted at $node. In every text node the Western
+# digits 0-9 are replaced, in place, by the digits listed in the tr/// below.
+# Any other node is only traversed through childNodes(), so markup and
+# attribute values are left untouched.
+#
+# $node - an XML::LibXML node (the script passes the parsed document).
+# Returns nothing.
+sub delatinize {
+    my ($node) = @_;
+
+    if ($node->nodeType == XML_TEXT_NODE) {
+        # Read the raw character data, not toString(): the serialised form has
+        # &, < and > escaped as entities, so it does not match the stored data
+        # and text containing those characters was silently left unconverted.
+        my $text = $node->data;
+
+        # tr/// takes plain character lists, so no [ ] are needed. The
+        # file-level "use utf8" makes each target digit a single character.
+        $text =~ tr/0123456789/۰۱۲۳۴۵۶۷۸۹/;
+
+        $node->setData($text);
+    }
+    else {
+        foreach my $child ($node->childNodes()) {
+            delatinize($child);
+        }
+    }
+
+    return;
+}
+ \ No newline at end of file