summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Chris Little <chrislit@crosswire.org> 2009-11-14 22:52:59 +0000
committer: Chris Little <chrislit@crosswire.org> 2009-11-14 22:52:59 +0000
commit: 5f9ffc2b245e4da5b320fd268af46f43ccaba7b9 (patch)
tree: 2b86a064758a19d7dcde8a11e3c879fbe4a0e3ce
parent: 0d95a303e4f7a55225506c355a4cb4c8c00b11cd (diff)
download: sword-tools-5f9ffc2b245e4da5b320fd268af46f43ccaba7b9.tar.gz
Changed most calls to mv to cp so that the original downloaded files are left in place; this avoids unnecessary retrievals of whole files, since wget -N can only skip a download when the previously fetched copy is still present.
Added retrieval of data from our Wiki, with output written to localized.txt. Corrected regexes after SIL's page update.

git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@249 07627401-56e2-0310-80f4-f8cd0041bdcd
-rw-r--r-- locales/updateFiles.pl 55
1 files changed, 37 insertions, 18 deletions
diff --git a/locales/updateFiles.pl b/locales/updateFiles.pl
index 4e46329..cb56972 100644
--- a/locales/updateFiles.pl
+++ b/locales/updateFiles.pl
@@ -1,34 +1,53 @@
#!/usr/bin/perl
-#This script calls wget, unzip, and mv, so you'll need to have those binaries installed.
+#This script calls wget, unzip, cp, and mv, so you'll need to have those binaries installed.
-`wget -N http://www.iana.org/assignments/language-subtag-registry`;
-`mv language-subtag-registry language-subtag-registry.txt`;
-`wget -N http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt`;
-`wget -N http://www.loc.gov/standards/iso639-5/iso639-5.pipe.txt`;
-`wget -N http://unicode.org/iso15924/iso15924.txt.zip`;
-`wget -N http://www.iso.org/iso/iso3166_en_code_lists.txt`;
-`wget -N http://www.iso.org/iso/iso3166_fr_code_lists.txt`;
+use utf8;
+
+`wget -N http://www.iana.org/assignments/language-subtag-registry -owget.log`;
+`cp -f language-subtag-registry language-subtag-registry.txt`;
+`wget -N http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt -owget.log`;
+`wget -N http://www.loc.gov/standards/iso639-5/iso639-5.pipe.txt -owget.log`;
+`wget -N http://unicode.org/iso15924/iso15924.txt.zip -owget.log`;
+`wget -N http://www.iso.org/iso/iso3166_en_code_lists.txt -owget.log`;
+`wget -N http://www.iso.org/iso/iso3166_fr_code_lists.txt -owget.log`;
$ret = `unzip -o iso15924.txt.zip`;
$ret =~ /(iso15924-utf.+)/;
`mv -f \"$1\" \"iso15924-utf8.txt\"`;
`rm iso15924.txt.zip`;
-`wget -N http://www.sil.org/iso639-3/download.asp`;
+`wget -N http://www.sil.org/iso639-3/download.asp -owget.log`;
open DL, "download.asp";
while (<DL>) {$downloadasp .= $_;}
close (DL);
`rm download.asp`;
-$downloadasp =~ /Download ISO 639-3 code set <a HREF=\"([^\"]+)\">UTF-8/;
-`wget -N "http://www.sil.org/iso639-3/$1"`;
-`mv -f \"$1\" \"iso-639-3.tab\"`;
+$downloadasp =~ /<a HREF=\"(iso-639-3_[0-9]+\.tab)\">/;
+`wget -N "http://www.sil.org/iso639-3/$1" -owget.log`;
+`cp -f \"$1\" \"iso-639-3.tab\"`;
+
+$downloadasp =~ /<a HREF=\"(iso-639-3_Name_Index_[0-9]+\.tab)\">/;
+`wget -N "http://www.sil.org/iso639-3/$1" -owget.log`;
+`cp -f \"$1\" \"iso-639-3_Name_Index.tab\"`;
+
+$downloadasp =~ /<a HREF=\"(iso-639-3_Retirements_[0-9]+\.tab)\">/;
+`wget -N "http://www.sil.org/iso639-3/$1" -owget.log`;
+`cp -f \"$1\" \"iso-639-3_Retirements.tab\"`;
+
+`wget "http://www.crosswire.org/wiki/Special:Export/Localized_Language_Names" -Olocalized.txt -owget.log`;
+
+open LOC, "<:utf8", "localized.txt";
+while (<LOC>) {
+ push @content, $_;
+}
-$downloadasp =~ /Download ISO 639-3 Language Names Index <a HREF=\"([^\"]+)\">UTF-8/;
-`wget -N "http://www.sil.org/iso639-3/$1"`;
-`mv -f \"$1\" \"iso-639-3_Name_Index.tab\"`;
+open LOC, ">:utf8", "localized.txt";
-$downloadasp =~ /Download <a HREF=\"([^\"]+)\">ISO 639-3 code retirement mappings/;
-`wget -N "http://www.sil.org/iso639-3/$1"`;
-`mv -f \"$1\" \"iso-639-3_Retirements.tab\"`;
+foreach $line (@content) {
+ if ($line =~ /^\|\s*([a-zA-Z\-]+)\s*\|\|\s*([^\|]+)\s+(\|\|\s*(.+))?/) {
+# if ($4 !~ /dead/) { #turn this on to remove langs tagged as dead
+ print LOC "$1\t$2\n";
+# }
+ }
+}