diff options
author | Chris Little <chrislit@crosswire.org> | 2009-11-14 22:52:59 +0000 |
---|---|---|
committer | Chris Little <chrislit@crosswire.org> | 2009-11-14 22:52:59 +0000 |
commit | 5f9ffc2b245e4da5b320fd268af46f43ccaba7b9 (patch) | |
tree | 2b86a064758a19d7dcde8a11e3c879fbe4a0e3ce | |
parent | 0d95a303e4f7a55225506c355a4cb4c8c00b11cd (diff) | |
download | sword-tools-5f9ffc2b245e4da5b320fd268af46f43ccaba7b9.tar.gz |
changed most calls from mv to cp so that the original files are left in place, avoiding unnecessary retrievals of whole files
added retrieval of data from our Wiki and output to localized.txt
corrected regexes after SIL's page update
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@249 07627401-56e2-0310-80f4-f8cd0041bdcd
-rw-r--r-- | locales/updateFiles.pl | 55 |
1 file changed, 37 insertions, 18 deletions
diff --git a/locales/updateFiles.pl b/locales/updateFiles.pl
index 4e46329..cb56972 100644
--- a/locales/updateFiles.pl
+++ b/locales/updateFiles.pl
@@ -1,34 +1,53 @@
#!/usr/bin/perl
-#This script calls wget, unzip, and mv, so you'll need to have those binaries installed.
+#This script calls wget, unzip, cp, and mv, so you'll need to have those binaries installed.
-`wget -N http://www.iana.org/assignments/language-subtag-registry`;
-`mv language-subtag-registry language-subtag-registry.txt`;
-`wget -N http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt`;
-`wget -N http://www.loc.gov/standards/iso639-5/iso639-5.pipe.txt`;
-`wget -N http://unicode.org/iso15924/iso15924.txt.zip`;
-`wget -N http://www.iso.org/iso/iso3166_en_code_lists.txt`;
-`wget -N http://www.iso.org/iso/iso3166_fr_code_lists.txt`;
+use utf8;
+
+`wget -N http://www.iana.org/assignments/language-subtag-registry -owget.log`;
+`cp -f language-subtag-registry language-subtag-registry.txt`;
+`wget -N http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt -owget.log`;
+`wget -N http://www.loc.gov/standards/iso639-5/iso639-5.pipe.txt -owget.log`;
+`wget -N http://unicode.org/iso15924/iso15924.txt.zip -owget.log`;
+`wget -N http://www.iso.org/iso/iso3166_en_code_lists.txt -owget.log`;
+`wget -N http://www.iso.org/iso/iso3166_fr_code_lists.txt -owget.log`;
$ret = `unzip -o iso15924.txt.zip`;
$ret =~ /(iso15924-utf.+)/;
`mv -f \"$1\" \"iso15924-utf8.txt\"`;
`rm iso15924.txt.zip`;
-`wget -N http://www.sil.org/iso639-3/download.asp`;
+`wget -N http://www.sil.org/iso639-3/download.asp -owget.log`;
open DL, "download.asp";
while (<DL>) {$downloadasp .= $_;}
close (DL);
`rm download.asp`;
-$downloadasp =~ /Download ISO 639-3 code set <a HREF=\"([^\"]+)\">UTF-8/;
-`wget -N "http://www.sil.org/iso639-3/$1"`;
-`mv -f \"$1\" \"iso-639-3.tab\"`;
+$downloadasp =~ /<a HREF=\"(iso-639-3_[0-9]+\.tab)\">/;
+`wget -N "http://www.sil.org/iso639-3/$1" -owget.log`;
+`cp -f \"$1\" \"iso-639-3.tab\"`;
+
+$downloadasp =~ /<a HREF=\"(iso-639-3_Name_Index_[0-9]+\.tab)\">/;
+`wget -N "http://www.sil.org/iso639-3/$1" -owget.log`;
+`cp -f \"$1\" \"iso-639-3_Name_Index.tab\"`;
+
+$downloadasp =~ /<a HREF=\"(iso-639-3_Retirements_[0-9]+\.tab)\">/;
+`wget -N "http://www.sil.org/iso639-3/$1" -owget.log`;
+`cp -f \"$1\" \"iso-639-3_Retirements.tab\"`;
+
+`wget "http://www.crosswire.org/wiki/Special:Export/Localized_Language_Names" -Olocalized.txt -owget.log`;
+
+open LOC, "<:utf8", "localized.txt";
+while (<LOC>) {
+ push @content, $_;
+}
-$downloadasp =~ /Download ISO 639-3 Language Names Index <a HREF=\"([^\"]+)\">UTF-8/;
-`wget -N "http://www.sil.org/iso639-3/$1"`;
-`mv -f \"$1\" \"iso-639-3_Name_Index.tab\"`;
+open LOC, ">:utf8", "localized.txt";
-$downloadasp =~ /Download <a HREF=\"([^\"]+)\">ISO 639-3 code retirement mappings/;
-`wget -N "http://www.sil.org/iso639-3/$1"`;
-`mv -f \"$1\" \"iso-639-3_Retirements.tab\"`;
+foreach $line (@content) {
+ if ($line =~ /^\|\s*([a-zA-Z\-]+)\s*\|\|\s*([^\|]+)\s+(\|\|\s*(.+))?/) {
+# if ($4 !~ /dead/) { #turn this on to remove langs tagged as dead
+ print LOC "$1\t$2\n";
+# }
+ }
+}
|