blob: 68c1b7b7956fa490033d53ac29d335cc6d861382 (
plain) (
tree)
|
|
#!/usr/bin/perl
# Refreshes the local copies of the language, script and region code lists.
# External programs this script shells out to: wget, unzip, cp, mv and rm,
# so those binaries must be installed.
use utf8;

# When set to 1, the localized names are additionally written out as wiki
# table rows to localized.wiki.
$wiki = 0;

# IANA subtag registry, plus a copy of it carrying a .txt extension.
`wget -N http://www.iana.org/assignments/language-subtag-registry -owget.log`;
`cp -f language-subtag-registry language-subtag-registry.txt`;

# Remaining ISO 639-2, ISO 639-5, ISO 15924 and ISO 3166 sources, fetched
# one after another in this order.
for my $list_url (
    'http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt',
    'http://www.loc.gov/standards/iso639-5/iso639-5.pipe.txt',
    'http://unicode.org/iso15924/iso15924.txt.zip',
    'http://www.iso.org/iso/iso3166_en_code_lists.txt',
    'http://www.iso.org/iso/iso3166_fr_code_lists.txt',
) {
    `wget -N $list_url -owget.log`;
}
# Unpack the ISO 15924 archive and give the date-stamped UTF-8 listing the
# stable name iso15924-utf8.txt, then remove the archive.
my $unzip_report = `unzip -o iso15924.txt.zip`;
$unzip_report = '' unless defined $unzip_report;

# The extracted file name is read back from unzip's report. The capture is
# lazy and followed by \s*$ so that any blanks unzip prints after the name do
# not end up in the file name, and it is taken from the match's return list so
# that a failed match yields undef instead of a stale $1.
my ($dated_listing) = $unzip_report =~ /(iso15924-utf.+?)\s*$/m;

if (defined $dated_listing) {
    # List form: the name is passed to mv as-is, without going through a shell.
    system('mv', '-f', $dated_listing, 'iso15924-utf8.txt') == 0
        or warn "Could not rename '$dated_listing' to iso15924-utf8.txt\n";
}
else {
    warn "No iso15924-utf* entry in the unzip output; iso15924-utf8.txt was not updated\n";
}

# The archive is removed whether or not extraction succeeded, as before.
`rm iso15924.txt.zip`;
# Download the current ISO 639-3 tables from SIL. The download page links to
# date-stamped file names, so the page is fetched and scanned for them first;
# each table is then downloaded and copied to a stable, undated name.
`wget -N http://www.sil.org/iso639-3/download.asp -owget.log`;

my $downloadasp = '';
if (open my $page_fh, '<', 'download.asp') {
    local $/;    # read the whole page in one go
    my $page = <$page_fh>;
    $downloadasp = $page if defined $page;
    close $page_fh;
}
else {
    warn "Cannot read download.asp: $!\n";
}
`rm download.asp`;

# [ pattern capturing the dated file name, stable local name ], in the order
# the tables are fetched.
my @sil_tables = (
    [ qr/<a HREF="(iso-639-3_[0-9]+\.tab)">/,             'iso-639-3.tab' ],
    [ qr/<a HREF="(iso-639-3_Name_Index_[0-9]+\.tab)">/,  'iso-639-3_Name_Index.tab' ],
    [ qr/<a HREF="(iso-639-3_Retirements_[0-9]+\.tab)">/, 'iso-639-3_Retirements.tab' ],
);

for my $table (@sil_tables) {
    my ($link_re, $stable_name) = @$table;

    # Take the capture from the match's return list: after a failed match $1
    # would still hold the previous table's name, and that file would then be
    # copied over the wrong target.
    my ($dated_name) = $downloadasp =~ $link_re;
    unless (defined $dated_name) {
        warn "No link for $stable_name found on the SIL download page; skipping\n";
        next;
    }

    system('wget', '-N', "http://www.sil.org/iso639-3/$dated_name", '-owget.log') == 0
        or warn "wget failed for $dated_name\n";

    # The copy is attempted even if wget reported a failure, as before.
    system('cp', '-f', $dated_name, $stable_name) == 0
        or warn "Could not copy $dated_name to $stable_name\n";
}
# Fetch the wiki export of the localized language names and reduce its table
# rows to tab-separated "code<TAB>name<TAB>note" entries in @sortcontent.
# localized.txt is then reopened for writing on the LOC handle, which the code
# that follows prints to.
`wget "http://www.crosswire.org/wiki/Special:Export/Localized_Language_Names" -Olocalized.txt -owget.log`;

open my $export_fh, '<:utf8', 'localized.txt'
    or die "Cannot read localized.txt: $!\n";
while (my $row = <$export_fh>) {
    # Wiki table row: | code || name [ || note ]
    next unless $row =~ /^\|\s*([a-zA-Z\-]+)\s*\|\|\s*([^\|]+)\s*(\|\|\s*(.+))?/;

    my $note  = defined $4 ? $4 : '';    # the note column is optional
    my $entry = "$1\t$2\t$note";
    $entry =~ s/\s*$//;                  # drops the newline and an empty note column
    $entry =~ s/\s*\t\s*/\t/g;           # no padding around the separators
    push @sortcontent, $entry;
}
# Finish reading before the same file is truncated for output.
close $export_fh;

open LOC, '>:utf8', 'localized.txt'
    or die "Cannot write localized.txt: $!\n";
# Write the sorted entries to LOC as "code<TAB>name" lines and, when $wiki is
# 1, also as wiki table rows to localized.wiki.
if ($wiki == 1) {
    open WIKI, '>:utf8', 'localized.wiki'
        or die "Cannot write localized.wiki: $!\n";
}
@sortcontent = sort @sortcontent;
foreach my $entry (@sortcontent) {
    # Only print when the entry really splits into code and name; after a
    # failed match $1 and $2 would hold values left over from an earlier match.
    if ($entry =~ /^([a-zA-Z\-]+)\t([^\t]+)(\t(.+))?$/) {
#       if ($4 !~ /dead/) { #turn this on to remove langs tagged as dead
        print LOC "$1\t$2\n";
#       }
    }
    if ($wiki == 1) {
        $entry =~ s/\t/\t\|\|/g;
        print WIKI "\|\-\n\|$entry\n";
    }
}
# Buffered write errors only show up at close, so check it.
close LOC or warn "Error closing localized.txt: $!\n";
if ($wiki == 1) {
    close WIKI or warn "Error closing localized.wiki: $!\n";
}
|