#!/usr/bin/perl
#
# Downloads the language, script and region code tables listed below into the
# current directory, then rewrites the exported "Localized_Language_Names"
# wiki page as a sorted, tab-separated file (localized.txt).
#
# This script calls wget, unzip, cp, mv, and rm, so you'll need to have those
# binaries installed.
#
# Files left in the current directory:
#   language-subtag-registry(.txt), ISO-639-2_utf-8.txt, iso639-5.pipe.txt,
#   iso15924-utf8.txt, iso3166_{en,fr}_code_lists.txt, iso-639-3.tab,
#   iso-639-3_Name_Index.tab, iso-639-3_Retirements.tab, localized.txt,
#   wget.log (overwritten by every wget call) and, when $wiki is 1,
#   localized.wiki.

use strict;
use warnings;
use utf8;

# Set to 1 to also write the sorted table as wiki table rows to localized.wiki.
my $wiki = 0;

# Run an external command without going through the shell. Failures are
# reported but do not stop the script: the downloads are best-effort.
sub run {
    my @cmd = @_;
    system(@cmd) == 0
        or warn "command failed (status $?): @cmd\n";
    return;
}

# Download $url with wget, logging to wget.log. @options are passed through
# unchanged (e.g. '-N' for timestamping, '-Ofile' to name the output).
sub fetch {
    my ($url, @options) = @_;
    run('wget', @options, $url, '-owget.log');
    return;
}

# --- Plain downloads ---------------------------------------------------------

fetch('http://www.iana.org/assignments/language-subtag-registry', '-N');
run('cp', '-f', 'language-subtag-registry', 'language-subtag-registry.txt');

fetch('http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt', '-N');
fetch('http://www.loc.gov/standards/iso639-5/iso639-5.pipe.txt',   '-N');
fetch('http://unicode.org/iso15924/iso15924.txt.zip',              '-N');
fetch('http://www.iso.org/iso/iso3166_en_code_lists.txt',          '-N');
fetch('http://www.iso.org/iso/iso3166_fr_code_lists.txt',          '-N');

# --- ISO 15924: unpack and give the extracted file a fixed name --------------

my $unzip_output = `unzip -o iso15924.txt.zip`;

# The extracted file name is taken from unzip's own report. \S+ (rather than
# .+) keeps any trailing padding in that report out of the file name.
if (defined $unzip_output && $unzip_output =~ /(iso15924-utf\S+)/) {
    run('mv', '-f', $1, 'iso15924-utf8.txt');
}
else {
    warn "could not find an iso15924-utf* file in the unzip output\n";
}
run('rm', 'iso15924.txt.zip');

# --- ISO 639-3: follow the links on the download page ------------------------

fetch('http://www.sil.org/iso639-3/download.asp', '-N');

# NOTE(review): the three link patterns were empty (`//`) in the source this
# was restored from, so the original expressions are unknown. An empty pattern
# reuses the last successful one, which left $1 stale. The patterns below are
# reconstructed from the destination file names and assume date-stamped links
# such as href="iso-639-3_YYYYMMDD.tab" -- confirm against the page markup.
my @iso639_3_tables = (
    [ qr{href="(iso-639-3_\d+\.tab)"}i,             'iso-639-3.tab' ],
    [ qr{href="(iso-639-3_Name_Index_\d+\.tab)"}i,  'iso-639-3_Name_Index.tab' ],
    [ qr{href="(iso-639-3_Retirements_\d+\.tab)"}i, 'iso-639-3_Retirements.tab' ],
);

if (open my $dl, '<', 'download.asp') {
    my $downloadasp = do { local $/; <$dl> };    # slurp the whole page
    close $dl;
    $downloadasp = '' unless defined $downloadasp;
    run('rm', 'download.asp');

    for my $table (@iso639_3_tables) {
        my ($link_re, $target) = @{$table};
        if ($downloadasp =~ $link_re) {
            my $remote_name = $1;
            fetch("http://www.sil.org/iso639-3/$remote_name", '-N');
            run('cp', '-f', $remote_name, $target);
        }
        else {
            # Never fall back to a stale $1 from an earlier match.
            warn "no link for $target found in download.asp\n";
        }
    }
}
else {
    warn "cannot read download.asp: $!\n";
}

# --- Localized language names: wiki export -> sorted TSV ---------------------

fetch('http://www.crosswire.org/wiki/Special:Export/Localized_Language_Names',
    '-Olocalized.txt');

open my $in, '<:utf8', 'localized.txt'
    or die "cannot read localized.txt: $!\n";
my @content = <$in>;
close $in;

# Keep only wiki table rows of the form "| code || name [|| note]" and turn
# each into "code<TAB>name[<TAB>note]".
my @sortcontent;
for my $line (@content) {
    next
        unless $line
        =~ /^\|\s*([a-zA-Z\-]+)\s*\|\|\s*([^\|]+)\s*(\|\|\s*(.+))?/;
    my $row = join "\t", $1, $2, (defined $4 ? $4 : '');
    $row =~ s/\s*$//;             # drops the newline and an empty note column
    $row =~ s/\s*\t\s*/\t/g;      # trim padding around the separators
    push @sortcontent, $row;
}

# The input has been read completely, so it is safe to overwrite it now.
open my $out, '>:utf8', 'localized.txt'
    or die "cannot write localized.txt: $!\n";

my $wiki_fh;
if ($wiki == 1) {
    open $wiki_fh, '>:utf8', 'localized.wiki'
        or die "cannot write localized.wiki: $!\n";
}

for my $row (sort @sortcontent) {
    next unless $row =~ /^([a-zA-Z\-]+)\t([^\t]+)(\t(.+))?$/;
    my ($code, $name) = ($1, $2);

    # Enable the next line to remove languages tagged as dead:
    # next if defined $4 && $4 =~ /dead/;

    print {$out} "$code\t$name\n";

    if ($wiki_fh) {
        # The wiki copy keeps every column, joined as table cells.
        (my $wiki_row = $row) =~ s/\t/\t\|\|/g;
        print {$wiki_fh} "|-\n|$wiki_row\n";
    }
}

close $out or die "cannot finish writing localized.txt: $!\n";
if ($wiki_fh) {
    close $wiki_fh or die "cannot finish writing localized.wiki: $!\n";
}