summaryrefslogtreecommitdiffstats
path: root/locales/updateFiles.pl
blob: 68c1b7b7956fa490033d53ac29d335cc6d861382 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/usr/bin/perl


#This script calls wget, unzip, cp, and mv, so you'll need to have those binaries installed.


# Output switch: when set to 1 the script additionally writes the sorted
# language table to localized.wiki as wiki-table rows; with any other value
# only localized.txt is produced.
$wiki = 0;

use utf8;

# Mirror the upstream registries into the current directory.
# Each wget runs with -N and logs to wget.log (see the wget manual for the
# exact timestamping semantics).  A failed download is reported but does not
# stop the script, so the remaining tables are still refreshed.
for my $registry_url (
    'http://www.iana.org/assignments/language-subtag-registry',
    'http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt',
    'http://www.loc.gov/standards/iso639-5/iso639-5.pipe.txt',
    'http://unicode.org/iso15924/iso15924.txt.zip',
    'http://www.iso.org/iso/iso3166_en_code_lists.txt',
    'http://www.iso.org/iso/iso3166_fr_code_lists.txt',
) {
    # List-form system() bypasses the shell, so no quoting is needed.
    system('wget', '-N', $registry_url, '-owget.log') == 0
        or warn "wget could not fetch $registry_url (status $?)\n";
}

# Keep a copy of the IANA registry under a .txt name.
system('cp', '-f', 'language-subtag-registry', 'language-subtag-registry.txt') == 0
    or warn "could not copy language-subtag-registry (status $?)\n";

# The ISO 15924 archive contains a dated iso15924-utf* member; find its name
# in unzip's report and rename it to a stable file name.
my $unzip_report = `unzip -o iso15924.txt.zip`;

# Only rename when the member name was actually found; otherwise $1 would be
# empty or left over from an unrelated match.  The lazy capture plus \s*$
# drops trailing blanks after the name.
# NOTE(review): unzip appears to pad member names in its report -- confirm.
if (defined $unzip_report && $unzip_report =~ /(iso15924-utf.+?)\s*$/m) {
    system('mv', '-f', $1, 'iso15924-utf8.txt') == 0
        or warn "could not rename $1 to iso15924-utf8.txt (status $?)\n";
}
else {
    warn "unzip did not report an iso15924-utf* file; iso15924-utf8.txt not updated\n";
}

# The archive itself is not kept.
unlink 'iso15924.txt.zip';

# The SIL ISO 639-3 tables carry a date in their file names, so the download
# page is fetched first and scraped for the current names.
system('wget', '-N', 'http://www.sil.org/iso639-3/download.asp', '-owget.log') == 0
    or warn "wget could not fetch the SIL download page (status $?)\n";

# Slurp the page; an unreadable page leaves the text empty, which makes every
# lookup below fail cleanly instead of reading from an unopened handle.
my $download_page = '';
if (open my $page_fh, '<', 'download.asp') {
    local $/;    # read the whole file in one go
    my $page_text = <$page_fh>;
    $download_page = $page_text if defined $page_text;
    close $page_fh;
}
else {
    warn "cannot read download.asp: $!\n";
}
unlink 'download.asp';

# Each entry: pattern capturing the dated file name in the page's link,
# and the stable local name the file is copied to.
my @sil_tables = (
    [ qr/<a HREF=\"(iso-639-3_[0-9]+\.tab)\">/,             'iso-639-3.tab' ],
    [ qr/<a HREF=\"(iso-639-3_Name_Index_[0-9]+\.tab)\">/,  'iso-639-3_Name_Index.tab' ],
    [ qr/<a HREF=\"(iso-639-3_Retirements_[0-9]+\.tab)\">/, 'iso-639-3_Retirements.tab' ],
);

for my $sil_table (@sil_tables) {
    my ($link_pattern, $stable_name) = @{$sil_table};

    # A failed match must be skipped explicitly: otherwise $1 would still
    # hold the previous table's name and the wrong file would be copied
    # over $stable_name.
    unless ($download_page =~ $link_pattern) {
        warn "no link for $stable_name found on the SIL download page; skipped\n";
        next;
    }
    my $dated_name = $1;

    system('wget', '-N', "http://www.sil.org/iso639-3/$dated_name", '-owget.log') == 0
        or warn "wget could not fetch $dated_name (status $?)\n";
    system('cp', '-f', $dated_name, $stable_name) == 0
        or warn "could not copy $dated_name to $stable_name (status $?)\n";
}

# Download the CrossWire wiki export of the localized language name table and
# rewrite localized.txt in place as a sorted "code<TAB>name" list.  When the
# global $wiki flag is 1, the same rows (including the optional third column)
# are also written to localized.wiki in wiki-table row syntax.
system('wget', 'http://www.crosswire.org/wiki/Special:Export/Localized_Language_Names',
       '-Olocalized.txt', '-owget.log') == 0
    or warn "wget could not fetch the Localized_Language_Names export (status $?)\n";

# Stop before the file is reopened for writing (and thereby truncated) if it
# cannot even be read.
open my $export_fh, '<:utf8', 'localized.txt'
    or die "cannot read localized.txt: $!\n";

my @table_rows;
while (my $export_line = <$export_fh>) {
    # Wiki table row:  | code || name [ || comment ]
    next unless $export_line =~ /^\|\s*([a-zA-Z\-]+)\s*\|\|\s*([^\|]+)\s*(\|\|\s*(.+))?/;

    # The comment column is optional; use an empty string when it is absent.
    my $table_row = join "\t", $1, $2, (defined $4 ? $4 : '');
    $table_row =~ s/\s*$//;             # drop trailing blanks, newline, empty column
    $table_row =~ s/\s*\t\s*/\t/g;      # no padding around the separators
    push @table_rows, $table_row;
}
close $export_fh;

open my $loc_fh, '>:utf8', 'localized.txt'
    or die "cannot write localized.txt: $!\n";

my $wiki_fh;
if ($wiki == 1) {
    open $wiki_fh, '>:utf8', 'localized.wiki'
        or die "cannot write localized.wiki: $!\n";
}

for my $table_row (sort @table_rows) {
    # Rows whose name column was blank collapse to just the code during the
    # clean-up above and no longer match; skip them rather than printing
    # captures left over from an earlier match.
    unless ($table_row =~ /^([a-zA-Z\-]+)\t([^\t]+)(\t(.+))?$/) {
        warn "skipping malformed row: $table_row\n";
        next;
    }
#    if ($4 !~ /dead/) { #turn this on to remove langs tagged as dead

    print {$loc_fh} "$1\t$2\n";
#    }

    if ($wiki == 1) {
        # Wiki rows keep every column, with "||" inserted after each tab.
        (my $wiki_row = $table_row) =~ s/\t/\t||/g;
        print {$wiki_fh} "|-\n|$wiki_row\n";
    }
}

# Buffered write errors only surface at close, so check it.
close $loc_fh or die "error writing localized.txt: $!\n";
if ($wiki == 1) {
    close $wiki_fh or die "error writing localized.wiki: $!\n";
}