blob: f02751e48c819e533f7961fe505c2d1337c7eed5 (
plain) (
tree)
|
|
#!/usr/bin/perl
# This script creates a Java properties file from SIL's ISO 639-3 tables.
# Those tables change frequently in both content and layout,
# so adjust this program as needed.
#
# The files are currently downloaded from:
# http://www.sil.org/iso639-3/iso-639-3_20090210.tab
# http://www.sil.org/iso639-3/iso-639-3_Name_Index_20090210.tab
# http://www.sil.org/iso639-3/iso-639-3_Retirements_20090126.tab
#
# Run the program as:
# makeISO639.pl > iso639.txt
#
# Sort the file if desired with:
# makeISO639.pl | sort -t = -k 2 > iso639.txt
#
# Convert it from UTF-8 to Java's ASCII representation with:
# native2ascii -encoding utf-8 iso639.txt > iso639.properties
use strict;
use Unicode::Normalize;
use warnings;
binmode(STDOUT, ":utf8");

# Input tables, expected in the current working directory.
my $nameIndex = "iso-639-3_Name_Index_20090210.tab";
my $langCodes = "iso-639-3_20090210.tab";
my $deadCodes = "iso-639-3_Retirements_20090126.tab";

# Open a tab-separated table for reading and discard its header row.
#   $path  - file to open
#   $layer - PerlIO layer used to decode it, e.g. ":encoding(UTF-8)"
# Returns the open file handle; dies when the file cannot be opened, so
# that a missing input never silently produces an empty property file.
sub openTable
{
    my ($path, $layer) = @_;
    open(my $fh, "<$layer", $path)
        or die "Cannot open $path: $!\n";
    # skip the first line
    my $header = <$fh>;
    return $fh;
}

# Read the next non-blank row from a handle returned by openTable.
# Returns a reference to the array of tab-separated fields, or nothing at
# end of file.  An array reference is returned (rather than a list) so that
# a row which splits into zero fields is not mistaken for end of file.
sub nextRow
{
    my ($fh) = @_;
    while (defined(my $row = <$fh>))
    {
        # chomp ms-dos line endings
        $row =~ s/\r//;
        chomp($row);
        # Skip blank lines
        next if ($row eq '');
        # ensure it is normalized to NFC
        return [ split(/\t/, NFC($row)) ];
    }
    return;
}

# Return the given values with every undefined one replaced by the empty
# string, so that short rows behave as they do without warnings enabled.
sub orEmpty
{
    return map { defined($_) ? $_ : '' } @_;
}

# Name lookup keyed on the first two columns of the name index (joined by
# Perl's $; subscript separator); the value is the third column.
# NOTE(review): presumably Id, Print_Name and Inverted_Name per SIL's
# published layout -- confirm against the downloaded file.
my %names = ();
my $nameIndexFile = openTable($nameIndex, ":encoding(UTF-8)");
while (my $row = nextRow($nameIndexFile))
{
    my ($code, $printName) = orEmpty(@$row[0, 1]);
    $names{$code,$printName} = $row->[2];
}
close($nameIndexFile);

# Emit one "code=name" line per living language, plus an extra line for the
# alternate code in column 3 when that column is non-empty.
# NOTE(review): column 3 is presumably the ISO 639-1 code -- confirm.
my $langFile = openTable($langCodes, ":encoding(UTF-8)");
while (my $row = nextRow($langFile))
{
    my ($code, $part1, $type, $refName) = orEmpty(@$row[0, 3, 5, 6]);
    # exclude extinct languages
    next if ($type eq 'E');
    my $name = $names{$code,$refName};
    if (!defined($name))
    {
        # Keep the historical output (an empty name) but make the gap visible.
        warn "No entry for '$code' / '$refName' in $nameIndex\n";
        $name = '';
    }
    print "$part1=$name\n" if ($part1 ne '');
    print "$code=$name\n";
}
close($langFile);

# The dead codes file is iso-8859-1. This may change at some date.
# Its first two columns are emitted directly as "code=name".
my $deadFile = openTable($deadCodes, ":encoding(iso-8859-1)");
while (my $row = nextRow($deadFile))
{
    my ($code, $name) = orEmpty(@$row[0, 1]);
    print "$code=$name\n";
}
close($deadFile);
|