summaryrefslogblamecommitdiffstats
path: root/locales/makeCodeList.pl
blob: 4f037c45c3e0d003ff8c87ac264c6ec447d5ad69 (plain) (tree)





















                                                                              
                     
 


                                             
                
 




























                                               

                                     




                                                
                                          














                                         
#!/usr/bin/perl

# This file is used to create a Java property file from SIL's ISO639-3 files.

# That file changes frequently both in content and layout.

# Adjust this program as needed.

#

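# The files are currently downloaded from:

#       http://www.sil.org/iso639-3/iso-639-3_20090210.tab

#       http://www.sil.org/iso639-3/iso-639-3_Name_Index_20090210.tab

#       http://www.sil.org/iso639-3/iso-639-3_Retirements_20090126.tab

# Save them in the current directory without the date suffix, i.e. as

# iso-639-3.tab, iso-639-3_Name_Index.tab and iso-639-3_Retirements.tab,

# because those are the file names this program opens.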

#

# Run the program as:

#       makeCodeList.pl > iso639.txt

#

# Sort the file by name, if desired, with:

#       makeCodeList.pl | sort -t = -k 2 > iso639.txt

#

# Convert it from UTF-8 to Java's ASCII representation with:

#       native2ascii -encoding utf-8 iso639.txt > iso639.properties


use strict;
# Report reads from unopened handles, undefined values in output, etc.
# on STDERR instead of letting them pass silently.
use warnings;
use Unicode::Normalize;

# The generated entries may contain non-ASCII language names, so STDOUT
# is given a UTF-8 output layer before anything is printed.
binmode(STDOUT, ":utf8");

# Section header that precedes all code=name entries in the output.
print "[locales]\n";

# Input tables; the paths are relative to the current working directory.
my $nameIndex = "iso-639-3_Name_Index.tab";
my $langCodes = "iso-639-3.tab";
my $deadCodes = "iso-639-3_Retirements.tab";

# Name lookup table. It is keyed on the pair (column 0, column 1) of the
# name index and holds that row's column 2; it is filled while reading
# $nameIndex and consulted while reading $langCodes.
my %names = ();

# Load the name index into %names.
#
# Every data row is tab-separated. The value of column 2 is stored under
# the composite key (column 0, column 1).
# NOTE(review): presumably these columns are Id, Print_Name and
# Inverted_Name of the SIL name index -- confirm against the current layout.
#
# Dies if the file cannot be opened; without this check a missing file
# would silently leave %names empty.
open(my $nameIndexFile, "<:utf8", $nameIndex)
        or die "Cannot open $nameIndex: $!\n";

# Skip the header row. $firstLine is declared here and is reused as a
# scratch variable by the sections that read the other two tables.
my $firstLine = <$nameIndexFile>;

while (my $row = <$nameIndexFile>)
{
        # Strip the line terminator, whether Unix (LF) or MS-DOS (CRLF).
        $row =~ s/[\r\n]+\z//;

        # Skip blank lines.
        next if ($row eq '');

        # Ensure the text is normalized to NFC.
        $row = NFC($row);

        my @fields = split(/\t/, $row);

        # A row without all three columns has nothing usable to store.
        next if (@fields < 3);

        $names{$fields[0],$fields[1]} = $fields[2];
}

close($nameIndexFile);

# Emit one "code=name" line per row of the language code table.
#
# The name is looked up in %names under the key (column 0, column 6).
# When column 3 is non-empty, an extra entry keyed on column 3 is written
# before the entry keyed on column 0.
# NOTE(review): presumably column 0 is the three-letter id, column 3 the
# two-letter ISO 639-1 code and column 6 the reference name -- confirm
# against the current SIL layout.
#
# Dies if the file cannot be opened.
open(my $langFile, "<:utf8", $langCodes)
        or die "Cannot open $langCodes: $!\n";

# Skip the header row.
$firstLine = <$langFile>;

while (my $row = <$langFile>)
{
        # Strip the line terminator, whether Unix (LF) or MS-DOS (CRLF).
        $row =~ s/[\r\n]+\z//;

        # Skip blank lines.
        next if ($row eq '');

        # Ensure the text is normalized to NFC.
        $row = NFC($row);

        my @fields = split(/\t/, $row);

        # Columns 0 and 6 are both needed for the lookup below.
        if (@fields < 7)
        {
                warn "$langCodes line $.: fewer than 7 columns, skipped\n";
                next;
        }

#        # exclude extinct languages
#        next if ($fields[5] eq 'E');

        my $name = $names{$fields[0],$fields[6]};

        # Without a name index entry the original code printed an empty
        # name ("code="). Fall back to the name that was used as the lookup
        # key so the entry stays usable, and report it.
        if (!defined($name))
        {
                warn "$langCodes line $.: no name index entry for "
                   . "'$fields[0]', using column 6 instead\n";
                $name = $fields[6];
        }

        print "$fields[3]=$name\n" if ($fields[3]);
        print "$fields[0]=$name\n";
}

close($langFile);

# Emit one "code=name" line per row of the retirements table, taking the
# code from column 0 and the name from column 1 of each tab-separated row.
#
# Dies if the file cannot be opened.
open(my $deadFile, "<:utf8", $deadCodes)
        or die "Cannot open $deadCodes: $!\n";

# Skip the header row.
$firstLine = <$deadFile>;

while (my $row = <$deadFile>)
{
        # Strip the line terminator, whether Unix (LF) or MS-DOS (CRLF).
        $row =~ s/[\r\n]+\z//;

        # Skip blank lines.
        next if ($row eq '');

        # Ensure the text is normalized to NFC.
        $row = NFC($row);

        my @fields = split(/\t/, $row);

        # A row without a name column would produce an empty "code=" entry.
        if (@fields < 2)
        {
                warn "$deadCodes line $.: fewer than 2 columns, skipped\n";
                next;
        }

        print "$fields[0]=$fields[1]\n";
}

close($deadFile);