#!/usr/bin/perl
# This file is used to create a Java property file from SIL's ISO639-3 files.
# Those files change frequently both in content and layout.
# Adjust this program as needed.
#
# The files are currently downloaded from:
#    http://www.sil.org/iso639-3/iso-639-3_20090210.tab
#    http://www.sil.org/iso639-3/iso-639-3_Name_Index_20090210.tab
#    http://www.sil.org/iso639-3/iso-639-3_Retirements_20090126.tab
#
# Run the program as:
#    makeISO639.pl > iso639.txt
#
# Sort the file if desired with:
#    makeISO639.pl | sort -t = -k 2 > iso639.txt
#
# Convert it from UTF-8 to Java's ASCII representation with:
#    native2ascii -encoding utf-8 iso639.txt > iso639.properties

use strict;
use warnings;
use Unicode::Normalize;

binmode(STDOUT, ":utf8");

my $nameIndex = "iso-639-3_Name_Index_20090210.tab";
my $langCodes = "iso-639-3_20090210.tab";
my $deadCodes = "iso-639-3_Retirements_20090126.tab";

# Read a tab-separated file and return its data rows.
#
# Parameters:
#    $path  - name of the file to read
#    $layer - open mode including the I/O layer, e.g. "<:utf8"
#
# Returns a list of array references, one per data row, each holding the
# tab-separated fields of that row. The first line of the file is discarded,
# as are blank lines and lines that split into no fields at all. Every line
# is normalized to NFC before it is split.
#
# Dies if the file cannot be opened or closed.
sub readTabFile {
    my ($path, $layer) = @_;

    open(my $fh, $layer, $path)
        or die "Cannot open $path: $!\n";

    # skip the first line
    my $firstLine = <$fh>;

    my @rows = ();
    while (my $line = <$fh>) {
        # chomp ms-dos line endings
        $line =~ s/\r//;
        chomp($line);

        # Skip blank lines
        next if ($line eq '');

        # ensure it is normalized to NFC
        my @fields = split(/\t/, NFC($line));

        # A line made only of tabs splits into nothing; there is no code to emit.
        next unless (@fields);

        push(@rows, \@fields);
    }

    close($fh)
        or die "Cannot close $path: $!\n";

    return @rows;
}

# Load every input before printing anything, so that a missing or unreadable
# file aborts the run instead of leaving a truncated property file behind.
my @nameRows = readTabFile($nameIndex, "<:utf8");
my @langRows = readTabFile($langCodes, "<:utf8");
# The dead codes file is iso-8859-1. This may change at some date.
my @deadRows = readTabFile($deadCodes, "<:encoding(iso-8859-1)");

# Map (column 0, column 1) of the name index to column 2 of the same row.
my %names = ();
foreach my $row (@nameRows) {
    my ($code, $nameKey, $name) = @$row;

    # A row without a second column can never be looked up.
    next unless (defined($nameKey));

    $names{$code, $nameKey} = $name;
}

# Emit the live codes. Column 3 is an optional alternate code that gets the
# same name (presumably the two-letter code; confirm against the file layout).
foreach my $row (@langRows) {
    my ($code, $altCode, $type, $nameKey) = @$row[0, 3, 5, 6];

    # exclude extinct languages
    next if (defined($type) && $type eq 'E');

    my $name = defined($nameKey) ? $names{$code, $nameKey} : undef;
    if (!defined($name)) {
        # Keep the output line, but make the gap visible on STDERR.
        warn "No name found in $nameIndex for code $code\n";
        $name = "";
    }

    print "$altCode=$name\n" if ($altCode);
    print "$code=$name\n";
}

# Emit the retired codes with the name given in column 1 of their own file.
foreach my $row (@deadRows) {
    my ($code, $name) = @$row;
    $name = "" unless (defined($name));

    print "$code=$name\n";
}