#! /usr/bin/env perl # regenerate_documentlanguages-loc.pl: download the ISO 639-2 files language # and ISO 3166-1 alpha-2 codes and regenerate Texinfo/Documentlanguages.pm # list of languages and regions # # Copyright 2022-2023 Free Software Foundation, Inc. # # Copying and distribution of this file, with or without modification, # are permitted in any medium without royalty provided the copyright # notice and this notice are preserved. This file is offered as-is, # without any warranty. # # Original author: Patrice Dumas # # Calling that script for each release could be a good idea. use strict; use List::Util qw(first); # not in core perl use Text::CSV; # emulates -w BEGIN { $^W = 1; } my $dir = 'maintain'; system ("cd $dir && wget -N https://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt"); # the ISO 3166-1 alpha-2 codes are not easily accessible from the ISO website, there is # an interface not a raw download (seems incredible, but true...). # Use the country code project list instead system ("cd $dir && wget -N https://raw.githubusercontent.com/datasets/country-codes/master/data/country-codes.csv"); open (TXT, "$dir/ISO-639-2_utf-8.txt") or die "Open $dir/ISO-639-2_utf-8.txt: $!\n"; binmode(TXT, ":utf8"); my @entries; while () { my @entry = split /\|/; if ($entry[2] ne '') { push @entries, $entry[2]; } } my $fh; open ($fh, "$dir/country-codes.csv") or die "Open $dir/country-codes.csv: $!\n"; binmode($fh, ":utf8"); my $csv = Text::CSV->new(); my @region_codes; my $row = $csv->getline($fh); my $code_header_index = first { $row->[$_] eq 'ISO3166-1-Alpha-2' } 0..scalar(@$row); if (not defined($code_header_index)) { print STDERR "regions_header: ".join('|', @$row)."\n"; die; } my @regions; while (my $row = $csv->getline ($fh)) { if ($row->[$code_header_index] ne '') { push @regions, $row->[$code_header_index]; } } open (OUT, ">Texinfo/Documentlanguages.pm") or die "Open Texinfo/Documentlanguages.pm: $!\n"; print OUT "# This file was automatically generated from $0\n\n"; print OUT "package Texinfo::Documentlanguages;\n\n"; print OUT 'our %language_codes = ('."\n"; foreach my $entry (sort @entries) { print OUT "'$entry' => 1,\n"; } print OUT ");\n\n"; print OUT 'our %region_codes = ('."\n"; foreach my $region (sort @regions) { print OUT "'$region' => 1,\n"; } print OUT ");\n\n1;\n";