#! /usr/bin/env perl # copy_change_file_name_encoding.pl: copy file changing # the encoding of the file name and substituting to have accented letters. # # Copyright 2022-2023 Free Software Foundation, Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3 of the License, # or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . # # Original author: Patrice Dumas use strict; use utf8; use File::Copy; use File::Basename; use File::Spec; use Encode qw(from_to); use Getopt::Long qw(GetOptions); my $from = 'US-ASCII'; my $to = 'ISO-8859-1'; my $result_options = Getopt::Long::GetOptions ( 'from|f=s' => \$from, 'to|t=s' => \$to, ); exit 1 if (!$result_options); my ($src_path, $dest_dir) = @ARGV; if (not defined($src_path)) { die "Need source file\n"; } my $dest_path; if (defined($dest_dir)) { my ($file_name, $dir, $suffix) = fileparse($src_path); $dest_path = File::Spec->catfile($dest_dir, $file_name); } else { $dest_path = $src_path; } my $converted_dest_path = Encode::decode($from, $dest_path); # not that converted_dest_path may not be in UTF-8, depends what perl internally # does. # The character which is used is common to many 8bit codepages. $converted_dest_path =~ s/latin/lat§n/; my $dest_path_in_utf8 = Encode::encode('UTF-8', $converted_dest_path); # use another variable, since from_to argument is converted in-place my $dest_path_in_to_encoding = $dest_path_in_utf8; # NOTE on Windows, when Perl uses the char API and not wchar_t API, # the file name written to the filesystem may not correspond to î, as # it depends on the codepage. If the codepage is not Latin1, Windows will # consider that \xEE, output by Perl for î if $to is ISO-8859-1, is the # \xEE character in the current codepage, and convert to UTF-16 to store on # the filesystem. my $succeeded = from_to($dest_path_in_to_encoding, 'UTF-8', $to); if (not defined($succeeded)) { warn "could not decode, substitute and recode $src_path\n"; exit(1); } my $copy_succeeded = copy($src_path, $dest_path_in_to_encoding); if (not $copy_succeeded) { warn "could not copy $src_path: $!\n"; exit(1); }