#!/usr/bin/perl
# From: ilya@math.ohio-state.edu (Ilya Zakharevich)
# So if you do not want to
# descend into directories, just do
# 	rnm '$_ = lc'
# (with 5.004 or close, with perls which are older than a week you need
# 	rnm '$_ = lc $_'
# ).
# 
# I wrote a similarly nice tool which does a similar thing to
# "find". Use it like this:
# 	pfind . '$_ = lc'
# 
# To lowercase only the files which names contain `blah', do
# 	pfind . /blah/ '$_ = lc'
# 
# To lowercase only the files which contain `blah', do
# 	pfind . "=~ /blah/" '$_ = lc'

# 1.5:	correctly open files with leading/trailing spaces in names.
# 1.6:	handle substitutions, -bak;
#	-nosubdir; 
#	-debug exits;
#       -nosubdir bug fixed
#       inserts a stat($_) iff any filter matching /^!?-(M|A|C)/ is used
#       this allows for    -M _ < 2
#       inserts  `if test'  instead of  `unless ! test' if  !test is given
# 1.8:	Preserves attributes of the file if s///
#	Optimizations of -f to -f _ performed
#	Pod added
# 1.11: prt somehow was not printing the name of the file, but line as well;
#	prt_line added which prints the line as well;
#	-nosubdir was ignored;
# 1.12: Added 'du' convenience function
#	Do not stat() the file at all.  Force File::Find to stat() if needed.
#	The above trick did not work.  Backing up to the older way...
#	Doc updates.
# 1.13: $dir =~ /blah/ is considered as a rule too;
#	rename is more verbose now.

use strict;
use vars qw($Line_Found $rcs $debug $prune $bak 
	    $StartDir $binary $haveS $NeedsStat);
undef $Line_Found;

$rcs = ' $Id: pfind.pl,v 1.14 1998/02/04 01:24:23 ilya Exp ilya $ ' ;

sub usage {
  if ($] < 5.00456 or system 'perldoc', '-F', "$0" ) {
    warn "    perldoc -F $0 could not find us!  Short version follows:\n";
    my $version = (split ' ', $rcs)[2];
    
    print <<EOU;
usage: $0 [-debug] [-nosubdir] [-bin] [-bak=suffix] [--] startdir 
          rule1 rule2 ...
			>>>> Version: $version. <<<< 
	Rules are perl statements to execute. Statements starting with
`-', `/', or `!' are considered filters, a file will be discarded unless
the statement returns true. The rest is executed "as is".
	If only filters are given the default action is `prt' (see below).
	Variables \$_, \$name, \$dir contain the file name, full file name
and the name of the directory. Statements are executed in the directory
of the each processed file.
	If file is not discarded, and \$_ is changed after the perl
statements are executed, the file is renamed to the new value of \$_.
	Convenience function `prt' prints \\n-terminated name of the file.

	If a rule starts with `=~', it is considered as a filter to
match contents of the file. The rest is the regular expression to
match. If regexp contains modifiers `s' or `m', it is matched against
the file as a whole, otherwise it is matched line-by-line. 
	If the regexp-filter fails, file processing stops. Otherwise 
whatever was matched against is available in the variable \$line unless
modifiers `s' or `m' are given.
	If a rule starts with `=~s' it is considered as a filter to
match and MODIFY the contents of the file. Modifiers 's' and 'm' are applicable
as above. To make several modifications at a time, use e.g.
=~ s/abc/ABC/ &&& s/def/DEF/ (modifiers should be the same, though).
In case you give the modifiers 's' or 'm' you should think about using
the modifier 'g', as well.

Option -debug just shows and evals the expression - IT DOES NO PROCESSING !
With option -nosubdir does not descend into subdirectories.
Without option -bin binary files are not processed for substrings,
and files are opened in text mode.
EOU
  }
  ;                             # '; # for Emacs
  exit;
}

sub prt { 
  print "$File::Find::name\n";
}

sub prt_line { 
  print "$File::Find::name";
  print " : $Line_Found"  if  defined($Line_Found);
  print "\n";
}

my $Total_Size;
my $Num_Files;

sub du {
  $Num_Files++;
  $Total_Size += ($NeedsStat ? -s _ : -s $File::Find::name);
}

sub do_rename {
  my ($from, $to) = @_;
  print STDERR "rename `$File::Find::dir/$from'\t===> `$File::Find::dir/$to'\n";
  rename $from, $to
    or warn("Cannot rename: $!\n"), return 0;
}

sub setup_bak {
  my ($file,$bak) = @_;
  return unless -e "$file$bak";
  my ($prefix,$count,$i) = ("", $bak, 0);
  unless ($bak =~ /^\w+\d+$/) {
    ($prefix,$count) = ($bak, 0);
  }
  $count++, $i++ while $i < 1000 and -e "$file$prefix$count";
  die "Cannot find backup name for '$file'\n" if $i >= 1000;
  die "Exiting...\n" unless do_rename "$file$bak", "$file$prefix$count";
}

sub wrapper {
  my ($regexp,$regexp_find) = shift;
  my $SubsIt = $regexp =~ /^\s*s/;
  my $Single = $regexp !~ /\w*[sm]\w*\s*$/;
  if ( $SubsIt ) { 
    $haveS++;
    my @SubsList= split /&&&/,$regexp;
    if ( @SubsList > 1 ) { 
      $regexp_find= join '||',@SubsList;
      $regexp= join ',',@SubsList;
    } else {
      $regexp_find= $regexp;
    }
  }
  my $Code;
  if ($binary) {
  $Code = <<'EOS';
return unless -f _ and -r _;
EOS
  } else {
  $Code = <<'EOS';
return unless -f _ and -r _ and -T _;
EOS
  }
  $Code .= <<'EOS' if $SubsIt ;
  print (STDERR "$_ is not writable\n"), return unless -w _;
EOS
  $Code .= <<'EOS';
  { my ($FileName,$found)= ($_,0);
    $FileName =~ s|^(\s)|./$1|; 
    my $openfile = $FileName;
    $openfile =~ s/(\s)$/$1\0/;
    open(FILE, "< $openfile") or die "cannot open '$FileName': $!";
    local $_;
EOS
  $Code .= <<'EOS' if $binary;
    binmode FILE;
EOS
  if ( $SubsIt ) { 
    my $bmode = '';
    $bmode = <<'EOS' if $binary;
      binmode FILE;
      binmode OUTPUT;
EOS
    my $move_copy = 'rename $FileName, "$FileName$bak"';
    my $op = 'rename';
    my $open_write = 'open(OUTPUT, "> $openfile")';
    my $set_mode = 'chmod $Mode, $FileName';
    if (defined &File::Copy::syscopy) { # There is more than $Mode
      $move_copy = 'File::Copy::syscopy $FileName, "$FileName$bak"';
      $op = 'copy';
      $open_write = 'open(OUTPUT, "+< $openfile") and truncate OUTPUT, 0';
      $set_mode = '';
    }
    if ( $Single ) {		# Single-line substitution
      $Code .= <<EOSS;
    1 while defined (\$_ = <FILE>) and not (\$found = $regexp_find);
    if ( \$found ) { 
      close FILE or die "cannot close '\$FileName' for write: \$!"; 
      setup_bak(\$FileName, \$bak);
      $move_copy
	or die "cannot $op '\$FileName' to '\$FileName\$bak': \$!";
      open (FILE, "< \$FileName\$bak");
      $open_write
	or die "cannot open '\$FileName' for write: \$!";
$bmode
      while (<FILE>) { $regexp; print OUTPUT; }
      close OUTPUT or die "cannot close \$FileName: \$!";
      $set_mode;
      \$FileName .= \$bak;
    }
EOSS
    } else {			# Multi-line substitution
      $Code .= <<EOSB;
    local \$/ = undef;
    \$_ = <FILE>;
    \$found = $regexp_find;
    if ( \$found ) { 
      close FILE or die "cannot close '\$FileName' for write: \$!"; 
      setup_bak(\$FileName, \$bak);
      $move_copy
	or die "cannot rename '\$FileName': \$!";
      open (FILE, "< \$FileName\$bak");
      open(OUTPUT, "> \$openfile") 
	or die "cannot open '\$FileName' for write: \$!";
$bmode
      \$_ = <FILE>; $regexp; print OUTPUT;
      close OUTPUT or die "cannot close '\$FileName' for write: \$!";
      $set_mode;
      \$FileName .= \$bak;
    }
EOSB
    }
  }
  else {
    if ( $Single ) {		# Single line match
      $Code .= <<EOSS;
    while (defined (\$line = <FILE>)) { \$found = 1, last if \$line =~ $regexp; }
EOSS
    } else { 
      $Code .= <<EOSB;		# Multiline match
    local \$/ = undef;
    \$_ = <FILE>;
    \$found = $regexp;
EOSB
    }
  }

  if ( $Single )
  { $Code .= <<'EOSS';
    $Line_Found = $line if $found;
EOSS
  }
  
  $Code .= <<'EOS';
    close FILE or die "cannot close '$FileName': $!";
    return unless $found;
  }
EOS
}

$StartDir = shift;
usage unless @ARGV;
$bak = ".bak";
while ($StartDir =~ /^-/) {
  $debug = 1 if $StartDir eq '-debug';
  $prune = 1 if $StartDir eq '-nosubdir';
  $binary = 1 if $StartDir eq '-bin';
  $bak =  $1 if $StartDir =~ /^-bak=(.*)/;
  $StartDir = shift;
  $StartDir = shift, last if $StartDir eq '--';
}
#die "Starting directory `$StartDir' not found.\n" unless -d $StartDir;
while ( -l $StartDir )
{ my $Link = readlink $StartDir;
  $StartDir= ( $StartDir =~ m|(.*)/[^/]+$|s ? $1 : '.');
  $StartDir= ( substr($Link,0,1) eq '/' ? $Link : $StartDir .'/'.$Link );
}
  
die "Starting directory `$StartDir' not found.\n" unless -d $StartDir;

my $NF = 0;  # no of filter arguments
map { $NF++ if  m:^([-/!]):  || m/^=~/ || m/^\$(name|dir)\s*[=!]~/ } @ARGV;
# Convert -f to -f _, -M < 3 to -M _ < 3
@ARGV = map { s/^\s*((!\s*)?-[a-su-zA-Z])\s*([!=<>]|$)/$1 _ $3/; $_ } @ARGV;
$NeedsStat = 0;
map { $NeedsStat++  if m/^\s*(!\s*)*-[a-su-zA-Z]\s+_\b/ } @ARGV; # skip -t
map { $NeedsStat++  if m/^\s*du\s*$/ } @ARGV; # du
my @rows = map { s:^(([-/]|\$(name|dir)\s*[=!]~).*):return unless\tdo {\t$1\t\t}:s; s:^!(.*):return if\tdo {\t$1\t\t}:s ; $_ } @ARGV;
@rows = map { s! ^ =~ (.*) ! $NeedsStat++; wrapper($1) !xes ; $_ } @rows;

my $text = join "  ;\n  ", @rows;
$text .= "  ;\n  prt"  if $NF == @ARGV;

$text = "my (\$dev,\$inode,\$Mode) = CORE::stat(\$_);\n  " . $text  
  if  $haveS;
#$text = "my (\$dev,\$inode,\$Mode) = CORE::stat(_);\n" . $text  
#  if  $haveS;

$text = "CORE::stat(\$_);\n  " . $text  
  if  $NeedsStat and not $haveS;

# Disable optimizations of File::Find so that _ is always available
# $File::Find::dont_use_nlink = 1 if $NeedsStat or $haveS;	# does not work

@ARGV = ();

my $setup = <<'EOS';
  my $name = $File::Find::name;
  my $dir  = $File::Find::dir;
  my $was = $_;
  my ($line, $found);
EOS

my $PRUNE= ( $prune ? 
             '$File::Find::prune=1  unless $File::Find::name eq $StartDir;'
            : '' );

my $finish = <<'EOS';
  do_rename($was,$_) unless $was eq $_;
EOS

my $wanted = <<EOW;
sub {
$setup
  $PRUNE
  $text;
$finish
}
EOW

print STDERR "$wanted\n" if  $debug ;

my $sub = eval $wanted;
die $@ if $@;

exit if $debug;

use File::Find;

File::Find::find ( $sub, $StartDir );
print "\n$Total_Size byte", ($Total_Size != 1 ? 's' : ''), 
  " in $Num_Files file", ($Num_Files != 1 ? 's' : ''), "\n"
  if defined($Total_Size);

__END__

=head1 NAME

pfind - find-like utility with Perl syntax

=head1 SYNOPSYS

To lowercase only the files names of which contain C<blah>, do

  pfind . /blah/ '$_ = lc'

To lowercase only the files which contain C<blah> inside, do

  pfind . "=~ /blah/" '$_ = lc'

=head1 DESCRIPTION

usage: 

  pfind [-debug] [-nosubdir] [-bin] [-bak=suffix] [--] startdir \
        rule1 rule2 ...

=head2 Rules: filters and actions

Rules are perl statements to execute. Statements starting with C<->,
C</>, C<$dir =~>, C<$dir !~>, C<$name =~>, C<$name !~>, or C<!> are
considered filters, a file will be discarded unless the statement
returns true. The rest is executed I<as is>.

Rules are executed in the directory of the file.

If only filters are given the default action C<prt> (see below) is added.

One can always emulate a filter C<FILT> by giving a rule 

  return unless FILT;

reversely, one can inhibit interpretation of a rule as a filter by
adding leading whitespace.

=head2 Initialization and Termination

Rules of the forms

  use Blah;

or

  BEGIN {BLOCK}

are executed before the start of tree walk, rules of the form

  END   {BLOCK}

are executed on termination.

=head2 File

In rules variables $_, $name, $dir contain the file name, full file
name and the name of the directory. Statements are executed in the
directory of the each processed file.

=head2 Renaming

If file is not discarded, and $_ is changed after the perl statements
are executed, the file is renamed to the new value of $_.

=item C<prt>, C<prt_line> and C<du>

Convenience function C<prt> prints C<\n>-terminated name of the file,

C<prt_line> does the same, but appends the found $line as well.

C<du> increments the count of files/bytes-in-files and arranges for a
message of the form

  43952 bytes in 2 files

to be printed on termination.  The count of files is available in
variable $Num_Files, count of bytes-in-files in $Total_Size.

=head2 Contents of the file

If a rule starts with C<=~>, it is considered as a filter to
match contents of the file. The rest is the regular expression to
match. If regexp contains modifiers C<s> or C<m>, it is matched against
the file as a whole, otherwise it is matched line-by-line. 

If the regexp-filter fails, file processing stops. Otherwise 
whatever was matched against is available in the variable $line, unless
modifiers C<s> or C<m> are given.

=head2 Modifying file

If a rule starts with C<=~ s> it is considered as a filter to match
and MODIFY the contents of the file.  Modifiers C<s> and C<m> are
applicable as above. To make several modifications at a time, use e.g.
C<=~ s/abc/ABC/ &&& s/def/DEF/> (modifiers should be the same, though).
In case you give the modifiers C<s> or C<m> you should think about using
the modifier C<g>, as well.

=head1 OPTIONS

=over 8

=item C<-debug> 

just shows the expression which will be given to File::Find.

=item C<-nosubdir> 

do not descend into subdirectories.

=item C<-bin> 

without this binary files are not processed for substrings, and files
are opened in text mode.

=back

=head1 VERSION

$Revision: 1.14 $, $Date: 1998/02/04 01:24:23 $.

=head1 AUTHOR

Ilya Zakharevich <ilya@math.ohio-state.edu> with significant additions
by Helmut Jarausch <jarausch@igpm.rwth-aachen.de>

=cut