#!/usr/bin/perl # From: ilya@math.ohio-state.edu (Ilya Zakharevich) # So if you do not want to # descend into directories, just do # rnm '$_ = lc' # (with 5.004 or close, with perls which are older than a week you need # rnm '$_ = lc $_' # ). # # I wrote a similarly nice tool which does a similar thing to # "find". Use it like this: # pfind . '$_ = lc' # # To lowercase only the files which names contain `blah', do # pfind . /blah/ '$_ = lc' # # To lowercase only the files which contain `blah', do # pfind . "=~ /blah/" '$_ = lc' # 1.5: correctly open files with leading/trailing spaces in names. # 1.6: handle substitutions, -bak; # -nosubdir; # -debug exits; # -nosubdir bug fixed # inserts a stat($_) iff any filter matching /^!?-(M|A|C)/ is used # this allows for -M _ < 2 # inserts `if test' instead of `unless ! test' if !test is given # 1.8: Preserves attributes of the file if s/// # Optimizations of -f to -f _ performed # Pod added # 1.11: prt() somehow was not printing the name of the file, but line as well; # prt_line() added which prints the line as well; # -nosubdir was ignored; # 1.12: Added du() convenience function # Do not stat() the file at all. Force File::Find to stat() if needed. # The above trick did not work. Backing up to the older way... # Doc updates. # 1.13: $dir =~ /blah/ is considered as a rule too; # rename() is more verbose now. # 1.14+ better error message for non-writable files # die on unknown options. # New option -alllines. # New convenience function skip(). # New option -utf8 # 1.14.2 better error message for unknown option # 1.14.3 New patch from Jarausch: make prt_lines default for -alllines, bugfix # 1.15 Merged patches after 1.14 into the RCS. # 1.16 NeedLStat patch from Jarausch. use strict; use vars qw($Line_Found $rcs $debug $prune $bak $StartDir $binary $haveS $NeedsStat $NeedsLStat $MultiPrint); undef $Line_Found; $rcs = ' $Id: pfind.pl,v 1.16 1998/09/24 01:59:06 ilya Exp ilya $ ' ; sub usage { if ($] < 5.00456 or system 'perldoc', '-F', "$0" ) { warn " perldoc -F $0 could not find us! Short version follows:\n"; my $version = (split ' ', $rcs)[2]; print <>>> Version: $version. <<<< Options are [-debug] [-nosubdir] [-bin] [-bak=suffix] [-alllines]. Rules are perl statements to execute. Statements starting with `-', `/', or `!' are considered filters, a file will be discarded unless the statement returns true. The rest is executed "as is". If only filters are given the default action is `prt' (see below). Variables \$_, \$name, \$dir contain the file name, full file name and the name of the directory. Statements are executed in the directory of the each processed file. If file is not discarded, and \$_ is changed after the perl statements are executed, the file is renamed to the new value of \$_. Convenience function `prt' prints \\n-terminated name of the file. If a rule starts with `=~', it is considered as a filter to match contents of the file. The rest is the regular expression to match. If regexp contains modifiers `s' or `m', it is matched against the file as a whole, otherwise it is matched line-by-line. If the regexp-filter fails, file processing stops. Otherwise whatever was matched against is available in the variable \$line unless modifiers `s' or `m' are given. If a rule starts with `=~s' it is considered as a filter to match and MODIFY the contents of the file. Modifiers 's' and 'm' are applicable as above. To make several modifications at a time, use e.g. =~ s/abc/ABC/ &&& s/def/DEF/ (modifiers should be the same, though). In case you give the modifiers 's' or 'm' you should think about using the modifier 'g', as well. Option -debug just shows and evals the expression - IT DOES NO PROCESSING ! With option -nosubdir does not descend into subdirectories. Without option -bin binary files are not processed for substrings, and files are opened in text mode. -alllines: do for all the matching lines. EOU } ; # '; # for Emacs exit; } sub skip { $File::Find::prune=1; } sub prt { print "$File::Find::name\n"; } sub prt_line { print "$File::Find::name"; print " : $Line_Found" if defined($Line_Found); print "\n" unless $Line_Found =~ /\n\Z/; } my $Total_Size; my $Num_Files; sub du { $Num_Files++; $Total_Size += ($NeedsStat ? -s _ : -s $File::Find::name); } sub do_rename { my ($from, $to) = @_; print STDERR "rename `$File::Find::dir/$from'\t===> `$File::Find::dir/$to'\n"; rename $from, $to or warn("Cannot rename: $!\n"), return 0; } sub setup_bak { my ($file,$bak) = @_; return unless -e "$file$bak"; my ($prefix,$count,$i) = ("", $bak, 0); unless ($bak =~ /^\w+\d+$/) { ($prefix,$count) = ($bak, 0); } $count++, $i++ while $i < 1000 and -e "$file$prefix$count"; die "Cannot find backup name for '$file'\n" if $i >= 1000; die "Exiting...\n" unless do_rename "$file$bak", "$file$prefix$count"; } my $havelabel; sub wrapper { my ($regexp,$regexp_find) = shift; my $SubsIt = $regexp =~ /^\s*s/; my $Single = $regexp !~ /\w*[sm]\w*\s*$/; if ( $SubsIt ) { $haveS++; my @SubsList= split /&&&/,$regexp; if ( @SubsList > 1 ) { $regexp_find= join '||',@SubsList; $regexp= join ',',@SubsList; } else { $regexp_find= $regexp; } } my $Code; if ($binary) { $Code = <<'EOS'; return unless -f _ and -r _; EOS } else { $Code = <<'EOS'; return unless -f _ and -r _ and -T _; EOS } $Code .= <<'EOS' if $SubsIt ; print (STDERR "$name is not writable\n"), return unless -w _; EOS $Code .= <<'EOS' unless $MultiPrint; { EOS $Code .= <<'EOS'; my $FileName = $_; my $found = 0; $FileName =~ s|^(\s)|./$1|; my $openfile = $FileName; $openfile =~ s/(\s)$/$1\0/; open(FILE, "< $openfile") or die "cannot open '$name': $!"; EOS $Code .= <<'EOS' if $binary; binmode FILE; EOS if ( $SubsIt ) { die "-alllines option not supported with substitutions.\n" if $MultiPrint; my $bmode = ''; $bmode = <<'EOS' if $binary; binmode FILE; binmode OUTPUT; EOS my $move_copy = 'rename $FileName, "$FileName$bak"'; my $op = 'rename'; my $open_write = 'open(OUTPUT, "> $openfile")'; my $set_mode = 'chmod $Mode, $FileName'; if (defined &File::Copy::syscopy) { # There is more than $Mode $move_copy = 'File::Copy::syscopy $FileName, "$FileName$bak"'; $op = 'copy'; $open_write = 'open(OUTPUT, "+< $openfile") and truncate OUTPUT, 0'; $set_mode = ''; } if ( $Single ) { # Single-line substitution $Code .= <) and not (\$found = $regexp_find); if ( \$found ) { close FILE or die "cannot close '\$name' for write: \$!"; setup_bak(\$FileName, \$bak); $move_copy or die "cannot $op '\$name' to '\$name\$bak': \$!"; open (FILE, "< \$FileName\$bak"); $open_write or die "cannot open '\$name' for write: \$!"; $bmode while () { $regexp; print OUTPUT; } close OUTPUT or die "cannot close \$name: \$!"; $set_mode; \$FileName .= \$bak; } EOSS } else { # Multi-line substitution $Code .= <; \$found = $regexp_find; if ( \$found ) { close FILE or die "cannot close '\$name' for write: \$!"; setup_bak(\$FileName, \$bak); $move_copy or die "cannot rename '\$name': \$!"; open (FILE, "< \$FileName\$bak"); open(OUTPUT, "> \$openfile") or die "cannot open '\$name' for write: \$!"; $bmode \$_ = ; $regexp; print OUTPUT; close OUTPUT or die "cannot close '\$name' for write: \$!"; $set_mode; \$FileName .= \$bak; } EOSB } } else { # Doing match, not subst if ( $Single ) { # Single line match die "-alllines option not supported with multiple matches.\n" if $MultiPrint and $havelabel; $Code .= <<'EOSL' if $MultiPrint and not $havelabel++; lines: { EOSL $Code .= <)) { \$found = 1, last if \$line =~ $regexp; } EOSS } else { # Multiline match die "-alllines option not supported with multiline match.\n" if $MultiPrint; $Code .= <; \$found = $regexp; EOSB } } if ( $Single ) { $Code .= <<'EOSS'; $Line_Found = $line if $found; EOSS } if ($MultiPrint) { $Code .= <<'EOS'; close FILE or die "cannot close '$name': $!" unless $found; return unless $found; EOS } else { $Code .= <<'EOS'; close FILE or die "cannot close '$name': $!"; return unless $found; } EOS } } $StartDir = shift; usage unless @ARGV; $bak = ".bak"; my $utf; while ($StartDir =~ /^-/) { my $o = $StartDir; $StartDir = shift; last if $o eq '--'; $debug = 1, next if $o eq '-debug'; $prune = 1, next if $o eq '-nosubdir'; $binary = 1, next if $o eq '-bin'; $MultiPrint = 1, next if $o eq '-alllines'; $utf = 1, next if $o eq '-utf8'; $bak = $1, next if $o =~ /^-bak=(.*)/; die "Unknown option `$o'.\n\tStart $0 without arguments for usage info.\n" if $o =~ /^-/; } #die "Starting directory `$StartDir' not found.\n" unless -d $StartDir; while ( -l $StartDir ) { my $Link = readlink $StartDir; $StartDir= ( $StartDir =~ m|(.*)/[^/]+$|s ? $1 : '.'); $StartDir= ( substr($Link,0,1) eq '/' ? $Link : $StartDir .'/'.$Link ); } die "Starting directory `$StartDir' not found.\n" unless -d $StartDir; my $NF = 0; # no of filter arguments map { $NF++ if m:^([-/!]): || m/^=~/ || m/^\$(name|dir)\s*[=!]~/ } @ARGV; # Convert -f to -f _, -M < 3 to -M _ < 3 @ARGV = map { s/^\s*((!\s*)?-[a-su-zA-Z])\s*([!=<>]|$)/$1 _ $3/; $_ } @ARGV; $NeedsStat = 0; $NeedsLStat = 0; map { $NeedsStat++ if m/^\s*(!\s*)*-[a-km-su-zA-Z]\s+_\b/ } @ARGV; # skip -l -t map { $NeedsLStat++ if m/^\s*(!\s*)*-l\s+_\b/ } @ARGV; map { $NeedsStat++ if m/^\s*du\s*$/ } @ARGV; # du my @rows = map { s:^(([-/]|\$(name|dir)\s*[=!]~).*):return unless\tdo {\t$1\t\t}:s; s:^!(.*):return if\tdo {\t$1\t\t}:s ; $_ } @ARGV; @rows = map { s! ^ =~ (.*) ! $NeedsStat++; wrapper($1) !xes ; $_ } @rows; push @rows, " ;\n". ($MultiPrint ? " prt_line" :" prt") if $NF == @ARGV; push @rows, '$found = 0', "redo lines", "}" if $havelabel; my $text = join " ;\n ", @rows; $text = "my (\$dev,\$inode,\$Mode) = CORE::stat(\$_);\n " . $text if $haveS; #$text = "my (\$dev,\$inode,\$Mode) = CORE::stat(_);\n" . $text # if $haveS; $text = "CORE::stat(\$_);\n " . $text if $NeedsStat and not $haveS; $text = "CORE::lstat(\$_);\n " . $text if $NeedsLStat and not $haveS; # Disable optimizations of File::Find so that _ is always available # $File::Find::dont_use_nlink = 1 if $NeedsStat or $haveS; # does not work @ARGV = (); my $setup = <<'EOS'; my $name = $File::Find::name; my $dir = $File::Find::dir; my $was = $_; my ($line, $found); EOS $setup .= <, do pfind . /blah/ '$_ = lc' To lowercase only the files which contain C inside, do pfind . "=~ /blah/" '$_ = lc' To do recursive C in UTF-8 files, do pfind -utf8 -alllines . '/^bar/' '=~ /foo/' prt_line =head1 DESCRIPTION usage: pfind [-debug] [-nosubdir] [-bin] [-bak=suffix] \ [-alllines] [-utf8] [--] startdir \ rule1 rule2 ... =head2 Rules: filters and actions Rules are perl statements to execute. Statements starting with C<->, C, C<$dir =~>, C<$dir !~>, C<$name =~>, C<$name !~>, or C are considered filters, a file will be discarded unless the statement returns true. The rest is executed I. Rules are executed in the directory of the file. If only filters are given the default action C (see below) is added. One can always emulate a filter C by giving a rule return unless FILT; reversely, one can inhibit interpretation of a rule as a filter by adding leading whitespace. =head2 Initialization and Termination Rules of the forms use Blah; or BEGIN {BLOCK} are executed before the start of tree walk, rules of the form END {BLOCK} are executed on termination. =head2 File In rules variables $_, $name, $dir contain the file name, full file name and the name of the directory. Statements are executed in the directory of the each processed file. =head2 Renaming If file is not discarded, and $_ is changed after the perl statements are executed, the file is renamed to the new value of $_. =head2 C, C, C and C Convenience function C prints C<\n>-terminated name of the file, C does the same, but appends the found $line as well. C arranges for files inside the given subdirectory to be skipped. Ignored for non-directories. C increments the count of files/bytes-in-files and arranges for a message of the form 43952 bytes in 2 files to be printed on termination. The count of files is available in variable $Num_Files, count of bytes-in-files in $Total_Size. =head2 Contents of the file If a rule starts with C<=~>, it is considered as a filter to match contents of the file. The rest is the regular expression to match. If regexp contains modifiers C or C, it is matched against the file as a whole, otherwise it is matched line-by-line. If the regexp-filter fails, file processing stops. Otherwise whatever was matched against is available in the variable $line, unless modifiers C or C are given. =head2 Modifying file If a rule starts with C<=~ s> it is considered as a filter to match and MODIFY the contents of the file. Modifiers C and C are applicable as above. To make several modifications at a time, use e.g. C<=~ s/abc/ABC/ &&& s/def/DEF/> (modifiers should be the same, though). In case you give the modifiers C or C you should think about using the modifier C, as well. =head1 OPTIONS =over 8 =item C<-debug> just shows the expression which will be given to File::Find. =item C<-nosubdir> do not descend into subdirectories. =item C<-alllines> Do actions following C<=~ /REGEXP/> filter for all the matching lines. Is supported for one single line match filter only. =item C<-bin> without this binary files are not processed for substrings, and files are opened in text mode. =item C<-utf8> Interpret code as modified by C directive. =back =head1 VERSION $Revision: 1.16 $, $Date: 1998/09/24 01:59:06 $. =head1 AUTHOR Ilya Zakharevich with significant additions by Helmut Jarausch =cut __END__ Here is the newsgroup announcement: pfind is a tiny script which allows one to combine the power and generality of Perl with terseness and convenience of specialized file and directory utilities. By learning 4 shortcuts added to standard Perl, 4 command-line options and 4 convenience functions, one can perform many operations usually done with diverse toolkits. pfind command-lines are only slightly longer than those of a specialized tool. In addition, since pfind can take *arbitrary* Perl expressions as arguments, pfind can do much more - while remaining easy to use. Thus pfind both simplifies the life of a Perl user, freeing him of learning syntax of multiple tools, and enhances his abilities as well.