#!/data/asictools/bin/perl -w
package HTML::TableManipulate
=head1 NAME
HTML::TableManipulate - manipulate the rows and columns of a HTML document containing a single table.
=head1 SYNOPSIS
use HTML::TableManipulate
open(FH,$File) or die "File $File not readable";
while(){$Input.=$_}
close(FH);
open(FH,">Status.html");
my $Tree=Init($Input);
SortColumns($Tree,3,2,1,0,4,5,6);
print FH SortRows($Tree,0);
$Tree->delete();
close FH;
=head1 DESCRITION
This package reads a HTML document as string containing a single table
without colspan or rowspan attributes (a simple grid); outside of this table other HTML entities are allowed (excepting a second table).
=head1 PREREQUISITES
HTML::TreeBuilder
=head1 SCRIPT CATEGORIES
Web
=head1 BUGS
=head1 AUTHOR
Bernard Weiler; Siemens ICN TR ON DA; 2.99; Bernard.Weiler@icn.siemens.de
=head1 FUNCTIONS
=cut
;
#use IgnoreUserlocal;
use strict;
no strict 'vars';
use HTML::TreeBuilder;
#@EXPORT=qw(Init SortColumns Watherfall SortRows FilterRows MkRowSpan);
=head2 Init()
Has to be called one before other subs.
returns reference to a parse-tree (called $p in other subs)
=cut
sub Init($){
my $Input=shift;
my $Tree=HTML::TreeBuilder->new;
$Tree->implicit_tags(1);
$Tree->parse($Input);
return $Tree;
}
=head2 SortColumns($p,1,2,3,...)
Sorts columns according to the column number 1,2,3...
returns as_HTML.
=cut
sub SortColumns($;@){
my $Tree=shift;
local @Conf=@_;
$Tree->traverse(\&_SortColumns);
return $Tree->as_HTML();
}
sub _SortColumns(){
my $Node=shift;
return 1 unless(ref $Node);
if($Node->tag() eq 'table'){
#$Node->attr('cols',scalar @Conf);
return 1;
}
return 1 unless($Node->tag() eq 'tr');
my @il;
foreach(@{$Node->content()}){
next unless(ref $_);
unless(($_->tag() eq 'td')or($_->tag() eq 'th')){
warn"Internal error: non-TD found within TR: ".$_->tag();
next;
}
push(@il,$_);
#print STDERR "element ".((ref $_)? $_->tag(): $_) ."\n";
}
#print STDERR "count ".scalar @il."\n";
@{$Node->content()}=();
foreach(@Conf){
if(($_<0)or($_>=scalar(@il))){
warn"Illegal TD position: $_";
next;
}
$Node->push_content($il[$_]);
}
return 0;
}
=head2 Watherfall($p)
Repeats heading cell-content for every cell containing the special string '""'.
returns as_HTML.
=cut
sub Watherfall($){
my $Tree=shift;
local @Elements;
$Tree->traverse(\&_Watherfall);
return $Tree->as_HTML();
}
sub _Watherfall(){
my $Node=shift;
return 1 unless(ref $Node);
return 1 unless($Node->tag() eq 'tr');
my $ii=-1;
foreach(@{$Node->content()}){
next unless(ref $_);
next unless(($_->tag() eq 'td')or($_->tag() eq 'th'));
$ii++;
my $is=$_->as_HTML;
$is=~s|<.*?>||g;
if($is=~/^\s*""\s*$/){@{$_->content()}=@{$Elements[$ii]->content()}}
else{$Elements[$ii]=$_}
}
return 0;
}
=head2 SortRows($p,$Column)
Sorts the rows of a table according to the strings in column $Column.
Returns as_HTML.
=cut
sub SortRows($$){
my $Tree=shift;
local $Column=shift;
local (%Elements,@Elements2,$Table,$Th);
$Tree->traverse(\&_SortRows1);
my @il;
push(@il,$Th) if($Th);
foreach my $El (sort keys %Elements){
push(@il,@{$Elements{$El}});
}
@{$Table->content()}=@il;
return $Tree->as_HTML();
}
=head2 FilterRows($p,$Column,$Accept,$Deny)
Drop the rows of a table according to the $Accept and $Deny RegExp test for column $Column.
Returns as_HTML.
Ommit $Deny by setting $Deny = ''. Accept all by setting $Accept = '.*'.
=cut
sub FilterRows($$$$){
my $Tree=shift;
local $Column=shift;
local $Accept=shift;
local $Deny=shift;
local (%Elements,@Elements2,$Table,$Th);
$Tree->traverse(\&_SortRows1);
my @il;
push(@il,$Th) if($Th);
foreach my $El (@Elements2){
push(@il,$El->[0]) if(($El->[1] =~ $Accept)and(($Deny eq '')or($El->[1] !~ $Deny)));
}
@{$Table->content()}=@il;
return $Tree->as_HTML();
}
sub _SortRows1(){
my $Node=shift;
return 1 unless(ref $Node);
if($Node->tag() eq 'table'){
$Table=$Node;
return 1;
}
return 1 unless($Node->tag() eq 'tr');
my $ii=-1;
foreach(@{$Node->content()}){
next unless(ref $_);
if($_->tag() eq 'th'){
$Th=$Node;
return 0;
}
$ii++;
#print STDERR $_->tag." $ii $Column\n";
next unless($ii == $Column);
warn"TD tag expected" unless($_->tag() eq 'td');
#print STDERR $_->tag;
my $is=$_->as_HTML;
$is=~s|<.*?>||g;
$is=~s|\s+| |g;
$is=~s/^\s+|\s+$//g;
#print STDERR $is;
#print STDERR $_->tag." $is $Column\n";
$Elements{$is}=[] unless(exists $Elements{$is});
push(@{$Elements{$is}},$Node);
push(@Elements2,[$Node,$is]);
}
return 0;
}
=head2 MkRowSpan($p,$ColNr)
Adjust RowSpan for every cell-tupel with simila content.
The optional $ColNr restrict adjusting to Column $ColNr.
Returns as_HTML.
=cut
sub MkRowSpan($;$){
my $Tree=shift;
local $ColNr=shift;
$ColNr=-1 unless defined $ColNr;
local (@Elements);
$Tree->traverse(\&_MkRowSpan);
return $Tree->as_HTML();
}
sub _MkRowSpan(){
my $Node=shift;
return 1 unless(ref $Node);
return 1 unless($Node->tag() eq 'tr');
my $ii=-1;
foreach(@{$Node->content()}){
next unless(ref $_);
#print STDERR $_->tag." $ii $Column\n";
next unless($_->tag() eq 'td');
$ii++;
#print STDERR $_->tag;
unless(defined $Elements[$ii]){
$Elements[$ii]=$_;
next;
}
my $is=$_->as_HTML;
$is=~s|<.*?>||g;
$is=~s|\s+| |g;
$is=~s/^\s+|\s+$//g;
my $iss=$Elements[$ii]->as_HTML;
$iss=~s|<.*?>||g;
$iss=~s|\s+| |g;
$iss=~s/^\s+|\s+$//g;
#print STDERR ">$is $iss<\n" if($iss =~/mega/);
if((($ColNr<0)or($ColNr == $ii))and($is eq $iss)){
$_='';
my $iii=$Elements[$ii]->attr('rowspan');
$iii=(defined $iii)? $iii+1: 2;
$Elements[$ii]->attr('rowspan',$iii);
}
else{
$Elements[$ii]=$_;
$Elements[$ii]->attr('rowspan',1);
}
}
return 0;
}
#package main;
#use HTML::TableManipulate;
my $File=shift;
my $Header=$File;
$Header=~s|\.html||;
die"require name of TabOrig" unless $File;
my $Input='';
open(FH,$File) or die "File $File not readable";
while(){$Input.=$_}
close(FH);
my $Tree;
foreach my $ii (qw(2 3 4 5 6 7 8)){
#foreach my $ii (qw(5)){
open(FH,">${Header}_Col$ii.html");
$Tree=Init($Input);
FilterRows($Tree,$ii,".+","");
print FH SortColumns($Tree,$ii,0,1,2,3,4,5,6,7,8,9,10);
#print FH SortRows($Tree,1);
$Tree->delete();
close FH;
}
exit 0;