#!/data/asictools/bin/perl -w package HTML::TableManipulate =head1 NAME HTML::TableManipulate - manipulate the rows and columns of a HTML document containing a single table. =head1 SYNOPSIS use HTML::TableManipulate open(FH,$File) or die "File $File not readable"; while(){$Input.=$_} close(FH); open(FH,">Status.html"); my $Tree=Init($Input); SortColumns($Tree,3,2,1,0,4,5,6); print FH SortRows($Tree,0); $Tree->delete(); close FH; =head1 DESCRITION This package reads a HTML document as string containing a single table without colspan or rowspan attributes (a simple grid); outside of this table other HTML entities are allowed (excepting a second table). =head1 PREREQUISITES HTML::TreeBuilder =head1 SCRIPT CATEGORIES Web =head1 BUGS =head1 AUTHOR Bernard Weiler; Siemens ICN TR ON DA; 2.99; Bernard.Weiler@icn.siemens.de =head1 FUNCTIONS =cut ; #use IgnoreUserlocal; use strict; no strict 'vars'; use HTML::TreeBuilder; #@EXPORT=qw(Init SortColumns Watherfall SortRows FilterRows MkRowSpan); =head2 Init() Has to be called one before other subs. returns reference to a parse-tree (called $p in other subs) =cut sub Init($){ my $Input=shift; my $Tree=HTML::TreeBuilder->new; $Tree->implicit_tags(1); $Tree->parse($Input); return $Tree; } =head2 SortColumns($p,1,2,3,...) Sorts columns according to the column number 1,2,3... returns as_HTML. =cut sub SortColumns($;@){ my $Tree=shift; local @Conf=@_; $Tree->traverse(\&_SortColumns); return $Tree->as_HTML(); } sub _SortColumns(){ my $Node=shift; return 1 unless(ref $Node); if($Node->tag() eq 'table'){ #$Node->attr('cols',scalar @Conf); return 1; } return 1 unless($Node->tag() eq 'tr'); my @il; foreach(@{$Node->content()}){ next unless(ref $_); unless(($_->tag() eq 'td')or($_->tag() eq 'th')){ warn"Internal error: non-TD found within TR: ".$_->tag(); next; } push(@il,$_); #print STDERR "element ".((ref $_)? $_->tag(): $_) ."\n"; } #print STDERR "count ".scalar @il."\n"; @{$Node->content()}=(); foreach(@Conf){ if(($_<0)or($_>=scalar(@il))){ warn"Illegal TD position: $_"; next; } $Node->push_content($il[$_]); } return 0; } =head2 Watherfall($p) Repeats heading cell-content for every cell containing the special string '""'. returns as_HTML. =cut sub Watherfall($){ my $Tree=shift; local @Elements; $Tree->traverse(\&_Watherfall); return $Tree->as_HTML(); } sub _Watherfall(){ my $Node=shift; return 1 unless(ref $Node); return 1 unless($Node->tag() eq 'tr'); my $ii=-1; foreach(@{$Node->content()}){ next unless(ref $_); next unless(($_->tag() eq 'td')or($_->tag() eq 'th')); $ii++; my $is=$_->as_HTML; $is=~s|<.*?>||g; if($is=~/^\s*""\s*$/){@{$_->content()}=@{$Elements[$ii]->content()}} else{$Elements[$ii]=$_} } return 0; } =head2 SortRows($p,$Column) Sorts the rows of a table according to the strings in column $Column. Returns as_HTML. =cut sub SortRows($$){ my $Tree=shift; local $Column=shift; local (%Elements,@Elements2,$Table,$Th); $Tree->traverse(\&_SortRows1); my @il; push(@il,$Th) if($Th); foreach my $El (sort keys %Elements){ push(@il,@{$Elements{$El}}); } @{$Table->content()}=@il; return $Tree->as_HTML(); } =head2 FilterRows($p,$Column,$Accept,$Deny) Drop the rows of a table according to the $Accept and $Deny RegExp test for column $Column. Returns as_HTML. Ommit $Deny by setting $Deny = ''. Accept all by setting $Accept = '.*'. =cut sub FilterRows($$$$){ my $Tree=shift; local $Column=shift; local $Accept=shift; local $Deny=shift; local (%Elements,@Elements2,$Table,$Th); $Tree->traverse(\&_SortRows1); my @il; push(@il,$Th) if($Th); foreach my $El (@Elements2){ push(@il,$El->[0]) if(($El->[1] =~ $Accept)and(($Deny eq '')or($El->[1] !~ $Deny))); } @{$Table->content()}=@il; return $Tree->as_HTML(); } sub _SortRows1(){ my $Node=shift; return 1 unless(ref $Node); if($Node->tag() eq 'table'){ $Table=$Node; return 1; } return 1 unless($Node->tag() eq 'tr'); my $ii=-1; foreach(@{$Node->content()}){ next unless(ref $_); if($_->tag() eq 'th'){ $Th=$Node; return 0; } $ii++; #print STDERR $_->tag." $ii $Column\n"; next unless($ii == $Column); warn"TD tag expected" unless($_->tag() eq 'td'); #print STDERR $_->tag; my $is=$_->as_HTML; $is=~s|<.*?>||g; $is=~s|\s+| |g; $is=~s/^\s+|\s+$//g; #print STDERR $is; #print STDERR $_->tag." $is $Column\n"; $Elements{$is}=[] unless(exists $Elements{$is}); push(@{$Elements{$is}},$Node); push(@Elements2,[$Node,$is]); } return 0; } =head2 MkRowSpan($p,$ColNr) Adjust RowSpan for every cell-tupel with simila content. The optional $ColNr restrict adjusting to Column $ColNr. Returns as_HTML. =cut sub MkRowSpan($;$){ my $Tree=shift; local $ColNr=shift; $ColNr=-1 unless defined $ColNr; local (@Elements); $Tree->traverse(\&_MkRowSpan); return $Tree->as_HTML(); } sub _MkRowSpan(){ my $Node=shift; return 1 unless(ref $Node); return 1 unless($Node->tag() eq 'tr'); my $ii=-1; foreach(@{$Node->content()}){ next unless(ref $_); #print STDERR $_->tag." $ii $Column\n"; next unless($_->tag() eq 'td'); $ii++; #print STDERR $_->tag; unless(defined $Elements[$ii]){ $Elements[$ii]=$_; next; } my $is=$_->as_HTML; $is=~s|<.*?>||g; $is=~s|\s+| |g; $is=~s/^\s+|\s+$//g; my $iss=$Elements[$ii]->as_HTML; $iss=~s|<.*?>||g; $iss=~s|\s+| |g; $iss=~s/^\s+|\s+$//g; #print STDERR ">$is $iss<\n" if($iss =~/mega/); if((($ColNr<0)or($ColNr == $ii))and($is eq $iss)){ $_=''; my $iii=$Elements[$ii]->attr('rowspan'); $iii=(defined $iii)? $iii+1: 2; $Elements[$ii]->attr('rowspan',$iii); } else{ $Elements[$ii]=$_; $Elements[$ii]->attr('rowspan',1); } } return 0; } #package main; #use HTML::TableManipulate; my $File=shift; my $Header=$File; $Header=~s|\.html||; die"require name of TabOrig" unless $File; my $Input=''; open(FH,$File) or die "File $File not readable"; while(){$Input.=$_} close(FH); my $Tree; foreach my $ii (qw(2 3 4 5 6 7 8)){ #foreach my $ii (qw(5)){ open(FH,">${Header}_Col$ii.html"); $Tree=Init($Input); FilterRows($Tree,$ii,".+",""); print FH SortColumns($Tree,$ii,0,1,2,3,4,5,6,7,8,9,10); #print FH SortRows($Tree,1); $Tree->delete(); close FH; } exit 0;