package Proxy::HTML;   # I guess this is a bad name....

#
# $Id: HTML.pm,v 1.3 1997/10/15 00:24:11 abigail Exp abigail $
#
# $Log: HTML.pm,v $
# Revision 1.3  1997/10/15 00:24:11  abigail
# On the fly configuration.
# Host dependent image selection.
#
# Revision 1.2  1997/08/24 05:15:11  abigail
# Configurable ignoring tags/attributes. Either unconditionally,
# or by looking at the attribute value.
# Rewriting <img> tags if images are not wanted.
#
# Revision 1.1  1997/08/23 09:32:05  abigail
# Initial revision
#
#
#
# Strip nasty tags/attributes from HTML text.

use strict;
use Carp;

use vars qw /@ISA/;
use vars qw /$IMG %NO_TAGS %NO_ATTR %NO_TAG_BY_ATTR/;

use HTML::Parser;

@ISA = qw /HTML::Parser/;

sub deal_with_img ($$);

my $INLINE = "[INLINE]";   # Text of an inline image with no alt text.

# We mask new to be able to get the inline status.
sub new ($$) {
    my $class = shift;
    my $self  = new HTML::Parser;
    bless $self, $class;
}


# To do interesting things, we keep track of the parsed and modified
# part of html text in an instance variable "mod_text". It is set to ''
# by masking parse ().

sub parse ($@) {
    my $self                =   shift;
    $self -> {mod_text}     =  '';
    $self -> {response}     =   shift;
    $self -> {host}         =  $self -> {response} -> base -> host;
    $self -> {content}      =  $self -> {response} -> content;
    $self -> {last_tag}     =  'NIL';
    $self -> {src_matches}  =   [];
    foreach my $host (keys %$IMG) {
        if ($self -> {host} =~ $host) {
            $self -> {src_matches} = $IMG -> {$host};
            last;
        }
    }
    $self -> netscape_buggy_comment (1);  # Silly netscape.
    $self -> SUPER::parse ($self -> {content});
}

# Return the modified text.
sub parsed_text ($) {
    my $self = shift;
    $self -> {mod_text};
}

# Now we mask the various "call back" functions.

# Declarations are just copied. (But what about comments?)
sub declaration ($$) {
    my $self = shift;
    my $decl = shift;
    $self -> {mod_text} .= "<!$decl>";
}

# This is the interesting function.
# For now, just strip font tags.
sub start ($$$$$) {
    my $self = shift;
    my ($tag, $attributes, $attrseq, $origtext) = @_;

    # Skip multiple <BR>s.
    return if ($tag eq 'br' && $self -> {last_tag} eq 'br');
    $self -> {last_tag} = $tag;

    if ($NO_TAG_BY_ATTR {$tag}) {
        foreach my $attribute (keys %$attributes) {
            my $sub = $NO_TAG_BY_ATTR {$tag} -> {$attribute} or next;
            return if $sub -> ($attributes -> {$attribute}, $self);
        }
    }

    if ($NO_ATTR {$tag}) {
        foreach my $attribute (keys %$attributes) {
            my $sub = $NO_ATTR {$tag} -> {$attribute} or next;
            if ($sub -> ($attributes -> {$attribute}, $self)) {
                delete $attributes -> {$attribute};
            }
        }
        $self -> {mod_text} .= $self -> build_tag ($tag,  $attributes);
        return;
    }
    $self -> {mod_text} .= $origtext unless $NO_TAGS {$tag};
}

sub build_tag ($$$) {
    my $self = shift;
    my ($tag, $attributes) = @_;
    my $line = "<$tag ";
    foreach my $attr (keys %$attributes) {
        # We lost the info whether it was single or double quote delimited.
        # So we take the "safe" approach and use double quote delimited,
        # but first we need to eliminate the existing double quotes.
        $attributes -> {$attr} =~ s/"/&quot;/g;
        $line .= qq ($attr = "$attributes->{$attr}" );
    }
    $line .= ">";
}

# Copy end tags, except for those being filtered out.
sub end ($$) {
    my $self = shift;
    my $tag  = shift;
    do {$self -> {mod_text} .= "</$tag>";
        $self -> {last_tag}  = 'NIL';} unless $NO_TAGS {$tag};
}

# Text is just copied.
sub text ($$) {
    my $self = shift;
    my $text = shift;
    $self -> {last_tag}  = 'NIL' if $text =~ /\S/;
    $text =~ s/(?:&nbsp;?|&#160;?)(?:&nbsp;?|&#160;?|\s)+/ /g;
    $self -> {mod_text} .= $text;
}

# Comments are just copied.
# Hmm, 'man HTML::Parser' is unclear. Comments *are* part of a document
# declaration. And you can have multiple comments in a document declarion.
# Hope this will work....
sub comment ($$) {
    my $self    = shift;
    my $comment = shift;
    $self -> {mod_text} .= "<!--$comment-->";
}


# Replace the image with the alt text.
sub img_to_alt ($$) {
    my $self       = shift;
    my $attributes = shift;

    defined $attributes -> {alt} ? $attributes -> {alt} : $INLINE;
}


sub deal_with_img ($$) {
    my ($src, $self) = @_;

    foreach my $s (@{$self -> {src_matches}}, @{$IMG -> {ALL}}) {
        return 1 if $src =~ /$s/;
    }
}

1;