#!/nfs1/local/bin/perl -Tw # # $Id: proxy.pl,v 1.3 1997/10/15 00:20:26 abigail Exp $ # # $Log: proxy.pl,v $ # Revision 1.3 1997/10/15 00:20:26 abigail # Added on the fly configuration. # # Revision 1.2 1997/08/24 05:14:58 abigail # Added $IGNORE feature. # # Revision 1.1 1997/08/23 09:35:25 abigail # Initial revision # # # # A proxy server that strips out nasty HTTP headers, and nasty HTML tags. # The proxy part is inspired on Randal's Webtechniques column 11. # [http://www.stonehenge.com/merlyn/WebTechniques/col11.html]. use strict; use Carp; use lib qw (/nfs1/home/abigail/Perl); use LWP::UserAgent; use HTTP::Daemon; use Proxy::HTML; sub handle_connection ($); sub fetch ($); sub illegal (%); use vars qw /@NO_REQ_HEADERS @NO_RES_HEADERS/; # This is just to satisfy -T, we don't call other programs. $ENV {PATH} = join ':', qw (/usr/bin); # /bin -> /usr/bin on Solaris. # On the fly configuration file. # Hardcoded file name - not very nice. my $Config = "/nfs1/home/abigail/Perl/Proxy/Config.pm"; # Configuration. my $ME = "localhost"; # The machine I run on. my $LISTEN_PORT = shift || 8888; # That's the port I'll listen to. my $PROXY = "gatekeeper.fnx.com"; # The machine the proxy runs on. my $PROXY_PORT = 8080; # The proxy's port. my @PROXY_SCHEMES = qw /http ftp/; # Proxy accepts this. undef $PROXY; # We don't need the other proxy anymore. my $FORBIDDEN = 403; # HTTP Response code. my $FORBIDDEN_TEXT = 'Forbidden'; # Text of the above. # How I want to be named. # RCS will fill in the version id. my $NAME = q ; my $last_required = 0; # Initialize the daemon. my $daemon = new HTTP::Daemon LocalAddr => $ME, LocalPort => $LISTEN_PORT or croak "HTTP Proxy failed to initialize: $!"; # Initialize agent. my $agent = new LWP::UserAgent; $agent -> agent ($NAME); if (defined $PROXY) { $agent -> proxy (\@PROXY_SCHEMES, "http://$PROXY:$PROXY_PORT/"); } $agent -> env_proxy (); # Set up whatever else you want. # Zombies are bad. $SIG {CHLD} = sub {wait;}; warn "Proxy on $ME ($LISTEN_PORT) initialized\n"; # And loop forever. while (my $slave = $daemon -> accept) { # Reconfigure if the config file changed. my $mod = (stat $Config) [9]; if ($mod > $last_required) { delete $INC {$Config}; require $Config; $last_required = $mod; } handle_connection $slave; } # That's all folks. exit 0; # This handles the connection. Fork, and let the child deal with # the stuff. Child exits after dealing with the stuff. sub handle_connection ($) { my $connection = shift; my $pid = fork; if ($pid) { close $connection; # Can I do $connection -> close; ? If no, why not? return; } my $request = $connection -> get_request; if (defined $request) { my $response = fetch $request; $connection -> send_response ($response); close $connection; } exit 0 if defined $pid; } # Check if a request is valid. If it is, strip info from the client, # fetch the stuff, strip info from the server, and return the result. # For illegal stuff, return an error message. sub fetch ($) { my $request = shift; # Check if valid. if ($request -> url -> scheme ne 'http') { return illegal url => $request -> url, reason => "bad scheme"; } # print "Doing request: ", $request -> url -> as_string, "\n"; my $response = eval {$agent -> request ($request -> strip) -> strip;}; print scalar localtime, "\n$@\n\n" if $@; $response; # Will generate a 'document contains no data' if there's # an error. Needs a fix. } # Create a forbidden response. sub illegal (%) { my %args = @_; my $response = new HTTP::Response $FORBIDDEN, $FORBIDDEN_TEXT; # There's only one illegal action for now.... foreach ($args {reason}) { /^bad scheme/ && do { $response -> content ("Illegal scheme: $args{url}"); last; }; # Any... $response -> content ("Illegal action."); } $response; } # This sub routine strips out HTTP headers we do not want to send. # It's just an extra function added to HTTP::Request. sub HTTP::Request::strip ($) { my $self = shift; $self -> remove_header (@NO_REQ_HEADERS); $self; # Return self for extra coolness. } # Strip out HTTP response headers we do not want to send to the client. # And if the returned type is of text/html, filter it. # We put this function into HTTP::Response. sub HTTP::Response::strip ($) { my $self = shift; $self -> remove_header (@NO_RES_HEADERS); if ($self -> content_type eq 'text/html') { my $html = new Proxy::HTML; # Replace content with the parsed text after parsing the content. $self -> content ($html -> parse ($self) -> parsed_text); } $self; # Return self for extra coolness. }