and is ignored. I do not
* Anything between or and - will not be ignored, but will
really mess Latex up.
=cut
################### END DOCUMENTATION #######################
################### BEGIN DEFINITIONS #######################
# Probe for the optional modules. Each one that loads successfully is
# flagged in %present; code further down checks the flag before using
# the module (URI and LWP::Simple in get_uri, Image::Magick in
# convert_image).
$present{'URI'}           = 1 if eval { require URI;           1 };
$present{'LWP::Simple'}   = 1 if eval { require LWP::Simple;   1 };
$present{'Image::Magick'} = 1 if eval { require Image::Magick; 1 };
# Dispatch table. The configuration file assigns a "type" to every tag;
# this hash maps each type name onto the handler sub that texify()
# calls for tags of that type.
my %types = (
    command     => \&command_handler,
    environment => \&environment_handler,
    single      => \&single_handler,
    ignore      => \&texify,
    other       => \&other_handler,
    kill        => sub { return "" },
    image       => \&image_handler,
    table       => \&table_handler,
    pre         => \&pre_handler,           # Experimental; don't use
);
# Some characters typed in HTML need to be altered to be correct in
# Latex. Each entry is [pattern, replacement]; texify() applies the
# entries in this exact order, so the order matters: '$' and '\' must be
# escaped before any later replacement introduces new '$' or '\'
# characters. After the ASCII specials come the Latin-1 characters
# 161-255. A * next to the comment means the replacement doesn't really
# work or is faked. If an entry is commented out, that character is not
# translated at all.
my @specials = (
    # The pattern of this entry had been lost (it was empty, and an empty
    # pattern is special-cased by s///); restored to strip HTML comments
    # that end up inside a text node.
    ['<!--(?s:.*?)-->', ''              ],   #comments
    ['\$'          , '\$'               ],
    ['\\\\(?!\$)', "\$\\backslash\$"],        #\
    ['<'           , '$<$'              ],
    ['>'           , '$>$'              ],
    ['&'           , '\&'               ],
    ['%'           , '\%'               ],
    ['#'           , '\#'               ],
    ['\{'          , '\{'               ],
    ['}'           , '\}'               ],
    ['_'           , '\_'               ],
    ['\^'          , '\^{}'             ],
    [chr(161), '!`'                     ],   #¡
    #[chr(162), ''                      ],   #¢*
    [chr(163), '{\\pounds}'             ],   #£
    #[chr(164), ''                      ],   #¤*
    [chr(165), '{Y\hspace*{-1.4ex}--}'],     #¥*
    [chr(166), '$|$'                    ],   #¦*
    [chr(167), '{\\S}'                  ],   #§
    [chr(168), '\\"{}'                  ],   #¨
    [chr(169), '{\\copyright}'          ],   #©
    [chr(170), '$^{\underline{a}}$'],        #ª*
    [chr(171), '<<'                     ],   #«
    [chr(172), '$\\neg$'                ],   #¬
    [chr(173), '\\-'                    ],   #soft hyphen: discretionary break, prints nothing
    #[chr(174), ''                      ],   #®*
    [chr(175), '$^-$'                   ],   #¯
    [chr(176), '$^{\\circ}$'            ],   #°
    [chr(177), '$\\pm$'                 ],   #±
    [chr(178), '$^2$'                   ],   #²
    [chr(179), '$^3$'                   ],   #³
    [chr(180), '$^\\prime$'             ],   #´
    [chr(181), '$\\mu$'                 ],   #µ
    [chr(182), '{\P}'                   ],   #¶
    [chr(183), '$\cdot$'                ],   #·
    [chr(184), ','                      ],   #¸*
    [chr(185), '$^1$'                   ],   #¹
    [chr(186), '$^{\\underline{\\circ}}$'],  #º*
    [chr(187), '>>'                     ],   #»
    [chr(188), '$\frac{1}{4}$'          ],   #¼
    [chr(189), '$\frac{1}{2}$'          ],   #½
    [chr(190), '$\frac{3}{4}$'          ],   #¾
    [chr(191), '?`'                     ],   #¿
    [chr(192), '\\`A'                   ],   #À
    [chr(193), '\\\'A'                  ],   #Á
    [chr(194), '\\^A'                   ],   #Â
    [chr(195), '\\~A'                   ],   #Ã
    [chr(196), '\\"A'                   ],   #Ä
    [chr(197), '{\\AA}'                 ],   #Å
    [chr(198), '{\\AE}'                 ],   #Æ
    [chr(199), '\\c{C}'                 ],   #Ç
    [chr(200), '\\`E'                   ],   #È
    [chr(201), '\\\'E'                  ],   #É
    [chr(202), '\\^E'                   ],   #Ê
    [chr(203), '\\"E'                   ],   #Ë
    [chr(204), '\\`I'                   ],   #Ì
    [chr(205), '\\\'I'                  ],   #Í
    [chr(206), '\\^I'                   ],   #Î
    [chr(207), '\\"I'                   ],   #Ï
    [chr(208), '{D\\hspace*{-1.7ex}-\\hspace{.9ex}}'], #Ð*
    [chr(209), '\\~N'                   ],   #Ñ
    [chr(210), '\\`O'                   ],   #Ò
    [chr(211), '\\\'O'                  ],   #Ó
    [chr(212), '\\^O'                   ],   #Ô
    [chr(213), '\\~O'                   ],   #Õ
    [chr(214), '\\"O'                   ],   #Ö
    [chr(215), '$\\times$'              ],   #× (multiplication sign, not the Greek chi)
    [chr(216), '{\\O}'                  ],   #Ø
    [chr(217), '\\`U'                   ],   #Ù
    [chr(218), '\\\'U'                  ],   #Ú
    [chr(219), '\\^U'                   ],   #Û
    [chr(220), '\\"U'                   ],   #Ü
    [chr(221), '\\\'Y'                  ],   #Ý*
    [chr(222), 'P'                      ],   #Þ*
    [chr(223), '{\\ss}'                 ],   #ß ("s only works with german.sty/babel)
    [chr(224), '\\`a'                   ],   #à
    [chr(225), '\\\'a'                  ],   #á
    [chr(226), '\\^a'                   ],   #â
    [chr(227), '\\~a'                   ],   #ã
    [chr(228), '\\"a'                   ],   #ä
    [chr(229), '\\r{a}'                 ],   #å
    [chr(230), '{\ae}'                  ],   #æ
    [chr(231), '\\c{c}'                 ],   #ç
    [chr(232), '\\`e'                   ],   #è
    [chr(233), '\\\'e'                  ],   #é
    [chr(234), '\\^e'                   ],   #ê
    [chr(235), '\\"e'                   ],   #ë
    [chr(236), '\\`{\i}'                ],   #ì
    [chr(237), '\\\'{\\i}'              ],   #í
    [chr(238), '\\^{\\i}'               ],   #î
    [chr(239), '\\"{\\i}'               ],   #ï
    [chr(240), '\\v{o}'                 ],   #ð*
    [chr(241), '\\~n'                   ],   #ñ
    [chr(242), '\\`o'                   ],   #ò
    [chr(243), '\\\'o'                  ],   #ó
    [chr(244), '\\^o'                   ],   #ô
    [chr(245), '\\~o'                   ],   #õ
    [chr(246), '\\"o'                   ],   #ö
    [chr(247), '$\\div$'                ],   #÷
    [chr(248), '{\\o}'                  ],   #ø
    [chr(249), '\\`u'                   ],   #ù
    [chr(250), '\\\'u'                  ],   #ú
    [chr(251), '\\^u'                   ],   #û
    [chr(252), '\\"u'                   ],   #ü
    [chr(253), '\\\'y'                  ],   #ý
    [chr(254), 'p'                      ],   #þ*
    [chr(255), '\\"y'                   ],   #ÿ
);
# Compile the matchings: replace every pattern string in @specials with
# its precompiled regex object.
$_->[0] = qr/$_->[0]/ for @specials;
################### END DEFINITIONS #######################
# Parameters for HANDLERs and SUBs are shown as <N> and [N]. N is the
# number of the parameter, starting with 1. So, the first parameter
# would be <1>, the second <2>, and so on. <N> means the parameter is
# mandatory. [N] means it is optional.
##################### BEGIN METHODS ######################
# Constructor: builds the converter object from an XML configuration.
# [1] name of an XML configuration file. When omitted, the built-in
#     configuration stored after __DATA__ is used.
# Returns the parsed configuration hash blessed into the class.
# Dies when a named configuration file cannot be opened.
{
    # Text of the built-in configuration. DATA can only be read once, so
    # it is slurped on first use and reused by later calls to new().
    my $default_conf_xml;

    sub new {
        my ($class,$conffilename) = @_;

        my $conffile;
        if (defined $conffilename) {
            # The mode is passed separately so that odd characters in the
            # file name are never taken for part of an open mode.
            $conffile = IO::File->new($conffilename, 'r')
                or die "FILE: Can't open configuration file $conffilename: $!\n";
        }
        else {
            unless (defined $default_conf_xml) {
                local $/;
                $default_conf_xml = <DATA>;
            }
            # XMLin() parses a string containing markup directly.
            $conffile = $default_conf_xml;
        }
        my $conf = XMLin($conffile, forcearray => ['tex','pre','ban']);

        # moved @banned from an array to a hash for fast lookup later
        my $banned_ref = $conf->{ban} || [];
        $conf->{ban} = {};
        foreach my $banned (@$banned_ref) {
            $conf->{ban}{$banned}++;
        }

        # Normalise the tex strings of every tag (see handlers below):
        # empty elements become empty strings and \N becomes a newline.
        foreach my $tag (keys %{ $conf->{tag} || {} }) {
            foreach my $tex (@{ $conf->{tag}{$tag}{tex} }) {
                $tex = '' if ref $tex;      # {} => ''
                $tex =~ s/\\N/\n/g;         # \N => newline
                # a verbatim tag that is not banned keeps its text untouched
                push @{ $conf->{pre} }, $tag
                    if $tex eq 'verbatim' && !$conf->{ban}{$tag};
            }
        }

        # Open the log. The old test looked at $conf->{conf}{options},
        # which never exists, so a configured log file was never used.
        my $logname = $conf->{options}{log};
        my $log;
        if (defined $logname && !ref $logname && length $logname) {
            $log = FileHandle->new($logname, 'w')
                or warn "FILE: Bad logfile: $logname; logging to STDERR\n";
        }
        $conf->{log} = $log || \*STDERR;

        return bless $conf,$class;
    }
}
# Converts an HTML document to LaTeX using &preamble_handler/&texify.
# <1> The html filename (or URL), or an input filehandle
# [2] The output filehandle; only used when <1> is a filehandle too
# Returns ($html_filename,$tex_filename) when it opened the files
# itself, the result of the tree's parse_file() when it was given
# filehandles, and nothing when there is no usable input and output.
sub html2latex {
    my ($conf,$in,$out) = @_;

    # Package globals, localized so that the handlers called below see
    # this object's configuration for the duration of the call.
    local $packages = $conf->{package} || [];
    local $heads    = $conf->{head}    || [];
    local $tags     = $conf->{tag}     || {};
    local $options  = $conf->{options} || {};
    local $banned   = $conf->{ban}     || {};
    local $pres     = $conf->{pre}     || [];   # texify() expands this with @$pres
    local $LOG      = $conf->{log};

    # expand a leading ~ in the storage directory
    $options->{store} =~ s/^\s*~/$ENV{HOME}/
        if defined $options->{store} && exists $ENV{HOME};
    print $LOG Dumper $conf if ($options->{debug} || 0) > 1;

    # Open the files, unless we were handed two filehandles -- those are
    # left alone.
    my ($filenamein,$filenameout);
    unless (ref $in and ref $out) {
        ($in,$out,$filenamein,$filenameout) = open_files($in,1) if defined($in);
    }
    return unless $in && $out;

    # build the HTML tree
    my $tree = HTML::TreeBuilder->new;
    $tree->warn(1);
    my $result = $tree->parse_file($in);

    # here's where all the big magic happens
    print {$out} preamble_handler($tree->root);

    # destroy the HTML tree
    $tree->delete;

    if ($filenamein && $filenameout) {
        # We opened these handles, so flush and release them here and
        # report a failed write instead of losing it silently.
        close $out or print $LOG "FILE: Can't close $filenameout: $!\n";
        close $in;
        return ($filenamein,$filenameout);
    }
    return $result;    # given filehandles: hand back the result of parse_file
}
# Converts a string of HTML to a string of LaTeX.
# <1> The HTML text
# [2] When true, a complete document (preamble included) is produced;
#     otherwise only the contents of the body are converted.
# Returns the LaTeX text, or nothing when <1> is undefined.
sub parse_string {
    my ($conf,$input,$full) = @_;
    return unless defined($input);

    # Package globals, localized so that the handlers called below see
    # this object's configuration for the duration of the call.
    local $packages = $conf->{package} || [];
    local $heads    = $conf->{head}    || [];
    local $tags     = $conf->{tag}     || {};
    local $options  = $conf->{options} || {};
    local $banned   = $conf->{ban}     || {};
    local $pres     = $conf->{pre}     || [];   # texify() expands this with @$pres
    local $LOG      = $conf->{log};

    # expand a leading ~ in the storage directory
    $options->{store} =~ s/^\s*~/$ENV{HOME}/
        if defined $options->{store} && exists $ENV{HOME};
    print $LOG Dumper $conf if ($options->{debug} || 0) > 1;

    my $tree = HTML::TreeBuilder->new;
    $tree->warn(1);
    $tree->parse($input);
    # parse() may hold back trailing text until it is told that the
    # input has ended.
    $tree->eof;

    my $result;
    if ($full) {
        $result = preamble_handler($tree->root);    # whole file
    }
    else {
        my $body = $tree->find_by_tag_name('body');
        $result = $body ? texify($body) : '';
    }
    $tree->delete;
    return $result;
}
# Sets options for running html2latex.
# <1> is a hash reference of option names and their new values. An
#     entry whose value is undefined leaves that option untouched.
# Returns nothing.
sub set_option {
    my ($conf,$options) = @_;
    while (my ($key,$value) = each %$options) {
        next unless defined($value);
        $conf->{options}{$key} = $value;
    }
    return;
}
# Appends the given names to the object's package list, which
# html2latex() hands to the preamble as the \usepackage list.
sub add_package {
    my ($conf,@names) = @_;
    push @{ $conf->{package} }, @names;
}
# Appends the given entries to the object's head list, which
# html2latex() hands to the preamble as extra \documentclass options.
sub add_head {
    my ($conf,@entries) = @_;
    push @{ $conf->{head} }, @entries;
}
# Marks every given tag as banned, so texify() will not call a handler
# for it.
sub ban_tag {
    my ($conf,@tags) = @_;
    $conf->{ban}{$_}++ for @tags;
}
# Sets the log of the object.
# <1> either an already open filehandle (any reference), which is used
#     as is, or the name of a file, which is opened for writing.
# Returns the log filehandle; dies when the file cannot be opened.
sub set_log {
    my ($conf,$logfile) = @_;
    unless (ref $logfile) {
        my $handle = FileHandle->new($logfile,'w');
        $conf->{log} = $handle;
        die "FILE: Bad logfile: $logfile" unless $handle;
        return $handle;
    }
    return $conf->{log} = $logfile;
}
##################### END METHODS #########################
##################### BEGIN HANDLERS #######################
# All HANDLERs are called like so:
# &HANDLER($html_element,@tex);
# @tex is a list of latex strings. $html_element is a node in the HTML
# tree; see the HTML::Element man page for more on that.
# Anyway, the comments for each HANDLER represent the starting HTML
# string and the output tex string. Anything in between HTML tags is
# recursively texified by the big sub &texify, which then calls other
# HANDLERs.
# HTML input form: <TAG>Bar</TAG>
# Latex output form: \command{bar}
# <1> the element; <2> the name of the latex command
sub command_handler {
    my ($html_element,$command) = @_;
    my $contents = texify($html_element);
    return '\\' . $command . '{' . $contents . "}\n";
}
# HTML input form: <TAG>Bar</TAG>
# Latex output form: tex1 bar tex2
# <1> the element; <2> the text put before the contents; <3> the text
# put after them
sub other_handler {
    my ($html_element,$before,$after) = @_;
    return $before . texify($html_element) . $after;
}
# HTML input form: <TAG>Bar</TAG>
# Latex output form: \begin{tex} Bar \end{tex}
# <1> the element; <2> the name of the latex environment
sub environment_handler {
    my ($html_element,$environment) = @_;
    my $opening = '\begin{' . $environment . "}\n";
    my $closing = '\end{'   . $environment . "}\n";
    return $opening . texify($html_element) . "\n" . $closing;
}
# HTML input form: <TAG>Bar (implicit end)
# Latex output form: \tex Bar
# <1> the element; <2> the latex text put in front of the contents
sub single_handler {
    my ($html_element,$single) = @_;
    my $contents = texify($html_element);
    return "$single $contents\n";
}
# HTML input form: <PRE>Bar</PRE>
# Latex output has all of the spaces made into hard spaces and
# newlines into hard newlines, because latex doesn't want to respect
# whitespace. The text is taken with as_text, so nothing else in it is
# converted. It's very experimental. One should really just use the
# verbatim environment, but what the heck, give people the option.
# <1> the element
sub pre_handler {
    my ($element) = @_;
    my $text = $element->as_text;
    # one pass: a space becomes "\ ", a newline becomes "\\" plus newline
    $text =~ s/([ \n])/$1 eq ' ' ? '\\ ' : "\\\\\n"/ge;
    return $text;
}
# Does the work of creating a table in latex format.
# It takes <TABLE>, <TR>, and <TD>. For a table it hands the element to
# create_latex_table(), which finds the rows and cells nested inside
# and texifies them while keeping track of when to print latex syntax.
# For the other tags it only texifies the contents. It's messy, I
# know. If anyone would like to improve this, that would be very cool.
# <1> The HTML::Element representing the tag.
# <2> The tex string configured for the tag; "table" marks a table tag.
sub table_handler {
    my ($html_element,$tex) = @_;

    # It's a td or tr: create_latex_table() takes care of "\\" and "&",
    # so only the texified text inside is returned.
    return texify($html_element) unless $tex eq "table";

    # It's a table tag; optionally wrap the whole table in an \mbox.
    my $table = create_latex_table($html_element);
    return $options->{mbox} ? '\mbox{' . $table . '}' : $table;
}
# HTML input form: <IMG SRC="bar.gif" ALT="text">
# Latex output form: \tex[scale=N]{bar.png}
# convert_image() is asked for a usable copy of the image. When images
# are switched off (no scale attribute and no image option) or the
# conversion fails, the ALT text is returned instead.
# <1> The HTML::Element representing the tag.
# <2> The name of the latex command used to include the image.
sub image_handler {
    my ($html_element,$tex) = @_;
    my $source = $html_element->attr('src')   || "";
    my $scale  = $html_element->attr('scale') || $options->{image};
    my $alt    = $html_element->attr('alt')   || "";

    # Declared unconditionally: a "my" buried in a short-circuit
    # condition is only initialised on some paths, yet it was read in
    # the failure branch below.
    my $image = $scale ? convert_image($source,$scale) : undef;
    if ($image) {
        # convert worked
        return "\\$tex\[scale=$scale\]\{$image\} ";
    }

    # convert didn't work or images weren't selected.
    print $LOG "IMG: Couldn't convert $source; using alt\n";
    print $LOG "\tRecieved <" . (defined $image ? $image : '') . ">\n"
        if $options->{debug};
    return $alt;
}
# Builds the preamble followed by the texified document. Not too
# extensive right now, but will become very extensive if I decide to
# parse stuff in the HEAD tag.
# The document class and font size come from the element's "class" and
# "fontsize" attributes, then from the options, then default to
# article and 10.
# <1> The root HTML::Element of the document.
sub preamble_handler {
    my ($html_element,$tex) = @_;
    my $document_class = $html_element->attr('class')
                      || $options->{document_class}
                      || 'article';
    my $font_size = $html_element->attr('fontsize')
                 || $options->{font_size}
                 || 10;

    my $class_options = join ",",  "${font_size}pt", @$heads;
    my $package_list  = join ", ", @$packages;

    my $output = '\documentclass[' . $class_options . ']{' . $document_class . "}\n"
               . '\usepackage{' . $package_list . "}\n";

    # paragraphs separated by vertical space instead of indentation
    if ($options->{paragraph}) {
        $output .= '\setlength{\parskip}{1ex}'   . "\n"
                 . '\setlength{\parindent}{0ex}' . "\n";
    }

    return $output . texify($html_element);
}
###################### END HANDLERS #######################
# Takes an HTML::Element and walks over its children. A child element
# is given to the handler configured for its tag (the handlers call
# texify recursively), or simply texified when it has no handler or is
# banned. A child string gets its quotes and special characters
# converted and its URLs wrapped. Everything is glued into one string.
# <1> The parent HTML::Element
sub texify {
    my $parent_element = shift;
    my $output = "";

    CHILD:
    foreach my $child ($parent_element->content_list) {
        unless (ref $child) {
            # It's just a string
            print $LOG "\t" x ($parent_element->depth + 1), $child if $options->{debug} > 1;
            if (!$parent_element->is_inside(@$pres)) {
                # Characters are only changed outside of tags such as PRE.
                # Quote expansion needs more finesse: a " preceded by
                # anything but whitespace, [, {, ( or ~ closes a quote,
                # every other " opens one.
                $child =~ s/([^\s\[\{\(~])"/$1''/g;
                $child =~ s/"/``/g;
                foreach my $pair (@specials) {
                    $child =~ s/$pair->[0]/$pair->[1]/g;
                }
            }
            $output .= urlify($child);
            next CHILD;
        }

        # This child is another HTML::Element
        my $tag = $child->tag();
        print $LOG "\t" x ($child->depth - 1) . "<$tag> " if $options->{debug};

        my $tag_hash_ref = $tags->{$tag};
        unless ($tag_hash_ref and !$banned->{$tag}) {
            # No handler, or a banned tag: just texify the contents
            print $LOG "has no type \n" if $options->{debug};
            $output .= texify($child);
            next CHILD;
        }

        # The tag has a handler and isn't banned, so use it.
        my $handler_ref = $types{$tag_hash_ref->{type}} or
            die "<$tag> needs a proper type (not $tag_hash_ref->{type})\n";
        my @tex = @{$tag_hash_ref->{tex} || []};
        print $LOG "is of type $tag_hash_ref->{type}: calling handler with \"" .
            join(",",@tex) . "\"\n" if $options->{debug};
        $output .= $handler_ref->($child,@tex);
    }
    return $output;
}
# Opens the necessary files: the HTML input, and a .tex output named
# after it (any extension ending in htm or html is replaced).
# <1> The filename or URL of the HTML document
# [2] passed on to get_uri()
# Returns ($fh_in,$fh_out,$htmlfile,$texfile), or nothing when
# get_uri() cannot find the document. Dies when a file can't be opened.
sub open_files {
    # List context on purpose: get_uri() returns (absolute, relative)
    # and only the absolute local name is sure to be openable from here.
    # A scalar assignment would pick up the relative name instead.
    my ($htmlfile) = get_uri(@_);
    return unless $htmlfile;

    #if filename has anything .*html, then remove the extension
    my ($filename,$path,$suffix) = fileparse($htmlfile,'\.\w*html?');
    my $texfile = "$path$filename.tex";
    check_for_previous_files($texfile);

    # The mode is given separately so that the file name is never
    # interpreted as part of an open mode.
    my $fh_in  = FileHandle->new($htmlfile,'r') or die "Can't open $htmlfile: $!";
    my $fh_out = FileHandle->new($texfile,'w')  or die "Can't open $texfile: $!";
    print $LOG "FILE: Processing $htmlfile and writing to $texfile\n";
    return ($fh_in,$fh_out,$htmlfile,$texfile);
}
# Checks for the existence of a file and moves it to <name>.old .
# <1> The filename
# [2] whether an existing file should be renamed (true) or just left
#     alone (false). The default, when omitted, is to rename.
# Returns the filename.
sub check_for_previous_files {
    my ($filename,$override) = @_;
    # "shift || 1" could never be switched off: an explicit 0 turned
    # back into 1. Only a missing argument gets the default.
    $override = 1 unless defined $override;
    if (-f $filename && $override) {
        if (rename $filename, "$filename.old") {
            print $LOG "FILE: renamed $filename $filename.old\n";
        }
        else {
            print $LOG "FILE: could not rename $filename to $filename.old: $!\n";
        }
    }
    return $filename;
}
# Checks for the existence of a file and logs whether it was
# successfully created.
# <1> filename
# Returns the filename when the file exists, nothing otherwise.
sub check_for_current_files {
    my ($filename) = @_;
    unless (-f $filename) {
        print $LOG "FILE: Failed to create $filename\n";
        return;
    }
    print $LOG "FILE: Successfully created $filename\n";
    return $filename;
}
# Creates a latex table from an html table using the other table sub
# procedures. Only rows (TR, directly inside the table or inside a
# THEAD/TBODY/TFOOT) and their cells (TD and TH) are used; nested
# tables and anything else inside the table are ignored.
# <1> The $html_element that is a table tag.
# Returns the table in latex string form
sub create_latex_table {
    my $table = shift;
    my ($latex_table_ref,undef,$column_number) = create_latex_table_def($table);
    my $border = $table->attr('border') || $options->{border};

    my $output = "\n\n" . '\begin{tabular}{' . $latex_table_ref . '}' . "\n";
    $output .= "\\hline \n" if $border;

    # Pay attention to only the TR tags. (grep 'tr', LIST kept every
    # child, including plain strings that have no content_list method.)
    my @rows;
    foreach my $child ($table->content_list) {
        next unless ref $child;
        if ($child->tag eq 'tr') {
            push @rows, $child;
        }
        elsif ($child->tag =~ /^(?:thead|tbody|tfoot)$/) {
            push @rows, grep { ref $_ && $_->tag eq 'tr' } $child->content_list;
        }
    }

    foreach my $row_index (0 .. $#rows) {
        # pay attention to only the cells inside the TR tags.
        my @columns = grep { ref $_ && $_->tag =~ /^t[dh]$/ }
                      $rows[$row_index]->content_list;
        for my $i (0 .. $column_number - 1) {
            # Make sure to fill in blank ones if necessary
            my $column = $columns[$i];
            # Add the cell data
            $output .= texify($column) if $column;
            # Add the punctuation at the end if not the last one
            $output .= " &" if $i < $column_number - 1;
        }
        # End the row, unless it's the last one of a table without border
        $output .= " \\\\" if $border or $row_index < $#rows;
        $output .= "\n";
        $output .= " \\hline \n" if $border;
    }
    $output .= "\n" . '\end{tabular}' . "\n\n";
    return $output;
}
# Based on the alignments of the columns, creates a latex table
# definition (i.e. "cccc", or "|c|c|c|c|" for a table with a border).
# A column without a known alignment is centered.
# <1> The HTML::Element that is the table tag.
# Returns the table definition, the number of rows and the number of
# columns
sub create_latex_table_def {
    my ($table) = @_;
    my $border = $table->attr('border') || $options->{border};
    my ($row_number,$column_number) = find_table_lengths($table);
    my @column_alignments = create_column_alignments($table);

    # with a border every column is followed by a vertical rule, and the
    # definition starts with one as well
    my $rule = $border ? "|" : "";
    my $latex_table_def = join '', $rule,
        map { ($column_alignments[$_] || "c") . $rule } 0 .. $column_number - 1;

    return ($latex_table_def,$row_number,$column_number);
}
# Finds the maximum number of cells (TD or TH) that any row in a table
# has and also the number of rows it has. Rows are the TR elements
# directly inside the table or inside a THEAD/TBODY/TFOOT.
# <1> the reference to the HTML::Element table.
# Returns (number of rows, number of columns)
sub find_table_lengths {
    my $table = shift;

    # Only care about TR children. (grep 'tr', LIST kept every child,
    # including plain strings that have no content_list method.)
    my @rows;
    foreach my $child ($table->content_list) {
        next unless ref $child;
        if ($child->tag eq 'tr') {
            push @rows, $child;
        }
        elsif ($child->tag =~ /^(?:thead|tbody|tfoot)$/) {
            push @rows, grep { ref $_ && $_->tag eq 'tr' } $child->content_list;
        }
    }

    my $max_row_length = 0;
    foreach my $row (@rows) {
        # only care about the cell children
        my $cells = grep { ref $_ && $_->tag =~ /^t[dh]$/ } $row->content_list;
        $max_row_length = $cells if $cells > $max_row_length;
    }
    #       row_number      column_number
    return (scalar(@rows),$max_row_length);
}
# Returns an array of column alignments ('l', 'r' or 'c'), one for each
# cell (TD or TH) of the first row of the table. The alignment comes
# from the cell's align attribute; anything but left or right is
# centered.
# <1> the reference to the HTML::Element table.
sub create_column_alignments {
    my $table = shift;
    my @column_alignments;

    # Find the first TR, directly inside the table or inside a
    # THEAD/TBODY/TFOOT. (grep 'tr', LIST kept every child, so the
    # "first row" could be a plain string or some other element.)
    my $first_row;
    CHILD:
    foreach my $child ($table->content_list) {
        next CHILD unless ref $child;
        if ($child->tag eq 'tr') {
            $first_row = $child;
        }
        elsif ($child->tag =~ /^(?:thead|tbody|tfoot)$/) {
            ($first_row) = grep { ref $_ && $_->tag eq 'tr' } $child->content_list;
        }
        last CHILD if $first_row;
    }

    if ($first_row) {
        my %letter_for = (left => 'l', right => 'r');
        # only care about the cell children
        foreach my $column (grep { ref $_ && $_->tag =~ /^t[dh]$/ } $first_row->content_list) {
            # attribute values are compared case-insensitively
            my $align = lc($column->attr('align') || '');
            push @column_alignments, $letter_for{$align} || 'c';
        }
    }
    return @column_alignments;
}
# Converts an image from jpeg or gif into png, next to the original.
# A png image is used as it is.
# <1> filename or URL of the image
# Returns the (relative) name of the png file if successful, and
# nothing when the image can't be found, is of an unknown type, or
# can't be converted.
sub convert_image {
    my $source = shift;
    my ($absolute,$relative) = get_uri($source);

    # We can't even get at the file.
    return unless $absolute and $relative;

    # The suffix is matched case-insensitively, so BAR.GIF counts too.
    my $image_suffix = '\.(?i:gif|png|jpe?g)';
    my ($aname,$apath,$asuffix) = fileparse($absolute,$image_suffix);
    my ($rname,$rpath,$rsuffix) = fileparse($relative,$image_suffix);
    my $type = lc $asuffix;

    if ($type eq '.png') {
        # It's a PNG for sure; keep the name exactly as it is on disk.
        return "$rpath$rname$rsuffix";
    }
    unless ($type eq '.gif' || $type eq '.jpg' || $type eq '.jpeg') {
        # so, it's not a png, gif, or jpg. That means it's invalid.
        print $LOG "IMG: Invalid picture type: $source; using alt\n";
        return;
    }
    unless ($present{'Image::Magick'}) {
        # No Image::Magick. Warn user and return nothing.
        print $LOG "IMG: Can't convert $source without Image::Magick; using alt\n";
        return;
    }

    # Picture is of a convertible type: convert it with Image::Magick
    require Image::Magick;
    my $aoutput = "$apath$aname.png";    # write to and return with png
    my $routput = "$rpath$rname.png";
    my $image = Image::Magick->new();
    # Image::Magick reports a problem as a non-empty status string
    my $read_status  = $image->Read("$absolute");
    print $LOG "IMG: Image::Magick: $read_status\n"
        if defined $read_status && "$read_status";
    my $write_status = $image->Write("$aoutput");
    print $LOG "IMG: Image::Magick: $write_status\n"
        if defined $write_status && "$write_status";
    undef $image;

    # Only claim success when the png really exists; otherwise latex
    # would later stop on a missing file.
    unless (-f $aoutput) {
        print $LOG "IMG: Couldn't convert $source to $routput; using alt\n";
        return;
    }
    print $LOG "IMG: Converted $source to $routput\n";
    return $routput;
}
# Locates a document or image and makes sure there is a local copy.
# If the filename is really a URL, then go grab it, translate
# the name to the local file directory, and return that file name.
# Otherwise, just return the thing you got in.
# The candidates are tried in this order: an existing local file, a
# file relative to the remembered $PATH, a full URL (anything
# containing "://"), and finally a partial URL completed with the
# remembered $SCHEME and $HOST.
# <1> is the URI
# [2] when true, the host, path and scheme of this URI are remembered
#     as the defaults for subsequent calls
# return ($absolute_path_to_file,$relative_path_to_file);
# The relative can be absolute itself (same as $absolute).
# Returns nothing when the file can't be found or fetched.
# NOTE(review): because a two element list is returned, calling this
# in scalar context gives the relative name, not the absolute one.
{
#variables to stay the same across calls of get_uri. It's used in
#case we get image URLs with no host or scheme or path.
my $HOST = undef; #global value of current HOST
my $PATH = undef; #path inside host where we start
my $SCHEME = undef; #scheme originally used
sub get_uri {
my ($uri,$absolute_local,$relative_local);
$uri = $absolute_local = $relative_local = shift;
print $LOG "looking for $uri\n" if $options->{debug};
my $override = shift || 0; #true means that you replace $HOST, $PATH and $SCHEME
if(-f $uri){
# it's a local file that exists as given; only $PATH is remembered here
$PATH = dirname($uri) if $override;
print $LOG "returning $uri\n" if $options->{debug};
return ($uri,$uri);
} elsif(defined($PATH) && -f "$PATH/$uri") {
#it must be a local relative image
print $LOG "returning $PATH/$uri\n" if $options->{debug};
return ("$PATH/$uri",$uri);
} elsif($uri =~ m|://|){
#It's a full URL
# Load necessary modules if you can.
unless($present{'URI'}) {
print $LOG "NEED: Can't handle request of $uri without module URI\n";
return;
}
require URI;
URI->import();
unless($present{'LWP::Simple'}) {
print $LOG "NEED: Can't handle request of $uri without module LWP::Simple\n";
return;
}
require LWP::Simple;
LWP::Simple->import();
$uri = new URI($uri);
# split the URL path into its directory part (with trailing /) and the file name
my ($path,$filename) = ($uri->path =~ m|(.*/)(.*)|);
#replace the host, path, and scheme if we're allowed to
print $LOG "It's a full URL\n" if $options->{debug};
if($override){
$HOST = $uri->host;
$PATH = $path || '/';
$SCHEME = $uri->scheme;
print $LOG "Setting HOST to $HOST, PATH to $PATH, and SCHEME to $SCHEME\n" if $options->{debug};
}
# local name: <store>/<host><path><file>; a URL without a file name is stored as index.html
my $absolute = ($options->{store} || '.') . '/' . ($uri->host || "") . ($path || "/") . ($filename || "index.html");
if(store_uri($uri,$absolute)){ #Now, download the file. If it fails, return nothing.
print $LOG "returning $absolute\n" if $options->{debug};
return ($absolute,$absolute);
} else {
return;
}
} elsif(defined($HOST) && defined($SCHEME)){
#It's a partial URL; complete it with the remembered scheme and host.
if($uri =~ m|^/|){
#it's an absolute partial URL (starts at the root of the host)
my $absolute_uri = $SCHEME . '://' . $HOST . $uri;
$absolute_local = ($options->{store} || '.') . '/' . $HOST . $uri;
if(store_uri($absolute_uri,$absolute_local)){ #Now, download the file. If it fails, return nothing.
return($absolute_local,$absolute_local);
} else {
return;
}
} else {
#it's a relative partial URL (relative to the remembered $PATH)
my $absolute_uri = $SCHEME . '://' . $HOST . $PATH . $uri;
$absolute_local = ($options->{store} || '.') . '/' . $HOST . $PATH . $uri;
if(store_uri($absolute_uri,$absolute_local)){ #Now, download the file. If it fails, return nothing.
return($absolute_local,$uri);
} else {
return;
}
}
} else {
# neither a local file nor anything we know how to fetch
print $LOG "FILE: Unable to access $uri\n";
return;
}
}
}
# Stores a URI as a local file, and creates the path if necessary.
# When caching is switched on and the local file already exists, it
# is used without contacting the server.
# <1> The URI
# <2> The file to store it in
# Returns the name of the local file, or nothing when the URI could
# not be fetched.
sub store_uri {
    my ($uri,$localfile) = @_;
    my ($name,$path) = fileparse($localfile);

    if (-f $localfile && $options->{cache}) {
        #Use localfile if it's cached and caching is allowed
        print $LOG "URI: Using $localfile for $uri. See -h to stop cacheing\n";
        return $localfile;
    }

    #Override localfile if new. The directory is only created when the
    #URI answers a HEAD request.
    mkpath($path,1) if (head($uri));
    unless (is_error(mirror($uri,$localfile))) {
        print $LOG "URI: Mirrored $uri in $localfile\n";
        return $localfile;
    }
    print $LOG "URI: Unable to access $uri\n";
    return;
}
# Replaces every URL in a string with \url{URL}. The pattern is based
# on the one in the Perl Cookbook, which I recommend: a URL is a known
# scheme and a colon, followed by the shortest run of URL characters
# that is followed by optional punctuation and then either a character
# that can't be part of a URL or the end of the string. That way
# punctuation that ends a sentence stays outside of the \url{}.
# <1> string to urlify. It is changed in place, and returned as well.
{
    # Built once, when the file is loaded, and shared by all calls.
    my $urls = '(https?|telnet|gopher|file|wais|ftp)';   # schemes; https was missing
    my $ltrs = '\w';
    my $gunk = '/#~:.?+=&%@!\-';
    my $punc = '.:?\-';
    my $any  = "${ltrs}${gunk}${punc}";
    # The optional punctuation now applies to the end-of-string case
    # too; before, "see http://foo.com." put the final period inside
    # the URL.
    my $url_re = qr{\b($urls:[$any]+?)(?=[$punc]*(?:[^$any]|$))}i;

    sub urlify {
        $_[0] =~ s/$url_re/\\url{$1}/g;
        return $_[0];
    }
}
1; #package must return true.
########################## END SUBS #############################
__DATA__
textbf
document
\\
quote
center
verbatim
\N
description
\item[
]
emph
section*
subsection*7
subsubsection*
textbf
textbf
textbf
\hline
emph
includegraphics
\item
enumerate
\N\N
verbatim
textbf
table
tr
title
td
itemize
~/.html2latex
0
article
1
10
1
0
0
0
fullpage
graphicx
url
code
| |