#!/usr/bin/perl
# urlify -- tchrist@perl.com
require 5.002; # comments inside the /x pattern need 5.002; see urlify() for a 5.000 form
use strict;

# URL schemes that are recognised, joined into a regex alternation.
my $schemes = join '|', qw{
    http
    https
    telnet
    gopher
    file
    wais
    ftp
};

my $ltrs = '\w';                    # word characters
my $gunk = '/#~:.?+=&%@!\-';        # other characters allowed inside a URL
my $punc = '.:?\-';                 # punctuation that may trail a URL in prose
my $any  = "${ltrs}${gunk}${punc}"; # contents of the "URL character" class

# urlify(TEXT)
#
# Returns a copy of TEXT in which every URL that uses one of the schemes
# listed above is wrapped in an HTML anchor: <A HREF="url">url</A>.
# Scheme names are matched case-insensitively.  Punctuation that directly
# follows a URL and is itself followed by a non-URL character (for example
# the full stop ending a sentence) is left outside the link.  TEXT without
# URLs is returned unchanged.
#
# On a pre-5.002 perl, which does not accept comments inside the pattern,
# replace the substitution below with this one-line equivalent:
#   s{\b((?:$schemes):[$any]+?)(?=[$punc]*[^$any]|\Z)}{<A HREF="$1">$1</A>}goi;
#
# NOTE: perl interpolates variables and balances the delimiters before it
# strips /x comments, so the comments inside the pattern must not contain
# sigils or braces.
sub urlify {
    my ($text) = @_;

    $text =~ s{
        \b                      # start at a word boundary
        (                       # open capture group 1
            (?: $schemes ) :    # a known scheme name and a colon
            [$any]+?            # followed by one or more URL characters,
                                # taken lazily so that trailing
                                # punctuation stays outside the link
        )                       # close capture group 1
        (?=                     # look ahead without consuming:
            [$punc]* [^$any]    #   optional punctuation, then a non-URL char
          |                     #   or else
            \Z                  #   the end of the string
        )
    }{<A HREF="$1">$1</A>}igox;

    return $text;
}

# Act as a filter (files named on the command line, or standard input) when
# run as a program; skip the loop when the file is loaded by other code.
unless (caller) {
    while (<>) {
        print urlify($_);
    }
}

1;