/usr/share/perl5/MIME/Lite/HTML.pm is in libmime-lite-html-perl 1.24-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
| package MIME::Lite::HTML;
# module MIME::Lite::HTML : Provide routine to transform a HTML page in
# a MIME::Lite mail
# Copyright 2001/2011 A.Barbet alian@cpan.org. All rights reserved.
# $Log: HTML.pm,v $
# Revision 1.24 2011/10/07 11:27:42 alian
#
# Revision 1.24 2011/10/07 11:27:42 alian
# - Fix rt#67695 Add feature: "ExternImages" parameter to constructor (tbriggs)
# - Fix rt#68303 Outdated COPYING file
# - Fix rt#52907 CSS (and likely other) links match double-quote only
# - Fix rt#41447 Unable to call replace function
# - Fix rt#40164 Removing script code often fails
# - Fix bug when HTTP result is gzip format (use decoded_content, tks to E.Bataille
#
# Revision 1.23 2008/10/14 11:27:42 alian
# - Fix rt#36006: cid has no effect on background images
# - Fix rt#36005: include_javascript does not remove closing tag "</SCRIPT>"
# - Fix rt#29033: eliminate nested subs
# Revision 1.22 2006/09/06 14:46:42 alian
# - Fix rt#19656: unknown URI schemes cause rewrite to fail
# - Fix rt#17385: make test semi-panics
# - Fix rt#7841: Text-Only Encoding Ignored
# - Fix rt#21339: no license or copyright information provided
# - Fix rt#19655: include_css is far too aggressive
#
# Revision 1.21 2004/04/15 22:59:33 alian
# fix for 1.20 and bad ref for tests
#
# Revision 1.20 2004/04/14 21:26:51 alian
# - fix error on last version
#
# Revision 1.19 2004/03/16 15:18:57 alian
# - Add Url param in new for direct call of parse & send
# - Correct a problem in parsing of html elem background
# - Re-indent some methods
#
# Revision 1.18 2003/08/08 09:37:42 alian
# Fix test case and cid method
#
# Revision 1.17 2003/08/07 16:55:08 alian
# - Fix test case (hostname)
# - Update POD documentation
#
# Revision 1.16 2003/08/07 00:07:57 alian
# - Use pack for include type == cid: RFC says no '/'.
# Tks to Cláudio Valente for report.
# - Add a __END__ statement before POD documentation.
#
# Revision 1.15 2002/10/19 17:54:32 alian
# - Correct bug with relative anchor '/'. Tks to Keith D. Zimmerman for
# report.
#
# See Changes files for older changes
# Modules used below: LWP::UserAgent performs the downloads,
# HTML::LinkExtor lists the links of the page, URI::URL resolves relative
# links and MIME::Lite builds the message parts.
use LWP::UserAgent;
use HTML::LinkExtor;
use URI::URL;
use MIME::Lite;
use strict;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
require Exporter;
@ISA = qw(Exporter);
# Nothing is exported by default.
@EXPORT = qw();
# Extract the numeric version from the RCS revision keyword.
$VERSION = ('$Revision: 1.24 $ ' =~ /(\d+\.\d+)/)[0];
# "user:password" string set by the LoginDetails constructor parameter and
# read by RequestAgent::get_basic_credentials. It is a file-scoped lexical,
# so it is shared by every object created from this module.
my $LOGINDETAILS;
#------------------------------------------------------------------------------
# redefine get_basic_credentials
#------------------------------------------------------------------------------
{
    # LWP::UserAgent subclass that answers HTTP basic authentication
    # challenges on behalf of MIME::Lite::HTML.
    package RequestAgent;
    use vars qw(@ISA);
    @ISA = qw(LWP::UserAgent);

    # Constructor: hands every argument straight to LWP::UserAgent::new.
    sub new {
        my $agent = LWP::UserAgent::new(@_);
        return $agent;
    }

    # Return the (user, password) pair to use for $realm at $uri, or
    # (undef, undef) when no credentials can be obtained.
    sub get_basic_credentials {
        my ($self, $realm, $uri) = @_;

        # Credentials given through the LoginDetails parameter of
        # MIME::Lite::HTML ("user:password") come first.
        return split(':', $LOGINDETAILS, 2) if defined $LOGINDETAILS;

        # STDIN is not a terminal (CGI for instance): nobody can be asked,
        # so give up; LoginDetails must be used in that situation.
        return (undef, undef) unless -t STDIN;

        # Interactive session: prompt for the credentials.
        my $netloc = $uri->host_port;
        print "Enter username for $realm at $netloc: ";
        my $user = <STDIN>;
        chomp($user);
        # An empty user name means "no credentials".
        return (undef, undef) unless length $user;

        print "Password: ";
        system("stty -echo");
        my $password = <STDIN>;
        system("stty echo");
        print "\n";    # the user's newline was not echoed
        chomp($password);
        return ($user, $password);
    }
}
#------------------------------------------------------------------------------
# new
#------------------------------------------------------------------------------
# Constructor.
#
# Takes a hash of options. The keys handled here are Url, Proxy, Debug,
# IncludeType, ExternImages, HashTemplate, LoginDetails, TextCharset,
# HTMLCharset, TextEncoding, HTMLEncoding and remove_jscript; whatever is
# left in the hash is kept in $self->{_param} and later handed to the
# MIME::Lite constructor.
#
# Dies when IncludeType is not 'extern', 'cid' or 'location', or when
# HashTemplate is not a reference to a hash (plain or blessed).
#
# When Url is given, the page is parsed and the resulting mail is sent
# immediately. Returns the new object.
sub new {
    my $class = shift;
    my $self  = {};
    bless $self, $class;
    my %param = @_;

    # Agent used for every download
    $self->{_AGENT} = RequestAgent->new;
    $self->{_AGENT}->agent("MIME-Lite-HTML $VERSION");
    $self->{_AGENT}->from('mime-lite-html@alianwebserver.com');

    # Remove javascript code or not?
    $self->{_remove_jscript} = $param{'remove_jscript'} ? 1 : 0;

    # Set debug level
    if ($param{'Debug'}) {
        $self->{_DEBUG} = 1;
        delete $param{'Debug'};
    }

    # Set login information (stored in the file-scoped $LOGINDETAILS)
    if ($param{'LoginDetails'}) {
        $LOGINDETAILS = delete $param{'LoginDetails'};
    }

    # Set type of include to do; default is a Content-Location field
    if ($param{'IncludeType'}) {
        my $include = delete $param{'IncludeType'};
        die "IncludeType must be in 'extern', 'cid' or 'location'\n"
            if (    ($include ne 'extern')
                and ($include ne 'cid')
                and ($include ne 'location'));
        $self->{_include} = $include;
    }
    else {
        $self->{_include} = 'location';
    }

    # Regexps for images that must stay external
    if (defined $param{'ExternImages'}) {
        $self->{_externimages} = $param{'ExternImages'};
    }

    # Charsets and encodings of the text and HTML parts, with the defaults
    # used when the caller gives nothing (or a false value).
    foreach my $option (
        [ 'TextCharset',  '_textcharset',  'iso-8859-1' ],
        [ 'HTMLCharset',  '_htmlcharset',  'iso-8859-1' ],
        [ 'TextEncoding', '_textencoding', '7bit' ],
        [ 'HTMLEncoding', '_htmlencoding', 'quoted-printable' ],
    ) {
        my ($key, $slot, $default) = @$option;
        if ($param{$key}) { $self->{$slot} = delete $param{$key}; }
        else              { $self->{$slot} = $default; }
    }

    # Set proxy to use to get files
    if ($param{'Proxy'}) {
        $self->{_AGENT}->proxy('http', $param{'Proxy'});
        print "Set proxy for http : ", $param{'Proxy'}, "\n"
            if ($self->{_DEBUG});
        delete $param{'Proxy'};
    }

    # Set hash to use with template. Any hash-based reference is accepted,
    # blessed or not; it is stored as given, so the caller's hash is shared.
    # Anything else is rejected here with a clear message instead of
    # failing later when the template is used.
    if ($param{'HashTemplate'}) {
        my $template = delete $param{'HashTemplate'};
        require Scalar::Util;
        my $kind = Scalar::Util::reftype($template);
        die "HashTemplate must be a reference to a hash\n"
            unless (defined $kind and $kind eq 'HASH');
        $self->{_HASH_TEMPLATE} = $template;
    }

    # Silence MIME::Lite warnings
    MIME::Lite->quiet(1);

    # Direct call of parse & send
    my $url;
    if ($param{'Url'}) {
        $url = delete $param{'Url'};
    }

    # Everything left is meant for the MIME::Lite constructor
    $self->{_param} = \%param;

    if ($url) {
        my $m = $self->parse($url);
        # Only send when a message was really built; failures are
        # available through errstr.
        $m->send if $m;
    }
    return $self;
}
#------------------------------------------------------------------------------
# absUrl
#------------------------------------------------------------------------------
# Resolve $link against $base and return the absolute URL. When the
# resolution fails or yields a false value, the link is returned untouched
# (rt 19656: unknown URI schemes cause rewrite to fail).
sub absUrl($$) {
    my ($link, $base) = @_;
    my $resolved = eval { URI::WithBase->new($link, $base)->abs; };
    return $resolved if $resolved;
    return $link;
}
# Replacement for the opening of an <img> tag when parts are referenced by
# Content-ID.
#   $sel   : MIME::Lite::HTML object
#   $_[0]  : attributes found between "<img" and "src"
#   $_[1]  : value of the src attribute
#   $_[2]  : base URL used to make the source absolute
# Returns '<img ...src="cid:ID"'. Images matching ExternImages are not
# attached to the mail, so they keep their absolute URL instead of a cid
# reference that nothing would answer.
sub pattern_image_cid {
    my ($sel, $attrs, $src, $base) = @_;
    my $abs = absUrl($src, $base);
    my $target = $sel->_matches_extern_images($abs)
        ? $abs
        : 'cid:' . $sel->cid($abs);
    return '<img ' . $attrs . 'src="' . $target . '"';
}
# Replacement for the opening of an <img> tag: same attributes, but the
# source is made absolute against the base URL.
#   arguments: attributes before "src", src value, base URL
sub pattern_image {
    my ($attrs, $src, $base) = @_;
    my $abs = absUrl($src, $base);
    return '<img ' . $attrs . 'src="' . $abs . '"';
}
# Build the text ' ATTRIBUTE="URL"' used to replace a link attribute.
#   $url    : new value of the attribute
#   $balise : attribute name (href, src, background, value, ...)
#   $sep    : character that ended the original value; it is put back
#             unless it was a quote, which the new closing quote replaces
sub pattern_href {
    my ($url, $balise, $sep) = @_;
    my $attribute = ' ' . $balise . '="' . $url . '"';
    return $attribute if ($sep eq '"' or $sep eq "'");
    return $attribute . $sep;
}
#------------------------------------------------------------------------------
# parse
#------------------------------------------------------------------------------
# Value to write in the HTML for a resource that may be attached to the
# mail: "cid:ID" when parts are referenced by Content-ID and the resource
# is really attached, the absolute URL otherwise (other include types, or
# resource matching ExternImages).
sub _embedded_ref {
    my ($self, $urlAbs) = @_;
    return $urlAbs if $self->{_include} ne 'cid';
    return $urlAbs if $self->_matches_extern_images($urlAbs);
    return "cid:" . $self->cid($urlAbs);
}

# Attach the resource at $urlAbs to the list of parts @$parts, unless
# resources stay external ('extern' include type or ExternImages match) or
# the resource was already attached (%$seen remembers that).
sub _attach_once {
    my ($self, $urlAbs, $seen, $parts) = @_;
    return if $self->{_include} eq 'extern';
    return if $seen->{$urlAbs};
    return if $self->_matches_extern_images($urlAbs);
    $seen->{$urlAbs} = 1;
    push(@$parts, $self->create_image_part($urlAbs));
    return;
}

# Build the mail for an HTML page and an optional text alternative.
#
#   $url_page : URL (http, https, ftp, file or nntp) of the HTML page, or
#               the HTML itself when the value does not look like a URL
#   $url_txt  : optional URL, or buffer, of the text alternative
#   $url1     : optional base URL used to make links absolute; when it is
#               not given, the base of the fetched response is used
#
# Download failures are recorded with set_err. Returns $self->{_MAIL} as
# left by build_mime_object.
sub parse
{
    my ($self, $url_page, $url_txt, $url1) = @_;
    my (@mail, $gabarit, $gabarit_txt, $racinePage);

    # Get content of $url_page with LWP
    if ($url_page && $url_page =~ /^(https?|ftp|file|nntp):\/\//) {
        print "Get ", $url_page, "\n" if $self->{_DEBUG};
        my $req = HTTP::Request->new('GET' => $url_page);
        my $res = $self->{_AGENT}->request($req);
        if (!$res->is_success) {
            $self->set_err("Can't fetch $url_page (" . $res->message . ")");
        }
        else { $gabarit = $res->content; }
        $racinePage = $url1 || $res->base;
    }
    else {
        $gabarit    = $url_page;
        $racinePage = $url1;
    }

    # Get content of $url_txt with LWP if needed
    if ($url_txt) {
        if ($url_txt =~ /^(https?|ftp|file|nntp):\/\//) {
            print "Get ", $url_txt, "\n" if $self->{_DEBUG};
            my $req2 = HTTP::Request->new('GET' => $url_txt);
            my $res3 = $self->{_AGENT}->request($req2);
            if (!$res3->is_success) {
                $self->set_err("Can't fetch $url_txt (" . $res3->message . ")");
            }
            else { $gabarit_txt = $res3->content; }
        }
        else { $gabarit_txt = $url_txt; }
    }

    # Without HTML there is nothing to rewrite or attach
    if ($gabarit) {
        # Get all multimedia parts (img, flash) to create a MIME part for
        # each of them later; links are listed before any rewriting
        my $analyseur = HTML::LinkExtor->new;
        $analyseur->parse($gabarit);
        my @l = $analyseur->links;

        # Include external CSS files
        $gabarit = $self->include_css($gabarit, $racinePage);
        # Include external Javascript files
        $gabarit = $self->include_javascript($gabarit, $racinePage);
        # Include form images
        ($gabarit, @mail) = $self->input_image($gabarit, $racinePage);
        # Change target action for form
        $gabarit = $self->link_form($gabarit, $racinePage);

        # Scan each part found by LinkExtor
        my (%images_read, %url_remplace);
        foreach my $url (@l) {
            my $raw    = $$url[2];
            my $urlAbs = absUrl($raw, $racinePage);
            chomp $urlAbs;    # Sometimes a strange cr/lf occurs

            # Replace relative href found with the absolute one
            if ( ($$url[0] eq 'a') && ($$url[1] eq 'href') && ($raw) &&
                 (($raw !~ m!^http://!) &&       # not an absolute link
                  ($raw !~ m!^mailto:!) &&       # not a mailto
                  ($raw !~ m!^\#!)) &&           # not an anchor
                 (!$url_remplace{$urlAbs}) )     # not already replaced
            {
                $gabarit =~ s/\s href \s* = \s* [\"']? \Q$raw\E ([\"'>])
                             /pattern_href($urlAbs,"href",$1)/giemx;
                print "Replace ", $raw, " with ", $urlAbs, "\n"
                    if ($self->{_DEBUG});
                $url_remplace{$urlAbs} = 1;
            }
            # For frame & iframe
            elsif ( (lc($$url[0]) eq 'iframe' || lc($$url[0]) eq 'frame') &&
                    (lc($$url[1]) eq 'src') && ($raw) )
            {
                $gabarit =~ s/\s src \s* = \s* [\"']? \Q$raw\E ([\"'>])
                             /pattern_href($urlAbs,"src",$1)/giemx;
                print "Replace ", $raw, " with ", $urlAbs, "\n"
                    if ($self->{_DEBUG});
                $url_remplace{$urlAbs} = 1;
            }
            # For background images
            elsif ((lc($$url[1]) eq 'background') && ($raw)) {
                # Replace relative url with absolute (or cid reference)
                my $v = $self->_embedded_ref($urlAbs);
                $gabarit =~ s/background \s* = \s* [\"']? \Q$raw\E ([\"'>])
                             /pattern_href($v,"background",$1)/giemx;
                # Add the part to the mail unless it stays external
                $self->_attach_once($urlAbs, \%images_read, \@mail);
            }
            # For flash part (embed)
            elsif (lc($$url[0]) eq 'embed' && $$url[4]) {
                my $movie = $$url[4];
                # rebuild $urlAbs
                $urlAbs = absUrl($movie, $racinePage);
                # Replace relative url with absolute; the cid reference
                # must be the Content-ID given to the attached part
                my $v = $self->_embedded_ref($urlAbs);
                $gabarit =~ s/src \s* = \s* [\"'] \Q$movie\E ([\"'>])
                             /pattern_href($v,"src",$1)/giemx;
                $self->_attach_once($urlAbs, \%images_read, \@mail);
            }
            # For flash part (object)
            # Need to add "param" to Tagset.pm in the linkElements definition:
            # 'param' => ['name', 'value'],
            # Tks to tosh@c4.ca for that
            elsif (lc($$url[0]) eq 'param' && lc($raw) eq 'movie'
                   && $$url[4]) {
                my $movie = $$url[4];
                # rebuild $urlAbs
                $urlAbs = absUrl($movie, $racinePage);
                # Replace relative url with absolute (or cid reference)
                my $v = $self->_embedded_ref($urlAbs);
                $gabarit =~ s/value \s* = \s* [\"'] \Q$movie\E ([\"'>])
                             /pattern_href($v,"value",$1)/giemx;
                $self->_attach_once($urlAbs, \%images_read, \@mail);
            }
            # For new images create part
            elsif ((lc($$url[0]) eq 'img') || (lc($$url[0]) eq 'src')) {
                $self->_attach_once($urlAbs, \%images_read, \@mail);
            }
        }

        # If cid choice, put a cid + absolute url on each link image
        if ($self->{_include} eq 'cid') {
            $gabarit =~ s/<img ([^<>]*) src\s*=\s*(["']?) ([^"'> ]* )(["']?)
                         /pattern_image_cid($self,$1,$3,$racinePage)/iegx;
        }
        # Else just make an absolute url
        else {
            $gabarit =~ s/<img ([^<>]*) src\s*=\s*(["']?)([^"'> ]*) (["']?)
                         /pattern_image($1,$3,$racinePage)/iegx;
        }
    }

    # Substitute values in template if needed
    if (scalar keys %{$self->{_HASH_TEMPLATE}}!=0) {
        $gabarit = $self->fill_template($gabarit, $self->{_HASH_TEMPLATE})
            if ($gabarit);
        $gabarit_txt = $self->fill_template($gabarit_txt,
                                            $self->{_HASH_TEMPLATE});
    }

    # Create MIME-Lite object
    $self->build_mime_object($gabarit, $gabarit_txt || undef, \@mail);
    return $self->{_MAIL};
}
#------------------------------------------------------------------------------
# size
#------------------------------------------------------------------------------
# Return the length of the built mail, as given by its as_string form.
# The mail must have been built (parse) before this is called.
sub size {
    my $self     = shift;
    my $rendered = $self->{_MAIL}->as_string;
    return length $rendered;
}
#------------------------------------------------------------------------------
# _matches_extern_images
#
# For a given image, does it match any of the regexps in $self->{_externimages} ?
#------------------------------------------------------------------------------
# Tell whether $image matches one of the regular expressions stored in
# $self->{_externimages}. Returns 1 on the first match, 0 when nothing
# matches or when no expression was configured.
sub _matches_extern_images {
    my ($self, $image) = @_;
    my $patterns = $self->{_externimages} || [ ];
    for my $pattern (@$patterns) {
        return 1 if $image =~ /$pattern/;
    }
    return 0;
}
#------------------------------------------------------------------------------
# build_mime_object
#------------------------------------------------------------------------------
# Assemble the final MIME::Lite object and store it in $self->{_MAIL}.
#
#   $html     : HTML buffer (may be empty)
#   $txt      : text buffer (may be empty)
#   $ref_mail : reference to the list of MIME::Lite parts (images, ...)
#               referenced by the HTML
#
# Layout:
#   html only            -> text/html
#   text only            -> text/plain
#   html + images        -> multipart/related (html, images)
#   html + text          -> multipart/alternative (text, html)
#   html + text + images -> multipart/alternative
#                             (text, multipart/related (html, images))
#
# The caller's headers ($self->{_param}) go on the outermost object only.
# When there is neither HTML nor text, an error is recorded with set_err,
# $self->{_MAIL} is set to undef and nothing is returned.
sub build_mime_object {
    my ($self, $html, $txt, $ref_mail) = @_;
    my ($txt_part, $part, $mail);
    my $with_images = @$ref_mail ? 1 : 0;

    # Create part for HTML if needed
    if ($html) {
        my $ref = ($txt || $with_images) ? {} : $self->{_param};
        $part = MIME::Lite->new(%$ref,
                                'Type'     => 'TEXT',
                                'Encoding' => $self->{_htmlencoding},
                                'Data'     => $html);
        $part->attr("content-type" =>
                    "text/html; charset=" . $self->{_htmlcharset});
        # Remove some headers for Eudora client in HTML and related part
        $part->replace("MIME-Version" => "");
        $part->replace('X-Mailer' => "");
        $part->replace('Content-Disposition' => "");
        # only html, no images & no txt
        $mail = $part unless ($txt || $with_images);
    }

    # Create part for text if needed
    if ($txt) {
        my $ref = ($html ? {} : $self->{_param});
        $txt_part = MIME::Lite->new(%$ref,
                                    'Type'     => 'TEXT',
                                    'Data'     => $txt,
                                    'Encoding' => $self->{_textencoding});
        $txt_part->attr("content-type" =>
                        "text/plain; charset=" . $self->{_textcharset});
        # Remove some headers for Eudora client
        $txt_part->replace("MIME-Version" => "");
        $txt_part->replace("X-Mailer" => "");
        $txt_part->replace("Content-Disposition" => "");
        # only text, no html
        $mail = $txt_part unless $html;
    }

    # The multipart containers only make sense around an HTML part; a
    # text-only mail stays the single text part built above. The caller's
    # parameters are copied so that Type does not leak into $self->{_param}.
    if ($html and $with_images and !$txt) {
        # Images and html and no text: multipart/related
        my %args = %{ $self->{_param} };
        $args{'Type'} = "multipart/related";
        $mail = MIME::Lite->new(%args);
        # Attach HTML part, then each image, to related part
        $mail->attach($part);
        foreach my $image (@$ref_mail) { $mail->attach($image); }
        $mail->replace("Content-Disposition" => "");
    }
    elsif ($html and $txt and !$with_images) {
        # Html and text and no images: multipart/alternative
        my %args = %{ $self->{_param} };
        $args{'Type'} = "multipart/alternative";
        $mail = MIME::Lite->new(%args);
        $mail->attach($txt_part);    # Attach text part
        $mail->attach($part);        # Attach HTML part
    }
    elsif ($html and $txt and $with_images) {
        # Html, text and images: multipart/alternative holding the text
        # and a multipart/related part
        my %args = %{ $self->{_param} };
        $args{'Type'} = "multipart/alternative";
        $mail = MIME::Lite->new(%args);
        # Create related part
        my $rel = MIME::Lite->new('Type' => 'multipart/related');
        $rel->replace("Content-transfer-encoding" => "");
        $rel->replace("MIME-Version" => "");
        $rel->replace("X-Mailer" => "");
        # Attach text part to alternative part
        $mail->attach($txt_part);
        # Attach HTML part, then each image, to related part
        $rel->attach($part);
        foreach my $image (@$ref_mail) { $rel->attach($image); }
        # Attach related part to alternative part
        $mail->attach($rel);
    }

    # Neither HTML nor text (a failed download for instance): report it
    # instead of calling a method on an undefined message.
    unless (defined $mail) {
        $self->set_err("No HTML or text content: no mail built");
        $self->{_MAIL} = undef;
        return;
    }

    $mail->replace('X-Mailer' => "MIME::Lite::HTML $VERSION");
    return $self->{_MAIL} = $mail;
}
#------------------------------------------------------------------------------
# include_css
#------------------------------------------------------------------------------
# Replacement for one <link ...> tag found by include_css.
#   $url    : value of the href attribute
#   $milieu : text between "<link" and "href"
#   $fin    : text between the href value and ">"
#   $root   : base URL used to make $url absolute
# Links that are not stylesheets, and "shortcut" links, are written back
# with their href untouched. A stylesheet is downloaded and returned inside
# a <style> element. When the download fails, the error is recorded with
# set_err and the link is kept, pointing at the absolute URL, rather than
# inlining the body of the error response as CSS.
sub pattern_css {
    my ($self, $url, $milieu, $fin, $root) = @_;

    # if not stylesheet - rt19655
    if ($milieu !~ /stylesheet/i && $fin !~ /stylesheet/i) {
        return "<link" . $milieu . " href=\"$url\"" . $fin . ">";
    }
    # Don't store <LINK REL="SHORTCUT ICON"> tag. Tks to doggy@miniasp.com
    if ($fin =~ m/shortcut/i || $milieu =~ m/shortcut/i) {
        return "<link" . $milieu . "href='" . $url . "'" . $fin . ">";
    }

    # Complete url
    my $ur = URI::URL->new($url, $root)->abs;
    print "Include CSS file $ur\n" if $self->{_DEBUG};
    my $res2 = $self->{_AGENT}->request(HTTP::Request->new('GET' => $ur));
    if (!$res2->is_success) {
        $self->set_err("Can't get $ur (" . $res2->message . ")");
        return "<link" . $milieu . " href=\"$ur\"" . $fin . ">";
    }
    print "Ok file downloaded\n" if $self->{_DEBUG};

    my $css = $res2->decoded_content;
    $css = '' unless defined $css;    # content that could not be decoded
    return '<style type="text/css">' . "\n" .
           '<!--' . "\n" . $css .
           "\n-->\n</style>\n";
}
# Hand every <link ... href=...> tag of the HTML buffer to pattern_css and
# put its answer in place of the tag.
#   $html : HTML buffer
#   $root : base URL used to find the CSS files
# Returns the modified buffer.
sub include_css(\%$$) {
    my ($self, $html, $root) = @_;
    $html =~ s/<link ([^<>]*?)
               href\s*=\s*["']?([^\"\' ]*)["']?([^>]*)>
              /$self->pattern_css($2,$1,$3,$root)/iegmx;
    if ($self->{_DEBUG}) {
        print "Done CSS\n";
    }
    return $html;
}
#------------------------------------------------------------------------------
# include_javascript
#------------------------------------------------------------------------------
# Replacement for one <script ... src=...></script> element found by
# include_javascript.
#   $url    : value of the src attribute
#   $milieu : text between "<script" and "src"
#   $fin    : text between the src value and ">"
#   $root   : base URL used to make $url absolute
# With remove_jscript set, the element is replaced with a single space and
# nothing is downloaded. Otherwise the file is downloaded and returned
# inline. When the download fails, the error is recorded with set_err and
# the element is kept, pointing at the absolute URL, rather than inlining
# the body of the error response as code.
sub pattern_js {
    my ($self, $url, $milieu, $fin, $root) = @_;

    # The script is going to be dropped: no need to fetch it
    return ' ' if $self->{_remove_jscript};

    my $ur = URI::URL->new($url, $root)->abs;
    print "Include Javascript file $ur\n" if $self->{_DEBUG};
    my $res2 = $self->{_AGENT}->request(HTTP::Request->new('GET' => $ur));
    if (!$res2->is_success) {
        $self->set_err("Can't get $ur (" . $res2->message . ")");
        return '<script' . $milieu . 'src="' . $ur . '"' . $fin . '></script>';
    }
    my $content = $res2->decoded_content;
    $content = '' unless defined $content;    # content that could not be decoded
    print "Ok file downloaded\n" if $self->{_DEBUG};

    return "\n" . "<!-- $ur -->\n" .
           '<script ' . $milieu . $fin . ">\n" .
           '<!--' . "\n" . $content .
           "\n-->\n</script>\n";
}
# Hand every <script ... src="...js"></script> element of the HTML buffer
# to pattern_js and put its answer in place of the element; then, when
# remove_jscript is set, delete every remaining <script> element.
#   $html : HTML buffer
#   $root : base URL used to find the javascript files
# Returns the modified buffer.
sub include_javascript(\%$$) {
    my ($self, $html, $root) = @_;
    $html =~ s/<script([^>]*)src\s*=\s*"?([^\" ]*js)"?([^>]*)>[^<]*<\/script>
              /$self->pattern_js($2,$1,$3,$root)/iegmx;
    $html =~ s/<script([^>]*)>[\s\S]*?<\/script>//iegmx
        if $self->{_remove_jscript};
    if ($self->{_DEBUG}) {
        print "Done Javascript\n";
    }
    return $html;
}
#------------------------------------------------------------------------------
# input_image
#------------------------------------------------------------------------------
# Replacement for one <input ... src=...> tag found by input_image.
#   $deb          : text between "<input" and "src"
#   $url          : value of the src attribute
#   $fin          : text between the src value and ">"
#   $base         : base URL used to make $url absolute
#   $ref_tab_mail : reference to the list receiving the created part
# Unless the include type is 'extern', a part is created for the image and
# pushed on @$ref_tab_mail. Returns the complete tag, closing ">" included
# (the caller's pattern consumes it). With the 'cid' include type the
# source is "cid:" followed by the same identifier create_image_part puts
# in the Content-ID field of the part.
sub pattern_input_image {
    my ($self, $deb, $url, $fin, $base, $ref_tab_mail) = @_;
    my $ur = URI::URL->new($url, $base)->abs;

    if ($self->{_include} ne 'extern') {
        push(@$ref_tab_mail, $self->create_image_part($ur));
    }

    my $src = ($self->{_include} eq 'cid')
        ? 'cid:' . $self->cid($ur)
        : $ur;
    return '<input ' . $deb . ' src="' . $src . '"' . $fin . '>';
}
# Hand every <input ... src=...> tag of the HTML buffer to
# pattern_input_image and put its answer in place of the tag.
#   $html : HTML buffer
#   $root : base URL used to find the images
# Returns the modified buffer followed by the parts collected on the way.
sub input_image(\%$$) {
    my ($self, $html, $root) = @_;
    my @parts;
    $html =~ s/<input([^<>]*)src\s*=\s*"?([^\"'> ]*)"?([^>]*)>
              /$self->pattern_input_image($1,$2,$3,$root,\@parts)/iegmx;
    if ($self->{_DEBUG}) {
        print "Done input image\n";
    }
    return ($html, @parts);
}
#------------------------------------------------------------------------------
# create_image_part
#------------------------------------------------------------------------------
# Create the MIME::Lite part for the resource at $ur.
#   $ur  : absolute URL of the resource
#   $typ : optional MIME type; when it is not given the type comes from the
#          extension (.gif, .jpg/.jpeg, .png) and falls back to
#          application/x-shockwave-flash
# The content comes from $self->{_HASH_TEMPLATE}{$ur} when that entry
# exists (a string, or an array reference whose elements are joined; the
# entry is deleted once used); otherwise it is downloaded, and a failure is
# recorded with set_err. The part carries a Content-ID field with the 'cid'
# include type and a Content-Location field otherwise.
# Returns the part.
sub create_image_part {
    my ($self, $ur, $typ) = @_;
    my ($type, $buff1);

    # Create MIME type; JPEG files are image/jpeg whatever the spelling of
    # their extension
    if    ($typ)                  { $type = $typ; }
    elsif ($ur =~ /\.gif$/i)      { $type = "image/gif"; }
    elsif ($ur =~ /\.jpe?g$/i)    { $type = "image/jpeg"; }
    elsif ($ur =~ /\.png$/i)      { $type = "image/png"; }
    else                          { $type = "application/x-shockwave-flash"; }

    # Url is already in memory
    if ($self->{_HASH_TEMPLATE}{$ur}) {
        print "Using buffer on: ", $ur, "\n" if $self->{_DEBUG};
        $buff1 = ref($self->{_HASH_TEMPLATE}{$ur}) eq "ARRAY"
            ? join "", @{$self->{_HASH_TEMPLATE}{$ur}}
            : $self->{_HASH_TEMPLATE}{$ur};
        delete $self->{_HASH_TEMPLATE}{$ur};
    }
    else {    # Get image
        print "Get img ", $ur, "\n" if $self->{_DEBUG};
        my $res2 = $self->{_AGENT}->request(HTTP::Request->new('GET' => $ur));
        if (!$res2->is_success) { $self->set_err("Can't get $ur\n"); }
        $buff1 = $res2->decoded_content;
    }

    # Create part
    my $mail = MIME::Lite->new(Data => $buff1, Encoding => 'base64');
    $mail->attr("Content-type" => $type);
    # With cid configuration, add a Content-ID field
    if ($self->{_include} eq 'cid') {
        $mail->attr('Content-ID' => '<' . $self->cid($ur) . '>');
    }
    else {    # Else (location) put a Content-Location field
        $mail->attr('Content-Location' => $ur);
    }
    # Remove headers for Eudora client
    $mail->replace("X-Mailer" => "");
    $mail->replace("MIME-Version" => "");
    $mail->replace("Content-Disposition" => "");
    return $mail;
}
#------------------------------------------------------------------------------
# cid
#------------------------------------------------------------------------------
# Build the Content-ID used for the resource at $url.
# The RFC forbids '/' in a cid, so the URL is written as hexadecimal
# digits. The first 7 characters are dropped (the scheme, always the same
# in a document) and the rest is reversed before encoding.
# NOTE(review): the count given to unpack is the length of the string in
# characters while the "h" format counts hexadecimal digits, so only the
# first half of the reversed string (the end of the URL) is encoded --
# confirm whether this shortening is intended.
sub cid (\%$) {
    my ($self, $url) = @_;
    my $tail    = reverse(substr($url, 7));
    my $nybbles = unpack("h" . length($tail), $tail);
    return reverse(split("", $nybbles)) . '@MIME-Lite-HTML-' .
        $VERSION;
}
#------------------------------------------------------------------------------
# link_form
#------------------------------------------------------------------------------
# Replacement for one <form ... action=...> tag found by link_form: the
# same tag with its action made absolute.
#   $deb  : text between "<form" and "action"
#   $url  : value of the action attribute
#   $fin  : text between the action value and ">"
#   $base : base URL used to make $url absolute
sub pattern_link_form {
    my ($self, $deb, $url, $fin, $base) = @_;
    my $target = URI::URL->new($url, $base)->abs;
    return join '', '<form ', $deb, ' action="', $target, '"', $fin, '>';
}
# Hand every <form ... action=...> tag of the HTML buffer to
# pattern_link_form and put its answer in place of the tag.
#   $html : HTML buffer
#   $root : base URL used to make the actions absolute
# Returns the modified buffer.
sub link_form
{
    my ($self, $html, $root) = @_;
    $html =~ s/<form([^<>]*)action="?([^\"'> ]*)"?([^>]*)>
              /$self->pattern_link_form($1,$2,$3,$root)/iegmx;
    if ($self->{_DEBUG}) {
        print "Done form\n";
    }
    return $html;
}
#------------------------------------------------------------------------------
# fill_template
#------------------------------------------------------------------------------
# Substitute the markers "<? $name ?>" of a template.
#   $masque : template text
#   $vars   : reference to a hash of name => value
# Every marker whose name is a key of %$vars is replaced with the value; an
# undefined value gives an empty string, while 0 is written as "0". Names
# are matched literally, so keys holding characters that are special in
# regular expressions (URLs for instance) are harmless.
# Returns the filled text, or nothing when $masque is empty. The text is
# handled line by line: trailing newlines of $masque are not kept.
sub fill_template {
    my ($self, $masque, $vars) = @_;
    return unless $masque;
    my @buf = split(/\n/, $masque);
    foreach my $n (keys %$vars) {
        my $v = $vars->{$n};
        $v = '' unless defined $v;
        foreach my $line (@buf) {
            $line =~ s/<\?\s\$\Q$n\E\s\?>/$v/g;
        }
    }
    return join("\n", @buf);
}
#------------------------------------------------------------------------------
# set_err
#------------------------------------------------------------------------------
# Record an error message: it is added at the end of the list kept in
# $self->{_ERRORS}, and printed when debugging is on. Always returns 1.
sub set_err {
    my ($self, $error) = @_;
    if ($self->{_DEBUG}) {
        print $error, "\n";
    }
    my @known = $self->{_ERRORS} ? @{ $self->{_ERRORS} } : ();
    $self->{_ERRORS} = [ @known, $error ];
    return 1;
}
#------------------------------------------------------------------------------
# errstr
#------------------------------------------------------------------------------
# Return the list of error messages recorded by set_err, or an empty list
# when there is none.
sub errstr {
    my ($self) = @_;
    my $errors = $self->{_ERRORS};
    return () unless $errors;
    return @{$errors};
}
__END__
#------------------------------------------------------------------------------
# POD Documentation
#------------------------------------------------------------------------------
=head1 NAME
MIME::Lite::HTML - Provide routine to transform a HTML page in a MIME-Lite mail
=head1 SYNOPSIS
perl -MMIME::Lite::HTML -e '
new MIME::Lite::HTML
From => "MIME-Lite\@alianwebserver.com",
To => "alian\@cpan.org",
Url => "http://localhost/server-status";'
=head1 VERSION
$Revision: 1.24 $
=head1 DESCRIPTION
This module is a Perl mail client interface for sending message that
support HTML format and build them for you..
This module provide routine to transform a HTML page in MIME::Lite mail.
So you need this module to use MIME-Lite-HTML possibilities
=head2 What's happen ?
The job done is:
=over
=item *
Get the file (LWP) if needed
=item *
Parse page to find include images (gif, jpg, flash)
=item *
Attach them to the mail with adequate headers if asked (default)
=item *
Include external CSS,Javascript file
=item *
Replace relative url with absolute one
=item *
Build the final MIME-Lite object with each part found
=back
=head2 Usage
Have you already seen links like "Send this page to a friend"? With this module,
you can write a script that does this in 3 lines.
It can be used too in a HTML newsletter. You make a classic HTML page,
and give just url to MIME::Lite::HTML.
=head2 Construction
MIME-Lite-HTML use a MIME-Lite object, and RFC2557 construction:
If images and text are present, construction use is:
--> multipart/alternative
------> text/plain
------> multipart/related
-------------> text/html
-------------> each images
If no images but text is present, this is that:
---> multipart/alternative
-------> text/plain if present
-------> text/html
If images but no text, this is:
---> multipart/related
-------> text/html
-------> each images
If no images and no text, this is:
---> text/html
=head2 Documentation
Additional documentation can be found here:
=over
=item *
MIME-lite module
=item *
RFC 822, RFC 1521, RFC 1522 and specially RFC 2557 (MIME Encapsulation
of Aggregate Documents, such as HTML)
=back
=head2 Clients tested
HTML in mail is not fully supported, so this module cannot work with all email
clients. Some clients recognize HTML but do not support images included in
HTML: in fact, they recognize multipart/alternative but not multipart/related.
=over
=item Netscape Messager (Linux-Windows)
100% ok
=item Outlook Express (Windows-Mac)
100% ok. Mac works only with the Content-Location header. Thanks to Steve Benbow
for giving me this feedback and for his tests.
=item Eudora (Windows)
If this module just send HTML and text, (without images), 100% ok.
With images, Eudora didn't recognize multipart/related part as describe in
RFC 2557 even if he can read his own HTML mail. So if images are present in
HTML part, text and HTML part will be displayed both, text part in first.
Two additional headers will be displayed in HTML part too in this case.
Version 1.0 of this module correct major problem of headers displayed
with image include in HTML part.
=item KMail (Linux)
If this module just send HTML and text, (without images), 100% ok.
In other case, Kmail didn't support image include in HTML. So if you set in
KMail "Prefer HTML to text", it display HTML with images broken. Otherwise,
it display text part.
=item Pegasus (Windows)
If this module just send HTML and text, (without images), 100% ok.
Pegasus didn't support images in HTML. When it find a multipart/related
message, it ignore it, and display text part.
=back
If you find others mail client who support (or not support) MIME-Lite-HTML
module, give me some feedback ! If you want be sure that your mail can be
read by maximum of people, (so not only OE and Netscape), don't include
images in your mail, and use a text buffer too. If multipart/related mail
is not recognize, multipart/alternative can be read by the most of mail client.
=head2 Install on WinX with ActiveState / PPM
Just do in DOS "shell":
c:\ ppm
> set repository alian http://www.alianwebserver.com/perl/CPAN
> install MIME-Lite-HTML
> quit
=head2 How know when next release will be ?
Subscribe on http://www.alianwebserver.com/cgi-bin/news_mlh.cgi
=head1 Public Interface
=over
=item new(%hash)
Create a new instance of MIME::Lite::HTML.
The hash can have these keys: [Url], [Proxy], [Debug], [IncludeType],
[ExternImages], [HashTemplate], [LoginDetails], [TextCharset], [HTMLCharset],
[TextEncoding], [HTMLEncoding], [remove_jscript]
=over
=item Url
... is url to parse and send. If this param is found, call of parse routine
and send of mail is done. Else you must use parse routine of MIME::Lite::HTML
and send of MIME::Lite.
=item Proxy
... is url of proxy to use.
Eg: Proxy => 'http://192.168.100.166:8080'
=item remove_jscript
if set, remove all script code from html source
Eg: remove_jscript => 1
=item Debug
... is trace to stdout during parsing.
Eg: Debug => 1
=item IncludeType
... is method to use when finding images:
=over
=item location
Default method is embed them in mail with 'Content-Location' header.
=item cid
Images are embedded in the mail with a 'Content-ID' header and referenced with "cid:" links.
=item extern
Images are not embed, relative url are just replace with absolute,
so images are fetch when user read mail. (Server must be reachable !)
=back
=item ExternImages
This is a listref of regular expressions. If an image matches any of the
regular expressions, it will be rendered as an <img> link, without being
attached to the mail, regardless of the IncludeType setting above.
For example:
ExternImages => [ '.*cat\.jpg.*', 'external/.*' ]
...would mean that "images/cat.jpg" and "external/foo.jpg" would be
sent as external <img> links, but "images/dog.jpg" would be sent using
whatever the default IncludeType (above) is.
=item $hash{'HashTemplate'}
... is a reference to a hash. If present, MIME::Lite::HTML
will substitute <? $name ?> with $hash{'HashTemplate'}{'name'} when parse url
to send. $hash{'HashTemplate'} can be used too for include data for subelement.
Eg:
$hash{'HashTemplate'}{'http://www.al.com/images/sommaire.gif'}=\@data;
or
$hash{'HashTemplate'}{'http://www.al.com/script.js'}='alert("Hello world");';
When the module finds the image http://www.al.com/images/sommaire.gif
in the buffer, it does not fetch the image with LWP but uses the data found in
$hash{'HashTemplate'}. (See eg/example2.pl)
=item LoginDetails
... is the couple user:password for use with restricted url.
Eg: LoginDetails => 'my_user:my_password'
=item TextCharset
... is the character set to use for the text part.
Eg: TextCharset => 'iso-8859-7'
for Greek. If none specified, the default is used (iso-8859-1).
=item HTMLCharset
... is the character set to use for the html part.
Eg: HTMLCharset => 'iso-8859-7'
for Greek. If none specified, the default is used (iso-8859-1).
Take care, as that option does NOT change the character
set of the HTML page, it only changes the character set of the mime part.
=item TextEncoding
... is the Encoding to be used for the text part (if such a part
exists). If none specified, the default is used (7bit).
Eg: TextEncoding => 'base64'
=item HTMLEncoding
... is the Encoding to be used for the html part. If none specified, the
default is used (quoted-printable).
Eg: HTMLEncoding => 'base64'.
=back
Others keys are use with MIME::Lite constructor.
This MIME-Lite keys are: Bcc, Encrypted, Received, Sender, Cc, From,
References, Subject, Comments, Keywords, Reply-To, To, Content-*,
Message-ID,Resent-*, X-*,Date, MIME-Version, Return-Path,
Organization
=item parse($html, [$url_txt], [$url_base])
Subroutine used for created HTML mail with MIME-Lite
Parameters:
=over
=item $html
Url of the HTML file to send; it can be a local file. If $html is not a
url (http, https, ftp, file or nntp), $html itself is used as the HTML buffer.
Example :
http://www.alianwebserver.com
file://c|/tmp/index.html
<img src=toto.gif>
=item $url_txt
Url of text part to send for person who doesn't support HTML mail.
As $html, $url_txt can be a simple buffer.
=item $url_base
$url_base is used if $html is a buffer, for get element found in HTML buffer.
=back
Return the MIME::Lite part to send
=item size()
Return the size of the mail in characters (so octets) that will be sent.
(So use it *after* parse method). Use this method for control
size of mail send, I personnaly hate receive 500k by mail.
I pay for a 33k modem :-(
=back
=head1 Private methods
=over
=item build_mime_object($html,[$txt],[@mail])
(private)
Build the final MIME-Lite object to send with each part read before
=over
=item $html
Buffer of HTML part
=item $txt
Buffer of text part
=item @mail
List of images attached to HTML part. Each item is a MIME-Lite object.
=back
See "Construction" in "Description" for know how MIME-Lite object is build.
=item create_image_part($url)
(private)
Fetch if needed $url, and create a MIME part for it.
=item include_css($gabarit,$root)
(private)
Search in HTML buffer ($gabarit) to remplace call to extern CSS file
with his content. $root is original absolute url where css file will
be found.
=item include_javascript($gabarit,$root)
(private)
Search in HTML buffer ($gabarit) to remplace call to extern javascript file
with his content. $root is original absolute url where javascript file will
be found.
=item input_image($gabarit,$root)
(private)
Search in HTML buffer ($gabarit) to remplace input form image with his cid
Return final buffer and list of MIME::Lite part
=item link_form($gabarit,$root)
(private)
Replace link to formulaire with absolute link
=item fill_template($masque,$vars)
=over
=item $masque
Text of the template (a buffer, not a file path)
=item $vars
hash ref with keys/val to substitue
=back
Give template with remplaced variables
Ex: if $$vars{age}=12, and $masque have
J'ai <? $age ?> ans,
this function give:
J'ai 12 ans,
=back
=head1 Error Handling
The set_err routine is used privately. You can ask for an array of all the
errors which occurred inside the parse routine by calling:
@errors = $mailHTML->errstr;
If no errors were found, it returns an empty list.
=head1 CGI Example
#!/usr/bin/perl -w
# A cgi program that do "Mail this page to a friend";
# Call this script like this :
# script.cgi?email=myfriend@isp.com&url=http://www.go.com
use strict;
use CGI qw/:standard/;
use CGI::Carp qw/fatalsToBrowser/;
use MIME::Lite::HTML;
my $mailHTML = new MIME::Lite::HTML
From => 'MIME-Lite@alianwebserver.com',
To => param('email'),
Subject => 'Your url: '.param('url');
my $MIMEmail = $mailHTML->parse(param('url'));
$MIMEmail->send; # or for win user : $mail->send_by_smtp('smtp.fai.com');
print header,"Mail envoye (", param('url'), " to ", param('email'),")<br>\n";
=head1 TERMS AND CONDITIONS
Copyright (c) 2000 by Alain BARBET alian (at) cpan.org
All rights reserved. This program is free software; you can
redistribute it and/or modify it under the same terms as Perl
itself.
This software comes with B<NO WARRANTY> of any kind.
See the COPYING file in the distribution for details.
=head1 AUTHOR
Alain BARBET alian@cpan.org , see file Changes for helpers.
=cut
|