/usr/share/perl5/HTML/AutoPagerize.pm is in libhtml-autopagerize-perl 0.02-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
package HTML::AutoPagerize;
use strict;
use 5.8.1;
our $VERSION = '0.02';
use Carp;
use HTML::TreeBuilder::XPath;
use URI;
# Constructor. Returns a new object, blessed into the invoking class, whose
# list of registered SITEINFO entries starts out empty.
sub new {
    my ($class) = @_;

    my %state = (sites => []);

    return bless \%state, $class;
}
# Combined getter/setter for the registered SITEINFO entries.
# When called with an argument, that argument (whatever it is, undef
# included) replaces the stored value. The current value is always returned.
sub sites {
    my ($self, @args) = @_;

    if (@args) {
        $self->{sites} = $args[0];
    }

    return $self->{sites};
}
# Returns a fresh array ref holding the registered SITEINFO entries ordered
# by the length of their url value, longest first. The stored list itself is
# left in its original order.
sub sorted_sites {
    my ($self) = @_;

    my @longest_first = sort {
        length($b->{url}) <=> length($a->{url})
    } @{ $self->sites };

    return \@longest_first;
}
# Registers one SITEINFO entry, given as a flat key/value list.
# The 'url' and 'nextLink' keys are mandatory; the first one found missing
# (checked in that order) is reported with croak. The url value is compiled
# into a regexp before the entry is appended to the object's list.
# Returns what push returns: the number of entries now registered.
sub add_site {
    my ($self, %site) = @_;

    my @missing = grep { !defined $site{$_} } qw( url nextLink );
    croak "key '$missing[0]' needed for SITEINFO" if @missing;

    # Compile once here so later matching works on a ready-made pattern.
    $site{url} = qr/$site{url}/;

    return push @{ $self->{sites} }, \%site;
}
# Extracts paging information from one fetched page.
#
# Arguments:
#   $uri  - address the page came from; it is matched against the registered
#           SITEINFO url patterns and used as the base for relative links.
#   $html - HTML source of the page.
#
# Returns nothing (bare return) when no SITEINFO entry matches $uri.
# Otherwise returns a hash ref, or undef when nothing was extracted:
#   next_link    - URI object with the absolute address of the next page; set
#                  only when the nextLink XPath selects a node with an href.
#   page_element - node set selected by the pageElement XPath, when the
#                  SITEINFO entry defines one.
#
# NOTE: the parsed tree is deliberately not deleted here, because the nodes
# handed back in page_element belong to it.
sub handle {
    my ($self, $uri, $html) = @_;

    my $siteinfo = $self->site_info_for($uri) or return;

    my $tree = HTML::TreeBuilder::XPath->new;
    $tree->parse($html);
    # parse() only feeds a chunk of input; eof() flushes whatever the parser
    # still buffers and finalizes the tree before it is queried.
    $tree->eof;

    my $res;

    # A last page has no "next" link, and a matched node may lack an href.
    # Check both instead of calling attr() on undef or resolving an undefined
    # href (which would yield the current page's own address).
    my $link_nodes = $tree->findnodes($siteinfo->{nextLink});
    if ($link_nodes && $link_nodes->size) {
        my $href = $link_nodes->shift->attr('href');
        if (defined $href) {
            $res->{next_link} = URI->new_abs($href, $uri);
        }
    }

    if (my $page_element = $siteinfo->{pageElement}) {
        if (my $content_nodes = $tree->findnodes($page_element)) {
            $res->{page_element} = $content_nodes;
        }
    }

    return $res;
}
# Looks up the SITEINFO entry responsible for $uri.
# Entries are tried in the order given by sorted_sites, and the first one
# whose url pattern matches $uri is returned. When none matches, a bare
# return yields undef in scalar context and an empty list in list context.
sub site_info_for {
    my ($self, $uri) = @_;

    my $candidates = $self->sorted_sites;

    foreach my $candidate (@$candidates) {
        next unless $uri =~ $candidate->{url};
        return $candidate;
    }

    return;
}
1;
__END__
=for stopwords AutoPagerize SITEINFO userscript
=head1 NAME
HTML::AutoPagerize - Utility to load AutoPagerize SITEINFO stuff
=head1 SYNOPSIS
use HTML::AutoPagerize;
use LWP::Simple ();
my $autopager = HTML::AutoPagerize->new;
$autopager->add_site(
url => 'http://.+.tumblr.com/',
nextLink => '//div[@id="content" or @id="container"]/div[last()]/a[last()]',
pageElement => '//div[@id="content" or @id="container"]/div[@class!="footer" or @class!="navigation"]',
);
my $uri = 'http://otsune.tumblr.com/';
my $html = LWP::Simple::get($uri);
my $res = $autopager->handle($uri, $html);
if ($res) {
my $next_link = $res->{next_link}; # URI object
my $content = $res->{page_element}; # XML::XPathEngine::NodeSet object. may be empty
}
=head1 DESCRIPTION
HTML::AutoPagerize is a utility module to load SITEINFO defined in
AutoPagerize. AutoPagerize is a userscript that automatically figures
out the I<next link> of the current page, then fetches that page and
inserts its content by extracting the I<page element>.
=head1 AUTHOR
Tatsuhiko Miyagawa E<lt>miyagawa@bulknews.netE<gt>
=head1 LICENSE
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=head1 SEE ALSO
L<WWW::Mechanize::AutoPager>, L<http://swdyh.infogami.com/autopagerize>
=cut
|