This file is indexed.

/usr/share/doc/libweb-scraper-perl/examples/scraper is in libweb-scraper-perl 0.38-1.

This file is owned by root:root, with mode 0o755.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
#!/usr/bin/perl
use strict;
use warnings;

use Config;
use Term::ReadLine;
use Data::Dumper;
use HTML::Entities;
use URI;
use Web::Scraper;
use YAML;

# WARN - build a callback for use as a scraper rule target.
#
# Returns a code ref that, when invoked, emits the markup of the node
# currently held in $_ via warn():
#   * text nodes  -> their as_XML form, entity-encoded for " ' < > &
#   * other nodes -> their as_HTML form with the same entity set and an
#                    empty indent string
# The empty prototype lets callers write a bare `WARN` as a term.
sub WARN() {
    return sub {
        my $markup;
        if ($_->isTextNode) {
            $markup = HTML::Entities::encode($_->as_XML, q("'<>&));
        }
        else {
            # NOTE(review): the trailing {} is passed straight through to
            # as_HTML; its exact meaning is defined by the node class.
            $markup = $_->as_HTML(q('"&<>), "", {});
        }
        warn $markup;
    };
}

# $print - output helper used for potentially long dumps.
#
# Sends its arguments through the command named in $ENV{PAGER} when that
# variable is set to a true value; otherwise (or when the pager cannot be
# started) writes them to the currently selected output handle.
my $print = sub {
    my @chunks = @_;

    if (my $pager_cmd = $ENV{PAGER}) {
        # Quitting the pager before it has consumed everything must not
        # kill the interactive session with SIGPIPE.
        local $SIG{PIPE} = 'IGNORE';

        if (open my $pager, '|-', $pager_cmd) {
            print {$pager} @chunks;
            close $pager;    # waits for the pager to exit
            return;
        }

        # Pager could not be started: report it and fall through to plain
        # output so the text is not lost.
        warn "scraper: cannot run pager '$pager_cmd': $!\n";
    }

    print @chunks;
};

# Session state shared with the subs below:
#   @stack  - statements entered at the prompt that evaluated without error
#   $source - array ref describing where the input came from
#             (set by process_args)
my @stack;
my $source;

# Work out what to scrape; bail out with a usage line if nothing usable
# was supplied.
my $stuff = process_args($ARGV[0]);
die "Usage: scraper [URI-or-filename]\n" unless $stuff;

my $term    = Term::ReadLine->new("Web::Scraper");

# The scraper body is just the interactive loop, handed the first argument
# the scraper block receives.
my $scraper = scraper {
    my($tree) = @_;
    run_loop($tree, $term);
};
$scraper->user_agent->env_proxy;

my $result  = $scraper->scrape($stuff);

# process_args - decide what to scrape and record where it came from.
#
# Argument:
#   $uri - optional first command-line argument: an http(s) URL or a path
#          to an existing file.
#
# Sources are tried in this order:
#   1. non-empty data on a non-terminal STDIN -> reference to the content
#   2. an http:// or https:// URL             -> URI object
#   3. an existing file                       -> reference to its content
#
# Side effect: sets the file-level $source to [ 'stdin' ], [ 'URI', $uri ]
# or [ 'file', $uri ] accordingly.
#
# Returns nothing (empty list / undef) when no source applies.  Dies when
# the file exists but cannot be opened.
sub process_args {
    my $uri = shift;

    if (!-t STDIN and my $content = join "", <STDIN>) {
        $source = [ 'stdin' ];
        return \$content;
    } elsif ($uri && $uri =~ m!^https?://!) {
        $source = [ "URI", $uri ];
        return URI->new($uri);
    } elsif (defined $uri && length $uri && -e $uri) {
        $source = [ 'file', $uri ];
        open my $fh, "<", $uri or die "$uri: $!";
        my $html = join "", <$fh>;
        close $fh;

        # Return a reference, like the stdin branch does: a reference is
        # always true, so an empty file (or one containing just "0") is not
        # mistaken by the caller for "no input given".
        return \$html;
    }

    return;
}

# run_loop - the interactive prompt.
#
# Arguments:
#   $tree - the parsed document handed to the scraper block
#   $term - Term::ReadLine object used to read input
#
# Commands:
#   d      dump the current result with Data::Dumper
#   y      dump the current result as YAML
#   s      show the document source (through $print, i.e. the pager)
#   c      print a standalone script containing the last good statement
#   c all  print a standalone script containing every good statement
#   q      leave the loop
# Anything else is evaluated as Perl; statements that evaluate without an
# error are appended to the file-level @stack.
#
# The lexical names $tree, $term and $in are deliberately left as they are:
# string eval runs in this scope, so typed-in code can refer to them.
#
# Returns when 'q' is entered or readline reports end of input.
sub run_loop {
    my($tree, $term) = @_;
    while (defined(my $in = $term->readline("scraper> "))) {
        # A blank line is neither a command nor a statement worth recording.
        next unless $in =~ /\S/;

        if ($in eq 'd') {
            $Data::Dumper::Indent = 1;
            warn Dumper result;
        } elsif ($in eq 'y') {
            warn Dump result;
        } elsif ($in eq 's') {
            $print->($tree->as_HTML(q('"&<>), "  ", {}));
        } elsif ($in eq 'q') {
            return;
        } elsif ($in eq 'c') {
            # With no history yet, emit the bare skeleton instead of passing
            # an undefined statement along.
            print generate_code($source, @stack ? $stack[-1] : ());
        } elsif ($in =~ /^c\s+all\s*$/) {
            print generate_code($source, @stack);
        } else {
            # Deliberate string eval: this tool exists to run what the user
            # types at the prompt.
            my $res = eval $in;

            # Capture $@ once so nothing run by warn() can change the
            # decision below.
            my $err = $@;
            if ($err) {
                warn $err;
            } else {
                push @stack, $in;
            }
        }
    }
}

# generate_code - render a standalone Perl script that repeats the session.
#
# Arguments:
#   $source - array ref: [ 'stdin' ], [ 'URI', $url ] or [ 'file', $path ]
#   @stack  - statements to place inside the generated scraper block;
#             undefined entries are skipped, and each statement gets a
#             trailing ';' unless it already ends with one
#
# Returns the script text as a single string.
sub generate_code {
    my($source, @stack) = @_;

    # Render a value as a single-quoted Perl literal.  Only backslash and
    # the single quote need escaping there, so '@', '$' and '"' in a URL or
    # file name cannot be interpolated by the generated script.
    my $quote = sub {
        my $text = defined $_[0] ? $_[0] : '';
        $text =~ s/([\\'])/\\$1/g;
        return "'$text'";
    };

    my $code_stack = join "\n",
        map  { "    $_" . (/;$/ ? "" : ";") }
        grep { defined } @stack;

    my $kind = $source->[0];
    my($var, $stuff) =
        $kind eq 'stdin' ? ('$input', '\join "", <STDIN>') :
        $kind eq 'URI'   ? ('$uri',   'URI->new(' . $quote->($source->[1]) . ')') :
        $kind eq 'file'  ? ('$file',  '\do { my $file = ' . $quote->($source->[1])
                                    . q[; open my $fh, '<', $file or die "$file: $!"; join '', <$fh> }]) :
                           ('$stuff', '...');    # unknown source: placeholder for the user to fill in

return <<CODE;
#!$Config{perlpath}
use strict;
use Web::Scraper;
use URI;

my $var = $stuff;
my \$scraper = scraper {
$code_stack
};
my \$result = \$scraper->scrape($var);
CODE

}