/usr/bin/listimages is in libcam-pdf-perl 1.60-3.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 | #!/usr/bin/perl -w
package main;
use warnings;
use strict;
use CAM::PDF;
use Getopt::Long;
use Pod::Usage;
our $VERSION = '1.60';
# Command-line switches; every flag starts out disabled.
my %opts = map { $_ => 0 } qw(verbose help version);

# Allow single-letter switches to be bundled (e.g. -vh).
Getopt::Long::Configure('bundling');
my $parsed_ok = GetOptions(
   'v|verbose' => \$opts{verbose},
   'h|help'    => \$opts{help},
   'V|version' => \$opts{version},
);

# Unknown or malformed switches: brief usage, exit status 1.
pod2usage(1) if (!$parsed_ok);

# --help: full manual page, exit status 0.
pod2usage(-exitstatus => 0, -verbose => 2) if ($opts{help});

# --version: report the version of the CAM::PDF library and stop.
if ($opts{version})
{
   print "CAM::PDF v$CAM::PDF::VERSION\n";
   exit 0;
}

# An input PDF filename is mandatory.
pod2usage(1) if (@ARGV < 1);
# Open the PDF named on the command line, walk every page, and print one line
# per image found.  Two kinds of images are reported:
#   * objects referenced from the page content by a "/Name Do" operator
#   * inline images delimited by the BI ... ID ... EI operators
my $file = shift;
my $doc = CAM::PDF->new($file) || die "$CAM::PDF::errstr\n";
my $pages = $doc->numPages();
my $nimages = 0;   # running image counter across all pages
for my $p (1..$pages)
{
   my $c = $doc->getPageContent($p);

   # The capturing group makes split() keep every "/Name Do" operator as its
   # own list element, interleaved with the content surrounding it.
   my @parts = split /(\/[\w]+\s*Do)\b/xms, $c;
   foreach my $part (@parts)
   {
      if ($part =~ /\A(\/[\w]+)\s*Do\z/xms)
      {
         # Referenced object: resolve the name and report its dictionary data.
         my $ref = $1;
         $nimages++;
         my $xobj = $doc->dereference($ref, $p);
         my $objnum = $xobj->{objnum};
         my $im = $doc->getValue($xobj);

         # Each property may appear under its long or abbreviated key; a
         # found entry is unwrapped with getValue(), a missing one reports 0.
         my $length = $im->{Length} || $im->{L} || 0;
         if ($length)
         {
            $length = $doc->getValue($length);
         }
         my $width = $im->{Width} || $im->{W} || 0;
         if ($width)
         {
            $width = $doc->getValue($width);
         }
         my $height = $im->{Height} || $im->{H} || 0;
         if ($height)
         {
            $height = $doc->getValue($height);
         }
         print "Image $nimages page $p, (w,h)=($width,$height), ref $ref = object $objnum, length $length\n";
      }
      else
      {
         # This code may break if there are legitimate strings "BI",
         # "ID" and "EI" in order in the page (which happened in the
         # PDF reference doc, of course!)

       BI:
         while ($part =~ s/.*?\bBI\b\s*//xms)
         {
            # s/// returns the number of substitutions made, not the captured
            # text, so test for success first and then read the capture from
            # $1.  (Assigning the s/// result directly always yielded "1",
            # which made every inline image fail the "ID" test below.)
            my $found_end = $part =~ s/\A(.*?)\s*\bEI\b\s*//xms;
            next BI if (!$found_end);
            my $im = $1;

            $im =~ s/\A.*\bBI\b//xms; # this may get rid of a fake BI if there is one in the page

            # Easy tests:
            next BI if ($im =~ m/ \A [)] /xms);
            next BI if ($im =~ m/ [(] \z /xms);
            next BI if ($im !~ m/ \bID\b /xms);

            # make sure that there is an open paren before every close
            # if not, then the "BI" was part of a string
            my $test = $im;
            $test =~ s/ \\[()] //gxms; # get rid of escaped parens for the test
            while ($test =~ s/ \A(.*?) [)] //xms)
            {
               my $bit = $1;
               next BI if ($bit !~ m/ [(] /xms);
            }

            $nimages++;

            # Dimensions may use the abbreviated (/W, /H) or the long
            # (/Width, /Height) key; report 0 when a key is absent.
            my $w = 0;
            my $h = 0;
            if ($im =~ m/ \/W(|idth)\s*(\d+) /xms)
            {
               $w = $2;
            }
            if ($im =~ m/ \/H(|eight)\s*(\d+) /xms)
            {
               $h = $2;
            }
            print "Image $nimages page $p, (w,h)=($w,$h), inline\n";
         }
      }
   }
}
__END__
=for stopwords listimages.pl
=head1 NAME
listimages.pl - List all of the images in a PDF document
=head1 SYNOPSIS
listimages.pl [options] infile.pdf
Options:
-v --verbose print diagnostic messages
-h --help verbose help message
-V --version print CAM::PDF version
=head1 DESCRIPTION
Searches the PDF for images and lists them on STDOUT in one of the
following formats:
Image <n> page <p>, (w,h)=(<w>,<h>), ref <label> = object <objnum>, length <l>
Image <n> page <p>, (w,h)=(<w>,<h>), inline
=head1 SEE ALSO
CAM::PDF
F<crunchjpgs.pl>
F<extractallimages.pl>
F<extractjpgs.pl>
F<uninlinepdfimages.pl>
=head1 AUTHOR
See L<CAM::PDF>
=cut
|