/usr/bin/oai_pmh is in libhttp-oai-perl 4.03-1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 | #!/usr/bin/perl
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
if 0; # not running under some shell
use encoding 'utf8';
use HTTP::OAI;
use Getopt::Long;
use Pod::Usage;
use XML::LibXML;
=head1 NAME
oai_pmh.pl - pipe OAI-PMH to the command-line
=head1 SYNOPSIS
oai_pmh.pl <options> [baseURL]
=head1 OPTIONS
=over 8
=item --help
=item --man
=item --verbose
Be more verbose (repeatable).
=item --force
Force a non-conformant OAI request.
=item --from <ISO datetime>
Request only those records with a datestamp on or after this date.
=item --identifier <identifier>
OAI identifier to GetRecord or ListMetadataFormats.
=item --metadataPrefix <mdp>
Specify format of metadata to retrieve.
=item -X/--request <command>
Verb to request, defaults to ListRecords.
=item --set <oai set>
Request only those records in a set.
=item --until <ISO datetime>
Request only those records with a datestamp on or before this date.
=back
=head1 DESCRIPTION
Retrieve data from OAI-PMH endpoints. The output format is:
<headers>
<content>
<FORMFEED>
Where <headers> are in HTTP header format. Content will be the raw XML as exposed by the repository. Each record is separated by a FORMFEED character.
For example:
oai_pmh.pl -X GetRecord --metadataPrefix oai_dc \
--identifier oai:eprints.soton.ac.uk:20 http://eprints.soton.ac.uk/cgi/oai2
=cut
# Command-line handling.
#
# Every option is stored in %opts under its long name. After the
# bookkeeping keys (verbose, request) have been removed, whatever is left
# in %opts is passed straight through as arguments to the harvester call.
my %opts = (
    verbose => 1,    # baseline noise level; each --verbose adds one
);

GetOptions(\%opts,
    'help',
    'man',
    'metadataPrefix=s',
    'request|X=s',
    'identifier=s',
    'verbose+',
    'force',
    'from=s',
    'set=s',         # documented under OPTIONS, so it must be accepted here
    'until=s',
) or pod2usage(2);

pod2usage(1) if $opts{help};
pod2usage({-verbose => 2}) if $opts{man};

# The verbosity level is consumed by debug() and is not a request argument.
my $noise = delete $opts{verbose};

# With no explicit verb, fall back to a Dublin Core ListRecords harvest.
if (!exists $opts{request}) {
    $opts{request} = 'ListRecords';
    # Only supply the default prefix; an explicit --metadataPrefix wins.
    $opts{metadataPrefix} = 'oai_dc' if !defined $opts{metadataPrefix};
}

# The base URL is the last positional argument and is mandatory.
my $base_url = pop @ARGV;
pod2usage(1) if !$base_url;
# Issue the request and report the outcome.
my $ha = HTTP::OAI::Harvester->new(baseURL => $base_url);

my $f = delete $opts{request};

# $f is used as a method name on the harvester below, so restrict it to
# the six OAI-PMH verbs instead of invoking whatever name the user typed.
my %is_verb = map { $_ => 1 } qw(
    GetRecord
    Identify
    ListIdentifiers
    ListMetadataFormats
    ListRecords
    ListSets
);
if( !$is_verb{$f} )
{
    die "Unknown request '$f', expected one of: "
        . join( ', ', sort keys %is_verb ) . "\n";
}

debug("Requesting $f", 2);

# Remaining %opts entries (metadataPrefix, identifier, from, until, set,
# force) become the request arguments; output_record is passed as the
# onRecord callback.
my $r = $ha->$f(
    %opts,
    onRecord => \&output_record,
);

# Check for failure before touching the response contents.
if( !$r->is_success )
{
    die "Error in response: " . $r->message . "\n";
}

# Metadata formats are printed here, read from the response object, one
# form-feed terminated block per format.
if( $f eq "ListMetadataFormats" )
{
    foreach my $mdf ($r->metadataFormat) {
        print "metadataPrefix: " . $mdf->metadataPrefix . "\n";
        print "schema: " . $mdf->schema . "\n";
        print "metadataNamespace: " . $mdf->metadataNamespace . "\n";
        print "\n";
        print "\f";
    }
}
# debug( $msg, $level )
#
# Write $msg (plus a newline) to STDERR via warn, but only when the
# file-level verbosity $noise is at least $level.
sub debug
{
    my $text      = shift;
    my $threshold = shift;

    if( $noise >= $threshold )
    {
        warn $text . "\n";
    }
}
# output_record( $rec )
#
# Print one harvested item to the currently selected output handle:
# header fields as "name: value" lines, a blank line, the metadata XML if
# the item has any, and a terminating form feed.
#
# $rec is either a header object itself (isa HTTP::OAI::Header) or an
# object exposing one through ->header.
sub output_record
{
    my $item = shift;

    my $hdr;
    if( $item->isa( 'HTTP::OAI::Header' ) )
    {
        $hdr = $item;
    }
    else
    {
        $hdr = $item->header;
    }

    # Single-valued header fields, in fixed order.
    for my $field (qw( identifier datestamp status ))
    {
        my $value = $hdr->$field;
        print $field . ": " . $value . "\n";
    }

    # A header may carry any number of setSpec values.
    print "setSpec: " . $_ . "\n" for $hdr->setSpec;

    # Blank line separates the headers from the content.
    print "\n";

    # Items without a metadata method, or with undefined metadata, have
    # no content to print.
    my $md = $item->can( "metadata" ) ? $item->metadata : undef;
    if( defined $md )
    {
        print $md->dom->toString( 1 );
    }

    print "\f";
}
|