/usr/share/doc/libplucene-perl/examples/dump_index is in libplucene-perl 1.25-3.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
#!/usr/bin/perl
=head1 NAME
dump_index - dump the contents of an index
=head1 SYNOPSIS
perl -w dump_index $DIR
=head1 DESCRIPTION
This will dump out an index in human readable form. It can be used when
debugging to compare indexes created with Plucene to those created with
Lucene.
=cut
use strict;
use warnings;
use Plucene::Index::Reader;
# Dump the fields, terms and per-term postings of the index stored in the
# directory named by the single command-line argument.

# Refuse to run without a directory instead of passing undef to the reader.
my $where = shift @ARGV;
die "Usage: $0 DIR\n" unless defined $where and length $where;

my $r = Plucene::Index::Reader->open($where)
    or die "Could not open an index in '$where'\n";

# If the top-level reader exposes a non-empty {readers} list, dump each of
# those; otherwise treat the reader itself as the only one.  Defaulting to
# an empty list keeps 'strict refs' enabled even when {readers} is undefined.
my @readers = @{ $r->{readers} || [] };
@readers = ($r) unless @readers;

print "We have " . scalar(@readers) . " readers\n";

# A lone SegmentsReader means the fallback above kicked in: there were no
# sub-readers to walk, so there is nothing to dump.
if (@readers == 1 and $r->isa("Plucene::Index::SegmentsReader")) {
    die "But no segments\n";
}

print "\n\nDocuments:\n";

for my $reader (@readers) {
    print "Segment "
        . $reader->{segment} . " has "
        . $reader->max_doc
        . " docs\n";

    # Fetched before the field listing, as in the original ordering.
    my @terms = $reader->terms;

    print "Fields:\n";
    for my $field ($reader->field_infos->fields) {
        print "\t" . $field->number . ": " . $field->name;
        print " [indexed]" if $field->is_indexed;
        print "\n";
    }

    print "Terms: \n";

    # One term_docs object is reused for every term; seek() repositions it.
    my $td = $reader->term_docs;
    for my $t (@terms) {
        while ($t->next) {
            my $term = $t->term;
            print $term->field . ": " . $term->text . "\n";

            $td->seek($term);

            # read() hands back two parallel array refs: document numbers
            # and the matching occurrence counts.
            my ($docs, $freqs) = $td->read;
            for my $i (0 .. $#{ $docs || [] }) {
                print "\t Doc "
                    . $docs->[$i] . " ("
                    . $freqs->[$i]
                    . " occurrences)\n";
            }
        }
    }
}

print "Total documents: " . $r->max_doc . " in " . scalar(@readers) . " segments\n";
|