/usr/share/perl5/TM/Index/Match.pm is in libtm-perl 1.56-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 | package TM::Index::Match;
use strict;
use warnings;
use Data::Dumper;
use base qw(TM::Index);
=pod
=head1 NAME
TM::Index::Match - Topic Maps, Indexing support (match layer)
=head1 SYNOPSIS
# somehow get a map (any subclass of TM will do)
my $tm = ...
# one option: create a lazy index which learns as you go
use TM::Index::Match;
my $idx = new TM::Index::Match ($tm);
# for most operations which involve match_forall to be called
# reading and querying the map should be much faster
# learn about some statistics, what keys are most likely to be useful
my @optimized_keys = @{ $stats->{proposed_keys} };
# another option: create an eager index
my $idx = new TM::Index::Match ($tm, closed => 1);
# pre-populate it, use the proposed keys
$idx->populate (@optimized_keys);
# this may be a lengthy operation if the map is big
# but then the index is 'complete'
# query map now, should also be faster
# getting rid of an index explicitly
$idx->detach;
# cleaning an index
$idx->discard;
=head1 DESCRIPTION
This index implements a generic query cache which can capture all queries not handled by more
specific indices. This class inherits directly from L<TM::Index>.
=head1 INTERFACE
=head2 Constructor
The constructor/destructors are the same as that described in L<TM::Index>.
=head2 Methods
=over
=item B<populate>
I<$idx>->populate (I<@list_of_keys>)
To populate the index with canned results this method can be invoked. At this stage it is not very
clever and may take quite some time to work its way through a larger map. This is most likely
something to be done in the background.
The list of keys to be passed in is a bit black magic. Your current best bet is to look at the
index statistics method, and retrieve a proposed list from there:
@optimized_keys = @{ $stats->{proposed_keys} };
$idx->populate (@optimized_keys[0..2]); # only take the first few
If this list is empty, nothing clever will happen.
=cut
sub populate {
my $self = shift;
my @halfkeys = @_ or return;
my $map = $self->{map};
my $indices = delete $map->{indices}; # detach temporarily
my @mids = map { $_->[TM->LID] } $map->toplets;
foreach my $halfkey (@halfkeys) {
my @keys = split /\./, $halfkey;
#warn "keys ".(join " ", @keys);
_combinatorial (\@mids, [], scalar @keys - 1, \@keys, $self->{closed}, $map, $self->{cache});
}
$map->{indices} = $indices; # re-attach
sub _combinatorial {
my $mids = shift; # will be passed through
my $idxs = shift; # will be accumulated in every recursion
my $depth = shift; # will be decremented at every recursion
my $keys = shift; # just pass them through
my $closed = shift; # pass through
my $map = shift;
my $cache = shift;
for (my $i = 0; $i <= $#$mids; $i++) { # iterate over all indices of mids
my $l = [ @$idxs, $i ]; # build an uptodate index list
if ($depth) { # we are still not at the bottom of things
_combinatorial ($mids, $l, $depth - 1, $keys, $closed, $map, $cache); # recurse
} else { # we reached the correct length
#warn "have indices ".join ("..", @$l);
my @vals = map { $mids->[$_] } @$l; # the values are all mids, taking from the mids list
my %query = map { $_ => shift @vals } @$keys; # build a match query
#warn "query ".Dumper \%query;
my @as = $map->match_forall (%query); # compute the results
#warn "got back ".Dumper \ @as;
my @skeys = sort keys %query; # recompute the total key (including the values)
my $skeys = join ('.', @skeys);
my @svals = map { $query{$_} } @skeys;
my $key = "$skeys:" . join ('.', @svals);
#warn "computed key '$key'";
if (@as) { # if the match list is not empty
$cache->{$key} = [ map { $_->[TM->LID] } @as ]; # memorize it
} elsif ($closed) { # otherwise, if empty, check on close
# don't do nothing, dude # that's exactly the meaning of 'closed'
} else {
$cache->{$key} = []; # in an open world record the result
}
}
}
}
}
=pod
=item B<statistics>
I<$hashref> = I<$idx>->statistics
This returns a hash containing statistical information about certain keys, how much data is behind
them, how often they are used when adding information to the index, how often data is read out
successfully. The C<cost> component can give you an estimated about the cost/benefit.
=cut
sub statistics {
my $self = shift;
my %stats;
foreach my $q (keys %{ $self->{cache} }) {
$q =~ /([^:]+)/;
my $ki;
$ki->{writes}++;
$ki->{reads} += $self->{reads}->{$q};
$ki->{size} += scalar @{ $self->{cache}->{$q} };
$ki->{cost} = $ki->{writes} / $ki->{reads}; # it is impossible that reads == 0
$ki->{avg_size_of_read} = $ki->{size} / $ki->{reads};
$ki->{avg_size_of_write} = $ki->{size} / $ki->{writes};
$stats{keys}->{$1} = $ki;
}
$stats{proposed_keys} = [ sort { $stats{keys}->{$a}->{cost} <=> $stats{keys}->{$b}->{cost} } keys %{$stats{keys}} ];
return \%stats;
}
=pod
=back
=head1 SEE ALSO
L<TM>, L<TM::Index>
=head1 COPYRIGHT AND LICENSE
Copyright 200[6] by Robert Barta, E<lt>drrho@cpan.orgE<gt>
This library is free software; you can redistribute it and/or modify it under the same terms as Perl
itself.
=cut
our $VERSION = 0.3;
our $REVISION = '$Id: Match.pm,v 1.2 2006/12/01 08:01:00 rho Exp $';
1;
__END__
|