/usr/lib/perl5/KinoSearch1/Index/FieldInfos.pm is in libkinosearch1-perl 1.00-1build3.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 | package KinoSearch1::Index::FieldInfos;
use strict;
use warnings;
use KinoSearch1::Util::ToolSet;
use base qw( KinoSearch1::Util::Class Exporter );
# Per-field flag bits as stored in the .fnm file.  Each flag is a one-byte
# string; encode_fnm_bits/decode_fnm_bits combine and test them with Perl's
# string bitwise operators.
use constant INDEXED => "\x01";
use constant VECTORIZED => "\x02";
use constant OMIT_NORMS => "\x10";
# Declared outside the BEGIN block so the package variable is visible to
# Exporter; it is populated inside BEGIN, i.e. at compile time.
our @EXPORT_OK;
BEGIN {
# NOTE(review): init_instance_vars comes from the base class; presumably it
# registers these names and default values for new objects -- confirm in
# KinoSearch1::Util::Class.
__PACKAGE__->init_instance_vars(
# members
# by_name   - replaced by a hashref in init_instance; maps field name to
#             its Field object
# by_num    - replaced by an arrayref in init_instance; Field objects in
#             field-number order
# from_file - set to 1 by read_infos; add_field and _assign_field_nums
#             refuse to run once it is true
by_name => undef,
by_num => undef,
from_file => 0,
);
# NOTE(review): presumably generates get_from_file/set_from_file accessors --
# confirm against the base class.
__PACKAGE__->ready_get_set(qw( from_file ));
# The flag constants are exportable on request only.
@EXPORT_OK = qw(
INDEXED
VECTORIZED
OMIT_NORMS
);
}
use KinoSearch1::Document::Field;
# Give a new object its two empty lookup tables: a hash keyed by field name
# and an array ordered by field number.  The value of the final assignment
# (the new array reference) is what the sub returns, as before.
sub init_instance {
    my ($self) = @_;
    $self->{by_name} = {};
    return $self->{by_num} = [];
}
# Return a deep copy of this FieldInfos: a fresh object carrying the same
# from_file flag, whose tables hold clones of every Field rather than the
# originals.
sub clone {
    my ($self) = @_;

    my $evil_twin = __PACKAGE__->new;
    $evil_twin->{from_file} = $self->{from_file};

    # Clone each Field once and register the same copy in both tables so that
    # the by-number and by-name views of the twin stay in agreement.
    my ( @cloned_by_num, %cloned_by_name );
    foreach my $original ( @{ $self->{by_num} } ) {
        my $dupe = $original->clone;
        push @cloned_by_num, $dupe;
        $cloned_by_name{ $original->get_name } = $dupe;
    }

    @{$evil_twin}{qw( by_num by_name )}
        = ( \@cloned_by_num, \%cloned_by_name );
    return $evil_twin;
}
# Register a user-supplied Field object under its name, then renumber all
# fields.  A Field with the same name as an existing entry replaces it.
#
# Croaks if $field is not a KinoSearch1::Document::Field, or if this
# FieldInfos was populated by read_infos (from_file is set).
sub add_field {
    my ( $self, $field ) = @_;

    if ( !a_isa_b( $field, 'KinoSearch1::Document::Field' ) ) {
        croak("Not a KinoSearch1::Document::Field");
    }

    # Field definitions for segments that were read back in are frozen.
    if ( $self->{from_file} ) {
        croak("Can't update FieldInfos that were read in from file");
    }

    $self->{by_name}{ $field->get_name } = $field;
    return $self->_assign_field_nums;
}
# Report how many fields are currently held in the by-number table.
sub size {
    my ($self) = @_;
    return scalar @{ $self->{by_num} };
}
# Return the Field objects in field-number order (in scalar context, the
# number of them).
sub get_infos {
    my ($self) = @_;
    return @{ $self->{by_num} };
}
# Look up the field number assigned to the field called $name.
# Returns undef when no field by that name is known.
sub get_field_num {
    my ( $self, $name ) = @_;

    if ( exists $self->{by_name}{$name} ) {
        my $finfo = $self->{by_name}{$name};
        return scalar $finfo->get_field_num;
    }

    # An explicit undef (rather than a bare return) is kept deliberately so
    # that list-context callers still receive exactly one element.
    return undef;
}
# Fetch the Field object registered under $name (undef if there is none).
sub info_by_name {
    my ( $self, $name ) = @_;
    return $self->{by_name}{$name};
}
# Fetch the Field object stored at position $num of the by-number table
# (undef if that slot is empty).
sub info_by_num {
    my ( $self, $num ) = @_;
    return $self->{by_num}[$num];
}
# Given the field number (new, not original), return the name of the field.
#
# Croaks with "Don't know about field number $num" when no field is stored
# under that number, or when the stored field reports an undefined name.
sub field_name {
    my ( $self, $num ) = @_;

    # Check the slot before calling a method on it; otherwise an unknown
    # number dies with "Can't call method ... on an undefined value" and the
    # intended diagnostic below is never reached.
    my $finfo = $self->{by_num}[$num];
    croak("Don't know about field number $num")
        unless defined $finfo;

    my $name = $finfo->get_name;
    croak("Don't know about field number $num")
        unless defined $name;
    return $name;
}
# Rebuild the by-number table from the by-name table: fields are ordered by
# string comparison of their names and numbered 0, 1, 2, ... in that order.
# Confesses if this FieldInfos was read in from a file.
sub _assign_field_nums {
    my ($self) = @_;

    if ( $self->{from_file} ) {
        confess("Can't _assign_field_nums when from_file");
    }

    my @ordered
        = sort { $a->get_name cmp $b->get_name } values %{ $self->{by_name} };

    # Assign into the existing array so its identity is preserved.
    @{ $self->{by_num} } = @ordered;

    my $next_num = 0;
    for my $finfo ( @{ $self->{by_num} } ) {
        $finfo->set_field_num( $next_num++ );
    }
}
# Decode an existing .fnm file from $instream, appending one Field object per
# stored entry to both lookup tables and marking this object as from_file.
#
# The stream is read as a 'V' field count followed by that many 'Ta' pairs
# (field name, flag bits).
sub read_infos {
    my ( $self, $instream ) = @_;
    my $by_name = $self->{by_name};
    my $by_num  = $self->{by_num};

    # Flag this FieldInfos as read in, which blocks later modification.
    $self->{from_file} = 1;

    my $num_fields     = $instream->lu_read('V');
    my @names_and_bits = $instream->lu_read( 'Ta' x $num_fields );

    # Entries come in (name, bits) pairs; the position of a pair in the file
    # is the field number, and implies lexical order by name.
    for my $field_num ( 0 .. $num_fields - 1 ) {
        my ( $name, $bits )
            = @names_and_bits[ 2 * $field_num, 2 * $field_num + 1 ];

        # $bits is stringified so that & is the string bitwise operator.
        my $info = KinoSearch1::Document::Field->new(
            field_num  => $field_num,
            name       => $name,
            indexed    => ( "$bits" & INDEXED ) eq INDEXED ? 1 : 0,
            vectorized => ( "$bits" & VECTORIZED ) eq VECTORIZED ? 1 : 0,
            fnm_bits   => $bits,
        );

        $by_name->{$name} = $info;
        push @{$by_num}, $info;
    }
}
# Write the .fnm file to $outstream: a 'V' field count, then one 'Ta' record
# (name, flag bits) per field in field-number order.
sub write_infos {
    my ( $self, $outstream ) = @_;
    my $by_num = $self->{by_num};

    $outstream->lu_write( 'V', scalar @{$by_num} );

    foreach my $finfo ( @{$by_num} ) {
        my @record = ( $finfo->get_name, $finfo->get_fnm_bits );
        $outstream->lu_write( 'Ta', @record );
    }
}
# Merge one or more other FieldInfos objects into this one, redefining fields
# as necessary and generating new field numbers.  *This* object becomes the
# master: a field known to both sides is replaced by the result of the other
# side's breed_with; a field known only to the other side is cloned in.
#
# Confesses, without modifying anything, if this FieldInfos was read in from
# a file.
sub consolidate {
    my ( $self, @others ) = @_;

    # _assign_field_nums refuses from_file objects anyway; check up front so
    # the failure cannot leave by_name modified while by_num is stale.
    confess("Can't _assign_field_nums when from_file") if $self->{from_file};

    my $infos = $self->{by_name};
    for my $other (@others) {
        my $other_infos = $other->{by_name};

        # Iterate over a snapshot of the keys instead of using each(): each()
        # relies on the hash's shared iterator, so a traversal abandoned
        # earlier (e.g. because breed_with threw) would make this loop start
        # part-way through and silently skip fields.
        for my $name ( keys %{$other_infos} ) {
            my $other_finfo = $other_infos->{$name};
            if ( exists $infos->{$name} ) {
                $infos->{$name} = $other_finfo->breed_with( $infos->{$name} );
            }
            else {
                $infos->{$name} = $other_finfo->clone;
            }
        }
    }

    return $self->_assign_field_nums;
}
# Generate a mapping of field numbers between two FieldInfos objects.  Should
# be called by the superset.
#
# For each field of $other, in $other's field-number order, the number that
# this object assigns to the same-named field is packed as a native unsigned
# int ('I'); a reference to the packed string is handed to
# KinoSearch1::Util::IntMap->new, whose result is returned.
#
# Croaks if $other contains a field this object does not know about.
sub generate_field_num_map {
    my ( $self, $other ) = @_;
    my $map = '';
    for my $other_finfo ( @{ $other->{by_num} } ) {
        my $name       = $other_finfo->get_name;
        my $orig_finfo = $self->{by_name}{$name};

        # Without this check a missing field surfaces as "Can't call method
        # ... on an undefined value", which hides which field is at fault.
        croak("Can't map field '$name': not present in this FieldInfos")
            unless defined $orig_finfo;

        $map .= pack( 'I', $orig_finfo->get_field_num );
    }
    return KinoSearch1::Util::IntMap->new( \$map );
}
# Build the one-byte flag string for $field by OR-ing in INDEXED, VECTORIZED
# and OMIT_NORMS for each corresponding accessor that returns true.  The
# invocant (first argument) is ignored.
sub encode_fnm_bits {
    my ( undef, $field ) = @_;

    # Start from a NUL byte; both operands are strings, so |= is the string
    # bitwise OR.
    my $bits = "\0";
    $bits |= INDEXED    if $field->get_indexed;
    $bits |= VECTORIZED if $field->get_vectorized;
    $bits |= OMIT_NORMS if $field->get_omit_norms;

    return $bits;
}
# Unpack a flag string into $field by calling set_indexed, set_vectorized and
# set_omit_norms with a true/false value for each flag.  The invocant (first
# argument) is ignored.
sub decode_fnm_bits {
    my ( undef, $field, $bits ) = @_;

    # Force a pure string, as read_infos does.  If $bits has ever been used
    # in numeric context, & would otherwise act as the numeric operator and
    # every flag would decode as false.
    my $flags = "$bits";

    $field->set_indexed( ( $flags & INDEXED ) eq INDEXED );
    $field->set_vectorized( ( $flags & VECTORIZED ) eq VECTORIZED );
    $field->set_omit_norms( ( $flags & OMIT_NORMS ) eq OMIT_NORMS );
}
# Nothing to release; always returns nothing.
sub close {
    return;
}
1;
__END__
=begin devdocs

=head1 NAME

KinoSearch1::Index::FieldInfos - track field characteristics

=head1 SYNOPSIS

    my $finfos = KinoSearch1::Index::FieldInfos->new;
    $finfos->read_infos($instream);

=head1 DESCRIPTION

A FieldInfos object tracks the characteristics of all fields in a given
segment.

KinoSearch1 counts on having field nums assigned to fields by lexically sorted
order of field names, but indexes generated by Java Lucene are not likely to
have this property.

=head1 COPYRIGHT

Copyright 2005-2010 Marvin Humphrey

=head1 LICENSE, DISCLAIMER, BUGS, etc.

See L<KinoSearch1> version 1.00.

=end devdocs

=cut
|