/usr/share/perl5/DBIx/FullTextSearch/Phrase.pm is in libdbix-fulltextsearch-perl 0.73-11.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 | package DBIx::FullTextSearch::Phrase;
use strict;
use DBIx::FullTextSearch::Column;
use vars qw! @ISA !;
@ISA = qw! DBIx::FullTextSearch::Column !;
# Open in the backend just sets the object
sub open {
my ($class, $fts) = @_;
return bless { 'fts' => $fts }, $class;
}
sub DESTROY {
my ($self) = @_;
if (defined $self->{'select_wordid_sth'}) {
$self->{'select_wordid_sth'}->finish();
}
}
# Create creates the table(s) according to the parameters
sub _create_tables {
my ($class, $fts) = @_;
my $COUNT_FIELD = '';
my $CREATE_DATA = <<EOF;
create table $fts->{'data_table'} (
word_id $DBIx::FullTextSearch::BITS_TO_INT{$fts->{'word_id_bits'}} unsigned not null,
doc_id $DBIx::FullTextSearch::BITS_TO_INT{$fts->{'doc_id_bits'}} unsigned not null,
idx longblob default '' not null,
index (word_id),
index (doc_id)
)
EOF
$fts->{'word_id_table'} = $fts->{'table'}.'_words'
unless defined $fts->{'word_id_table'};
my $CREATE_WORD_ID = <<EOF;
create table if not exists $fts->{'word_id_table'} (
word varchar($fts->{'word_length'}) binary
default '' not null,
id $DBIx::FullTextSearch::BITS_TO_INT{$fts->{'word_id_bits'}} unsigned not null auto_increment,
primary key (id),
unique (word)
)
EOF
my $dbh = $fts->{'dbh'};
$dbh->do($CREATE_DATA) or return $dbh->errstr;
push @{$fts->{'created_tables'}}, $fts->{'data_table'};
$dbh->do($CREATE_WORD_ID) or return $dbh->errstr;
push @{$fts->{'created_tables'}}, $fts->{'word_id_table'};
return;
}
sub add_document {
my ($self, $id, $words) = @_;
# here the value in the %$words hash is an array of word
# positions
my $fts = $self->{'fts'};
my $dbh = $fts->{'dbh'};
my $word_id_table = $fts->{'word_id_table'};
if (not defined $self->{'select_wordid_sth'}) {
$self->{'select_wordid_sth'} = $dbh->prepare("
select id from $word_id_table where word = ?
");
}
my $data_table = $fts->{'data_table'};
my $packstring = $DBIx::FullTextSearch::BITS_TO_PACK{$fts->{'position_bits'}};
my $num_words = 0;
my (@wids,@data,@widshandler,@datahandler);
my $wordid;
$dbh->do("lock tables $word_id_table write");
my ($maxid) = $dbh->selectrow_array("select max(id)
from $word_id_table");
foreach my $word (keys %$words) {
if(!defined $self->{'wordids'}->{$word}) {
$self->{'select_wordid_sth'}->execute($word);
($wordid) = $self->{'select_wordid_sth'}->fetchrow_array();
unless ($wordid) {
$maxid++;
push @widshandler, "(?,$maxid)";
push @wids, $word;
$wordid = $maxid;
}
$self->{'wordids'}->{$word} = $wordid;
} else {
$wordid=$self->{'wordids'}->{$word};
}
push @datahandler, "($wordid,$id,?)";
push @data, pack $packstring.'*', @{$words->{$word}};
$num_words++;
};
$dbh->do("insert into $word_id_table values " .
join (',',@widshandler),undef,@wids) if @wids;
$dbh->do("unlock tables");
$dbh->do("insert into $data_table values " .
join (',',@datahandler),undef,@data) if @data;
return $num_words;
}
sub update_document {
my ($self, $id, $words) = @_;
my $fts = $self->{'fts'};
my $dbh = $fts->{'dbh'};
my $data_table = $fts->{'data_table'};
$dbh->do("delete from $data_table where doc_id = ?", {}, $id);
$self->add_document($id, $words);
}
sub contains_hashref {
my $self = shift;
my $fts = $self->{'fts'};
my $dbh = $fts->{'dbh'};
my $data_table = $fts->{'data_table'};
my $word_id_table = $fts->{'word_id_table'};
my $packstring = $DBIx::FullTextSearch::BITS_TO_PACK{$fts->{'position_bits'}};
my $SQL = qq{
select doc_id, idx
from $data_table, $word_id_table
where word like ?
and id = word_id
};
my $out = {};
for my $phrase (@_){
my @words = split(' ', $phrase);
my @sths;
for (my $i = 0; $i < @words; $i++) {
$sths[$i] = $dbh->prepare($SQL);
$sths[$i]->execute($words[$i]);
}
my %prev_pos = ();
my %cur_pos = ();
# iterate through words in phrase
for (my $i = 0; $i < @words; $i++){
if($i > 0){
%prev_pos = %cur_pos;
%cur_pos = ();
}
# get docs that have this word
while (my ($doc, $data) = $sths[$i]->fetchrow_array){
# get positions of words in doc
my @positions = unpack $packstring.'*', $data;
map { $cur_pos{$doc}->{$_} = 1 } @positions;
}
if($i > 0){
# check to see if word $i comes after word $i-1
for my $doc (keys %cur_pos){
my $isPhrase = 0;
for my $position (keys %{$cur_pos{$doc}}){
if ($position > 0 && exists $prev_pos{$doc}{$position - 1}){
$isPhrase = 1;
} else {
delete $cur_pos{$doc}{$position};
}
}
delete $cur_pos{$doc} unless $isPhrase;
}
}
}
for my $doc (keys %cur_pos){
my @positions = keys %{$cur_pos{$doc}};
$out->{$doc} += scalar (@positions);
}
}
return $out;
}
*parse_and_index_data = \&DBIx::FullTextSearch::parse_and_index_data_list;
1;
|