This file is indexed.

/usr/lib/x86_64-linux-gnu/perl5/5.24/Lucy/Simple.pm is in liblucy-perl 0.3.3-7+b1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

use strict;
use warnings;

package Lucy::Simple;
use Lucy;
our $VERSION = '0.003003';
$VERSION = eval $VERSION;
use Carp;
use Scalar::Util qw( weaken reftype refaddr );

use Lucy::Plan::Schema;
use Lucy::Analysis::PolyAnalyzer;
use Lucy::Index::Indexer;
use Lucy::Search::IndexSearcher;

my %obj_cache;

sub new {
    my ( $either, %args ) = @_;
    my $path     = delete $args{path};
    my $language = lc( delete $args{language} );
    confess("Missing required parameter 'path'") unless defined $path;
    confess("Invalid language: '$language'")
        unless $language =~ /^(?:da|de|en|es|fi|fr|it|nl|no|pt|ru|sv)$/;
    my @remaining = keys %args;
    confess("Invalid params: @remaining") if @remaining;
    my $self = bless {
        type     => undef,
        schema   => undef,
        indexer  => undef,
        searcher => undef,
        hits     => undef,
        language => $language,
        path     => $path,
        },
        ref($either) || $either;

    # Get type and schema.
    my $analyzer = Lucy::Analysis::PolyAnalyzer->new( language => $language );
    $self->{type} = Lucy::Plan::FullTextType->new( analyzer => $analyzer, );
    my $schema = $self->{schema} = Lucy::Plan::Schema->new;

    # Cache the object for later clean-up.
    weaken( $obj_cache{ refaddr $self } = $self );

    return $self;
}

sub _lazily_create_indexer {
    my $self = shift;
    if ( !defined $self->{indexer} ) {
        $self->{indexer} = Lucy::Index::Indexer->new(
            schema => $self->{schema},
            index  => $self->{path},
        );
    }
}

sub add_doc {
    my ( $self, $hashref ) = @_;
    my $schema = $self->{schema};
    my $type   = $self->{type};
    croak("add_doc requires exactly one argument: a hashref")
        unless ( @_ == 2 and reftype($hashref) eq 'HASH' );
    $self->_lazily_create_indexer;
    $schema->spec_field( name => $_, type => $type ) for keys %$hashref;
    $self->{indexer}->add_doc($hashref);
}

sub _finish_indexing {
    my $self = shift;

    # Don't bother to throw an error if index not modified.
    if ( defined $self->{indexer} ) {
        $self->{indexer}->commit;

        # Trigger searcher and indexer refresh.
        undef $self->{indexer};
        undef $self->{searcher};
    }
}

sub search {
    my ( $self, %args ) = @_;

    # Flush recent adds; lazily create searcher.
    $self->_finish_indexing;
    if ( !defined $self->{searcher} ) {
        $self->{searcher}
            = Lucy::Search::IndexSearcher->new( index => $self->{path} );
    }

    $self->{hits} = $self->{searcher}->hits(%args);

    return $self->{hits}->total_hits;
}

sub next {
    my $self = shift;
    return unless defined $self->{hits};

    # Get the hit, bail if hits are exhausted.
    my $hit = $self->{hits}->next;
    if ( !defined $hit ) {
        undef $self->{hits};
        return;
    }

    return $hit;
}

sub DESTROY {
    for (shift) {
        $_->_finish_indexing;
        delete $obj_cache{ refaddr $_ };
    }
}

END {
    # Finish indexing for any objects that still exist, since, if we wait
    # until global destruction, our Indexer might no longer exist,
    # (see bug #32689)
    $_->_finish_indexing for values %obj_cache;
}

1;

__END__

__POD__

=head1 NAME

Lucy::Simple - Basic search engine.

=head1 SYNOPSIS

First, build an index of your documents.

    my $index = Lucy::Simple->new(
        path     => '/path/to/index/'
        language => 'en',
    );

    while ( my ( $title, $content ) = each %source_docs ) {
        $index->add_doc({
            title    => $title,
            content  => $content,
        });
    }

Later, search the index.

    my $total_hits = $index->search( 
        query      => $query_string,
        offset     => 0,
        num_wanted => 10,
    );

    print "Total hits: $total_hits\n";
    while ( my $hit = $index->next ) {
        print "$hit->{title}\n",
    }

=head1 DESCRIPTION

Lucy::Simple is a stripped-down interface for the L<Apache Lucy|Lucy> search
engine library.  

=head1 METHODS 

=head2 new

    my $lucy = Lucy::Simple->new(
        path     => '/path/to/index/',
        language => 'en',
    );

Create a Lucy::Simple object, which can be used for both indexing and
searching.  Two hash-style parameters are required.

=over 

=item *

B<path> - Where the index directory should be located.  If no index is found
at the specified location, one will be created.

=item *

B<language> - The language of the documents in your collection, indicated 
by a two-letter ISO code.  12 languages are supported:

    |-----------------------|
    | Language   | ISO code |
    |-----------------------|
    | Danish     | da       |
    | Dutch      | nl       |
    | English    | en       |
    | Finnish    | fi       |
    | French     | fr       |
    | German     | de       |
    | Italian    | it       |
    | Norwegian  | no       |
    | Portuguese | pt       |
    | Spanish    | es       |
    | Swedish    | sv       |
    | Russian    | ru       |
    |-----------------------|

=back

=head2 add_doc 

    $lucy->add_doc({
        location => $url,
        title    => $title,
        content  => $content,
    });

Add a document to the index.  The document must be supplied as a hashref, with
field names as keys and content as values.

=head2 search

    my $total_hits = $lucy->search( 
        query      => $query_string,    # required
        offset     => 40,               # default 0
        num_wanted => 20,               # default 10
    );

Search the index.  Returns the total number of documents which match the
query.  (This number is unlikely to match C<num_wanted>.)

=over

=item *

B<query> - A search query string.

=item *

B<offset> - The number of most-relevant hits to discard, typically used when
"paging" through hits N at a time.  Setting offset to 20 and num_wanted to 10
retrieves hits 21-30, assuming that 30 hits can be found.

=item *

B<num_wanted> - The number of hits you would like to see after C<offset> is
taken into account.  

=back

=head1 BUGS

Not thread-safe.

=cut