/usr/lib/python2.7/dist-packages/xappy/indexerconnection.py is in python-xappy 0.5-5.

This file is owned by root:root, with mode 0o644.

The contents of the file are shown below.

#!/usr/bin/env python
#
# Copyright (C) 2007 Lemur Consulting Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
r"""indexerconnection.py: A connection to the search engine for indexing.

"""
__docformat__ = "restructuredtext en"

import _checkxapian
import cPickle
import xapian

from datastructures import *
import errors
from fieldactions import *
import fieldmappings
import memutils
from replaylog import log

class IndexerConnection(object):
    """A connection to the search engine for indexing.

    """

    def __init__(self, indexpath):
        """Create a new connection to the index.

        There may only be one indexer connection open for a particular
        database at a given time.  Therefore, if an indexer connection to the
        database is already open, this will raise a xapian.DatabaseLockError.

        If the database doesn't already exist, it will be created.

        """
        self._index = log(xapian.WritableDatabase, indexpath, xapian.DB_CREATE_OR_OPEN)
        self._indexpath = indexpath

        # Read existing actions.
        self._field_actions = {}
        self._field_mappings = fieldmappings.FieldMappings()
        self._facet_hierarchy = {}
        self._facet_query_table = {}
        self._next_docid = 0
        self._config_modified = False
        self._load_config()

        # Set management of the memory used.
        # This can be removed once Xapian implements this itself.
        self._mem_buffered = 0
        self.set_max_mem_use()

    def set_max_mem_use(self, max_mem=None, max_mem_proportion=None):
        """Set the maximum memory to use.

        This call sets the amount of memory to use for buffering changes
        before they are flushed.  This will affect the speed of indexing, but
        should not otherwise change the results of indexing.

        Note: this is an approximate measure - the actual amount of memory
        used may exceed the specified amount.  Also, note that future versions
        of xapian are likely to implement this differently, so this setting
        may be entirely ignored.

        The absolute amount of memory to use (in bytes) may be set by setting
        max_mem.  Alternatively, the proportion of the available memory may be
        set by setting max_mem_proportion (this should be a value between 0 and
        1).

        Setting too low a value will result in excessive flushing, and very
        slow indexing.  Setting too high a value will result in excessive
        buffering, leading to swapping, and very slow indexing.

        A reasonable default for max_mem_proportion for a system which is
        dedicated to indexing is probably 0.5: if other tasks are also being
        performed on the system, the value should be lowered.

        """
        if self._index is None:
            raise errors.IndexerError("IndexerConnection has been closed")
        if max_mem is not None and max_mem_proportion is not None:
            raise errors.IndexerError("Only one of max_mem and "
                                       "max_mem_proportion may be specified")

        if max_mem is None and max_mem_proportion is None:
            self._max_mem = None

        if max_mem_proportion is not None:
            physmem = memutils.get_physical_memory()
            if physmem is not None:
                max_mem = int(physmem * max_mem_proportion)

        self._max_mem = max_mem
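
    # Illustrative usage (a sketch, not executed here; 'exampledb' is a
    # hypothetical database path):
    #
    #   conn = IndexerConnection('exampledb')
    #   conn.set_max_mem_use(max_mem_proportion=0.5)    # half of physical RAM
    #   conn.set_max_mem_use(max_mem=50 * 1024 * 1024)  # or a 50MB budget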

    def _store_config(self):
        """Store the configuration for the database.

        The configuration is pickled and stored in the database's metadata,
        under the key '_xappy_config', so it is written to disk along with
        any other pending changes at the next flush.

        """
        assert self._index is not None

        config_str = cPickle.dumps((
                                     self._field_actions,
                                     self._field_mappings.serialise(),
                                     self._facet_hierarchy,
                                     self._facet_query_table,
                                     self._next_docid,
                                    ), 2)
        log(self._index.set_metadata, '_xappy_config', config_str)

        self._config_modified = False

    def _load_config(self):
        """Load the configuration for the database.

        """
        assert self._index is not None

        config_str = log(self._index.get_metadata, '_xappy_config')
        if len(config_str) == 0:
            return

        try:
            (self._field_actions, mappings, self._facet_hierarchy, self._facet_query_table, self._next_docid) = cPickle.loads(config_str)
        except ValueError:
            # Backwards compatibility - configuration used to lack _facet_hierarchy and _facet_query_table
            (self._field_actions, mappings, self._next_docid) = cPickle.loads(config_str)
            self._facet_hierarchy = {}
            self._facet_query_table = {}
        self._field_mappings = fieldmappings.FieldMappings(mappings)

        self._config_modified = False

    def _allocate_id(self):
        """Allocate a new ID.

        """
        while True:
            idstr = "%x" % self._next_docid
            self._next_docid += 1
            if not self._index.term_exists('Q' + idstr):
                break
        self._config_modified = True
        return idstr
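
    # Allocated ids are lowercase hex strings ('0', '1', ..., '9', 'a', ...),
    # skipping any value already present in the index as a 'Q'-prefixed term.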

    def add_field_action(self, fieldname, fieldtype, **kwargs):
        """Add an action to be performed on a field.

        Note that this change to the configuration will not be preserved on
        disk until the next call to flush().

        """
        if self._index is None:
            raise errors.IndexerError("IndexerConnection has been closed")
        if fieldname in self._field_actions:
            actions = self._field_actions[fieldname]
        else:
            actions = FieldActions(fieldname)
            self._field_actions[fieldname] = actions
        actions.add(self._field_mappings, fieldtype, **kwargs)
        self._config_modified = True
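
    # Illustrative configuration sketch: FACET is used elsewhere in this file;
    # INDEX_FREETEXT, STORE_CONTENT and the weight parameter are assumed to be
    # provided by the companion fieldactions module.
    #
    #   conn.add_field_action('title', FieldActions.INDEX_FREETEXT, weight=5)
    #   conn.add_field_action('title', FieldActions.STORE_CONTENT)
    #   conn.add_field_action('category', FieldActions.FACET)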

    def clear_field_actions(self, fieldname):
        """Clear all actions for the specified field.

        This does not report an error if there are already no actions for the
        specified field.

        Note that this change to the configuration will not be preserved on
        disk until the next call to flush().

        """
        if self._index is None:
            raise errors.IndexerError("IndexerConnection has been closed")
        if fieldname in self._field_actions:
            del self._field_actions[fieldname]
            self._config_modified = True

    def get_fields_with_actions(self):
        """Get a list of field names which have actions defined.

        """
        if self._index is None:
            raise errors.IndexerError("IndexerConnection has been closed")
        return self._field_actions.keys()

    def process(self, document):
        """Process an UnprocessedDocument with the settings in this database.

        The resulting ProcessedDocument is returned.

        Note that this processing will be automatically performed if an
        UnprocessedDocument is supplied to the add() or replace() methods of
        IndexerConnection.  This method is exposed to allow the processing to
        be performed separately, which may be desirable if you wish to manually
        modify the processed document before adding it to the database, or if
        you want to split processing of documents from adding documents to the
        database for performance reasons.

        """
        if self._index is None:
            raise errors.IndexerError("IndexerConnection has been closed")
        result = ProcessedDocument(self._field_mappings)
        result.id = document.id
        context = ActionContext(self._index)

        for field in document.fields:
            try:
                actions = self._field_actions[field.name]
            except KeyError:
                # If no actions are defined, just ignore the field.
                continue
            actions.perform(result, field.value, context)

        return result
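
    # A sketch of splitting processing from indexing (UnprocessedDocument and
    # Field are assumed to come from the datastructures module imported
    # above):
    #
    #   doc = UnprocessedDocument()
    #   doc.fields.append(Field('title', 'A little book of calm'))
    #   processed = conn.process(doc)
    #   # ... modify the processed document here if desired ...
    #   conn.add(processed)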

    def _get_bytes_used_by_doc_terms(self, xapdoc):
        """Get an estimate of the bytes used by the terms in a document.

        (This is a very rough estimate.)

        """
        count = 0
        for item in xapdoc.termlist():
            # The term may also be stored in the spelling correction table, so
            # double the amount used.
            count += len(item.term) * 2

            # Add a few more bytes for holding the wdf, and other bits and
            # pieces.
            count += 8

        # Empirical observations indicate that about 5 times as much memory as
        # the above calculation predicts is used for buffering in practice.
        return count * 5
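
    # Worked example of the estimate: a single 5-byte term contributes
    # (5 * 2 + 8) = 18 bytes, scaled to 18 * 5 = 90 bytes of buffered memory.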

    def add(self, document):
        """Add a new document to the search engine index.

        If the document has an id set, and the id already exists in
        the database, an exception will be raised.  Use the replace() method
        instead if you wish to overwrite documents.

        Returns the id of the newly added document (making up a new
        unique ID if no id was set).

        The supplied document may be an instance of UnprocessedDocument, or an
        instance of ProcessedDocument.

        """
        if self._index is None:
            raise errors.IndexerError("IndexerConnection has been closed")
        if not hasattr(document, '_doc'):
            # It's not a processed document.
            document = self.process(document)

        # Ensure that we have an id
        orig_id = document.id
        if orig_id is None:
            id = self._allocate_id()
            document.id = id
        else:
            id = orig_id
            if self._index.term_exists('Q' + id):
                raise errors.IndexerError("Document ID of document supplied to add() is not unique.")
            
        # Add the document.
        xapdoc = document.prepare()
        self._index.add_document(xapdoc)

        if self._max_mem is not None:
            self._mem_buffered += self._get_bytes_used_by_doc_terms(xapdoc)
            if self._mem_buffered > self._max_mem:
                self.flush()

        if id is not orig_id:
            document.id = orig_id
        return id

    def replace(self, document):
        """Replace a document in the search engine index.

        If the document does not have an id set, an exception will be
        raised.

        If the document has an id set, and the id does not already
        exist in the database, this method will have the same effect as add().

        """
        if self._index is None:
            raise errors.IndexerError("IndexerConnection has been closed")
        if not hasattr(document, '_doc'):
            # It's not a processed document.
            document = self.process(document)

        # Ensure that we have an id
        id = document.id
        if id is None:
            raise errors.IndexerError("No document ID set for document supplied to replace().")

        xapdoc = document.prepare()
        self._index.replace_document('Q' + id, xapdoc)

        if self._max_mem is not None:
            self._mem_buffered += self._get_bytes_used_by_doc_terms(xapdoc)
            if self._mem_buffered > self._max_mem:
                self.flush()
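
    # Sketch of add() vs replace() semantics (doc is an UnprocessedDocument,
    # as in the sketch after process() above):
    #
    #   doc.id = '42'
    #   conn.replace(doc)      # overwrites document '42', or adds it if absent
    #   doc.id = None
    #   newid = conn.add(doc)  # allocates and returns a fresh unique id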

    def _make_synonym_key(self, original, field):
        """Make a synonym key (ie, the term or group of terms to store in
        xapian).

        """
        if field is not None:
            prefix = self._field_mappings.get_prefix(field)
        else:
            prefix = ''
        original = original.lower()
        # Add the prefix to the start of each word.
        return ' '.join((prefix + word for word in original.split(' ')))
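
    # For example, with a (hypothetical) prefix of 'XA' for the field,
    # _make_synonym_key('Foo Bar', field) yields 'XAfoo XAbar'; with no field
    # it yields 'foo bar'.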

    def add_synonym(self, original, synonym, field=None,
                    original_field=None, synonym_field=None):
        """Add a synonym to the index.

         - `original` is the word or words which will be synonym expanded in
           searches (if multiple words are specified, each word should be
           separated by a single space).
         - `synonym` is a synonym for `original`.
         - `field` is the field which the synonym is specific to.  If no field
           is specified, the synonym will be used for searches which are not
           specific to any particular field.

        """
        if self._index is None:
            raise errors.IndexerError("IndexerConnection has been closed")
        if original_field is None:
            original_field = field
        if synonym_field is None:
            synonym_field = field
        key = self._make_synonym_key(original, original_field)
        # FIXME - this only works for exact fields which have no upper case
        # characters, or single words
        value = self._make_synonym_key(synonym, synonym_field)
        self._index.add_synonym(key, value)

    def remove_synonym(self, original, synonym, field=None):
        """Remove a synonym from the index.

         - `original` is the word or words which will be synonym expanded in
           searches (if multiple words are specified, each word should be
           separated by a single space).
         - `synonym` is a synonym for `original`.
         - `field` is the field which this synonym is specific to.  If no field
           is specified, the synonym will be used for searches which are not
           specific to any particular field.

        """
        if self._index is None:
            raise errors.IndexerError("IndexerConnection has been closed")
        key = self._make_synonym_key(original, field)
        self._index.remove_synonym(key, synonym.lower())

    def clear_synonyms(self, original, field=None):
        """Remove all synonyms for a word (or phrase).

         - `field` is the field which this synonym is specific to.  If no field
           is specified, the synonym will be used for searches which are not
           specific to any particular field.

        """
        if self._index is None:
            raise errors.IndexerError("IndexerConnection has been closed")
        key = self._make_synonym_key(original, field)
        self._index.clear_synonyms(key)

    def _assert_facet(self, facet):
        """Raise an error if facet is not a declared facet field.

        """
        for action in self._field_actions[facet]._actions:
            if action == FieldActions.FACET:
                return
        raise errors.IndexerError("Field %r is not indexed as a facet" % facet)

    def add_subfacet(self, subfacet, facet):
        """Add a subfacet-facet relationship to the facet hierarchy.
        
        Any existing relationship for that subfacet is replaced.

        Raises a KeyError if either facet or subfacet is not a field, and an
        IndexerError if either facet or subfacet is not a facet field.

        """
        if self._index is None:
            raise errors.IndexerError("IndexerConnection has been closed")
        self._assert_facet(facet)
        self._assert_facet(subfacet)
        self._facet_hierarchy[subfacet] = facet
        self._config_modified = True

    def remove_subfacet(self, subfacet):
        """Remove any existing facet hierarchy relationship for a subfacet.

        """
        if self._index is None:
            raise errors.IndexerError("IndexerConnection has been closed")
        if subfacet in self._facet_hierarchy:
            del self._facet_hierarchy[subfacet]
            self._config_modified = True

    def get_subfacets(self, facet):
        """Get a list of subfacets of a facet.

        """
        if self._index is None:
            raise errors.IndexerError("IndexerConnection has been closed")
        return [k for k, v in self._facet_hierarchy.iteritems() if v == facet] 

    FacetQueryType_Preferred = 1
    FacetQueryType_Never = 2

    def set_facet_for_query_type(self, query_type, facet, association):
        """Set the association between a query type and a facet.

        The value of `association` must be one of
        IndexerConnection.FacetQueryType_Preferred,
        IndexerConnection.FacetQueryType_Never or None. A value of None removes
        any previously set association.

        """
        if self._index is None:
            raise errors.IndexerError("IndexerConnection has been closed")
        if query_type is None:
            raise errors.IndexerError("Cannot set query type information for None")
        self._assert_facet(facet)
        if query_type not in self._facet_query_table:
            self._facet_query_table[query_type] = {}
        if association is None:
            if facet in self._facet_query_table[query_type]:
                del self._facet_query_table[query_type][facet]
        else:
            self._facet_query_table[query_type][facet] = association
        if self._facet_query_table[query_type] == {}:
            del self._facet_query_table[query_type]
        self._config_modified = True

    def get_facets_for_query_type(self, query_type, association):
        """Get the set of facets associated with a query type.

        Only those facets associated with the query type in the specified
        manner are returned; `association` must be one of
        IndexerConnection.FacetQueryType_Preferred or
        IndexerConnection.FacetQueryType_Never.

        If the query type has no facets associated with it, None is returned.

        """
        if self._index is None:
            raise errors.IndexerError("IndexerConnection has been closed")
        if query_type not in self._facet_query_table:
            return None
        facet_dict = self._facet_query_table[query_type]
        return set([facet for facet, assoc in facet_dict.iteritems() if assoc == association])

    def set_metadata(self, key, value):
        """Set an item of metadata stored in the connection.

        The value supplied will be returned by subsequent calls to
        get_metadata() which use the same key.

        Keys with a leading underscore are reserved for internal use - you
        should not use such keys unless you really know what you are doing.

        This will store the value supplied in the database.  It will not be
        visible to readers (ie, search connections) until after the next flush.

        The key is limited to about 200 characters (the same length as a term
        is limited to).  The value can be several megabytes in size.

        To remove an item of metadata, simply call this with a `value`
        parameter containing an empty string.

        """
        if self._index is None:
            raise errors.IndexerError("IndexerConnection has been closed")
        if not hasattr(self._index, 'set_metadata'):
            raise errors.IndexerError("Version of xapian in use does not support metadata")
        log(self._index.set_metadata, key, value)

    def get_metadata(self, key):
        """Get an item of metadata stored in the connection.

        This returns a value stored by a previous call to set_metadata.

        If the value is not found, this will return the empty string.

        """
        if self._index is None:
            raise errors.IndexerError("IndexerConnection has been closed")
        if not hasattr(self._index, 'get_metadata'):
            raise errors.IndexerError("Version of xapian in use does not support metadata")
        return log(self._index.get_metadata, key)
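
    # Metadata round-trip sketch (values only become visible to search
    # connections after the next flush):
    #
    #   conn.set_metadata('myversion', '1')
    #   conn.get_metadata('myversion')      # -> '1'
    #   conn.set_metadata('myversion', '')  # removes the item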

    def delete(self, id):
        """Delete a document from the search engine index.

        If the id does not already exist in the database, this method
        will have no effect (and will not report an error).

        """
        if self._index is None:
            raise errors.IndexerError("IndexerConnection has been closed")
        self._index.delete_document('Q' + id)

    def flush(self):
        """Apply recent changes to the database.

        If an exception occurs, any changes since the last call to flush() may
        be lost.

        """
        if self._index is None:
            raise errors.IndexerError("IndexerConnection has been closed")
        if self._config_modified:
            self._store_config()
        self._index.flush()
        self._mem_buffered = 0

    def close(self):
        """Close the connection to the database.

        It is important to call this method before allowing the class to be
        garbage collected, because it will ensure that any un-flushed changes
        will be flushed.  It also ensures that the connection is cleaned up
        promptly.

        No other methods may be called on the connection after this has been
        called.  (It is permissible to call close() multiple times, but
        only the first call will have any effect.)

        If an exception occurs, the database will be closed, but changes since
        the last call to flush may be lost.

        """
        if self._index is None:
            return
        try:
            self.flush()
        finally:
            # There is currently no "close()" method for xapian databases, so
            # we have to rely on the garbage collector.  Since we never copy
            # the _index property out of this class, there should be no cycles,
            # so the standard python implementation should garbage collect
            # _index straight away.  A close() method is planned to be added to
            # xapian at some point - when it is, we should call it here to make
            # the code more robust.
            self._index = None
            self._indexpath = None
            self._field_actions = None
            self._config_modified = False

    def get_doccount(self):
        """Count the number of documents in the database.

        This count will include documents which have been added or removed but
        not yet flushed.

        """
        if self._index is None:
            raise errors.IndexerError("IndexerConnection has been closed")
        return self._index.get_doccount()

    def iterids(self):
        """Get an iterator which returns all the ids in the database.

        The unique ids are currently returned in binary lexicographical sort
        order, but this should not be relied on.

        """
        if self._index is None:
            raise errors.IndexerError("IndexerConnection has been closed")
        return PrefixedTermIter('Q', self._index.allterms())

    def get_document(self, id):
        """Get the document with the specified unique ID.

        Raises a KeyError if there is no such document.  Otherwise, it returns
        a ProcessedDocument.

        """
        if self._index is None:
            raise errors.IndexerError("IndexerConnection has been closed")
        postlist = self._index.postlist('Q' + id)
        try:
            plitem = postlist.next()
        except StopIteration:
            # Unique ID not found
            raise KeyError('Unique ID %r not found' % id)
        try:
            postlist.next()
            raise errors.IndexerError("Multiple documents " #pragma: no cover
                                       "found with same unique ID")
        except StopIteration:
            # Only one instance of the unique ID found, as it should be.
            pass

        result = ProcessedDocument(self._field_mappings)
        result.id = id
        result._doc = self._index.get_document(plitem.docid)
        return result

    def iter_synonyms(self, prefix=""):
        """Get an iterator over the synonyms.

         - `prefix`: if specified, only synonym keys with this prefix will be
           returned.

        The iterator returns 2-tuples, in which the first item is the key (ie,
        a 2-tuple holding the term or terms which will be synonym expanded,
        followed by the fieldname specified (or None if no fieldname)), and the
        second item is a tuple of strings holding the synonyms for the first
        item.

        These return values are suitable for the dict() builtin, so you can
        write things like:

         >>> conn = IndexerConnection('foo')
         >>> conn.add_synonym('foo', 'bar')
         >>> conn.add_synonym('foo bar', 'baz')
         >>> conn.add_synonym('foo bar', 'foo baz')
         >>> dict(conn.iter_synonyms())
         {('foo', None): ('bar',), ('foo bar', None): ('baz', 'foo baz')}

        """
        if self._index is None:
            raise errors.IndexerError("IndexerConnection has been closed")
        return SynonymIter(self._index, self._field_mappings, prefix)

    def iter_subfacets(self):
        """Get an iterator over the facet hierarchy.

        The iterator returns 2-tuples, in which the first item is the
        subfacet and the second item is its parent facet.

        The return values are suitable for the dict() builtin, for example:

         >>> conn = IndexerConnection('db')
         >>> conn.add_field_action('foo', FieldActions.FACET)
         >>> conn.add_field_action('bar', FieldActions.FACET)
         >>> conn.add_field_action('baz', FieldActions.FACET)
         >>> conn.add_subfacet('foo', 'bar')
         >>> conn.add_subfacet('baz', 'bar')
         >>> dict(conn.iter_subfacets())
         {'foo': 'bar', 'baz': 'bar'}

        """
        if self._index is None:
            raise errors.IndexerError("IndexerConnection has been closed")
        if 'facets' in _checkxapian.missing_features:
            raise errors.IndexerError("Facets unsupported with this release of xapian")
        return self._facet_hierarchy.iteritems()

    def iter_facet_query_types(self, association):
        """Get an iterator over query types and their associated facets.

        Only facets associated with the query types in the specified manner
        are returned; `association` must be one of IndexerConnection.FacetQueryType_Preferred
        or IndexerConnection.FacetQueryType_Never.

        The iterator returns 2-tuples, in which the first item is the query
        type and the second item is the associated set of facets.

        The return values are suitable for the dict() builtin, for example:

         >>> conn = IndexerConnection('db')
         >>> conn.add_field_action('foo', FieldActions.FACET)
         >>> conn.add_field_action('bar', FieldActions.FACET)
         >>> conn.add_field_action('baz', FieldActions.FACET)
         >>> conn.set_facet_for_query_type('type1', 'foo', conn.FacetQueryType_Preferred)
         >>> conn.set_facet_for_query_type('type1', 'bar', conn.FacetQueryType_Never)
         >>> conn.set_facet_for_query_type('type1', 'baz', conn.FacetQueryType_Never)
         >>> conn.set_facet_for_query_type('type2', 'bar', conn.FacetQueryType_Preferred)
         >>> dict(conn.iter_facet_query_types(conn.FacetQueryType_Preferred))
         {'type1': set(['foo']), 'type2': set(['bar'])}
         >>> dict(conn.iter_facet_query_types(conn.FacetQueryType_Never))
         {'type1': set(['bar', 'baz'])}

        """
        if self._index is None:
            raise errors.IndexerError("IndexerConnection has been closed")
        if 'facets' in _checkxapian.missing_features:
            raise errors.IndexerError("Facets unsupported with this release of xapian")
        return FacetQueryTypeIter(self._facet_query_table, association)

class PrefixedTermIter(object):
    """Iterate through all the terms with a given prefix.

    """
    def __init__(self, prefix, termiter):
        """Initialise the prefixed term iterator.

        - `prefix` is the prefix to return terms for.
        - `termiter` is a xapian TermIterator, which should be at its start.

        """

        # The algorithm used in next() currently only works for single
        # character prefixes, so assert that the prefix is single character.
        # To deal with multicharacter prefixes, we need to check for terms
        # which have a starting prefix equal to that given, but then have a
        # following uppercase alphabetic character, indicating that the actual
        # prefix is longer than the target prefix.  We then need to skip over
        # these.  Not too hard to implement, but we don't need it yet.
        assert len(prefix) == 1

        self._started = False
        self._prefix = prefix
        self._prefixlen = len(prefix)
        self._termiter = termiter

    def __iter__(self):
        return self

    def next(self):
        """Get the next term with the specified prefix.

        """
        if not self._started:
            term = self._termiter.skip_to(self._prefix).term
            self._started = True
        else:
            term = self._termiter.next().term
        if len(term) < self._prefixlen or term[:self._prefixlen] != self._prefix:
            raise StopIteration
        return term[self._prefixlen:]
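
    # For example, given the terms ['Qalpha', 'Qbeta', 'R1'] and prefix 'Q',
    # the iterator skips to 'Qalpha', yields 'alpha' then 'beta', and stops
    # when it reaches 'R1'.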


class SynonymIter(object):
    """Iterate through a list of synonyms.

    """
    def __init__(self, index, field_mappings, prefix):
        """Initialise the synonym iterator.

         - `index` is the index to get the synonyms from.
         - `field_mappings` is the FieldMappings object for the iterator.
         - `prefix` is the prefix to restrict the returned synonyms to.

        """
        self._index = index
        self._field_mappings = field_mappings
        self._syniter = self._index.synonym_keys(prefix)

    def __iter__(self):
        return self

    def next(self):
        """Get the next synonym.

        """
        synkey = self._syniter.next()
        pos = 0
        for char in synkey:
            if char.isupper(): pos += 1
            else: break
        if pos == 0:
            fieldname = None
            terms = synkey
        else:
            prefix = synkey[:pos]
            fieldname = self._field_mappings.get_fieldname_from_prefix(prefix)
            terms = ' '.join((term[pos:] for term in synkey.split(' ')))
        synval = tuple(self._index.synonyms(synkey))
        return ((terms, fieldname), synval)
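
    # For example, a key of 'XAfoo XAbar' (assuming prefix 'XA' maps to field
    # 'name') decomposes to terms 'foo bar' with fieldname 'name'; an
    # unprefixed key such as 'foo bar' yields fieldname None.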

class FacetQueryTypeIter(object):
    """Iterate through all the query types and their associated facets.

    """
    def __init__(self, facet_query_table, association):
        """Initialise the query type facet iterator.

        Only facets associated with each query type in the specified
        manner are returned (`association` must be one of
        IndexerConnection.FacetQueryType_Preferred or
        IndexerConnection.FacetQueryType_Never).

        """
        self._table_iter = facet_query_table.iteritems()
        self._association = association

    def __iter__(self):
        return self

    def next(self):
        """Get the next (query type, facet set) 2-tuple.

        """
        query_type, facet_dict = self._table_iter.next()
        facet_list = [facet for facet, association in facet_dict.iteritems() if association == self._association]
        if len(facet_list) == 0:
            return self.next()
        return (query_type, set(facet_list))

if __name__ == '__main__':
    import doctest, sys
    doctest.testmod(sys.modules[__name__])