This file is indexed.

/usr/share/apt-xapian-index/plugins/translated-desc.py is in apt-xapian-index 0.47ubuntu8.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# python-apt is optional: the import is only probed to record whether it is
# available.  info() and init() check HAS_APT and disable this plugin when it
# is False.
try:
    import apt
    HAS_APT=True
except ImportError:
    HAS_APT=False
import xapian
import re
import os
import codecs
# deb822 parser: try the "debian" package first and fall back to
# "debian_bundle" (presumably an older name for the same module -- confirm).
try:
    from debian import deb822
except ImportError:
    from debian_bundle import deb822

from six.moves.urllib_parse import unquote

# Directory scanned by translationFiles() for *_i18n_Translation-<lang> files
APTLISTDIR="/var/lib/apt/lists"

def translationFiles(langs=None):
    """
    Generate the translation files found in APTLISTDIR.

    Look for files like:
    ftp.uk.debian.org_debian_dists_sid_main_i18n_Translation-it
    and extract the language code at the end.

    langs  optional collection of language codes.  When it is given and not
           empty, only the files for those languages are generated;
           otherwise every translation file is generated.

    Yields (language code, full path of the file) pairs, where the language
    code has been URL-unquoted.
    """
    tfile = re.compile(r"_i18n_Translation-([^-]+)$")
    for f in os.listdir(APTLISTDIR):
        mo = tfile.search(f)
        if not mo: continue
        rawlang = mo.group(1)
        lang = unquote(rawlang)
        # Filter on the unquoted code, which is what callers get back from
        # this function: the code embedded in the file name may be
        # percent-encoded and would otherwise never match the plain code the
        # caller asked for.  The raw form is still accepted so that callers
        # passing the encoded spelling keep working.
        if langs and lang not in langs and rawlang not in langs: continue
        yield lang, os.path.join(APTLISTDIR, f)

class Indexer:
    """
    Index the translated descriptions of a single language.

    The translation file is read once at construction time into a
    package name -> translated description mapping (self.descs); index()
    then feeds the matching description to a Xapian TermGenerator.
    """
    def __init__(self, lang, file):
        """
        lang  language code of the translation, used to build the name of
              the Description-<lang> field to read
        file  path of the translation file, read as UTF-8
        """
        self.lang = lang
        # Only the part of the language code before "_" is used to look up
        # a stemmer
        self.xlang = lang.split("_")[0]
        self.indexer = xapian.TermGenerator()
        try:
            stem = xapian.Stem(self.xlang)
            self.stemmer = stem
            self.indexer.set_stemmer(stem)
        except xapian.InvalidArgumentError:
            # No stemmer available for this language: index unstemmed
            pass

        # Load the translated descriptions, keyed by package name
        self.descs = dict()
        field = "Description-" + self.lang
        with codecs.open(file, 'r', encoding='utf-8') as stream:
            for para in deb822.Deb822.iter_paragraphs(stream):
                # Some translation files carry a differently named header
                # for some packages (a Description-de.noguide: header was
                # once seen in the -de translations instead of
                # Description-de:), so paragraphs without the expected
                # field are skipped.
                if field not in para:
                    continue
                self.descs[para["Package"]] = para[field]

    def index(self, document):
        """
        Add to document the terms of its translated description.

        The document data is used as the key to look up the description
        (presumably the package name -- set by the caller); when there is
        no translation for it, an empty text is indexed.
        """
        key = document.get_data()
        self.indexer.set_document(document)
        text = self.descs.get(key, "")
        self.indexer.index_text_without_positions(text)

class TranslatedDescriptions:
    """
    Data source that indexes the translated package descriptions found in
    APTLISTDIR, using one Indexer per translation file.
    """
    def __init__(self, langs):
        """
        langs  collection of language codes whose translations are indexed
        """
        self.langs = langs

    def info(self):
        """
        Return general information about the plugin.

        The information returned is a dict with various keywords:

         timestamp (required)
           the last modified timestamp of this data source.  This will be used
           to see if we need to update the database or not.  A timestamp of 0
           means that this data source is either missing or always up to date.
         values (optional)
           an array of dicts { name: name, desc: description }, one for every
           numeric value indexed by this data source.

        This plugin returns no values; besides the timestamp it returns:

         sources
           an array of dicts { path: file name, desc: description }, one for
           every translation file that is read.
         prefixes
           an array of dicts describing the term prefixes that are generated.

        Note that this method can be called before init.  The idea is that, if
        the timestamp shows that this plugin is currently not needed, then the
        long initialisation can just be skipped.
        """
        if not HAS_APT: return dict(timestamp = 0)
        # translationFiles() is a generator and the listing is walked twice
        # below (timestamp and sources): materialise it, otherwise the second
        # walk finds it exhausted and the sources list is always empty.
        filelist = list(translationFiles(self.langs))
        maxts = max([0] + [os.path.getmtime(f) for l, f in filelist])
        return dict(
                timestamp=maxts,
                sources=[dict(path=f, desc="%s translation" % l) for l, f in filelist],
                prefixes=[
                    dict(idx="Z", qp=None, type=None,
                         desc="Stemmed forms of keywords",
                         ldesc="This contains the stemmed forms of keywords as generated by"
                               " TermGenerator and matched by QueryParser"),
                ],
        )

    def init(self, info, progress):
        """
        If needed, perform long initialisation tasks here.

        info is a dictionary with useful information.  Currently it contains
        the following values:

          "values": a dict mapping index mnemonics to index numbers

        The progress indicator can be used to report progress.

        This reads every translation file for the configured languages,
        creating one Indexer for each of them in self.indexers.
        """

        self.indexers = []
        for lang, file in translationFiles(self.langs):
            progress.begin("Reading %s translations from %s" % (lang, file))
            self.indexers.append(Indexer(lang, file))
            progress.end()

    def doc(self):
        """
        Return documentation information for this data source.

        The documentation information is a dictionary with these keys:
          name: the name for this data source
          shortDesc: a short description
          fullDoc: the full description as a chapter in ReST format
        """
        return dict(
            name = "Translated package descriptions",
            shortDesc = "terms extracted from the translated package descriptions using Xapian's TermGenerator",
            fullDoc = """
            The TranslatedDescriptions data source reads translated description
            files from %s, then uses Xapian's TermGenerator to tokenise and
            index their content.

            Currently this creates normal terms as well as stemmed terms
            prefixed with ``Z``.
            """ % APTLISTDIR
        )

    def index(self, document, pkg):
        """
        Update the document with the information from this data source.

        document  is the document to update
        pkg       is the python-apt Package object for this package

        pkg is not used: each Indexer looks up the translated description
        using the data stored in the document.
        """
        for i in self.indexers:
            i.index(document)

    def indexDeb822(self, document, pkg):
        """
        Update the document with the information from this data source.

        This is alternative to index, and it is used when indexing with package
        data taken from a custom Packages file.

        document  is the document to update
        pkg       is the Deb822 object for this package

        pkg is not used: each Indexer looks up the translated description
        using the data stored in the document.
        """
        for i in self.indexers:
            i.index(document)

def init(langs=None, **kw):
    """
    Create and return the plugin object.

    Return None, meaning that the plugin is not used, when python-apt is
    not available, when no languages are requested, or when there is no
    translation file for the requested languages.
    """
    if not (HAS_APT and langs):
        return None
    # Walk the whole listing just to know whether anything matches
    found = list(translationFiles(langs))
    if not found:
        return None
    return TranslatedDescriptions(langs=langs)