This file is indexed.

/usr/share/pyshared/translate/tools/pretranslate.py is in translate-toolkit 1.10.0-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2008 Zuza Software Foundation
#
# This file is part of translate.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.

"""Fill localization files with suggested translations based on
translation memory and existing translations.

See: http://docs.translatehouse.org/projects/translate-toolkit/en/latest/commands/pretranslate.html
for examples and usage instructions.
"""

from translate.storage import factory
from translate.storage import xliff, po
from translate.search import match

# Module-level cache for the translation memory matcher. memory() builds the
# matcher on its first call and hands back this same object afterwards, so the
# TM is not reinitialised each time.
tmmatcher = None


def memory(tmfiles, max_candidates=1, min_similarity=75, max_length=1000):
    """Return the shared TM matcher, building it on the first call only.

    Once the module-level ``tmmatcher`` has been set, it is returned as is
    and the arguments of later calls are not consulted.

    :param tmfiles: A single TM file, or a list of TM files, each loaded
        through ``factory.getobject``.
    :param max_candidates: Passed through to ``match.matcher``.
    :param min_similarity: Passed through to ``match.matcher``.
    :param max_length: Passed through to ``match.matcher``.
    :return: The cached matcher object.
    """
    global tmmatcher

    # Already initialised: reuse the cached matcher.
    if tmmatcher is not None:
        return tmmatcher

    if isinstance(tmfiles, list):
        tmstore = [factory.getobject(path) for path in tmfiles]
    else:
        tmstore = factory.getobject(tmfiles)

    tmmatcher = match.matcher(
        tmstore,
        max_candidates=max_candidates,
        min_similarity=min_similarity,
        max_length=max_length,
    )
    return tmmatcher


def pretranslate_file(input_file, output_file, template_file, tm=None,
                      min_similarity=75, fuzzymatching=True):
    """Pretranslate one file and write the result to ``output_file``.

    The input (and the template, when one is given) is loaded with
    ``factory.getobject``; the actual work is delegated to
    :func:`pretranslate_store`.

    :param input_file: File to pretranslate.
    :param output_file: Writable object that receives ``str()`` of the
        pretranslated store.
    :param template_file: File holding old translations, or ``None``.
    :param tm: Translation memory handed on to :func:`pretranslate_store`.
    :param min_similarity: Handed on to :func:`pretranslate_store`.
    :param fuzzymatching: Handed on to :func:`pretranslate_store`.
    :return: Always ``1``.
    """
    input_store = factory.getobject(input_file)

    if template_file is None:
        template_store = None
    else:
        template_store = factory.getobject(template_file)

    pretranslated = pretranslate_store(input_store, template_store, tm,
                                       min_similarity, fuzzymatching)
    output_file.write(str(pretranslated))
    return 1


def match_template_location(input_unit, template_store):
    """Return the template unit that matches ``input_unit`` by location.

    Units without locations, or whose first location contains a ``':'``,
    are matched by id instead (see :func:`match_template_id`). According to
    the original author this is a cheap way of telling ordinary gettext
    files apart from PO files generated by moz2po and oo2po, which use the
    location as a unique string identifier.

    :param input_unit: Unit to find a counterpart for.
    :param template_store: Store exposing a ``locationindex`` mapping.
    :return: The first indexed unit that has the same source and a
        non-empty target, or ``None`` if no location yields one.
    """
    locations = input_unit.getlocations()
    use_id_matching = not locations or ":" in locations[0]
    if use_id_matching:
        return match_template_id(input_unit, template_store)

    # Location is looked up first, then the source strings are compared;
    # a hit with an empty target is of no use and is skipped.
    index = template_store.locationindex
    for location in locations:
        candidate = index.get(location)
        if candidate is None:
            continue
        if (candidate.source == input_unit.source and
                candidate.gettargetlen() > 0):
            return candidate
    return None


def match_template_id(input_unit, template_store):
    """Look up ``input_unit`` in the template by its unit id.

    :param input_unit: Unit whose ``getid()`` value is searched for.
    :param template_store: Store queried through ``findid``.
    :return: Whatever ``template_store.findid`` returns for that id.
    """
    return template_store.findid(input_unit.getid())


def match_source(input_unit, template_store):
    """Returns a matching unit from a template. matching based on source text

    :param input_unit: Unit whose ``source`` is searched for.
    :param template_store: Store queried through ``findunit``. May be
        ``None`` (pretranslation can run with a TM but no template), in
        which case nothing can match.
    :return: The result of ``template_store.findunit(input_unit.source)``,
        or ``None`` when there is no template or the source is at most one
        character long.
    """
    # Without a template there is nothing to search; return instead of
    # failing with an AttributeError on None.
    if template_store is None:
        return None

    # hack for weird mozilla single letter strings, we don't want to
    # match them by anything but locations
    if len(input_unit.source) > 1:
        return template_store.findunit(input_unit.source)
    return None


def match_fuzzy(input_unit, matchers):
    """Return the best fuzzy candidate from the first matcher that has one.

    The matchers are asked in order; as soon as one returns a non-empty
    candidate list, its first entry is returned and the remaining matchers
    are not queried.

    :param input_unit: Unit whose ``source`` is matched.
    :param matchers: Iterable of objects providing ``matches(source)``.
    :return: The first candidate found, or ``None`` if no matcher had any.
    """
    # Generator expression keeps the lookups lazy, one matcher at a time.
    results = (engine.matches(input_unit.source) for engine in matchers)
    for candidates in results:
        if candidates:
            return candidates[0]
    return None


def pretranslate_unit(input_unit, template_store, matchers=None,
                      mark_reused=False, merge_on='id'):
    """Pretranslate a unit or return unchanged if no translation was found.

    Matching is attempted in this order, stopping at the first match that
    has a non-empty target:

    1. the template, by location or by id (see ``merge_on``);
    2. if ``matchers`` is given: the template again, by exact source text;
    3. if ``matchers`` is given: the fuzzy matchers.

    A match is merged into ``input_unit`` with ``authoritative=True``. The
    one exception is a source/fuzzy match for an XLIFF unit, which is added
    as an alternative translation instead.

    :param input_unit: Unit that will be pretranslated.
    :param template_store: Fill input unit with units matching in this store.
        May be ``None`` when no template is available; the template lookups
        are then skipped.
    :param matchers: List of fuzzy :class:`~translate.search.match.matcher`
        objects.
    :param mark_reused: Whether to mark old translations as reused or not.
    :param merge_on: Where will the merge matching happen on. ``'location'``
        selects location matching; any other value selects id matching.
    :return: ``input_unit``.
    """
    matching_unit = None

    # Do template matching
    if template_store:
        # :param:`merge_on` supports `location` and `id` for now
        if merge_on == 'location':
            matching_unit = match_template_location(input_unit, template_store)
        else:
            matching_unit = match_template_id(input_unit, template_store)

    if matching_unit and matching_unit.gettargetlen() > 0:
        input_unit.merge(matching_unit, authoritative=True)
    elif matchers:
        # Quickly try an exact match by source. The template is optional
        # (e.g. only a TM was supplied), so it may only be searched when it
        # actually exists; otherwise go straight to fuzzy matching.
        if template_store is not None:
            matching_unit = match_source(input_unit, template_store)

        if not matching_unit or not matching_unit.gettargetlen():
            # do fuzzy matching
            matching_unit = match_fuzzy(input_unit, matchers)

        if matching_unit and matching_unit.gettargetlen() > 0:
            #FIXME: should we dispatch here instead of this crude type check
            if isinstance(input_unit, xliff.xliffunit):
                #FIXME: what about origin, lang and matchquality
                input_unit.addalttrans(matching_unit.target, origin="fish",
                                       sourcetxt=matching_unit.source)
            else:
                input_unit.merge(matching_unit, authoritative=True)

    #FIXME: ugly hack required by pot2po to mark old
    #translations reused for new file. The template unit with the same
    #source as the match gets a ``reused`` flag.
    if mark_reused and matching_unit and template_store:
        original_unit = template_store.findunit(matching_unit.source)
        if original_unit is not None:
            original_unit.reused = True

    return input_unit


def pretranslate_store(input_store, template_store, tm=None,
                       min_similarity=75, fuzzymatching=True):
    """Pretranslate every translatable unit of ``input_store``.

    :param input_store: Store whose units are pretranslated; its
        ``merge_on`` attribute selects the template matching strategy.
    :param template_store: Store with old translations, or ``None``.
    :param tm: Translation memory file(s) handed to :func:`memory`; only
        used when ``fuzzymatching`` is enabled.
    :param min_similarity: Minimum similarity given to the fuzzy matchers.
    :param fuzzymatching: When false, no fuzzy matchers are created.
    :return: ``input_store``.
    """
    matchers = []

    # Preparation, part 1: the template.
    if template_store is not None:
        template_store.makeindex()

        # Optional per-type preparation: a module-level function named
        # after the template's class is called if one exists.
        hook_name = "prepare_template_%s" % template_store.__class__.__name__
        module_namespace = globals()
        if hook_name in module_namespace:
            module_namespace[hook_name](template_store)

        if fuzzymatching:
            #FIXME: max_length hardcoded
            template_matcher = match.matcher(template_store,
                                             max_candidates=1,
                                             min_similarity=min_similarity,
                                             max_length=3000,
                                             usefuzzy=True)
            template_matcher.addpercentage = False
            matchers.append(template_matcher)

    # Preparation, part 2: the translation memory.
    if tm and fuzzymatching:
        #FIXME: max_length hardcoded
        tm_matcher = memory(tm, max_candidates=1,
                            min_similarity=min_similarity, max_length=1000)
        tm_matcher.addpercentage = False
        matchers.append(tm_matcher)

    # Main loop: units that are not translatable are left alone.
    for unit in input_store.units:
        if not unit.istranslatable():
            continue
        pretranslate_unit(unit, template_store, matchers,
                          merge_on=input_store.merge_on)

    return input_store


def main(argv=None):
    """Command-line entry point: set up the option parser and run it.

    :param argv: Argument list handed to the parser's ``run`` method.
    """
    from translate.convert import convert

    # Each input extension is registered twice, once on its own and once
    # as a tuple key; both map to the same output extension and handler.
    # NOTE(review): the tuple keys are presumably (input, template)
    # extension pairs -- confirm against ConvertOptionParser.
    formats = {}
    for input_ext, output_ext in (("pot", "po"), ("po", "po"),
                                  ("xlf", "xlf")):
        handler = (output_ext, pretranslate_file)
        formats[input_ext] = handler
        formats[(input_ext, output_ext)] = handler

    parser = convert.ConvertOptionParser(
        formats,
        usetemplates=True,
        allowmissingtemplate=True,
        description=__doc__,
    )

    parser.add_option(
        "", "--tm", dest="tm", default=None,
        help="The file to use as translation memory when fuzzy matching",
    )
    parser.passthrough.append("tm")

    default_similarity = 75
    similarity_help = ("The minimum similarity for inclusion (default: %d%%)"
                       % default_similarity)
    parser.add_option(
        "-s", "--similarity", dest="min_similarity",
        default=default_similarity, type="float",
        help=similarity_help,
    )
    parser.passthrough.append("min_similarity")

    parser.add_option(
        "--nofuzzymatching", dest="fuzzymatching",
        action="store_false", default=True,
        help="Disable fuzzy matching",
    )
    parser.passthrough.append("fuzzymatching")

    parser.run(argv)


# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()