This file is indexed.

/usr/lib/python2.7/dist-packages/Codeville/lcsmatch.py is in codeville 0.8.0-2.1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

# Written by Bram Cohen and Uoti Urpala

from bisect import bisect

def unique_lcs(a, b):
    # set index[line in a] = position of line in a, unless
    # the line is a duplicate, in which case it's set to None
    index = {}
    for i, line in enumerate(a):
        if line in index:
            index[line] = None
        else:
            index[line] = i
    # make btoa[i] = position of line i in a, unless
    # that line doesn't occur exactly once in both,
    # in which case it's set to None
    btoa = [None] * len(b)
    index2 = {}
    for pos, line in enumerate(b):
        a_idx = index.get(line)
        if a_idx is not None:
            if line in index2:
                # unset the previous mapping, which we now know to
                # be invalid because the line isn't unique
                btoa[index2[line]] = None
                del index[line]
            else:
                index2[line] = pos
                btoa[pos] = a_idx
    # this is the Patience sorting algorithm
    # see http://en.wikipedia.org/wiki/Patience_sorting
    backpointers = [None] * len(b)
    stacks = []
    lasts = []
    k = 0
    for bpos, apos in enumerate(btoa):
        if apos is None:
            continue
        # skip over solitary matches with no surrounding matching context
        if (bpos == 0 or apos == 0 or a[apos-1] != b[bpos-1]) and \
                (bpos == len(b)-1 or apos == len(a)-1 or a[apos+1] != b[bpos+1]):
            continue
        # as an optimization, check if the next line comes right after
        # the previous line, because usually it does
        if stacks and stacks[k] < apos and (k == len(stacks) - 1 or stacks[k+1] > apos):
            k += 1
        else:
            k = bisect(stacks, apos)
        if k > 0:
            backpointers[bpos] = lasts[k-1]
        if k < len(stacks):
            stacks[k] = apos
            lasts[k] = bpos
        else:
            stacks.append(apos)
            lasts.append(bpos)
    if len(lasts) == 0:
        return []
    result = []
    k = lasts[-1]
    while k is not None:
        result.append((btoa[k], k))
        k = backpointers[k]
    result.reverse()
    return result
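
# Worked example (illustrative; mirrors the asserts in test_lcs below):
#   >>> unique_lcs('abcde', 'cdeab')
#   [(2, 0), (3, 1), (4, 2)]
# Every character occurs exactly once in both strings, so btoa is
# [2, 3, 4, 0, 1]; patience sorting then extracts the longest increasing
# run of a-positions, matching the block 'cde' rather than the shorter 'ab'.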

def test_lcs():
    assert unique_lcs('', '') == []
    assert unique_lcs('a', 'a') == []
    assert unique_lcs('a', 'b') == []
    assert unique_lcs('ab', 'ab') == [(0, 0), (1, 1)]
    assert unique_lcs('abcde', 'cdeab') == [(2, 0), (3, 1), (4, 2)]
    assert unique_lcs('cdeab', 'abcde') == [(0, 2), (1, 3), (2, 4)]
    assert unique_lcs('abXde', 'abYde') == [(0, 0), (1, 1), (3, 3), (4, 4)]
    return

def recurse_matches(a, b, ahi, bhi, answer, maxrecursion, is_junk):
    # this will never happen normally; the check guards against
    # maliciously deep recursion (DoS)
    if maxrecursion < 0:
        return
    oldlength = len(answer)
    # check for new matches after the last confirmed match ends
    if oldlength == 0:
        alo = 0
        blo = 0
    else:
        lasta, lastb, lastlen = answer[-1]
        alo = lasta + lastlen
        blo = lastb + lastlen
    if alo == ahi or blo == bhi:
        return
    last_enda = alo
    last_endb = blo
    last_checkb = blo
    # extend individual line matches into match sections
    for apos, bpos in unique_lcs(a[alo:ahi], b[blo:bhi]):
        apos += alo
        bpos += blo
        # don't overlap with an existing match or check something which
        # already got thrown out as junk
        if bpos < last_checkb or apos < last_enda:
            continue
        # extend line match as far in either direction as possible
        enda = apos + 1
        endb = bpos + 1
        while enda < ahi and endb < bhi and a[enda] == b[endb]:
            enda += 1
            endb += 1
        while apos > last_enda and bpos > last_endb and a[apos-1] == b[bpos-1]:
            apos -= 1
            bpos -= 1
        # mark what's been checked now, so even if it's junked it doesn't
        # have to be checked again
        last_checkb = endb
        # if this section is junk, skip it
        numreal = 0
        for k in xrange(apos, enda):
            if not is_junk(a[k]):
                numreal += 1
                if numreal >= 2:
                    break
        else:
            # the for..else fires only when the loop didn't break, i.e.
            # fewer than two non-junk lines were found, so the whole
            # section is junk (Niklaus Wirth can bite me)
            continue
        last_enda = enda
        last_endb = endb
        # find matches which come before the new match section
        # this can happen because there may be lines which weren't unique
        # in the whole file but are unique in the subsection
        recurse_matches(a, b, apos, bpos, answer, maxrecursion - 1, is_junk)
        answer.append((apos, bpos, enda - apos))
    if len(answer) > oldlength:
        # find matches between the last match and the end
        recurse_matches(a, b, ahi, bhi, answer, maxrecursion - 1, is_junk)
    # else: fall back to difflib (possibly a good idea, possibly not)
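
# (illustrative note) recurse_matches only ever appends to `answer`; each
# entry is an (a_start, b_start, length) triple describing one matched
# block, and the triples come out sorted by position in both sequences.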

def default_is_junk(x):
    return len(x.strip()) <= 2
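
# (illustrative note) under the default filter a blank line or a trivial
# line such as '}' or '*/' strips to two characters or fewer, so it does
# not count toward the two non-junk lines a section needs in
# recurse_matches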

def find_matches(a, b, is_junk = default_is_junk):
    # identical files always match in full; without this special case a
    # short identical file (e.g. a single line) would be rejected by the
    # solitary-match and junk filters above
    if a == b:
        return [(0, 0, len(a))]
    answer = []
    recurse_matches(a, b, len(a), len(b), answer, 10, is_junk)
    return answer
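
# Example usage (illustrative): two small files that differ only in one
# line.  The identical first line is dropped as a solitary match with
# mismatched context, and the last three lines survive the junk check
# because two of them are longer than two characters once stripped:
#   >>> a = ['def f(x):\n', '    y = x + 1\n', '    return y\n', '\n', 'print f(1)\n']
#   >>> b = ['def f(x):\n', '    y = x + 2\n', '    return y\n', '\n', 'print f(1)\n']
#   >>> find_matches(a, b)
#   [(2, 2, 3)]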

try:
    import psyco
    psyco.bind(unique_lcs, 0)
except ImportError:
    pass
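
# (note: psyco is an optional specializing JIT for 32-bit x86 Python 2;
# when it isn't installed, the pure-Python code above runs unchanged)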


# Stuff below here is for testing

def x(a, b):
    t1 = time.time()
    r = unique_lcs(a, b)
    t2 = time.time()
    #print r
    for i, j in r:
        assert a[i] == b[j]
        #print a[i]

    print
    print 'time:', t2-t1

def return_false(x):
    return False

def x2(a, b, is_junk = return_false):
    t1 = time.time()
    r = find_matches(a, b, is_junk)
    t2 = time.time()
    #print r
    for i, j, l in r:
        assert a[i:i+l] == b[j:j+l]
        #print ''.join(a[i:i+l]),

    print
    print 'time:', t2-t1


def completely_random_test(x):
    a = [str(i) for i in range(100000)]
    b = list(a)
    random.shuffle(b)
    #print ' '.join(b)
    #print
    x(a, b)

def random_with_ranges_test(x):
    d = [[str(i), str(i+1)] for i in xrange(0, 100000, 2)]
    c = list(d)
    random.shuffle(c)
    a = []
    for i in d:
        a.extend(i)
    b = []
    for i in c:
        b.extend(i)
    #print ' '.join(b)
    #print
    x(a, b)

def is_A_test(s):
    return s == 'A'

def test_lcsmatch():
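    # the helpers x() and x2() above rely on these imports becoming
    # module globals before they are called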
    global random, time
    import random
    import time

    cur_time = time.time()
    random.seed(cur_time)

    print 'Seeded tests with %s' % (cur_time,)

    completely_random_test(x)
    random_with_ranges_test(x)
    x2('0123456789abc', '    01 89a 34 67')
    x2('AB 1 CD 1 AB P XY Q AB 1 CD 1 AB', 'AB 2 CD 2 AB Q XY P AB 2 CD 2 AB')
    x2('AjBjC', 'AjC')
    x2('AjC', 'AjBjC')
    x2('AjBjC', 'AjC', is_A_test)
    x2('AjC', 'AjBjC', is_A_test)
    x2('x', 'x')
    x2('01 2', '01')
    x2('ABPXYQAB', 'ABQXYPAB')
    return
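
# Minimal driver (illustrative; not part of the original module): run the
# self-tests directly with `python lcsmatch.py`.
if __name__ == '__main__':
    test_lcs()
    test_lcsmatch()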