/usr/lib/python3/dist-packages/csb/statistics/__init__.py is in python3-csb 1.2.2+dfsg-2ubuntu1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

"""
Statistics root package.

This package contains a number of common statistical utilities. Sub-packages
provide more specialized APIs, for example L{csb.statistics.pdf} defines the
probability density object model.
"""

class Cumulative(object):
    """
    Empirical cumulative distribution function of a one-dimensional sample.
    """

    total_mem = 1e8

    def __init__(self, data):

        self.data = data

    def __call__(self, x, nchunks=None):
        ## evaluate the empirical CDF at the points in x; the outer comparison
        ## with the data is processed in chunks to bound memory consumption

        from numpy import greater, reshape, concatenate

        c = []
        x = reshape(x, (-1,))

        if nchunks is None:

            ## integer division: a fractional chunk count would make the
            ## slice size below a float and break indexing under Python 3
            total_size = len(x) * len(self.data)
            nchunks = int(total_size // self.total_mem) + int(total_size % self.total_mem != 0)

        size = len(x) // nchunks + int(len(x) % nchunks != 0)
        
        while len(x):

            y = x[:size]
            x = x[size:]

            c = concatenate((c, greater.outer(y, self.data).sum(1) / float(len(self.data))))

        return c
    
    def cumulative_density(self, x, nchunks=None):
        ## survival function: one minus the empirical CDF
        return 1 - self.__call__(x, nchunks)
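
## Usage sketch (editor's example, not part of the original module):
## evaluating the empirical CDF of a sample on a grid of points.
##
##     import numpy as np
##     cdf = Cumulative(np.array([1., 2., 3., 4.]))
##     cdf(np.array([0., 2.5, 10.])).tolist()    # -> [0.0, 0.5, 1.0]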

def geometric_mean(x, axis=None):
    """
    @param x: input array
    @param axis: compute the geometric mean along this axis 

    @return: geometric mean of x
    """
    from numpy import exp, log, clip, mean

    return exp(mean(log(clip(x, 1e-300, 1e300)), axis))

def harmonic_mean(x, axis=None):
    """
    @param x: input array
    @param axis: compute the harmonic mean along this axis 

    @return: harmonic mean of x
    """
    from numpy import mean

    return 1 / mean(1 / x, axis)
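
## Editor's example (not part of the original module): for positive data,
## harmonic mean <= geometric mean <= arithmetic mean.
##
##     import numpy as np
##     x = np.array([2., 8.])
##     geometric_mean(x)    # -> 4.0
##     harmonic_mean(x)     # -> 3.2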

def kurtosis(x, axis=None):
    """
    @param x: random variables
    @param axis: compute the kurtosis along this axis

    @return: sample excess kurtosis of x (i.e. kurtosis minus 3)
    """
    from numpy import mean, std

    m = x.mean(axis)
    a = mean((x - m) ** 4, axis)
    s = std(x, axis)

    return a / s ** 4 - 3

def skewness(x, axis=None):
    """
    @param x: random variables
    @param axis: compute the skewness along this axis

    @return: Sample skewness of x
    """
    from numpy import mean, std

    s = std(x, axis)
    return mean((x - x.mean(axis)) ** 3, axis) / s ** 3
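
## Editor's example (not part of the original module):
##
##     import numpy as np
##     x = np.array([-1., 1., -1., 1.])
##     skewness(x)    # -> 0.0  (symmetric sample)
##     kurtosis(x)    # -> -2.0 (a two-point sample attains the minimal excess kurtosis)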
    
def autocorrelation(x, n):
    """
    Auto-correlation of a time series.

    @param x: time series
    @type x: numpy.array
    @param n: Maximal lag for which to compute the auto-correlation
    @type n: int
    """
    from numpy import array, mean, std
    x = x - x.mean()
    return array([mean(x[i:] * x[:len(x) - i]) for i in range(n)]) / std(x)**2
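
## Editor's example (not part of the original module): a perfectly
## alternating series has autocorrelation +1 at lag 0 and -1 at lag 1.
##
##     import numpy as np
##     x = np.array([1., -1., 1., -1., 1., -1.])
##     autocorrelation(x, 2).tolist()    # -> [1.0, -1.0]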

def probabilistic_and(p, axis=0):
    """
    Probabilistic version of AND
    """
    from numpy import array, multiply
    return multiply.reduce(array(p), axis=axis)

def probabilistic_or(p, axis=0):
    """
    Probabilistic version of OR
    """
    from numpy import array
    return 1 - probabilistic_and(1 - array(p), axis)

def probabilistic_xor(p, axis=0):
    """
    Probabilistic version of XOR.
    Works only for axis=0.
    """
    from numpy import array

    p = array(p)
    p_not = 1 - p
    
    P = []

    for i in range(p.shape[axis]):
        x = p_not * 1
        x[i] = p[i]
        P.append(probabilistic_and(x, 0))

    return probabilistic_or(P, 0)
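
## Editor's example (not part of the original module): for independent events
## with P(A) = 0.9 and P(B) = 0.8,
##
##     probabilistic_and([0.9, 0.8])    # -> 0.72   (P(A)P(B))
##     probabilistic_or([0.9, 0.8])     # -> 0.98   (1 - 0.1 * 0.2)
##     probabilistic_xor([0.9, 0.8])    # -> 0.2456 (OR over the "exactly one" terms)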

def principal_coordinates(D, nd=None):
    """
    Reconstruction of a multidimensional configuration that
    optimally reproduces the input distance matrix.

    See: Gower, J (1966)
    """
    from numpy import clip, sqrt, take, argsort, sort
    from csb.numeric import reverse
    from scipy.linalg import eigh
    
    ## calculate centered similarity matrix
    
    B = -clip(D, 1e-150, 1e150) ** 2 / 2.

    b = B.mean(0)

    B = B - b
    B = (B.T - b).T
    B += b.mean()

    ## calculate spectral decomposition

    v, U = eigh(B)
    v = v.real
    U = U.real

    U = take(U, argsort(v), 1)
    v = sort(v)

    U = reverse(U, 1)
    v = reverse(v)
    
    if nd is None: nd = len(v)

    X = U[:, :nd] * sqrt(clip(v[:nd], 0., 1e300))

    return X
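
## Editor's example (not part of the original module): classical scaling of
## the pairwise distances of three collinear points recovers their 1D
## coordinates up to translation and sign.
##
##     import numpy as np
##     D = np.array([[0., 1., 3.],
##                   [1., 0., 2.],
##                   [3., 2., 0.]])
##     X = principal_coordinates(D, nd=1)
##     X.ravel()    # -> approx. [-1.33, -0.33, 1.67] (or its negative)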

def entropy(p):
    """
    Calculate the (Shannon) entropy of the probability distribution p.

    @param p: probability distribution
    @return: entropy of p in nats
    """
    from csb.numeric import log
    from numpy import sum
    
    return -sum(p * log(p))
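
## Editor's example (not part of the original module): the entropy of a
## uniform distribution over n states is log(n) nats.
##
##     import numpy as np
##     entropy(np.ones(4) / 4.)    # -> 1.386... (log 4)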

def histogram2D(x, nbins=100, axes=None, nbatch=1000, normalize=True):
    """
    Non-greedy two-dimensional histogram.

    @param x: input array of rank two
    @type x: numpy array
    @param nbins: number of bins
    @type nbins: integer
    @param axes: x- and y-axes used for binning the data (if provided this will be used instead of <nbins>)
    @type axes: tuple of two one-dimensional numpy arrays
    @param nbatch: size of batch that is used to sort the data into the 2D grid
    @type nbatch: integer
    @param normalize: specifies whether histogram should be normalized
    @type normalize: boolean

    @return: 2-rank array storing histogram, tuple of x- and y-axis
    """
    from numpy import linspace, zeros, argmin, fabs, subtract, transpose
    
    if axes is None:
        
        lower, upper = x.min(0), x.max(0)
        axes = [linspace(lower[i], upper[i], nbins) for i in range(lower.shape[0])]

    H = zeros((len(axes[0]), len(axes[1])))

    while len(x):

        y = x[:nbatch]
        x = x[nbatch:]

        I = transpose([argmin(fabs(subtract.outer(y[:, i], axes[i])), 1) for i in range(2)])

        for i, j in I: H[i, j] += 1

    if normalize:
        H = H / H.sum() / (axes[0][1] - axes[0][0]) / (axes[1][1] - axes[1][0])

    return H, axes
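
## Editor's example (not part of the original module): the normalized
## histogram integrates to one over the grid.
##
##     import numpy as np
##     x = np.random.standard_normal((10000, 2))
##     H, (ax, ay) = histogram2D(x)
##     H.sum() * (ax[1] - ax[0]) * (ay[1] - ay[0])    # -> 1.0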

def histogram_nd(x, nbins=100, axes=None, nbatch=1000, normalize=True):
    """
    Non-greedy n-dimensional histogram.

    @param x: input array of rank (-1,n)
    @type x: numpy array
    @param nbins: number of bins
    @type nbins: integer
    @param axes: axes used for binning the data (if provided this will be used instead of <nbins>)
    @type axes: tuple of n one-dimensional numpy arrays
    @param nbatch: size of batch that is used to sort the data into the nD grid
    @type nbatch: integer
    @param normalize: specifies whether histogram should be normalized
    @type normalize: boolean

    @return: n-rank array storing histogram, tuple of axes
    """
    import numpy as np

    if len(x.shape) == 1:
        x = np.reshape(x, (-1,1))

    d = x.shape[1]
    
    if axes is None:
        
        lower, upper = x.min(0), x.max(0)
        axes = [np.linspace(lower[i], upper[i], nbins) for i in range(d)]

    shape = tuple(map(len, axes))

    H = np.zeros(shape)
    ## row-major strides for flattening an n-dimensional bin index; an earlier
    ## revision used H.shape[:-1] here, which differs for unequal axis lengths
    s = np.multiply.accumulate(np.array((1,) + H.shape[1:]))[::-1]
    H = H.flatten()
    
    while len(x):

        y = x[:nbatch]
        x = x[nbatch:]

        I = np.transpose([np.argmin(np.fabs(np.subtract.outer(y[:, i], axes[i])), 1)
                          for i in range(d)])
        I = np.dot(I, s)
        I = np.sort(I)

        i = list(set(I.tolist()))
        n = np.equal.outer(I, i).sum(0)
        
        H[i] += n
        
    if normalize:
        H = H / H.sum() / np.multiply.reduce([axes[i][1] - axes[i][0] for i in range(d)])

    H = np.reshape(H, shape)

    return H, axes
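
## Editor's example (not part of the original module): a three-dimensional
## histogram on a 20 x 20 x 20 grid.
##
##     import numpy as np
##     x = np.random.random((5000, 3))
##     H, axes = histogram_nd(x, nbins=20)
##     H.shape    # -> (20, 20, 20)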

def density(x, nbins, normalize=True):
    """
    Histogram of univariate input data: a thin wrapper around numpy's
    histogram function that returns bin centers and, optionally, a
    properly normalized density.

    @param x: input numpy array
    @param nbins: number of bins
    @type nbins: integer
    @param normalize: if true, histogram will be normalized
    """
    from numpy import histogram
    
    hy, hx = histogram(x, nbins)
    hx = 0.5 * (hx[1:] + hx[:-1])
    hy = hy.astype('d')
    if normalize:
        hy /= (hx[1] - hx[0]) * hy.sum()

    return hx, hy
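
## Editor's example (not part of the original module): hy then approximates
## the standard normal density at the bin centers hx.
##
##     import numpy as np
##     hx, hy = density(np.random.standard_normal(100000), 50)
##     hy.sum() * (hx[1] - hx[0])    # -> 1.0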

def circvar(a, axis=None):
    """
    Calculate circular variance of a circular variable.

    @param a: input array
    @param axis: axis along which the variance is calculated
    @type axis: None or integer    
    """
    from numpy import average, cos, sin
    return 1 - average(cos(a), axis) ** 2 - average(sin(a), axis) ** 2

def circmean(a, axis=None):
    """
    Estimate mean of a circular variable

    @param a: input array
    @param axis: axis along which mean is calculated
    @type axis: None or integer
    """
    from numpy import sin, cos, arctan2, average
    return arctan2(average(sin(a), axis), average(cos(a), axis))
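
## Editor's example (not part of the original module): angles clustered
## around zero across the 2*pi wrap-around.
##
##     import numpy as np
##     a = np.array([0.1, 2 * np.pi - 0.1])
##     circmean(a)    # -> ~0.0   (the arithmetic mean would give ~pi)
##     circvar(a)     # -> ~0.01  (tightly clustered)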

def running_average(x, w, axis=None):
    """
    Calculates a running average for a given window size.

    @param x: input array
    @param w: window size
    @type w: integer
    @param axis: axis along which mean is calculated
    """
    from numpy import array, mean
    return array([mean(x[i:i + w], axis) for i in range(len(x) - w)])
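
## Editor's example (not part of the original module): note that the final
## window is not included (the loop runs over range(len(x) - w)).
##
##     import numpy as np
##     running_average(np.array([1., 2., 3., 4., 5.]), 2).tolist()    # -> [1.5, 2.5, 3.5]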

def weighted_median(x, w):
    """
    Calculates the weighted median, i.e. the minimizer mu of
    sum_i w_i * |x_i - mu|.

    @param x: input array
    @param w: array of weights
    """
    from numpy import sum, add, argsort, sort

    w = w / w.sum()
    w = w[argsort(x)]
    x = sort(x)
    j = sum(add.accumulate(w) < 0.5)

    return x[j]
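
## Editor's example (not part of the original module): with uniform weights
## the weighted median reduces to an ordinary median; a heavy weight pulls
## the result toward the corresponding point.
##
##     import numpy as np
##     x = np.array([1., 2., 3.])
##     weighted_median(x, np.array([1., 1., 1.]))    # -> 2.0
##     weighted_median(x, np.array([1., 1., 4.]))    # -> 3.0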