This file is indexed.

/usr/lib/python3/dist-packages/astroML/stats/_point_statistics.py is in python3-astroml 0.3-6.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
import numpy as np
from scipy import stats

#from scipy.special import erfinv
#sigmaG_factor = 1. / (2 * np.sqrt(2) * erfinv(0.5))
sigmaG_factor = 0.74130110925280102


def mean_sigma(a, axis=None, dtype=None, ddof=0, keepdims=False):
    """Compute mean and standard deviation for an array

    Parameters
    ----------
    a : array_like
        Array containing numbers whose mean is desired. If `a` is not an
        array, a conversion is attempted.
    axis : int, optional
        Axis along which the means are computed. The default is to compute
        the mean of the flattened array.
    dtype : dtype, optional
        Type to use in computing the standard deviation. For arrays of
        integer type the default is float64, for arrays of float types it is
        the same as the array type.
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original `arr`.

    Returns
    -------
    mu : ndarray, see dtype parameter above
        array containing the mean values

    sigma : ndarray, see dtype parameter above.
        array containing the standard deviation

    See Also
    --------
    median_sigmaG : robust rank-based version of this calculation.

    Notes
    -----
    This routine simply calls ``np.mean`` and ``np.std``, passing the
    keyword arguments to them.  It is provided for ease of comparison
    with the function median_sigmaG()
    """
    mu = np.mean(a, axis=axis, dtype=dtype)
    sigma = np.std(a, axis=axis, dtype=dtype, ddof=ddof)

    if keepdims:
        if axis is None:
            newshape = a.ndim * (1,)
        else:
            newshape = np.asarray(a.shape)
            newshape[axis] = 1

        mu = mu.reshape(newshape)
        sigma = sigma.reshape(newshape)

    return mu, sigma


def median_sigmaG(a, axis=None, overwrite_input=False, keepdims=False):
    """Compute median and rank-based estimate of the standard deviation

    Parameters
    ----------
    a : array_like
        Array containing numbers whose mean is desired. If `a` is not an
        array, a conversion is attempted.
    axis : int, optional
        Axis along which the means are computed. The default is to compute
        the mean of the flattened array.
    overwrite_input : bool, optional
       If True, then allow use of memory of input array `a` for
       calculations. The input array will be modified by the call to
       median. This will save memory when you do not need to preserve
       the contents of the input array. Treat the input as undefined,
       but it will probably be fully or partially sorted.
       Default is False. Note that, if `overwrite_input` is True and the
       input is not already an array, an error will be raised.
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original `arr`.

    Returns
    -------
    median : ndarray, see dtype parameter above
        array containing the median values

    sigmaG : ndarray, see dtype parameter above.
        array containing the robust estimator of the standard deviation

    See Also
    --------
    mean_sigma : non-robust version of this calculation
    sigmaG : robust rank-based estimate of standard deviation

    Notes
    -----
    This routine uses a single call to ``np.percentile`` to find the
    quartiles along the given axis, and uses these to compute the
    median and sigmaG:

    median = q50
    sigmaG = (q75 - q25) * 0.7413

    where 0.7413 ~ 1 / (2 sqrt(2) erf^-1(0.5))
    """
    q25, median, q75 = np.percentile(a, [25, 50, 75],
                                     axis=axis,
                                     overwrite_input=overwrite_input)
    sigmaG = sigmaG_factor * (q75 - q25)

    if keepdims:
        if axis is None:
            newshape = a.ndim * (1,)
        else:
            newshape = np.asarray(a.shape)
            newshape[axis] = 1

        median = median.reshape(newshape)
        sigmaG = sigmaG.reshape(newshape)

    return median, sigmaG


def sigmaG(a, axis=None, overwrite_input=False, keepdims=False):
    """Compute the rank-based estimate of the standard deviation

    Parameters
    ----------
    a : array_like
        Array containing numbers whose mean is desired. If `a` is not an
        array, a conversion is attempted.
    axis : int, optional
        Axis along which the means are computed. The default is to compute
        the mean of the flattened array.
    overwrite_input : bool, optional
       If True, then allow use of memory of input array `a` for
       calculations. The input array will be modified by the call to
       median. This will save memory when you do not need to preserve
       the contents of the input array. Treat the input as undefined,
       but it will probably be fully or partially sorted.
       Default is False. Note that, if `overwrite_input` is True and the
       input is not already an array, an error will be raised.
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original `arr`.

    Returns
    -------
    median : ndarray, see dtype parameter above
        array containing the median values

    sigmaG : ndarray, see dtype parameter above.
        array containing the robust estimator of the standard deviation

    See Also
    --------
    median_sigmaG : robust rank-based estimate of mean and standard deviation

    Notes
    -----
    This routine uses a single call to ``np.percentile`` to find the
    quartiles along the given axis, and uses these to compute the
    sigmaG, a robust estimate of the standard deviation sigma:

    sigmaG = 0.7413 * (q75 - q25)

    where 0.7413 ~ 1 / (2 sqrt(2) erf^-1(0.5))
    """
    q25, q75 = np.percentile(a, [25, 75],
                             axis=axis,
                             overwrite_input=overwrite_input)
    sigmaG = sigmaG_factor * (q75 - q25)

    if keepdims:
        if axis is None:
            newshape = a.ndim * (1,)
        else:
            newshape = np.asarray(a.shape)
            newshape[axis] = 1

        sigmaG = sigmaG.reshape(newshape)

    return sigmaG


def fit_bivariate_normal(x, y, robust=False):
    """Fit bivariate normal parameters to a 2D distribution of points

    Parameters
    ----------
    x, y : array_like
        The x, y coordinates of the points

    robust : boolean (optional, default=False)
        If True, then use rank-based statistics which are robust to outliers
        Otherwise, use mean/std statistics which are not robust

    Returns
    -------
    mu : tuple
        (x, y) location of the best-fit bivariate normal
    sigma_1, sigma_2 : float
        The best-fit gaussian widths in the uncorrelated frame
    alpha : float
        The rotation angle in radians of the uncorrelated frame
    """
    x = np.asarray(x)
    y = np.asarray(y)

    assert x.shape == y.shape

    if robust:
        # use quartiles to compute center and spread
        med_x, sigmaG_x = median_sigmaG(x)
        med_y, sigmaG_y = median_sigmaG(y)

        # define the principal variables from Shevlyakov & Smirnov (2011)
        sx = 2 * sigmaG_x
        sy = 2 * sigmaG_y

        u = (x / sx + y / sy) / np.sqrt(2)
        v = (x / sx - y / sy) / np.sqrt(2)

        med_u, sigmaG_u = median_sigmaG(u)
        med_v, sigmaG_v = median_sigmaG(v)

        r_xy = ((sigmaG_u ** 2 - sigmaG_v ** 2) /
                (sigmaG_u ** 2 + sigmaG_v ** 2))

        # rename estimators
        mu_x, mu_y = med_x, med_y
        sigma_x, sigma_y = sigmaG_x, sigmaG_y
    else:
        mu_x = np.mean(x)
        sigma_x = np.std(x)

        mu_y = np.mean(y)
        sigma_y = np.std(y)

        r_xy = stats.pearsonr(x, y)[0]

    # We need to use the full (-180, 180) version of arctan: this is
    # np.arctan2(x, y) = np.arctan(x / y), modulo 180 degrees
    sigma_xy = r_xy * sigma_x * sigma_y
    alpha = 0.5 * np.arctan2(2 * sigma_xy, sigma_x ** 2 - sigma_y ** 2)

    sigma1 = np.sqrt((0.5 * (sigma_x ** 2 + sigma_y ** 2)
                      + np.sqrt(0.25 * (sigma_x ** 2 - sigma_y ** 2) ** 2
                                + sigma_xy ** 2)))
    sigma2 = np.sqrt((0.5 * (sigma_x ** 2 + sigma_y ** 2)
                      - np.sqrt(0.25 * (sigma_x ** 2 - sigma_y ** 2) ** 2
                                + sigma_xy ** 2)))

    return [mu_x, mu_y], sigma1, sigma2, alpha