/usr/lib/python3/dist-packages/astroML/stats/_point_statistics.py is in python3-astroml 0.3-6.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 | import numpy as np
from scipy import stats
#from scipy.special import erfinv
#sigmaG_factor = 1. / (2 * np.sqrt(2) * erfinv(0.5))
sigmaG_factor = 0.74130110925280102
def mean_sigma(a, axis=None, dtype=None, ddof=0, keepdims=False):
"""Compute mean and standard deviation for an array
Parameters
----------
a : array_like
Array containing numbers whose mean is desired. If `a` is not an
array, a conversion is attempted.
axis : int, optional
Axis along which the means are computed. The default is to compute
the mean of the flattened array.
dtype : dtype, optional
Type to use in computing the standard deviation. For arrays of
integer type the default is float64, for arrays of float types it is
the same as the array type.
keepdims : bool, optional
If this is set to True, the axes which are reduced are left
in the result as dimensions with size one. With this option,
the result will broadcast correctly against the original `arr`.
Returns
-------
mu : ndarray, see dtype parameter above
array containing the mean values
sigma : ndarray, see dtype parameter above.
array containing the standard deviation
See Also
--------
median_sigmaG : robust rank-based version of this calculation.
Notes
-----
This routine simply calls ``np.mean`` and ``np.std``, passing the
keyword arguments to them. It is provided for ease of comparison
with the function median_sigmaG()
"""
mu = np.mean(a, axis=axis, dtype=dtype)
sigma = np.std(a, axis=axis, dtype=dtype, ddof=ddof)
if keepdims:
if axis is None:
newshape = a.ndim * (1,)
else:
newshape = np.asarray(a.shape)
newshape[axis] = 1
mu = mu.reshape(newshape)
sigma = sigma.reshape(newshape)
return mu, sigma
def median_sigmaG(a, axis=None, overwrite_input=False, keepdims=False):
"""Compute median and rank-based estimate of the standard deviation
Parameters
----------
a : array_like
Array containing numbers whose mean is desired. If `a` is not an
array, a conversion is attempted.
axis : int, optional
Axis along which the means are computed. The default is to compute
the mean of the flattened array.
overwrite_input : bool, optional
If True, then allow use of memory of input array `a` for
calculations. The input array will be modified by the call to
median. This will save memory when you do not need to preserve
the contents of the input array. Treat the input as undefined,
but it will probably be fully or partially sorted.
Default is False. Note that, if `overwrite_input` is True and the
input is not already an array, an error will be raised.
keepdims : bool, optional
If this is set to True, the axes which are reduced are left
in the result as dimensions with size one. With this option,
the result will broadcast correctly against the original `arr`.
Returns
-------
median : ndarray, see dtype parameter above
array containing the median values
sigmaG : ndarray, see dtype parameter above.
array containing the robust estimator of the standard deviation
See Also
--------
mean_sigma : non-robust version of this calculation
sigmaG : robust rank-based estimate of standard deviation
Notes
-----
This routine uses a single call to ``np.percentile`` to find the
quartiles along the given axis, and uses these to compute the
median and sigmaG:
median = q50
sigmaG = (q75 - q25) * 0.7413
where 0.7413 ~ 1 / (2 sqrt(2) erf^-1(0.5))
"""
q25, median, q75 = np.percentile(a, [25, 50, 75],
axis=axis,
overwrite_input=overwrite_input)
sigmaG = sigmaG_factor * (q75 - q25)
if keepdims:
if axis is None:
newshape = a.ndim * (1,)
else:
newshape = np.asarray(a.shape)
newshape[axis] = 1
median = median.reshape(newshape)
sigmaG = sigmaG.reshape(newshape)
return median, sigmaG
def sigmaG(a, axis=None, overwrite_input=False, keepdims=False):
"""Compute the rank-based estimate of the standard deviation
Parameters
----------
a : array_like
Array containing numbers whose mean is desired. If `a` is not an
array, a conversion is attempted.
axis : int, optional
Axis along which the means are computed. The default is to compute
the mean of the flattened array.
overwrite_input : bool, optional
If True, then allow use of memory of input array `a` for
calculations. The input array will be modified by the call to
median. This will save memory when you do not need to preserve
the contents of the input array. Treat the input as undefined,
but it will probably be fully or partially sorted.
Default is False. Note that, if `overwrite_input` is True and the
input is not already an array, an error will be raised.
keepdims : bool, optional
If this is set to True, the axes which are reduced are left
in the result as dimensions with size one. With this option,
the result will broadcast correctly against the original `arr`.
Returns
-------
median : ndarray, see dtype parameter above
array containing the median values
sigmaG : ndarray, see dtype parameter above.
array containing the robust estimator of the standard deviation
See Also
--------
median_sigmaG : robust rank-based estimate of mean and standard deviation
Notes
-----
This routine uses a single call to ``np.percentile`` to find the
quartiles along the given axis, and uses these to compute the
sigmaG, a robust estimate of the standard deviation sigma:
sigmaG = 0.7413 * (q75 - q25)
where 0.7413 ~ 1 / (2 sqrt(2) erf^-1(0.5))
"""
q25, q75 = np.percentile(a, [25, 75],
axis=axis,
overwrite_input=overwrite_input)
sigmaG = sigmaG_factor * (q75 - q25)
if keepdims:
if axis is None:
newshape = a.ndim * (1,)
else:
newshape = np.asarray(a.shape)
newshape[axis] = 1
sigmaG = sigmaG.reshape(newshape)
return sigmaG
def fit_bivariate_normal(x, y, robust=False):
"""Fit bivariate normal parameters to a 2D distribution of points
Parameters
----------
x, y : array_like
The x, y coordinates of the points
robust : boolean (optional, default=False)
If True, then use rank-based statistics which are robust to outliers
Otherwise, use mean/std statistics which are not robust
Returns
-------
mu : tuple
(x, y) location of the best-fit bivariate normal
sigma_1, sigma_2 : float
The best-fit gaussian widths in the uncorrelated frame
alpha : float
The rotation angle in radians of the uncorrelated frame
"""
x = np.asarray(x)
y = np.asarray(y)
assert x.shape == y.shape
if robust:
# use quartiles to compute center and spread
med_x, sigmaG_x = median_sigmaG(x)
med_y, sigmaG_y = median_sigmaG(y)
# define the principal variables from Shevlyakov & Smirnov (2011)
sx = 2 * sigmaG_x
sy = 2 * sigmaG_y
u = (x / sx + y / sy) / np.sqrt(2)
v = (x / sx - y / sy) / np.sqrt(2)
med_u, sigmaG_u = median_sigmaG(u)
med_v, sigmaG_v = median_sigmaG(v)
r_xy = ((sigmaG_u ** 2 - sigmaG_v ** 2) /
(sigmaG_u ** 2 + sigmaG_v ** 2))
# rename estimators
mu_x, mu_y = med_x, med_y
sigma_x, sigma_y = sigmaG_x, sigmaG_y
else:
mu_x = np.mean(x)
sigma_x = np.std(x)
mu_y = np.mean(y)
sigma_y = np.std(y)
r_xy = stats.pearsonr(x, y)[0]
# We need to use the full (-180, 180) version of arctan: this is
# np.arctan2(x, y) = np.arctan(x / y), modulo 180 degrees
sigma_xy = r_xy * sigma_x * sigma_y
alpha = 0.5 * np.arctan2(2 * sigma_xy, sigma_x ** 2 - sigma_y ** 2)
sigma1 = np.sqrt((0.5 * (sigma_x ** 2 + sigma_y ** 2)
+ np.sqrt(0.25 * (sigma_x ** 2 - sigma_y ** 2) ** 2
+ sigma_xy ** 2)))
sigma2 = np.sqrt((0.5 * (sigma_x ** 2 + sigma_y ** 2)
- np.sqrt(0.25 * (sigma_x ** 2 - sigma_y ** 2) ** 2
+ sigma_xy ** 2)))
return [mu_x, mu_y], sigma1, sigma2, alpha
|