/usr/lib/python3/dist-packages/astroML/density_estimation/empirical.py is in python3-astroml 0.3-6.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 | import numpy as np
from scipy import interpolate
from ..utils import check_random_state
class FunctionDistribution(object):
"""Generate random variables distributed according to an arbitrary function
Parameters
----------
func : function
func should take an array of x values, and return an array
proportional to the probability density at each value
xmin : float
minimum value of interest
xmax : float
maximum value of interest
Nx : int (optional)
number of samples to draw. Default is 1000
random_state : None, int, or np.random.RandomState instance
random seed or random number generator
func_args : dictionary (optional)
additional keyword arguments to be passed to func
"""
def __init__(self, func, xmin, xmax, Nx=1000,
random_state=None, func_args=None):
self.random_state = check_random_state(random_state)
if func_args is None:
func_args = {}
x = np.linspace(xmin, xmax, Nx)
Px = func(x, **func_args)
# if there are too many zeros, interpolation will fail
positive = (Px > 1E-10 * Px.max())
x = x[positive]
Px = Px[positive].cumsum()
Px /= Px[-1]
self._tck = interpolate.splrep(Px, x)
def rvs(self, shape):
"""Draw random variables from the distribution
Parameters
----------
shape : integer or tuple
shape of desired array
Returns
-------
rv : ndarray, shape=shape
random variables
"""
# generate uniform variables between 0 and 1
y = self.random_state.random_sample(shape)
return interpolate.splev(y, self._tck)
class EmpiricalDistribution(object):
"""Empirically learn a distribution from one-dimensional data
Parameters
----------
data : one-dimensional array
input data
Examples
--------
>>> import numpy as np
>>> np.random.seed(0)
>>> x = np.random.normal(size=10000) # normally-distributed variables
>>> x.mean(), x.std()
(-0.018433720158265783, 0.98755656817612003)
>>> x2 = EmpiricalDistribution(x).rvs(10000)
>>> x2.mean(), x2.std()
(-0.020293716681613363, 1.0039249294845276)
Notes
-----
This function works by approximating the inverse of the cumulative
distribution using an efficient spline fit to the sorted values.
"""
def __init__(self, data):
# copy, because we'll need to sort in-place
data = np.array(data, copy=True)
if data.ndim != 1:
raise ValueError("data should be one-dimensional")
data.sort()
# set up spline
y = np.linspace(0, 1, data.size)
self._tck = interpolate.splrep(y, data)
def rvs(self, shape):
"""Draw random variables from the distribution
Parameters
----------
shape : integer or tuple
shape of desired array
Returns
-------
rv : ndarray, shape=shape
random variables
"""
# generate uniform variables between 0 and 1
y = np.random.random(shape)
return interpolate.splev(y, self._tck)
|