/usr/share/pyshared/sympy/statistics/distributions.py is in python-sympy 0.7.1.rc1-3.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 | from sympy.core import sympify, Lambda, Dummy, Integer, Rational, oo, Float, pi
from sympy.functions import sqrt, exp, erf
from sympy.printing import sstr
import random
class Sample(tuple):
"""
Sample([x1, x2, x3, ...]) represents a collection of samples.
Sample parameters like mean, variance and stddev can be accessed as
properties.
The sample will be sorted.
"""
def __new__(cls, sample):
s = tuple.__new__(cls, sorted(sample))
s.mean = mean = sum(s) / Integer(len(s))
s.variance = sum([(x-mean)**2 for x in s]) / Integer(len(s))
s.stddev = sqrt(s.variance)
if len(s) % 2:
s.median = s[len(s)//2]
else:
s.median = sum(s[len(s)//2-1:len(s)//2+1]) / Integer(2)
return s
def __repr__(self):
return sstr(self)
def __str__(self):
return sstr(self)
class ContinuousProbability(object):
"""Base class for continuous probability distributions"""
def probability(s, a, b):
"""Calculate the probability that a random number x generated
from the distribution satisfies a <= x <= b """
return s.cdf(b) - s.cdf(a)
def random(s, n=None):
"""
random() -- generate a random number from the distribution.
random(n) -- generate a Sample of n random numbers.
"""
if n is None:
return s._random()
else:
return Sample([s._random() for i in xrange(n)])
def __repr__(self):
return sstr(self)
def __str__(self):
return sstr(self)
class Normal(ContinuousProbability):
"""
Normal(mu, sigma) represents the normal or Gaussian distribution
with mean value mu and standard deviation sigma.
Example usage:
>>> from sympy.statistics import Normal
>>> from sympy import oo
>>> N = Normal(1, 2)
>>> N.mean
1
>>> N.variance
4
>>> N.probability(-oo, 1) # probability on an interval
1/2
>>> N.probability(1, oo)
1/2
>>> N.probability(-oo, oo)
1
>>> N.probability(-1, 3)
erf(2**(1/2)/2)
>>> _.evalf()
0.682689492137086
"""
def __init__(self, mu, sigma):
self.mu = sympify(mu)
self.sigma = sympify(sigma)
mean = property(lambda s: s.mu)
median = property(lambda s: s.mu)
mode = property(lambda s: s.mu)
stddev = property(lambda s: s.sigma)
variance = property(lambda s: s.sigma**2)
def pdf(s, x):
"""Return the probability density function as an expression in x"""
x = sympify(x)
return 1/(s.sigma*sqrt(2*pi)) * exp(-(x-s.mu)**2 / (2*s.sigma**2))
def cdf(s, x):
"""Return the cumulative density function as an expression in x"""
x = sympify(x)
return (1+erf((x-s.mu)/(s.sigma*sqrt(2))))/2
def _random(s):
return random.gauss(float(s.mu), float(s.sigma))
def confidence(s, p):
"""Return a symmetric (p*100)% confidence interval. For example,
p=0.95 gives a 95% confidence interval. Currently this function
only handles numerical values except in the trivial case p=1.
Examples usage:
# One standard deviation
>>> from sympy.statistics import Normal
>>> N = Normal(0, 1)
>>> N.confidence(0.68)
(-0.994457883209753, 0.994457883209753)
>>> N.probability(*_).evalf()
0.680000000000000
# Two standard deviations
>>> N = Normal(0, 1)
>>> N.confidence(0.95)
(-1.95996398454005, 1.95996398454005)
>>> N.probability(*_).evalf()
0.950000000000000
"""
if p == 1:
return (-oo, oo)
assert p <= 1
# In terms of n*sigma, we have n = sqrt(2)*ierf(p). The inverse
# error function is not yet implemented in SymPy but can easily be
# computed numerically
from sympy.mpmath import mpf, erfinv
# calculate y = ierf(p) by solving erf(y) - p = 0
y = erfinv(mpf(p))
t = Float(str(mpf(float(s.sigma)) * mpf(2)**0.5 * y))
mu = s.mu.evalf()
return (mu-t, mu+t)
@staticmethod
def fit(sample):
"""Create a normal distribution fit to the mean and standard
deviation of the given distribution or sample."""
if not hasattr(sample, "stddev"):
sample = Sample(sample)
return Normal(sample.mean, sample.stddev)
class Uniform(ContinuousProbability):
"""
Uniform(a, b) represents a probability distribution with uniform
probability density on the interval [a, b] and zero density
everywhere else.
"""
def __init__(self, a, b):
self.a = sympify(a)
self.b = sympify(b)
mean = property(lambda s: (s.a+s.b)/2)
median = property(lambda s: (s.a+s.b)/2)
mode = property(lambda s: (s.a+s.b)/2) # arbitrary
variance = property(lambda s: (s.b-s.a)**2 / 12)
stddev = property(lambda s: sqrt(s.variance))
def pdf(s, x):
"""Return the probability density function as an expression in x"""
x = sympify(x)
if not x.is_Number:
raise NotImplementedError("SymPy does not yet support"
"piecewise functions")
if x < s.a or x > s.b:
return Rational(0)
return 1/(s.b-s.a)
def cdf(s, x):
"""Return the cumulative density function as an expression in x"""
x = sympify(x)
if not x.is_Number:
raise NotImplementedError("SymPy does not yet support"
"piecewise functions")
if x <= s.a:
return Rational(0)
if x >= s.b:
return Rational(1)
return (x-s.a)/(s.b-s.a)
def _random(s):
return Float(random.uniform(float(s.a), float(s.b)))
def confidence(s, p):
"""Generate a symmetric (p*100)% confidence interval.
>>> from sympy import Rational
>>> from sympy.statistics import Uniform
>>> U = Uniform(1, 2)
>>> U.confidence(1)
(1, 2)
>>> U.confidence(Rational(1,2))
(5/4, 7/4)
"""
p = sympify(p)
assert p <= 1
d = (s.b-s.a)*p / 2
return (s.mean - d, s.mean + d)
@staticmethod
def fit(sample):
"""Create a uniform distribution fit to the mean and standard
deviation of the given distribution or sample."""
if not hasattr(sample, "stddev"):
sample = Sample(sample)
m = sample.mean
d = sqrt(12*sample.variance)/2
return Uniform(m-d, m+d)
class PDF(ContinuousProbability):
"""
PDF(func, (x, a, b)) represents continuous probability distribution
with probability distribution function func(x) on interval (a, b)
If func is not normalized so that integrate(func, (x, a, b)) == 1,
it can be normalized using PDF.normalize() method
Example usage:
>>> from sympy import Symbol, exp, oo
>>> from sympy.statistics.distributions import PDF
>>> from sympy.abc import x
>>> a = Symbol('a', positive=True)
>>> exponential = PDF(exp(-x/a)/a, (x,0,oo))
>>> exponential.pdf(x)
exp(-x/a)/a
>>> exponential.cdf(x)
1 - exp(-x/a)
>>> exponential.mean
a
>>> exponential.variance
a**2
"""
def __init__(self, pdf, var):
#XXX maybe add some checking of parameters
if isinstance(var, (tuple, list)):
self.pdf = Lambda(var[0], pdf)
self.domain = tuple(var[1:])
else:
self.pdf = Lambda(var, pdf)
self.domain = (-oo, oo)
self._cdf = None
self._mean = None
self._variance = None
self._stddev = None
def normalize(self):
"""
Normalize the probability distribution function so that
integrate(self.pdf(x), (x, a, b)) == 1
Example usage:
>>> from sympy import Symbol, exp, oo
>>> from sympy.statistics.distributions import PDF
>>> from sympy.abc import x
>>> a = Symbol('a', positive=True)
>>> exponential = PDF(exp(-x/a), (x,0,oo))
>>> exponential.normalize().pdf(x)
exp(-x/a)/a
"""
norm = self.probability(*self.domain)
if norm != 1:
w = Dummy('w', real=True)
return self.__class__(self.pdf(w)/norm, (w, self.domain[0], self.domain[1]))
#self._cdf = Lambda(w, (self.cdf(w) - self.cdf(self.domain[0]))/norm)
#if self._mean is not None:
# self._mean /= norm
#if self._variance is not None:
# self._variance = (self._variance + (self._mean*norm)**2)/norm - self.mean**2
#if self._stddev is not None:
# self._stddev = sqrt(self._variance)
else:
return self
def cdf(self, x):
x = sympify(x)
if self._cdf is not None:
return self._cdf(x)
else:
from sympy import integrate
w = Dummy('w', real=True)
self._cdf = integrate(self.pdf(w), w)
self._cdf = Lambda(w, self._cdf - self._cdf.subs(w, self.domain[0]))
return self._cdf(x)
def _get_mean(self):
if self._mean is not None:
return self._mean
else:
from sympy import integrate
w = Dummy('w', real=True)
self._mean = integrate(self.pdf(w)*w,(w,self.domain[0],self.domain[1]))
return self._mean
def _get_variance(self):
if self._variance is not None:
return self._variance
else:
from sympy import integrate, simplify
w = Dummy('w', real=True)
self._variance = integrate(self.pdf(w)*w**2,(w,self.domain[0],self.domain[1])) - self.mean**2
self._variance = simplify(self._variance)
return self._variance
def _get_stddev(self):
if self._stddev is not None:
return self._stddev
else:
self._stddev = sqrt(self.variance)
return self._stddev
mean = property(_get_mean)
variance = property(_get_variance)
stddev = property(_get_stddev)
def _random(s):
raise NotImplementedError
def transform(self,func,var):
"""Return a probability distribution of random variable func(x)
currently only some simple injective functions are supported"""
w = Dummy('w', real=True)
from sympy import solve
inverse = solve(func-w, var)
newPdf = S.Zero
funcdiff = func.diff(var)
#TODO check if x is in domain
for x in inverse:
# this assignment holds only for x in domain
# in general it would require implementing
# piecewise defined functions in sympy
newPdf += (self.pdf(var)/abs(funcdiff)).subs(var,x)
return PDF(newPdf, (w, func.subs(var, self.domain[0]), func.subs(var, self.domain[1])))
|