/usr/share/pyshared/statsmodels/miscmodels/count.py is in python-statsmodels 0.4.2-1.2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 | # -*- coding: utf-8 -*-
"""
Created on Mon Jul 26 08:34:59 2010

Author: josef-pktd

Changes: added offset and zero-inflated version of Poisson
 - kind of ok, need better test cases
 - a nan in ZIP bse, need to check Hessian calculations
 - found error in ZIP loglike
 - all tests pass with

Issues:
* If the true model is not zero-inflated, then the numerical Hessian for ZIP
  has zeros for the inflation probability and is not invertible.
  -> Hessian inverts and bse look ok if row and column are dropped;
     pinv also works
* GenericMLE: still get somewhere (where?)
  "CacheWriteWarning: The attribute 'bse' cannot be overwritten"
* bfgs is too fragile, doesn't come back
* `nm` is slow but seems to work
* need good start_params, and their use in genericmle needs to be checked for
  consistency: set as attribute or method (called as attribute)
* numerical Hessian needs better scaling
* check taking parts out of the loop, e.g. factorial(endog) could be
  precalculated
"""
import numpy as np
from scipy import stats
from scipy.misc import factorial
from scipy.special import gammaln
import statsmodels.api as sm
from statsmodels.base.model import GenericLikelihoodModel
def maxabs(arr1, arr2):
    '''Return the largest absolute elementwise difference of two arrays.

    Parameters
    ----------
    arr1, arr2 : array-like
        Inputs with broadcast-compatible shapes. Plain sequences are
        accepted as well as ndarrays.

    Returns
    -------
    maxdiff : scalar
        ``max(|arr1 - arr2|)`` over all elements.
    '''
    # np.subtract converts array-likes itself, so list input works too.
    return np.max(np.abs(np.subtract(arr1, arr2)))
def maxabsrel(arr1, arr2):
    '''Return the largest absolute relative deviation of arr2 from arr1.

    Parameters
    ----------
    arr1 : array-like
        Reference values (the denominator of the ratio).
    arr2 : array-like
        Values compared against the reference.

    Returns
    -------
    maxrel : scalar
        ``max(|arr2 / arr1 - 1|)`` over all elements.
    '''
    # true_divide avoids integer floor division on integer input under
    # Python 2 and also accepts plain sequences.
    ratio = np.true_divide(arr2, arr1)
    return np.max(np.abs(ratio - 1))
class NonlinearDeltaCov(object):
    '''Asymptotic covariance of a function of parameters by the delta method.

    The class is designed for a 2d gradient array, with rows equal to the
    number of equations and columns equal to the number of parameters.
    1d params work by chance ?

    fun: R^{m*k} -> R^{m} where m is the number of equations and k is
    the number of parameters.

    The equations follow Greene.

    Parameters
    ----------
    fun : callable
        Function of the parameter vector whose covariance is wanted.
    params : array-like
        Parameter estimates at which `fun` and its gradient are evaluated.
    cov_params : array-like
        Covariance matrix of the parameter estimates.
    '''

    def __init__(self, fun, params, cov_params):
        self.fun = fun
        self.params = params
        self.cov_params = cov_params

    def grad(self, params=None, **kwds):
        '''Numerical gradient of `fun` with respect to the parameters.

        Parameters
        ----------
        params : array-like, optional
            Point at which to differentiate; defaults to ``self.params``.
        **kwds
            Passed on to ``approx_fprime1``; ``epsilon`` defaults to 1e-4.

        Returns
        -------
        The value returned by ``approx_fprime1(params, self.fun, **kwds)``.
        '''
        if params is None:
            params = self.params
        kwds.setdefault('epsilon', 1e-4)
        # local import, as in the original, keeps the sandbox dependency
        # out of module import time
        from statsmodels.sandbox.regression.numdiff import approx_fprime1
        return approx_fprime1(params, self.fun, **kwds)

    def cov(self):
        '''Delta-method covariance ``g * cov_params * g'`` of `fun`.'''
        g = self.grad()
        return np.dot(np.dot(g, self.cov_params), g.T)

    def expected(self):
        '''Value of `fun` evaluated at the stored parameters.'''
        # rename: misnomer, this is the MLE of the fun
        return self.fun(self.params)

    def wald(self, value):
        '''Wald test of the hypothesis ``fun(params) == value``.

        Parameters
        ----------
        value : scalar or array-like
            Hypothesized value of `fun`, broadcast against ``expected()``.

        Returns
        -------
        lmstat : scalar
            Quadratic form ``diff' * inv(cov) * diff``.
        pvalue : float
            Upper tail probability of a chi-square distribution with as many
            degrees of freedom as `fun` has elements.
        '''
        m = self.expected()
        df = np.size(m)
        # Flatten so scalar, 1d and column-vector results of `fun` are all
        # handled the same way and the statistic is always a scalar.
        diff = np.ravel(m - value)
        v = np.atleast_2d(self.cov())
        # solve() instead of an explicit inverse: same quadratic form,
        # numerically more stable.
        lmstat = np.dot(diff, np.linalg.solve(v, diff))
        return lmstat, stats.chi2.sf(lmstat, df)
class PoissonGMLE(GenericLikelihoodModel):
    '''Maximum Likelihood Estimation of Poisson Model

    This is an example for generic MLE which has the same
    statistical model as discretemod.Poisson.

    Except for defining the negative log-likelihood method, all
    methods and results are generic. Gradients and Hessian
    and all resulting statistics are based on numerical
    differentiation.
    '''

    # copied from discretemod.Poisson
    def nloglikeobs(self, params):
        r'''Negative log-likelihood of the Poisson model per observation.

        Parameters
        ----------
        params : array-like
            The parameters of the model.

        Returns
        -------
        nloglik : ndarray
            The negative log-likelihood contribution of each observation
            evaluated at `params`.

        Notes
        -----
        .. math :: -\ln L_{i}=\lambda_{i}-y_{i}x_{i}^{\prime}\beta+\ln y_{i}!

        with :math:`\lambda_{i}=\exp(x_{i}^{\prime}\beta)`.
        '''
        XB = np.dot(self.exog, params)
        endog = self.endog
        # gammaln(y + 1) == log(y!) but does not overflow for large counts
        return np.exp(XB) - endog * XB + gammaln(endog + 1)

    def predict_distribution(self, exog):
        '''Return frozen scipy.stats distribution with mu at estimated prediction.

        Parameters
        ----------
        exog : array-like
            Explanatory variables at which to predict.

        Returns
        -------
        Frozen ``scipy.stats.poisson`` distribution with mean
        ``exp(dot(exog, params))``.

        Raises
        ------
        ValueError
            If the model has no ``result`` attribute to take the estimated
            parameters from.
        '''
        # NOTE(review): nothing visible here sets ``self.result``; the
        # caller presumably has to attach the fit results first - confirm.
        if not hasattr(self, 'result'):
            raise ValueError('model has no `result` attribute; '
                             'estimated parameters are not available')
        params = self.result.params
        mu = np.exp(np.dot(exog, params))
        return stats.poisson(mu, loc=0)
class PoissonOffsetGMLE(GenericLikelihoodModel):
    '''Maximum Likelihood Estimation of Poisson Model

    This is an example for generic MLE which has the same
    statistical model as discretemod.Poisson but adds offset.

    Except for defining the negative log-likelihood method, all
    methods and results are generic. Gradients and Hessian
    and all resulting statistics are based on numerical
    differentiation.

    Parameters
    ----------
    endog : array-like
        Observed counts.
    exog : array-like, optional
        Explanatory variables.
    offset : array-like, optional
        Term added to the linear predictor; flattened to 1d. If None, an
        offset of 0 is used.
    **kwds
        Passed on to GenericLikelihoodModel.
    '''

    def __init__(self, endog, exog=None, offset=None, **kwds):
        # let them be none in case user wants to use inheritance
        if offset is not None:
            # stored flat so it adds elementwise to dot(exog, params)
            self.offset = np.asarray(offset).ravel()
        else:
            self.offset = 0.
        super(PoissonOffsetGMLE, self).__init__(endog, exog, **kwds)

    # original copied from discretemod.Poisson
    def nloglikeobs(self, params):
        r'''Negative log-likelihood of the Poisson model per observation.

        Parameters
        ----------
        params : array-like
            The parameters of the model.

        Returns
        -------
        nloglik : ndarray
            The negative log-likelihood contribution of each observation
            evaluated at `params`.

        Notes
        -----
        .. math :: -\ln L_{i}=\lambda_{i}-y_{i}\eta_{i}+\ln y_{i}!

        with :math:`\eta_{i}=\mathrm{offset}_{i}+x_{i}^{\prime}\beta` and
        :math:`\lambda_{i}=\exp(\eta_{i})`.
        '''
        XB = self.offset + np.dot(self.exog, params)
        endog = self.endog
        # gammaln(y + 1) == log(y!) but does not overflow for large counts
        nloglik = np.exp(XB) - endog * XB + gammaln(endog + 1)
        return nloglik
class PoissonZiGMLE(GenericLikelihoodModel):
    '''Maximum Likelihood Estimation of Poisson Model

    This is an example for generic MLE which has the same statistical model
    as discretemod.Poisson but adds offset and zero-inflation.

    Except for defining the negative log-likelihood method, all
    methods and results are generic. Gradients and Hessian
    and all resulting statistics are based on numerical
    differentiation.

    There are numerical problems if there is no zero-inflation.

    Parameters
    ----------
    endog : array-like
        Observed counts.
    exog : array-like, optional
        Explanatory variables; a single column of ones is used if None.
    offset : array-like, optional
        Term added to the linear predictor; flattened to 1d. If None, an
        offset of 0 is used.
    **kwds
        Passed on to GenericLikelihoodModel.
    '''

    def __init__(self, endog, exog=None, offset=None, **kwds):
        # let them be none in case user wants to use inheritance
        super(PoissonZiGMLE, self).__init__(endog, exog, **kwds)
        if offset is not None:
            # stored flat so it adds elementwise to dot(exog, beta)
            self.offset = np.asarray(offset).ravel()  # which way?
        else:
            self.offset = 0.
        if exog is None:
            self.exog = np.ones((self.nobs, 1))
        self.nparams = self.exog.shape[1]
        # what's the shape in regression for exog if only constant
        # one start value per regression coefficient plus a trailing 0 for
        # the zero-inflation parameter
        self.start_params = np.hstack((np.ones(self.nparams), 0))
        self.cloneattr = ['start_params']

    # original copied from discretemod.Poisson
    def nloglikeobs(self, params):
        r'''Negative log-likelihood of the zero-inflated Poisson model.

        Parameters
        ----------
        params : array-like
            The parameters of the model. All but the last element are the
            regression coefficients; the last element determines the
            inflation weight ``gamm = 1 / (1 + exp(params[-1]))``.

        Returns
        -------
        nloglik : ndarray
            The negative log-likelihood contribution of each observation
            evaluated at `params`.

        Notes
        -----
        With :math:`\lambda_{i}=\exp(\mathrm{offset}_{i}+x_{i}^{\prime}\beta)`
        and inflation weight :math:`\gamma`,

        .. math :: -\ln L_{i}=-\ln(1-\gamma)+\lambda_{i}-y_{i}\ln\lambda_{i}+\ln y_{i}!

        for :math:`y_{i}>0`, and

        .. math :: -\ln L_{i}=-\ln\left(\gamma+(1-\gamma)e^{-\lambda_{i}}\right)

        for :math:`y_{i}=0`.
        '''
        beta = params[:-1]
        gamm = 1 / (1 + np.exp(params[-1]))  # check this
        # replace with np.dot(self.exogZ, gamma)
        XB = self.offset + np.dot(self.exog, beta)
        endog = self.endog
        # Poisson part weighted by (1 - gamm); gammaln(y + 1) == log(y!)
        # but does not overflow for large counts
        nloglik = (-np.log(1 - gamm) + np.exp(XB) - endog * XB
                   + gammaln(endog + 1))
        # zeros come either from the inflation part or from the Poisson
        # part, so their probabilities are added before taking the log
        zero_mask = (endog == 0)
        nloglik[zero_mask] = -np.log(gamm + np.exp(-nloglik[zero_mask]))
        return nloglik
if __name__ == '__main__':
    # Example: simulate Poisson data, fit it with the generic MLE class and
    # compare against the dedicated Poisson model and the Poisson GLM.

    def _report(label, reference, estimate):
        # one summary line: label, max abs difference, max rel difference
        print('%s %s %s' % (label, maxabs(reference, estimate),
                            maxabsrel(reference, estimate)))

    nobs = 1000
    rvs = np.random.randn(nobs, 6)
    data_exog = sm.add_constant(rvs)
    xbeta = 1 + 0.1 * rvs.sum(1)
    data_endog = np.random.poisson(np.exp(xbeta))

    modp = PoissonGMLE(data_endog, data_exog)
    resp = modp.fit()
    print(resp.params)
    print(resp.bse)

    from statsmodels.discretemod import Poisson
    resdp = Poisson(data_endog, data_exog).fit()
    print('\ncompare with discretemod')
    print('compare params')
    print(resdp.params - resp.params)
    print('compare bse')
    print(resdp.bse - resp.bse)

    gmlp = sm.GLM(data_endog, data_exog, family=sm.families.Poisson())
    resgp = gmlp.fit()
    # this creates a warning, bug bse is double defined ???
    # c:\josef\eclipsegworkspace\statsmodels-josef-experimental-gsoc\scikits\statsmodels\decorators.py:105: CacheWriteWarning: The attribute 'bse' cannot be overwritten
    #   warnings.warn(errmsg, CacheWriteWarning)
    print('\ncompare with GLM')
    print('compare params')
    print(resgp.params - resp.params)
    print('compare bse')
    print(resgp.bse - resp.bse)

    lam = np.exp(np.dot(data_exog, resp.params))
    # mean of Poisson distribution
    predmean = stats.poisson.stats(lam, moments='m')
    print(np.max(np.abs(predmean - lam)))

    # delta-method covariance of the predicted mean at the average regressors
    xm = data_exog.mean(0)
    fun = lambda params: np.exp(np.dot(xm, params))
    lamcov = NonlinearDeltaCov(fun, resp.params, resdp.cov_params())
    print(lamcov.cov().shape)
    print(lamcov.cov())

    print('analytical')
    print(np.dot(np.dot(xm, resdp.cov_params()), xm.T)
          * np.exp(2 * np.dot(xm, resp.params)))

    # cov of linear transformation of params
    # >>> np.dot(np.dot(xm, resdp.cov_params()), xm.T)
    # >>> resp.cov_params(xm)
    # >>> np.dot(np.dot(xm, resp.cov_params()), xm.T)

    print(lamcov.wald(1.))
    print(lamcov.wald(2.))
    print(lamcov.wald(2.6))

    do_bootstrap = False
    if do_bootstrap:
        m, s, r = resp.bootstrap(method='newton')
        print(m)
        print(s)
        print(resp.bse)

    print('\ncomparison maxabs, masabsrel')
    _report('discr params', resdp.params, resp.params)
    _report('discr bse ', resdp.bse, resp.bse)
    _report('discr bsejac', resdp.bse, resp.bsejac)
    _report('discr bsejhj', resdp.bse, resp.bsejhj)
    # the GLM lines compare against the GLM results (resgp), not the
    # discrete-model results again
    _report('glm params ', resgp.params, resp.params)
    _report('glm bse ', resgp.bse, resp.bse)