/usr/share/pyshared/statsmodels/tests/rmodelwrap.py is in python-statsmodels 0.4.2-1.2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 | ''' Wrapper for R models to allow comparison to scipy models '''
import numpy as np
import rpy
from check_for_rpy import skip_rpy
skipR = skip_rpy()
if not skipR:
from rpy import r
# MASS contains
# negative binomial family for GLM
# rlm
#TODO: write a check_key wrapper for these
class RModel(object):
''' Class gives R models scipy.models -like interface '''
def __init__(self, y, design, model_type=r.lm, **kwds):
''' Set up and estimate R model with data and design '''
r.library('MASS') # still needs to be in test, but also here for
# logical tests at the end not to show an error
self.y = np.array(y)
self.design = np.array(design)
self.model_type = model_type
self._design_cols = ['x.%d' % (i+1) for i in range(
self.design.shape[1])]
# Note the '-1' for no intercept - this is included in the design
self.formula = r('y ~ %s-1' % '+'.join(self._design_cols))
self.frame = r.data_frame(y=y, x=self.design)
rpy.set_default_mode(rpy.NO_CONVERSION)
results = self.model_type(self.formula,
data = self.frame, **kwds)
self.robj = results # keep the Robj model so it can be
# used in the tests
rpy.set_default_mode(rpy.BASIC_CONVERSION)
rsum = r.summary(results)
self.rsum = rsum
# Provide compatible interface with scipy models
self.results = results.as_py()
# coeffs = self.results['coefficients']
# self.beta0 = np.array([coeffs[c] for c in self._design_cols])
self.nobs = len(self.results['residuals'])
if isinstance(self.results['residuals'], dict):
self.resid = np.zeros((len(self.results['residuals'].keys())))
for i in self.results['residuals'].keys():
self.resid[int(i)-1] = self.results['residuals'][i]
else:
self.resid = self.results['residuals']
self.fittedvalues = self.results['fitted.values']
self.df_resid = self.results['df.residual']
self.params = rsum['coefficients'][:,0]
self.bse = rsum['coefficients'][:,1]
self.bt = rsum['coefficients'][:,2]
try:
self.pvalues = rsum['coefficients'][:,3]
except: pass
self.rsquared = rsum.setdefault('r.squared', None)
self.rsquared_adj = rsum.setdefault('adj.r.squared', None)
self.aic_R = rsum.setdefault('aic', None)
self.fvalue = rsum.setdefault('fstatistic', None)
if self.fvalue and isinstance(self.fvalue, dict):
self.fvalue = self.fvalue.setdefault('value', None) # for wls
df = rsum.setdefault('df', None)
if df: # for RLM, works for other models?
self.df_model = df[0]-1 # R counts intercept
self.df_resid = df[1]
self.bcov_unscaled = rsum.setdefault('cov.unscaled', None)
self.bcov = rsum.setdefault('cov.scaled', None)
if 'sigma' in rsum:
self.scale = rsum['sigma']
elif 'dispersion' in rsum:
self.scale = rsum['dispersion']
else:
self.scale = None
self.llf = r.logLik(results)
if model_type == r.glm:
self.getglm()
if model_type == r.rlm:
self.getrlm()
def getglm(self):
self.deviance = self.rsum['deviance']
self.resid = [self.results['residuals'][str(k)] \
for k in range(1, 1+self.nobs)]
if isinstance(self.resid, dict):
tmp = np.zeros(len(self.resid))
for i in self.resid.keys():
tmp[int(i)-1] = self.resid[i]
self.resid = tmp
self.predict = [self.results['linear.predictors'][str(k)] \
for k in range(1, 1+self.nobs)]
self.fittedvalues = [self.results['fitted.values'][str(k)] \
for k in range(1, 1+self.nobs)]
self.weights = [self.results['weights'][str(k)] \
for k in range(1, 1+self.nobs)]
self.resid_deviance = self.rsum['deviance.resid']
if isinstance(self.resid_deviance, dict):
tmp = np.zeros(len(self.resid_deviance))
for i in self.resid_deviance.keys():
tmp[int(i)-1] = self.resid_deviance[i]
self.resid_deviance = tmp
self.null_deviance = self.rsum['null.deviance']
def getrlm(self):
self.k2 = self.results['k2']
if isinstance(self.results['w'], dict):
tmp = np.zeros((len(self.results['w'].keys())))
for i in self.results['w'].keys():
tmp[int(i)-1] = self.results['w'][i]
self.weights = tmp
else: self.weights = self.results['w']
self.stddev = self.rsum['stddev'] # Don't know what this is yet
self.wresid = None # these equal resids always?
#TODO:
# function to write Rresults to results file, so this is a developers tool
# and not a test dependency?
def RModelConvert(model, sec_title=None, results_title=None):
import os
if not results_title:
raise AttributeError("You need to specify a results title")
outfile = open('./model_results.py', 'a')
outfile.write('class '+results_title)
outfile.write(' '*4) # handle indents
|