/usr/share/pyshared/statsmodels/tools/data.py is in python-statsmodels 0.4.2-1.2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
"""
Compatibility tools for various data structure inputs
"""
#TODO: question: interpret_data
# looks good and could/should be merged with the other check/conversion functions we also have
# similar also to what Nathaniel mentioned for Formula
# good: if ndarray check passes then loading pandas is not triggered,
import numpy as np
def have_pandas():
    """
    Report whether pandas can be imported.

    Returns
    -------
    bool
        True when ``import pandas`` succeeds, False when the import raises
        ImportError or any other Exception.
    """
    try:
        import pandas  # noqa: F401 -- imported only to probe availability
    except Exception:
        # ImportError is an Exception subclass, so one clause covers both
        # the "not installed" and the "broken installation" cases.
        return False
    else:
        return True
def is_data_frame(obj):
    """
    Check whether `obj` is a pandas DataFrame.

    pandas is only imported after ``have_pandas()`` confirms it is
    importable, so this is safe to call when pandas is not installed.

    Parameters
    ----------
    obj : object
        Any object.

    Returns
    -------
    bool
        True if pandas is importable and `obj` is a ``pandas.DataFrame``
        instance, False otherwise.
    """
    if have_pandas():
        from pandas import DataFrame
        return isinstance(obj, DataFrame)
    return False
def _is_structured_ndarray(obj):
    """
    Check whether `obj` is a numpy array with named fields.

    Returns
    -------
    bool
        True if `obj` is an ``np.ndarray`` whose ``dtype.names`` is not
        None, False for plain arrays and for non-array objects.
    """
    if not isinstance(obj, np.ndarray):
        return False
    field_names = obj.dtype.names
    return field_names is not None
def interpret_data(data, colnames=None, rownames=None):
    """
    Convert passed data structure to form required by estimation classes

    Parameters
    ----------
    data : ndarray-like
        Either a numpy ndarray (plain or structured) or a pandas DataFrame.
        The column count is read from ``values.shape[1]``.
    colnames : sequence or None
        May be part of data structure. For a structured array the field
        names are used when this is None; for a plain array the names
        ``Y_0, Y_1, ...`` are generated when this is None. For a DataFrame
        the value passed here is replaced by the frame's columns.
    rownames : sequence or None
        For a DataFrame the value passed here is replaced by the frame's
        index (taken after rows with missing values have been dropped).

    Returns
    -------
    (values, colnames, rownames) : (homogeneous ndarray, list, sequence or None)

    Raises
    ------
    Exception
        If `data` is neither an ndarray nor a DataFrame.
    ValueError
        If the number of column names differs from the number of columns,
        or if `rownames` is given and its length differs from the number
        of rows.
    """
    if isinstance(data, np.ndarray):
        structured = _is_structured_ndarray(data)
        if structured and colnames is None:
            colnames = data.dtype.names
        values = struct_to_ndarray(data) if structured else data

        if colnames is None:
            colnames = ['Y_%d' % idx for idx in range(values.shape[1])]
    elif is_data_frame(data):
        # XXX: hack
        # rows containing missing values are discarded before extraction
        frame = data.dropna()
        values, colnames, rownames = frame.values, frame.columns, frame.index
    else:  # pragma: no cover
        raise Exception('cannot handle other input types at the moment')

    # normalise the names to a real list (tuple, Index, ... are accepted)
    colnames = colnames if isinstance(colnames, list) else list(colnames)

    # sanity check
    n_columns = values.shape[1]
    if len(colnames) != n_columns:
        raise ValueError('length of colnames does not match number of '
                         'columns in data')

    if rownames is not None:
        if len(rownames) != len(values):
            raise ValueError('length of rownames does not match number of '
                             'rows in data')

    return values, colnames, rownames
def struct_to_ndarray(arr):
    """
    Convert a structured array into a homogeneous float array.

    Parameters
    ----------
    arr : ndarray
        Array with a structured dtype (``arr.dtype.names`` is not None).

    Returns
    -------
    ndarray
        Float array of shape ``arr.shape + (number_of_fields,)``. When every
        field is a native float and the record has no padding the result is
        a view on `arr` (no copy). Otherwise each field is converted to
        float and copied into a new array.

    Notes
    -----
    A plain ``arr.view((float, k))`` reinterprets the raw bytes, which is
    only correct when all fields already are native floats; integer fields
    would silently be turned into garbage values. Those cases are therefore
    converted field by field instead.
    """
    names = arr.dtype.names
    n_fields = len(names)
    float_dtype = np.dtype(float)

    all_float = all(arr.dtype.fields[name][0] == float_dtype
                    for name in names)
    packed = arr.dtype.itemsize == n_fields * float_dtype.itemsize
    if all_float and packed:
        # byte layout already matches a float block: zero-copy reinterpretation
        return arr.view((float, n_fields))

    # mixed / non-float fields: convert the values, not the bytes
    out = np.empty(arr.shape + (n_fields,), dtype=float)
    for col, name in enumerate(names):
        out[..., col] = arr[name]
    return out
def _is_using_ndarray(endog, exog):
    """
    Check whether the inputs are plain numpy arrays.

    Returns
    -------
    bool
        True if `endog` is an ``np.ndarray`` and `exog` is either an
        ``np.ndarray`` or None; False otherwise.
    """
    if not isinstance(endog, np.ndarray):
        return False
    return exog is None or isinstance(exog, np.ndarray)
def _is_using_pandas(endog, exog):
    """
    Check whether either input is a pandas data structure.

    Parameters
    ----------
    endog, exog : object
        Objects to test.

    Returns
    -------
    bool
        True if `endog` or `exog` is a pandas ``Series``, ``DataFrame`` or,
        where the installed pandas provides it, ``WidePanel``. False
        otherwise, including when pandas cannot be imported.
    """
    try:
        import pandas
    except ImportError:
        # without pandas neither input can be a pandas object
        return False

    klasses = [pandas.Series, pandas.DataFrame]
    # WidePanel is not available in every pandas release; importing it
    # unconditionally would make this check raise ImportError there.
    wide_panel = getattr(pandas, 'WidePanel', None)
    if isinstance(wide_panel, type):
        klasses.append(wide_panel)
    klasses = tuple(klasses)

    return isinstance(endog, klasses) or isinstance(exog, klasses)
def _is_using_larry(endog, exog):
    """
    Check whether either input is an ``la.larry`` object.

    Returns
    -------
    bool
        True if the ``la`` package is importable and `endog` or `exog` is
        an instance of ``la.larry``; False otherwise, including when ``la``
        cannot be imported.
    """
    try:
        import la
        larry_type = la.larry
        return any(isinstance(candidate, larry_type)
                   for candidate in (endog, exog))
    except ImportError:
        # la is optional; without it nothing can be a larry
        return False
def _is_using_timeseries(endog, exog):
    """
    Check whether either input is a ``scikits.timeseries.TimeSeries``.

    Returns
    -------
    bool
        True if ``scikits.timeseries`` is importable and `endog` or `exog`
        is a ``TimeSeries`` instance; False otherwise.
    """
    try:
        from scikits.timeseries import TimeSeries as ts_class
        return any(isinstance(candidate, ts_class)
                   for candidate in (endog, exog))
    except ImportError:
        # if there is no deprecated scikits.timeseries, it is safe to say NO
        return False
def _is_array_like(endog, exog):
    """
    Check whether both inputs can be converted with ``np.asarray``.

    Parameters
    ----------
    endog, exog : object
        Objects to test. The converted arrays are discarded; only the
        success of the conversion matters.

    Returns
    -------
    bool
        True if ``np.asarray`` succeeds for both inputs, False if either
        conversion raises an ``Exception``.

    Notes
    -----
    Only ``Exception`` subclasses are treated as "not array-like".
    ``KeyboardInterrupt``, ``SystemExit`` and other ``BaseException``
    subclasses propagate instead of being reported as False.
    """
    try:  # do it like this in case of mixed types, ie., ndarray and list
        np.asarray(endog)
        np.asarray(exog)
    except Exception:
        return False
    return True
|