/usr/share/pyshared/pandas/sandbox/functions.py is in python-pandas 0.7.0-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
from pandas.core.common import isnull
import numpy as np
#-------------------------------------------------------------------------------
# NaN-friendly reductions and such
def reduce_mean(values, index, buckets, inclusive=False):
    """
    Bucketed mean of ``values`` over ``index``, NaN-aware.

    NaNs are filled with 0 before summing (via ``na_fill=0``) and excluded
    from the per-bucket count, so each bucket's mean is over its non-null
    entries only.
    """
    def _reduceat_mean(vals, mask, locs):
        the_sum = np.add.reduceat(vals, locs)
        # ~mask marks valid (non-null) entries. Cast to int so add.reduceat
        # counts them; on a bool array add.reduceat does a logical OR, and
        # the original ``-mask`` raises TypeError on modern numpy.
        the_count = np.add.reduceat((~mask).astype(np.int64), locs)
        return the_sum / the_count
    return _reduce_generic(values, index, buckets, _reduceat_mean,
                           inclusive=inclusive, na_fill=0)
def _reduceat_var(values, mask, locs):
XX = np.add.reduceat(values ** 2, locs)
X = np.add.reduceat(values, locs)
nobs = np.add.reduceat(-mask, locs)
return (XX - X * X) / (nobs - 1)
def reduce_std(values, index, buckets, inclusive=False):
    """Bucketed standard deviation: square root of the bucketed variance."""
    variances = _reduce_generic(values, index, buckets, _reduceat_var,
                                inclusive=inclusive, na_fill=0)
    return np.sqrt(variances)
def reduce_prod(values, index, buckets, inclusive=False):
    """Bucketed product; NaNs are replaced with the multiplicative identity 1."""
    def _reducer(vals, mask, locs):
        return np.multiply.reduceat(vals, locs)
    return _reduce_generic(values, index, buckets, _reducer,
                           inclusive=inclusive, na_fill=1)
def reduce_min(values, index, buckets, inclusive=False):
    """Bucketed minimum; NaNs are filled with +inf so they can never win."""
    def _reducer(vals, mask, locs):
        return np.minimum.reduceat(vals, locs)
    return _reduce_generic(values, index, buckets, _reducer,
                           inclusive=inclusive, na_fill=np.inf)
def reduce_max(values, index, buckets, inclusive=False):
    """Bucketed maximum; NaNs are filled with -inf so they can never win."""
    def _reducer(vals, mask, locs):
        return np.maximum.reduceat(vals, locs)
    return _reduce_generic(values, index, buckets, _reducer,
                           inclusive=inclusive, na_fill=-np.inf)
def _reduce_generic(values, index, buckets, freduce, inclusive=False,
                    na_fill=None):
    """
    Apply a reduceat-style reducer over bucket segments of ``values``.

    Bucket boundaries are located in ``index`` via searchsorted (see
    ``_bucket_locs``); ``freduce(values, mask, locs)`` is then called with
    the null-mask and the segment start positions. When ``na_fill`` is
    given, nulls are overwritten with it on a copy so the caller's array
    is left untouched.
    """
    locs = _bucket_locs(index, buckets, inclusive=inclusive)
    arr = np.asarray(values)
    mask = isnull(arr)
    if na_fill is not None:
        # copy before filling so the caller's data is not clobbered
        arr = arr.copy()
        np.putmask(arr, mask, na_fill)
    return freduce(arr, mask, locs)
def _reduceat_count(values, mask, locs):
return np.add.reduceat(-mask, locs)
def _bucket_locs(index, buckets, inclusive=False):
if inclusive:
locs = index.searchsorted(buckets, side='left')
else:
locs = index.searchsorted(buckets, side='right')
return locs
def get_bucket(date, bucks):
    """
    Map ``date`` to a bucket label in ``bucks``.

    A date that is itself a boundary maps to the *next* bucket; otherwise
    the searchsorted insertion point is used.
    NOTE(review): relies on the old ``Index.indexMap`` attribute from
    pandas 0.7 — confirm against the pandas version in use.
    """
    if date in bucks:
        return bucks[bucks.indexMap[date] + 1]
    return bucks[bucks.searchsorted(date)]
def dumb_way(series, buckets):
    """
    Reference (slow) bucketed mean: group by bucket label and average,
    then conform the result to ``buckets``.

    Fixes a NameError — the original grouped an undefined ``hfseries``
    instead of the ``series`` parameter.
    """
    sampled = series.groupby(lambda x: get_bucket(x, buckets)).mean()
    return sampled.reindex(buckets)
def ts_upsample(dates, buckets, values, aggfunc, inclusive=True):
    """
    Aggregate ``values`` (aligned with sorted ``dates``) into ``buckets``.

    Bucket ``i`` (labelled ``buckets[i]``) collects the consecutive dates
    up to ``buckets[i + 1]`` — strictly below it when ``inclusive`` is
    True, at-or-below otherwise — and applies ``aggfunc`` to the matching
    slice of ``values``. Returns a Series indexed by ``buckets``.

    Fixes two crashes in the original: ``dates[j]`` was evaluated before
    the ``j < nvalues`` bounds check, and ``buckets[i + 1]`` raised
    IndexError on the final bucket. The final bucket now aggregates all
    remaining values (original behavior was undefined — it always crashed).
    """
    from pandas import Series

    nbuckets = len(buckets)
    nvalues = len(dates)
    output = np.empty(nbuckets, dtype=float)
    if inclusive:
        _check = lambda x, y: x < y
    else:
        _check = lambda x, y: x <= y
    j = 0
    for i in range(nbuckets):
        jstart = j
        if i + 1 < nbuckets:
            next_bound = buckets[i + 1]
            # bounds check must come first to avoid reading past the end
            while j < nvalues and _check(dates[j], next_bound):
                j += 1
        else:
            # last bucket: take everything that remains
            j = nvalues
        output[i] = aggfunc(values[jstart:j])
    return Series(output, index=buckets)
if __name__ == '__main__':
    # Ad-hoc benchmark / smoke test for the bucketed reductions above.
    N = 1000000  # number of observations
    K = 1000     # number of buckets
    values = np.random.randn(N)
    # object-dtype index so searchsorted compares via Python objects
    index = np.arange(N).astype(object)
    # K evenly spaced bucket boundaries over [0, N)
    buckets = np.arange(0, N, N // K).astype(object)
    result = reduce_mean(values, index, buckets)
    # NOTE(review): pandas._tseries was an internal extension module in
    # pandas 0.7 and no longer exists in modern pandas — this import fails
    # there; verify before reusing this script.
    import pandas._tseries as tseries
    tseries.ts_upsample_mean(index, buckets, values)