/usr/share/pyshared/pandas/sparse/list.py is in python-pandas 0.7.0-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 | import numpy as np
from pandas.sparse.array import SparseArray
import pandas._sparse as splib
class SparseList(object):
    """
    Data structure for accumulating data to be converted into a
    SparseArray. Has similar API to the standard Python list

    Appended data is kept as a list of SparseArray chunks; the chunks are
    only merged into a single one when consolidation is requested.

    Parameters
    ----------
    data : scalar or array-like
    fill_value : scalar, default NaN
    """

    def __init__(self, data=None, fill_value=np.nan):
        self.fill_value = fill_value
        self._chunks = []

        if data is not None:
            self.append(data)

    def __repr__(self):
        contents = '\n'.join(repr(c) for c in self._chunks)
        return '%s\n%s' % (object.__repr__(self), contents)

    def __len__(self):
        # Total number of elements across all chunks
        return sum(len(c) for c in self._chunks)

    def __getitem__(self, i):
        """
        Return the element at position i, counted across all chunks

        Parameters
        ----------
        i : int
            Position; negative values count from the end

        Raises
        ------
        ValueError
            If i is negative and reaches before the first element
        IndexError
            If i is at or past the end of the list
        """
        if i < 0:
            length = len(self)
            if i + length < 0:  # pragma: no cover
                # ValueError (not IndexError) is kept for backward
                # compatibility with existing callers
                raise ValueError('%d out of range' % i)
            i += length

        # Walk the chunks, tracking how many elements precede the current one
        passed = 0
        for chunk in self._chunks:
            size = len(chunk)
            if i < passed + size:
                return chunk[i - passed]
            passed += size

        # Raised explicitly so out-of-range access (and the legacy
        # __getitem__-based iteration protocol) ends with a clear error
        raise IndexError('%d out of range' % i)

    def __setitem__(self, i, value):
        # Item assignment is not supported
        raise NotImplementedError

    @property
    def nchunks(self):
        """Number of chunks currently held"""
        return len(self._chunks)

    @property
    def is_consolidated(self):
        """True when the data is held in exactly one chunk"""
        return self.nchunks == 1

    def consolidate(self, inplace=True):
        """
        Internally consolidate chunks of data

        A list holding zero or one chunk is returned without any merging.

        Parameters
        ----------
        inplace : boolean, default True
            Modify the calling object instead of constructing a new one

        Returns
        -------
        splist : SparseList
            If inplace=False, new object, otherwise reference to existing
            object
        """
        if not inplace:
            result = self.copy()
        else:
            result = self

        # Nothing to merge with zero or one chunk; merging an empty chunk
        # list would otherwise fail inside np.concatenate
        if result.nchunks <= 1:
            return result

        result._consolidate_inplace()
        return result

    def _consolidate_inplace(self):
        # Merge the stored values and the sparse indexes of every chunk,
        # then replace the chunk list with the single combined array
        new_values = np.concatenate([c.sp_values for c in self._chunks])
        new_index = _concat_sparse_indexes([c.sp_index for c in self._chunks])
        new_arr = SparseArray(new_values, sparse_index=new_index,
                              fill_value=self.fill_value)
        self._chunks = [new_arr]

    def copy(self):
        """
        Return copy of the list

        The chunk list is copied; the chunk objects themselves are shared
        with the original.

        Returns
        -------
        new_list : SparseList
        """
        new_splist = SparseList(fill_value=self.fill_value)
        new_splist._chunks = list(self._chunks)
        return new_splist

    def to_array(self):
        """
        Return SparseArray from data stored in the SparseList

        Consolidates the list in place as a side effect. For an empty
        list, a new empty SparseArray is constructed with the list's
        fill_value.

        Returns
        -------
        sparr : SparseArray
        """
        if not self._chunks:
            # No chunk to hand back; build an empty array instead of failing
            return SparseArray([], fill_value=self.fill_value)

        self.consolidate(inplace=True)
        return self._chunks[0]

    def append(self, value):
        """
        Append element or array-like chunk of data to the SparseList

        Parameters
        ----------
        value: scalar or array-like
        """
        # A scalar becomes a one-element chunk
        if np.isscalar(value):
            value = [value]

        sparr = SparseArray(value, fill_value=self.fill_value)
        self._chunks.append(sparr)
def _concat_sparse_indexes(indexes):
    """
    Combine the sparse indexes of consecutive chunks into one IntIndex

    The integer positions of each index are shifted by the combined
    length of all indexes that come before it, so that they address the
    concatenated data.

    Parameters
    ----------
    indexes : iterable of sparse index objects
        Each must provide ``to_int_index()`` and ``length``

    Returns
    -------
    index : splib.IntIndex
    """
    shifted = []
    offset = 0

    for sp_index in indexes:
        int_index = sp_index.to_int_index()
        # positions are relative to the chunk; move them past prior chunks
        shifted.append(int_index.indices + offset)
        offset += sp_index.length

    merged = np.concatenate(shifted)
    return splib.IntIndex(offset, merged)
|