/usr/lib/python3/dist-packages/partd/numpy.py is in python3-partd 0.3.7-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 | """ Store arrays
We put arrays on disk as raw bytes, extending along the first dimension.
Alongside each array stored under a key x we also store a value under the key
x.dtype, which holds the string description of the array's dtype.
"""
from __future__ import absolute_import
import numpy as np
from toolz import valmap, concat, identity, partial
from .compatibility import pickle, unicode
from .utils import frame, framesplit, suffix, ignoring
def serialize_dtype(dt):
    """ Encode a numpy dtype as bytes

    The dtype's ``str`` type code is taken and encoded to bytes.  The
    examples below show the output on a little-endian machine.

    >>> serialize_dtype(np.dtype('i4'))
    b'<i4'
    >>> serialize_dtype(np.dtype('M8[us]'))
    b'<M8[us]'
    """
    type_code = dt.str
    return type_code.encode()
def parse_dtype(s):
    """ Parse text as numpy dtype

    Parameters
    ----------
    s : bytes or str
        Either a plain dtype code such as ``'i4'`` or the literal form of a
        list of field descriptions such as ``"[('a', 'i4')]"``.

    Returns
    -------
    numpy.dtype

    >>> parse_dtype('i4')
    dtype('int32')
    >>> parse_dtype(b'i4')
    dtype('int32')

    >>> parse_dtype("[('a', 'i4')]")
    dtype([('a', '<i4')])
    """
    # Compare against a prefix of the same type as ``s``: on Python 3,
    # ``str.startswith(bytes)`` (and the reverse) raises TypeError.
    list_prefix = b'[' if isinstance(s, bytes) else '['
    if s.startswith(list_prefix):
        # SECURITY: eval executes arbitrary expressions.  Only call this on
        # dtype descriptions that come from a trusted store.
        return np.dtype(eval(s))  # Dangerous!
    return np.dtype(s)
from .core import Interface
from .file import File
class Numpy(Interface):
    """ Store numpy arrays in another partd

    Each array is written to the wrapped partd as serialized bytes under its
    key.  The array's dtype is written under the same key with a ``'.dtype'``
    suffix so the bytes can be turned back into an array when read.

    Parameters
    ----------
    partd : partd object, str or None
        The wrapped store.  A falsy value or a ``str`` is handed to ``File``
        to construct the wrapped store.
    """

    def __init__(self, partd=None):
        if not partd or isinstance(partd, str):
            partd = File(partd)
        self.partd = partd
        Interface.__init__(self)

    def __getstate__(self):
        """ Return the state to pickle: only the wrapped partd """
        return {'partd': self.partd}

    def append(self, data, **kwargs):
        """ Append arrays to the wrapped partd

        ``data`` maps keys to arrays.  The dtype of each array is recorded
        under the ``'.dtype'``-suffixed key before the serialized array data
        is appended.  Extra keyword arguments go to the wrapped ``append``.
        """
        for k, v in data.items():
            self.partd.iset(suffix(k, '.dtype'), serialize_dtype(v.dtype))
        self.partd.append(valmap(serialize, data), **kwargs)

    def _get(self, keys, **kwargs):
        """ Read the arrays stored under ``keys``, returned as a list """
        raw = self.partd._get(keys, **kwargs)
        # NOTE(review): the dtype read skips locking -- presumably the caller
        # already holds the lock around this method; confirm against
        # Interface.
        dtype_keys = [suffix(key, '.dtype') for key in keys]
        dtypes = self.partd._get(dtype_keys, lock=False)
        dtypes = map(parse_dtype, dtypes)
        return list(map(deserialize, raw, dtypes))

    def delete(self, keys, **kwargs):
        """ Delete the arrays stored under ``keys`` and their dtype records

        Both the data keys and the ``'.dtype'`` companion keys are removed.
        Removing only the companions would leave array bytes behind with no
        dtype to read them back.
        """
        keys = list(keys)  # allow one-shot iterables; we iterate twice
        dtype_keys = [suffix(key, '.dtype') for key in keys]
        self.partd.delete(keys + dtype_keys, **kwargs)

    def _iset(self, key, value):
        """ Forward an ``_iset`` to the wrapped partd """
        return self.partd._iset(key, value)

    def drop(self):
        """ Forward ``drop`` to the wrapped partd """
        return self.partd.drop()

    def __del__(self):
        self.partd.__del__()

    @property
    def lock(self):
        """ The lock object of the wrapped partd """
        return self.partd.lock

    def __exit__(self, *args):
        self.drop()
        # Forward only the exception triple.  The wrapped __exit__ is a bound
        # method, so passing ``self`` again would shift the arguments.
        self.partd.__exit__(*args)
# Locate a msgpack implementation: first the copy exposed by pandas, then the
# standalone package.
try:
    from pandas import msgpack
except ImportError:
    try:
        import msgpack
    except ImportError:
        # Neither is importable.  With this sentinel, ``msgpack.packb`` and
        # ``msgpack.unpackb`` raise, and serialize/deserialize fall back to
        # pickle.
        msgpack = False
def serialize(x):
    """ Turn an array into bytes

    Arrays whose dtype is not object are returned as their raw bytes.
    Object arrays are converted to a list and written as one frame, using
    msgpack when that works and pickle otherwise.
    """
    if x.dtype != 'O':
        return x.tobytes()

    items = x.tolist()
    with ignoring(Exception):  # Try msgpack (faster on strings)
        return frame(msgpack.packb(items))
    # msgpack is missing or could not pack the values
    return frame(pickle.dumps(items, protocol=pickle.HIGHEST_PROTOCOL))
def decode(o):
    """ Recursively turn bytes into text inside nested lists

    Bytes values are decoded, lists are processed element by element, and
    any other value is returned unchanged.

    >>> decode([b'a', [b'b', 1]])
    ['a', ['b', 1]]
    """
    if isinstance(o, bytes):
        return o.decode()
    if not isinstance(o, list):
        return o
    if not o:
        return []

    if isinstance(o[0], bytes):
        # Fast path: assume every element is bytes, like the first one
        try:
            return [element.decode() for element in o]
        except AttributeError:
            # Some element has no ``decode``; handle elements one by one
            pass
    return [decode(element) for element in o]
def deserialize(bytes, dtype, copy=False):
    """ Turn stored bytes back into an array

    Parameters
    ----------
    bytes : bytes
        For object dtype, a sequence of frames that each hold a msgpack- or
        pickle-encoded list.  For any other dtype, the raw array buffer.
    dtype : numpy.dtype
        Dtype of the array to build.
    copy : bool, optional
        Only used for non-object dtypes.  When true, return a copy instead
        of an array that views ``bytes``.

    Returns
    -------
    numpy.ndarray
    """
    if dtype == 'O':
        try:
            items = list(concat(map(msgpack.unpackb, framesplit(bytes))))
        except Exception:
            # Not msgpack data (or msgpack is unavailable): fall back to
            # pickle.  ``except Exception`` lets KeyboardInterrupt and
            # SystemExit propagate, which a bare ``except`` would swallow.
            # SECURITY: pickle.loads can execute arbitrary code; only read
            # stores that are trusted.
            items = list(concat(map(pickle.loads, framesplit(bytes))))

        items = decode(items)
        return np.array(items, dtype='O')
    else:
        result = np.frombuffer(bytes, dtype)
        if copy:
            result = result.copy()
        return result
# Default codecs: pass the data through unchanged.  They stay in place unless
# one of the optional compression libraries below can be imported.
def compress_bytes(bytes, itemsize):
    return bytes


compress_text = decompress_text = decompress_bytes = identity

# Optional: blosc handles both array bytes and text
with ignoring(ImportError):
    import blosc
    blosc.set_nthreads(1)

    compress_text = partial(blosc.compress, typesize=1)
    decompress_text = blosc.decompress
    compress_bytes = blosc.compress
    decompress_bytes = blosc.decompress

# Optional: snappy, when importable, takes over the text codec only
with ignoring(ImportError):
    from snappy import compress as compress_text
    from snappy import decompress as decompress_text
def compress(bytes, dtype):
    """ Compress serialized array data

    Object-dtype data goes through the text codec; everything else goes
    through the bytes codec, which is also given the dtype's item size.
    """
    if dtype == 'O':
        return compress_text(bytes)
    return compress_bytes(bytes, dtype.itemsize)
def decompress(bytes, dtype):
    """ Undo ``compress``

    Object-dtype data goes through the text codec; everything else goes
    through the bytes codec.
    """
    if dtype == 'O':
        return decompress_text(bytes)
    return decompress_bytes(bytes)
|