/usr/lib/python2.7/dist-packages/joblib/numpy_pickle_compat.py is in python-joblib 0.10.3+git55-g660fe5d-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
"""Numpy pickle compatibility functions."""
import pickle
import os
import zlib
from io import BytesIO
from ._compat import PY3_OR_LATER
from .numpy_pickle_utils import _ZFILE_PREFIX
from .numpy_pickle_utils import Unpickler
def hex_str(an_int):
    """Return the '0x'-prefixed, lowercase hexadecimal form of an integer."""
    return format(an_int, '#x')
if not PY3_OR_LATER:
    # Python 2: the native str type is used as-is.
    asbytes = str
else:
    def asbytes(s):
        """Return s unchanged if it is bytes, else encode it as latin1."""
        return s if isinstance(s, bytes) else s.encode('latin1')
# Width, in characters, of the length field of a Z-file header: the size of
# the hexadecimal representation of 2 ** 64, '0x' prefix included.
_MAX_LEN = len(hex_str(1 << 64))

# 64 KiB.
_CHUNK_SIZE = 1024 * 64
def read_zfile(file_handle):
    """Read the z-file and return the content as a string.

    Z-files are raw data compressed with zlib used internally by joblib
    for persistence. Backward compatibility is not guaranteed. Do not
    use for external purposes.

    Parameters
    ----------
    file_handle: file object
        Seekable binary file object holding the Z-file. It is rewound to
        its beginning before being read.

    Returns
    -------
    data: bytes
        The decompressed content of the Z-file.

    Raises
    ------
    AssertionError
        If the decompressed content does not have the length recorded in
        the header.
    """
    file_handle.seek(0)
    prefix_length = len(_ZFILE_PREFIX)
    header_length = prefix_length + _MAX_LEN
    header = file_handle.read(header_length)
    # What follows the prefix is the length of the uncompressed data,
    # written as a hexadecimal number.
    length = int(header[prefix_length:], 16)

    # With python2 and joblib version <= 0.8.4 compressed pickle header is one
    # character wider so we need to ignore an additional space if present.
    # Note: the first byte of the zlib data is guaranteed not to be a
    # space according to
    # https://tools.ietf.org/html/rfc6713#section-2.1
    next_byte = file_handle.read(1)
    if next_byte != b' ':
        # The zlib compressed data has started and we need to go back
        # one byte
        file_handle.seek(header_length)

    # We use the known length of the data to tell Zlib the size of the
    # buffer to allocate.
    data = zlib.decompress(file_handle.read(), 15, length)
    if len(data) != length:
        # Raised explicitly instead of through an ``assert`` statement so the
        # check still runs when Python is started with -O; the exception
        # type is kept unchanged for existing callers.
        raise AssertionError(
            "Incorrect data length while decompressing %s. "
            "The file could be corrupted." % (file_handle,))
    return data
def write_zfile(file_handle, data, compress=1):
    """Write the data in the given file as a Z-file.

    Z-files are raw data compressed with zlib used internally by joblib
    for persistence. Backward compatibility is not guaranteed. Do not
    use for external purposes.

    Parameters
    ----------
    file_handle: file object
        Binary file object the Z-file is written to.
    data: bytes or str
        The content to store; it goes through ``asbytes`` before being
        compressed.
    compress: int, optional
        Compression level handed to ``zlib.compress``.
    """
    file_handle.write(_ZFILE_PREFIX)
    # The header stores the size of the uncompressed data as a hexadecimal
    # number, left-justified in a field of _MAX_LEN characters.
    size_field = hex_str(len(data)).ljust(_MAX_LEN)
    file_handle.write(asbytes(size_field))
    payload = zlib.compress(asbytes(data), compress)
    file_handle.write(payload)
###############################################################################
# Utility objects for persistence.
class NDArrayWrapper(object):
    """Placeholder persisted in the pickle in place of a numpy array.

    It only records the name of the file in which the array has been
    persisted, the array subclass, and whether memory mapping is allowed
    when loading it back.
    """

    def __init__(self, filename, subclass, allow_mmap=True):
        """Constructor. Keep the information needed to reload the array."""
        self.allow_mmap = allow_mmap
        self.subclass = subclass
        self.filename = filename

    def read(self, unpickler):
        """Reconstruct the array.

        Parameters
        ----------
        unpickler: object
            The unpickler in charge of the load. Its ``_dirname``,
            ``mmap_mode`` and ``np`` attributes are used.

        Returns
        -------
        array: the array loaded from disk, rebuilt as ``self.subclass``
            when that is possible and needed.
        """
        array_path = os.path.join(unpickler._dirname, self.filename)

        # Instances pickled with joblib < 0.9.0 have no allow_mmap
        # attribute, hence the getattr with a default for backward compat.
        load_kwargs = {}
        if getattr(self, 'allow_mmap', True):
            load_kwargs['mmap_mode'] = unpickler.mmap_mode

        # Load the array from the disk
        array = unpickler.np.load(array_path, **load_kwargs)

        # Nothing more to do when subclasses cannot be reconstructed (old
        # versions of numpy) or when the array already has the right type.
        if (not hasattr(array, '__array_prepare__') or
                self.subclass in (unpickler.np.ndarray,
                                  unpickler.np.memmap)):
            return array

        # We need to reconstruct another subclass
        rebuilt = unpickler.np.core.multiarray._reconstruct(
            self.subclass, (0,), 'b')
        return rebuilt.__array_prepare__(array)
class ZNDArrayWrapper(NDArrayWrapper):
    """Placeholder persisted in the pickle in place of a numpy array.

    This object stores the name of the Z-file in which the data of the
    array has been persisted, together with the meta information needed
    to retrieve it.

    The raw buffer data of the array and its meta information are stored,
    rather than the output of an array representation routine (tostring),
    because this makes full use of the strided model and avoids memory
    copies (a and a.T store as fast). In addition, saving the heavy
    information separately can avoid creating large temporary buffers
    when unpickling data with large arrays.
    """

    def __init__(self, filename, init_args, state):
        """Constructor. Keep the information needed to reload the array."""
        self.init_args = init_args
        self.state = state
        self.filename = filename

    def read(self, unpickler):
        """Reconstruct the array from the meta-information and the z-file."""
        # This simply reproduces the unpickling mechanism of numpy arrays:
        # build an empty array, then restore its state.
        zfile_path = os.path.join(unpickler._dirname, self.filename)
        reconstruct = unpickler.np.core.multiarray._reconstruct
        array = reconstruct(*self.init_args)
        with open(zfile_path, 'rb') as zfile:
            raw_data = read_zfile(zfile)
        # The persisted state lacks the data buffer: append it back.
        array.__setstate__(self.state + (raw_data,))
        return array
class ZipNumpyUnpickler(Unpickler):
    """Unpickler subclass able to load our numpy pickles."""

    # Work on a copy so that registering load_build below leaves the
    # dispatch table of the parent class untouched.
    dispatch = Unpickler.dispatch.copy()

    def __init__(self, filename, file_handle, mmap_mode=None):
        """Constructor.

        Parameters
        ----------
        filename: string
            Path of the pickle file; its directory and base name are kept.
        file_handle: file object
            Open handle on the Z-file holding the pickle.
        mmap_mode: optional
            Stored as-is on the instance.
        """
        self._dirname, self._filename = os.path.split(filename)
        self.mmap_mode = mmap_mode
        self.file_handle = self._open_pickle(file_handle)
        Unpickler.__init__(self, self.file_handle)
        # numpy is optional: remember None when it cannot be imported.
        try:
            import numpy
        except ImportError:
            numpy = None
        self.np = numpy

    def _open_pickle(self, file_handle):
        """Decompress the Z-file and expose its content as a BytesIO."""
        content = read_zfile(file_handle)
        return BytesIO(content)

    def load_build(self):
        """Set the state of a newly created object.

        The call is intercepted to replace our place-holder objects,
        NDArrayWrapper, by the array we are interested in. The
        replacement happens directly in the stack of the unpickler.
        """
        Unpickler.load_build(self)
        if not isinstance(self.stack[-1], NDArrayWrapper):
            return
        if self.np is None:
            raise ImportError("Trying to unpickle an ndarray, "
                              "but numpy didn't import correctly")
        wrapper = self.stack.pop()
        self.stack.append(wrapper.read(self))

    # Be careful to register our new method. The dispatch key is
    # pickle.BUILD[0] with Python 3 or later, pickle.BUILD otherwise.
    dispatch[pickle.BUILD[0] if PY3_OR_LATER else pickle.BUILD] = load_build
def load_compatibility(filename):
    """Reconstruct a Python object from a file persisted with joblib.dump.

    This function ensures the compatibility with joblib old persistence format
    (<= 0.9.3).

    Parameters
    -----------
    filename: string
        The name of the file from which to load the object

    Returns
    -------
    result: any Python object
        The object stored in the file.

    Raises
    ------
    ValueError
        With Python 3 or later, when unpickling fails with a
        UnicodeDecodeError.
    UnicodeDecodeError
        With Python 2, when unpickling fails with that error.

    See Also
    --------
    joblib.dump : function to save an object

    Notes
    -----

    This function can load numpy array files saved separately during the
    dump.
    """
    with open(filename, 'rb') as file_handle:
        # We are careful to open the file handle early and keep it open to
        # avoid race-conditions on renames. That said, if data is stored in
        # companion files, moving the directory will create a race when
        # joblib tries to access the companion files.
        unpickler = ZipNumpyUnpickler(filename, file_handle=file_handle)
        try:
            obj = unpickler.load()
        except UnicodeDecodeError as exc:
            # More user-friendly error message
            if PY3_OR_LATER:
                new_exc = ValueError(
                    'You may be trying to read with '
                    'python 3 a joblib pickle generated with python 2. '
                    'This feature is not supported by joblib.')
                new_exc.__cause__ = exc
                raise new_exc
            # Reraise the exception with Python 2: swallowing it would only
            # surface later as an unrelated error on the unbound ``obj``.
            raise
        finally:
            if hasattr(unpickler, 'file_handle'):
                unpickler.file_handle.close()
        return obj
|