/usr/share/pyshared/mvpa2/base/node.py is in python-mvpa2 2.1.0-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
# See COPYING file distributed along with the PyMVPA package for the
# copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Implementation of a common processing object (node)."""
__docformat__ = 'restructuredtext'
import time
from mvpa2.support import copy
from mvpa2.base.dochelpers import _str, _repr, _repr_attrs
from mvpa2.base.state import ClassWithCollections, ConditionalAttribute
if __debug__:
from mvpa2.base import debug
class Node(ClassWithCollections):
    """Common processing object.

    A `Node` is an object that processes datasets. It can be called with a
    `Dataset` and returns another dataset with the results. In addition, a
    node can also be used as a generator: calling ``generate()`` with a
    dataset yields (potentially) multiple result datasets.

    Nodes have a notion of ``space``. The meaning of this space may vary
    heavily across sub-classes. In general, this is a trigger that tells the
    node to compute and store information about the input data that is
    "interesting" in the context of the corresponding processing in the
    output dataset.
    """

    calling_time = ConditionalAttribute(
        enabled=True,
        doc="Time (in seconds) it took to call the node")

    raw_results = ConditionalAttribute(
        enabled=False,
        doc=("Computed results before invoking postproc. "
             "Stored only if postproc is not None."))

    def __init__(self, space=None, postproc=None, **kwargs):
        """
        Parameters
        ----------
        space: str, optional
          Name of the 'processing space'. The actual meaning of this argument
          heavily depends on the sub-class implementation. In general, this is
          a trigger that tells the node to compute and store information about
          the input data that is "interesting" in the context of the
          corresponding processing in the output dataset.
        postproc : Node instance, optional
          Node to perform post-processing of results. This node is applied
          in `__call__()` to perform a final processing step on the to be
          result dataset. If None, nothing is done.
        """
        # all remaining keyword arguments are handed to the base class
        ClassWithCollections.__init__(self, **kwargs)
        if __debug__:
            debug("NO",
                  "Init node '%s' (space: '%s', postproc: '%s')",
                  (self.__class__.__name__, space, str(postproc)))
        # go through the setters so both attributes are stored the same way
        # as any later assignment via the properties
        self.set_space(space)
        self.set_postproc(postproc)

    def __call__(self, ds):
        """Process a dataset.

        The default implementation runs ``_precall()``, then ``_call()``, and
        finally returns whatever ``_postcall()`` makes of the result. The
        wall-clock duration of the whole sequence is stored in the
        ``calling_time`` conditional attribute.

        Parameters
        ----------
        ds: Dataset
          Input dataset.

        Returns
        -------
        Dataset
        """
        started = time.time()
        # note: the return value of _precall() is not used here
        self._precall(ds)
        result = self._postcall(ds, self._call(ds))
        self.ca.calling_time = time.time() - started
        return result

    def _precall(self, ds):
        """Hook executed before the actual processing.

        The default implementation does nothing and hands back its input.

        Parameters
        ----------
        ds: Dataset
          Original input dataset.

        Returns
        -------
        Dataset
        """
        return ds

    def _call(self, ds):
        """Actual processing step; not implemented in this base class."""
        raise NotImplementedError

    def _postcall(self, ds, result):
        """Hook executed after the actual processing.

        Without a post-processing node the result is passed through
        untouched. Otherwise the preliminary result is stored in the
        ``raw_results`` conditional attribute and the output of the
        post-processing node is returned instead.

        Parameters
        ----------
        ds: Dataset
          Original input dataset.
        result: Dataset
          Preliminary result dataset (as produced by ``_call()``).

        Returns
        -------
        Dataset
        """
        postproc = self.__postproc
        if postproc is None:
            return result

        if __debug__:
            debug("NO",
                  "Applying post-processing node %s", (postproc,))
        # keep the untouched result around before it gets replaced
        self.ca.raw_results = result
        return postproc(result)

    def generate(self, ds):
        """Yield processing results.

        This method makes the node behave like a generator. By default it
        yields exactly one result -- the same one obtained by calling the
        node with the dataset. Subclasses might implement generators that
        yield multiple results.

        Parameters
        ----------
        ds: Dataset
          Input dataset

        Returns
        -------
        generator
          the generator yields the result of the processing.
        """
        yield self(ds)

    def get_space(self):
        """Query the processing space name of this node."""
        return self.__space

    def set_space(self, name):
        """Set the processing space name of this node."""
        self.__space = name

    def get_postproc(self):
        """Returns the post-processing node or None."""
        return self.__postproc

    def set_postproc(self, node):
        """Assigns a post-processing node

        Set to `None` to disable postprocessing.
        """
        self.__postproc = node

    def __str__(self):
        return _str(self)

    def __repr__(self, prefixes=[]):
        # `prefixes` is only read (concatenation builds a new list), so the
        # shared default list is never modified
        own_attrs = _repr_attrs(self, ['space', 'postproc'])
        return super(Node, self).__repr__(prefixes=prefixes + own_attrs)

    space = property(get_space, set_space,
                     doc="Processing space name of this node")

    postproc = property(get_postproc, set_postproc,
                        doc="Node to perform post-processing of results")
class ChainNode(Node):
    """Chain of nodes.

    This class allows concatenating a list of nodes into a processing chain.
    When called with a dataset, it is sequentially fed through the nodes in
    the chain. A ChainNode may also be used as a generator. In this case, all
    nodes in the chain are treated as generators too, and the ChainNode
    behaves as a single big generator that recursively calls all embedded
    generators and yields the results.

    A ChainNode behaves similar to a list container: Nodes can be appended,
    and the chain can be sliced like a list, etc ...
    """

    def __init__(self, nodes, **kwargs):
        """
        Parameters
        ----------
        nodes: list
          Node instances.
        """
        if not len(nodes):
            raise ValueError("%s needs at least one embedded node."
                             % self.__class__.__name__)
        # remaining keyword arguments are handed to Node.__init__()
        Node.__init__(self, **kwargs)
        # keep a private list: append() must neither modify the sequence
        # owned by the caller nor fail when a tuple was passed in
        self._nodes = list(nodes)

    def __copy__(self):
        """Return a new chain built from shallow copies of all nodes.

        Only the list of copied nodes is passed to the constructor; no other
        constructor arguments (e.g. ``space`` or ``postproc``) are forwarded.
        """
        # XXX how do we safely and exhaustively copy a node?
        return self.__class__([copy.copy(n) for n in self])

    def _call(self, ds):
        """Feed ``ds`` through all nodes in order and return the final output.

        Each node is called with the output of its predecessor.
        """
        mp = ds
        for i, n in enumerate(self):
            if __debug__:
                debug('MAP', "%s: input (%s) -> node (%i/%i): '%s'",
                      (self.__class__.__name__,
                       # data passed along the chain need not have a shape
                       getattr(mp, 'shape', '???'),
                       i + 1, len(self),
                       n))
            mp = n(mp)
        if __debug__:
            # same fallback as above: the arguments are evaluated before
            # debug() is entered, so a missing attribute would raise here
            debug('MAP', "%s: output (%s)",
                  (self.__class__.__name__, getattr(mp, 'shape', '???')))
        return mp

    def generate(self, ds, startnode=0):
        """Yield the results of running ``ds`` through the chain of generators.

        Every dataset generated by the node at position ``startnode`` is
        passed recursively through the generators of all subsequent nodes;
        only the outputs of the last node in the chain are yielded.

        Parameters
        ----------
        ds: Dataset
          To be processed dataset
        startnode: int
          First node in the chain that shall be considered. This argument is
          mostly useful for internal optimization.
        """
        first_node = self[startnode]
        if __debug__:
            debug('MAP', "%s: input (%s) -> generator (%i/%i): '%s'",
                  (self.__class__.__name__, getattr(ds, 'shape', '???'),
                   startnode + 1, len(self), first_node))
        # let the first node generate as many datasets as it wants
        for gds in first_node.generate(ds):
            if startnode == len(self) - 1:
                # if this is already the last node yield the result
                yield gds
            else:
                # otherwise feed them through the rest of the chain
                for rgds in self.generate(gds, startnode=startnode + 1):
                    yield rgds

    #
    # Behave as a container
    #
    def append(self, node):
        """Append a node to the chain."""
        # XXX and if a node is a ChainMapper itself -- should we just
        # may be loop and add all the entries?
        self._nodes.append(node)

    def __len__(self):
        return len(self._nodes)

    def __iter__(self):
        for n in self._nodes:
            yield n

    def __reversed__(self):
        return reversed(self._nodes)

    def __getitem__(self, key):
        """Return a single node, or a new chain for a slice.

        Parameters
        ----------
        key: int or slice
          An index yields the node stored at that position. A slice yields a
          copy of this chain (created via ``copy.copy()``) that holds the
          selected nodes.
        """
        # Decide on `slice` rather than on `int`: integer-like indices that
        # are not `int` instances must return a single node too, instead of
        # ending up as the (non-list) `_nodes` of a new chain.
        if isinstance(key, slice):
            # operate on shallow copy of self
            sliced = copy.copy(self)
            sliced._nodes = self._nodes[key]
            return sliced
        # if just one is requested return just one
        return self._nodes[key]

    def __repr__(self, prefixes=[]):
        # `prefixes` is only read (concatenation builds a new list), so the
        # shared default list is never modified
        return super(ChainNode, self).__repr__(
            prefixes=prefixes
            + _repr_attrs(self, ['nodes']))

    def __str__(self):
        return _str(self, '-'.join([str(n) for n in self]))

    # read-only access to the internal list of nodes
    nodes = property(fget=lambda self: self._nodes)
|