/usr/share/pyshared/mvpa/mappers/base.py is in python-mvpa 0.4.8-3.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
# See COPYING file distributed along with the PyMVPA package for the
# copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Data mapper"""
__docformat__ = 'restructuredtext'
import numpy as N
from mvpa.mappers.metric import Metric
from mvpa.datasets import Dataset
from mvpa.misc.vproperty import VProperty
from mvpa.base.dochelpers import enhancedDocString
if __debug__:
from mvpa.base import warning
from mvpa.base import debug
class Mapper(object):
    """Interface to provide mapping between two spaces: IN and OUT.

    Methods are prefixed correspondingly. forward/reverse operate
    on the entire dataset. get(In|Out)Id[s] operate per element::

              forward
             --------->
         IN              OUT
             <--------/
               reverse

    """
    def __init__(self, metric=None):
        """
        :Parameters:
          metric : Metric
            Optional metric. Validated by `setMetric()`, i.e. anything that
            is neither `None` nor a `Metric` instance raises `ValueError`.
        """
        self.__metric = None
        """Pylint happiness"""
        self.setMetric(metric)
        """Actually assign the metric"""

    #
    # The following methods are abstract and merely define the intended
    # interface of a mapper and have to be implemented in derived classes. See
    # the docstrings of the respective methods for details about what they
    # should do.
    #

    def forward(self, data):
        """Map data from the IN dataspace into OUT space.
        """
        raise NotImplementedError

    def reverse(self, data):
        """Reverse map data from OUT space into the IN space.
        """
        raise NotImplementedError

    def getInSize(self):
        """Returns the size of the entity in input space"""
        raise NotImplementedError

    def getOutSize(self):
        """Returns the size of the entity in output space"""
        raise NotImplementedError

    def selectOut(self, outIds):
        """Limit the OUT space to a certain set of features.

        :Parameters:
          outIds: sequence
            Subset of ids of the current feature in OUT space to keep.
        """
        raise NotImplementedError

    def getInId(self, outId):
        """Translate a feature id into a coordinate/index in input space.

        Such a translation might not be meaningful or even possible for a
        particular mapping algorithm and therefore cannot be relied upon.
        """
        raise NotImplementedError

    #
    # The following methods are candidates for reimplementation in derived
    # classes, in cases where the provided default behavior is not appropriate.
    #

    def isValidOutId(self, outId):
        """Validate feature id in OUT space.

        The default accepts ids in the half-open range [0, getOutSize()).
        Override if OUT space is not simply a 1D vector.
        """
        return outId >= 0 and outId < self.getOutSize()

    def isValidInId(self, inId):
        """Validate id in IN space.

        The default accepts ids in the half-open range [0, getInSize()).
        Override if IN space is not simply a 1D vector.
        """
        return inId >= 0 and inId < self.getInSize()

    def train(self, dataset):
        """Perform training of the mapper.

        This method is called to put the mapper in a state that allows it to
        perform the intended mapping.

        :Parameter:
          dataset: Dataset or subclass

        .. note::
          The default behavior of this method is to do nothing.
        """
        pass

    def getNeighbor(self, outId, *args, **kwargs):
        """Get feature neighbors in input space, given an id in output space.

        This is a generator: nothing (including the metric check) is
        evaluated before the first item is requested. For a valid `outId`
        it translates the id into input space via `getInId()`, asks
        `getNeighborIn()` for the neighbors and yields each of them
        translated back via `getOutId()`. An invalid `outId` yields nothing.

        This method has to be reimplemented whenever a derived class does not
        provide an implementation for :meth:`~mvpa.mappers.base.Mapper.getInId`.
        Note that `getOutId()` is not declared by this base class; a derived
        class relying on this default implementation has to provide it.

        :Raises:
          RuntimeError
            If no metric is assigned to the mapper.
        """
        if self.metric is None:
            raise RuntimeError("No metric was assigned to %s, thus no "
                               "neighboring information is present" % self)

        if self.isValidOutId(outId):
            inId = self.getInId(outId)
            for inId in self.getNeighborIn(inId, *args, **kwargs):
                yield self.getOutId(inId)

    #
    # The following methods provide common functionality for all mappers
    # and there should be no immediate need to reimplement them
    #

    def getNeighborIn(self, inId, *args, **kwargs):
        """Yield the coordinates of the neighbors of an input-space element.

        Neighbors reported by the metric that do not pass `isValidInId()`
        are skipped. An invalid `inId` yields nothing.

        :Parameters:
          inId
            id (index) of an element in input dataspace.
          *args, **kwargs
            Any additional arguments are passed to the embedded metric of the
            mapper.

        :Raises:
          RuntimeError
            If no metric is assigned to the mapper (raised when the first
            item is requested, as this is a generator).

        XXX See TODO below: what to return -- list of arrays or list
        of tuples?
        """
        if self.metric is None:
            raise RuntimeError("No metric was assigned to %s, thus no "
                               "neighboring information is present" % self)

        # bind once; it is called for the query id and for every candidate
        isValidInId = self.isValidInId
        if isValidInId(inId):
            for neighbor in self.metric.getNeighbor(inId, *args, **kwargs):
                if isValidInId(neighbor):
                    yield neighbor

    def getNeighbors(self, outId, *args, **kwargs):
        """Return the list of coordinates for the neighbors.

        By default it simply constructs the list based on
        the generator returned by getNeighbor()
        """
        return list(self.getNeighbor(outId, *args, **kwargs))

    def __repr__(self):
        """Return `ClassName(metric=<repr>)`, or `ClassName()` without metric.
        """
        if self.__metric is not None:
            s = "metric=%s" % repr(self.__metric)
        else:
            s = ''
        return "%s(%s)" % (self.__class__.__name__, s)

    def __call__(self, data):
        """Calls the mappers forward() method.
        """
        return self.forward(data)

    def getMetric(self):
        """Return the metric assigned to the mapper (`None` if there is none).
        """
        return self.__metric

    def setMetric(self, metric):
        """Assign a metric to the mapper.

        :Parameters:
          metric : Metric | None
            Metric to store; `None` removes any metric.

        :Raises:
          ValueError
            If `metric` is neither `None` nor a `Metric` instance.
        """
        if metric is not None and not isinstance(metric, Metric):
            raise ValueError("metric for Mapper must be an "
                             "instance of a Metric class . Got %s"
                             % repr(type(metric)))
        self.__metric = metric

    metric = property(fget=getMetric, fset=setMetric)
    nfeatures = VProperty(fget=getOutSize)
class ProjectionMapper(Mapper):
    """Linear mapping between multidimensional spaces.

    This class cannot be used directly. Sub-classes have to implement
    the `_train()` method, which has to compute the projection matrix
    `_proj` and optionally offset vectors `_offset_in` and
    `_offset_out` (if initialized with demean=True, which is default)
    given a dataset (see `_train()` docstring for more information).

    Once the projection matrix is available, this class provides
    functionality to perform forward and backwards linear mapping of
    data, the latter by default using pseudo-inverse (but could be
    altered in subclasses, like hermitian (conjugate) transpose in
    case of SVD).  Additionally, `ProjectionMapper` supports optional
    selection of arbitrary component (i.e. columns of the projection
    matrix) of the projection.

    Forward and back-projection matrices (a.k.a. *projection* and
    *reconstruction*) are available via the `proj` and `recon`
    properties.
    """

    _DEV__doc__ = """Think about renaming `demean`, may be `translation`?"""

    def __init__(self, selector=None, demean=True):
        """Initialize the ProjectionMapper

        :Parameters:
          selector: None | list
            Which components (i.e. columns of the projection matrix)
            should be used for mapping. If `selector` is `None` all
            components are used. If a list is provided, all list
            elements are treated as component ids and the respective
            components are selected (all others are discarded).
          demean: bool
            Either data should be demeaned while computing
            projections and applied back while doing reverse()
        """
        Mapper.__init__(self)

        self._selector = selector
        self._proj = None
        """Forward projection matrix."""
        self._recon = None
        """Reverse projection (reconstruction) matrix."""
        self._demean = demean
        """Flag whether to demean the to be projected data, prior to projection.
        """
        self._offset_in = None
        """Offset (most often just mean) in the input space"""
        self._offset_out = None
        """Offset (most often just mean) in the output space"""

    __doc__ = enhancedDocString('ProjectionMapper', locals(), Mapper)

    def train(self, dataset, *args, **kwargs):
        """Determine the projection matrix.

        Stores the feature-wise mean of the samples as input offset, lets
        the subclass compute the projection via `_train()` and finally
        applies the component selection given to the constructor (if any).

        :Parameters:
          dataset : Dataset
            Dataset to operate on. Anything that is not a `Dataset` is used
            as the samples array directly.
          *args
            Optional positional arguments to pass to _train
            of subclass
          **kwargs
            Optional keyword arguments to pass to _train
            of subclass
        """
        # store the feature wise mean
        if isinstance(dataset, Dataset):
            samples = dataset.samples
        else:
            samples = dataset
        self._offset_in = samples.mean(axis=0)
        # ??? Setting of _offset_out is to be done in a child
        # class

        # compute projection matrix with subclass logic
        self._train(dataset, *args, **kwargs)

        # perform component selection
        if self._selector is not None:
            self.selectOut(self._selector)

    def _demeanData(self, data):
        """Helper which optionally demeans

        Returns `data` minus the stored input offset if the mapper was
        configured with `demean`, otherwise `data` itself.
        """
        if self._demean:
            # demean the training data
            data = data - self._offset_in

            if __debug__ and "MAP_" in debug.active:
                debug("MAP_",
                      "%s: Mean of data in input space %s was subtracted" %
                      (self.__class__.__name__, self._offset_in))
        return data

    def _train(self, dataset):
        """Worker method. Needs to be implemented by subclass.

        This method has to train the mapper and store the resulting
        transformation matrix in `self._proj`. `train()` forwards any extra
        positional and keyword arguments it receives to this method.
        """
        raise NotImplementedError

    def forward(self, data, demean=None):
        """Perform forward projection.

        :Parameters:
          data: ndarray
            Data array to map
          demean: boolean | None
            Override demean setting for this method call.

        :Returns:
          NumPy array

        :Raises:
          RuntimeError
            If no projection matrix is available yet.
        """
        # let arg overwrite instance flag
        if demean is None:
            demean = self._demean

        if self._proj is None:
            raise RuntimeError("Mapper needs to be trained before used.")

        d = N.asmatrix(data)

        # Remove input offset if present
        if demean and self._offset_in is not None:
            d = d - self._offset_in

        # Do forward projection
        res = (d * self._proj).A

        # Add output offset if present
        if demean and self._offset_out is not None:
            res += self._offset_out

        return res

    def reverse(self, data):
        """Reproject (reconstruct) data into the original feature space.

        Unlike `forward()` there is no per-call override: offsets are only
        applied according to the `demean` setting of the instance.

        :Returns:
          NumPy array

        :Raises:
          RuntimeError
            If no projection matrix is available yet.
        """
        if self._proj is None:
            raise RuntimeError("Mapper needs to be trained before used.")
        d = N.asmatrix(data)
        # Remove offset if present in output space
        if self._demean and self._offset_out is not None:
            d = d - self._offset_out

        # Do reverse projection
        res = (d * self.recon).A

        # Add offset in input space
        if self._demean and self._offset_in is not None:
            res += self._offset_in

        return res

    def _computeRecon(self):
        """Given that a projection is present -- compute reconstruction matrix.

        By default -- pseudoinverse of projection matrix.  Might be overridden
        in derived classes for efficiency.
        """
        return N.linalg.pinv(self._proj)

    def _getRecon(self):
        """Compute (if necessary) and return reconstruction matrix

        The matrix is cached in `self._recon`; `selectOut()` resets the
        cache.
        """
        # (re)build reconstruction matrix
        recon = self._recon
        if recon is None:
            self._recon = recon = self._computeRecon()
        return recon

    def getInSize(self):
        """Returns the number of original features."""
        return self._proj.shape[0]

    def getOutSize(self):
        """Returns the number of components to project on."""
        return self._proj.shape[1]

    def selectOut(self, outIds):
        """Choose a subset of components (and remove all others).

        Keeps the columns `outIds` of the projection matrix and the
        matching entries of the output offset (if one is set).
        """
        self._proj = self._proj[:, outIds]
        if self._offset_out is not None:
            self._offset_out = self._offset_out[outIds]
        # invalidate reconstruction matrix
        self._recon = None

    proj = property(fget=lambda self: self._proj, doc="Projection matrix")
    recon = property(fget=_getRecon, doc="Backprojection matrix")
class CombinedMapper(Mapper):
    """Meta mapper that combines several embedded mappers.

    This mapper can be used to map from several input dataspaces into a common
    output dataspace. When :meth:`~mvpa.mappers.base.CombinedMapper.forward`
    is called with a sequence of data, each element in that sequence is passed
    to the corresponding mapper, which in turn forward-maps the data. The
    output of all mappers is finally stacked (horizontally or column or
    feature-wise) into a single large 2D matrix (nsamples x nfeatures).

    .. note::
      This mapper can only embed mappers that transform data into a 2D
      (nsamples x nfeatures) representation. For mappers not supporting this
      transformation, consider wrapping them in a
      :class:`~mvpa.mappers.base.ChainMapper` with an appropriate
      post-processing mapper.

    CombinedMapper fully supports forward and backward mapping, training,
    runtime selection of a feature subset (in output dataspace) and retrieval
    of neighborhood information.
    """
    def __init__(self, mappers, **kwargs):
        """
        :Parameters:
          mappers: list of Mapper instances
            The order of the mappers in the list is important, as it will define
            the order in which data snippets have to be passed to
            :meth:`~mvpa.mappers.base.CombinedMapper.forward`.
          **kwargs
            All additional arguments are passed to the base-class constructor.

        :Raises:
          ValueError
            If `mappers` is empty.
        """
        Mapper.__init__(self, **kwargs)

        if not len(mappers):
            raise ValueError(
                'CombinedMapper needs at least one embedded mapper.')

        self._mappers = mappers

    def forward(self, data):
        """Map data from the IN spaces into the common OUT space.

        :Parameter:
          data: sequence
            Each element in the `data` sequence is passed to the corresponding
            embedded mapper and is mapped individually by it. The number of
            elements in `data` has to match the number of embedded mappers. Each
            element in `data` has to provide the same number of samples
            (first dimension).

        :Returns:
          array: nsamples x nfeatures
            Horizontally stacked array of all embedded mapper outputs.

        :Raises:
          ValueError
            If the number of data elements differs from the number of
            embedded mappers, or if mapping/stacking raises `ValueError`.
        """
        if len(data) != len(self._mappers):
            raise ValueError(
                "CombinedMapper needs a sequence with data for each "
                "Mapper")

        # return a big array for the result of the forward mapped data
        # of each embedded mapper
        try:
            # lengths are known to be equal here, so pairing is one-to-one
            return N.hstack(
                [m.forward(d) for m, d in zip(self._mappers, data)])
        except ValueError:
            # NOTE: a ValueError raised inside an embedded forward() ends up
            # here as well and is reported with the same message.
            raise ValueError(
                "Embedded mappers do not generate same number of samples. "
                "Check input data.")

    def reverse(self, data):
        """Reverse map data from OUT space into the IN spaces.

        :Parameter:
          data: array
            Single data array to be reverse mapped into a sequence of data
            snippets in their individual IN spaces.

        :Returns:
          list
            One reverse-mapped element per embedded mapper, in mapper order.

        :Raises:
          ValueError
            If the number of features in `data` differs from `getOutSize()`.
        """
        # assure array and transpose
        # i.e. transpose of 1D does nothing, but of 2D puts features
        # along first dimension
        data = N.asanyarray(data).T

        if len(data) != self.getOutSize():
            raise ValueError(
                "Data shape does not match mapper reverse mapping "
                "properties.")

        result = []
        fsum = 0
        for m in self._mappers:
            # calculate upper border
            fsum_new = fsum + m.getOutSize()

            # hand each mapper its own slice of features, samples first again
            result.append(m.reverse(data[fsum:fsum_new].T))

            fsum = fsum_new

        return result

    def train(self, dataset):
        """Trains all embedded mappers.

        The provided training dataset is split appropriately and the
        corresponding pieces are passed to the
        :meth:`~mvpa.mappers.base.Mapper.train` method of each embedded mapper.

        :Parameter:
          dataset: :class:`~mvpa.datasets.base.Dataset` or subclass
            A dataset with the number of features matching the `outSize` of the
            `CombinedMapper`.

        :Raises:
          ValueError
            If `dataset.nfeatures` differs from `getOutSize()`.
        """
        if dataset.nfeatures != self.getOutSize():
            raise ValueError("Training dataset does not match the mapper "
                             "properties.")

        fsum = 0
        for m in self._mappers:
            # need to split the dataset
            fsum_new = fsum + m.getOutSize()
            m.train(dataset.selectFeatures(range(fsum, fsum_new)))
            fsum = fsum_new

    def getInSize(self):
        """Returns the size of the entity in input space

        This is the sum of the input sizes of all embedded mappers.
        """
        # builtin sum: numpy.sum() on a generator is deprecated and merely
        # fell back to the builtin anyway
        return sum(m.getInSize() for m in self._mappers)

    def getOutSize(self):
        """Returns the size of the entity in output space

        This is the sum of the output sizes of all embedded mappers.
        """
        return sum(m.getOutSize() for m in self._mappers)

    def selectOut(self, outIds):
        """Remove some elements and leave only ids in 'out'/feature space.

        .. note::
          The subset selection is done inplace

        :Parameter:
          outIds: sequence
            All output feature ids to be selected/kept.
        """
        # determine which features belong to what mapper
        # and call its selectOut() accordingly
        ids = N.asanyarray(outIds)
        fsum = 0
        for m in self._mappers:
            # query the size before selectOut() shrinks this mapper
            size = m.getOutSize()
            # bool which meta feature ids belongs to this mapper
            selector = N.logical_and(ids < fsum + size, ids >= fsum)
            # make feature ids relative to this dataset
            selected = ids[selector] - fsum
            fsum += size
            # finally apply to mapper
            m.selectOut(selected)

    def getNeighbor(self, outId, *args, **kwargs):
        """Get the ids of the neighbors of a single feature in output dataspace.

        :Parameters:
          outId: int
            Single id of a feature in output space, whose neighbors should be
            determined.
          *args, **kwargs
            Additional arguments are passed to the metric of the embedded
            mapper, that is responsible for the corresponding feature.

        Returns whatever `getNeighbor()` of the responsible embedded mapper
        returns for the id relative to that mapper.

        :Raises:
          ValueError
            If `outId` does not fall into the range of any embedded mapper.
        """
        fsum = 0
        for m in self._mappers:
            fsum_new = fsum + m.getOutSize()
            if outId >= fsum and outId < fsum_new:
                # NOTE(review): the result is passed through unchanged, i.e.
                # ids are not shifted back by `fsum` -- confirm whether
                # callers expect mapper-local or combined output ids.
                return m.getNeighbor(outId - fsum, *args, **kwargs)
            fsum = fsum_new

        raise ValueError("Invalid outId passed to CombinedMapper.getNeighbor()")

    def __repr__(self):
        """Return `ClassName([metric=..., ]mappers=[...])`."""
        # Mapper.__repr__() always ends with its own closing parenthesis;
        # drop exactly that one, so a metric repr ending in ')' stays intact.
        s = Mapper.__repr__(self)[:-1]
        # separate from a preceding metric argument
        if not s.endswith('('):
            s += ', '
        s += 'mappers=[%s])' % ', '.join([repr(m) for m in self._mappers])
        return s
class ChainMapper(Mapper):
    """Meta mapper that embeds a chain of other mappers.

    Each mapper in the chain is called successively to perform forward or
    reverse mapping.

    .. note::
      In its current implementation the `ChainMapper` treats all but the last
      mapper as simple pre-processing (in forward()) or post-processing (in
      reverse()) steps. All other capabilities, e.g. training and neighbor
      metrics are provided by or affect *only the last mapper in the chain*.

      With respect to neighbor metrics this means that they are determined
      based on the input space of the *last mapper* in the chain and *not* on
      the input dataspace of the `ChainMapper` as a whole
    """
    def __init__(self, mappers, **kwargs):
        """
        :Parameters:
          mappers: list of Mapper instances
          **kwargs
            All additional arguments are passed to the base-class constructor.

        :Raises:
          ValueError
            If `mappers` is empty.
        """
        Mapper.__init__(self, **kwargs)

        if not len(mappers):
            raise ValueError('ChainMapper needs at least one embedded mapper.')

        self._mappers = mappers

    def forward(self, data):
        """Calls all mappers in the chain successively.

        :Parameter:
          data
            data to be chain-mapped.

        Returns the output of the last mapper in the chain.
        """
        mp = data
        for m in self._mappers:
            mp = m.forward(mp)

        return mp

    def reverse(self, data):
        """Calls all mappers in the chain successively, in reversed order.

        :Parameter:
          data: array
            data array to be reverse mapped into the original dataspace.

        Returns the output of `reverse()` of the first mapper in the chain.
        """
        mp = data
        for m in reversed(self._mappers):
            mp = m.reverse(mp)

        return mp

    def train(self, dataset):
        """Trains the *last* mapper in the chain.

        :Parameter:
          dataset: :class:`~mvpa.datasets.base.Dataset` or subclass
            A dataset with the number of features matching the `outSize` of the
            last mapper in the chain (which is identical to the one of the
            `ChainMapper` itself).

        :Raises:
          ValueError
            If `dataset.nfeatures` differs from `getOutSize()`.
        """
        if dataset.nfeatures != self.getOutSize():
            raise ValueError("Training dataset does not match the mapper "
                             "properties.")

        self._mappers[-1].train(dataset)

    def getInSize(self):
        """Returns the size of the entity in input space

        This is the input size of the first mapper in the chain.
        """
        return self._mappers[0].getInSize()

    def getOutSize(self):
        """Returns the size of the entity in output space

        This is the output size of the last mapper in the chain.
        """
        return self._mappers[-1].getOutSize()

    def selectOut(self, outIds):
        """Remove some elements from the *last* mapper in the chain.

        :Parameter:
          outIds: sequence
            All output feature ids to be selected/kept.
        """
        self._mappers[-1].selectOut(outIds)

    def getNeighbor(self, outId, *args, **kwargs):
        """Get the ids of the neighbors of a single feature in output dataspace.

        .. note::
          The neighbors are determined based on the input space of the *last
          mapper* in the chain and *not* on the input dataspace of the
          `ChainMapper` as a whole!

        :Parameters:
          outId: int
            Single id of a feature in output space, whose neighbors should be
            determined.
          *args, **kwargs
            Additional arguments are passed to `getNeighbor()` of the last
            mapper in the chain.

        Returns whatever `getNeighbor()` of the last mapper returns.
        """
        return self._mappers[-1].getNeighbor(outId, *args, **kwargs)

    def __repr__(self):
        """Return `ClassName([metric=..., ]mappers=[...])`."""
        # Mapper.__repr__() always ends with its own closing parenthesis;
        # drop exactly that one, so a metric repr ending in ')' stays intact.
        s = Mapper.__repr__(self)[:-1]
        # separate from a preceding metric argument
        if not s.endswith('('):
            s += ', '
        s += 'mappers=[%s])' % ', '.join([repr(m) for m in self._mappers])
        return s
|