/usr/share/pyshared/mvpa/datasets/meta.py is in python-mvpa 0.4.8-3.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
# See COPYING file distributed along with the PyMVPA package for the
# copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Dataset container"""
__docformat__ = 'restructuredtext'

import numpy as N
import random

from mvpa.datasets.mapped import MappedDataset

if __debug__:
    from mvpa.base import debug, warning

class MetaDataset(object):
    """Dataset container

    The class is useful to combine several Datasets with different origin and
    type and bind them together. Such a combined dataset can then be used to
    e.g. pass it to a classifier.

    MetaDataset does not permanently duplicate data stored in the datasets it
    contains. The combined samples matrix is built on demand and samples
    attribute access is redirected to the first dataset in the container.

    Currently, operations other than sample or feature selection are not fully
    supported, e.g. passing a MetaDataset to detrend() will initially result in
    a detrended MetaDataset, but the combined and detrended samples matrix will
    be lost after the next call to selectSamples() or selectFeatures(), which
    freshly pulls samples from all datasets in the container.
    """
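    # Usage sketch (all names below are illustrative): given two Datasets
    # `ds_a` and `ds_b` with identical samples attributes,
    #
    #   meta = MetaDataset([ds_a, ds_b])
    #   meta.samples   # hstack of both samples matrices (built and cached)
    #   meta.labels    # redirected to ds_a, the first dataset
    #
    # and `meta` can then be fed to e.g. a classifier's train() call.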
    # This class is intentionally _not_ implemented as a subclass of Dataset.
    # IMHO Dataset contains too much unnecessary logic.

    # XXX implement MappedMetaDataset along with a MetaMapper that simply calls
    # the mappers in the datasets in the container; or maybe just add a flag to
    # MetaDataset to behave like a MappedDataset
    def __init__(self, datasets):
        """Initialize dataset instance

        :Parameters:
          datasets : list
        """
        # XXX Maybe add checks that all datasets have identical samples
        # attributes
        self.__datasets = datasets

        # contains the combined samples matrix for caching
        self.__samples = None

    def rebuildSamples(self):
        """Update the combined samples matrix from all underlying datasets.
        """
        # note that hstack will make a copy of _all_ data
        self.__samples = N.hstack([ds.samples for ds in self.__datasets])
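        # e.g. contained samples matrices of shape (4, 3) and (4, 2) combine
        # into a single (4, 5) array; the shapes are illustrative only
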
    def __getattr__(self, name):
        """Implemented to redirect access to underlying datasets.
        """
        if name == 'samples':
            # do something to combine (and cache) the samples arrays
            if self.__samples is None:
                self.rebuildSamples()
            return self.__samples
        else:
            # redirect all other attributes to the first dataset
            # ??? maybe limit to some specific supported ones
            return self.__datasets[0].__getattribute__(name)
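    # Note: __getattr__ is only consulted for attribute names not found on the
    # MetaDataset instance itself. So e.g. `meta.labels` or `meta.uniquelabels`
    # (illustrative names) resolve to the first contained dataset, while
    # `meta.samples` returns the cached combined matrix built above.
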
    def selectFeatures(self, ids, sort=True):
        """Do feature selection on all underlying datasets at once.
        """
        # determine which features belong to what dataset
        # and call its selectFeatures() accordingly
        ids = N.asanyarray(ids)
        result = []
        fsum = 0

        for ds in self.__datasets:
            # boolean mask of the meta feature ids that belong to this dataset
            selector = N.logical_and(ids < fsum + ds.nfeatures, ids >= fsum)
            # make feature ids relative to this dataset
            selected = ids[selector] - fsum
            # do feature selection on underlying dataset
            # XXX not sure if we should keep empty datasets? (probably)
            result.append(ds.selectFeatures(selected))
            fsum += ds.nfeatures

        return MetaDataset(result)
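    # For example, with two contained datasets of 10 and 5 features, the meta
    # feature ids [2, 11, 14] split into ids [2] for the first dataset and
    # [1, 4] (i.e. 11 - 10 and 14 - 10) for the second; the feature counts
    # here are illustrative only.
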
    def applyMapper(self, *args, **kwargs):
        """Apply a mapper on all underlying datasets.
        """
        return MetaDataset([ds.applyMapper(*args, **kwargs) \
                   for ds in self.__datasets])


    def selectSamples(self, *args, **kwargs):
        """Select samples from all underlying datasets at once.
        """
        return MetaDataset([ds.selectSamples(*args, **kwargs) \
                   for ds in self.__datasets])

    def permuteLabels(self, *args, **kwargs):
        """Toggle label permutation.
        """
        # permute on the first dataset
        self.__datasets[0].permuteLabels(*args, **kwargs)

        # and propagate the resulting labels to all others, keeping the
        # contained datasets aligned
        for ds in self.__datasets[1:]:
            ds.labels[:] = self.__datasets[0].labels

    def getRandomSamples(self, nperlabel):
        """Return a MetaDataset with a random subset of samples.
        """
        # if an integer is given, take this value for all classes
        if isinstance(nperlabel, int):
            nperlabel = [ nperlabel for i in self.__datasets[0].uniquelabels ]

        sample = []

        # for each available class
        for i, r in enumerate(self.__datasets[0].uniquelabels):
            # get the list of pattern ids for this class
            sample += \
                random.sample((self.__datasets[0].labels == r).nonzero()[0],
                              nperlabel[i])

        return MetaDataset([ds.selectSamples(sample) \
                   for ds in self.__datasets])
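    # e.g. getRandomSamples(2) draws two random sample ids per unique label of
    # the first dataset and applies the identical selection to every contained
    # dataset, so they stay aligned.
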
    def getNSamples( self ):
        """Currently available number of samples.
        """
        return self.__datasets[0].nsamples


    def getNFeatures( self ):
        """Number of features per sample.
        """
        return N.sum([ds.nfeatures for ds in self.__datasets])

    def setSamplesDType(self, dtype):
        """Set the data type of the samples array.
        """
        # reset the cached combined samples array
        self.__samples = None

        for ds in self.__datasets:
            if ds.samples.dtype != dtype:
                ds.samples = ds.samples.astype(dtype)

    def mapReverse(self, val):
        """Perform reverse mapping.

        :Return:
          List of results per each used mapper and the corresponding part of
          the provided `val`.
        """
        # ensure an array and transpose for easy slicing,
        # i.e. a transpose of 1D does nothing, but of 2D puts the features
        # along the first dimension
        val = N.asanyarray(val).T

        # do we have multiple samples or just one? (currently unused)
        mflag = len(val.shape) > 1

        result = []
        fsum = 0
        for ds in self.__datasets:
            # calculate the upper border
            fsum_new = fsum + ds.nfeatures

            # now map back if a mapper is present, otherwise just store
            # need to pass transposed!!
            if isinstance(ds, MappedDataset):
                result.append(ds.mapReverse(val[fsum:fsum_new].T))
            else:
                result.append(val[fsum:fsum_new].T)

            fsum = fsum_new

        return result
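    # e.g. for contained datasets of 10 and 5 features, a 15-element `val`
    # splits into val[:10] and val[10:]; each part is mapped back through its
    # dataset's mapper where one exists (MappedDataset) and returned as-is
    # otherwise; the feature counts here are illustrative only.
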
    # read-only class properties
    nsamples  = property(fget=getNSamples)
    nfeatures = property(fget=getNFeatures)
    datasets  = property(fget=lambda self: self.__datasets)
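
# A minimal usage sketch, assuming PyMVPA 0.4's Dataset (importable from
# mvpa.datasets and accepting `samples`, `labels` and `chunks` keyword
# arguments); all shapes and attribute values below are illustrative only.
if __name__ == '__main__':
    from mvpa.datasets import Dataset

    ds1 = Dataset(samples=N.random.rand(4, 3),
                  labels=[0, 0, 1, 1], chunks=[0, 0, 1, 1])
    ds2 = Dataset(samples=N.random.rand(4, 2),
                  labels=[0, 0, 1, 1], chunks=[0, 0, 1, 1])
    meta = MetaDataset([ds1, ds2])

    # the combined samples matrix is 4 samples x (3 + 2) features
    assert meta.nsamples == 4
    assert meta.nfeatures == 5
    assert meta.samples.shape == (4, 5)

    # feature ids are transparently split across the contained datasets:
    # ids 0 and 1 select from ds1, id 3 becomes feature 0 of ds2
    sub = meta.selectFeatures([0, 1, 3])
    assert sub.nfeatures == 3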
|