/usr/share/pyshared/mvpa/featsel/ifs.py is in python-mvpa 0.4.8-3.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
# See COPYING file distributed along with the PyMVPA package for the
# copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Incremental feature search (IFS).
Very similar to Recursive feature elimination (RFE), but instead of beginning
with all features and stripping some sequentially, start with an empty feature
set and include important features successively.
"""
__docformat__ = 'restructuredtext'
import numpy as N
from mvpa.support.copy import copy
from mvpa.featsel.base import FeatureSelection
from mvpa.featsel.helpers import NBackHistoryStopCrit, \
FixedNElementTailSelector, \
BestDetector
from mvpa.misc.state import StateVariable
if __debug__:
from mvpa.base import debug
class IFS(FeatureSelection):
    """Incremental feature search.

    A scalar `DatasetMeasure` is computed multiple times on variations of a
    certain dataset. These measures are in turn used to incrementally select
    important features. Starting with an empty feature set the dataset measure
    is first computed for each single feature. A number of features is selected
    based on the resulting data measure map (using an `ElementSelector`).

    Next the dataset measure is computed again using each feature in addition
    to the already selected feature set. Again the `ElementSelector` is used to
    select more features.

    For each feature selection the transfer error on some test dataset is
    computed. This procedure is repeated until a given `StoppingCriterion`
    is reached.
    """

    # Charged at the end of every `__call__` with the list of transfer errors,
    # one entry per tested feature set.
    errors = StateVariable()

    # NOTE: the default functor instances below are created once, when the
    # class body is executed, and are therefore shared by every IFS object
    # that relies on the defaults. Pass explicit instances if that sharing
    # matters for your use case.
    def __init__(self,
                 data_measure,
                 transfer_error,
                 bestdetector=BestDetector(),
                 stopping_criterion=NBackHistoryStopCrit(BestDetector()),
                 feature_selector=FixedNElementTailSelector(1,
                                                            tail='upper',
                                                            mode='select'),
                 **kwargs
                 ):
        """Initialize incremental feature search

        :Parameters:
          data_measure : DatasetMeasure
            Computed for each candidate feature selection. Each result must
            be convertible to a single scalar value.
          transfer_error : TransferError
            Compute against a test dataset for each incremental feature
            set.
          bestdetector : Functor
            Given a list of error values it has to return a boolean that
            signals whether the latest error value is the total minimum.
          stopping_criterion : Functor
            Given a list of error values it has to return whether the
            criterion is fulfilled.
          feature_selector : Functor
            Given the list of measure values (one per remaining candidate
            feature) it has to return the positions within that list of the
            candidates that should be added to the selected feature set.
          **kwargs
            Passed on to `FeatureSelection.__init__`.
        """
        # bases init first
        FeatureSelection.__init__(self, **kwargs)

        self.__data_measure = data_measure
        self.__transfer_error = transfer_error
        self.__feature_selector = feature_selector
        self.__bestdetector = bestdetector
        self.__stopping_criterion = stopping_criterion

    def __call__(self, dataset, testdataset):
        """Proceed and select the features, incrementally adding the most
        important ones to an initially empty feature set.

        :Parameters:
          `dataset`: `Dataset`
            used to select features and train classifiers to determine the
            transfer error.
          `testdataset`: `Dataset`
            used to test the trained classifier on a certain feature set
            to determine the transfer error.

        Returns a tuple with the dataset containing the feature subset of
        `dataset` that had the lowest transfer error of all tested sets until
        the stopping criterion was reached. The tuple also contains a dataset
        with the corresponding features from the `testdataset`.

        As a side effect the state variable `errors` is set to the list of
        transfer errors, one per tested feature set.

        NOTE: if `dataset` has no features the search loop never runs and
        `None` is handed to `selectFeatures` as the feature selection.
        """
        # computed error for each tested feature set
        errors = []

        # feature candidates are all features in the pattern object;
        # this has to be a real list since selected ids get removed from it
        # (a Python 3 `range` object does not support `remove`)
        candidates = list(range(dataset.nfeatures))

        # initially empty list of selected features
        selected = []

        # results in here please
        results = None

        # as long as there are candidates left
        # the loop will most likely get broken earlier if the stopping
        # criterion is reached
        while candidates:
            # measures for all candidates
            measures = []

            # for all possible candidates
            for i, candidate in enumerate(candidates):
                if __debug__:
                    debug('IFSC', "Tested %i" % i, cr=True)

                # take the new candidate and all already selected features
                # select a new temporary feature subset from the dataset
                # XXX assume MappedDataset and issue plain=True ??
                tmp_dataset = \
                    dataset.selectFeatures(selected + [candidate])

                # compute data measure on this feature set
                measures.append(self.__data_measure(tmp_dataset))

            # reduce every measure to a plain Python scalar; equivalent to
            # the former N.asscalar() (which is gone from recent NumPy) and
            # additionally tolerant to measures that already are scalars
            measures = [N.asarray(m).item() for m in measures]

            # Select promising feature candidates (staging)
            # IDs are only applicable to the current set of feature candidates
            tmp_staging_ids = self.__feature_selector(measures)

            # translate into real candidate ids
            staging_ids = [candidates[i] for i in tmp_staging_ids]

            # mark them as selected and remove from candidates
            selected += staging_ids
            for i in staging_ids:
                candidates.remove(i)

            # compute transfer error for the new set
            # XXX assume MappedDataset and issue plain=True ??
            error = self.__transfer_error(testdataset.selectFeatures(selected),
                                          dataset.selectFeatures(selected))
            errors.append(error)

            # Check if it is time to stop and if we got
            # the best result
            stop = self.__stopping_criterion(errors)
            isthebest = self.__bestdetector(errors)

            if __debug__:
                debug('IFSC',
                      "nselected %i; error: %.4f " \
                      "best/stop=%d/%d\n" \
                      % (len(selected), errors[-1], isthebest, stop),
                      cr=True, lf=True)

            if isthebest:
                # do copy to survive later selections
                results = copy(selected)

            # leave the loop when the criterion is reached
            if stop:
                break

        # charge state
        self.errors = errors

        # best dataset ever is returned
        return dataset.selectFeatures(results), \
               testdataset.selectFeatures(results)
|