This file is indexed.

/usr/share/pyshared/mvpa/featsel/ifs.py is in python-mvpa 0.4.8-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Incremental feature search (IFS).

Very similar to Recursive feature elimination (RFE), but instead of beginning
with all features and stripping some sequentially, start with an empty feature
set and include important features successively.
"""

# Declares the markup format used by the docstrings in this module.
__docformat__ = 'restructuredtext'

import numpy as N
from mvpa.support.copy import copy

from mvpa.featsel.base import FeatureSelection
from mvpa.featsel.helpers import NBackHistoryStopCrit, \
                                 FixedNElementTailSelector, \
                                 BestDetector

from mvpa.misc.state import StateVariable

if __debug__:
    from mvpa.base import debug


class IFS(FeatureSelection):
    """Incremental feature search.

    A scalar `DatasetMeasure` is computed multiple times on variations of a
    certain dataset. These measures are in turn used to incrementally select
    important features. Starting with an empty feature set the dataset measure
    is first computed for each single feature. A number of features is selected
    based on the resulting data measure map (using an `ElementSelector`).

    Next the dataset measure is computed again using each feature in addition
    to the already selected feature set. Again the `ElementSelector` is used to
    select more features.

    For each feature selection the transfer error on some test dataset is
    computed. This procedure is repeated until a given `StoppingCriterion`
    is reached.
    """

    # list of transfer errors, one per tested feature set; assigned at the
    # end of every __call__
    errors = StateVariable()

    def __init__(self,
                 data_measure,
                 transfer_error,
                 bestdetector=BestDetector(),
                 stopping_criterion=NBackHistoryStopCrit(BestDetector()),
                 feature_selector=FixedNElementTailSelector(1,
                                                            tail='upper',
                                                            mode='select'),
                 **kwargs
                 ):
        """Initialize incremental feature search

        :Parameters:
            data_measure : DatasetMeasure
                Computed for each candidate feature selection. Its result
                has to be convertible to a single scalar.
            transfer_error : TransferError
                Compute against a test dataset for each incremental feature
                set.
            bestdetector : Functor
                Given a list of error values it has to return a boolean that
                signals whether the latest error value is the total minimum.
            stopping_criterion : Functor
                Given a list of error values it has to return whether the
                criterion is fulfilled.
            feature_selector : Functor
                Given the list of scalar measures of the current candidates
                it has to return the indices (into that list) of the
                candidates that should be added to the selected feature set.
            **kwargs
                Passed on to the `FeatureSelection` base class.
         """
        # bases init first
        FeatureSelection.__init__(self, **kwargs)

        self.__data_measure = data_measure
        self.__transfer_error = transfer_error
        self.__feature_selector = feature_selector
        self.__bestdetector = bestdetector
        self.__stopping_criterion = stopping_criterion


    def __call__(self, dataset, testdataset):
        """Proceed and select the features by incrementally including the
        most important ones.

        :Parameters:
            `dataset`: `Dataset`
                used to select features and train classifiers to determine the
                transfer error.
            `testdataset`: `Dataset`
                used to test the trained classifier on a certain feature set
                to determine the transfer error.

        Returns a tuple with the dataset containing the feature subset of
        `dataset` that had the lowest transfer error of all tested sets until
        the stopping criterion was reached. The tuple also contains a dataset
        with the corresponding features from the `testdataset`.

        As a side effect the list of all computed transfer errors is stored
        in the `errors` state variable.

        NOTE(review): if the loop body never runs (no features) or
        `bestdetector` never flags an error as the best one, `None` is passed
        to `selectFeatures()` -- the outcome then depends on the dataset
        implementation.
        """
        # computed error for each tested feature set
        errors = []

        # feature candidates are all features in the pattern object
        # (a real list is required: ids get removed from it below, which a
        # Python 3 `range` object does not support)
        candidates = list(range(dataset.nfeatures))

        # initially empty list of selected features
        selected = []

        # feature ids of the best selection seen so far
        results = None

        # as long as there are candidates left
        # the loop will most likely get broken earlier if the stopping
        # criterion is reached
        while candidates:
            # measures for all candidates
            measures = []

            # for all possible candidates
            for i, candidate in enumerate(candidates):
                if __debug__:
                    debug('IFSC', "Tested %i" % i, cr=True)

                # take the new candidate and all already selected features
                # select a new temporary feature subset from the dataset
                # XXX assume MappedDataset and issue plain=True ??
                tmp_dataset = \
                    dataset.selectFeatures(selected + [candidate])

                # compute data measure on this feature set
                measures.append(self.__data_measure(tmp_dataset))

            # reduce every measure to a plain Python scalar
            # (N.asscalar is deprecated/removed in recent NumPy releases;
            # .item() is what it called internally)
            measures = [N.asarray(m).item() for m in measures]

            # Select promising feature candidates (staging)
            # IDs are only applicable to the current set of feature candidates
            tmp_staging_ids = self.__feature_selector(measures)

            # translate into real candidate ids
            staging_ids = [ candidates[i] for i in tmp_staging_ids ]

            # mark them as selected and remove from candidates
            selected += staging_ids
            for i in staging_ids:
                candidates.remove(i)

            # compute transfer error for the new set
            # XXX assume MappedDataset and issue plain=True ??
            error = self.__transfer_error(testdataset.selectFeatures(selected),
                                          dataset.selectFeatures(selected))
            errors.append(error)

            # Check if it is time to stop and if we got
            # the best result
            stop = self.__stopping_criterion(errors)
            isthebest = self.__bestdetector(errors)

            if __debug__:
                debug('IFSC',
                      "nselected %i; error: %.4f " \
                      "best/stop=%d/%d\n" \
                      % (len(selected), errors[-1], isthebest, stop),
                      cr=True, lf=True)

            if isthebest:
                # do copy to survive later selections
                # (`selected` keeps being extended in place)
                results = copy(selected)

            # leave the loop when the criterion is reached
            if stop:
                break

        # charge state
        self.errors = errors

        # best dataset ever is returned
        return dataset.selectFeatures(results), \
               testdataset.selectFeatures(results)