This file is indexed.

/usr/lib/python2.7/dist-packages/mdp/classifier_node.py is in python-mdp 3.5-1ubuntu1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

(A line-number gutter from the original page rendering appeared here; it carried no content and has been removed. The actual file contents follow below.)
import mdp
from mdp import PreserveDimNode, numx, VariadicCumulator
import operator


class ClassifierNode(PreserveDimNode):
    """Base class for classification nodes that stay out of the normal
    execution flow.

    Classification labels do not form a vector space, so passing them
    through a flow makes little sense; this node therefore exposes the
    classification results via dedicated methods ('label', 'prob',
    'rank') while 'execute' acts as the identity by default.
    """

    def __init__(self, execute_method=None,
                 input_dim=None, output_dim=None, dtype=None):
        """Initialize classifier.

        execute_method -- One of the strings 'label', 'rank' or 'prob'
            to make 'execute' return the corresponding classification
            result instead of performing the default identity mapping
            (used when execute_method is None). Useful when this node is
            the last one in a flow, so that Flow.execute yields the
            classification directly.
        """
        self.execute_method = execute_method
        super(ClassifierNode, self).__init__(input_dim=input_dim,
                                             output_dim=output_dim,
                                             dtype=dtype)

    ### Methods to be implemented by the subclasses

    def _label(self, x, *args, **kargs):
        # Subclass hook: return the best label for each datapoint.
        raise NotImplementedError

    def _prob(self, x, *args, **kargs):
        # Subclass hook: return per-label probabilities per datapoint.
        raise NotImplementedError

    ### User interface to the overwritten methods

    def label(self, x, *args, **kwargs):
        """Return an array with the best class label per datapoint.

        Subclasses implement the actual behavior in '_label'; the
        docstring of '_label' overrides this one.
        """
        self._pre_execution_checks(x)
        return self._label(self._refcast(x), *args, **kwargs)

    def prob(self, x, *args, **kwargs):
        """Return the probability of each label for every datapoint
        (e.g., [{1:0.1, 2:0.0, 3:0.9}, {1:1.0, 2:0.0, 3:0.0}, ...]).

        Subclasses implement the actual behavior in '_prob'; the
        docstring of '_prob' overrides this one.
        """
        self._pre_execution_checks(x)
        return self._prob(self._refcast(x), *args, **kwargs)

    def rank(self, x, threshold=None):
        """Return, for each datapoint, the labels sorted by decreasing
        probability (e.g., [[3 1 2], [2 1 3], ...]).

        If the optional threshold is given, labels whose probability is
        equal to or below it are dropped; e.g. threshold=0 removes all
        zero-probability labels.
        """
        rankings = []
        for probabilities in self.prob(x):
            pairs = probabilities.items()
            if threshold is not None:
                # keep only labels strictly above the threshold
                pairs = [(lbl, p) for lbl, p in pairs if p > threshold]
            ordered = sorted(pairs, key=operator.itemgetter(1),
                             reverse=True)
            rankings.append([lbl for lbl, _ in ordered])
        return rankings

    def _execute(self, x):
        # Identity by default; otherwise dispatch on execute_method.
        if not self.execute_method:
            return x
        handler = {"label": self.label,
                   "rank": self.rank,
                   "prob": self.prob}.get(self.execute_method)
        if handler is not None:
            return handler(x)

# XXX are the _train and _stop_training functions necessary anymore?
# XXX are the _train and _stop_training functions necessary anymore?
class ClassifierCumulator(VariadicCumulator('data', 'labels'), ClassifierNode):
    """A ClassifierCumulator is a Node whose training phase simply collects
    all input data and labels. In this way it is possible to easily implement
    batch-mode learning.

    The data is accessible in the attribute 'self.data' after
    the beginning of the '_stop_training' phase. 'self.tlen' contains
    the number of data points collected.
    'self.labels' contains the assigned label to each data point.
    """

    def __init__(self, input_dim=None, output_dim=None, dtype=None):
        super(ClassifierCumulator, self).__init__(input_dim=input_dim,
                                                  output_dim=output_dim,
                                                  dtype=dtype)

    def _check_train_args(self, x, labels):
        """Raise mdp.TrainingException if a per-datapoint label sequence
        does not match the number of datapoints in x."""
        super(ClassifierCumulator, self)._check_train_args(x, labels)
        if (isinstance(labels, (list, tuple, numx.ndarray)) and
            len(labels) != x.shape[0]):
            msg = ("The number of labels must be equal to the number of "
                   "datapoints (%d != %d)" % (len(labels), x.shape[0]))
            raise mdp.TrainingException(msg)

    def _train(self, x, labels):
        """Cumulate all input data in a one dimensional list."""
        self.tlen += x.shape[0]
        self.data.extend(x.ravel().tolist())

        if isinstance(labels, numx.ndarray):
            # flatten a (possibly multi-dimensional) label array
            labels = labels.ravel().tolist()
        elif not isinstance(labels, (list, tuple)):
            # labels is a single scalar: all x's belong to the same class
            labels = [labels] * x.shape[0]
        # BUGFIX: the original called labels.ravel() unconditionally,
        # which raised AttributeError for list/tuple/scalar labels.
        self.labels.extend(labels)

    def _stop_training(self, *args, **kwargs):
        """Transform the data and labels lists to array objects and reshape them."""
        self.data = numx.array(self.data, dtype=self.dtype)
        self.data.shape = (self.tlen, self.input_dim)
        self.labels = numx.array(self.labels)
        # one label per collected datapoint (note: proper 1-tuple)
        self.labels.shape = (self.tlen,)