/usr/lib/python2.7/dist-packages/mdp/classifier_node.py is in python-mdp 3.5-1ubuntu1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.

import mdp
from mdp import PreserveDimNode, numx, VariadicCumulator
import operator
class ClassifierNode(PreserveDimNode):
    """Base class for nodes that perform classification.

    Classification results live outside the normal execution flow: the
    labels produced do not form a vector space, so they would not make
    sense as regular flow data. By default execution is the identity;
    set execute_method to route execution to one of the classification
    methods instead.
    """
    def __init__(self, execute_method=None,
                 input_dim=None, output_dim=None, dtype=None):
        """Initialize classifier.

        execute_method -- Set to string value 'label', 'rank', or 'prob' to
            force the corresponding classification method being used instead
            of the standard identity execution (which is used when
            execute_method has the default value None). This can be used when
            the node is last in a flow, the return value from Flow.execute
            will then consist of the classification results.
        """
        self.execute_method = execute_method
        super(ClassifierNode, self).__init__(input_dim=input_dim,
                                             output_dim=output_dim,
                                             dtype=dtype)

    ### Methods to be implemented by the subclasses

    def _label(self, x, *args, **kargs):
        # Subclass hook: return the best label for each datapoint.
        raise NotImplementedError

    def _prob(self, x, *args, **kargs):
        # Subclass hook: return per-datapoint label probabilities.
        raise NotImplementedError

    ### User interface to the overwritten methods

    def label(self, x, *args, **kwargs):
        """Returns an array with best class labels.

        By default, subclasses should overwrite _label to implement
        their label. The docstring of the '_label' method
        overwrites this docstring.
        """
        self._pre_execution_checks(x)
        return self._label(self._refcast(x), *args, **kwargs)

    def prob(self, x, *args, **kwargs):
        """Returns the probability for each datapoint and label
        (e.g., [{1:0.1, 2:0.0, 3:0.9}, {1:1.0, 2:0.0, 3:0.0}, ...])

        By default, subclasses should overwrite _prob to implement
        their prob. The docstring of the '_prob' method
        overwrites this docstring.
        """
        self._pre_execution_checks(x)
        return self._prob(self._refcast(x), *args, **kwargs)

    def rank(self, x, threshold=None):
        """Returns ordered list with all labels ordered according to prob(x)
        (e.g., [[3 1 2], [2 1 3], ...]).

        The optional threshold parameter is used to exclude labels having equal
        or less probability. E.g. threshold=0 excludes all labels with zero
        probability.
        """
        rankings = []
        for label_probs in self.prob(x):
            if threshold is None:
                candidates = list(label_probs.items())
            else:
                # keep only labels strictly above the threshold
                candidates = [pair for pair in label_probs.items()
                              if pair[1] > threshold]
            ordered = sorted(candidates, key=operator.itemgetter(1),
                             reverse=True)
            rankings.append([label for label, _prob in ordered])
        return rankings

    def _execute(self, x):
        # Identity execution unless a classification method was requested.
        if not self.execute_method:
            return x
        dispatch = {"label": self.label,
                    "rank": self.rank,
                    "prob": self.prob}
        # Unknown non-empty values fall through and yield None,
        # matching the original if/elif chain.
        if self.execute_method in dispatch:
            return dispatch[self.execute_method](x)
# XXX are the _train and _stop_training functions necessary anymore?
# XXX are the _train and _stop_training functions necessary anymore?

class ClassifierCumulator(VariadicCumulator('data', 'labels'), ClassifierNode):
    """A ClassifierCumulator is a Node whose training phase simply collects
    all input data and labels. In this way it is possible to easily implement
    batch-mode learning.

    The data is accessible in the attribute 'self.data' after
    the beginning of the '_stop_training' phase. 'self.tlen' contains
    the number of data points collected.
    'self.labels' contains the assigned label to each data point.
    """

    def __init__(self, input_dim=None, output_dim=None, dtype=None):
        super(ClassifierCumulator, self).__init__(input_dim=input_dim,
                                                  output_dim=output_dim,
                                                  dtype=dtype)

    def _check_train_args(self, x, labels):
        # A sequence of labels must have one entry per datapoint; a
        # scalar label is also allowed (broadcast in _train).
        super(ClassifierCumulator, self)._check_train_args(x, labels)
        if (isinstance(labels, (list, tuple, numx.ndarray)) and
            len(labels) != x.shape[0]):
            msg = ("The number of labels must be equal to the number of "
                   "datapoints (%d != %d)" % (len(labels), x.shape[0]))
            raise mdp.TrainingException(msg)

    def _train(self, x, labels):
        """Cumulate all input data in a one dimensional list."""
        self.tlen += x.shape[0]
        self.data.extend(x.ravel().tolist())

        # BUG FIX: the original code called labels.ravel() unconditionally,
        # which raised AttributeError whenever labels was a list/tuple or a
        # scalar (the broadcast `[labels] * n` is a plain list and has no
        # ravel()); only ndarray labels ever worked. Normalize every
        # accepted labels form to a flat Python list before extending.
        if isinstance(labels, (list, tuple, numx.ndarray)):
            labels = numx.array(labels).ravel().tolist()
        else:
            # labels is a single number: all x's belong to the same class
            labels = [labels] * x.shape[0]
        self.labels.extend(labels)

    def _stop_training(self, *args, **kwargs):
        """Transform the data and labels lists to array objects and reshape them."""
        self.data = numx.array(self.data, dtype=self.dtype)
        self.data.shape = (self.tlen, self.input_dim)
        self.labels = numx.array(self.labels)
        # use a real 1-tuple; the original `(self.tlen)` was just an int
        self.labels.shape = (self.tlen,)
|