This file is indexed.

/usr/lib/python3/dist-packages/mdp/test/test_classifier.py is in python3-mdp 3.5-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

# -*- coding: utf-8 -*-

"""These are test functions for MDP classifiers.
"""
from __future__ import division
from builtins import zip
from builtins import range
from past.utils import old_div
from ._tools import *

from mdp import ClassifierNode
from mdp.nodes import (SignumClassifier, PerceptronClassifier,
                       SimpleMarkovClassifier, DiscreteHopfieldClassifier,
                       KMeansClassifier)

def _sigmoid(t):
    return old_div(1.0, (1.0 + numx.exp(-t)))

class _BogusClassifier(ClassifierNode):
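    # A minimal, untrainable stand-in classifier used by the tests below:
    # labels are taken from the top-ranked class (rank() is inherited from
    # ClassifierNode), and probabilities are a two-class dict per sample
    # derived from a sigmoid of the feature sum.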
    @staticmethod
    def is_trainable():
        return False
    def _label(self, x):
        return [r[0] for r in self.rank(x)]
    def _prob(self, x):
        return [{-1: _sigmoid(sum(xi)), \
                  1: 1 - _sigmoid(sum(xi))} for xi in x]


def testClassifierNode_ranking():
    bc = _BogusClassifier()
    test_data = numx_rand.random((30, 20)) - 0.5
    for r, p in zip(bc.rank(test_data), bc.prob(test_data)):
        # check that the ranking order is correct
        assert p[r[0]] >= p[r[1]], "Rank returns labels in incorrect order"
        # check that the probabilities sum up to 1
        assert 0.999 < p[r[0]] + p[r[1]] < 1.001

def testClassifier_execute_method():
    """Test that the execute result has the correct format when execute_method
    is used.
    """
    bc = _BogusClassifier(execute_method="label")
    data = numx_rand.random((5, 20)) - 0.5
    result = bc.execute(data)
    assert isinstance(result, list)
    assert isinstance(result[0], int)
    bc.execute_method = "prob"
    result = bc.execute(data)
    assert isinstance(result, list)
    assert isinstance(result[0], dict)
    bc.execute_method = "rank"
    result = bc.execute(data)
    assert isinstance(result, list)
    assert isinstance(result[0], list)

def testSignumClassifier():
    c = SignumClassifier()
    res = c.label(mdp.numx.array([[1, 2, -3, -4], [1, 2, 3, 4]]))
    assert c.input_dim == 4
    assert res.tolist() == [-1, 1]

def testPerceptronClassifier():
    or_Classifier = PerceptronClassifier()
    for i in range(100):
        or_Classifier.train(mdp.numx.array([[0., 0.]]), -1)
        or_Classifier.train(mdp.numx.array([[0., 1.], [1., 0.], [1., 1.]]), 1)
    assert or_Classifier.input_dim == 2

    res = or_Classifier.label(mdp.numx.array([[0., 0.], [0., 1.], [1., 0.], [1., 1.]]))
    assert res.tolist() == [-1, 1, 1, 1]

    and_Classifier = PerceptronClassifier()
    for i in range(100):
        and_Classifier.train(mdp.numx.array([[0., 0.], [0., 1.], [1., 0.]]), -1)
        and_Classifier.train(mdp.numx.array([[1., 1.]]), 1)
    res = and_Classifier.label(mdp.numx.array([[0., 0.], [0., 1.], [1., 0.], [1., 1.]]))
    assert res.tolist() == [-1, -1, -1, 1]

    xor_Classifier = PerceptronClassifier()
    for i in range(100):
        xor_Classifier.train(mdp.numx.array([[0., 0.], [1., 1.]]), -1)
        xor_Classifier.train(mdp.numx.array([[0., 1.], [1., 0.]]), 1)
    res = xor_Classifier.label(mdp.numx.array([[0., 0.], [0., 1.], [1., 0.], [1., 1.]]))
    assert res.tolist() != [-1, 1, 1, -1], \
        "Something must be wrong here: a single-layer perceptron cannot learn XOR."


def testSimpleMarkovClassifier():
    mc = SimpleMarkovClassifier(dtype="c")
    text = "after the letter e follows either space or the letters r t or i"

    for word in text.split():
        word = word.lower()

        features = list(zip(" " + word))
        labels = list(word + " ")

        mc.train(mdp.numx.array(features), labels)

    assert mc.input_dim == 1

    num_transitions = 0
    features = mc.features
    for feature, count in list(features.items()):
        if count:
            prob = mc.prob(mdp.numx.array([feature]))
            prob_sum = 0
            for p in prob:
                for k, v in list(p.items()):
                    prob_sum += v
                    if v:
                        num_transitions += 1

            assert abs(prob_sum - 1.0) < 1e-5

    # count the distinct transitions (subtracting {(' ', ' ')} removes the
    # spurious space-to-space transition introduced by the double-space join)
    trans = len(set((list(zip("  ".join(text.split()) + " ", \
                         " " + "  ".join(text.split()))))) - set([(' ', ' ')]))
    assert num_transitions == trans

    letters_following_e = [' ', 'r', 't', 'i']
    letters_prob = mc.prob(mdp.numx.array([['e']]))[0]
    prob_sum = 0
    for letter, prob in list(letters_prob.items()):
        prob_sum += prob
        if prob > 1e-5:
            assert letter in letters_following_e

    assert abs(prob_sum - 1.0) < 1e-5

def testDiscreteHopfieldClassifier():
    h = DiscreteHopfieldClassifier()

    memory_size = 100
    patterns = numx.array(
               [numx.sin(numx.linspace(0, 100 * numx.pi, memory_size)) > 0,
                numx.sin(numx.linspace(0, 50 * numx.pi, memory_size)) > 0,
                numx.sin(numx.linspace(0, 20 * numx.pi, memory_size)) > 0,
                numx.sin(numx.linspace(0, 15 * numx.pi, memory_size)) > 0,
                numx.sin(numx.linspace(0, 10 * numx.pi, memory_size)) > 0,
                numx.sin(numx.linspace(0, 5 * numx.pi, memory_size)) > 0,
                numx.sin(numx.linspace(0, 2 * numx.pi, memory_size)) > 0
                ])
    h.train(patterns)
    h.input_dim = memory_size

    for p in patterns:
        # check if patterns are fixpoints
        assert numx.all(p == h.label(numx.array([p])))

    for p in patterns:
        # check that a noisy pattern is restored to the stored one
        noisy = numx.array(p)
        for i in range(len(noisy)):
            if numx.random.random() > 0.95:
                noisy[i] = not noisy[i]
        retrieved = h.label(numx.array([noisy]))
        # Hopfield nets are blind to inversion, so accept either case
        assert numx.all(retrieved == p) or numx.all(retrieved != p)

def testKMeansClassifier():
    num_centroids = 3
    k = KMeansClassifier(num_centroids)
    a = numx.random.rand(50, 2)
    k.train(a)
    res = k.label(a)

    # check that the number of centroids is correct
    assert len(set(res)) == num_centroids

    k = KMeansClassifier(2)
    a1 = numx.random.rand(50, 2) - 1
    a2 = numx.random.rand(50, 2) + 1
    k.train(a1)
    k.train(a2)
    res1 = k.label(a1)
    res2 = k.label(a2)
    # check that both clusters are completely identified and different
    assert (len(set(res1)) == 1 and
            len(set(res2)) == 1 and
            set(res1) != set(res2)
            ), ("Error in K-Means classifier. "
                "This might be a bug or just a local minimum.")