/usr/share/doc/python-mvpa2-doc/examples/gpr.py is in python-mvpa2-doc 2.6.4-2.

This file is owned by root:root, with mode 0o755.

The actual contents of the file can be viewed below.

#!/usr/bin/env python
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   Copyright (c) 2008 Emanuele Olivetti <emanuele@relativita.com>
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""
The effect of different hyperparameters in GPR
==============================================

.. index:: GPR

The following example runs Gaussian Process Regression (GPR) on a
simple 1D dataset using a squared exponential (i.e. Gaussian or RBF)
kernel and four different sets of hyperparameters. The resulting
regression solutions are finally visualized in a single figure.
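
For reference, GPR models the targets as noisy observations of a latent
function drawn from a Gaussian process prior:

.. math::

   y = f(x) + \epsilon, \qquad
   f \sim \mathcal{GP}(0, k(x, x')), \qquad
   \epsilon \sim \mathcal{N}(0, \sigma_n^2)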

As usual we start by importing all of PyMVPA:
"""

# Let's use LaTeX for proper rendering of Greek symbols
from matplotlib import rc
rc('text', usetex=True)

from mvpa2.suite import *

"""
The next lines build two datasets using one of PyMVPA's data
generators.
"""

# Generate dataset for training:
train_size = 40
F = 1
dataset = data_generators.sin_modulated(train_size, F)

# Generate dataset for testing:
test_size = 100
dataset_test = data_generators.sin_modulated(test_size, F, flat=True)

"""
The last configuration step is the definition of four sets of
hyperparameters to be used for GPR.
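
Each row parameterizes the standard squared exponential kernel plus
additive Gaussian observation noise,

.. math::

   k(x, x') = \sigma_f^2 \exp(-\|x - x'\|^2 / (2 \ell^2)),

where :math:`\sigma_f` scales the signal magnitude, :math:`\ell` is the
length scale (``length_scale`` in the code) and :math:`\sigma_n`
(``sigma_noise``) is the standard deviation of the observation noise.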
"""

# Hyperparameters. Each row is [sigma_f, length_scale, sigma_noise]
hyperparameters = np.array([[1.0, 0.2, 0.4],
                            [1.0, 0.1, 0.1],
                            [1.0, 1.0, 0.1],
                            [1.0, 0.1, 1.0]])

"""
The plotting of the final figure and the actual GPR runs are
performed in a single loop.
"""

rows = 2
columns = 2
pl.figure(figsize=(12, 12))
for i in range(rows*columns):
    pl.subplot(rows, columns, i+1)
    regression = True
    logml = True

    data_train = dataset.samples
    label_train = dataset.sa.targets
    data_test = dataset_test.samples
    label_test = dataset_test.sa.targets

    """
    The next lines configure a squared exponential kernel with the set of
    hyperparameters for the current subplot and assign the kernel to the GPR
    instance.
    """

    sigma_f, length_scale, sigma_noise = hyperparameters[i, :]
    kse = SquaredExponentialKernel(length_scale=length_scale,
                                   sigma_f=sigma_f)
    g = GPR(kse, sigma_noise=sigma_noise)
    if not regression:
        g = RegressionAsClassifier(g)
    print(g)

    if regression:
        g.ca.enable("predicted_variances")

    if logml:
        g.ca.enable("log_marginal_likelihood")

    """
    After training, GPR predictions are queried by passing the test
    dataset samples, and accuracy measures are computed.
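
    For a training set :math:`(X, y)` and a test input :math:`x_*` the
    predictions follow the standard closed-form GPR equations

    .. math::

       \mu_* = k_*^T (K + \sigma_n^2 I)^{-1} y, \qquad
       v_* = k(x_*, x_*) - k_*^T (K + \sigma_n^2 I)^{-1} k_*,

    and the log marginal likelihood reported below for each
    hyperparameter set is

    .. math::

       \log p(y \mid X) = -(1/2) y^T (K + \sigma_n^2 I)^{-1} y
       - (1/2) \log|K + \sigma_n^2 I| - (n/2) \log 2\pi.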
    """

    g.train(dataset)
    prediction = g.predict(data_test)

    # print label_test
    # print prediction
    accuracy = None
    if regression:
        accuracy = np.sqrt(((prediction-label_test)**2).sum()/prediction.size)
        print("RMSE:", accuracy)
    else:
        accuracy = (prediction.astype('l')==label_test.astype('l')).sum() \
                   / float(prediction.size)
        print("accuracy:", accuracy)

    """
    The remaining code simply plots both training and test datasets, as
    well as the GPR solutions.
    """

    if F == 1:
        pl.title(r"$\sigma_f=%0.2f$, $\ell=%0.2f$, $\sigma_n=%0.2f$"
                 % (sigma_f, length_scale, sigma_noise))
        pl.plot(data_train, label_train, "ro", label="train")
        pl.plot(data_test, prediction, "b-", label="prediction")
        pl.plot(data_test, label_test, "g+", label="test")
        if regression:
            pl.plot(data_test, prediction - np.sqrt(g.ca.predicted_variances),
                       "b--", label=None)
            pl.plot(data_test, prediction+np.sqrt(g.ca.predicted_variances),
                       "b--", label=None)
            pl.text(0.5, -0.8, "$RMSE=%.3f$" %(accuracy))
            pl.text(0.5, -0.95, "$LML=%.3f$" %(g.ca.log_marginal_likelihood))
        else:
            pl.text(0.5, -0.8, "$accuracy=%s$" % accuracy)

        pl.legend(loc='lower right')

    print("LML:", g.ca.log_marginal_likelihood)


if cfg.getboolean('examples', 'interactive', True):
    # show all the cool figures
    pl.show()
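
"""
For illustration, the predictive mean computed by ``GPR`` above can be
reproduced with a few lines of plain NumPy. The sketch below is not part
of the original example and only re-implements the mean equation shown
earlier; the two helper functions are hypothetical, not part of the
PyMVPA API:
"""

def sqexp_kernel(A, B, sigma_f, length_scale):
    # Squared exponential kernel matrix between sample arrays A and B
    d2 = ((A[:, None, :] - B[None, :, :]) ** 2).sum(axis=2)
    return sigma_f ** 2 * np.exp(-d2 / (2.0 * length_scale ** 2))

def gpr_predict_mean(X, y, Xs, sigma_f, length_scale, sigma_noise):
    # Predictive mean: k_*^T (K + sigma_n^2 I)^{-1} y
    K = sqexp_kernel(X, X, sigma_f, length_scale)
    Ks = sqexp_kernel(X, Xs, sigma_f, length_scale)
    alpha = np.linalg.solve(K + sigma_noise ** 2 * np.eye(len(X)), y)
    return np.dot(Ks.T, alpha)

# Example usage (should closely match the last GPR prediction above):
# mean = gpr_predict_mean(dataset.samples, dataset.sa.targets,
#                         dataset_test.samples, sigma_f, length_scale,
#                         sigma_noise)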