This file is indexed.

/usr/lib/python2.7/dist-packages/dipy/reconst/cross_validation.py is in python-dipy 0.10.1-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
"""
Cross-validation analysis of diffusion models
"""
from __future__ import division, print_function, absolute_import
from dipy.utils.six.moves import range

import numpy as np
import dipy.core.gradients as gt


def coeff_of_determination(data, model, axis=-1):
    """
    Calculate the coefficient of determination for a model prediction, relative
    to data.

    Parameters
    ----------
    data : ndarray
        The data
    model : ndarray
        The predictions of a model for this data. Same shape as the data.
    axis: int, optional
        The axis along which different samples are laid out (default: -1).

    Returns
    -------
    COD : ndarray
       The coefficient of determination. This has shape `data.shape[:-1]`


    Notes
    -----

    See: http://en.wikipedia.org/wiki/Coefficient_of_determination

    The coefficient of determination is calculated as:

    .. math::

        R^2 = 100 * (1 - \frac{SSE}{SSD})

    where SSE is the sum of the squared error between the model and the data
    (sum of the squared residuals) and SSD is the sum of the squares of the
    deviations of the data from the mean of the data (variance * N).
    """

    residuals = data - model
    ss_err = np.sum(residuals ** 2, axis=axis)

    demeaned_data = data - np.mean(data, axis=axis)[..., np.newaxis]
    ss_tot = np.sum(demeaned_data ** 2, axis=axis)

    # Don't divide by 0:
    if np.all(ss_tot == 0.0):
        return np.nan

    return 100 * (1 - (ss_err/ss_tot))


def kfold_xval(model, data, folds, *model_args, **model_kwargs):
    """
    Perform k-fold cross-validation to generate out-of-sample predictions for
    each measurement.

    Parameters
    ----------
    model : Model class instance
        The type of the model to use for prediction. The corresponding Fit
        object must have a `predict` function implementd One of the following:
        `reconst.dti.TensorModel` or
        `reconst.csdeconv.ConstrainedSphericalDeconvModel`.
    data : ndarray
        Diffusion MRI data acquired with the GradientTable of the model. Shape
        will typically be `(x, y, z, b)` where `xyz` are spatial dimensions and
        b is the number of bvals/bvecs in the GradientTable.
    folds : int
        The number of divisions to apply to the data
    model_args : list
        Additional arguments to the model initialization
    model_kwargs : dict
        Additional key-word arguments to the model initialization. If contains
        the kwarg `mask`, this will be used as a key-word argument to the `fit`
        method of the model object, rather than being used in the
        initialization of the model object

    Notes
    -----
    This function assumes that a prediction API is implemented in the Model
    class for which prediction is conducted. That is, the Fit object that gets
    generated upon fitting the model needs to have a `predict` method, which
    receives a GradientTable class instance as input and produces a predicted
    signal as output.

    It also assumes that the model object has `bval` and `bvec` attributes
    holding b-values and corresponding unit vectors.

    References
    ----------
    .. [1] Rokem, A., Chan, K.L. Yeatman, J.D., Pestilli, F., Mezer, A.,
       Wandell, B.A., 2014. Evaluating the accuracy of diffusion models at
       multiple b-values with cross-validation. ISMRM 2014.
    """
    # This should always be there, if the model inherits from
    # dipy.reconst.base.ReconstModel:
    gtab = model.gtab
    data_b = data[..., ~gtab.b0s_mask]
    div_by_folds = np.mod(data_b.shape[-1], folds)
    # Make sure that an equal number of samples get left out in each fold:
    if div_by_folds != 0:
        msg = "The number of folds must divide the diffusion-weighted "
        msg += "data equally, but "
        msg = "np.mod(%s, %s) is %s" % (data_b.shape[-1], folds, div_by_folds)
        raise ValueError(msg)

    data_0 = data[..., gtab.b0s_mask]
    S0 = np.mean(data_0, -1)
    n_in_fold = data_b.shape[-1] / folds
    prediction = np.zeros(data.shape)
    # We are going to leave out some randomly chosen samples in each iteration:
    order = np.random.permutation(data_b.shape[-1])

    nz_bval = gtab.bvals[~gtab.b0s_mask]
    nz_bvec = gtab.bvecs[~gtab.b0s_mask]

    # Pop the mask, if there is one, out here for use in every fold:
    mask = model_kwargs.pop('mask', None)
    gtgt = gt.gradient_table  # Shorthand
    for k in range(folds):
        fold_mask = np.ones(data_b.shape[-1], dtype=bool)
        fold_idx = order[int(k * n_in_fold): int((k + 1) * n_in_fold)]
        fold_mask[fold_idx] = False
        this_data = np.concatenate([data_0, data_b[..., fold_mask]], -1)

        this_gtab = gtgt(np.hstack([gtab.bvals[gtab.b0s_mask],
                                    nz_bval[fold_mask]]),
                         np.concatenate([gtab.bvecs[gtab.b0s_mask],
                                         nz_bvec[fold_mask]]))
        left_out_gtab = gtgt(np.hstack([gtab.bvals[gtab.b0s_mask],
                                        nz_bval[~fold_mask]]),
                             np.concatenate([gtab.bvecs[gtab.b0s_mask],
                                             nz_bvec[~fold_mask]]))
        this_model = model.__class__(this_gtab, *model_args, **model_kwargs)
        this_fit = this_model.fit(this_data, mask=mask)
        if not hasattr(this_fit, 'predict'):
            err_str = "Models of type: %s " % this_model.__class__
            err_str += "do not have an implementation of model prediction"
            err_str += " and do not support cross-validation"
            raise ValueError(err_str)
        this_predict = S0[..., None] * this_fit.predict(left_out_gtab, S0=1)

        idx_to_assign = np.where(~gtab.b0s_mask)[0][~fold_mask]
        prediction[..., idx_to_assign] =\
            this_predict[..., np.sum(gtab.b0s_mask):]

    # For the b0 measurements
    prediction[..., gtab.b0s_mask] = S0[..., None]
    return prediction