This file is indexed.

/usr/lib/python3/dist-packages/astroML/sum_of_norms.py is in python3-astroml 0.3-6.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
"""
Functions for regression using sums-of-norms
"""

import numpy as np


def norm(x, x0, sigma):
    return (1. / np.sqrt(2 * np.pi) / sigma
            * np.exp(-0.5 * (x - x0) ** 2 / sigma ** 2))


def sum_of_norms(x, y, num_gaussians=None, locs=None,
                 widths=None, spacing='linear', full_output=False):
    r"""Approximate a function with a sum of gaussians

    Parameters
    ----------
    x : array-like, shape = n_training
        The x-value of the input function
    y : array-like, shape = n_training
        The y-value of the input function
    num_gaussians : integer (optional)
        The number of gaussians to use.  If this is not specified, then the
        number of items in `locs` is used.  If neither is specified, this
        defaults to 30
    locs : array-like (optional)
        The locations of the gaussians to use.  If not specified, locations
        will be uniformly spaced between the end-points of x.
    widths : float or array-like (optional)
        The widths of the gaussians to use.  If a single value, use this for
        all widths.  If multiple values, the length must be equal to
        len(locs), if specified, and/or num_gaussians, if specified.
        If widths is not provided, then widths will be used which are
        half the distance between adjacent gaussians will be used.
    full_output : boolean (default = False)
        if True, return the rms error of the best-fit, the list of locations,
        and the list of widths
    spacing : string, ['linear'|'log']
        spacing to use for automatic determination of locs.  Not referenced
        if locs is specified

    Returns
    -------
    weights if full_output == False
    (weights, rms, locs, widths) if full_output == True

    weights : array-like, length = num_gaussians
        The weights which best approximate the spectrum.  The reconstruction
        is given by
        sum_{i=1}^{num_gaussians} weights[i] * norm(locs[i], widths[i])
    rms : float
        the root-mean-square error of the best-fit solution
    locs : array
        the locations of the gaussians used for the fit
    widths : array
        the widths of the gaussians used for the fit

    Notes
    -----
    This is solved using linear regression.  Our matrix :math:`X` has shape
    :math:`(m, n)` where :math:`m` is the number of training points, and
    :math:`n` is the number of gaussians in the fit.  We seek the linear
    combination of these :math:`n` gaussians which minimizes the squared
    residual error, which in matrix form can be expressed

    .. math:
        \epsilon = \min\left|y - Xw \right|

    here the vector :math:`w` encodes the linear combination.  The vector
    :math:`w` which minimizes :math:`\epsilon` can be shown to be

    .. math:
        w = (X^T X)^{-1} X^T y

    This is the result returned by this function.
    """
    x, y = map(np.asarray, (x, y))
    assert x.ndim == 1
    assert y.shape == x.shape

    n_training = x.shape[0]

    if locs is None:
        if num_gaussians is None:
            num_gaussians = 30
        if spacing == 'linear':
            locs = np.linspace(x[0], x[-1], num_gaussians)
        elif spacing == 'log':
            locs = np.logspace(np.log10(x[0]), np.log10(x[-1]), num_gaussians)
    else:
        locs = np.asarray(locs)
        if num_gaussians is None:
            num_gaussians = len(locs)
        if num_gaussians is not None:
            assert len(locs) == num_gaussians

    if widths is None:
        widths = np.zeros(num_gaussians)
        widths[:-1] = locs[1:] - locs[:-1]
        if len(widths) > 1:
            widths[-1] = widths[-2]
        else:
            widths[-1] = x[-1] - x[0]
    else:
        widths = np.atleast_1d(widths)
        assert widths.size in (1, num_gaussians)
        widths = widths + np.zeros(num_gaussians)  # broadcast to shape

    # use broadcasting to compute X in one go, without slow loops
    X = norm(x.reshape(n_training, 1),
             locs.reshape(1, num_gaussians),
             widths.reshape(1, num_gaussians))

    # use pinv rather than inv for numerical stability
    w_best = np.dot(np.linalg.pinv(np.dot(X.T, X)),
                    np.dot(X.T, y))

    if not full_output:
        return w_best
    else:
        rms = np.sqrt(np.mean(y - np.dot(X, w_best)) ** 2)
        return w_best, rms, locs, widths