This file is indexed.

/usr/lib/python3/dist-packages/astroML/dimensionality/iterative_pca.py is in python3-astroml 0.3-6.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import sys

import numpy as np

from scipy.linalg import solve


def iterative_pca(X, M, n_ev=5, n_iter=15, norm=None, full_output=False):
    """
    Parameters
    ----------
    X: ndarray, shape = (n_samples, n_features)
        input data
    M: ndarray, bool, shape = (n_samples, n_features)
        mask for input data.  where mask == True, the spectrum is unconstrained
    n_ev: int
        number of eigenvectors to use in reconstructing masked regions
    n_iter: int
        number of iterations to find eigenvectors
    norm: string
        what type of normalization to use on the data. Options are
        - None : no normalization
        - 'L1' : L1-norm
        - 'L2' : L2-norm
    full_output: boolean (optional)
        if False (default) return only the reconstructed data X_recons
        if True, return the full information (see below)

    Returns
    -------
    X_recons: ndarray, shape = (n_samples, n_features)
        data with masked regions reconstructed

    mu: ndarray, shape = (n_features,)
        mean of data
    evecs: ndarray, shape = (min(n_samples, n_features), n_features)
        eigenvectors of the reconstructed data
    evals: ndarray, size = min(n_samples, n_features)
        eigenvalues of the reconstructed data
    norms: ndarray, size = n_samples
        normalization of each input
    coeffs: ndarray, size = (n_samples, n_ev)
        coefficients used to reconstruct X
    """
    X = np.asarray(X, dtype=np.float)
    M = np.asarray(M, dtype=np.bool)

    if X.shape != M.shape:
        raise ValueError('X and M must have the same shape')

    n_samples, n_features = X.shape

    if np.any(M.sum(0) == n_samples):
        raise ValueError('Some features are masked in all samples')

    if type(norm) == str:
        norm = norm.upper()

    if norm not in (None, 'none', 'L1', 'L2'):
        raise ValueError('unrecognized norm: %s' % norm)

    notM = (~M)

    X_recons = X.copy()
    X_recons[M] = 0

    # as an initial guess, we'll fill-in masked regions with the mean
    # of the rest of the sample
    if norm is None:
        mu = (X_recons * notM).sum(0) / notM.sum(0)
        mu = mu * np.ones([n_samples, 1])
        X_recons[M] = mu[M]
    else:
        # since we're normalizing each spectrum, and the norm depends on
        # the filled-in values, we need to iterate a few times to make
        # sure things are consistent.
        for i in range(n_iter):
            # normalize
            if norm == 'L1':
                X_recons /= np.sum(X_recons, 1)[:, None]
            else:
                X_recons /= np.sqrt(np.sum(X_recons ** 2, 1))[:, None]

            # find the mean
            mu = (X_recons * notM).sum(0) / notM.sum(0)
            mu = mu * np.ones([n_samples, 1])
            X_recons[M] = mu[M]

    # Matrix of coefficients
    coeffs = np.zeros((n_samples, n_ev))

    # Now we iterate through, using the principal components to reconstruct
    #  these regions.
    for i in range(n_iter):
        sys.stdout.write(' PCA iteration %i / %i\r' % (i + 1, n_iter))
        sys.stdout.flush()

        # normalize the data
        if norm == 'L1':
            X_recons /= np.sum(X_recons, 1)[:, None]
        else:
            X_recons /= np.sqrt(np.sum(X_recons ** 2, 1))[:, None]

        # now compute the principal components
        mu = X_recons.mean(0)
        X_centered = X_recons - mu
        U, S, VT = np.linalg.svd(X_centered, full_matrices=False)

        # perform a least-squares fit to estimate the coefficients of the
        # first n_ev eigenvectors for each data point.
        # The eigenvectors are in the rows of the matrix VT.
        # The coefficients are given by
        #  a_n = [V_n^T W V_n]^(-1) V_n W x
        # Such that x can be reconstructed via
        #  x_n = V_n a_n
        # Variables here are:
        #  x   : vector length n_features. This is a data point to be
        #        reconstructed
        #  a_n : vector of length n.  These are the reconstruction weights
        #  V_n : eigenvector matrix of size (n_features, n).
        #  W   : diagonal weight matrix of size (n_features, n_features)
        #        such that W[i,i] = M[i]
        #  x_n : vector of length n_features which approximates x
        VWx = np.dot(VT[:n_ev], (notM * X_centered).T)
        for i in range(n_samples):
            VWV = np.dot(VT[:n_ev], (notM[i] * VT[:n_ev]).T)
            coeffs[i] = solve(VWV, VWx[:, i], sym_pos=True, overwrite_a=True)

        X_fill = mu + np.dot(coeffs, VT[:n_ev])
        X_recons[M] = X_fill[M]
    sys.stdout.write('\n')

    # un-normalize X_recons
    norms = np.zeros(n_samples)
    for i in range(n_samples):
        ratio_i = X[i][notM[i]] / X_recons[i][notM[i]]
        norms[i] = ratio_i[~np.isnan(ratio_i)][0]
        X_recons[i] *= norms[i]

    if full_output:
        return X_recons, mu, VT, S, norms, coeffs
    else:
        return X_recons