This file is indexed.

/usr/share/pyshared/statsmodels/graphics/functional.py is in python-statsmodels 0.4.2-1.2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
"""Module for functional boxplots."""

from statsmodels.compatnp.iter_compat import combinations

import numpy as np
from scipy import stats
from scipy.misc import factorial

from . import utils


__all__ = ['fboxplot', 'rainbowplot', 'banddepth']


def fboxplot(data, xdata=None, labels=None, depth=None, method='MBD',
             wfactor=1.5, ax=None, plot_opts={}):
    """Plot functional boxplot.

    A functional boxplot is the analog of a boxplot for functional data.
    Functional data is any type of data that varies over a continuum, i.e.
    curves, probabillity distributions, seasonal data, etc.

    The data is first ordered, the order statistic used here is `banddepth`.
    Plotted are then the median curve, the envelope of the 50% central region,
    the maximum non-outlying envelope and the outlier curves.

    Parameters
    ----------
    data : sequence of ndarrays or 2-D ndarray
        The vectors of functions to create a functional boxplot from.  If a
        sequence of 1-D arrays, these should all be the same size.
        The first axis is the function index, the second axis the one along
        which the function is defined.  So ``data[0, :]`` is the first
        functional curve.
    xdata : ndarray, optional
        The independent variable for the data.  If not given, it is assumed to
        be an array of integers 0..N with N the length of the vectors in
        `data`.
    labels : sequence of scalar or str, optional
        The labels or identifiers of the curves in `data`.  If given, outliers
        are labeled in the plot.
    depth : ndarray, optional
        A 1-D array of band depths for `data`, or equivalent order statistic.
        If not given, it will be calculated through `banddepth`.
    method : {'MBD', 'BD2'}, optional
        The method to use to calculate the band depth.  Default is 'MBD'.
    wfactor : float, optional
        Factor by which the central 50% region is multiplied to find the outer
        region (analog of "whiskers" of a classical boxplot).
    ax : Matplotlib AxesSubplot instance, optional
        If given, this subplot is used to plot in instead of a new figure being
        created.
    plot_opts : dict, optional
        A dictionary with plotting options.  Any of the following can be
        provided, if not present in `plot_opts` the defaults will be used::

          - 'cmap_outliers', a Matplotlib LinearSegmentedColormap instance.
          - 'c_inner', valid MPL color. Color of the central 50% region
          - 'c_outer', valid MPL color. Color of the non-outlying region
          - 'c_median', valid MPL color. Color of the median.
          - 'lw_outliers', scalar.  Linewidth for drawing outlier curves.
          - 'lw_median', scalar.  Linewidth for drawing the median curve.
          - 'draw_nonout', bool.  If True, also draw non-outlying curves.

    Returns
    -------
    fig : Matplotlib figure instance
        If `ax` is None, the created figure.  Otherwise the figure to which
        `ax` is connected.
    depth : ndarray
        1-D array containing the calculated band depths of the curves.
    ix_depth : ndarray
        1-D array of indices needed to order curves (or `depth`) from most to
        least central curve.
    ix_outliers : ndarray
        1-D array of indices of outlying curves in `data`.

    See Also
    --------
    banddepth, rainbowplot

    Notes
    -----
    The median curve is the curve with the highest band depth.

    Outliers are defined as curves that fall outside the band created by
    multiplying the central region by `wfactor`.  Note that the range over
    which they fall outside this band doesn't matter, a single data point
    outside the band is enough.  If the data is noisy, smoothing may therefore
    be required.

    The non-outlying region is defined as the band made up of all the
    non-outlying curves.

    References
    ----------
    [1] Y. Sun and M.G. Genton, "Functional Boxplots", Journal of Computational
        and Graphical Statistics, vol. 20, pp. 1-19, 2011.
    [2] R.J. Hyndman and H.L. Shang, "Rainbow Plots, Bagplots, and Boxplots for
        Functional Data", vol. 19, pp. 29-25, 2010.

    Examples
    --------
    Load the El Nino dataset.  Consists of 60 years worth of Pacific Ocean sea
    surface temperature data.

    >>> import matplotlib.pyplot as plt
    >>> import statsmodels.api as sm
    >>> data = sm.datasets.elnino.load()

    Create a functional boxplot.  We see that the years 1982-83 and 1997-98 are
    outliers; these are the years where El Nino (a climate pattern
    characterized by warming up of the sea surface and higher air pressures)
    occurred with unusual intensity.

    >>> fig = plt.figure()
    >>> ax = fig.add_subplot(111)
    >>> res = sm.graphics.fboxplot(data.raw_data[:, 1:], wfactor=2.58,
    ...                            labels=data.raw_data[:, 0].astype(int),
    ...                            ax=ax)

    >>> ax.set_xlabel("Month of the year")
    >>> ax.set_ylabel("Sea surface temperature (C)")
    >>> ax.set_xticks(np.arange(13, step=3) - 1)
    >>> ax.set_xticklabels(["", "Mar", "Jun", "Sep", "Dec"])
    >>> ax.set_xlim([-0.2, 11.2])

    >>> plt.show()

    .. plot:: plots/graphics_functional_fboxplot.py

    """
    fig, ax = utils.create_mpl_ax(ax)

    if plot_opts.get('cmap_outliers') is None:
        from matplotlib.cm import rainbow_r
        plot_opts['cmap_outliers'] = rainbow_r

    data = np.asarray(data)
    if xdata is None:
        xdata = np.arange(data.shape[1])

    # Calculate band depth if required.
    if depth is None:
        if method not in ['MBD', 'BD2']:
            raise ValueError("Unknown value for parameter `method`.")

        depth = banddepth(data, method=method)
    else:
        if depth.size != data.shape[0]:
            raise ValueError("Provided `depth` array is not of correct size.")

    # Inner area is 25%-75% region of band-depth ordered curves.
    ix_depth = np.argsort(depth)[::-1]
    median_curve = data[ix_depth[0], :]
    ix_IQR = data.shape[0] // 2
    lower = data[ix_depth[0:ix_IQR], :].min(axis=0)
    upper = data[ix_depth[0:ix_IQR], :].max(axis=0)

    # Determine region for outlier detection
    inner_median = np.median(data[ix_depth[0:ix_IQR], :], axis=0)
    lower_fence = inner_median - (inner_median - lower) * wfactor
    upper_fence = inner_median + (upper - inner_median) * wfactor

    # Find outliers.
    ix_outliers = []
    ix_nonout = []
    for ii in range(data.shape[0]):
        if np.any(data[ii, :] > upper_fence) or np.any(data[ii, :] < lower_fence):
            ix_outliers.append(ii)
        else:
            ix_nonout.append(ii)

    ix_outliers = np.asarray(ix_outliers)

    # Plot envelope of all non-outlying data
    lower_nonout = data[ix_nonout, :].min(axis=0)
    upper_nonout = data[ix_nonout, :].max(axis=0)
    ax.fill_between(xdata, lower_nonout, upper_nonout,
                    color=plot_opts.get('c_outer', (0.75,0.75,0.75)))

    # Plot central 50% region
    ax.fill_between(xdata, lower, upper,
                    color=plot_opts.get('c_inner', (0.5,0.5,0.5)))

    # Plot median curve
    ax.plot(xdata, median_curve, color=plot_opts.get('c_median', 'k'),
            lw=plot_opts.get('lw_median', 2))

    # Plot outliers
    cmap = plot_opts.get('cmap_outliers')
    for ii, ix in enumerate(ix_outliers):
        label = str(labels[ix]) if labels is not None else None
        ax.plot(xdata, data[ix, :],
                color=cmap(float(ii) / (len(ix_outliers)-1)), label=label,
                lw=plot_opts.get('lw_outliers', 1))

    if plot_opts.get('draw_nonout', False):
        for ix in ix_nonout:
            ax.plot(xdata, data[ix, :], 'k-', lw=0.5)

    if labels is not None:
        ax.legend()

    return fig, depth, ix_depth, ix_outliers


def rainbowplot(data, xdata=None, depth=None, method='MBD', ax=None,
                 cmap=None):
    """Create a rainbow plot for a set of curves.

    A rainbow plot contains line plots of all curves in the dataset, colored in
    order of functional depth.  The median curve is shown in black.

    Parameters
    ----------
    data : sequence of ndarrays or 2-D ndarray
        The vectors of functions to create a functional boxplot from.  If a
        sequence of 1-D arrays, these should all be the same size.
        The first axis is the function index, the second axis the one along
        which the function is defined.  So ``data[0, :]`` is the first
        functional curve.
    xdata : ndarray, optional
        The independent variable for the data.  If not given, it is assumed to
        be an array of integers 0..N with N the length of the vectors in
        `data`.
    depth : ndarray, optional
        A 1-D array of band depths for `data`, or equivalent order statistic.
        If not given, it will be calculated through `banddepth`.
    method : {'MBD', 'BD2'}, optional
        The method to use to calculate the band depth.  Default is 'MBD'.
    ax : Matplotlib AxesSubplot instance, optional
        If given, this subplot is used to plot in instead of a new figure being
        created.
    cmap : Matplotlib LinearSegmentedColormap instance, optional
        The colormap used to color curves with.  Default is a rainbow colormap,
        with red used for the most central and purple for the least central
        curves.

    Returns
    -------
    fig : Matplotlib figure instance
        If `ax` is None, the created figure.  Otherwise the figure to which
        `ax` is connected.

    See Also
    --------
    banddepth, fboxplot

    References
    ----------
    [1] R.J. Hyndman and H.L. Shang, "Rainbow Plots, Bagplots, and Boxplots for
        Functional Data", vol. 19, pp. 29-25, 2010.

    Examples
    --------
    Load the El Nino dataset.  Consists of 60 years worth of Pacific Ocean sea
    surface temperature data.

    >>> import matplotlib.pyplot as plt
    >>> import statsmodels.api as sm
    >>> data = sm.datasets.elnino.load()

    Create a rainbow plot:

    >>> fig = plt.figure()
    >>> ax = fig.add_subplot(111)
    >>> res = sm.graphics.rainbowplot(data.raw_data[:, 1:], ax=ax)

    >>> ax.set_xlabel("Month of the year")
    >>> ax.set_ylabel("Sea surface temperature (C)")
    >>> ax.set_xticks(np.arange(13, step=3) - 1)
    >>> ax.set_xticklabels(["", "Mar", "Jun", "Sep", "Dec"])
    >>> ax.set_xlim([-0.2, 11.2])
    >>> plt.show()

    .. plot:: plots/graphics_functional_rainbowplot.py

    """
    fig, ax = utils.create_mpl_ax(ax)

    if cmap is None:
        from matplotlib.cm import rainbow_r
        cmap = rainbow_r

    data = np.asarray(data)
    if xdata is None:
        xdata = np.arange(data.shape[1])

    # Calculate band depth if required.
    if depth is None:
        if method not in ['MBD', 'BD2']:
            raise ValueError("Unknown value for parameter `method`.")

        depth = banddepth(data, method=method)
    else:
        if depth.size != data.shape[0]:
            raise ValueError("Provided `depth` array is not of correct size.")

    ix_depth = np.argsort(depth)[::-1]

    # Plot all curves, colored by depth
    num_curves = data.shape[0]
    for ii in range(num_curves):
        ax.plot(xdata, data[ix_depth[ii], :], c=cmap(ii / (num_curves - 1.)))

    # Plot the median curve
    median_curve = data[ix_depth[0], :]
    ax.plot(xdata, median_curve, 'k-', lw=2)

    return fig


def banddepth(data, method='MBD'):
    """Calculate the band depth for a set of functional curves.

    Band depth is an order statistic for functional data (see `fboxplot`), with
    a higher band depth indicating larger "centrality".  In analog to scalar
    data, the functional curve with highest band depth is called the median
    curve, and the band made up from the first N/2 of N curves is the 50%
    central region.

    Parameters
    ----------
    data : ndarray
        The vectors of functions to create a functional boxplot from.
        The first axis is the function index, the second axis the one along
        which the function is defined.  So ``data[0, :]`` is the first
        functional curve.
    method : {'MBD', 'BD2'}, optional
        Whether to use the original band depth (with J=2) of [1]_ or the
        modified band depth.  See Notes for details.

    Returns
    -------
    depth : ndarray
        Depth values for functional curves.

    Notes
    -----
    Functional band depth as an order statistic for functional data was
    proposed in [1]_ and applied to functional boxplots and bagplots in [2]_.

    The method 'BD2' checks for each curve whether it lies completely inside
    bands constructed from two curves.  All permutations of two curves in the
    set of curves are used, and the band depth is normalized to one.  Due to
    the complete curve having to fall within the band, this method yields a lot
    of ties.

    The method 'MBD' is similar to 'BD2', but checks the fraction of the curve
    falling within the bands.  It therefore generates very few ties.

    References
    ----------
    .. [1] S. Lopez-Pintado and J. Romo, "On the Concept of Depth for
           Functional Data", Journal of the American Statistical Association,
           vol.  104, pp. 718-734, 2009.
    .. [2] Y. Sun and M.G. Genton, "Functional Boxplots", Journal of
           Computational and Graphical Statistics, vol. 20, pp. 1-19, 2011.

    """
    def _band2(x1, x2, curve):
        xb = np.vstack([x1, x2])
        if np.any(curve < xb.min(axis=0)) or np.any(curve > xb.max(axis=0)):
            res = 0
        else:
            res = 1

        return res

    def _band_mod(x1, x2, curve):
        xb = np.vstack([x1, x2])
        res = np.logical_and(curve >= xb.min(axis=0),
                             curve <= xb.max(axis=0))
        return np.sum(res) / float(res.size)

    if method == 'BD2':
        band = _band2
    elif method == 'MBD':
        band = _band_mod
    else:
        raise ValueError("Unknown input value for parameter `method`.")

    num = data.shape[0]
    ix = np.arange(num)
    depth = []
    for ii in range(num):
        res = 0
        for ix1, ix2 in combinations(ix, 2):
            res += band(data[ix1, :], data[ix2, :], data[ii, :])

        # Normalize by number of combinations to get band depth
        normfactor = factorial(num) / 2. / factorial(num - 2)
        depth.append(float(res) / normfactor)

    return np.asarray(depth)