This file is indexed.

/usr/share/pyshared/mlpy/_ranking.py is in python-mlpy 2.2.0~dfsg1-2.1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

## This file is part of MLPY.
## Feature Ranking module based on Recursive Feature Elimination (RFE)
## and Recursive Forward Selection (RFS) methods.

## This code is written by Davide Albanese, <albanese@fbk.eu>.
##(C) 2007 Fondazione Bruno Kessler - Via Santa Croce 77, 38100 Trento, ITALY.

## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program.  If not, see <http://www.gnu.org/licenses/>.

__all__ = ['Ranking']

from numpy import *
import math


def project(elem):
    """
    Linearly rescale *elem* onto the range [0, 1].

    Input

      * *elem* - [numpy ndarray] values to rescale

    Output

      * [numpy ndarray float] (elem - elem.min()) / (elem.max() - elem.min())

    Raises TypeError if *elem* is not a numpy ndarray.
    """

    if not isinstance(elem, ndarray):
        raise TypeError('project() argument must be numpy ndarray')

    m = elem.min()
    M = elem.max()
    D = float(M - m)
    if D == 0.0:
        # All elements are equal: the original code divided by zero here,
        # filling the result with NaNs. A constant vector carries no
        # ranking information, so return zeros instead.
        return zeros_like(elem, dtype=float)
    return (elem - m) / D


def Entropy(pj):
    """
    Return the Shannon entropy (base 2) of the distribution *pj*.

    Entries equal to zero contribute nothing (0 * log 0 == 0 by
    convention), matching the usual information-theoretic definition.
    """

    return -sum(p * math.log(p, 2) for p in pj if p != 0.0)


def onestep(R):
    """
    One-step Recursive Feature Elimination.
    
    Return all feature indices ranked by decreasing weight, computed
    from a single weights vector.

    See:
    I. Guyon, J. Weston, S.Barnhill, V. Vapnik.
    Gene selection for cancer classification using
    support vector machines.
    Machine Learning, (46):389-422, 2002.
    """

    if not isinstance(R, ndarray):
        raise TypeError('onestep() argument must be numpy ndarray')

    ascending = argsort(R)
    return ascending[::-1]


def rfe(R):
    """
    Recursive Feature Elimination.
    
    Return the index of the single least interesting feature (the
    one with the smallest weight).

    See:
    I. Guyon, J. Weston, S.Barnhill, V. Vapnik.
    Gene selection for cancer classification using
    support vector machines.
    Machine Learning, (46):389-422, 2002.
    """

    if not isinstance(R, ndarray):
        raise TypeError('rfe() argument must be numpy ndarray')

    return R.argmin()


def bisrfe(R):
    """
    Bis Recursive Feature Elimination.
    
    Return the indices of the worse half of the features, ranked
    by decreasing weight (bisection step of RFE).

    See:
    I. Guyon, J. Weston, S.Barnhill, V. Vapnik.
    Gene selection for cancer classification using
    support vector machines.
    Machine Learning, (46):389-422, 2002.
    """

    if not isinstance(R, ndarray):
        raise TypeError('bisrfe() argument must be numpy ndarray')

    ranked = argsort(R)[::-1]
    half = ranked.shape[0] // 2
    return ranked[half:]


def sqrtrfe(R):
    """
    Sqrt Recursive Feature Elimination.
    
    Return a list containing uninteresting features.

    See:
    I. Guyon, J. Weston, S.Barnhill, V. Vapnik.
    Gene selection for cancer classification using
    support vector machines.
    Machine Learning, (46):389-422, 2002.
    """

    if not isinstance(R, ndarray):
        raise TypeError('sqrtrfe() argument must be numpy ndarray')
        
    idx = R.argsort()[::-1]
    start = int(idx.shape[0] - math.sqrt(idx.shape[0]))
    return idx[start:]


def erfe(R):
    """
    Entropy-based Recursive Feature Elimination.

    Return a list (1D numpy array of indices) of uninteresting features
    according to the entropy of the weights distribution.

    Input

      * *R* - [1D numpy array float] feature weights

    Raises TypeError if *R* is not a numpy ndarray.

    See:
    C. Furlanello, M. Serafini, S. Merler, and G. Jurman.
    Advances in Neural Network Research: IJCNN 2003.
    An accelerated procedure for recursive feature ranking
    on microarray data.
    Elsevier, 2003.
    """

    if not isinstance(R, ndarray):
        raise TypeError('erfe() argument must be numpy ndarray')
    
    # Number of histogram bins = sqrt(#features).
    # NOTE(review): bins is a float here; recent numpy versions require an
    # integer bin count in histogram() -- this relies on legacy numpy behavior.
    bins = math.sqrt(R.shape[0])
    Ht = 0.5 * math.log(bins, 2)  # entropy threshold
    Mt = 0.2                      # mean threshold
    pw = project(R)          
    M = pw.mean()
    
    # Compute the relative frequencies of the projected weights
    pj = (histogram(pw, bins, range=(0.0, 1.0)))[0] / float(pw.size)

    # Compute entropy of the weights distribution
    H = Entropy(pj)

    if H > Ht and M > Mt:
        # High-entropy, high-mean distribution: discard only the
        # lowest-weight features.
        # Return the indices s.t. pw = [0, 1/bins]
        idx = where(pw <= (1 / bins))[0]
        return idx    
    else:
        # Compute L[i] = ln(pw[i] + 1), a compressed copy of the weights.
        # NOTE(review): xrange is Python 2 only.
        L = empty_like(pw)
        for i in xrange(pw.size):
            L[i] = math.log(pw[i] + 1.0)
        M = L.mean()
        # Compute A = #{L[i] < M} and half A
        idx = where(L < M)[0]
        A = idx.shape[0]
        hA = 0.5 * A

        # If #(L[i]==0.0) >= hA return indices where L==0.0
        iszero = where(L == 0.0)[0]
        if iszero.shape[0] >= hA:
            return iszero
        
        # Otherwise halve the cutoff M until at most half of A features
        # fall below it. Terminates: as M -> 0 the count approaches
        # #(L == 0), which is < hA by the check above.
        while True:
            M = 0.5 * M
            # Compute B = #{L[i] < M}
            idx = where(L < M)[0]
            B = idx.shape[0]
            # Stop iteration when B <= (0.5 * A)
            if (B <= hA):
                break

        return idx


def rfs(R):
    """
    Recursive Forward Selection.

    Return the index of the single most interesting feature (the
    one with the largest weight).

    Input

      * *R* - [1D numpy array float] feature weights

    Raises TypeError if *R* is not a numpy ndarray.
    """

    if not isinstance(R, ndarray):
        # Fixed: the message previously said 'rfe()' (copy-paste error).
        raise TypeError('rfs() argument must be numpy ndarray')
    
    return argmax(R)



class Ranking:
    """
    Ranking class based on Recursive Feature Elimination (RFE) and
    Recursive Forward Selection (RFS) methods.

    Example:

    >>> from numpy import *
    >>> from mlpy import *
    >>> x = array([[1.1, 2.1, 3.1, -1.0],  # first sample
    ...            [1.2, 2.2, 3.2, 1.0],   # second sample
    ...            [1.3, 2.3, 3.3, -1.0]]) # third sample
    >>> y = array([1, -1, 1])              # classes
    >>> myrank = Ranking()                 # initialize ranking class
    >>> mysvm = Svm()                      # initialize svm class
    >>> myrank.compute(x, y, mysvm)        # compute feature ranking
    array([3, 1, 2, 0])
    """  

    # Methods that iteratively discard the least interesting feature(s).
    RFE_METHODS   = ['rfe', 'bisrfe', 'sqrtrfe', 'erfe']
    # Methods that iteratively select the most interesting feature.
    RFS_METHODS   = ['rfs']
    # 'onestep' ranks every feature from a single weights computation.
    OTHER_METHODS = ['onestep']


    def __init__(self, method='rfe', lastsinglesteps = 0):
        """
        Initialize Ranking class.

        Input

          * *method* - [string] method ('onestep', 'rfe', 'bisrfe', 'sqrtrfe', 'erfe', 'rfs')
          * *lastsinglesteps* - [integer] last single steps with 'rfe'

        Raises ValueError if *method* is not a supported name.
        """      

        if not method in self.RFE_METHODS + self.RFS_METHODS + self.OTHER_METHODS:
            raise ValueError("Method '%s' is not supported." % method)
                    
        self.__method = method       
        self.__lastsinglesteps = lastsinglesteps
        # Bound weights() callable of the classifier; set by compute().
        self.__weights = None
        

    def __compute_rfe(self, x, y, debug):       
        # Elimination loop: repeatedly discard the least interesting
        # feature(s) until one survivor remains, recording the discard
        # order. Returns the ranking best-first (see the prepend below).
        loc_x = x.copy()  # work on a copy; caller's data stays untouched
        # glo_idx[i] = original (global) column index of loc_x[:, i].
        glo_idx = arange(x.shape[1], dtype = int)
        # Accumulated discarded features (empty int array to start).
        tot_disc = arange(0, dtype = int)

        while glo_idx.shape[0] > 1:
            # Weights of the surviving features, from the user classifier.
            R = self.__weights(loc_x, y)

            # loc_disc: position(s) within the current loc_x of the
            # feature(s) to discard this round.
            if self.__method == 'onestep':
                loc_disc = onestep(R)

            elif self.__method == 'rfe':
                loc_disc = rfe(R)

            # The accelerated variants drop many features per round, but
            # fall back to one-at-a-time 'rfe' for the last
            # self.__lastsinglesteps features.
            elif self.__method == 'sqrtrfe':
                if loc_x.shape[1] > self.__lastsinglesteps: loc_disc = sqrtrfe(R)
                else: loc_disc = rfe(R)

            elif self.__method == 'bisrfe':
                if loc_x.shape[1] > self.__lastsinglesteps: loc_disc = bisrfe(R)
                else: loc_disc = rfe(R)

            elif self.__method == 'erfe':
                if loc_x.shape[1] > self.__lastsinglesteps: loc_disc = erfe(R)
                else: loc_disc = rfe(R)
                
            
            loc_x = delete(loc_x, loc_disc, 1) # remove local discarded from local x     
            glo_disc = glo_idx[loc_disc] # project local discarded into global discarded

            # remove discarded from global indices
            glo_bool = ones(glo_idx.shape[0], dtype = bool) 
            glo_bool[loc_disc] = False
            glo_idx = glo_idx[glo_bool]
            
            # NOTE(review): Python 2 print statement.
            if debug:
                print glo_idx.shape[0], "features remaining"
            
            # Prepend: features discarded later are more interesting, so
            # they end up nearer the front of the ranking.
            tot_disc = r_[glo_disc, tot_disc]
            
        # The single survivor is the most interesting feature of all.
        if glo_idx.shape[0] == 1:
            tot_disc = r_[glo_idx, tot_disc]
        
        return tot_disc


    def __compute_rfs(self, x, y, debug):
        # Selection loop: repeatedly pick the most interesting feature
        # until one remains. Appending preserves best-first order.
        loc_x = x.copy()  # work on a copy; caller's data stays untouched
        # glo_idx[i] = original (global) column index of loc_x[:, i].
        glo_idx = arange(x.shape[1], dtype = int)
        # Accumulated selected features (empty int array to start).
        tot_sel = arange(0, dtype = int)

        while glo_idx.shape[0] > 1:
            # Weights of the remaining features, from the user classifier.
            R = self.__weights(loc_x, y)
            
            # loc_sel: position within the current loc_x of the feature
            # selected this round.
            if self.__method == 'rfs':
                loc_sel = rfs(R)
            
            loc_x = delete(loc_x, loc_sel, 1) # remove local selected from local x     
            glo_sel = glo_idx[loc_sel] # project local selected into global selected
        
            # remove selected from global indices
            glo_bool = ones(glo_idx.shape[0], dtype = bool) 
            glo_bool[loc_sel] = False
            glo_idx = glo_idx[glo_bool]

            # NOTE(review): Python 2 print statement.
            if debug:
                print glo_idx.shape[0], "features remaining"
            
            # Append: earlier-selected features are more interesting.
            tot_sel = r_[tot_sel, glo_sel]

        # The last remaining feature closes the ranking.
        if glo_idx.shape[0] == 1:
            tot_sel = r_[tot_sel, glo_idx]
        
        return tot_sel
        
        
    def compute(self, x, y, w, debug = False):
        """
        Compute the feature ranking.

        Input
        
          * *x*     - [2D numpy array float] (sample x feature) training data
          * *y*     - [1D numpy array integer] (1 or -1) classes
          * *w*     - object (e.g. classifier) with weights() method
          * *debug* - [bool] show remaining number of feature at each step (True or False)

        Output
        
          * *feature ranking* - [1D numpy array integer] ranked feature indexes

        Raises ValueError if *w* has no weights attribute.
        """

        # NOTE(review): Python 2 'except E, e' syntax.
        try:
            self.__weights = w.weights
        except AttributeError, e:
            raise ValueError(e)

        # Dispatch on the method chosen at construction time.
        if self.__method in self.RFE_METHODS + self.OTHER_METHODS:
            return self.__compute_rfe(x, y, debug)
        elif self.__method in self.RFS_METHODS:
            return self.__compute_rfs(x, y, debug)