This file is indexed.

/usr/share/pyshared/MACS2/IO/BinKeeper.py is in macs 2.0.9.1-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
# Time-stamp: <2011-03-14 17:52:00 Tao Liu>

"""Module Description: BinKeeper for Wiggle-like tracks.

Copyright (c) 2008 Tao Liu <taoliu@jimmy.harvard.edu>

This code is free software; you can redistribute it and/or modify it
under the terms of the BSD License (see the file COPYING included with
the distribution).

@status:  experimental
@version: $Revision$
@author:  Tao Liu
@contact: taoliu@jimmy.harvard.edu
"""

# ------------------------------------
# python modules
# ------------------------------------

import sys
import re
from bisect import insort,bisect_left,bisect_right,insort_right
from array import array
# ------------------------------------
# constants
# ------------------------------------
# to determine the byte size
if array('H',[1]).itemsize == 2:
    BYTE2 = 'H'
else:
    raise Exception("BYTE2 type cannot be determined!")

if array('I',[1]).itemsize == 4:
    BYTE4 = 'I'
elif array('L',[1]).itemsize == 4:
    BYTE4 = 'L'
else:
    raise Exception("BYTE4 type cannot be determined!")

if array('f',[1]).itemsize == 4:
    FBYTE4 = 'f'
elif array('d',[1]).itemsize == 4:
    FBYTE4 = 'd'
else:
    raise Exception("BYTE4 type cannot be determined!")

# ------------------------------------
# Misc functions
# ------------------------------------

# ------------------------------------
# Classes
# ------------------------------------
class BinKeeperI:
    """BinKeeper keeps point data from a chromosome in a bin list.

    Example:
    >>> from taolib.CoreLib.Parser import WiggleIO
    >>> w = WiggleIO('sample.wig')
    >>> bk = w.build_binKeeper()
    >>> bk['chrI'].pp2v(1000,2000) # to extract values in chrI:1000..2000
    """
    def __init__ (self,binsize=8000,chromosomesize=1e9):
        """Initializer.

        Parameters:
        binsize : size of bin in Basepair
        chromosomesize : size of chromosome, default is 1G
        """
        self.binsize = binsize
        self.binnumber = int(chromosomesize/self.binsize)+1
        self.cage = []
        a = self.cage.append
        for i in xrange(self.binnumber):
            a([array(BYTE4,[]),array(FBYTE4,[])])

    def add ( self, p, value ):
        """Add a position into BinKeeper.

        Note: position must be sorted before adding. Otherwise, pp2v
        and pp2p will not work.
        """
        bin = p/self.binsize
        self.cage[bin][0].append(p)
        self.cage[bin][1].append(value)        

    def p2bin (self, p ):
        """Return the bin index for a position.
        
        """
        return p/self.binsize

    def p2cage (self, p):
        """Return the bin containing the position.
        
        """
        return self.cage[p/self.binsize]

    def __pp2cages (self, p1, p2):
        assert p1<=p2
        bin1 = self.p2bin(p1)
        bin2 = self.p2bin(p2)+1
        t = [array(BYTE4,[]),array(FBYTE4,[])]
        for i in xrange(bin1,bin2):
            t[0].extend(self.cage[i][0])
            t[1].extend(self.cage[i][1])            
        return t

    def pp2p (self, p1, p2):
        """Give the position list between two given positions.

        Parameters:
        p1 : start position
        p2 : end position
        Return Value:
        list of positions between p1 and p2.
        """
        (ps,vs) = self.__pp2cages(p1,p2)
        p1_in_cages = bisect_left(ps,p1)
        p2_in_cages = bisect_right(ps,p2)
        return ps[p1_in_cages:p2_in_cages]

    def pp2v (self, p1, p2):
        """Give the value list between two given positions.

        Parameters:
        p1 : start position
        p2 : end position
        Return Value:
        list of values whose positions are between p1 and p2.
        """
        (ps,vs) = self.__pp2cages(p1,p2)
        p1_in_cages = bisect_left(ps,p1)
        p2_in_cages = bisect_right(ps,p2)
        return vs[p1_in_cages:p2_in_cages]


    def pp2pv (self, p1, p2):
        """Give the (position,value) list between two given positions.

        Parameters:
        p1 : start position
        p2 : end position
        Return Value:
        list of (position,value) between p1 and p2.
        """
        (ps,vs) = self.__pp2cages(p1,p2)
        p1_in_cages = bisect_left(ps,p1)
        p2_in_cages = bisect_right(ps,p2)
        return zip(ps[p1_in_cages:p2_in_cages],vs[p1_in_cages:p2_in_cages])


class BinKeeperII:
    """BinKeeperII keeps non-overlapping interval data from a chromosome in a bin list.

    This is especially designed for bedGraph type data.

    """
    def __init__ (self,binsize=8000,chromosomesize=1e9):
        """Initializer.

        Parameters:
        binsize : size of bin in Basepair
        chromosomesize : size of chromosome, default is 1G
        """
        self.binsize = binsize
        self.binnumber = int(chromosomesize/self.binsize)+1
        self.cage = []
        a = self.cage.append
        for i in xrange(self.binnumber):
            a([array(BYTE4,[]),array(BYTE4,[]),array(FBYTE4,[])])

    def add ( self, startp, endp, value ):
        """Add an interval data into BinKeeper.

        Note: position must be sorted before adding. Otherwise, pp2v
        and pp2p will not work.
        """
        startbin = startp/self.binsize
        endbin = endp/self.binsize
        if startbin == endbin:
            # some intervals may only be within a bin
            j = bisect.bisect_left(self.cage[startbin][0],startp)
            self.cage[startbin][0].insert(j,startp)
            self.cage[startbin][1].insert(j,endp)
            self.cage[startbin][2].insert(j,value)
        else:
            # some intervals may cover the end of bins
            # first bin
            j = bisect.bisect_left(self.cage[startbin][0],startp)
            self.cage[startbin][0].insert(j,startp)
            self.cage[startbin][1].insert(j,(startbin+1)*self.binsize)
            self.cage[startbin][2].insert(j,value)
            # other bins fully covered
            for i in xrange(startbin+1,endbin):
                p = i*self.binsize
                j = bisect.bisect_left(self.cage[startbin][0],p)
                self.cage[startbin][0].insert(j,p)
                self.cage[startbin][1].insert(j,(i+1)*self.binsize)
                self.cage[startbin][2].insert(j,value)

                insort_right(self.cage[i][0],i*self.binsize)
                insort_right(self.cage[i][1],(i+1)*self.binsize)
                insort_right(self.cage[i][2],value)
            # last bin -- the start of this bin should be covered
            insort_right(self.cage[endbin][0],endbin*self.binsize)
            insort_right(self.cage[endbin][1],endp)
            insort_right(self.cage[endbin][2],value)

    def p2bin (self, p ):
        """Given a position, return the bin index for a position.
        
        """
        return p/self.binsize

    def p2cage (self, p):
        """Given a position, return the bin containing the position.
        
        """
        return self.cage[p/self.binsize]

    def pp2cages (self, p1, p2):
        """Given an interval, return the bins containing this interval.
        
        """
        assert p1<=p2
        bin1 = self.p2bin(p1)
        bin2 = self.p2bin(p2)
        t = [array(BYTE4,[]),array(BYTE4,[]),array(FBYTE4,[])]
        for i in xrange(bin1,bin2+1):
            t[0].extend(self.cage[i][0])
            t[1].extend(self.cage[i][1])
            t[2].extend(self.cage[i][2])                        
        return t

    def pp2intervals (self, p1, p2):
        """Given an interval, return the intervals list between two given positions.

        Parameters:
        p1 : start position
        p2 : end position
        Return Value:
        A list of intervals start and end positions (tuple) between p1 and p2.

        * Remember, I assume all intervals saved in this BinKeeperII
          are not overlapping, so if there is some overlap, this
          function will not work as expected.
        """
        (startposs,endposs,vs) = self.pp2cages(p1,p2)
        p1_in_cages = bisect_left(startposs,p1)
        p2_in_cages = bisect_right(endposs,p2)
        output_startpos_list = startposs[p1_in_cages:p2_in_cages]
        output_endpos_list = endposs[p1_in_cages:p2_in_cages]

        # check if the bin (p1_in_cages-1) covers p1
        if p1 < endposs[p1_in_cages-1]:
            # add this interval
            output_startpos_list = array(BYTE4,[p1,])+output_startpos_list
            output_endpos_list = array(BYTE4,[endposs[p1_in_cages-1],])+output_endpos_list

        # check if the bin (p2_in_cages+1) covers p2
        if p2 > startposs[p2_in_cages+1]:
            # add this interval
            output_startpos_list = array(BYTE4,[startposs[p2_in_cages+1],])+output_startpos_list
            output_endpos_list = array(BYTE4,[p2,])+output_endpos_list

        return zip(output_startpos_list,output_endpos_list)

    def pp2pvs (self, p1, p2):
        """Given an interval, return the values list between two given positions.

        Parameters:
        p1 : start position
        p2 : end position
        Return Value:

        A list of start, end positions, values (tuple) between p1 and
        p2. Each value represents the value in an interval. Remember
        the interval length and positions are lost in the output.

        * Remember, I assume all intervals saved in this BinKeeperII
          are not overlapping, so if there is some overlap, this
          function will not work as expected.
        """

        (startposs,endposs,vs) = self.pp2cages(p1,p2)
        p1_in_cages = bisect_left(startposs,p1)
        p2_in_cages = bisect_right(endposs,p2)
        output_startpos_list = startposs[p1_in_cages:p2_in_cages]
        output_endpos_list = endposs[p1_in_cages:p2_in_cages]
        output_value_list = vs[p1_in_cages:p2_in_cages]

        # print p1_in_cages,p2_in_cages
        # print vs
        print output_startpos_list
        print output_endpos_list
        print output_value_list

        # check if the bin (p1_in_cages-1) covers p1
        
        if p1_in_cages-1 >= 0 and p1 < self.cage[p1_in_cages-1][1]:
            # add this interval
            output_startpos_list = array(BYTE4,[p1,])+output_startpos_list
            output_endpos_list = array(BYTE4,[self.cage[p1_in_cages-1][1],])+output_endpos_list
            output_value_list = array(BYTE4,[self.cage[p1_in_cages-1][2],])+output_value_list
                

        # check if the bin (p2_in_cages+1) covers p2
        #print p2_in_cages+1,len(self.cage)
        #print p2, self.cage[p2_in_cages+1][0]
        if p2_in_cages+1 < len(self.cage) and p2 > self.cage[p2_in_cages+1][0]:
            # add this interval
            output_startpos_list = output_startpos_list+array(BYTE4,[self.cage[p2_in_cages+1][0],])
            output_endpos_list = output_endpos_list+array(BYTE4,[p2,])
            output_value_list = output_value_list+array(BYTE4,[self.cage[p2_in_cages+1][2],])

        print output_startpos_list
        print output_endpos_list
        print output_value_list

        return zip(output_startpos_list,output_endpos_list,output_value_list)