This file is indexed.

/usr/bin/randsample is in macs 2.0.9.1-1.

This file is owned by root:root, with mode 0o755.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#! /usr/bin/python
# Time-stamp: <2011-11-02 14:01:10 Tao Liu>

"""Description: Random sample certain number/percentage of tags.

Copyright (c) 2011 Tao Liu <taoliu@jimmy.harvard.edu>

This code is free software; you can redistribute it and/or modify it
under the terms of the Artistic License (see the file COPYING included
with the distribution).

@status: release candidate
@version: $Id$
@author:  Yong Zhang, Tao Liu
@contact: taoliu@jimmy.harvard.edu
"""

# ------------------------------------
# python modules
# ------------------------------------

import os
import sys
import logging
from subprocess import Popen,PIPE
from optparse import OptionParser
import gzip

# ------------------------------------
# own python modules
# ------------------------------------
from MACS2.OptValidator import opt_validate_randsample as opt_validate
from MACS2.cProb import binomial_cdf_inv
from MACS2.Constants import *
# ------------------------------------
# Main function
# ------------------------------------
def main():
    """The Main function/pipeline for duplication filter.
    
    """
    # Parse options...
    options = opt_validate(prepare_optparser())
    # end of parsing commandline options
    info = options.info
    warn = options.warn
    debug = options.debug
    error = options.error
    #0 check output file
    if options.outputfile:
        assert not os.path.exists(options.outputfile), "%s already exists, please check!" % options.outputfile
        outfhd = open(options.outputfile,"w")
    else:
        outfhd = sys.stdout
    
    #1 Read tag files
    info("read tag files...")
    fwtrack = load_tag_files_options (options)
    
    info("tag size = %d" % options.tsize)
    fwtrack.fw = options.tsize

    t0 = fwtrack.total
    info(" total tags in alignment file: %d" % (t0))
    if options.number:
        if options.number > t0:
            error(" Number you want is bigger than total number of tags in alignment file! Please specify a smaller number and try again!")
            error(" %.2e > %.2e" % (options.number, t0))
            sys.exit(1)
        info(" Number of tags you want to keep: %.2e" % (options.number))
        options.percentage = float(options.number)/t0*100
    info(" Percentage of tags you want to keep: %.2f%%" % (options.percentage))
    
    fwtrack.sample_percent(options.percentage/100.0)

    info(" tags after random sampling in alignment file: %d" % (fwtrack.total))

    info("Write to BED file")
    fwtrack.print_to_bed(fhd=outfhd)
    info("finished! Check %s." % options.outputfile)

def prepare_optparser ():
    """Prepare optparser object. New options will be added in this
    function first.
    
    """
    usage = "usage: %prog <-t file> [-p percent|-n number] [-o outputfile] [options]"
    description = "%prog -- Random sample certain number/percentage of tags. Method: 1. Calculate the percentage of tags needed to be kept; 2. For each chromosome, random sample certain percentage of tags."

    optparser = OptionParser(version="%prog "+RANDSAMPLE_VERSION,description=description,usage=usage,add_help_option=False)
    optparser.add_option("-h","--help",action="help",help="show this help message and exit.")
    optparser.add_option("-t",dest="tfile",type="string",
                         help="Sequencing alignment file. REQUIRED.")
    optparser.add_option("-p","--percentage",dest="percentage",type="float",
                         help="Percentage of tags you want to keep. Input 80.0 for 80%. This option can't be used at the same time with -n/--num. REQUIRED")
    optparser.add_option("-n","--number",dest="number",type="float",
                         help="Number of tags you want to keep. Input 8000000 or 8e+6 for 8 million. This option can't be used at the same time with -p/--percent. Note that the number of tags in output is approximate as the number specified here. REQUIRED")
    optparser.add_option("-o",dest="outputfile",type="string",
                         help="Output BED file name. If not specified, will write to standard output. DEFAULT: stdout",
                         default=None)
    optparser.add_option("-s","--tsize",dest="tsize",type="int",default=None,
                         help="Tag size. This will overide the auto detected tag size. DEFAULT: Not set")
    optparser.add_option("-f","--format",dest="format",type="string",
                         help="Format of tag file, \"AUTO\", \"BED\" or \"ELAND\" or \"ELANDMULTI\" or \"ELANDEXPORT\" or \"SAM\" or \"BAM\" or \"BOWTIE\". The default AUTO option will let %prog decide which format the file is. Please check the definition in 00README file if you choose ELAND/ELANDMULTI/ELANDEXPORT/SAM/BAM/BOWTIE. DEFAULT: \"AUTO\"",
                         default="AUTO")
    optparser.add_option("--verbose",dest="verbose",type="int",default=2,
                         help="Set verbose level. 0: only show critical message, 1: show additional warning message, 2: show process information, 3: show debug messages. If you want to know where are the duplicate reads, use 3. DEFAULT:2")
    return optparser
   
def load_tag_files_options ( options ):
    """From the options, load alignment tags.

    """
    options.info("read alignment tags...")
    tp = options.parser(open2(options.tfile))

    if not options.tsize:           # override tsize if user specified --tsize
        ttsize = tp.tsize()
        options.tsize = ttsize

    treat = tp.build_fwtrack()
    treat.sort()

    options.info("tag size is determined as %d bps" % options.tsize)
    return treat

def open2(path, mode='r', bufsize=-1):
    # try gzip first
    f = gzip.open(path, mode)
    try:
        f.read(10)
    except IOError:
        # not a gzipped file
        f.close()
        f = open(path, mode, bufsize)
    else:
        f.seek(0)
    return f

if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        sys.stderr.write("User interrupt me! ;-) Bye!\n")
        sys.exit(0)
    except AssertionError as e:
        sys.stderr.write(e.message+"\n")
        sys.exit(0)