/usr/bin/randsample is in macs 2.0.9.1-1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 | #! /usr/bin/python
# Time-stamp: <2011-11-02 14:01:10 Tao Liu>
"""Description: Random sample certain number/percentage of tags.
Copyright (c) 2011 Tao Liu <taoliu@jimmy.harvard.edu>
This code is free software; you can redistribute it and/or modify it
under the terms of the Artistic License (see the file COPYING included
with the distribution).
@status: release candidate
@version: $Id$
@author: Yong Zhang, Tao Liu
@contact: taoliu@jimmy.harvard.edu
"""
# ------------------------------------
# python modules
# ------------------------------------
import os
import sys
import logging
from subprocess import Popen,PIPE
from optparse import OptionParser
import gzip
# ------------------------------------
# own python modules
# ------------------------------------
from MACS2.OptValidator import opt_validate_randsample as opt_validate
from MACS2.cProb import binomial_cdf_inv
from MACS2.Constants import *
# ------------------------------------
# Main function
# ------------------------------------
def main():
"""The Main function/pipeline for duplication filter.
"""
# Parse options...
options = opt_validate(prepare_optparser())
# end of parsing commandline options
info = options.info
warn = options.warn
debug = options.debug
error = options.error
#0 check output file
if options.outputfile:
assert not os.path.exists(options.outputfile), "%s already exists, please check!" % options.outputfile
outfhd = open(options.outputfile,"w")
else:
outfhd = sys.stdout
#1 Read tag files
info("read tag files...")
fwtrack = load_tag_files_options (options)
info("tag size = %d" % options.tsize)
fwtrack.fw = options.tsize
t0 = fwtrack.total
info(" total tags in alignment file: %d" % (t0))
if options.number:
if options.number > t0:
error(" Number you want is bigger than total number of tags in alignment file! Please specify a smaller number and try again!")
error(" %.2e > %.2e" % (options.number, t0))
sys.exit(1)
info(" Number of tags you want to keep: %.2e" % (options.number))
options.percentage = float(options.number)/t0*100
info(" Percentage of tags you want to keep: %.2f%%" % (options.percentage))
fwtrack.sample_percent(options.percentage/100.0)
info(" tags after random sampling in alignment file: %d" % (fwtrack.total))
info("Write to BED file")
fwtrack.print_to_bed(fhd=outfhd)
info("finished! Check %s." % options.outputfile)
def prepare_optparser ():
"""Prepare optparser object. New options will be added in this
function first.
"""
usage = "usage: %prog <-t file> [-p percent|-n number] [-o outputfile] [options]"
description = "%prog -- Random sample certain number/percentage of tags. Method: 1. Calculate the percentage of tags needed to be kept; 2. For each chromosome, random sample certain percentage of tags."
optparser = OptionParser(version="%prog "+RANDSAMPLE_VERSION,description=description,usage=usage,add_help_option=False)
optparser.add_option("-h","--help",action="help",help="show this help message and exit.")
optparser.add_option("-t",dest="tfile",type="string",
help="Sequencing alignment file. REQUIRED.")
optparser.add_option("-p","--percentage",dest="percentage",type="float",
help="Percentage of tags you want to keep. Input 80.0 for 80%. This option can't be used at the same time with -n/--num. REQUIRED")
optparser.add_option("-n","--number",dest="number",type="float",
help="Number of tags you want to keep. Input 8000000 or 8e+6 for 8 million. This option can't be used at the same time with -p/--percent. Note that the number of tags in output is approximate as the number specified here. REQUIRED")
optparser.add_option("-o",dest="outputfile",type="string",
help="Output BED file name. If not specified, will write to standard output. DEFAULT: stdout",
default=None)
optparser.add_option("-s","--tsize",dest="tsize",type="int",default=None,
help="Tag size. This will overide the auto detected tag size. DEFAULT: Not set")
optparser.add_option("-f","--format",dest="format",type="string",
help="Format of tag file, \"AUTO\", \"BED\" or \"ELAND\" or \"ELANDMULTI\" or \"ELANDEXPORT\" or \"SAM\" or \"BAM\" or \"BOWTIE\". The default AUTO option will let %prog decide which format the file is. Please check the definition in 00README file if you choose ELAND/ELANDMULTI/ELANDEXPORT/SAM/BAM/BOWTIE. DEFAULT: \"AUTO\"",
default="AUTO")
optparser.add_option("--verbose",dest="verbose",type="int",default=2,
help="Set verbose level. 0: only show critical message, 1: show additional warning message, 2: show process information, 3: show debug messages. If you want to know where are the duplicate reads, use 3. DEFAULT:2")
return optparser
def load_tag_files_options ( options ):
"""From the options, load alignment tags.
"""
options.info("read alignment tags...")
tp = options.parser(open2(options.tfile))
if not options.tsize: # override tsize if user specified --tsize
ttsize = tp.tsize()
options.tsize = ttsize
treat = tp.build_fwtrack()
treat.sort()
options.info("tag size is determined as %d bps" % options.tsize)
return treat
def open2(path, mode='r', bufsize=-1):
# try gzip first
f = gzip.open(path, mode)
try:
f.read(10)
except IOError:
# not a gzipped file
f.close()
f = open(path, mode, bufsize)
else:
f.seek(0)
return f
if __name__ == '__main__':
try:
main()
except KeyboardInterrupt:
sys.stderr.write("User interrupt me! ;-) Bye!\n")
sys.exit(0)
except AssertionError as e:
sys.stderr.write(e.message+"\n")
sys.exit(0)
|