/usr/bin/contig_to_chr_coords is in tophat 2.1.1+dfsg1-1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 | #!/usr/bin/env python
# encoding: utf-8
"""
contig_to_chr_coords.py
Created by Cole Trapnell on 2008-09-05.
Copyright (c) 2008 Cole Trapnell. All rights reserved.
"""
import sys
import getopt
help_message = '''
Takes the NCBI seq_contig file and maps contig coords to whole chromosome
coords in a GTF, GFF, or BED file
contig_to_chr_coords.py <format_flag> <seq_contig.md> <junctions.bed|transcripts.gff|transcripts.gtf>
'''
class Usage(Exception):
def __init__(self, msg):
self.msg = msg
def main(argv=None):
if argv is None:
argv = sys.argv
try:
try:
opts, args = getopt.getopt(argv[1:], "ho:vbg", ["help", "output=", "bed", "gff"])
except getopt.error, msg:
raise Usage(msg)
arg_is_splice = False
arg_is_gff = False
# option processing
for option, value in opts:
if option == "-v":
verbose = True
if option in ("-h", "--help"):
raise Usage(help_message)
if option in ("-o", "--output"):
output = value
if option in ("-b", "--bed"):
arg_is_splice = True
if option in ("-g", "--gff"):
arg_is_gff = True
if (arg_is_splice == False and arg_is_gff == False) or (arg_is_splice == True and arg_is_gff == True):
print >> sys.stderr, "Error: please specify either -b or -g, but not both"
raise Usage(help_message)
if len(args) < 1:
raise Usage(help_message)
contig_to_chr_file = open(args[0])
contigs = {}
for line in contig_to_chr_file.readlines():
if line[0] == "#":
continue
line = line.strip()
cols = line.split('\t')
if len(cols) < 9:
continue
chromosome = cols[1]
group = cols[8]
feature_name = cols[5]
if not feature_name in ["start", "end"]:
contigs[feature_name] = (chromosome, int(cols[2]))
#print feature_name, chromosome, int(cols[2])
if arg_is_gff:
gff_file = open(args[1])
lines = gff_file.readlines()
print lines[0],
for line in lines[1:]:
line = line.strip()
cols = line.split('\t')
if len(cols) < 8:
continue
contig = cols[0]
chr_fields = contig.split('|')
refseq_id = chr_fields[3]
contig = contigs.get(refseq_id)
chr_name = contig[0]
pipe_idx = chr_name.find('|')
if pipe_idx != -1:
chr_name = chr_name[:pipe_idx]
if contig != None:
#print line
left_pos = contig[1] + int(cols[3])
right_pos = contig[1] + int(cols[4])
print "chr%s\tTopHat\tisland\t%d\t%d\t%s\t.\t.\t%s" % (chr_name, left_pos, right_pos, cols[5],cols[8])
#print >>sys.stderr, "%s\t%d\t%d\t%s\t%s\t%s\t%s" % (contig[0], left_pos, right_pos,cols[3],cols[6],cols[0],cols[1])
if arg_is_splice:
splice_file = open(args[1])
lines = splice_file.readlines()
print lines[0],
for line in lines[1:]:
line = line.strip()
cols = line.split('\t')
contig = cols[0]
chr_fields = contig.split('|')
refseq_id = chr_fields[3]
contig = contigs.get(refseq_id)
chr_name = contig[0]
pipe_idx = chr_name.find('|')
if pipe_idx != -1:
chr_name = chr_name[:pipe_idx]
if contig != None:
#print line
left_pos = contig[1] + int(cols[1])
right_pos = contig[1] + int(cols[2])
print "chr%s\t%d\t%d\t%s\t0\t%s\t%s\t%s\t255,0,0\t2\t1,1\t%s" % (chr_name, left_pos, right_pos, cols[3],cols[5],left_pos, right_pos,cols[11])
#print >>sys.stderr, "%s\t%d\t%d\t%s\t%s\t%s\t%s" % (contig[0], left_pos, right_pos,cols[3],cols[6],cols[0],cols[1])
except Usage, err:
print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err.msg)
print >> sys.stderr, "\t for help use --help"
return 2
if __name__ == "__main__":
sys.exit(main())
|