/usr/lib/python3/dist-packages/pymummer/alignment.py is in python3-pymummer 0.7.0-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 | import pyfastaq
from pymummer import variant
class Error (Exception): pass
class Alignment:
def __init__(self, line):
'''Constructs Alignment object from a line of show-coords -dTlro'''
# nucmer:
# [S1] [E1] [S2] [E2] [LEN 1] [LEN 2] [% IDY] [LEN R] [LEN Q] [FRM] [TAGS]
#1162 25768 24536 4 24607 24533 99.32 640851 24536 1 -1 ref qry [CONTAINS]
# promer:
#[S1] [E1] [S2] [E2] [LEN 1] [LEN 2] [% IDY] [% SIM] [% STP] [LEN R] [LEN Q] [FRM] [TAGS]
# 1 1398 4891054 4892445 1398 1392 89.55 93.18 0.21 1398 5349013 1 1 ref qry [CONTAINED]
fields = line.rstrip().split('\t')
try:
self.ref_start = int(fields[0]) - 1
self.ref_end = int(fields[1]) - 1
self.qry_start = int(fields[2]) - 1
self.qry_end = int(fields[3]) - 1
self.hit_length_ref = int(fields[4])
self.hit_length_qry = int(fields[5])
self.percent_identity = float(fields[6])
if len(fields) >= 15: # promer has more fields
self.ref_length = int(fields[9])
self.qry_length = int(fields[10])
self.frame = int(fields[11])
self.ref_name = fields[13]
self.qry_name = fields[14]
else:
self.ref_length = int(fields[7])
self.qry_length = int(fields[8])
self.frame = int(fields[9])
self.ref_name = fields[11]
self.qry_name = fields[12]
except:
raise Error('Error reading this nucmer line:\n' + line)
def __eq__(self, other):
return type(other) is type(self) and self.__dict__ == other.__dict__
def __hash__(self):
return hash((self.ref_start, self.ref_end, self.qry_start, self.qry_end, self.hit_length_ref, self.hit_length_qry, self.percent_identity, self.ref_length, self.qry_length, self.frame, self.ref_name, self.qry_name))
def _swap(self):
'''Swaps the alignment so that the reference becomes the query and vice-versa. Swaps their names, coordinates etc. The frame is not changed'''
self.ref_start, self.qry_start = self.qry_start, self.ref_start
self.ref_end, self.qry_end = self.qry_end, self.ref_end
self.hit_length_ref, self.hit_length_qry = self.hit_length_qry, self.hit_length_ref
self.ref_length, self.qry_length = self.qry_length, self.ref_length
self.ref_name, self.qry_name = self.qry_name, self.ref_name
def qry_coords(self):
'''Returns a pyfastaq.intervals.Interval object of the start and end coordinates in the query sequence'''
return pyfastaq.intervals.Interval(min(self.qry_start, self.qry_end), max(self.qry_start, self.qry_end))
def ref_coords(self):
'''Returns a pyfastaq.intervals.Interval object of the start and end coordinates in the reference sequence'''
return pyfastaq.intervals.Interval(min(self.ref_start, self.ref_end), max(self.ref_start, self.ref_end))
def on_same_strand(self):
'''Returns true iff the direction of the alignment is the same in the reference and the query'''
return (self.ref_start < self.ref_end) == (self.qry_start < self.qry_end)
def is_self_hit(self):
'''Returns true iff the alignment is of a sequence to itself: names and all coordinates are the same and 100 percent identity'''
return self.ref_name == self.qry_name \
and self.ref_start == self.qry_start \
and self.ref_end == self.qry_end \
and self.percent_identity == 100
def reverse_query(self):
'''Changes the coordinates as if the query sequence has been reverse complemented'''
self.qry_start = self.qry_length - self.qry_start - 1
self.qry_end = self.qry_length - self.qry_end - 1
def reverse_reference(self):
'''Changes the coordinates as if the reference sequence has been reverse complemented'''
self.ref_start = self.ref_length - self.ref_start - 1
self.ref_end = self.ref_length - self.ref_end - 1
def __str__(self):
'''Returns a tab delimited string containing the values of this alignment object'''
return '\t'.join(str(x) for x in
[self.ref_start + 1,
self.ref_end + 1,
self.qry_start + 1,
self.qry_end + 1,
self.hit_length_ref,
self.hit_length_qry,
'{0:.2f}'.format(self.percent_identity),
self.ref_length,
self.qry_length,
self.frame,
self.ref_name,
self.qry_name])
def to_msp_crunch(self):
'''Returns the alignment as a line in MSPcrunch format. The columns are space-separated and are:
1. score
2. percent identity
3. match start in the query sequence
4. match end in the query sequence
5. query sequence name
6. subject sequence start
7. subject sequence end
8. subject sequence name'''
# we don't know the alignment score. Estimate it. This approximates 1 for a match.
aln_score = int(self.percent_identity * 0.005 * (self.hit_length_ref + self.hit_length_qry))
return ' '.join(str(x) for x in [
aln_score,
'{0:.2f}'.format(self.percent_identity),
self.qry_start + 1,
self.qry_end + 1,
self.qry_name,
self.ref_start + 1,
self.ref_end + 1,
self.ref_name
])
def qry_coords_from_ref_coord(self, ref_coord, variant_list):
'''Given a reference position and a list of variants ([variant.Variant]),
works out the position in the query sequence, accounting for indels.
Returns a tuple: (position, True|False), where second element is whether
or not the ref_coord lies in an indel. If it is, then
returns the corresponding start position
of the indel in the query'''
if self.ref_coords().distance_to_point(ref_coord) > 0:
raise Error('Cannot get query coord in qry_coords_from_ref_coord because given ref_coord ' + str(ref_coord) + ' does not lie in nucmer alignment:\n' + str(self))
indel_variant_indexes = []
for i in range(len(variant_list)):
if variant_list[i].var_type not in {variant.INS, variant.DEL}:
continue
if variant_list[i].ref_start <= ref_coord <= variant_list[i].ref_end:
return variant_list[i].qry_start, True
elif variant_list[i].ref_start < ref_coord:
indel_variant_indexes.append(i)
distance = ref_coord - min(self.ref_start, self.ref_end)
for i in indel_variant_indexes:
if variant_list[i].var_type == variant.INS:
distance += len(variant_list[i].qry_base)
else:
assert variant_list[i].var_type == variant.DEL
distance -= len(variant_list[i].ref_base)
if self.on_same_strand():
return min(self.qry_start, self.qry_end) + distance, False
else:
return max(self.qry_start, self.qry_end) - distance, False
|