/usr/share/pyshared/Bio/Wise/dnal.py is in python-biopython 1.58-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 | #!/usr/bin/env python
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
#
# Bio.Wise contains modules for running and processing the output of
# some of the models in the Wise2 package by Ewan Birney available from:
# ftp://ftp.ebi.ac.uk/pub/software/unix/wise2/
# http://www.ebi.ac.uk/Wise2/
#
# Bio.Wise.psw is for protein Smith-Waterman alignments
# Bio.Wise.dnal is for Smith-Waterman DNA alignments
__version__ = "$Revision: 1.12 $"
import commands
import itertools
import os
import re
from Bio import Wise
_SCORE_MATCH = 4
_SCORE_MISMATCH = -1
_SCORE_GAP_START = -5
_SCORE_GAP_EXTENSION = -1
_CMDLINE_DNAL = ["dnal", "-alb", "-nopretty"]
def _build_dnal_cmdline(match, mismatch, gap, extension):
res = _CMDLINE_DNAL[:]
res.extend(["-match", str(match)])
res.extend(["-mis", str(mismatch)])
res.extend(["-gap", str(-gap)]) # negative: convert score to penalty
res.extend(["-ext", str(-extension)]) # negative: convert score to penalty
return res
_CMDLINE_FGREP_COUNT = "fgrep -c '%s' %s"
def _fgrep_count(pattern, file):
return int(commands.getoutput(_CMDLINE_FGREP_COUNT % (pattern, file)))
_re_alb_line2coords = re.compile(r"^\[([^:]+):[^\[]+\[([^:]+):")
def _alb_line2coords(line):
return tuple([int(coord)+1 # one-based -> zero-based
for coord
in _re_alb_line2coords.match(line).groups()])
def _get_coords(filename):
alb = file(filename)
start_line = None
end_line = None
for line in alb:
if line.startswith("["):
if not start_line:
start_line = line # rstrip not needed
else:
end_line = line
if end_line is None: # sequence is too short
return [(0, 0), (0, 0)]
return zip(*map(_alb_line2coords, [start_line, end_line])) # returns [(start0, end0), (start1, end1)]
def _any(seq, pred=bool):
"Returns True if pred(x) is True at least one element in the iterable"
return True in itertools.imap(pred, seq)
class Statistics(object):
"""
Calculate statistics from an ALB report
"""
def __init__(self, filename, match, mismatch, gap, extension):
self.matches = _fgrep_count('"SEQUENCE" %s' % match, filename)
self.mismatches = _fgrep_count('"SEQUENCE" %s' % mismatch, filename)
self.gaps = _fgrep_count('"INSERT" %s' % gap, filename)
if gap == extension:
self.extensions = 0
else:
self.extensions = _fgrep_count('"INSERT" %s' % extension, filename)
self.score = (match*self.matches +
mismatch*self.mismatches +
gap*self.gaps +
extension*self.extensions)
if _any([self.matches, self.mismatches, self.gaps, self.extensions]):
self.coords = _get_coords(filename)
else:
self.coords = [(0, 0), (0,0)]
def identity_fraction(self):
return self.matches/(self.matches+self.mismatches)
header = "identity_fraction\tmatches\tmismatches\tgaps\textensions"
def __str__(self):
return "\t".join([str(x) for x in (self.identity_fraction(), self.matches, self.mismatches, self.gaps, self.extensions)])
def align(pair, match=_SCORE_MATCH, mismatch=_SCORE_MISMATCH, gap=_SCORE_GAP_START, extension=_SCORE_GAP_EXTENSION, **keywds):
cmdline = _build_dnal_cmdline(match, mismatch, gap, extension)
temp_file = Wise.align(cmdline, pair, **keywds)
try:
return Statistics(temp_file.name, match, mismatch, gap, extension)
except AttributeError:
try:
keywds['dry_run']
return None
except KeyError:
raise
def main():
import sys
stats = align(sys.argv[1:3])
print "\n".join(["%s: %s" % (attr, getattr(stats, attr))
for attr in
("matches", "mismatches", "gaps", "extensions")])
print "identity_fraction: %s" % stats.identity_fraction()
print "coords: %s" % stats.coords
def _test(*args, **keywds):
import doctest, sys
doctest.testmod(sys.modules[__name__], *args, **keywds)
if __name__ == "__main__":
if __debug__:
_test()
main()
|