/usr/share/pyshared/cogent/parse/dialign.py is in python-cogent 1.5.1-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
#!/usr/bin/env python
import re
from cogent import ASCII
__author__ = "Gavin Huttley"
__copyright__ = "Copyright 2007-2011, The Cogent Project"
__credits__ = ["Gavin Huttley"]
__license__ = "GPL"
__version__ = "1.5.1"
__maintainer__ = "Gavin Huttley"
__email__ = "gavin.huttley@anu.edu.au"
__status__ = "Production"
# Raw string literals are used so that the backslashes reach the regex
# engine untouched; "\s" and "\d" are not valid *string* escapes and the
# non-raw spelling only worked because Python passes unknown escapes
# through (a behaviour that is deprecated). The compiled patterns are
# unchanged.

# Leading whitespace followed by a run of '=' characters. Used to
# recognise the underline beneath the "Alignment (DIALIGN format):" title.
_header = re.compile(r"^\s+[=]+")

# A line that starts with at least one space and otherwise contains only
# digits and whitespace. Used to recognise alignment quality-score lines.
_quality_scores = re.compile(r"^ +\d+[\s\d]*$")
def align_block_lines(lines):
    """Yield the lines belonging to the alignment section of DIALIGN output.

    Nothing is yielded until a line containing
    "Alignment (DIALIGN format):" has been seen. That title line is never
    yielded, and neither is a '=' underline (as matched by ``_header``)
    encountered directly after a single title line. Empty strings are
    skipped, and iteration stops at the first line containing
    "Sequence tree:".

    Arguments:
        - lines: an iterable of strings.
    """
    # 0: title not yet seen; 1: title seen; 3: title and underline seen.
    state = 0
    for text in lines:
        if "Alignment (DIALIGN format):" in text:
            state += 1
        elif state == 1 and _header.search(text):
            state += 2
        elif state and text:
            # Inside the alignment section and the line is non-empty.
            if "Sequence tree:" in text:
                break
            yield text
def parse_data_line(line):
    """Split one alignment-section line into a ``(name, seq)`` pair.

    Arguments:
        - line: a single line of text.

    Returns one of:
        - ``(None, scores)`` when the line matches the quality-score
          pattern; ``scores`` is the line with all whitespace removed.
        - ``(None, None)`` when the line is empty or starts with
          whitespace (and is not a quality-score line).
        - ``(name, seq)`` otherwise, where ``name`` is the first
          whitespace-delimited field and ``seq`` is the concatenation of
          the third and later fields. The second field is discarded
          (presumably the sequence position column -- confirm against
          DIALIGN output).
    """
    if _quality_scores.findall(line):
        return None, "".join(line.split())

    # An empty string has no first character to inspect; treat it like a
    # blank line instead of failing with IndexError on line[0].
    if not line or line[0].isspace():
        return None, None

    fields = line.split()
    return fields[0], "".join(fields[2:])
def DialignParser(lines, seq_maker=None, get_scores=False):
    """Yields (label, sequence) pairs parsed from DIALIGN output.

    DIALIGN records alignment quality in the case of the sequence
    characters and in the score lines. Font (case) information can be
    handled by supplying a custom seq_maker.

    Arguments:
        - lines: an iterable of lines of DIALIGN output.
        - seq_maker: callable invoked as ``seq_maker(seq, Name=label)`` to
          build each yielded sequence; defaults to ``ASCII.Sequence``.
        - get_scores: when True, a final pair
          ``("QualityScores", scores)`` is yielded, where ``scores`` is
          the concatenation of all quality-score lines.
    """
    make_seq = ASCII.Sequence if seq_maker is None else seq_maker

    segments_by_name = {}
    score_chunks = []
    for raw in align_block_lines(lines):
        label, chunk = parse_data_line(raw)
        if chunk is None:
            # Line carries neither sequence data nor scores.
            continue
        if label is None and chunk:
            score_chunks.append(chunk)
        else:
            segments_by_name.setdefault(label, []).append(chunk)

    # Join the per-block segments of each sequence into one string.
    for label, chunks in segments_by_name.items():
        yield label, make_seq("".join(chunks), Name=label)

    if get_scores:
        yield "QualityScores", "".join(score_chunks)
|