/usr/share/pyshared/cogent/parse/gcg.py is in python-cogent 1.5.3-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
#!/usr/bin/env python
# Module-level metadata: authorship, licence, version and maintainer contact.
__author__ = "Matthew Wakefield"
__copyright__ = "Copyright 2007-2012, The Cogent Project"
__credits__ = ["Matthew Wakefield", "Peter Maxwell", "Gavin Huttley"]
__license__ = "GPL"
__version__ = "1.5.3"
__maintainer__ = "Matthew Wakefield"
__email__ = "wakefield@wehi.edu.au"
__status__ = "Production"
import warnings
def MsfParser(f):
    """Read sequences from a msf format file.

    Arguments:
        - f: an open file object, or any other iterable that yields the
          lines of an MSF alignment (e.g. a list of strings).

    Yields one (name, sequence) tuple for every sequence that is declared
    on a 'Name:' line and also appears in the alignment section. The
    sequence is the concatenation of all its alignment chunks with the
    whitespace removed; no other characters are altered.

    A warning is issued (warnings.warn) when the number of sequences
    loaded differs from the number declared, or when a loaded sequence
    does not have its declared length. Because this is a generator, the
    parsing and the warnings only happen once iteration starts.
    """
    # One shared iterator lets each parsing stage resume exactly where the
    # previous stage stopped, and works for lists as well as file objects.
    lines = iter(f)

    # Skip the optional header / free text. The file header and the
    # sequence header are separated by a line ending in '..'. Every line,
    # including the very first one, is tested for that terminator.
    for line in lines:
        if line.strip().endswith('..'):
            break

    # Parse sequence info: declared name -> declared length.
    seqinfo = {}
    for line in lines:
        line = line.strip()
        if line.startswith('//'):
            # '//' closes the sequence header; alignment data follows.
            break
        fields = line.split()
        if not fields or fields[0] != 'Name:':
            continue
        # Find the length by its 'Len:' label rather than by position, so
        # that extra tokens between the name and 'Len:' (such as 'oo') do
        # not break parsing. Fall back to the fixed position otherwise.
        if 'Len:' in fields:
            declared = fields[fields.index('Len:') + 1]
        else:
            declared = fields[3]
        seqinfo[fields[1]] = int(declared)

    # Parse sequences. Only lines whose first token is a declared name are
    # used; blank lines and position rulers are ignored. Chunks are
    # collected per name and joined once at the end.
    chunks = {}
    for line in lines:
        fields = line.split()
        if fields and fields[0] in seqinfo:
            chunks.setdefault(fields[0], []).extend(fields[1:])
    sequences = dict((name, ''.join(parts)) for name, parts in chunks.items())

    # Consistency check against the declared sequence info.
    if len(sequences) != len(seqinfo):
        warnings.warn("Number of loaded seqs[%s] not same as "
                      "expected[%s]." % (len(sequences), len(seqinfo)))
    for name in sequences:
        if len(sequences[name]) != seqinfo[name]:
            warnings.warn("Length of loaded seqs [%s] is [%s] not "
                          "[%s] as expected." %
                          (name, len(sequences[name]), seqinfo[name]))

    # Yield sequences (only after every consistency warning was raised).
    for name in sequences:
        yield (name, sequences[name])
|