/usr/share/pyshared/cogent/parse/nexus.py is in python-cogent 1.5.1-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 | #!/usr/bin/env python
"""
parses Nexus formatted tree files and Branchlength info in log files
"""
import re
from string import strip
from cogent.parse.record import RecordError
__author__ = "Catherine Lozupone"
__copyright__ = "Copyright 2007-2011, The Cogent Project"
__credits__ = ["Catherine Lozuopone", "Rob Knight", "Micah Hamady"]
__license__ = "GPL"
__version__ = "1.5.1"
__maintainer__ = "Catherine Lozupone"
__email__ = "lozupone@colorado.edu"
__status__ = "Production"
def parse_nexus_tree(tree_f):
"""returns a dict mapping taxa # to name from the translation table,
and a dict mapping tree name to dnd string;
takes a handle for a Nexus formatted file as input"""
trans_table = None
tree_info = get_tree_info(tree_f)
check_tree_info(tree_info)
header_s, trans_table_s, dnd_s = split_tree_info(tree_info)
if trans_table_s:
trans_table = parse_trans_table(trans_table_s)
dnd = parse_dnd(dnd_s)
return trans_table, dnd
def get_tree_info(tree_f):
"""returns the trees section of a Nexus file:
takes a handle for a Nexus formatted file as input:
returns the section describing trees as a list of strings"""
in_tree = False
result = []
for line in tree_f:
#get lines from the 'Begin trees;' tag to the 'End;' tag
line_lower = line.lower()
if line_lower.startswith('begin trees;'):
in_tree = True
if in_tree:
if line_lower.startswith('end;') or \
line_lower.startswith('endblock;'):
return result
else:
result.append(line)
def check_tree_info(tree_info):
"""makes sure that there is a tree section in the file"""
if tree_info:
pass
else:
raise RecordError, "not a valid Nexus Tree File"
def split_tree_info(tree_info):
"""Returns header, table, and dnd info from tree section of Nexus file.:
Expects to receive the output of get_tree_info"""
header = []
trans_table = []
dnd = []
state = "in_header"
for line in tree_info:
line_lower = line.lower()
if state == "in_header":
header.append(line)
if line_lower.strip() == 'translate':
state = "in_trans"
elif line_lower.startswith('tree'):
state = "in_dnd"
dnd.append(line)
elif state == "in_trans":
trans_table.append(line)
if line.strip() == ';':
state = "in_dnd"
elif state == "in_dnd":
dnd.append(line)
return header, trans_table, dnd
def parse_trans_table(trans_table):
"""returns a dict with the taxa names indexed by number"""
result = {}
for line in trans_table:
line = line.strip()
if line == ';':
pass
else:
label, name = line.split(None, 1)
#take comma out of name if it is there
if name.endswith(','):
name = name[0:-1]
# remove single quotes
if name.startswith("'") and name.endswith("'"):
name = name[1:-1]
result[label] = name
return result
def parse_dnd(dnd):#get rooted info
"""returns a dict with dnd indexed by name"""
dnd_dict = {}
for line in dnd:
line = line.strip()
name, dnd_s = map(strip, line.split('=', 1))
#get dnd from dnd_s and populate
dnd_index = dnd_s.find('(')
data = dnd_s[dnd_index:]
dnd_dict[name] = data
return dnd_dict
def get_BL_table(branch_lengths):
"""returns the section of the log file with the BL table
as a list of strings"""
in_table = 0
result = []
beg_tag = re.compile('\s+Node\s+to node\s+length')
end_tag = re.compile('Sum')
for line in branch_lengths:
if end_tag.match(line):
in_table = 0
if beg_tag.match(line):
in_table = 1
if in_table == 1:
if line.startswith("---") or beg_tag.match(line) or line.strip()== '':
pass
else:
result.append(line)
return result
def find_fields(line, field_order = ["taxa", "parent", "bl"], \
field_delims = [0, 21, 36, 49]):
"""takes line from BL table and returns dict with field names mapped to info
field order is the order of field names to extract from the file and
field_delims is a list of index numbers indicating where the field is split
"""
field_dict = {}
for i, f in enumerate(field_order):
start = field_delims[i]
try:
end = field_delims[i + 1]
except IndexError:
end = None
field_dict[f] = line[start:end].strip()
return field_dict
def parse_taxa(taxa_field):
"""gets taxa # from taxa field extracted with find_fields"""
#look for lines with a number in parentheses
term_match = re.search(r'\(\d+\)', taxa_field)
if not term_match:
data = taxa_field
else:
term = term_match.group(0)
data_match = re.search(r'\d+', term)
data = data_match.group(0)
return data
def parse_PAUP_log(branch_lengths):
"""gets branch length info from a PAUP log file
returns a dictionary mapping the taxon number to the parent number
and the branch length"""
BL_table = get_BL_table(branch_lengths)
BL_dict = {}
for line in BL_table:
info = find_fields(line)
parent = info["parent"]
bl = float(info["bl"])
taxa = parse_taxa(info["taxa"])
BL_dict[taxa] = (parent, bl)
return BL_dict
|