/usr/share/pyshared/cogent/evolve/simulate.py is in python-cogent 1.5.1-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 | #!/usr/bin/env python
"""Random sequences and random evolution of sequences in a tree"""
import numpy
import bisect
__author__ = "Peter Maxwell"
__copyright__ = "Copyright 2007-2011, The Cogent Project"
__credits__ = ["Peter Maxwell"]
__license__ = "GPL"
__version__ = "1.5.1"
__maintainer__ = "Peter Maxwell"
__email__ = "pm67nz@gmail.com"
__status__ = "Production"
def argpicks(freqs, random_series):
partition = numpy.add.accumulate(freqs)
assert abs(partition[-1]-1.0) < 1e-6, (freqs, partition)
while True:
x = random_series.uniform(0.0,1.0)
i = bisect.bisect_left(partition, x)
yield i
def argpick(freqs, random_series):
return argpicks(freqs, random_series).next()
def _randomMotifGenerator(random_series, motif_probs):
motifs = motif_probs.keys()
freqs = [motif_probs[m] for m in motifs]
for i in argpicks(freqs, random_series):
yield motifs[i]
def evolveSequence(random_series, motifs, parent_seq, site_cats,
psubs, preserved_sites=()):
"""Evolve a new sequence derived from parent_seq. Uses psubs[site_cats[i]]
to pick a new motif derived from parent_seq[i]"""
seq = []
randomMotifSources = {}
for (i, parent_motif) in enumerate(parent_seq):
if i in preserved_sites:
edge_motif = preserved_sites[i]
else:
if parent_motif not in randomMotifSources:
mprobs = {}
parent_motif_index = motifs.index(parent_motif)
site_cat = site_cats[i]
psub = psubs[site_cat]
for (dest_motif_index, dest_motif) in enumerate(motifs):
prob = psub[parent_motif_index, dest_motif_index]
mprobs[dest_motif] = prob
randomMotifSources[site_cat, parent_motif] = \
_randomMotifGenerator(random_series, mprobs)
edge_motif = randomMotifSources[site_cat, parent_motif].next()
seq.append(edge_motif)
return seq
def randomSequence(random_series, motif_probs, sequence_length):
getRootRandomMotif = _randomMotifGenerator(random_series, motif_probs).next
return [getRootRandomMotif() for i in range(sequence_length)]
class AlignmentEvolver(object):
# Encapsulates settings that are constant throughout the recursive generation
# of a synthetic alignment.
def __init__(self, random_series, orig_ambig, exclude_internal,
bin_names, site_bins, psub_for, motifs):
self.random_series = random_series
self.orig_ambig = orig_ambig
self.exclude_internal = exclude_internal
self.bin_names = bin_names
self.site_bins = site_bins
self.psub_for = psub_for
self.motifs = motifs
def __call__(self, tree, root_sequence):
#probsd = dict(enumerate(self.bin_probs))
#bprobs = _randomMotifGenerator(self.random_series, probsd)
#site_bins = [bprobs.next() for c in range(len(root_sequence))]
return self.generateSimulatedSeqs(tree, root_sequence)
def generateSimulatedSeqs(self, parent, parent_seq):
"""recursively generate the descendant sequences by descending the tree
from root.
Each child will be set by mutating the parent motif based on the probs
in the psub matrix of this edge.
random_series - get a random numer 0-1 by calling random_series.random()
length - the desired alignment length
parent - the edge structure.
parent_seq - the corresponding sequence. This will be mutated for each
of its children, based on their psub matricies.
"""
# This depends on parameter names 'mprobs', 'alignment2', 'bprobs' and
# 'psubs'. Might be better to integrate it into likelihood_calculation.
if self.exclude_internal and parent.Children:
simulated_sequences = {}
else:
simulated_sequences = {parent.Name : ''.join(parent_seq)}
for edge in parent.Children:
# The result for this edge - a list of motifs
# Keep original ambiguity codes
if edge.Name in self.orig_ambig:
orig_seq_ambig = self.orig_ambig[edge.Name]
else:
orig_seq_ambig = {}
# Matrix of substitution probabilities
psubs = [self.psub_for(edge.Name, bin) for bin in self.bin_names]
# Make the semi-random sequence for this edge.
edge_seq = evolveSequence(self.random_series, self.motifs,
parent_seq, self.site_bins, psubs, orig_seq_ambig)
# Pass this new edge sequence on down the tree
descendant_sequences = self.generateSimulatedSeqs(
edge, edge_seq)
simulated_sequences.update(descendant_sequences)
return simulated_sequences
|