This file is indexed.

/usr/share/pyshared/cogent/evolve/simulate.py is in python-cogent 1.5.1-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/env python
"""Random sequences and random evolution of sequences in a tree"""

import numpy
import bisect

__author__ = "Peter Maxwell"
__copyright__ = "Copyright 2007-2011, The Cogent Project"
__credits__ = ["Peter Maxwell"]
__license__ = "GPL"
__version__ = "1.5.1"
__maintainer__ = "Peter Maxwell"
__email__ = "pm67nz@gmail.com"
__status__ = "Production"

def argpicks(freqs, random_series):
    partition = numpy.add.accumulate(freqs)
    assert abs(partition[-1]-1.0) < 1e-6, (freqs, partition)
    while True:
        x = random_series.uniform(0.0,1.0)
        i = bisect.bisect_left(partition, x)
        yield i

def argpick(freqs, random_series):
    return argpicks(freqs, random_series).next()

def _randomMotifGenerator(random_series, motif_probs):
    motifs = motif_probs.keys()
    freqs = [motif_probs[m] for m in motifs]
    for i in argpicks(freqs, random_series):
        yield motifs[i]

def evolveSequence(random_series, motifs, parent_seq, site_cats,
        psubs, preserved_sites=()):
    """Evolve a new sequence derived from parent_seq.  Uses psubs[site_cats[i]]
    to pick a new motif derived from parent_seq[i]"""
    seq = []
    randomMotifSources = {}
    for (i, parent_motif) in enumerate(parent_seq):
        if i in preserved_sites:
            edge_motif = preserved_sites[i]
        else:
            if parent_motif not in randomMotifSources:
                mprobs = {}
                parent_motif_index = motifs.index(parent_motif)
                site_cat = site_cats[i]
                psub = psubs[site_cat]
                for (dest_motif_index, dest_motif) in enumerate(motifs):
                    prob = psub[parent_motif_index, dest_motif_index]
                    mprobs[dest_motif] = prob
                randomMotifSources[site_cat, parent_motif] = \
                        _randomMotifGenerator(random_series, mprobs)
            edge_motif = randomMotifSources[site_cat, parent_motif].next()
        seq.append(edge_motif)
    return seq

def randomSequence(random_series, motif_probs, sequence_length):
    getRootRandomMotif = _randomMotifGenerator(random_series, motif_probs).next
    return [getRootRandomMotif() for i in range(sequence_length)]

class AlignmentEvolver(object):
    # Encapsulates settings that are constant throughout the recursive generation
    # of a synthetic alignment.
    def __init__(self, random_series, orig_ambig, exclude_internal,
            bin_names, site_bins, psub_for, motifs):
        self.random_series = random_series
        self.orig_ambig = orig_ambig
        self.exclude_internal = exclude_internal
        self.bin_names = bin_names
        self.site_bins = site_bins
        self.psub_for = psub_for
        self.motifs = motifs
    
    def __call__(self, tree, root_sequence):
        #probsd = dict(enumerate(self.bin_probs))
        #bprobs = _randomMotifGenerator(self.random_series, probsd)
        #site_bins = [bprobs.next() for c in range(len(root_sequence))]
        return self.generateSimulatedSeqs(tree, root_sequence)
    
    def generateSimulatedSeqs(self, parent, parent_seq):
        """recursively generate the descendant sequences by descending the tree
        from root.
        Each child will be set by mutating the parent motif based on the probs
        in the psub matrix of this edge.
        
        random_series - get a random numer 0-1 by calling random_series.random()
        length - the desired alignment length
        parent - the edge structure.
        parent_seq - the corresponding sequence. This will be mutated for each
        of its children, based on their psub matricies.
        """
        
        # This depends on parameter names 'mprobs', 'alignment2', 'bprobs' and
        # 'psubs'.  Might be better to integrate it into likelihood_calculation.
        
        if self.exclude_internal and parent.Children:
            simulated_sequences = {}
        else:
            simulated_sequences = {parent.Name : ''.join(parent_seq)}
        
        for edge in parent.Children:
            # The result for this edge - a list of motifs
            
            # Keep original ambiguity codes
            if edge.Name in self.orig_ambig:
                orig_seq_ambig = self.orig_ambig[edge.Name]
            else:
                orig_seq_ambig = {}
            
            # Matrix of substitution probabilities
            psubs = [self.psub_for(edge.Name, bin) for bin in self.bin_names]
            
            # Make the semi-random sequence for this edge.
            edge_seq = evolveSequence(self.random_series, self.motifs,
                    parent_seq, self.site_bins, psubs, orig_seq_ambig)
            
            # Pass this new edge sequence on down the tree
            descendant_sequences = self.generateSimulatedSeqs(
                    edge, edge_seq)
            simulated_sequences.update(descendant_sequences)
        
        return simulated_sequences