This file is indexed.

/usr/share/pyshared/cogent/evolve/bootstrap.py is in python-cogent 1.5.1-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
#!/usr/bin/env python
"""
Provides services for parametric bootstrapping. These include
the ability to estimate probabilities or estimate confidence intervals.

The estimation of probabilities is done by the EstimateProbability class.
Functions that provide ParameterController objects for the 'null' and
'alternative' cases are provided to the constructor. Numerous aspects of the
bootstrapping can be controlled such as the choice of numerical optimiser, and
the number of samples from which to estimate the probability. This class
can be run in serial or in parallel (at the level of each random sample).

An observed Likelihood Ratio (LR) statistic is estimated using the provided
'observed' data. Random data sets are simulated under the null model and the
LR estimated from these. The probability of the observed LR is taken as the
number of sample LR's that were >= to the observed.

Confidence interval estimation can be done using the EstimateConfidenceIntervals
class. Multiple statistics associated with an analysis can be evaluated
simultaneously. Similar setup, and parallelisation options as provided by
the EstimateProbability class.

"""
from __future__ import with_statement, division
from cogent.util import parallel
from cogent.util import progress_display as UI

import random

__author__ = "Gavin Huttley, Andrew Butterfield and Peter Maxwell"
__copyright__ = "Copyright 2007-2011, The Cogent Project"
__credits__ = ["Gavin Huttley","Andrew Butterfield", "Matthew Wakefield",
                    "Edward Lang", "Peter Maxwell"]
__license__ = "GPL"
__version__ = "1.5.1"
__maintainer__ = "Gavin Huttley"
__email__ = "gavin.huttley@anu.edu.au"
__status__ = "Production"

class ParametricBootstrapCore(object):
    """Core parametric bootstrap services."""
    
    def __init__(self):
        """Constructor for core parametric bootstrap services class."""
        self._numreplicates = 10
        self.seed = None
        self.results = []
    
    def setNumReplicates(self, num):
        self._numreplicates = num
    
    def setSeed(self, seed):
        self.seed = seed
    
    @UI.display_wrap
    def run(self, ui, **opt_args):
        # Sets self.observed and self.results (a list _numreplicates long) to
        # whatever is returned from self.simplify([LF result from each PC]).
        # self.simplify() is used as the entire LF result might not be picklable
        # for MPI. Subclass must provide self.alignment and
        # self.parameter_controllers
        if 'random_series' not in opt_args and not opt_args.get('local', None):
            opt_args['random_series'] = random.Random()
        
        null_pc = self.parameter_controllers[0]
        pcs = len(self.parameter_controllers)
        if pcs == 1:
            model_label = ['']
        elif pcs == 2:
            model_label = ['null', 'alt ']
        else:
            model_label = ['null'] + ['alt%s'%i for i in range(1,pcs)]
        
        @UI.display_wrap
        def each_model(alignment, ui):
            def one_model(pc):
                pc.setAlignment(alignment)
                return pc.optimise(return_calculator=True, **opt_args)
            # This is not done in parallel because we depend on the side-
            # effect of changing the parameter_controller current values 
            memos = ui.eager_map(one_model, self.parameter_controllers, 
                    labels=model_label, pure=False)
            concise_result = self.simplify(*self.parameter_controllers)
            return (memos, concise_result)
        
        #optimisations = pcs * (self._numreplicates + 1)
        init_work = pcs / (self._numreplicates + pcs)
        ui.display('Original data', 0.0, init_work)
        (starting_points, self.observed) = each_model(self.alignment)
        
        ui.display('Randomness', init_work, 0.0)
        alignment_random_state = random.Random(self.seed).getstate()
        if self.seed is None:
            comm  = parallel.getCommunicator()
            alignment_random_state = comm.bcast(alignment_random_state, 0)
        
        def one_replicate(i):
            for (pc, start_point) in zip(self.parameter_controllers, starting_points):
                # may have fewer CPUs per replicate than for original
                pc.setupParallelContext()
                # using a calculator as a memo object to reset the params
                pc.updateFromCalculator(start_point)
            aln_rnd = random.Random(0)
            aln_rnd.setstate(alignment_random_state)
            aln_rnd.jumpahead(i*10**9)
            simalign = null_pc.simulateAlignment(random_series=aln_rnd)
            (dummy, result) = each_model(simalign)
            return result
        
        ui.display('Bootstrap', init_work)
        self.results = ui.eager_map(
                one_replicate, range(self._numreplicates), noun='replicate',
                start=init_work)


class EstimateProbability(ParametricBootstrapCore):
    # 2 parameter controllers, LR
    
    def __init__(self, null_parameter_controller, alt_parameter_controller, alignment):
        ParametricBootstrapCore.__init__(self)
        self.alignment = alignment
        self.null_parameter_controller = null_parameter_controller
        self.alt_parameter_controller = alt_parameter_controller
        self.parameter_controllers =  [self.null_parameter_controller, self.alt_parameter_controller]
    
    def simplify(self, null_result, alt_result):
        return (null_result.getLogLikelihood(), alt_result.getLogLikelihood())
    
    def getObservedlnL(self):
        return self.observed
    
    def getSamplelnL(self):
        return self.results
    
    def getSampleLRList(self):
        LR = [2 * (alt_lnL - null_lnL) for (null_lnL, alt_lnL) in self.results]
        LR.sort()
        LR.reverse()
        return LR
    
    def getObservedLR(self):
        return 2 * (self.observed[1] - self.observed[0])
    
    def getEstimatedProb(self):
        """Return the estimated probability.
        
        Calculated as the number of sample LR's >= observed LR
        divided by the number of replicates.
        """
        
        observed_LR = self.getObservedLR()
        sample_LRs = self.getSampleLRList()
        
        for (count, value) in enumerate(sample_LRs):
            if value <= observed_LR:
                return float(count) / len(sample_LRs)
        return 1.0
    

class EstimateConfidenceIntervals(ParametricBootstrapCore):
    """Estimate confidence interval(s) for one or many statistics
    by parametric bootstrapping."""
    
    def __init__(self, parameter_controller, func_calcstats, alignment):
        # func_calcstats takes a param dict and returns the statistic of interest
        ParametricBootstrapCore.__init__(self)
        self.alignment = alignment
        self.parameter_controller = parameter_controller
        self.parameter_controllers =  [parameter_controller]
        self.func_calcstats = func_calcstats
    
    def simplify(self, result):
        return (result.getLogLikelihood(), self.func_calcstats(result))
    
    def getObservedStats(self):
        return self.observed[1]
    
    def getSampleStats(self):
        return [s for (lnL, s) in self.results]
    
    def getSamplelnL(self):
        return [lnL for (lnL, s) in self.results]
    
    def getObservedlnL(self):
        return self.observed[0]