This file is indexed.

/usr/share/pyshared/cogent/app/gctmpca.py is in python-cogent 1.5.3-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
#!/usr/bin/env python
# Author: Greg Caporaso (gregcaporaso@gmail.com)
# gctmpca.py

"""Application controller for the Generalized Continuous-Time Markov 
 Process Coevolutionary Algorithm (GCTMPCA). GCTMPCA is presented in:
 
 Detecting coevolution in and among protein domains.
 Yeang CH, Haussler D., PLoS Comput Biol. 2007 Nov;3(11):e211.
 
 Detecting the coevolution of biosequences--an example 
 of RNA interaction prediction. Yeang CH, Darot JF, Noller HF, 
 Haussler D. Mol Biol Evol. 2007 Sep;24(9):2119-31. 
 
This code requires the GCTMPCA package to be installed. As of Nov. 2008,
 that software is available at:
 http://www.sns.ias.edu/~chyeang/coevolution_download.zip

Note that the authors did not name their algorithm or software when they 
 published it. GCTMPCA was suggested as a name by the first author via e-mail.
"""

from __future__ import division
from cogent.app.util import CommandLineApplication, ResultPath,\
    ApplicationError
from cogent.app.parameters import FilePath
from cogent.evolve.models import DSO78_freqs, DSO78_matrix

__author__ = "Greg Caporaso"
__copyright__ = "Copyright 2007-2012, The Cogent Project"
__credits__ = ["Greg Caporaso"]
__license__ = "GPL"
__version__ = "1.5.3"
__maintainer__ = "Greg Caporaso"
__email__ = "gregcaporaso@gmail.com"
__status__ = "Beta"

# Are these values in PyCogent somewhere?
gctmpca_base_order = 'ACGU'
default_gctmpca_rna_priors = {'A':0.2528,'C':0.2372,'G':0.3099,'U':0.2001}
default_gctmpca_rna_sub_matrix = """-1.4150\t0.2372\t0.9777\t0.2001
0.2528\t-1.1940\t0.3099\t0.6313
0.7976\t0.2372\t-1.2349\t0.2001
0.2528\t0.7484\t0.3099\t-1.3111"""

gctmpca_aa_order = 'ARNDCQEGHILKMFPSTWYV'
# By default, the Gctmpca method used the Dayhoff 78 frequencies and rate matrix
default_gctmpca_aa_priors = DSO78_freqs
default_gctmpca_aa_sub_matrix = """-133.941451\t1.104408\t3.962336\t5.624640\t1.205064\t3.404695\t9.806940\t21.266880\t0.773214\t2.397590\t3.499637\t2.092532\t1.062216\t0.715896\t12.670000\t28.456993\t21.719082\t0.000000\t0.717984\t13.461344
2.352429\t-86.970372\t1.293824\t0.000000\t0.769902\t9.410730\t0.049530\t0.797508\t8.068320\t2.360704\t1.280355\t37.343648\t1.327770\t0.556808\t5.220040\t10.714858\t1.522092\t2.109294\t0.239328\t1.553232
8.538446\t1.308928\t-179.776579\t42.419160\t0.000000\t3.940265\t7.330440\t12.317068\t17.985630\t2.840222\t2.902138\t25.593276\t0.014753\t0.556808\t2.128560\t34.440615\t13.406118\t0.241362\t2.842020\t0.970770
10.455240\t0.000000\t36.590960\t-142.144945\t0.000000\t5.126170\t57.108090\t11.076500\t2.891148\t0.885264\t0.000000\t5.714222\t0.000000\t0.000000\t0.658840\t6.609815\t3.863772\t0.000000\t0.000000\t1.164924
3.136572\t0.940792\t0.000000\t0.000000\t-26.760991\t0.000000\t0.000000\t0.974732\t0.941304\t1.622984\t0.000000\t0.000000\t0.000000\t0.000000\t0.962920\t11.201897\t0.936672\t0.000000\t2.871936\t3.171182
7.754303\t10.062384\t4.164496\t6.280848\t0.000000\t-124.487960\t35.463480\t2.481136\t20.372508\t0.663948\t6.231061\t12.313746\t1.681842\t0.000000\t7.754040\t3.896312\t3.102726\t0.000000\t0.000000\t2.265130
17.251146\t0.040904\t5.983936\t54.043416\t0.000000\t27.390580\t-136.769106\t7.177572\t1.445574\t2.250046\t0.938927\t6.680006\t0.442590\t0.000000\t2.584680\t5.496583\t1.990428\t0.000000\t0.658152\t2.394566
20.910480\t0.368136\t5.620048\t5.859000\t0.368214\t1.071140\t4.011930\t-65.418192\t0.336180\t0.000000\t0.597499\t2.173014\t0.250801\t0.596580\t1.723120\t16.281018\t1.756260\t0.000000\t0.000000\t3.494772
2.003921\t9.816960\t21.631120\t4.030992\t0.937272\t23.182530\t2.129790\t0.886120\t-88.051504\t0.258202\t3.755708\t2.092532\t0.000000\t1.909056\t4.763920\t2.435195\t1.287924\t0.283338\t3.799332\t2.847592
5.663255\t2.617856\t3.113264\t1.124928\t1.472856\t0.688590\t3.021330\t0.000000\t0.235326\t-128.487912\t21.936749\t3.702172\t4.957008\t7.795312\t0.608160\t1.669848\t11.240064\t0.000000\t1.106892\t57.534302
3.572207\t0.613560\t1.374688\t0.000000\t0.000000\t2.792615\t0.544830\t0.620284\t1.479192\t9.479702\t-53.327266\t1.448676\t7.774831\t6.244204\t1.621760\t1.182809\t1.931886\t0.482724\t0.837648\t11.325650
2.265302\t18.979456\t12.857376\t3.327912\t0.000000\t5.853015\t4.110990\t2.392524\t0.874068\t1.696756\t1.536426\t-74.828436\t3.584979\t0.000000\t1.672440\t6.679392\t7.961712\t0.000000\t0.388908\t0.647180
6.273144\t3.681360\t0.040432\t0.000000\t0.000000\t4.361070\t1.485900\t1.506404\t0.000000\t12.393696\t44.983139\t19.557126\t-125.902241\t3.659024\t0.861560\t4.313774\t6.088368\t0.000000\t0.000000\t16.697244
1.568286\t0.572656\t0.566048\t0.000000\t0.000000\t0.000000\t0.000000\t1.329180\t1.613664\t7.229656\t13.401049\t0.000000\t1.357276\t-54.612411\t0.557480\t3.200542\t0.761046\t0.797544\t20.881368\t0.776616
21.781750\t4.213112\t1.698144\t0.609336\t0.636006\t5.853015\t2.526030\t3.012808\t3.160092\t0.442632\t2.731424\t2.655906\t0.250801\t0.437492\t-74.727653\t17.046365\t4.566276\t0.000000\t0.000000\t3.106464
35.634943\t6.299216\t20.013840\t4.452840\t5.389314\t2.142280\t3.912870\t20.735208\t1.176630\t0.885264\t1.451069\t7.726272\t0.914686\t1.829512\t12.416600\t-160.924378\t32.198100\t0.787050\t1.017144\t1.941540
32.324117\t1.063504\t9.258928\t3.093552\t0.535584\t2.027515\t1.684020\t2.658360\t0.739596\t7.082112\t2.816781\t10.945552\t1.534312\t0.517036\t3.953040\t38.267350\t-129.918557\t0.000000\t1.256472\t10.160726
0.000000\t8.221704\t0.929936\t0.000000\t0.000000\t0.000000\t0.000000\t0.000000\t0.907686\t0.000000\t3.926422\t0.000000\t0.000000\t3.022672\t0.000000\t5.218275\t0.000000\t-24.051571\t1.824876\t0.000000
2.091048\t0.327232\t3.841040\t0.000000\t3.213504\t0.000000\t1.089660\t0.000000\t4.269486\t1.364782\t2.389996\t1.046266\t0.000000\t27.760856\t0.000000\t2.365618\t2.458764\t0.640134\t-54.670490\t1.812104
18.122416\t0.981696\t0.606480\t0.843696\t1.640226\t1.338925\t1.832610\t4.785048\t1.479192\t32.791654\t14.937475\t0.804820\t3.806274\t0.477264\t2.432640\t2.087310\t9.191094\t0.000000\t0.837648\t-98.996468"""

class Gctmpca(CommandLineApplication):
    """ App controller for the GCTMPCA algorithm for detecting sequence coevolution
    
        The Generalized Continuous-Time Markov Process Coevolutionary
         Algorithm (GCTMPCA) is presented in:
         
         Detecting coevolution in and among protein domains.
         Yeang CH, Haussler D., PLoS Comput Biol. 2007 Nov;3(11):e211.
         
         Detecting the coevolution of biosequences--an example 
         of RNA interaction prediction. Yeang CH, Darot JF, Noller HF, 
         Haussler D. Mol Biol Evol. 2007 Sep;24(9):2119-31. 
         
        This code requires the GCTMPCA package to be installed. As of 11/08,
         that software is available at:
          http://www.sns.ias.edu/~chyeang/coevolution_download.zip
    
    """

    _command = 'calculate_likelihood'
    _input_handler = '_gctmpca_cl_input'
    _data = {'mol_type':None,'comparison_type':0,'seqs1':None,\
             'seqs2':'-','tree1':None,'tree2':'-',\
             'seq_names':None,'species_tree':None,\
             'seq_to_species1':None,'seq_to_species2':'-',\
             'char_priors':None,'sub_matrix':None,'epsilon':0.7,\
             'max_gap_threshold':1.0,'max_seq_distance':1.0,\
             'covariation_threshold':0.0,'likelihood_threshold':0.0,\
             'output_path':None,'single_pair_only':0,'family_reps':'-',\
             'pos1':'','pos2':''}
    _parameter_order = ['mol_type','comparison_type','seqs1','seqs2',\
         'tree1','tree2','seq_names','species_tree',\
         'seq_to_species1','seq_to_species2','char_priors',\
         'sub_matrix','epsilon','max_gap_threshold','max_seq_distance',\
         'covariation_threshold','likelihood_threshold','output_path',\
         'single_pair_only','family_reps','pos1','pos2']

    _potential_paths = ['seqs1','tree1','seq_names',\
        'species_tree','seq_to_species1']

    _mol_type_lookup = {'rna':0,'0':0,'protein':1,'1':1}
    _default_priors = {0:default_gctmpca_rna_priors, 1:default_gctmpca_aa_priors}
    _default_sub_matrix = {0:default_gctmpca_rna_sub_matrix, 1:default_gctmpca_aa_sub_matrix}
    _char_order = {0:gctmpca_base_order,1:gctmpca_aa_order}
    _required_parameters = {}.fromkeys(['mol_type','seqs1','tree1',\
     'seq_names','species_tree','seq_to_species1'])

    def _set_command_line_parameters(self,data):
        """ Get the right setting for each command line parameter """
        # This function could be cleaned up.

        # for each command line parameter, set it to the value passed in or
        # the default value.
        for p in self._parameter_order:
            if p not in data:
                if p in self._required_parameters: 
                    raise ApplicationError,\
                     "Required parameter %s missing." % p
                else: data[p] = self._data[p]
            # Write necessary files to disk -- need to modify this so paths
            # to existing files can be passed in.
            if p in self._potential_paths:
                try:
                    data[p] = self._input_as_lines(data[p])
                except TypeError:
                    pass
        if data['single_pair_only'] == 1 and \
           not (data['pos1'] and data['pos2']):
            raise ApplicationError,\
                "Must specify pos1 and pos2 if single_pair_only == 1."

        # Make sure the MolType is in the correct format (i.e., 1 or 0)
        data['mol_type'] = mol_type = \
         self._mol_type_lookup[str(data['mol_type']).lower()]

        char_order = self._char_order[mol_type]
        # If we didn't get several values as parameters, set the defaults. 
        # These are done outside of the above loop b/c they require special 
        # handling.
        if not data['char_priors']: 
            data['char_priors'] = self._default_priors[mol_type]
        data['char_priors'] = \
             self._input_as_lines(\
              self._input_as_gctmpca_char_priors(\
              data['char_priors'],char_order))
        if not data['sub_matrix']: 
            data['sub_matrix'] = \
             self._input_as_multiline_string(\
              self._default_sub_matrix[mol_type])
        else:
            data['sub_matrix'] = \
             self._input_as_lines(\
              self._input_as_gctmpca_rate_matrix(\
              data['sub_matrix'],char_order))
        if not data['output_path']: 
            data['output_path'] = \
             self._input_as_path(self.getTmpFilename())
        return data

    def _gctmpca_cl_input(self,data):
        """ Write the list of 22 command line parameters to a string
        """
        # Get the right setting for each parameter
        data = self._set_command_line_parameters(data)
        # Explicitly disallow intermolecular experiments (I do this here to
        # make sure I'm looking at the final version of data)
        if data['comparison_type'] == 1: 
            raise NotImplementedError,\
                "Intermolecular experiments currently supported only via coevolve_alignments."
        # Create the command line parameter string and return it 
        return ' '.join([str(data[p]) for p in self._parameter_order]).strip()

    def _input_as_gctmpca_char_priors(self,priors,char_order):
        """convert dict of priors to string and write it to tmp file
        """
        # priors t be followed by a newline
        return ['\t'.join([str(priors[c]) for c in char_order]),'']

    def _input_as_gctmpca_rate_matrix(self,matrix,char_order):
        """convert 2D dict rate matrix to string and write it to tmp file
        """
        matrix_rows = []
        for c in char_order:
            matrix_rows.append('\t'.join([str(matrix[c][col_c]) \
                for col_c in char_order]))
        return matrix_rows

    def _get_result_paths(self,data):
        """A single file is written, w/ name specified in command line input
        """
        return {'output':ResultPath(Path=data['output_path'],IsWritten=True)}