This file is indexed.

/usr/share/pyshared/cogent/parse/meme.py is in python-cogent 1.5.1-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
#!/usr/bin/env python

"""Parses MEME output file and creates Module objects.

    Supports MEME version 3.0 - 4.0
"""
from cogent.parse.record_finder import LabeledRecordFinder
from cogent.parse.record import DelimitedSplitter
from cogent.motif.util import Location, ModuleInstance, Module, Motif,\
     MotifResults, make_remap_dict
from cogent.core.moltype import DNA, RNA, PROTEIN

__author__ = "Jeremy Widmann"
__copyright__ = "Copyright 2007-2011, The Cogent Project"
__credits__ = ["Jeremy Widmann", "Rob Knight"]
__license__ = "GPL"
__version__ = "1.5.1"
__maintainer__ = "Jeremy Widmann"
__email__ = "jeremy.widmann@colorado.edu"
__status__ = "Development"

def getDataBlock(lines):
    """Returns main block of data as list.
    """
    #Get main data block: All lines following "COMMAND LINE SUMMARY"
    meme_command = LabeledRecordFinder(lambda x: x.startswith('COMMAND'))
    main_block = list(meme_command(lines))
    alphabet = getMolType(main_block[0])
    return main_block[1], alphabet

def getMolType(lines):
    """Returns alphabet type that sequences belong to.
    """
    for line in lines:
        if 'ALPHABET' in line:
            alphabet_line = line
    #Split on equal sign
    alphabet_line = alphabet_line.strip().split('ALPHABET= ')[1].split()[0]
    #Get Set of alphabet letters
    alphabet = set(alphabet_line)
    #get Protein Set
    protein_order = set(PROTEIN.Alphabet)
    #get RNA Set
    rna_order = set(RNA.Alphabet)
    #Find out which alphabet is used
    
    if len(alphabet) >= 20:
        return PROTEIN
    elif alphabet == rna_order:
        return RNA
    else:
        return DNA
    

def getCommandModuleBlocks(main_block):
    """Returns command line summary block and list of module blocks.
    """
    #Get Command line summary and all module information
    meme_module = LabeledRecordFinder(lambda x: x.startswith('MOTIF'))
    main_block = list(meme_module(main_block))
    command_block = main_block[0]
    module_blocks = []
    if len(main_block) > 1:
        module_blocks = main_block[1:]
    return command_block, module_blocks

def getSummaryBlock(module_blocks):
    """Returns summary of motifs block.
    """
    meme_summary = LabeledRecordFinder(lambda x: x.startswith('SUMMARY'))
    summary_block = list(meme_summary(module_blocks))
    return summary_block[1]

def dictFromList(data_list):
    """Returns a dict given a list.

        - Dict created from a list where list contains alternating key, value
        pairs.
        - ex: [key1, value1, key2, value2] returns: {key1:value1, key2:value2}
    """
    data_dict = {}
    for i in range(0,len(data_list)-1,2):
        #If there is already a value for the given key
        if data_list[i] in data_dict:
            #Add the rest of data to the value string
            data_dict[data_list[i]] = data_dict[data_list[i]] + ' ' + \
                                     data_list[i+1]
        else:
            #Otherwise add value to given key
            data_dict[data_list[i]] = data_list[i+1]
    return data_dict

def extractCommandLineData(command_block):
    """Returns a dict of all command line data from MEME output.
    """
    data_dict = {}
    #Get only necessary Command Line Summary data
    ignore = lambda x: x.startswith('*')
    meme_model = LabeledRecordFinder(lambda x: 'model:' in x,
                                    ignore=ignore)
    cmd_data = list(meme_model(command_block))
    cmd_data = cmd_data[1]
    cmd_data = cmd_data[:-4]

    #Just return list of strings rather than parse data
    """
    cmd_data = '^'.join(cmd_data)
    cmd_data = cmd_data.split()
    cmd_data = ' '.join(cmd_data)
    cmd_data = cmd_data.split(': ')
    lastkarat = DelimitedSplitter('^',-1)
    cmd_data_temp = []
    for line in cmd_data:
        cmd_data_temp.extend(lastkarat(line))
    cmd_data = '>'.join(cmd_data_temp)
    cmd_data = cmd_data.replace('= ','=')
    cmd_data = cmd_data.replace('^',' ')
    cmd_data = cmd_data.split('>')
    """

    return cmd_data

def getModuleDataBlocks(module_blocks):
    """Returns list data blocks for each module.
    """
    #Get blocks of module information for each module
    meme_module_data = LabeledRecordFinder(lambda x: x.startswith('Motif'))
    module_data_blocks = []
    for module in module_blocks:
        module_data_blocks.append(list(meme_module_data(module)))
    return module_data_blocks

def extractModuleData(module_data, alphabet, remap_dict):
    """Creates Module object given module_data list.

        - Only works on 1 module at a time: only pass in data from one module.

    """
    #Create Module object
    meme_module = {}
    
    #Only keep first 3 elements of the list
    module_data = module_data[:3]

    #Get Module general information: module_data[0]
    #Only need to keep first line
    general_dict = getModuleGeneralInfo(module_data[0][0])
    module_length = int(general_dict['width'])

    #Get ModuleInstances: module_data[2]
    instance_data = module_data[2][4:-2]
    for i in xrange(len(instance_data)):
        instance_data[i] = instance_data[i].split()
    #Create a ModuleInstance object and add it to Module for each instance
    for instance in instance_data:
        seqId = remap_dict[instance[0]]
        start = int(instance[1])-1
        Pvalue = float(instance[2])
        sequence = instance[4]
        #Create Location object for ModuleInstance
        location = Location(seqId, start, start + module_length)
        #Create ModuleInstance
        mod_instance = ModuleInstance(sequence,location,Pvalue)

        #Add ModuleInstance to Module
        meme_module[(seqId,start)] = mod_instance
    
    meme_module = Module(meme_module, MolType=alphabet)
    #Get Multilevel Consensus Sequence
    meme_module.ConsensusSequence = getConsensusSequence(module_data[1])
    #Pull out desired values from dict    
    meme_module.Llr = int(general_dict['llr'])
    meme_module.Evalue = float(general_dict['E-value'])
    meme_module.ID = general_dict['MOTIF']

    return meme_module

def getConsensusSequence(first_block):
    """Returns multilevel consensus sequences string.
    """
    for line in first_block:
        if line.upper().startswith('MULTILEVEL'):
            return line.split()[1]

def getModuleGeneralInfo(module_general):
    """Returns dict with Module general information.

        - Module general information includes:
            - width, sites, llr, E-value
    """
    module_id = module_general[:8]
    module_general = module_general[8:].strip().replace(' =','')
    module_general = module_general.split()
    #Get dict of Module general info from list
    general_dict = dictFromList(module_general)
    general_dict['MOTIF']=module_id[5:].strip()
    return general_dict

def extractSummaryData(summary_block):
    """Returns dict of sequences and combined P values.

        - {'CombinedP':{
            'seqId1': Pvalue1,
            'seqId2': Pvalue2,}
            }
    """
    #Get slice of necessary data from summary_block
    summary = summary_block[7:]
    summary_dict = {}
    #Split on whitespace
    for i in xrange(len(summary)):
        summary[i] = summary[i].split()
    #Add necesary data to dict
    for seq in summary:
        #Stop when '--------------------' is found: end of data
        if seq[0].startswith('--------------------'):
            break
        summary_dict[seq[0]] = float(seq[1])
    return {'CombinedP':summary_dict}

def MemeParser(lines, allowed_ids=[]):
    """Returns a MotifResults object given a MEME results file.
    """
    warnings = []
    #Create MotifResults object
    meme_motif_results = MotifResults()
    #Get main block and alphabet
    main_block, alphabet = getDataBlock(lines)
    #Add alphabet to MotifResults object
    meme_motif_results.MolType = alphabet
    #Get command line summary block and module blocks
    command_block, module_blocks = getCommandModuleBlocks(main_block)
    if command_block:
        #Extract command line data and put in dict
        parameters_list = extractCommandLineData(command_block)
        #Add parameters dict to MotifResults object parameters
        meme_motif_results.Parameters = parameters_list
    #make sure modules were found
    if len(module_blocks) > 0:
        #Get Summary of motifs block
        summary_block = getSummaryBlock(module_blocks[-1])
        #Extract summary data and get summary_dict
        summary_dict = extractSummaryData(summary_block)
        seq_names = summary_dict['CombinedP'].keys()
        if allowed_ids:
            remap_dict,warning = make_remap_dict(seq_names,allowed_ids)
            if warning:
                warnings.append(warning)
            sd = {}
            for k,v in summary_dict['CombinedP'].items():
                sd[remap_dict[k]]=v
            summary_dict['CombinedP']=sd
        else:
            remap_dict = dict(zip(seq_names,seq_names))
        
        #Add summary dict to MotifResults object
        meme_motif_results.Results = summary_dict
        
        #Add warnings to MotifResults object
        meme_motif_results.Results['Warnings']=warnings
        
        #Get blocks for each module
        module_blocks = getModuleDataBlocks(module_blocks)
        #Extract modules and put in MotifResults.Modules list
        for module in module_blocks:
            meme_motif_results.Modules.append(extractModuleData(module,\
                alphabet,remap_dict))
        for module in meme_motif_results.Modules:
            meme_motif_results.Motifs.append(Motif(module))
    return meme_motif_results