This file is indexed.

/usr/share/pyshared/cogent/parse/macsim.py is in python-cogent 1.5.1-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#!/usr/bin/env python

from cogent.core import annotation, moltype

# Module metadata: authorship, licensing and release information.
__author__ = "Peter Maxwell"
__copyright__ = "Copyright 2007-2011, The Cogent Project"
__credits__ = ["Raymond Sammut", "Peter Maxwell", "Gavin Huttley",
                    "Rob Knight"]
__license__ = "GPL"
__version__ = "1.5.1"
__maintainer__ = "Peter Maxwell"
__email__ = "pm67nz@gmail.com"
__status__ = "Production"

#<?xml version="1.0"?>
#<!DOCTYPE macsim SYSTEM "http://www-bio3d-igbmc.u-strasbg.fr/macsim.dtd">

# The MACSIM XML alignment format (see the DOCTYPE above) is used by BAliBASE.

def _node_text(element):
    """Return the character data found directly under a DOM element.

    The data of every text and CDATA child is concatenated in document
    order; other child node types (comments, nested elements, ...) are
    skipped.  An element without any text children gives ''.
    """
    return ''.join(
        child.data for child in element.childNodes
        if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE))


def MacsimParser(doc):
    """Yield (name, sequence) pairs from a parsed MACSIM XML document.

    Arguments:
        - doc: a DOM object providing getElementsByTagName() (for example
          the result of xml.dom.minidom.parse).  The first 'macsim'
          element and, inside it, the first 'alignment' element are used.

    For each 'sequence' element of the alignment:
        - the name is the text of its first 'seq-name' element, stripped
          of surrounding whitespace;
        - the sequence is the text of its first 'seq-data' element,
          upper-cased and with all whitespace removed;
        - the 'seq-type' attribute selects the molecular type:
          'protein' (case-insensitive) gives moltype.PROTEIN, any other
          value, including a missing attribute, gives moltype.DNA.

    Each yielded sequence is built with
    alphabet.makeSequence(raw_string, Name=name).

    A missing 'macsim', 'alignment', 'seq-name' or 'seq-data' element
    fails on the [0] lookup (IndexError with xml.dom.minidom).  As this
    is a generator, that only happens once iteration starts.
    """
    doc = doc.getElementsByTagName('macsim')[0]
    align = doc.getElementsByTagName('alignment')[0]
    for record in align.getElementsByTagName('sequence'):
        # Gather every text/CDATA child rather than only childNodes[0]:
        # character data may be split over several nodes (CDATA sections,
        # embedded comments) and the first child need not be text at all.
        name = _node_text(record.getElementsByTagName('seq-name')[0])
        raw_seq = _node_text(record.getElementsByTagName('seq-data')[0])

        # cast as string to de-unicode
        raw_string = ''.join(str(raw_seq).upper().split())
        name = str(name).strip()

        if str(record.getAttribute('seq-type')).lower() == 'protein':
            alphabet = moltype.PROTEIN
        else:
            alphabet = moltype.DNA

        seq = alphabet.makeSequence(raw_string, Name=name)

        yield (name, seq)