This file is indexed.

/usr/share/pyshared/cogent/parse/unigene.py is in python-cogent 1.5.1-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
#!/usr/bin/env python
"""Parsers for the various files in the UniGene database.
"""
from cogent.parse.record import MappedRecord, ByPairs, semi_splitter, \
    equal_pairs, LineOrientedConstructor, list_adder, int_setter
from cogent.parse.record_finder import GbFinder
from string import maketrans, strip

__author__ = "Rob Knight"
__copyright__ = "Copyright 2007-2011, The Cogent Project"
__credits__ = ["Rob Knight"]
__license__ = "GPL"
__version__ = "1.5.1"
__maintainer__ = "Rob Knight"
__email__ = "rob@spot.colorado.edu"
__status__ = "Development"

def _read_sts(line):
    """Converts one STS line (label already removed) into a MappedRecord.

    Infuriatingly, STS lines are not semicolon-delimited, and spaces turn up
    in places they shouldn't. That was the state of the data as of 10/9/03:
    expect this 'feature' to be unstable!
    """
    # Treat '=' as just another separator so the line collapses into a flat
    # list of whitespace-delimited tokens.
    tokens = line.replace('=', ' ').split()
    # ByPairs groups the flat token list (presumably into consecutive
    # key/value pairs -- confirm against cogent.parse.record).
    pairs = list(ByPairs(tokens))
    return MappedRecord(pairs)

def _read_expression(line):
    """Splits a semicolon-delimited expression line into its expressions.

    The splitting itself is delegated to semi_splitter; its result is
    returned unchanged.
    """
    expressions = semi_splitter(line)
    return expressions

class UniGeneSeqRecord(MappedRecord):
    """Holds the fields parsed from a single UniGene sequence line.

    Aliases pairs each upper-case tag with a more descriptive name
    (presumably consumed by MappedRecord when resolving keys).
    """
    Aliases = {
        'ACC': 'Accession',
        'CLONE': 'CloneId',
        'END': 'End',
        'EST': 'EstId',
        'LID': 'LibraryId',
        'NID': 'NucleotideId',
        'PID': 'ProteinId',
        'SEQTYPE': 'SequenceType',
        'TRACE': 'Trace',
    }

class UniGeneProtSimRecord(MappedRecord):
    """Holds the fields parsed from a single UniGene PROTSIM line.

    Aliases pairs each tag with a more descriptive name (presumably consumed
    by MappedRecord when resolving keys).
    """
    Aliases = {
        'ORG': 'Species',
        'PROTGI': 'ProteinGi',
        # UniGene data files write this tag in upper case (PROTID=...), like
        # every other tag on the line, so the upper-case form must be aliased
        # for real records to expose ProteinId.
        'PROTID': 'ProteinId',
        # Mixed-case spelling kept so existing callers keep working.
        'ProtId': 'ProteinId',
        'PCT': 'PercentSimilarity',
        'ALN': 'AlignmentScore',
    }

def _read_seq(line):
    """Builds a UniGeneSeqRecord from a sequence line.

    The line is first cut into fields with semi_splitter, then every field
    is passed through equal_pairs.

    BEWARE: ';' is the first-level delimiter and '=' the second-level one,
    but '=' may also occur inside the _value_ at the second level!
    """
    fields = semi_splitter(line)
    return UniGeneSeqRecord([equal_pairs(field) for field in fields])

def _read_protsim(line):
    """Builds a UniGeneProtSimRecord from a protsim line.

    The line is first cut into fields with semi_splitter, then every field
    is passed through equal_pairs.

    BEWARE: ';' is the first-level delimiter and '=' the second-level one,
    but '=' may also occur inside the _value_ at the second level!
    """
    fields = semi_splitter(line)
    return UniGeneProtSimRecord([equal_pairs(field) for field in fields])

class UniGene(MappedRecord):
    """Holds data for a UniGene record.

    Required lists the fields that start out as empty lists, and Aliases
    pairs each upper-case tag with a more descriptive name (both presumably
    consumed by MappedRecord -- confirm against cogent.parse.record).
    """
    # Fields that may occur several times per record and are accumulated.
    Required = {'STS': [], 'PROTSIM': [], 'SEQUENCE': [], 'EXPRESS': []}
    Aliases = {
        'STS': 'Sts',
        'PROTSIM': 'ProteinSimilarities',
        'SEQUENCE': 'SequenceIds',
        'SCOUNT': 'SequenceCount',
        # The tag in UniGene data files is spelled CYTOBAND; the original
        # table only had the transposed 'CTYOBAND', which never matched.
        'CYTOBAND': 'CytoBand',
        # Misspelled key retained so any caller relying on it still works.
        'CTYOBAND': 'CytoBand',
        'EXPRESS': 'ExpressedIn',
        'CHROMOSOME': 'Chromosome',
        'ID': 'UniGeneId',
        'TITLE': 'UniGeneTitle',
        'LOCUSLINK': 'LocusLinkId',
    }

def _expressions_setter(obj, field, val):
    """Sets specified field on obj to the list of expressions parsed from val.

    obj:   object that receives the attribute.
    field: name of the attribute to set.
    val:   semicolon-delimited expression line.
    """
    # Go through _read_expression, the same way the STS/sequence/protsim
    # handlers go through their _read_* helpers, so the parsing of an
    # expression line lives in exactly one place.
    setattr(obj, field, _read_expression(val))

def _sts_adder(obj, field, val):
    """Parses val as an STS line and adds the record to the specified field"""
    sts_record = _read_sts(val)
    list_adder(obj, field, sts_record)

def _seq_adder(obj, field, val):
    """Parses val as a sequence line and adds the record to specified field"""
    seq_record = _read_seq(val)
    list_adder(obj, field, seq_record)

def _protsim_adder(obj, field, val):
    """Parses val as a protsim line and adds the record to specified field"""
    protsim_record = _read_protsim(val)
    list_adder(obj, field, protsim_record)
 
# Constructor that turns the lines of one record into a UniGene object.
# FieldMap names the handler for each field label that needs special
# treatment (presumably other labels get LineOrientedConstructor's default
# handling -- confirm against cogent.parse.record).
LinesToUniGene = LineOrientedConstructor()
LinesToUniGene.Constructor = UniGene
LinesToUniGene.FieldMap = {
    # stored as integers
    'LOCUSLINK': int_setter,
    'SCOUNT': int_setter,
    # stored as a list of expressions
    'EXPRESS': _expressions_setter,
    # repeated fields: each line is parsed and added to a list
    'PROTSIM': _protsim_adder,
    'SEQUENCE': _seq_adder,
    'STS': _sts_adder,
}

def UniGeneParser(lines):
    """Yields one UniGene record for each record GbFinder finds in lines.

    Every group of lines is converted with LinesToUniGene, and the '//'
    entry is removed from the result before it is yielded.
    """
    for record_lines in GbFinder(lines):
        unigene = LinesToUniGene(record_lines)
        # the record delimiter ends up as an entry of its own: drop it
        del unigene['//']
        yield unigene

if __name__ == '__main__':
    # Command-line smoke test: parse the UniGene file named by the first
    # argument, write a dot per record, then report how many were read.
    from sys import argv, stdout
    if len(argv) < 2:
        # Give a usage hint instead of a bare IndexError traceback.
        raise SystemExit("usage: %s unigene_file" % argv[0])
    filename = argv[1]
    count = 0
    infile = open(filename)
    try:
        for record in UniGeneParser(infile):
            stdout.write('.')
            stdout.flush()
            count += 1
    finally:
        # try/finally (rather than 'with') closes the file even when parsing
        # fails, and stays valid on the oldest Python 2 releases.
        infile.close()
    # Parenthesised single argument prints identically under Python 2's
    # print statement and Python 3's print function.
    print("read %s records" % count)