This file is indexed.

/usr/share/pyshared/cogent/parse/phylip.py is in python-cogent 1.5.3-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/usr/bin/env python
from cogent.parse.record import RecordError
from cogent.core.alignment import Alignment

# Module metadata: authorship, copyright, licence, version and maintainer.
__author__ = "Micah Hamady"
__copyright__ = "Copyright 2007-2012, The Cogent Project"
__credits__ = ["Micah Hamady", "Peter Maxwell", "Gavin Huttley",
                    "Rob Knight"]
__license__ = "GPL"
__version__ = "1.5.3"
__maintainer__ = "Micah Hamady"
__email__ = "hamady@colorado.edu"
__status__ = "Prototype"

def is_blank(x):
    """Return True when x is empty or consists solely of whitespace."""
    stripped = x.strip()
    return len(stripped) == 0


def _get_header_info(line):
    """
    Get number of sequences and length of sequence
    """
    header_parts = line.split()
    num_seqs, length = map(int, header_parts[:2])
    is_interleaved = len(header_parts) > 2
    return num_seqs, length, is_interleaved

def _split_line(line, id_offset):
    """
    First 10 chars must be blank or contain id info
    """
    if not line or not line.strip():
        return None, None
  
    # extract id and sequence
    curr_id = line[0:id_offset].strip()
    curr_seq = line[id_offset:].strip().replace(" ", "")

    return curr_id, curr_seq

def MinimalPhylipParser(data, id_map=None, interleaved=True):
    """Yields successive sequences from data as (label, seq) tuples.

    The first line of data is the header; it is parsed by _get_header_info
    into the number of sequences, the sequence length and an interleaved
    flag. Parsing stops immediately (nothing is yielded) if the header gives
    zero sequences or zero length. Blank lines after the header are skipped.

    **NOTE if using phylip interleaved format, will cache entire file in
        memory before returning sequences. If phylip file not interleaved
        then will yield each successive sequence.

    data: sequence of lines in phylip format (an open file, list, etc)
    id_map: optional id mapping from external ids to phylip labels.
        Currently unused (not implemented).
    interleaved: kept for backward compatibility. Once a header line has
        been read, the flag derived from the header replaces this value, so
        it only matters when data is empty.

    returns (id, sequence) tuples, in the order the labels appear in data

    Raises RecordError if, for interleaved data, a joined sequence does not
    have the length given in the header, or if, for non-interleaved data,
    sequence text appears before any label. The length check is only
    performed for interleaved data.
    """
    id_offset = 10      # width of the label column on labelled lines
    curr_ct = -1        # -1 until the header is consumed, then data lines seen

    # State for non-interleaved data: the sequence currently being collected.
    pending_label = None
    pending_parts = []

    # State for interleaved data, both keyed by position within a block.
    block_labels = {}
    block_parts = {}

    for line in data:
        if curr_ct == -1:
            # get header info (this overrides the `interleaved` argument)
            num_seqs, seq_len, interleaved = _get_header_info(line)

            if not num_seqs or not seq_len:
                return
            curr_ct = 0
            continue

        curr_id, curr_seq = _split_line(line, id_offset)

        # skip blank lines
        if not curr_id and not curr_seq:
            continue

        if interleaved:
            curr_id_ix = curr_ct % num_seqs

            # Only the first block carries labels; once its last line has
            # been split, later lines are treated as pure sequence.
            if (curr_ct + 1) % num_seqs == 0:
                id_offset = 0

            if curr_id_ix not in block_labels:
                block_labels[curr_id_ix] = curr_id
                block_parts[curr_id_ix] = []

            block_parts[curr_id_ix].append(curr_seq)
        elif curr_id:
            # A label starts a new sequence; emit the one just completed.
            if pending_label is not None:
                yield pending_label, ''.join(pending_parts)
            pending_label = curr_id
            pending_parts = [curr_seq]
        elif pending_label is None:
            # Continuation text with no sequence to attach it to. This used
            # to fail with an AttributeError on the empty cache.
            raise RecordError(
                "Found sequence data before any sequence label: '%s'"
                % curr_seq)
        else:
            pending_parts.append(curr_seq)
        curr_ct += 1

    # return joined sequences if interleaved
    if interleaved:
        # Sorted indices give input order independent of dict ordering.
        for curr_id_ix in sorted(block_parts):
            join_seq = ''.join(block_parts[curr_id_ix])

            if len(join_seq) != seq_len:
                raise RecordError(
                    "Length of sequence '%s' is not the same as in header "
                    "Found %d, Expected %d" % (
                    block_labels[curr_id_ix], len(join_seq), seq_len))

            yield block_labels[curr_id_ix], join_seq
    # return last seq if not interleaved
    elif pending_label is not None:
        yield pending_label, ''.join(pending_parts)

def get_align_for_phylip(data, id_map=None):
    """
    Convenience function to build an alignment object from phylip data.

    data: sequence of lines in phylip format (an open file, list, etc)
    id_map: optional id mapping from external ids to phylip labels; passed
        straight through to MinimalPhylipParser

    returns Alignment object constructed from the list of (label, sequence)
        tuples produced by MinimalPhylipParser
    """
    # Drain the parser fully before handing the records to Alignment.
    records = list(MinimalPhylipParser(data, id_map))
    return Alignment(records)