This file is indexed.

/usr/share/pyshared/cogent/parse/pdb.py is in python-cogent 1.5.1-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
#!/usr/bin/env python
"""PDB parser class and parsing utility functions."""

from re import compile
from numpy import array, linalg

from cogent.data.protein_properties import AA_NAMES
from cogent.core.entity import StructureBuilder, ConstructionWarning, ConstructionError

__author__ = "Marcin Cieslik"
__copyright__ = "Copyright 2007-2011, The Cogent Project"
__credits__ = ["Marcin Cieslik"]
__license__ = "GPL"
__version__ = "1.5.1"
__maintainer__ = "Marcin Cieslik"
__email__ = "mpc4p@virginia.edu"
__status__ = "Development"

match_coords = compile('^HETATM|ATOM|MODEL')    # start of coordinates
match_trailer = compile('^CONECT')      # end of coordinates

# default PDB format string -> \n neccesery for writelines
# try not to parse a second \n eg. by trying to parse charge
PDB_COORDS_STRING = "%s%5i %-4s%c%3s %c%4i%c   %8.3f%8.3f%8.3f%6.2f%6.2f      %4s%2s\n"
PDB_TER_STRING = "%s%5i %-4s%c%3s %c%4i%c\n"

def dict2pdb(d):
    """Transform an atom dictionary into a valid PDB line."""
    (x, y, z) = d['coords']
    args = (d['at_type'], d['ser_num'], d['at_name'], d['alt_loc'],
            d['res_name'][-3:], d['chain_id'], d['res_id'], d['res_ic'],
            x , y , z , d['occupancy'], d['bfactor'], d['seg_id'], d['element'])
    return PDB_COORDS_STRING % args

def dict2ter(d):
    """Transforms an atom dictionary into a valid TER line AFTER it."""
    args = ('TER   ', d['ser_num'] + 1, '   ', ' ',
            d['res_name'][-3:], d['chain_id'], d['res_id'], d['res_ic'])
    return PDB_TER_STRING % args

def pdb2dict(line):
    """Parses a valid PDB line into an atomic dictionary."""
    at_type = line[0:6]
    ser_num = int(line[6:11])   # numbers are ints's?
    at_name = line[12:16]       # " N B "
    at_id = at_name.strip()     # "N B"
    alt_loc = line[16:17]       # always keep \s
    res_name = line[17:20]      # non standard is 4chars long

    chain_id = line[21:22]
    res_id = int(line[22:26].strip())   # pdb requirement int
    res_ic = line[26:27]

    x = line[30:38]             # no float conversion necessery
    y = line[38:46]             # it gets loaded straight into an
    z = line[46:54]             # numpy array.
    occupancy = float(line[54:60])
    bfactor = float(line[60:66])
    seg_id = line[72:76]
    element = line[76:78]

    h_flag = ' '                # this is the default
    if at_type == 'HETATM':     # hetatms get the het flag (it's not used for writing)
        if res_name not in ('MSE', 'SEL'): # SeMet are not ligands
            h_flag = 'H'
            res_name = '%s_%s' % (h_flag, res_name)

    at_long_id = (at_id, alt_loc)
    res_long_id = (res_name, res_id, res_ic)
    coords = array((x, y, z)).astype("double")

    result = {
    'at_type': at_type, 'ser_num': ser_num, 'at_name': at_name,
    'at_id': at_id, 'alt_loc': alt_loc, 'res_name': res_name,
    'chain_id': chain_id, 'res_id': res_id, 'res_ic': res_ic,
    'h_flag': h_flag, 'coords': coords, 'occupancy': occupancy,
    'bfactor': bfactor, 'seg_id': seg_id, 'res_long_id': res_long_id,
    'at_long_id': at_long_id, 'element':element}
    return result

def get_symmetry(header):
    """Extracts symmetry operations from header, either by parsing of
    conversion matrices (SMTRY or BIOMT) or using CCTBX based on the
    space group group data name and a,b,c,alpha,beta,gamma.
    """
    header_result = {}
    for (mode, remark) in (('uc', 'REMARK 290   SMTRY'), ('bio', 'REMARK 350   BIOMT')):
        # parsing the raw-header matrices
        # uc: parsing the symmetry matrices for a given space group
        # bio: parsing the symmetry matrices to construct the biological molecule
        #'REMARK 290   SMTRY3  96 -1.000000  0.000000  0.000000        0.00000'
        # how should we deal with the ORIGx matrix?
        # needed function to check if the SCALEn matrix is correct with
        # respect to the CRYST1 card.
        mx_num = 1
        new_chain = False
        fmx, mxs, mx, cmx = [], [], [], []
        for line in header:
            if line.startswith('SCALE') and mode == 'uc':
                data = map(float, line[6:].split()[:-1])
                fmx.append(data)
            elif line.startswith(remark):
                m_line = map(float, line[20:].split())
                if mx_num != m_line[0] or new_chain:
                    mx.append([ 0., 0., 0., 1.])
                    mxs.append(mx)
                    mx = []
                    if mode == 'bio':
                        if new_chain:
                            new_chain = False
                        else:
                            cmx[-1][1] += 1
                mx_num = m_line[0]
                mx.append(m_line[1:])
            elif line.startswith('REMARK 350 APPLY') and mode == 'bio':
                if cmx: new_chain = True; cmx[-1][1] += 1
                chains = [(c.strip(),) for c in line[42:].split(',')]
                cmx.append([chains, 0])
        mx.append([ 0., 0., 0., 1.]) # finish the last matrix
        mxs.append(mx)
        mxs = array(mxs)        # symmetry matrices
        header_result[mode + "_mxs"] = mxs
        if mode == 'uc':
            fmx = array(fmx)        # fractionalization_matrix
            omx = linalg.inv(fmx)   # orthogonalization_matrix
            header_result["uc_fmx"] = fmx
            header_result["uc_omx"] = omx
        elif mode == 'bio':
            cmx[-1][1] += 1
            header_result[mode + "_cmx"] = cmx
    return header_result

def get_coords_offset(line_list):
    """Determine the line number where coordinates begin."""
    i = 0
    for i, line in enumerate(line_list):
        if match_coords.match(line):
            break
    return i

def get_trailer_offset(line_list):
    """Determine the line number where coordinates end."""
    i = 0
    for i, line in enumerate(line_list):
        if match_trailer.match(line):
            break
    return i

def parse_header(header):
    """Parse parts of the PDB header."""
    id = (compile('HEADER\s{4}\S+.*\S+\s*\S{9}\s+(\S{4})\s*$'), 'id')
    dt = (compile('HEADER\s{4}\S+.*\S+\s+(\S{9})\s+\S{4}\s*$'), 'date')
    nm = (compile('HEADER\s{4}(\S+.*\S+)\s+\S{9}\s+\S{4}\s*$'), 'name')
    mc = (compile('REMARK 280\s+MATTHEWS COEFFICIENT,\s+VM\s+\(ANGSTROMS\*\*3/DA\):\s+(\d+\.\d+)'), 'matthews')
    sc = (compile('REMARK 280\s+SOLVENT CONTENT,\s+VS\s+\(%\):\s+(\d+\.\d+)'), 'solvent_content')
    sg = (compile('REMARK 290\s+SYMMETRY OPERATORS FOR SPACE GROUP:\s+(.*\S)'), 'space_group')
    xt = (compile('REMARK 200\s+EXPERIMENT TYPE\s+:\s+(.*\S)'), 'experiment_type')
    rs = (compile('REMARK   2\s+RESOLUTION\.\s+(\d+\.\d+)\s+ANGSTROMS\.'), 'resolution')
    rf = (compile('REMARK   3\s+FREE R VALUE\s+:\s+(\d+\.\d+)'), 'r_free')
    xd = (compile('EXPDTA\s+([\w\-]+).*'), 'expdta')
    c1 = (compile('CRYST1\s+((\d+\.\d+\s+){6})'), 'cryst1')
    ra = (compile('DBREF\s+\S{4}\s+\S\s+\d+\s+\d+\s+\S+\s+(\S+)\s+\S+\s+\d+\s+\d+\s+$'), 'dbref_acc')
    rx = (compile('DBREF\s+\S{4}\s+\S\s+\d+\s+\d+\s+\S+\s+\S+\s+(\S+)\s+\d+\s+\d+\s+$'), 'dbref_acc_full')
    #CRYST1   60.456   60.456   82.526  90.00  90.00  90.00 P 41          4
    #DBREF  1UI9 A    1   122  UNP    Q84FH6   Q84FH6_THETH     1    122             \n'
    tests = [id, dt, nm, mc, sc, sg, xt, rs, rf, xd, c1, ra, rx]
    results = {}
    for line in header:
        for (regexp, name) in tests:
            try:
                results.update({name:regexp.search(line).group(1).strip()})
                continue # taking only the first grep.
            except AttributeError:
                pass
    return results

def parse_coords(builder, coords, forgive=1):
    """Parse coordinate lines."""
    current_model_id = 0
    model_open = False
    current_chain_id = None
    current_seg_id = None
    current_res_long_id = None
    current_res_name = None

    for line in coords:
        record_type = line[0:6]

        if record_type == 'MODEL ':
            builder.initModel(current_model_id)
            current_model_id += 1
            model_open = True
            current_chain_id = None
            current_res_id = None

        elif record_type == 'ENDMDL':
            model_open = False
            current_chain_id = None
            current_res_id = None

        elif record_type == 'ATOM  ' or record_type == 'HETATM':
            if not model_open:
                builder.initModel(current_model_id)
                current_model_id += 1
                model_open = 1
            new_chain = False
            fields = pdb2dict(line)

            if current_seg_id != fields['seg_id']:
                current_seg_id = fields['seg_id']
                builder.initSeg(current_seg_id)

            if current_chain_id != fields['chain_id']:
                current_chain_id = fields['chain_id']
                new_chain = True
                try:
                    builder.initChain(current_chain_id)
                except ConstructionWarning:
                    if not forgive:
                        raise ConstructionError

            if current_res_name != fields['res_name'] or current_res_long_id != fields['res_long_id'] or new_chain:
                current_res_long_id = fields['res_long_id']
                current_res_name = fields['res_name']
                try:
                    builder.initResidue(fields['res_long_id'], fields['h_flag'])
                except ConstructionWarning:
                    if not forgive:
                        raise ConstructionError

                new_chain = False
            try:
                builder.initAtom(fields['at_long_id'], fields['at_name'], fields['ser_num'], \
                                  fields['coords'], fields['occupancy'], fields['bfactor'], \
                                  fields['element'])
            except ConstructionError:
                if not forgive > 1:
                    raise ConstructionError


    return builder.getStructure()

def parse_trailer(trailer):
    return {}

def PDBParser(open_file, structure_id=None, forgive=2):
    """Parse a PDB file and return a Structure object."""
    file_ = open_file.readlines()
    builder = StructureBuilder()

    c_offset = get_coords_offset(file_)
    t_offset = get_trailer_offset(file_)

    raw_coords = file_[c_offset:t_offset]
    raw_trailer = file_[t_offset:]
    raw_header = file_[:c_offset]

    parsed_header = parse_header(raw_header)
    parsed_trailer = parse_trailer(raw_trailer)
    structure_id = (structure_id or parsed_header.get('id'))
    builder.initStructure(structure_id)
    structure = parse_coords(builder, raw_coords, forgive)

    # only X-ray structures will contain crystallographic data
    if parsed_header.get('expdta') == 'X-RAY':
        symetry_info = get_symmetry(raw_header)
        parsed_header.update(symetry_info)

    structure.header = parsed_header
    structure.trailer = parsed_trailer
    structure.raw_header = raw_header
    structure.raw_trailer = raw_trailer

    return structure