/usr/lib/python3/dist-packages/screed/fastq.py is in python3-screed 1.0-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# Copyright (c) 2016, The Regents of the University of California.
from __future__ import absolute_import
from . import DBConstants
from .screedRecord import Record
from .utils import to_str
# Ordered (field name, DBConstants type) pairs for a FASTQ record.
# 'name' is the only field given the indexed-text-key type; the remaining
# fields all share the standard text type.
# NOTE(review): presumably consumed by the database layer to build its
# schema -- confirm against the callers.
FieldTypes = (
    ('name', DBConstants._INDEXED_TEXT_KEY),
) + tuple(
    (field, DBConstants._STANDARD_TEXT)
    for field in ('annotations', 'sequence', 'quality')
)
def fastq_iter(handle, line=None, parse_description=False):
    """
    Iterate over the records of the given FASTQ file handle.

    Every line is stripped of surrounding whitespace and passed through
    ``to_str`` before it is examined. A record is read as:

    * a header line that must start with '@';
    * sequence lines, up to the first line that is empty or starts with
      '+' or '#' (that terminating line is discarded);
    * quality lines, consumed until a line is empty or their combined
      length reaches the length of the sequence.

    Parameters:
        handle: object with a ``readline()`` method, opened for reading.
        line: first line of the input, for callers that have already
            read it from ``handle``. When None, it is read here.
        parse_description: when true, the header (without the '@') is
            split at the first space into ``name`` and ``annotations``.
            Otherwise the whole header becomes ``name`` and
            ``annotations`` is the empty string.

    Yields:
        Record built with the keyword arguments ``name``,
        ``annotations``, ``sequence`` and ``quality``.

    Raises:
        IOError: if a header line does not start with '@', or if the
            joined sequence and quality strings differ in length.

    Note:
        Iteration ends as soon as the line expected to be a header is
        empty after stripping. End of input and a blank line are not
        distinguished, so a blank line between records stops iteration.
    """
    if line is None:
        line = handle.readline()
    line = to_str(line.strip())

    while line:
        if not line.startswith('@'):
            raise IOError("Bad FASTQ format: no '@' at beginning of line")

        # Grab the name and (optional) annotations. partition() yields an
        # empty annotation string when the header contains no space, so
        # no exception handling is needed for that case.
        header = line[1:]
        if parse_description:
            name, _, annotations = header.partition(' ')
        else:
            name, annotations = header, ''

        # Extract the sequence lines; the line that ends this loop (the
        # separator, or an empty line) is dropped by the next readline().
        sequence_lines = []
        line = to_str(handle.readline().strip())
        while line and not line.startswith(('+', '#')):
            sequence_lines.append(line)
            line = to_str(handle.readline().strip())
        sequence = ''.join(sequence_lines)

        # Extract the quality lines. Reading is bounded by accumulated
        # length rather than by the next '@', because '@' may itself
        # appear as the first character of a quality line.
        quality_lines = []
        seqlen = len(sequence)
        aclen = 0
        line = to_str(handle.readline().strip())
        while line and aclen < seqlen:
            quality_lines.append(line)
            aclen += len(line)
            line = to_str(handle.readline().strip())
        quality = ''.join(quality_lines)

        if seqlen != len(quality):
            raise IOError('sequence and quality strings must be '
                          'of equal length')

        # `line` now holds the next record's header (or '' to stop).
        yield Record(name=name, annotations=annotations,
                     sequence=sequence, quality=quality)
|