/usr/lib/python2.7/dist-packages/biotools/IO/fastq.py is in python-biotools 1.2.12-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 | '''
Functions for manipulating FASTQ files.
'''
from biotools.sequence import Sequence
def read(fh):
'''
Read sequences in FASTQ format; identifiers are on lines that begin with
at symbols ('@'), sequence follows on the next line, then a line that
begins and sequence is with a plus sign ('+') and finally the quality
scores on the subsequent line. Quality scores are encoded in Phred format,
the type of which (either 32 or 64) is determined when the file is probed
for opening. The scores are decoded into a list of integers. This function
is a generator that yields `Sequence` objects.
'''
while 1:
try:
line = fh.next().strip()
if line[0] == '@':
name = line[1:].split()[0]
defline = line[1 + len(name):].strip()
seq = f.next().strip()
fh.next()
qual = [ord(c) - self.phred for c in fh.next().strip()]
yield Sequence(name, seq, qual=qual, defline=defline)
except StopIteration:
raise
finally:
fh.close()
def write(fh, s):
'''
Write sequences in FASTA format, i.e.,
```
@name
sequence ...
+
quality scores
```
'''
fh.write('@%s %s\n%s\n+\n%s\n' % (s.name, s.defline, s.seq,
''.join(q + chr('A') - 1 for q in s.qual)) + '\n')
def probe(fh):
'''
Probe a file to determine whether or not it is a FASTQ file. That is,
the first non-empty line should begin with a caret ('@') and the 3rd line
following that first non-empty line should contain no character with
ordinal value less than 32. If none of the characters have ordinal value
less than 64, then the file is guessed to be encoded in Phred64, otherwise
it is encoded in Phred32. This function will return False if the file is
not in FASTQ format and will return a dictionary with the phred score and
type ('fastq') if the file is FASTQ.
'''
for line in fh:
st = line.strip()
if st:
fh.close()
if st[0] == '@':
fh.next()
fh.next()
qual = [ord(c) for c in fh.next().strip()]
phred = 32 if min(qual) < ord('A') else 64
qual = [q - phred for q in qual]
return {'type': 'fastq', 'phred': phred}
return False
return {'type': 'fastq', 'phread': 64}
|