/usr/include/kmer/seqStream.H is in libmeryl-dev 0~20150903+r2013-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
#ifndef SEQSTREAM_H
#define SEQSTREAM_H
#include "util++.H"
#include "bio++.H"
#include "seqFile.H"
// One index record per sequence in a seqStream chain.  seqStream
// keeps an array of these (_idx) and reads the fields back through
// IIDOf(), lengthOf() and startOf().
//
struct seqStreamIndex {
uint32 _iid; // seqFile IID of this sequence
uint32 _len; // length of the sequence
uint64 _bgn; // begin position of the sequence in the stream (chain)
};
class seqStream {
public:
  //  A stream is backed either by a seqFile (named by 'filename') or
  //  by a character string ('sequence', with 'length' supplied by
  //  the caller).
  //
  seqStream(const char *filename);
  seqStream(const char *sequence, uint32 length);
  ~seqStream();

  //  Choose the letter, and how many copies of it, placed between
  //  consecutive sequences.  Any non-ACGT input letter always comes
  //  back as 'N'.
  //
  //  Set the separator BEFORE calling setRange(); altering the
  //  separator length afterwards leaves the wrong range in effect.
  //
  void             setSeparator(char sep, uint32 len);

  //  get() hands back exactly one letter for every input letter, so
  //  a gap of size n yields n gap symbols.
  //
  unsigned char    get(void);

  bool             eof(void) {
    return _eof;
  }

  //  Go back to the beginning of the current range.
  //
  void             rewind(void);

  //  Restrict the stream to a range of ACGT sequence.  Coordinates
  //  are space-based.  For example, given
  //
  //    >one
  //    AAA
  //    >two
  //    C
  //    >three
  //    GGG
  //
  //  and a separator of three '-' letters, the chain is
  //
  //    strPos    012...3...456
  //              AAA---C---GGG
  //
  //  and (with the separator run written as 'nnn') the ranges are
  //
  //    range(0,0) -> nothing
  //    range(0,1) -> A
  //    range(0,3) -> AAA
  //    range(0,4) -> AAAnnnC
  //    range(0,5) -> AAAnnnCnnnG
  //
  void             setRange(uint64 bgn, uint64 end);

  //  NOTE(review): no description in this header; presumably moves
  //  the stream to chain position 'pos' -- confirm against the
  //  implementation.
  //
  void             setPosition(uint64 pos);

  //  Where the stream currently is:
  //    seqPos() -- position within the current sequence
  //    seqIID() -- iid of the current sequence
  //    strPos() -- position within the chained sequence
  //
  //  None of these are defined while the current letter is a
  //  separator.
  //
  uint32           seqPos(void) {
    return _currentPos;
  }
  uint32           seqIID(void) {
    return _idx[_currentIdx]._iid;
  }
  uint64           strPos(void) {
    return _streamPos;
  }

  uint32           numberOfSequences(void) {
    return _idxLen;
  }

  //  Length, IID, and start position in the chain of the s-th
  //  sequence in the chain.  An out-of-range s (s >= number of
  //  sequences) gives an all-ones value.
  //
  uint32           lengthOf(uint32 s) {
    if (s < _idxLen)
      return _idx[s]._len;
    return ~uint32ZERO;
  }
  uint32           IIDOf(uint32 s) {
    if (s < _idxLen)
      return _idx[s]._iid;
    return ~uint32ZERO;
  }
  uint64           startOf(uint32 s) {
    if (s < _idxLen)
      return _idx[s]._bgn;
    return ~uint64ZERO;
  }

  //  Maps a chain position p to the sequence number s accepted by
  //  lengthOf() / IIDOf() / startOf().
  //
  uint32           sequenceNumberOfPosition(uint64 p);

  //  NOTE(review): undocumented here; the name and the _seqNumOfPos
  //  member suggest it builds a lookup table to speed up
  //  sequenceNumberOfPosition() -- confirm against the implementation.
  //
  void             tradeSpaceForTime(void);

private:
  void             fillBuffer(void);

  //  Backing store: a seqFile or a character string.
  seqFile         *_file;
  char            *_string;

  //  Begin and end of the range, as positions in the chained sequence.
  uint64           _bgn;
  uint64           _end;

  uint32           _currentIdx;       //  which entry of _idx is the current sequence
  uint32           _currentPos;       //  offset inside the current sequence
  uint64           _streamPos;        //  offset inside the chained sequence

  //  Holding area for sequence read from the seqFile.
  uint32           _bufferMax;        //  capacity of the buffer
  uint32           _bufferLen;        //  amount of the buffer in use
  uint32           _bufferPos;        //  current read offset in the buffer
  uint32           _bufferSep;        //  amount of the buffer that is separator
  char            *_buffer;

  //  Description of the raw sequences.
  uint32           _idxLen;
  seqStreamIndex  *_idx;

  uint32          *_seqNumOfPos;

  uint64           _lengthOfSequences;

  bool             _eof;

  char             _separator;
  uint32           _separatorLength;
};
#endif // SEQSTREAM_H