/usr/include/pbseq/pbdata/SMRTSequence.hpp is in libpbdata-dev 0~20161219-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 | // Author: Mark Chaisson
#ifndef _BLASR_SMRT_SEQUENCE_HPP_
#define _BLASR_SMRT_SEQUENCE_HPP_
#include <cassert>
#include <iostream>
#include <sstream>
#include "Types.h"
#include "Enumerations.h"
#include "NucConversion.hpp"
#include "FASTQSequence.hpp"
#include "reads/RegionTable.hpp"
#include "reads/ZMWGroupEntry.hpp"
class SMRTSequence : public FASTQSequence {
friend class HDFZMWReader;
friend class HDFZMWWriter;
friend class HDFZMWMetricsWriter;
private:
enum SnrIndex4Base {A=0, C=1, G=2, T=3};
float hqRegionSnr_[4]; // Always saved as 'ACGT'
DNALength subreadStart_;
DNALength subreadEnd_;
// read group id associated with each SMRTSequence
std::string readGroupId_;
public:
ZMWGroupEntry zmwData;
DNALength lowQualityPrefix, lowQualitySuffix;
int highQualityRegionScore; // High quality region score in region table.
float readScore;
// Whether or not this is originally copied from a BamRecord.
bool copiedFromBam;
HalfWord *preBaseFrames;
HalfWord *widthInFrames;
//
// The following are fields that are read in from the pulse file.
// Because they are not standard in bas.h5 files, these fields
// should not be preallocated when resizing a SMRTSequence, and
// memory should be managed separately. For now, these fields all
// have the same length as the number of bases, but this could
// change so that all pulse values are stored in a SMRTSequence.
//
HalfWord *meanSignal, *maxSignal, *midSignal;
float *classifierQV;
unsigned int *startFrame;
int *pulseIndex;
public:
SMRTSequence();
inline ~SMRTSequence();
/// \name Sets and gets attributes.
/// \{
/// Set HoleNumber.
/// \returns this SMRTSequence
SMRTSequence & HoleNumber(UInt holeNumber);
/// \reutrns HoleNumber
UInt HoleNumber(void) const;
/// Set HoleXY
SMRTSequence & HoleXY(const int x, const int y);
/// \returns HoleX
UInt HoleX(void) const;
/// \returns HoleY
UInt HoleY(void) const;
/// Set HoleStatus
SMRTSequence & HoleStatus(const unsigned char);
/// \returns HoleStatus
unsigned char HoleStatus(void) const;
/// \returns movie name parsed from sequence title
std::string MovieName(void) const;
/// \returns start pos of this sequence in coordinate of zmw polymerase sequence
DNALength SubreadStart(void) const;
/// Sets subreadStart.
SMRTSequence & SubreadStart(const DNALength start);
/// \returns subread end pos of this sequence in coordinate of zmw polymerase sequence
DNALength SubreadEnd(void) const;
/// Set subread end pos in coordinate of polymerase sequence.
SMRTSequence & SubreadEnd(const DNALength end);
/// A SMRTSequence's this->seq may point to sequence of a whole
/// polymerase read, but only represents a subread [subreadStart_, subreadEnd_).
/// \returns subread length (SubreadEnd() - SubreadStart())
DNALength SubreadLength(void) const;
/// \returns read group id for this sequence.
std::string ReadGroupId(void) const;
/// Set readGroup Id for this sequence.
SMRTSequence & ReadGroupId(const std::string & rid);
/// Access to HQRegion SNRs must be done via public API.
float HQRegionSnr(const char base) const;
/// Set HQRegion SNR of base as v.
SMRTSequence & HQRegionSnr(const char base, float v);
/// \}
public:
/// \name Clip subread
/// \{
SMRTSequence & Clip(const DNALength subreadStart, const DNALength subreadEnd);
/// \}
/// \name Allocate
/// \{
/// Allocate space for all possible QVs.
void Allocate(DNALength length);
/// Compact allocate space for QVs needed in order to compute alignment score.
/// \param[in] hasInsertionDeletionQVTag: insertionQV, deletionQV and deletionTag exist
/// \param[in] hasSubstitutionQVTag: substitutionQV and substitutionTag exist
void CompactAllocate(const DNALength length,
const bool hasInsertionDeletionQVTag,
const bool hasSubstitutionQVTag);
/// \}
/// Reconstruct this as a polymerase read of a zmw given subreads of the zmw.
/// Copy only QVs necessary for computing alignment score, ignore irrelavent QVs.
/// Length of this polymerase read is max {subread.SubreadEnd()}.
/// Pad sequences and tags of scraps regions with 'N' and QVs of scraps regions with 0s.
/// \param[in] subreads: subreads of a zmw
void MadeFromSubreadsAsPolymerase(const std::vector<SMRTSequence> & subreads);
void SetSubreadTitle(SMRTSequence &subread, DNALength subreadStart,
DNALength subreadEnd);
void SetSubreadBoundaries(SMRTSequence &subread, DNALength subreadStart,
DNALength subreadEnd);
void MakeSubreadAsMasked(SMRTSequence &subread, DNALength subreadStart = 0,
int subreadEnd = -1);
void MakeSubreadAsReference(SMRTSequence &subread, DNALength subreadStart = 0,
int subreadEnd = -1);
void Copy(const SMRTSequence &rhs);
void Copy(const SMRTSequence &rhs, DNALength rhsPos, DNALength rhsLength);
void Print(std::ostream &out) const;
SMRTSequence& operator=(const SMRTSequence &rhs);
void Free();
#ifdef USE_PBBAM
public:
/// \returns if record is a valid bam record.
/// FIXME: remove this function when pbbam.BamRecord.IsValid is available.
static bool IsValid(const PacBio::BAM::BamRecord & record);
// Copy read sequence, title, holeNumber, readGroupId, and QVs
// (iq, dq, sq, mq, st, dt) from BamRecord to this SMRTSequence.
// If copyAllQVs is false, also copy all QVs.
void Copy(const PacBio::BAM::BamRecord & record,
bool copyAllQVs = false);
// Keep track of BamRecord from which this SMRTSequence is
// originally copied. However, one should NOT assume
// that this SMRTSequence has the same sequence, title, QVs as
// the BamRecord, because this SMRTSequence may be created by
// MakeSubreadAsMasked(...) or MakeRC(...).
PacBio::BAM::BamRecord bamRecord;
#endif
};
inline SMRTSequence::~SMRTSequence(){
SMRTSequence::Free();
}
#endif // _BLASR_SMRT_SEQUENCE_HPP_
|