/usr/include/blasr/datastructures/alignment/Alignment.hpp is in libblasr-dev 0~20151014+gitbe5d1bf-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
#ifndef _BLASR_ALIGNMENT_HPP_
#define _BLASR_ALIGNMENT_HPP_
#include "Path.h"
#include <vector>
#include <string>
#include "DNASequence.hpp"
#include "datastructures/alignment/AlignmentStats.hpp"
namespace blasr {
// A single block of an alignment.
//
// An alignment is a collection of blocks. The qPos and tPos stored in a
// block are relative to the beginning of the alignment, not to the
// beginning of the target or query sequence.
class Block {
public:
    // Block start in the query, relative to the alignment start.
    DNALength qPos;
    // Block start in the target, relative to the alignment start.
    DNALength tPos;
    // Extent of the block.
    DNALength length;

    // Stream output for a block; the definition lives outside this header.
    friend std::ostream &operator<<(std::ostream &out, const Block &b);

    // Defined elsewhere. Presumably copies rhs into this block and
    // returns *this -- TODO confirm against the implementation.
    Block& Assign(Block &rhs);

    // Defined elsewhere. Presumably the end coordinate of the block in the
    // query (qPos + length) -- TODO confirm against the implementation.
    DNALength QEnd();

    // Defined elsewhere. Presumably the end coordinate of the block in the
    // target (tPos + length) -- TODO confirm against the implementation.
    DNALength TEnd();

    // Defined elsewhere. Presumably resets the fields -- TODO confirm.
    void Clear();
};
// A gap of a given length, tagged with one of the two aligned sequences.
class Gap {
public:
    // Which sequence the gap is associated with. The enumerator values are
    // the implicit ones (Query is 0, Target is 1), spelled out here.
    // NOTE(review): whether "Query" means a gap *in* the query or extra
    // bases *from* the query is not visible in this header -- confirm.
    enum GapSeq {
        Query = 0,
        Target = 1
    };

    // Sequence tag for this gap.
    GapSeq seq;
    // Gap length (signed int, unlike the DNALength fields used elsewhere).
    int length;

    // Default constructor; defined outside this header, so the initial
    // field values cannot be stated from here.
    Gap();

    // Constructor taking a sequence tag and a length; defined elsewhere,
    // presumably storing them in seq and length -- TODO confirm.
    Gap(GapSeq seqP, int lengthP);
};
// A sequence of Gap records. Alignment::gaps holds one GapList per entry.
typedef std::vector<Gap> GapList;
// Block-based representation of a pairwise alignment between a query and
// a target (text) sequence, extending AlignmentStats with coordinates,
// scores, match blocks and gap lists. All member functions are only
// declared here; their definitions live outside this header.
class Alignment : public AlignmentStats {
public:
    // FASTA title of the query sequence.
    std::string qName;
    // FASTA title of the target sequence.
    std::string tName;

    // Strand of the query represented in the alignment: 0=forward, 1=reverse.
    int qStrand;
    // Strand of the target represented in the alignment: 0=forward, 1=reverse.
    int tStrand;

    // Starting position of the alignment in the query, within the window
    // that is matched.
    DNALength qPos;
    // Starting position of the alignment in the text (target), within the
    // window that is matched.
    DNALength tPos;

    DNALength qAlignLength;
    DNALength tAlignLength;
    DNALength qLength;
    DNALength tLength;

    // Scoring and bookkeeping values; how each is computed is not visible
    // in this header.
    double probability;
    float zScore;
    float probScore;
    int sumQVScore;
    int nCells;
    int nSampledPaths;

    // Match blocks. For block i the position in the query is
    // qPos + blocks[i].qPos and the position in the text is
    // tPos + blocks[i].tPos.
    std::vector<Block> blocks;
    // Gap lists tracked in addition to the match blocks.
    // NOTE(review): presumably one GapList per block boundary -- confirm.
    std::vector<GapList> gaps;

    Alignment();

    void CopyStats(Alignment &rhs);

    void Clear();

    Alignment& operator=(const Alignment &rhs);

    unsigned int size();

    void Assign(Alignment &rhs);

    // Results are reported through the nAnchors and nAnchorBases reference
    // parameters; the meaning of the int return value is not visible here.
    int ComputeNumAnchors(int minAnchorSize, int &nAnchors, int &nAnchorBases);

    void AllocateBlocks(int nBlocks);

    // Helpers for joining a following alignment onto this one.
    void AppendAlignmentGaps(Alignment &next, bool mergeFirst=false);
    void AppendAlignmentBlocks(Alignment &next, int qOffset = 0, int tOffset = 0);
    void AppendAlignment(Alignment &next);

    //
    // Converts the series of operations in an optimal dynamic
    // programming path into the block representation of the alignment.
    // Because an insertion may sit directly next to a deletion, the gap
    // blocks are tracked in addition to the match blocks.
    //
    void ArrowPathToAlignment(std::vector<Arrow> &optPath);

    //
    // samtools / picard do not like the pattern
    // insertion/deletion/insertion (or the opposite). To work around
    // that, idi patterns are reordered to iid (and did to idd). The
    // resulting alignment scores the same; it is only reordered so that
    // Picard / samtools accept it.
    //
    void OrderGapsByType();

    //
    // Converts an alignment path that contains at most one long gap into
    // a block-based alignment.
    //
    void LongGapArrowPathToAlignment(std::vector<Arrow> &optPath, DNALength lengthOfLongGap);

    //
    // The length of the aligned sequence in the query.
    //
    DNALength QEnd();

    //
    // The length of the aligned sequence in the target.
    //
    DNALength TEnd();

    DNALength GenomicTBegin();
    DNALength GenomicTEnd();

    //
    // Some programs do not accept alignments that have gaps at their
    // ends. This trims gaps at the ends of an alignment, even when the
    // structure already represents an acceptable alignment.
    //
    void RemoveEndGaps();
};
//
// MatchedAlignment stores two things: the alignment itself, and the
// coordinates of the sequences as they are successively refined on the way
// to producing that alignment. This gets somewhat tricky when the target
// genome has been transformed by some noise-reducing function phi(t).
//
// Before a read is aligned it is first mapped to the genome, or transformed
// and then mapped to the transformed genome. Because that mapping is
// inexact, the region a read is mapped to is typically much larger than the
// read itself. The coordinates of the mapped region are stored in tStart
// and tEnd. Alignments are performed in native nucleotide space (not
// transformed). For now, the query is always qStart=0, qEnd = queryLength.
//
// When a read is mapped to a set of concatenated chromosomes, each
// chromosome has an offset into the file. So although a sequence may be
// aligned to a region starting at tStart, the relative offset into the
// chromosome is tStart - tChromOffset. That value is used when printing the
// coordinates of a match.
//
class MatchedAlignment : public Alignment {
public:
    int refIndex;
    int readIndex;

    // Coordinates of the mapped region in the target.
    DNALength tStart;
    DNALength tEnd;
    // Query coordinates; for now always 0 and queryLength respectively.
    DNALength qStart;
    DNALength qEnd;

    // Offset of the chromosome within the concatenated target.
    int tChromOffset;

    // Defined outside this header. Presumably copies rhs into this object
    // and returns *this -- TODO confirm against the implementation.
    MatchedAlignment &Assign(MatchedAlignment &rhs);
};
/*
 * Structure for storing the information output by compare sequences,
 * namely the string representations of the alignment.
 */
class CompSeqAlignment : public Alignment {
public:
    // Target row of the alignment as a string.
    std::string tString;
    // Query row of the alignment as a string.
    std::string qString;
    // NOTE(review): presumably the match/mismatch line printed between the
    // two rows -- confirm against the code that fills it in.
    std::string alignString;
};
} // namespace blasr
#endif // _BLASR_ALIGNMENT_HPP_
|