This file is indexed.

/usr/include/blasr/datastructures/alignment/Alignment.hpp is in libblasr-dev 0~20151014+gitbe5d1bf-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
#ifndef _BLASR_ALIGNMENT_HPP_
#define _BLASR_ALIGNMENT_HPP_

#include "Path.h"
#include <vector>
#include <string>
#include "DNASequence.hpp"
#include "datastructures/alignment/AlignmentStats.hpp"

namespace blasr {
//
// A single block of an alignment: one position per sequence plus a length.
// Only declarations appear here; none of the member functions is defined
// in this header.
//
class Block {
public:
    //
    // An alignment is a collection of blocks. The qPos and tPos in a block
    // are relative to the beginning of the alignment rather than the
    // target or query.
    //
    DNALength qPos, tPos, length;

    // Writes a block to a stream; the output format is determined by the
    // operator's definition, which is not part of this header.
    friend std::ostream &operator<<(std::ostream &out, const Block &b);

    // Assigns from rhs and returns a Block reference (presumably *this after
    // a member-wise copy -- TODO confirm against the definition).
    // NOTE(review): rhs is taken by non-const reference, so const Blocks
    // cannot be passed here.
    Block& Assign(Block &rhs);

    // Presumably the end of the block in the query (qPos + length) --
    // TODO confirm against the definition.
    DNALength QEnd(); 

    // Presumably the end of the block in the target (tPos + length) --
    // TODO confirm against the definition.
    DNALength TEnd();

    // Resets the block; presumably zeroes qPos, tPos and length --
    // TODO confirm against the definition.
    void Clear(); 
};

//
// A gap record: a sequence selector plus a length.  The constructors are
// only declared here; their definitions live outside this header.
//
class Gap {
public:
    // Selects one of the two aligned sequences.  Enumerators keep their
    // implicit values (Query == 0, Target == 1).
    enum GapSeq {
        Query,
        Target
    };

    // Sequence this gap is attributed to.
    // NOTE(review): whether "Query" means the gap characters sit in the
    // query or that query bases are unmatched is decided by the code that
    // fills this in -- confirm against the callers.
    GapSeq seq;

    // Gap length; presumably counted in bases -- TODO confirm.
    int length;

    // Default constructor; initial member values are set by the
    // out-of-header definition.
    Gap();

    // Builds a gap from a sequence selector and a length.
    Gap(GapSeq seqP, int lengthP);
};

// An ordered list of Gap records.
typedef std::vector<Gap> GapList;

//
// A pairwise alignment between a query and a target ("text") sequence,
// stored as a list of blocks plus lists of gaps.  Derives from
// AlignmentStats (see datastructures/alignment/AlignmentStats.hpp).
// Only declarations appear here; no member function is defined in this
// header.
//
class Alignment : public AlignmentStats {
public:
    // the FASTA titles of each sequence
    std::string qName, tName;

    // Strands represented in the alignment, 0=forward, 1=reverse
    int qStrand, tStrand;

    // The starting pos in the text and query of the start of the
    // alignment, in the window that is matched.
    DNALength qPos, tPos;
    // Presumably the number of query / target bases covered by the
    // alignment -- TODO confirm against the code that sets them.
    DNALength  qAlignLength;
    DNALength tAlignLength;
    // Presumably the full lengths of the query / target sequences --
    // TODO confirm against the code that sets them.
    DNALength qLength;
    DNALength tLength;

    // Scores and bookkeeping counters attached to the alignment.  Their
    // exact meaning is set by the code that fills them in, which is not
    // visible in this header.
    double probability;
    float zScore;
    float probScore;
    int   sumQVScore; 
    int   nCells;
    int   nSampledPaths;

    // The blocks that make up the alignment (see Block).
    std::vector<Block> blocks;
    // One GapList per entry; presumably gaps[i] holds the gaps associated
    // with blocks[i] -- TODO confirm against ArrowPathToAlignment.
    std::vector<GapList> gaps;

    Alignment();

    // Copies statistics from rhs; which fields are copied is decided by the
    // definition (not in this header).
    // NOTE(review): rhs is taken by non-const reference.
    void CopyStats(Alignment &rhs);

    //
    // The position in the query is qPos + blocks[i].qPos
    // and the position in the text is tPos + blocks[i].tPos
    //

    // Resets the alignment; the exact set of fields reset is decided by the
    // definition (not in this header).
    void Clear();

    // NOTE(review): copy assignment is user-declared but no copy
    // constructor is declared in this class.
    Alignment& operator=(const Alignment &rhs);

    // Presumably the number of blocks -- TODO confirm against the definition.
    unsigned int size(); 

    // Assigns from rhs.  NOTE(review): rhs is taken by non-const reference,
    // unlike operator= above.
    void Assign(Alignment &rhs);

    // Reports anchor statistics through the reference parameters nAnchors
    // and nAnchorBases, given a minimum anchor size.  Presumably an anchor
    // is a block of at least minAnchorSize bases; the meaning of the int
    // return value is not visible here -- TODO confirm.
    int ComputeNumAnchors(int minAnchorSize, int &nAnchors, int &nAnchorBases);

    // Presumably resizes `blocks` to hold nBlocks entries -- TODO confirm.
    void AllocateBlocks(int nBlocks); 

    // Appends the gaps of `next` to this alignment.  mergeFirst defaults to
    // false; presumably it merges the first gap list of `next` into the
    // last one here -- TODO confirm.
    void AppendAlignmentGaps(Alignment &next, bool mergeFirst=false); 

    // Appends the blocks of `next` to this alignment.  qOffset and tOffset
    // both default to 0; presumably they are added to the appended blocks'
    // query / target positions -- TODO confirm.
    void AppendAlignmentBlocks(Alignment &next, int qOffset = 0, int tOffset = 0); 

    // Appends `next` to this alignment; presumably both its blocks and its
    // gaps -- TODO confirm.
    void AppendAlignment(Alignment &next); 

    /*
       Transform the series of operations in an optimal dynamic
       programming path to a block representation of the alignment.

       Since it is possible to have an adjacent insertion and deletion,
       the gap blocks are tracked in addition to the match blocks.
       */

    void ArrowPathToAlignment(std::vector<Arrow> &optPath); 

    //
    // samtools / picard do not like the pattern
    // insertion/deletion/insertion (or the opposite).  To get around
    // this, reorder the idi patterns to iid (or did to idd).  This
    // produces the same scoring alignment, however it is reordered so
    // that Picard / samtools accepts the alignments.
    //
    void OrderGapsByType(); 

    //
    // Transform an alignment that has up to one long gap in it to a
    // block based alignment.
    //

    void LongGapArrowPathToAlignment(std::vector<Arrow> &optPath, DNALength lengthOfLongGap); 

    //
    // The length of the aligned sequence in the query.
    //
    DNALength QEnd(); 

    //
    // The length of the aligned sequence in the target.
    //
    DNALength TEnd(); 

    // Begin of the alignment on the target in "genomic" coordinates; how
    // strand and offsets are accounted for is decided by the definition --
    // TODO confirm.
    DNALength GenomicTBegin(); 

    // End of the alignment on the target in "genomic" coordinates; see the
    // caveat on GenomicTBegin.
    DNALength GenomicTEnd(); 

    //
    // Some programs do not accept alignments that have gaps at their
    // ends.  This is used to trim gaps at the ends of alignments (even
    // if the structure represents an acceptable alignment).
    //

    void RemoveEndGaps();

};

//
// This data structure holds two things: alignments, of course, and in addition
// the coordinates of sequences that are successively refined in order to produce 
// the alignment.  This is somewhat tricky when the target genome has been
// transformed by some noise-reducing function phi(t). 
//
// Before aligning a read, it is first mapped to the genome, or transformed 
// then mapped to the transformed genome.  Because the mapping is inexact, the
// region a read is mapped to is typically much larger than the read.  
// The coordinates of the mapped region are stored in tStart and tEnd.
// Alignments are performed in native nucleotide space (not transformed).

// For now, the query is always qStart=0, qEnd = queryLength.


//
// When mapping a read to a set of concatenated chromosomes, each chromosome
// has an offset into the file.  Therefore though a sequence may be aligned 
// to a region starting at tStart, the relative offset into the chromosome
// is tStart - tChromOffset.  This is used when printing the coordinates of a match.
//
// An Alignment extended with the coordinates of the mapped region and
// the indices of the sequences involved.
class MatchedAlignment : public Alignment {
public: 
    // Presumably the index of the reference (target) sequence and of the
    // read (query) this alignment belongs to -- TODO confirm with callers.
    int refIndex;
    int readIndex;
    // Coordinates of the mapped region: tStart/tEnd on the target and
    // qStart/qEnd on the query.
    DNALength tStart, tEnd, qStart, qEnd;
    // Offset of the chromosome within the concatenated target; the
    // chromosome-relative coordinate is tStart - tChromOffset.
    int tChromOffset;

    // Assigns from rhs and returns a MatchedAlignment reference (presumably
    // *this -- TODO confirm against the definition).
    // NOTE(review): rhs is taken by non-const reference.
    MatchedAlignment &Assign(MatchedAlignment &rhs); 
};


/*
 *  Create a structure for storing the information output by compare sequences.
 *  Namely, the two string representations of the alignment.
 */
// An Alignment that additionally carries string renderings of itself.
class CompSeqAlignment : public Alignment {
    public:
        // Presumably the target row, the query row, and the per-column
        // match/mismatch line of the rendered alignment -- TODO confirm
        // against the code that fills these in.
        std::string tString, qString, alignString;
};

} // namespace blasr

#endif // _BLASR_ALIGNMENT_HPP_