This file is indexed.

/usr/include/pbseq/alignment/algorithms/anchoring/LISPValueImpl.hpp is in libblasr-dev 0~20161219-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#ifndef _BLASR_LISP_VALUE_IMPL_HPP_
#define _BLASR_LISP_VALUE_IMPL_HPP_

template<typename T_MatchPos>
void StoreNonOverlappingIndices(std::vector<T_MatchPos> &lis, 
    std::vector<T_MatchPos> &noOvpLis) {
	unsigned int i;

	//
	// Greedily add lis matches according to weight.  A match may be added
	// as long as it does not overlap with any other matches.
	//
 
	// do nothing on empty lists
	if (lis.empty()) return;

	//
	// First build a list of matches sorted by weight.
	SortMatchPosListByWeight(lis);

	//
	// The first match is guaranteed to not overlap.
	noOvpLis.push_back(lis[0]);

	//
	// Nothing is overlapping, and everything is sorted when there is 
	// just one value.
	if (lis.size() == 1) return;

	//
	// Next, add matches as long as they do not overlap.
	for (i = 1; i < lis.size(); i++ ){
		VectorIndex j;
		int lts = lis[i].t;
		int lte = lis[i].t + lis[i].GetLength();
		int lqs = lis[i].q;
		int lqe = lis[i].q + lis[i].GetLength();

		int ovpFound = 0;
		for (j =0; j < noOvpLis.size(); j++ ){
			int tIntvStart = noOvpLis[j].t;
			int tIntvEnd   = noOvpLis[j].t + noOvpLis[j].GetLength();
			int qIntvStart = noOvpLis[j].q;
			int qIntvEnd   = noOvpLis[j].q + noOvpLis[j].GetLength();
			if ((lts >= tIntvStart and lts < tIntvEnd) or 
					(lte >  tIntvStart and lte <= tIntvEnd) or
					(lqs >= qIntvStart and lqs < qIntvEnd) or
					(lqe >  qIntvStart and lqe <= qIntvEnd)) {
				ovpFound = 1;
				break;
			}
		}
		if (!ovpFound) {
			noOvpLis.push_back(lis[i]);
		}
	}
	
	//
	// Now, the matches are found in order of size, but they need to
	// be stored in order of text.
	//
	SortMatchPosList(noOvpLis);

	//
	// The match pos list was sorted in order of weight. 
	// Just in case it causes problems down the line, re-sort it 
	// according to query pos.
	//
	lis = noOvpLis;
	SortMatchPosList(lis);
}


template<typename T_TextSequence, 
         typename T_Sequence, 
         typename T_MatchPos, 
         typename T_Tuple>
float ComputeLISPValue(std::vector<T_MatchPos> &lis, 
    T_TextSequence &text, T_Sequence &read,
	TupleMetrics &tm, TupleCountTable<T_TextSequence, T_Tuple> &ct,
    int &lisNBases, int &lisSize ) {
    (void)(read);
	//
	// First, find a subset of the lis that has non-overlapping matches.
	//
    size_t i;
    lisNBases = 0;
    for (i = 0; i < lis.size(); i++) {
        lisNBases += lis[i].l;
    }
    lisSize = lis.size();

    if (lis.size() == 1) {
        //
        // When just one LIS is found, the pvalue of the match is just the
        // pvalue of getting a single hit with one read, which we estimate
        // as the pvalue of at least one hit.
        // 
        float matchProb;
        //
        // Weight the single match based on how frequently it appears in
        // the genome.
        if (POneOrMoreMatches<T_TextSequence>(text, lis[0].t, 
                lis[0].GetLength(), tm, ct, matchProb)) {		
            return matchProb;
        }
        else {
            // return non-significant match value.
            return 1;
        }
    }
	else {
		//
		// There is more than one non overlapping match.  This evokes a 
        // totally different probability metric on LIS values.  Rather 
        // than computing the probability of a single match, compute 
        // the probability of seeing several matches in a row.
		//
		VectorIndex i;
		float pChain = 0; // In other words, pChain = log(1)
		// 
		// prob of chain starts out as prob of first match.
		//
		if (POneOrMoreMatches(text, lis[0].t, lis[0].GetLength(),
                tm, ct, pChain)) {
			//
			// Next, for each match, compute the expected probability of
            // having to wait at most tGap time for a match.  Since this
            // has screened for anchors that are not overlapping, 
            // consider the events to be independent, and therefore
			// the probabilities multiply.
			//
			for (i = 1; i < lis.size(); i++ ){ 
				//
				// Find out how many times this word appears in the genome.
				//
				// Assume all hits are uniformly distributed across the
				// genome.  qLambda is the frequency of seeing hit i in the
				// genome.
				//
				float qLambda;
				qLambda = lis[i].GetMultiplicity() / (1.0*text.length);
			
				// Now compute the probability of the chain.  Since the
				// matches are uniformly distributed across the genome, the
				// waiting time between matches is exponentially distributed.
				pChain = pChain + log(qLambda);
            }
            return pChain;
        }
        else {
            return 1;
        }
    }
}

#endif