This file is indexed.

/usr/include/pbseq/alignment/bwt/PackedHash.hpp is in libblasr-dev 0~20161219-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
#ifndef _BLASR_PACKED_HASH_HPP_
#define _BLASR_PACKED_HASH_HPP_

#include <vector>
#include <map>
#include <iostream>
#include <fstream>
#include <cstring>
#include "../../pbdata/Types.h"
#include "../../pbdata/utils.hpp"
#include "../../pbdata/utils/BitUtils.hpp"
#include "../../pbdata/DNASequence.hpp"

class PackedHash {
public:

    DNALength tableLength;
    uint32_t *table;
    uint64_t *values;
    std::vector<int> hashLengths;
    static const uint32_t BinNumBits = 5;
    static const uint32_t BinSize = 1 <<(BinNumBits);
    PackedHash() {
        table = NULL;
        values = NULL;
        tableLength = 0;
    }

    ~PackedHash() {
        Free();
    }

    void Free() {
        // In general convertions between int and pointer is not desired, 
        // Consequences depending on the implementation, as the resulting 
        // pointers may incorrectly aligned. See C standard subclause 6.3.2.3.
        if (tableLength <= 0) {
            table = NULL;
            values = NULL;
            tableLength = 0;
            hashLengths.clear();
            return;
        }

        for(DNALength i = 0; i < tableLength; i++) {
            int nSetBits = CountBits(table[i]);
            if (nSetBits >= 3) {
                //values[i] is a pointer to a list of uint32 integers
                volatile uintptr_t iptr = values[i];
                uint32_t * ptr = (uint32_t *)iptr;
                if (ptr != NULL) {
                    delete [] ptr;
                } //otherwise, values[i] is an uint_64.
            }
        }
        if (values) {delete [] values;}
        if (table) {delete [] table;}
        table = NULL;
        values = NULL;
        tableLength = 0;
        hashLengths.clear();
    }

    /*
     * Create a mask that retains the lower 5 bits (0 .. 31) of a
     * position so that pos % 32 may be computed by a shift.
     */
    static const uint32_t BinModMask = 0x1FU; 

    void Allocate(uint32_t sequenceLength) {
        Free();

        tableLength = CeilOfFraction(sequenceLength, (DNALength) BinSize);
        table  = ProtectedNew<uint32_t>(tableLength);
        values = ProtectedNew<uint64_t>(tableLength);
        std::fill(&table[0], &table[tableLength], 0);
        std::fill(&values[0], &values[tableLength], 0);
        hashLengths.resize(tableLength);
        std::fill(hashLengths.begin(), hashLengths.end(), 0);
    }

    void PrintBinSummary() {
        //
        // Report some stats on the hash.
        //
        DNALength p;
        std::map<int,int> countMap;
        int card;
        for (p = 0; p < tableLength; p++ ){ 
            card = CountBits(table[p]);
            countMap[card]++;
        }
        std::map<int,int>::iterator mapit;
        for (mapit = countMap.begin(); mapit != countMap.end(); ++mapit) {
            std::cout << mapit->first << " " << mapit->second << std::endl;
        }
    }

    uint32_t LookupBinAtPos(DNALength pos) {
        /* 
         * Each bucket contains 32 positions.  Membership is simply when
         * the bit at pos & BinModMask is set.  There should never be
         * collisions of multiple positions (from different areas in the
         * genome) mapping to the same position in a bin.
         */
        return table[pos/BinSize] & (1 << (pos & BinModMask));
    }

    void ValueToList(uint64_t &storage, DNALength newValue, int newValuePos) {

        /*
         * This is called when one is attempting to add a spot to storage,
         * but there are already two values stored in it, so storage must
         * be converted to a pointer, and the values added to a list on
         * the heap.  The size of the list is necessarily 3 at this point,
         * because there are two values in storage that must be moved to
         * the list, and the new value as well. 
         * The values are copied to the list in sorted order, and sorting
         * is handled case-by-case since there are only 3 cases.
         */

        DNALength v0, v1;
        v0 = ((DNALength)storage);
        v1 = ((DNALength)(storage >> 32));
        DNALength *storagePtr = ProtectedNew<DNALength>(3);
        storage = (uint64_t) storagePtr;

        //
        // Only a couple of options, so handle them directly here
        //
        if (newValuePos == 0)	{
            storagePtr[0] = newValue; 
            storagePtr[1] = v0; storagePtr[2] = v1;
        }
        else if (newValuePos == 1) {
            storagePtr[0] = v0; storagePtr[1] = newValue; storagePtr[2] = v1;
        }
        else if (newValuePos == 2) {
            storagePtr[0] = v0; storagePtr[1] = v1; storagePtr[2] = newValue;
        }
        else {
            assert("ERROR! Somehow expected to only add 3 elements to an array of length 3, but the position of the new value is greater than the length of the array" && 0);
        }
    }

    void InsertValueInList(uint64_t &storage, int curStorageLength, DNALength newValue, int newValuePos) {
        /*
         * This simply creates a new list with size 1 larger than before,
         * and inserts the new value into its position that maintains
         * sorted order in the list.
         */
        DNALength *newListPtr = ProtectedNew<DNALength>(curStorageLength + 1);
        //
        // Copy the values from the old list making space for the new
        // value.
        // 
        if (newValuePos > 0) {
            memcpy(newListPtr, ((DNALength*)storage), newValuePos * sizeof(DNALength));
        }
        if (newValuePos < curStorageLength) {
            memcpy(&newListPtr[newValuePos+1], &((DNALength*)storage)[newValuePos], (curStorageLength - newValuePos)*sizeof(uint32_t));
        }
        assert(curStorageLength < 32);
        newListPtr[newValuePos] = newValue;
        if (storage){delete[] ((DNALength*)storage);}
        storage = (uint64_t)newListPtr;
    }

    void DirectlyStoreValue(uint64_t &storage, int curStorageLength, DNALength value, int valuePos) {
        /*
         * In this instance, the value may be copied to either the first
         * half or the second half of 'storage', without having to turn
         * storage into a list.  The values must be stored in sorted
         * order.  If 'storage' is empty, the correct place is at the
         * beginning of storage.  If there already is a value at the
         * beginning, it may be necessary to shift the existing value over
         * to keep everything in sorted order. 
         */

        if (curStorageLength == 0) {
            //
            // Nothing here, just store the value.
            //
            storage = value;
        }
        else if (valuePos == 0) {
            //
            // Place the value at the beginning of the storage.
            //
            storage = storage << 32;
            storage = storage + value;
        }
        else {
            // 
            // Place the value at the end of storage.
            //
            uint64_t longValue = value;
            longValue = longValue << 32;
            storage += longValue;
        }
    }

    int AddValue(DNALength pos, DNALength value) {
        //
        // The bucket is either a values[pos] that can store up to two
        // values, or it is a pointer to a list of values.  The values are
        // always in order of their corresponding position in the BWT
        // string. 
        // To add a value to this bucket, first check to see if
        // values[pos] has enough room to simply put it there, otherwise,
        // either a list already exists and the value must be inserted, or
        // the values[pos] must be converted to a list, and then the new
        // value inserted.

        //
        // First, operate on the assumption that no value gets added
        // twice.
        //
        UInt bin = pos / BinSize;
        UInt bit = pos & BinModMask;
        assert((table[bin] & ( 1 << bit)) == 0);

        //
        // Now, add the pos and determine where it is in the array.
        //
        table[bin] = table[bin] + ( 1 << bit);

        //
        // Mask off everything above this bit.
        //
        UInt mask = (UInt)-1;
        mask >>= (31 - bit);
        UInt lowerBits = table[bin] & mask;
        int  rank = CountBits(lowerBits) - 1;
        int  card = CountBits(table[bin]);

        if (card < 3) {
            DirectlyStoreValue(values[bin], card-1, value, rank);
        }
        else if (card == 3) {
            ValueToList(values[bin], value, rank);
        }
        else {
            InsertValueInList(values[bin], card-1, value, rank);
        }
        return card;
    }

    int LookupValue(DNALength pos, DNALength &value) {

        /* 
         * Check to see if there is a value stored for 'pos'.  If so,
         * store it in value, and return 1 for success.
         */
        UInt binIndex = pos/BinSize;
        UInt bin      = table[binIndex];
        UInt setBit = bin & (1 << (pos & BinModMask));
        if (setBit == 0) {
            return 0;
        }

        // 
        // GetSetBitPosition64 returns the position relative to the most
        // significant bit in a 64-bit word, starting at MSB = 1.  This
        // should never be less than 32, since it's counting bits in a 32
        // bit word.  
        //

        int  bitPos     = GetSetBitPosition32(setBit);
        UInt bitPosMask = ((UInt)-1) >> (32 - bitPos-1);;

        //
        // Determine how to interpret the value of this bucket.  It is
        // either a pointer or two words.  If there are more than two bits
        // set in this bucket, it is a pointer.  Otherwise, pick the half
        // of the 64 bit word based on the index in the bucket.
        //
        int nSet         = CountBits(bin);
        int bitRank      = CountBits(bin & bitPosMask) - 1;
        assert(nSet > 0);
        if (nSet <= 2) {
            // return lower 32 bits
            if (bitRank == 0) {
                value = ((uint32_t)values[binIndex]);
                return 1;
            }
            else {
                value = ((uint32_t)(values[binIndex]>>32));
                return 1;
            }
        }
        else {
            /*
             * In this instance, values is a pointer rather than a pair of values.
             */
            value = (uint32_t) ((DNALength*)values[binIndex])[bitRank];
            return 1;
        }
        //
        // This is reached if nothing is stored in value. For now, this
        // shouldn't be reached, so make sure the program bails here.
        //
        assert(0);
        return 0;
    }	

    /*
     * Define binary I/O routines for the packed hash. The sizes of the
     * tables are constant, but then the values table has some lists.
     * Those are written past the end of the tables treating the last
     * half of the file as a heap structure.  They are simlarly read in.
     *
     */

    void Write(std::ostream &out) {
        out.write((char*) &tableLength,sizeof(tableLength));
        if (tableLength > 0) {
            out.write((char*) table, tableLength * sizeof(table[0]));
            out.write((char*) values, tableLength * sizeof(values[0]));
        }
        DNALength tablePos;
        for (tablePos = 0; tablePos < tableLength; tablePos++) {
            int nSetBits = CountBits(table[tablePos]);
            if( nSetBits > 2) {
                out.write((char*)values[tablePos], sizeof(uint32_t)*nSetBits);
            }
        }
    }

    void Read(std::istream &in) {
        Free();
        in.read((char*)&tableLength, sizeof(tableLength));
        if (tableLength > 0) {
            table  = ProtectedNew<uint32_t>(tableLength);
            values = ProtectedNew<uint64_t>(tableLength);
            in.read((char*)table, sizeof(uint32_t)*tableLength);
            in.read((char*)values, sizeof(uint64_t)*tableLength);
            DNALength tablePos;
            for (tablePos = 0; tablePos < tableLength; tablePos++) {
                int nSetBits = CountBits(table[tablePos]);
                if (nSetBits > 2) {
                    values[tablePos] = (uint64_t)(ProtectedNew<uint32_t>(nSetBits));
                    in.read((char*)values[tablePos], nSetBits * sizeof(uint32_t));
                }
            }
        }
    }
};

#endif // _BLASR_PACKED_HASH_HPP_