// /usr/include/kmer/kmer/existDB.H is in libkmer-dev 0~20150903+r2013-3.
// This file is owned by root:root, with mode 0o644.
// The actual contents of the file can be viewed below.
#ifndef EXISTDB_H
#define EXISTDB_H
// Used by wgs-assembler, to determine if a rather serious bug was patched.
#define EXISTDB_H_VERSION 1960
#include "bio++.H"
// Takes as input a list of mers (in a file) and builds a searchable
// structure listing those mers. Duplicate mers are not removed and
// will be stored multiple times.
//
// Using a compressed hash is allowed, but somewhat useless -- it is
// really slow and doesn't save that much.
//
// If existDBcanonical is requested, this will store only the
// canonical mer. It is up to the client to be sure that is
// appropriate! See positionDB.H for more.
//#define STATS
// Option bits passed as the 'flags' argument of the existDB
// constructors.  Every constant other than existDBnoFlags occupies
// its own bit (presumably so they can be OR-ed together -- confirm
// against the implementation).
typedef uint32 existDBflags;

const existDBflags existDBnoFlags         = 0;
const existDBflags existDBcompressHash    = 1u << 0;  // 0x0001
const existDBflags existDBcompressBuckets = 1u << 1;  // 0x0002
const existDBflags existDBcompressCounts  = 1u << 2;  // 0x0004
const existDBflags existDBcanonical       = 1u << 3;  // 0x0008
const existDBflags existDBforward         = 1u << 4;  // 0x0010
const existDBflags existDBcounts          = 1u << 5;  // 0x0020
class existDB {
public:
// Read state from an existDB file
existDB(char const *filename,
bool loadData=true);
// Load mers from an existing existDB file, a fastafile, or a meryl database
existDB(char const *filename,
uint32 merSize,
existDBflags flags,
uint32 lo,
uint32 hi);
// Load mers from a character string
existDB(char const *sequence,
uint32 merSize,
existDBflags flags);
~existDB();
void saveState(char const *filename);
void printState(FILE *stream);
bool isForward(void) { return(_isForward); };
bool isCanonical(void) { return(_isCanonical); };
bool exists(uint64 mer);
uint64 count(uint64 mer);
private:
bool loadState(char const *filename, bool beNoisy=false, bool loadData=true);
bool createFromFastA(char const *filename,
uint32 merSize,
uint32 flags);
bool createFromMeryl(char const *filename,
uint32 merSize,
uint32 lo,
uint32 hi,
uint32 flags);
bool createFromSequence(char const *sequence,
uint32 merSize,
uint32 flags);
uint64 HASH(uint64 k) {
return(((k >> _shift1) ^ (k >> _shift2) ^ k) & _mask1);
};
uint64 CHECK(uint64 k) {
return(k & _mask2);
};
void insertMer(uint64 hsh, uint64 chk, uint64 cnt, uint64 *countingTable) {
// If the mer is already here, just update the count. This only
// works if not _compressedBucket, and only makes sense for loading from
// fasta or sequence.
if ((_compressedBucket == false) &&
(_searchForDupe)) {
uint64 st = _hashTable[hsh];
uint64 ed = countingTable[hsh];
for (; st<ed; st++) {
if (_buckets[st] == chk) {
if (_counts)
_counts[st] += cnt;
return;
}
}
}
if (_compressedBucket)
setDecodedValue(_buckets, countingTable[hsh] * _chkWidth, _chkWidth, chk);
else
_buckets[countingTable[hsh]] = chk;
if (_counts) {
if (_compressedCounts) {
setDecodedValue(_counts, countingTable[hsh] * _cntWidth, _cntWidth, cnt);
} else {
_counts[countingTable[hsh]] = cnt;
}
}
countingTable[hsh]++;
};
bool _compressedHash;
bool _compressedBucket;
bool _compressedCounts;
bool _isForward;
bool _isCanonical;
bool _searchForDupe;
uint32 _merSizeInBases;
uint32 _shift1;
uint32 _shift2;
uint64 _mask1;
uint64 _mask2;
uint32 _hshWidth; // Only for the compressed hash
uint32 _chkWidth; // Only for the compressed bucket
uint32 _cntWidth; // Only for the compressed counts
uint64 _hashTableWords;
uint64 _bucketsWords;
uint64 _countsWords;
uint64 *_hashTable;
uint64 *_buckets;
uint64 *_counts;
void clear(void) {
};
};
#endif // EXISTDB_H