// /usr/include/kmer/bio/kmer.H is in libkmer-dev 0~20150903+r2013-3.
// This file is owned by root:root, with mode 0o644.
// The actual contents of the file can be viewed below.
// Copyright (c) 2005 J. Craig Venter Institute
// Author: Brian Walenz
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received (LICENSE.txt) a copy of the GNU General Public
// License along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
// A 'simple' kMer datastructure.
#ifndef BIO_KMER_H
#define BIO_KMER_H
// The maximum size of a mer. You get 32 bases per word, so
// KMER_WORDS=4 will get you up to a 128-mer.
//
#define KMER_WORDS 1
#include "util.h"
#include "util++.H"
#include "bio.h"
#include "bio++.H"
#include "kmeriface.H"
// Select the concrete mer implementation behind the 'kMer' name based on
// KMER_WORDS: exactly one word uses kMerTiny (from kmertiny.H), any other
// value uses kMerHuge (from kmerhuge.H).  Code below refers only to 'kMer'.
#if KMER_WORDS == 1
#include "kmertiny.H"
typedef kMerTiny kMer;
#else
#include "kmerhuge.H"
typedef kMerHuge kMer;
#endif
// Debug switches, forced off here.  DEBUGADDBASE gates the per-base trace
// printed by kMerBuilder::addBase() below; replace the #undef with a #define
// to enable it.
// NOTE(review): DEBUGCOMP and DEBUGSPACE are not referenced in this header --
// presumably they gate tracing in the kMerBuilder implementation file; confirm.
#undef DEBUGADDBASE
#undef DEBUGCOMP
#undef DEBUGSPACE
class kMerBuilder {
public:
kMerBuilder(uint32 ms=0, uint32 cm=0, char *tm=0L);
~kMerBuilder();
// Clear all mer data, reset state to as just after construction.
void clear(bool clearMer=true);
// Returns true if we need another base to finish the mer. This
// only occurs for compressed mers, if we are in a homopolymer run.
//
private:
bool addBaseContiguous(uint64 cf, uint64 cr);
bool addBaseCompressed(uint64 cf, uint64 cr);
bool addBaseSpaced(uint64 cf, uint64 cr);
bool addBaseCompressedSpaced(uint64 cf, uint64 cr);
public:
bool addBase(char ch) {
uint64 cf = letterToBits[ch];
uint64 cr = letterToBits[complementSymbol[ch]];
#ifdef DEBUGADDBASE
fprintf(stderr, "addBase() %c\n", ch);
#endif
if (_style == 0)
return(addBaseContiguous(cf, cr));
if (_style == 1)
return(addBaseCompressed(cf, cr));
if (_style == 2)
return(addBaseSpaced(cf, cr));
if (_style == 3)
return(addBaseCompressedSpaced(cf, cr));
fprintf(stderr, "kMerBuilder::addBase()-- Invalid mer type %d.\n", _style);
exit(1);
return(false);
}
void mask(void) {
_fMer->mask(true);
_rMer->mask(false);
};
kMer const &theFMer(void) { return(*_fMer); };
kMer const &theRMer(void) { return(*_rMer); };
kMer const &theCMer(void) { return((theFMer() < theRMer()) ? theFMer() : theRMer()); };
uint32 merSize(void) { return(_merSize); };
uint32 templateSpan(void) { return(_templateSpan); };
uint32 baseSpan(uint32 b) {
return(_compressionLength[(_compressionIndex + 1 + b) % _merSize]);;
};
private:
// Style of builder we are
uint32 _style;
// Amount of the mer that has valid sequence. Sigh. I really needed a signed value here --
// where negative values mean that we first have to get to the end of the template that was
// invalid, then we need to build a new mer.
//
// And, yes, just simply making it signed leads to all sortes of compiler warnings about
// comparing signed and unsigned. And I've been here before, and those warnings just propate
// endlessly. Just go away, Mr. Smartypants.
//
// Details: when building spaced seeds, if we hit an N in the middle of the template, we need to
// invalidate the mer, but not start building a new mer until we exhaust the current template.
// The example is template=1101. Suppose we hit an N at the second 1. We set the merSizeValid
// to 0, and proceed. When we push on the base for the last 1 in the template, we'd increment
// the merSizeValid. The first two 1's in the template would now create a mer big enough to be
// valid, and we'd return it -- but now the template we're using is 0111.
//
// _merSizeValid is offset by _merSize (e.g., the true valid size is _merSizeValid - _merSize).
// _merSizeValidIs is the size _merSizeValid needs to be in order for it to be valid.
// Similarily, _merSizeValidZero is the value of zero (currently this is equal to _merSize).
//
uint32 _merSize; // Desired number of bases in the mer
uint32 *_merSizeValid; // Actual number of bases in the mer
uint32 _merSizeValidZero; // Definition of 'zero' bases in the mer
uint32 _merSizeValidIs; // Definition of 'full' bases in the mer
// An array of mers, we allocate all mers in one block
kMer *_merStorage;
// Pointer to the currently active mer
kMer *_fMer;
kMer *_rMer;
// For compression
uint32 _compression;
uint32 _compressionIndex; // index into cL[] that is the last base in the mer
uint32 _compressionFirstIndex; // index into cL[] that is the first base in a run
uint32 *_compressionLength; // one per base
uint32 _compressionCurrentLength;
// For templates
uint32 _templateSpan; // # of 0's and 1's in the template
uint32 _templateLength; // length of the pattern in the template
char *_template; // character string template
uint32 _templatePos; // position we are building in the template
uint32 _templateMer; // the mer we should output next
uint32 _templateFirst; // if true, we're still building the initial mer
};
#endif // BIO_KMER_H