This file is indexed.

/usr/include/libMems-1.6/libMems/SortedMerList.h is in libmems-1.6-dev 1.6.0+4725-4build1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
/*******************************************************************************
 * $Id: SortedMerList.h,v 1.13 2004/02/27 23:08:55 darling Exp $
 * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
 * This file is licensed under the GPL.
 * Please see the file called COPYING for licensing details.
 * **************
 ******************************************************************************/

#ifndef _SortedMerList_h_
#define _SortedMerList_h_

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "libGenome/gnDefs.h"
#include "libGenome/gnClone.h"
#include "libGenome/gnDebug.h"
#include "libGenome/gnSequence.h"
#include "libGenome/gnException.h"
#include "stdlib.h"
#include <string>
#include <vector>
#include "libMems/SeedMasks.h"

namespace mems {

#define DNA_ALPHA_BITS 2	/**< number of bits to represent each nucleotide of DNA */
#define DNA_MER_SIZE 31		/**< largest possible number of characters in each dna mer ALWAYS ODD */

#define PROTEIN_ALPHA_BITS 5	/**< number of bits to represent each amino acid */
#define PROTEIN_MER_SIZE 12		/**< default number of characters in each protein mer */

#define DESCRIPTION_SIZE 2048	/**< Number of bytes for the freeform text description of an SML */

#define NO_UNIQUE_COUNT UINT32_MAX

typedef int16 sarID_t;

typedef uint32 smlSeqI_t;

//4 + 8 = 16 (blame C alignment rules.)
struct bmer{
	smlSeqI_t position;	/**< starting position of this mer in the sequence */
	uint64 mer; 		/**< the actual binary encoded mer */
};

struct SMLHeader{
	uint32 version;						/**< Format version - 4 bytes */
	uint32 alphabet_bits;				/**< Bits per character in the alphabet - 4 bytes */
//	uint32 mer_size;					/**< Size of mers used for sorting the list - 4 bytes */
	uint64 seed;						/**< The pattern used in each seed */
	uint32 seed_length;					/**< The length of the seed mask */
	uint32 seed_weight;					/**< The weight of the seed mask */
	uint64 length;						/**< length of the sequence before circularity - 8 bytes */
	uint32 unique_mers;					/**< Number of unique mers in the sequence 4 bytes */
	uint32 word_size;					/**< Word size on the machine the sequence was translated */
	boolean little_endian;				/**< Is the byte order little endian?  0==no, !0==yes */
	sarID_t id;							/**< Obsolete ID value - 1 byte, eaten by alignment? */
	boolean circular;					/**< Circularity of sequence - 1 byte */
	uint8 translation_table[UINT8_MAX];	/**< Translation table for ascii characters to binary values -- 256 bytes */
	char description[DESCRIPTION_SIZE]; /**< Freeform text description of sequence data -- 2048 bytes */
};


/**
 * A base class which defines an interface common to all sorted mer lists
 */
class SortedMerList : public genome::gnClone
{
public:
	SortedMerList();
	SortedMerList( const SortedMerList& sa );
	SortedMerList& operator=(const SortedMerList& sa);
	~SortedMerList();
	
	/**
	 * Set data structures to default values
	 */
	virtual void Clear();
	
	/**
	 * Creates a new sorted mer list.
	 * This function enumerates each possible mer of the specified size and 
	 * sorts them alphabetically in order to construct a sorted mer list.
	 * @param seq The sequence to create an SML for.
	 * @param mersize The size of the mers to sort on.
	 */
	virtual void Create(const genome::gnSequence& seq, const uint64 seed);
	/**
	 * Read a range of mers in the sorted mer list.
	 * This function reads a section of data from the sorted mer list starting at 'offset'
	 * and continuing for 'size' mers.  The mers are placed into readVector.  Anything
	 * already in readVector is cleared.  Returns false if there was a problem completing the
	 * read.  If the end of the list is reached, all mers which could be read will be placed
	 * into readVector and false will be returned
	 * @param readVector the vector to read bmers into.
	 * @param size The number of bmers to read.
	 * @param offset The mer index in the sorted mer list to start reading from. 
	 * @return false if a problem was encountered while reading.
	 */
	virtual boolean Read(std::vector<bmer>& readVector, gnSeqI size, gnSeqI offset) = 0;
	/**
	 * Merges two SortedMerLists.
	 */
	virtual void Merge(SortedMerList& sa, SortedMerList& sa2) = 0;
	
	/**
	 * Get the mer at the specified index in the sorted mer list.
	 * @param index The index of the mer to return.
	 * @return The specified mer.
	 */
	virtual bmer operator[](gnSeqI index) = 0;
	/**
	 * Get the mer at the specified index in the sorted mer list.
	 * @param position The index of the mer to return.
	 * @return The specified mer.
	 */
	virtual uint64 GetMer(gnSeqI position) const;
	/**
	 * Searches the SML for a subsequence which matches the query string.
	 * Returns true if one is found, false otherwise.
	 * If no matching mer is found, 'result' contains the index that the query
	 * sequence would be in if it existed in the SML.
	 */
	virtual boolean Find(const std::string& query_seq, gnSeqI& result);
	/**
	 * Searches the SML for a mer which matches the query mer.
	 * Returns true if one is found, false otherwise.
	 * If no matching mer is found, 'result' contains the index that the query
	 * mer would be in if it existed in the SML.
	 */
	virtual boolean FindMer(const uint64 query_mer, gnSeqI& result);
	/**
	 * Searches the SML for mers which match the query mer.
	 * Puts the indices of all matching mers into the 'result' vector
	 */
	virtual void FindAll(const std::string& query_seq, std::vector<gnSeqI> result);
	/**
	 * Returns the number of unique mers in the sequence
	 */
	virtual gnSeqI UniqueMerCount();
	
	/**
	 * Returns a freeform text description of the SML.
	 */
	virtual std::string Description() const;
	/**
	 * Sets the freeform text description of the SML.
	 */
	virtual void SetDescription(const std::string& d);
	/**
	 * Returns the length of the seed pattern that this SML was sorted on.
	 */
	virtual uint SeedLength() const;
	/**
	 * Returns the weight of the seed that this SML was sorted on.
	 */
	virtual uint SeedWeight() const;
	/**
	 * Returns the seed pattern that this SML was sorted on.
	 */
	virtual uint64 Seed() const;
	/**
	 * Returns the length of the mer mask.
	 * Some types of sorted mer list support a configurable mer mask size, allowing
	 * the same sorted mer list to behave as though it were sorted on a shorter mer size.
	 * DNA sorted mer lists do not support this feature.
	 */
	virtual uint32 GetMerMaskSize() const;
	/**
	 * Sets the length of the mer mask.
	 * Some types of sorted mer list support a configurable mer mask size, allowing
	 * the same sorted mer list to behave as though it were sorted on a shorter mer size.
	 * DNA sorted mer lists do not support this feature.
	 */
	virtual void SetMerMaskSize(uint32 mer_size);
	/**
	 * Returns the length of the sequence encoded in this sorted mer list.
	 */
	gnSeqI Length() const;
	/**
	 * Returns the length of the sorted mer list itself.  This value will be less
	 * than the sequence length if the sequence isn't circular
	 */
	gnSeqI SMLLength() const;
	/**
	 * Ignore this.
	 */
	virtual sarID_t GetID() const;
	/**
	 * Ignore this.
	 */
	virtual void SetID(const sarID_t d);
	/**
	 * Returns true if this SML is circular.  False otherwise.
	 */
	virtual boolean IsCircular() const;
	/**
	 * Returns a mask which can be bitwise AND'ed to a mer in order to
	 * get only the relevant bits of sequence data without direction bits.
	 */
	virtual uint64 GetMerMask() const;
	/**
	 * Returns a mask which can be bitwise AND'ed to a seed mer in order to
	 * get only the relevant bits of sequence data without direction bits.
	 */
	virtual uint64 GetSeedMask() const;
	/**
	 * Returns a copy of the header information for this SML.
	 */
	virtual SMLHeader GetHeader() const;
	/**
	 * Returns a translation table for DNA sequence which disambiguates each nucleotide.
	 */
	static const uint8* BasicDNATable();
	/**
	 * Returns a translation table for Protein sequence.
	 */
	static const uint8* ProteinTable();
	/** 
	 * Places a copy of the binary encoded sequence data into dest.
	 * @param len The length in sequence characters to copy
	 * @param offset The sequence offset to start copying from
	 * @throws IndexOutOfBounds if offset or len are invalid
	 */
	virtual void GetBSequence(uint32* dest, const gnSeqI len, const gnSeqI offset);
	
	/**
	 * Returns the reverse complement of a mer
	 */
	virtual uint64 RevCompMer( uint64 mer_a, int mer_length ) const;
	/**
	 * Applies the seed mask to the sequence at the given offset and returns the resulting
	 * seed.
	 */
	virtual uint64 GetSeedMer( gnSeqI offset ) const;
	/**
	 * Returns the lesser of the forward and reverse complement seeds at the given offset.
	 * Note: The seed pattern should be palindromic, otherwise the returned rev. complement
	 * match will be under a different pattern.
	 */
	virtual uint64 GetDnaSeedMer( gnSeqI offset ) const;

	/**
	 * Applies the seed mask to the sequence at the given offset and returns the resulting
	 * seed.
	 */
	virtual void FillDnaSeedSML(const genome::gnSequence& seq, std::vector<bmer>& sml_array);

protected:
	struct SMLHeader header; /**< stores general information about this sorted mer list */
	uint64 mer_mask;	/**< a mask for the used bits in a mer */
	uint64 seed_mask;	/**< a mask covering only the number of characters a seed covers */
	uint32 mask_size;   /**< the number of characters covered by the mask */
	uint32 *sequence;	/**< Stores the sequence data */
	gnSeqI binary_seq_len;	/**< Stores the length in 32 bit words of the sequence */

	/** Set the sequence data to the seq_len characters in seq_buf */
	virtual void SetSequence(gnSeqC* seq_buf, gnSeqI seq_len);
	/** Fill in the vector of bmers with the initial unsorted bmers for the sequence in seq_buf  */
	virtual void FillSML(gnSeqC* seq_buf, gnSeqI seq_len, boolean circular, std::vector<bmer>& sml_array);
	virtual void FillSML(const genome::gnSequence& seq, std::vector<bmer>& sml_array);
	virtual void FillDnaSML(const genome::gnSequence& seq, std::vector<bmer>& sml_array);
	/** Fill in the vector of positions with the initial unsorted positions for the sequence in seq_buf  */
	virtual void FillSML(gnSeqI seq_len, std::vector<gnSeqI>& sml_array);
	virtual uint64 GetDnaMer(gnSeqI offset) const;

	virtual gnSeqI bsearch(const struct bmer& query_mer, const gnSeqI start, const gnSeqI end);
	virtual void translate(uint8* dest, const gnSeqC* src, const gnSeqI len) const;
	virtual void translate32(uint32* dest, const gnSeqC* src, const gnSeqI len) const;
	/**
	 * Shifts an entire array of words left or right by a few bits
	 * @param data A pointer to the array of words
	 * @param bits The number of bits to shift by.  A positive number shifts right and a negative number shifts left.
	 */
	virtual void ShiftWords(uint32* data, uint32 length, int32 bits);
	virtual uint32 CalculateMaxMerSize() const;

	static const uint8* CreateBasicDNATable();
	static const uint8* CreateProteinTable();
};

/**
 * Thrown when there is an error creating a sorted mer list.
 */
CREATE_EXCEPTION(SMLCreateError);

/**
 * Thrown when there is an error merging two sorted mer lists.
 */
CREATE_EXCEPTION(SMLMergeError);

class MerCompare {
public:
	MerCompare( SortedMerList* sa ){ sar = sa; }
	boolean operator()(const gnSeqI a, const gnSeqI b) const{
		return sar->GetMer(a) < sar->GetMer(b);
	}
protected:
	SortedMerList* sar;
};

bool bmer_lessthan(const bmer& a_v, const bmer& m_v);
bool bmer_id_lessthan(const bmer& a_v, const bmer& m_v);

int bmer_compare(const void* a_v, const void* m_v);
bool bmer_id_lessthan(const bmer& a_v, const bmer& m_v);

//less than function for STL sort functions
inline
bool bmer_lessthan(const bmer& a_v, const bmer& m_v){
	return (a_v.mer < m_v.mer);// ? true : false;
};

inline
int bmer_compare(const void* a_v, const void* m_v){
	return (int)((int64)(((bmer*)a_v)->mer) - (int64)(((bmer*)m_v)->mer));
}

}

#endif   //_SortedMerList_h_