This file is indexed.

/usr/include/shogun/kernel/string/OligoStringKernel.h is in libshogun-dev 3.2.0-7.3build4.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2008 Christian Igel, Tobias Glasmachers
 * Copyright (C) 2008 Christian Igel, Tobias Glasmachers
 *
 * Shogun adjustments (W) 2008-2009,2013 Soeren Sonnenburg
 * Copyright (C) 2008-2009 Fraunhofer Institute FIRST and Max-Planck-Society
 * Copyright (C) 2013 Soeren Sonnenburg
 */
#ifndef _OLIGOSTRINGKERNEL_H_
#define _OLIGOSTRINGKERNEL_H_

#include <shogun/kernel/string/StringKernel.h>

#include <vector>
#include <string>

namespace shogun
{
/**
 * @brief This class offers access to the Oligo Kernel introduced
 * by Meinicke et al. in 2004
 *
 * The class has functions to preprocess (encode) the data such that the
 * kernel computation can be carried out faster. The kernel function itself
 * is then evaluated by kernelOligoFast or kernelOligo.
 *
 * The current implementation is expected to work but is slow, so as is it
 * is probably only applicable to small, academic-scale problems. Known
 * room for improvement:
 *
 * - the kernel should only ever see encoded sequences, which however
 * requires another OligoFeatures object (using CDenseFeatures of pairs)
 *
 * Uses CSqrtDiagKernelNormalizer, as the vanilla kernel seems to be very
 * diagonally dominant.
 *
 * Encoded sequences are represented throughout this class as
 * std::vector< std::pair<int32_t, float64_t> >, one pair per signal
 * (see encodeOligo for the meaning of the two components).
 */
class COligoStringKernel : public CStringKernel<char>
{
	public:
		/** default constructor  */
		COligoStringKernel();

		/** Constructor
		 * @param cache_size cache size for kernel
		 * @param k k-mer length
		 * @param width - equivalent to 2*sigma^2
		 */
		COligoStringKernel(int32_t cache_size, int32_t k, float64_t width);

		/** Constructor
		 * @param l features of left-hand side
		 * @param r features of right-hand side
		 * @param k k-mer length
		 * @param width - equivalent to 2*sigma^2
		 */
		COligoStringKernel(
				CStringFeatures<char>* l, CStringFeatures<char>* r,
				int32_t k, float64_t width);

		/** Destructor */
		virtual ~COligoStringKernel();

		/** initialize kernel
		 *
		 * Not to be confused with the private, argument-less init()
		 * overload declared further below.
		 *
		 * @param l features of left-hand side
		 * @param r features of right-hand side
		 * @return if initializing was successful
		 */
		virtual bool init(CFeatures* l, CFeatures* r);

		/** return what type of kernel we are
		 *
		 * @return kernel type K_OLIGO
		 */
		virtual EKernelType get_kernel_type() { return K_OLIGO; }

		/** return the kernel's name
		 *
		 * @return the string "OligoStringKernel"
		 */
		virtual const char* get_name() const { return "OligoStringKernel"; }


		/** compute the kernel value for a pair of examples
		 *
		 * NOTE(review): the definition is not part of this header.
		 * Presumably x indexes the left-hand side features and y the
		 * right-hand side features passed to init() - confirm against
		 * CKernel::compute.
		 *
		 * @param x index of the first example
		 * @param y index of the second example
		 * @return kernel value for the pair (x, y)
		 */
		virtual float64_t compute(int32_t x, int32_t y);

		/** clean up your kernel
		 */
		virtual void cleanup();

	protected:
		/**
		 * @brief encodes the signals of the sequence
		 *
		 * This function stores the oligo function signals in 'values'.
		 *
		 * The 'k_mer_length' and the 'allowed_characters' determine,
		 * which signals are used. Every pair contains the position of the
		 * signal and a numerical value reflecting the signal. The
		 * numerical value represents the k_mer to a base
		 * n = |allowed_characters|.
		 * Example: The value of k_mer CG for the allowed characters ACGT
		 * would be 1 * n^1 + 2 * n^0 = 6.
		 *
		 * @param sequence the sequence to encode
		 * @param k_mer_length length of the k-mers used as signals
		 * @param allowed_characters alphabet; its size is the base n used
		 *        for the numerical value
		 * @param values output: the (position, numerical value) pairs
		 */
		static void encodeOligo(
			const std::string& sequence, uint32_t k_mer_length,
			const std::string& allowed_characters,
			std::vector< std::pair<int32_t, float64_t> >&   values);

		/**
		  @brief encodes all sequences with the encodeOligo function and stores
		  them in 'encoded_sequences'

		  This function encodes the sequences of 'sequences' via the
		  function encodeOligo.

		  @param sequences the sequences to encode
		  @param k_mer_length length of the k-mers used as signals
		  @param allowed_characters alphabet handed on to encodeOligo
		  @param encoded_sequences output: one encoded sequence (vector of
		         pairs as produced by encodeOligo) per input sequence
		  */
		static void getSequences(
			const std::vector<std::string>& sequences,
			uint32_t k_mer_length, const std::string& allowed_characters,
			std::vector< std::vector< std::pair<int32_t, float64_t> > >& encoded_sequences);

		/**
		  @brief returns the value of the oligo kernel for sequences 'x' and 'y'

		  This function computes the kernel value of the oligo kernel,
		  which was introduced by Meinicke et al. in 2004. 'x' and
		  'y' are encoded by encodeOligo and 'exp_cache' has to be
		  constructed by getExpFunctionCache.

		  NOTE(review): there is no parameter named 'exp_cache'; this
		  presumably refers to the gauss_table member filled by
		  getExpFunctionCache - confirm against the implementation.

		  'max_distance' can be used to speed up the computation
		  even further by restricting the maximum distance between a k_mer at
		  position i in sequence 'x' and a k_mer at position j
		  in sequence 'y'. If i - j > 'max_distance' the value is not
		  added to the kernel value. This approximation is switched
		  off by default (max_distance < 0).

		  NOTE(review): presumably the absolute distance |i - j| is
		  meant above - confirm against the implementation.

		  @param x first sequence, encoded by encodeOligo
		  @param y second sequence, encoded by encodeOligo
		  @param max_distance maximum distance between k_mer positions
		         that still contributes; negative (default -1) disables
		         the restriction
		  @return value of the oligo kernel for 'x' and 'y'
		  */
		float64_t kernelOligoFast(
			const std::vector< std::pair<int32_t, float64_t> >& x,
			const std::vector< std::pair<int32_t, float64_t> >& y,
			int32_t max_distance = -1);

		/**
		  @brief returns the value of the oligo kernel for sequences 'x' and 'y'

		  This function computes the kernel value of the oligo kernel,
		  which was introduced by Meinicke et al. in 2004. 'x' and
		  'y' have to be encoded by encodeOligo.

		  @param x first sequence, encoded by encodeOligo
		  @param y second sequence, encoded by encodeOligo
		  @return value of the oligo kernel for 'x' and 'y'
		  */
		float64_t kernelOligo(
				const std::vector< std::pair<int32_t, float64_t> >& x,
				const std::vector< std::pair<int32_t, float64_t> >& y);


	private:
		/**
		  @brief prepares the exp function cache of the oligo kernel

		  The oligo kernel was introduced for sequences of fixed length.
		  Let n be the sequence length of sequences x and y. There can
		  only be n different distances between signals in sequence x
		  and sequence y (0, 1, ..., n-1). Therefore, we precompute
		  the corresponding values of the e-function. These values
		  can then be used in kernelOligoFast.

		  @param sequence_length the sequence length n described above
		  */
		void getExpFunctionCache(uint32_t sequence_length);

		/** comparison helper for encoded oligos
		 *
		 * Compares two pairs by their second component only (the
		 * numerical value according to the encodeOligo description);
		 * the first component is ignored.
		 *
		 * @param a first pair
		 * @param b second pair
		 * @return true if a.second is less than b.second
		 */
		static inline bool cmpOligos_(std::pair<int32_t, float64_t> a,
				std::pair<int32_t, float64_t> b )
		{
			return (a.second < b.second);
		}

		/** private, argument-less initialization helper
		 *
		 * Distinct overload from the public init(CFeatures*, CFeatures*).
		 * NOTE(review): the definition is not visible in this header;
		 * presumably shared set-up code for the constructors - confirm.
		 */
		void init();

	protected:
		/** k-mer length */
		int32_t k;
		/** width of kernel (per the constructors: equivalent to 2*sigma^2) */
		float64_t width;
		/** gauss table cache for exp (see getExpFunctionCache above) */
		SGVector<float64_t> gauss_table;
};
}
#endif // _OLIGOSTRINGKERNEL_H_