This file is indexed.

/usr/include/shogun/clustering/GMM.h is in libshogun-dev 1.1.0-4ubuntu2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Alesis Novik
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */
#ifndef _GMM_H__
#define _GMM_H__

#include <shogun/lib/config.h>

#ifdef HAVE_LAPACK

#include <shogun/distributions/Distribution.h>
#include <shogun/distributions/Gaussian.h>
#include <shogun/lib/common.h>

namespace shogun
{
/** @brief Gaussian Mixture Model interface.
 *
 * Takes as input the number of Gaussians to fit and the covariance type to
 * use. Parameter estimation is done using either the
 * Expectation-Maximization (EM) or the Split-Merge Expectation-Maximization
 * (SMEM) algorithm. To estimate the GMM parameters, first call train(...)
 * to set the training data, then call either train_em(...) or
 * train_smem(...) to do the actual estimation.
 *
 * The EM algorithm is described here:
 * http://en.wikipedia.org/wiki/Expectation-maximization_algorithm
 * The SMEM algorithm is described here:
 * http://mlg.eng.cam.ac.uk/zoubin/papers/uedanc.pdf
 */
class CGMM : public CDistribution
{
	public:
		/** default constructor */
		CGMM();
		/** constructor
		 *
		 * @param n number of Gaussians
		 * @param cov_type covariance type (defaults to FULL)
		 */
		CGMM(int32_t n, ECovType cov_type=FULL);
		/** constructor
		 *
		 * @param components GMM components
		 * @param coefficients mixing coefficients
		 * @param copy true if the arguments should be copied (defaults to
		 * false). NOTE(review): the exact ownership semantics are defined in
		 * the implementation, not in this header - confirm before relying
		 * on them.
		 */
		CGMM(SGVector<CGaussian*> components, SGVector<float64_t> coefficients,
				bool copy=false);
		/** destructor */
		virtual ~CGMM();

		/** cleanup */
		void cleanup();

		/** set training data for use with EM or SMEM
		 *
		 * This only sets the data; the estimation itself is done by
		 * train_em(...) or train_smem(...).
		 *
		 * @param data training data (defaults to NULL)
		 *
		 * @return true
		 */
		virtual bool train(CFeatures* data=NULL);

		/** learn model using EM
		 *
		 * @param min_cov minimum covariance (defaults to 1e-9)
		 * @param max_iter maximum iterations (defaults to 1000)
		 * @param min_change minimum change in log likelihood
		 * (defaults to 1e-9)
		 *
		 * @return log likelihood of training data
		 */
		float64_t train_em(float64_t min_cov=1e-9, int32_t max_iter=1000,
				float64_t min_change=1e-9);

		/** learn model using SMEM
		 *
		 * @param max_iter maximum SMEM iterations (defaults to 100)
		 * @param max_cand maximum split-merge candidates (defaults to 5)
		 * @param min_cov minimum covariance (defaults to 1e-9)
		 * @param max_em_iter maximum iterations for EM (defaults to 1000)
		 * @param min_change minimum change in log likelihood
		 * (defaults to 1e-9)
		 *
		 * @return log likelihood of training data
		 */
		float64_t train_smem(int32_t max_iter=100, int32_t max_cand=5,
				float64_t min_cov=1e-9, int32_t max_em_iter=1000,
				float64_t min_change=1e-9);

		/** maximum likelihood estimation
		 *
		 * @param alpha point assignment
		 * @param min_cov minimum covariance
		 */
		void max_likelihood(SGMatrix<float64_t> alpha, float64_t min_cov);

		/** get number of parameters in model
		 *
		 * @return number of parameters in model
		 */
		virtual int32_t get_num_model_parameters();

		/** get model parameter (logarithmic)
		 *
		 * @param num_param index of the model parameter to retrieve
		 *
		 * @return model parameter (logarithmic); if num_param < m_dim
		 * returns an element from the mean, else returns an element from
		 * the covariance.
		 * NOTE(review): m_dim is not declared in this class; this wording
		 * may have been carried over from another distribution class -
		 * confirm against the implementation.
		 */
		virtual float64_t get_log_model_parameter(int32_t num_param);

		/** get partial derivative of likelihood function (logarithmic)
		 *
		 * @param num_param derivative against which param
		 * @param num_example which example
		 * @return derivative of likelihood (logarithmic)
		 */
		virtual float64_t get_log_derivative(
			int32_t num_param, int32_t num_example);

		/** compute log likelihood for example
		 *
		 * Declared here as a regular (non-pure) virtual method.
		 * NOTE(review): presumably implements the corresponding
		 * CDistribution method - confirm against the base class.
		 *
		 * @param num_example which example
		 * @return log likelihood for example
		 */
		virtual float64_t get_log_likelihood_example(int32_t num_example);

		/** compute likelihood for example
		 *
		 * Declared here as a regular (non-pure) virtual method.
		 * NOTE(review): presumably implements the corresponding
		 * CDistribution method - confirm against the base class.
		 *
		 * @param num_example which example
		 * @return likelihood for example
		 */
		virtual float64_t get_likelihood_example(int32_t num_example);

		/** get nth mean
		 *
		 * @param num index of mean to retrieve
		 *
		 * @return mean
		 */
		virtual SGVector<float64_t> get_nth_mean(int32_t num);

		/** set nth mean
		 *
		 * @param mean new mean
		 * @param num index of mean to set
		 */
		virtual void set_nth_mean(SGVector<float64_t> mean, int32_t num);

		/** get nth covariance
		 *
		 * @param num index of covariance to retrieve
		 *
		 * @return covariance
		 */
		virtual SGMatrix<float64_t> get_nth_cov(int32_t num);

		/** set nth covariance
		 *
		 * @param cov new covariance
		 * @param num index of covariance to set
		 */
		virtual void set_nth_cov(SGMatrix<float64_t> cov, int32_t num);

		/** get coefficients
		 *
		 * @return mixing coefficients
		 */
		virtual SGVector<float64_t> get_coef();

		/** set coefficients
		 *
		 * @param coefficients mixing coefficients
		 */
		virtual void set_coef(SGVector<float64_t> coefficients);

		/** get components
		 *
		 * @return Gaussian components
		 */
		virtual SGVector<CGaussian*> get_comp();

		/** set components
		 *
		 * @param components Gaussian components
		 */
		virtual void set_comp(SGVector<CGaussian*> components);

		/** sample from model
		 *
		 * @return sample
		 */
		SGVector<float64_t> sample();

		/** cluster point
		 *
		 * @param point point to cluster
		 *
		 * @return log likelihood of belonging to each cluster, followed by
		 * the log likelihood of being generated by this GMM. The length of
		 * the returned vector is the number of components + 1.
		 */
		SGVector<float64_t> cluster(SGVector<float64_t> point);

		/** @return object name ("GMM") */
		inline virtual const char* get_name() const { return "GMM"; }

	private:
		/** 1NN assignment initialization
		 *
		 * @param init_means initial means
		 *
		 * @return initial alphas
		 */
		SGMatrix<float64_t> alpha_init(SGMatrix<float64_t> init_means);

		/** initialize parameters for serialization */
		void register_params();

		/** apply the partial EM algorithm on 3 components
		 *
		 * @param comp1 index of first component
		 * @param comp2 index of second component
		 * @param comp3 index of third component
		 * @param min_cov minimum covariance
		 * @param max_em_iter maximum iterations for EM
		 * @param min_change minimum change in log likelihood
		 */
		void partial_em(int32_t comp1, int32_t comp2, int32_t comp3,
				float64_t min_cov, int32_t max_em_iter, float64_t min_change);

	protected:
		/** mixture components (one CGaussian pointer per component) */
		SGVector<CGaussian*> m_components;
		/** mixture (mixing) coefficients */
		SGVector<float64_t> m_coefficients;
};
}
#endif //HAVE_LAPACK
#endif //_GMM_H__