This file is indexed.

/usr/include/shogun/regression/LeastAngleRegression.h is in libshogun-dev 3.1.1-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Chiyuan Zhang
 * Copyright (C) 2012 Chiyuan Zhang
 */

#ifndef LEASTANGLEREGRESSION_H__
#define LEASTANGLEREGRESSION_H__

#include <shogun/lib/config.h>

#ifdef HAVE_LAPACK
#include <vector>
#include <shogun/machine/LinearMachine.h>

namespace shogun
{
class CFeatures;

/** @brief Class for Least Angle Regression (LARS), which can also be used to solve the LASSO.
 *
 * LASSO is basically L1-regularized least-squares regression
 *
 * \f[
 * \min \|X^T\beta - y\|^2 + \lambda\|\beta\|_{1}
 * \f]
 *
 * where the L1 norm is defined as
 *
 * \f[
 * \|\beta\|_1 = \sum_i|\beta_i|
 * \f]
 *
 * **Note**: pre-processing of X and y is needed to ensure the correctness
 * of this algorithm:
 * * X needs to be normalized: each feature should have zero mean and unit norm
 * * y needs to be centered: its mean should be zero
 *
 * The above equation is equivalent to the following form
 *
 * \f[
 * \min \|X^T\beta - y\|^2 \quad s.t. \|\beta\|_1 \leq C
 * \f]
 *
 * There is a correspondence between the regularization coefficient lambda
 * and the hard constraint constant C. The latter form is easier to control,
 * since it constrains the l1-norm of the estimator explicitly. This
 * implementation supports the latter form and, in addition, allows explicit
 * control of the number of non-zero variables.
 *
 * When no constraint is provided, the full path is generated.
 *
 * Please see the following paper for more details.
 *
 * @code
 * @article{efron2004least,
 *   title={Least angle regression},
 *   author={Efron, B. and Hastie, T. and Johnstone, I. and Tibshirani, R.},
 *   journal={The Annals of Statistics},
 *   volume={32},
 *   number={2},
 *   pages={407--499},
 *   year={2004},
 *   publisher={Institute of Mathematical Statistics}
 * }
 * @endcode
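 *
 * A minimal usage sketch (illustrative only; assumes the feature matrix is
 * already normalized and the labels centered as described above; feat_mat and
 * lab_vec are hypothetical placeholders for the pre-processed data):
 *
 * @code
 * #include <shogun/features/DenseFeatures.h>
 * #include <shogun/labels/RegressionLabels.h>
 * #include <shogun/regression/LeastAngleRegression.h>
 *
 * using namespace shogun;
 *
 * // feat_mat: (num_features x num_vectors) SGMatrix<float64_t>, columns are
 * // examples; lab_vec: SGVector<float64_t> of centered regression targets
 * CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(feat_mat);
 * CRegressionLabels* labels = new CRegressionLabels(lab_vec);
 *
 * CLeastAngleRegression* lars = new CLeastAngleRegression(true); // true: LASSO
 * lars->set_labels(labels);
 * lars->set_max_l1_norm(2.0); // optional early stopping on the l1-norm of beta
 * lars->train(features);
 *
 * SGVector<float64_t> beta = lars->get_w(); // estimator at the end of the path
 * SG_UNREF(lars);
 * @endcode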
 */
class CLeastAngleRegression: public CLinearMachine
{
public:
	/** problem type */
	MACHINE_PROBLEM_TYPE(PT_REGRESSION);

	/** default constructor
	 *
	 * @param lasso when true, run the LASSO modification; when false, run plain LARS
	 * */
	CLeastAngleRegression(bool lasso=true);

	/** default destructor */
	virtual ~CLeastAngleRegression();

	/** set max number of non-zero variables for early stopping
	 *
	 * @param n maximum number of non-zero variables; 0 means no constraint
	 */
	void set_max_non_zero(int32_t n)
	{
		m_max_nonz = n;
	}

	/** get max number of non-zero variables for early stopping
	 */
	int32_t get_max_non_zero() const
	{
		return m_max_nonz;
	}

	/** set max l1-norm of estimator for early stopping
	 *
	 * @param norm the max l1-norm for beta
	 */
	void set_max_l1_norm(float64_t norm)
	{
		m_max_l1_norm = norm;
	}

	/** get max l1-norm of estimator for early stopping
	 */
	float64_t get_max_l1_norm() const
	{
		return m_max_l1_norm;
	}

	/** switch estimator
	 *
	 * @param num_variable number of non-zero coefficients
	 */
	void switch_w(int32_t num_variable)
	{
		if (w.vlen <= 0)
			SG_ERROR("cannot swith estimator before training")
		if (size_t(num_variable) >= m_beta_idx.size() || num_variable < 0)
			SG_ERROR("cannot switch to an estimator of %d non-zero coefficients", num_variable)
		if (w.vector == NULL)
			w = SGVector<float64_t>(w.vlen);
		std::copy(m_beta_path[m_beta_idx[num_variable]].begin(),
			m_beta_path[m_beta_idx[num_variable]].end(), w.vector);
	}

	/** get path size
	 *
	 * @return the size of the variable selection path. Call get_w_for_var(i) to get
	 * the estimator at the i-th entry on the path, where i is in the range [0, path_size)
	 *
	 * @see switch_w
	 * @see get_w_for_var
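	 *
	 * An illustrative sketch of walking the path after training (`lars` is a
	 * hypothetical trained CLeastAngleRegression object):
	 *
	 * @code
	 * for (int32_t i=0; i<lars->get_path_size(); ++i)
	 * {
	 *     // estimator with i non-zero coefficients; references internal memory
	 *     SGVector<float64_t> beta_i = lars->get_w_for_var(i);
	 * }
	 * // make the model use the estimator with 3 non-zero coefficients
	 * // (assuming the path contains at least 4 entries)
	 * lars->switch_w(3);
	 * @endcode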
	 */
	int32_t get_path_size() const
	{
		return m_beta_idx.size();
	}

	/** get w for a particular regularization variable
	 *
	 * @param num_var number of non-zero coefficients
	 *
	 * @return the estimator with num_var non-zero coefficients. **Note**: the
	 * returned vector references internal structures, so the memory becomes
	 * invalid when train is called again. Make a copy if you intend to call
	 * train multiple times.
	 */
	SGVector<float64_t> get_w_for_var(int32_t num_var)
	{
		return SGVector<float64_t>(&m_beta_path[m_beta_idx[num_var]][0], w.vlen, false);
	}

	/** get classifier type
	 *
	 * @return classifier type CT_LARS
	 */
	virtual EMachineType get_classifier_type()
	{
		return CT_LARS;
	}

	/** @return object name */
	virtual const char* get_name() const { return "LeastAngleRegression"; }

protected:
	virtual bool train_machine(CFeatures* data=NULL);

private:
	void activate_variable(int32_t v)
	{
		m_num_active++;
		m_active_set.push_back(v);
		m_is_active[v] = true;
	}
	void deactivate_variable(int32_t v_idx)
	{
		m_num_active--;
		m_is_active[m_active_set[v_idx]] = false;
		m_active_set.erase(m_active_set.begin() + v_idx);
	}

	SGMatrix<float64_t> cholesky_insert(SGMatrix<float64_t> X, SGMatrix<float64_t> R, int32_t i_max_corr);
	SGMatrix<float64_t> cholesky_delete(SGMatrix<float64_t> R, int32_t i_kick);


	bool m_lasso; //!< enable lasso modification

	int32_t m_max_nonz;  //!< max number of non-zero variables for early stopping
	float64_t m_max_l1_norm; //!< max l1-norm of beta (estimator) for early stopping

	std::vector<std::vector<float64_t> > m_beta_path;
	std::vector<int32_t> m_beta_idx;

	std::vector<int32_t> m_active_set;
	std::vector<bool> m_is_active;
	int32_t m_num_active;
}; // class LARS

} // namespace shogun

#endif // HAVE_LAPACK
#endif // LEASTANGLEREGRESSION_H__