This file is indexed.

/usr/include/shogun/evaluation/CrossValidation.h is in libshogun-dev 1.1.0-4ubuntu2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#ifndef __CROSSVALIDATION_H_
#define __CROSSVALIDATION_H_

#include <shogun/base/SGObject.h>
#include <shogun/evaluation/Evaluation.h>

namespace shogun
{

class CMachine;
class CFeatures;
class CLabels;
class CSplittingStrategy;
class CEvaluation;

/** @brief type to encapsulate the results of an evaluation run.
 * May contain confidence interval (if conf_int_alpha!=0).
 * m_conf_int_alpha is the probability for an error, i.e. the value does not lie
 * in the confidence interval.
 */

class CrossValidationResult
{
	public:
		/** print result */
		void print_result()
		{
			if (has_conf_int)
			{
				SG_SPRINT("[%f,%f] with alpha=%f, mean=%f\n", conf_int_low,
						conf_int_up, conf_int_alpha, mean);
			}
			else
				SG_SPRINT("%f\n", mean);
		}

	public:
		/** mean */
		float64_t mean;
		/** has conf int */
		bool has_conf_int;
		/** conf int low */
		float64_t conf_int_low;
		/** conf int up */
		float64_t conf_int_up;
		/** conf int alpha */
		float64_t conf_int_alpha;

};

/** @brief base class for cross-validation evaluation.
 * Given a learning machine, a splitting strategy, an evaluation criterium,
 * features and correspnding labels, this provides an interface for
 * cross-validation. Results may be retrieved using the evaluate method. A
 * number of repetitions may be specified for obtaining more accurate results.
 * The arithmetic mean of different runs is returned along with confidence
 * intervals, if a p-value is specified.
 * Default number of runs is one, confidence interval combutation is disabled.
 *
 * This class calculates an evaluation criterium of every fold and then
 * calculates the arithmetic mean of all folds. This is for example suitable
 * for the AUC or for Accuracy. However, for example F1-measure may not be
 * merged this way (result will be biased). To solve this, different sub-classes
 * may average results of each cross validation fold differently by overwriting
 * the evaluate_one_run method.
 *
 * See [Forman, G. and Scholz, M. (2009). Apples-to-apples in cross-validation
 * studies: Pitfalls in classifier performance measurement. Technical report,
 * HP Laboratories.] for details on this subject.
 */
class CCrossValidation: public CSGObject
{
public:
	/** constructor */
	CCrossValidation();

	/** constructor
	 * @param machine learning machine to use
	 * @param features features to use for cross-validation
	 * @param labels labels that correspond to the features
	 * @param splitting_strategy splitting strategy to use
	 * @param evaluation_criterium evaluation criterium to use
	 */
	CCrossValidation(CMachine* machine, CFeatures* features, CLabels* labels,
			CSplittingStrategy* splitting_strategy,
			CEvaluation* evaluation_criterium);

	/** destructor */
	virtual ~CCrossValidation();

	/** @return in which direction is the best evaluation value? */
	EEvaluationDirection get_evaluation_direction();

	/** method for evaluation. Performs cross-validation.
	 * Is repeated m_num_runs. If this number is larger than one, a confidence
	 * interval is calculated if m_conf_int_alpha is (0<p<1).
	 * By default m_num_runs=1 and m_conf_int_alpha=0
	 *
	 * @return result of evaluation
	 */
	CrossValidationResult evaluate();

	/** @return underlying learning machine */
	CMachine* get_machine() const;

	/** setter for the number of runs to use for evaluation */
	void set_num_runs(int32_t num_runs);

	/** setter for the number of runs to use for evaluation */
	void set_conf_int_alpha(float64_t m_conf_int_alpha);

	/** @return name of the SGSerializable */
	inline virtual const char* get_name() const
	{
		return "CrossValidation";
	}

private:
	void init();

protected:
	/** Evaluates one single cross-validation run.
	 * Current implementation evaluates each fold separately and then calculates
	 * arithmetic mean. Suitable for accuracy and AUC for example. NOT for
	 * F1-measure. Has to be overridden by sub-classes if results have to be
	 * merged differently
	 *
	 * @return evaluation result of one cross-validation run
	 */
	virtual float64_t evaluate_one_run();

private:
	int32_t m_num_runs;
	float64_t m_conf_int_alpha;

	CMachine* m_machine;
	CFeatures* m_features;
	CLabels* m_labels;
	CSplittingStrategy* m_splitting_strategy;
	CEvaluation* m_evaluation_criterium;
};

}

#endif /* __CROSSVALIDATION_H_ */