/usr/include/shogun/evaluation/CrossValidation.h is in libshogun-dev 3.2.0-7.5.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 | /*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011-2012 Heiko Strathmann
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#ifndef __CROSSVALIDATION_H_
#define __CROSSVALIDATION_H_
#include <shogun/evaluation/EvaluationResult.h>
#include <shogun/evaluation/MachineEvaluation.h>
namespace shogun
{
class CMachineEvaluation;
class CCrossValidationOutput;
class CList;
/** @brief type to encapsulate the results of an evaluation run.
* May contain confidence interval (if conf_int_alpha!=0).
* m_conf_int_alpha is the probability for an error, i.e. the value does not lie
* in the confidence interval.
*/
class CCrossValidationResult : public CEvaluationResult
{
public:
CCrossValidationResult()
{
SG_ADD(&mean, "mean", "Mean of results", MS_NOT_AVAILABLE);
SG_ADD(&has_conf_int, "has_conf_int", "Has confidence intervals?",
MS_NOT_AVAILABLE);
SG_ADD(&conf_int_low, "conf_int_low", "Lower confidence bound",
MS_NOT_AVAILABLE);
SG_ADD(&conf_int_up, "conf_int_up", "Upper confidence bound",
MS_NOT_AVAILABLE);
SG_ADD(&conf_int_alpha, "conf_int_alpha",
"Alpha of confidence interval", MS_NOT_AVAILABLE);
mean = 0;
has_conf_int = 0;
conf_int_low = 0;
conf_int_up = 0;
conf_int_alpha = 0;
}
/** return what type of result we are.
*
* @return result type
*/
virtual EEvaluationResultType get_result_type() const
{
return CROSSVALIDATION_RESULT;
}
/** Returns the name of the SGSerializable instance. It MUST BE
* the CLASS NAME without the prefixed `C'.
*
* @return name of the SGSerializable
*/
virtual const char* get_name() const { return "CrossValidationResult"; }
/** helper method used to specialize a base class instance
*
* @param eval_result its dynamic type must be CCrossValidationResult
*/
static CCrossValidationResult* obtain_from_generic(
CEvaluationResult* eval_result)
{
if (!eval_result)
return NULL;
REQUIRE(eval_result->get_result_type()==CROSSVALIDATION_RESULT,
"CrossValidationResult::obtain_from_generic(): argument is"
"of wrong type!\n");
SG_REF(eval_result);
return (CCrossValidationResult*) eval_result;
}
/** print result */
virtual void print_result()
{
if (has_conf_int)
{
SG_SPRINT("[%f,%f] with alpha=%f, mean=%f\n", conf_int_low,
conf_int_up, conf_int_alpha, mean);
}
else
SG_SPRINT("%f\n", mean)
}
public:
/** mean */
float64_t mean;
/** has conf int */
bool has_conf_int;
/** conf int low */
float64_t conf_int_low;
/** conf int up */
float64_t conf_int_up;
/** conf int alpha */
float64_t conf_int_alpha;
};
/** @brief base class for cross-validation evaluation.
* Given a learning machine, a splitting strategy, an evaluation criterium,
* features and correspnding labels, this provides an interface for
* cross-validation. Results may be retrieved using the evaluate method. A
* number of repetitions may be specified for obtaining more accurate results.
* The arithmetic mean of different runs is returned along with confidence
* intervals, if a p-value is specified.
* Default number of runs is one, confidence interval combutation is disabled.
*
* This class calculates an evaluation criterium of every fold and then
* calculates the arithmetic mean of all folds. This is for example suitable
* for the AUC or for Accuracy. However, for example F1-measure may not be
* merged this way (result will be biased). To solve this, different sub-classes
* may average results of each cross validation fold differently by overwriting
* the evaluate_one_run method.
*
* See [Forman, G. and Scholz, M. (2009). Apples-to-apples in cross-validation
* studies: Pitfalls in classifier performance measurement. Technical report,
* HP Laboratories.] for details on this subject.
*
* Cross validation tries to lock underlying machines if that is possible to
* speed up computations. Can be turned off by the set_autolock() method.
* Locking in general may speed up things (eg for kernel machines the kernel
* matrix is precomputed), however, it is not always supported.
*/
class CCrossValidation: public CMachineEvaluation
{
public:
/** constructor */
CCrossValidation();
/** constructor
* @param machine learning machine to use
* @param features features to use for cross-validation
* @param labels labels that correspond to the features
* @param splitting_strategy splitting strategy to use
* @param evaluation_criterion evaluation criterion to use
* @param autolock whether machine should be auto-locked before evaluation
*/
CCrossValidation(CMachine* machine, CFeatures* features, CLabels* labels,
CSplittingStrategy* splitting_strategy,
CEvaluation* evaluation_criterion, bool autolock=true);
/** constructor, for use with custom kernels (no features)
* @param machine learning machine to use
* @param labels labels that correspond to the features
* @param splitting_strategy splitting strategy to use
* @param evaluation_criterion evaluation criterion to use
* @param autolock autolock
*/
CCrossValidation(CMachine* machine, CLabels* labels,
CSplittingStrategy* splitting_strategy,
CEvaluation* evaluation_criterion, bool autolock=true);
/** destructor */
virtual ~CCrossValidation();
/** setter for the number of runs to use for evaluation */
void set_num_runs(int32_t num_runs);
/** setter for the number of runs to use for evaluation */
void set_conf_int_alpha(float64_t m_conf_int_alpha);
/** evaluate */
virtual CEvaluationResult* evaluate();
/** appends given cross validation output instance
* to the list of listeners
*
* @param cross_validation_output given cross validation output
*/
void add_cross_validation_output(
CCrossValidationOutput* cross_validation_output);
/** @return name of the SGSerializable */
virtual const char* get_name() const
{
return "CrossValidation";
}
private:
void init();
protected:
/** Evaluates one single cross-validation run.
* Current implementation evaluates each fold separately and then calculates
* arithmetic mean. Suitable for accuracy and AUC for example. NOT for
* F1-measure. Has to be overridden by sub-classes if results have to be
* merged differently
*
* @return evaluation result of one cross-validation run
*/
virtual float64_t evaluate_one_run();
/** number of evaluation runs for one fold */
int32_t m_num_runs;
/** confidence interval alpha parameter */
float64_t m_conf_int_alpha;
/** xval output listeners */
CList* m_xval_outputs;
};
}
#endif /* __CROSSVALIDATION_H_ */
|