// /usr/include/root/TMVA/RuleFitParams.h is in libroot-tmva-dev 5.34.30-0ubuntu8.
// This file is owned by root:root, with mode 0o644.
// The actual contents of the file can be viewed below.
// @(#)root/tmva $Id$
// Author: Andreas Hoecker, Joerg Stelzer, Fredrik Tegenfeldt, Helge Voss
/**********************************************************************************
* Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
* Package: TMVA *
* Class : RuleFitParams *
* Web : http://tmva.sourceforge.net *
* *
* Description: *
* A class doing the actual fitting of a linear model using rules as *
* base functions. *
* Reference paper: 1.Gradient Directed Regularization *
* Friedman, Popescu, 2004 *
* 2.Predictive Learning with Rule Ensembles *
* Friedman, Popescu, 2005 *
* *
* *
* Authors (alphabetical): *
* Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch> - Iowa State U., USA *
* Helge Voss <Helge.Voss@cern.ch> - MPI-KP Heidelberg, Ger. *
* *
* Copyright (c) 2005: *
* CERN, Switzerland *
* Iowa State U. *
* MPI-K Heidelberg, Germany *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted according to the terms listed in LICENSE *
* (http://tmva.sourceforge.net/LICENSE) *
**********************************************************************************/
#ifndef ROOT_TMVA_RuleFitParams
#define ROOT_TMVA_RuleFitParams
// #if ROOT_VERSION_CODE >= 364802
#ifndef ROOT_TMathBase
#include "TMathBase.h"
#endif
// #else
// #ifndef ROOT_TMath
// #include "TMath.h"
// #endif
// #endif
#ifndef ROOT_TMVA_Event
#include "TMVA/Event.h"
#endif
class TTree;
namespace TMVA {
class RuleEnsemble;
class MsgLogger;
class RuleFit;
// RuleFitParams
//
// Does the actual fitting of a linear model using rules as base functions
// (see the reference papers listed in the file header).
// The class stores the gradient-directed (GD) path configuration (tau range,
// tau precision, step size, number of steps), the event index ranges used for
// the path search and for the performance evaluation, and the working vectors
// (gradients, coefficients, offsets) - both for the selected tau and for the
// scan over several tau values ("Tst" members).
class RuleFitParams {

public:

   RuleFitParams();
   virtual ~RuleFitParams();

   void Init();

   // set message type
   void SetMsgType( EMsgType t );

   // set RuleFit ptr (stored as is)
   void SetRuleFit( RuleFit *rf )    { fRuleFit = rf; }
   //
   // GD path: set N(path steps)
   void SetGDNPathSteps( Int_t np )  { fGDNPathSteps = np; }

   // GD path: set path step size
   void SetGDPathStep( Double_t s )  { fGDPathStep = s; }

   // GD path: set tau search range
   // Both limits are clamped to [0,1]; if the clamped maximum is below the
   // clamped minimum, the maximum is raised to the minimum.
   void SetGDTauRange( Double_t t0, Double_t t1 )
   {
      fGDTauMin = (t0>1.0 ? 1.0:(t0<0.0 ? 0.0:t0));
      fGDTauMax = (t1>1.0 ? 1.0:(t1<0.0 ? 0.0:t1));
      if (fGDTauMax<fGDTauMin) fGDTauMax = fGDTauMin;
   }

   // GD path: set number of steps in tau search range
   void SetGDTauScan( UInt_t n )     { fGDTauScan = n; }

   // GD path: set tau
   void SetGDTau( Double_t t )       { fGDTau = t; }

   // GD path: set error scale (see fGDErrScale)
   void SetGDErrScale( Double_t s )  { fGDErrScale = s; }

   // GD path: set precision in tau; recalculates the number of tau values
   // (see CalcGDNTau) and resizes the tau vector to that number
   void SetGDTauPrec( Double_t p )   { fGDTauPrec=p; CalcGDNTau(); fGDTauVec.resize(fGDNTau); }

   // return type such that +1 = signal and -1 = background
   Int_t Type( const Event * e ) const; // return (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e) ? 1:-1); }
   //
   // event index ranges: path search (Path) and performance evaluation (Perf)
   UInt_t GetPathIdx1() const { return fPathIdx1; }
   UInt_t GetPathIdx2() const { return fPathIdx2; }
   UInt_t GetPerfIdx1() const { return fPerfIdx1; }
   UInt_t GetPerfIdx2() const { return fPerfIdx2; }

   // Loss function; Huber loss eq 33
   Double_t LossFunction( const Event& e ) const;

   // same but using evt idx (faster)
   Double_t LossFunction( UInt_t evtidx ) const;
   Double_t LossFunction( UInt_t evtidx, UInt_t itau ) const;

   // Empirical risk
   Double_t Risk(UInt_t ind1, UInt_t ind2, Double_t neff) const;
   Double_t Risk(UInt_t ind1, UInt_t ind2, Double_t neff, UInt_t itau) const;

   // Risk evaluation for fPathIdx and fPerfIdx
   Double_t RiskPath() const               { return Risk(fPathIdx1,fPathIdx2,fNEveEffPath); }
   Double_t RiskPerf() const               { return Risk(fPerfIdx1,fPerfIdx2,fNEveEffPerf); }
   Double_t RiskPerf( UInt_t itau ) const  { return Risk(fPerfIdx1,fPerfIdx2,fNEveEffPerf,itau); }

   // Risk evaluation for all tau
   UInt_t RiskPerfTst();

   // Penalty function; Lasso function (eq 8)
   Double_t Penalty() const;

   // initialize GD path
   void InitGD();

   // find best tau and return the number of scan steps used
   Int_t FindGDTau();

   // make path for binary classification (squared-error ramp, sect 6 in ref 1)
   void MakeGDPath();

protected:

   // typedef of an Event const iterator
   typedef std::vector<const TMVA::Event *>::const_iterator  EventItr;

   // init ntuple
   void InitNtuple();

   // calculate N(tau) in scan - limit to 100000.
   // The limit is applied in floating point, *before* the conversion to
   // UInt_t: converting a value outside the UInt_t range (1/precision for a
   // tiny, zero or small negative precision) is undefined behaviour.
   // A precision that is not strictly positive (zero, negative, NaN) cannot
   // define a scan and results in a single tau.
   void CalcGDNTau()
   {
      const UInt_t kMaxNTau = 100000;
      if (!(fGDTauPrec > 0.0)) {
         fGDNTau = 1;
         return;
      }
      const Double_t nsteps = 1.0/fGDTauPrec; // > 0 here, may be +inf
      fGDNTau = (nsteps < kMaxNTau ? static_cast<UInt_t>(nsteps)+1 : kMaxNTau);
   }

   // fill ntuple with coefficient info
   void FillCoefficients();

   // estimate the optimum scoring function
   void CalcFStar();

   // estimate of binary error rate
   Double_t ErrorRateBin();

   // estimate of scale average error rate
   Double_t ErrorRateReg();

   // estimate 1-area under ROC
   Double_t ErrorRateRocRaw( std::vector<Double_t> & sFsig, std::vector<Double_t> & sFbkg );
   Double_t ErrorRateRoc();
   void     ErrorRateRocTst();

   // estimate optimism
   Double_t Optimism();

   // make gradient vector (eq 44 in ref 1)
   void MakeGradientVector();

   // Calculate the direction in parameter space (eq 25, ref 1) and update coeffs (eq 22, ref 1)
   void UpdateCoefficients();

   // calculate average of responses of F
   Double_t CalcAverageResponse();
   Double_t CalcAverageResponseOLD();

   // calculate average of true response (initial estimate of a0)
   Double_t CalcAverageTruth();

   // calculate the average of each variable over the range
   void EvaluateAverage(UInt_t ind1, UInt_t ind2,
                        std::vector<Double_t> &avsel,
                        std::vector<Double_t> &avrul);

   // evaluate using fPathIdx1,2
   void EvaluateAveragePath() { EvaluateAverage( fPathIdx1, fPathIdx2, fAverageSelectorPath, fAverageRulePath ); }

   // evaluate using fPerfIdx1,2
   void EvaluateAveragePerf() { EvaluateAverage( fPerfIdx1, fPerfIdx2, fAverageSelectorPerf, fAverageRulePerf ); }

   // the same as above but for the various tau
   void MakeTstGradientVector();
   void UpdateTstCoefficients();
   void CalcTstAverageResponse();

   RuleFit      * fRuleFit;      // rule fit
   RuleEnsemble * fRuleEnsemble; // rule ensemble
   //
   UInt_t    fNRules;            // number of rules
   UInt_t    fNLinear;           // number of linear terms
   //
   // Event indices for path/validation - TODO: should let the user decide
   // Now it is just a simple one-fold cross validation.
   //
   UInt_t    fPathIdx1;          // first event index for path search
   UInt_t    fPathIdx2;          // last event index for path search
   UInt_t    fPerfIdx1;          // first event index for performance evaluation
   UInt_t    fPerfIdx2;          // last event index for performance evaluation
   Double_t  fNEveEffPath;       // sum of weights for Path events
   Double_t  fNEveEffPerf;       // idem for Perf events

   std::vector<Double_t> fAverageSelectorPath; // average of each variable over the range fPathIdx1,2
   std::vector<Double_t> fAverageRulePath;     // average of each rule, same range
   std::vector<Double_t> fAverageSelectorPerf; // average of each variable over the range fPerfIdx1,2
   std::vector<Double_t> fAverageRulePerf;     // average of each rule, same range

   std::vector<Double_t> fGradVec;        // gradient vector - dimension = number of rules in ensemble
   std::vector<Double_t> fGradVecLin;     // gradient vector - dimension = number of variables

   std::vector< std::vector<Double_t> > fGradVecTst;    // gradient vector - one per tau
   std::vector< std::vector<Double_t> > fGradVecLinTst; // gradient vector, linear terms - one per tau
   //
   std::vector<Double_t> fGDErrTst;       // error rates per tau
   std::vector<Char_t>   fGDErrTstOK;     // error rate is sufficiently low <--- stores boolean
   std::vector< std::vector<Double_t> > fGDCoefTst;    // rule coeffs - one per tau
   std::vector< std::vector<Double_t> > fGDCoefLinTst; // linear coeffs - one per tau
   std::vector<Double_t> fGDOfsTst;       // offset per tau
   std::vector< Double_t > fGDTauVec;     // the tau's
   UInt_t    fGDNTauTstOK;       // number of tau in the test-phase that are ok
   UInt_t    fGDNTau;            // number of tau-paths - calculated in SetGDTauPrec
   Double_t  fGDTauPrec;         // precision in tau
   UInt_t    fGDTauScan;         // number scan for tau-paths
   Double_t  fGDTauMin;          // min threshold parameter (tau in eq 26, ref 1)
   Double_t  fGDTauMax;          // max threshold parameter (tau in eq 26, ref 1)
   Double_t  fGDTau;             // selected threshold parameter (tau in eq 26, ref 1)
   Double_t  fGDPathStep;        // step size along path (delta nu in eq 22, ref 1)
   Int_t     fGDNPathSteps;      // number of path steps
   Double_t  fGDErrScale;        // stop scan at error = scale*errmin
   //
   Double_t  fAverageTruth;      // average truth, ie sum(y)/N, y=+-1
   //
   std::vector<Double_t> fFstar; // vector of F*() - filled in CalcFStar()
   Double_t  fFstarMedian;       // median value of F*() using
   //
   TTree    *fGDNtuple;          // Gradient path ntuple, contains params for each step along the path
   Double_t  fNTRisk;            // GD path: risk
   Double_t  fNTErrorRate;       // GD path: error rate (or performance)
   Double_t  fNTNuval;           // GD path: value of nu
   Double_t  fNTCoefRad;         // GD path: 'radius' of all rulecoeffs
   Double_t  fNTOffset;          // GD path: model offset
   Double_t *fNTCoeff;           // GD path: rule coefficients
   Double_t *fNTLinCoeff;        // GD path: linear coefficients

   Double_t  fsigave;            // Sigma of current signal score function F(sig)
   Double_t  fsigrms;            // Rms of F(sig)
   Double_t  fbkgave;            // Average of F(bkg)
   Double_t  fbkgrms;            // Rms of F(bkg)

private:

   mutable MsgLogger*  fLogger;  //! message logger
   // access to the message logger; fLogger is dereferenced without a null check
   MsgLogger& Log() const { return *fLogger; }

};
// --------------------------------------------------------
// Comparison functor ordering two numbers by their absolute value:
// returns true when |first| is strictly smaller than |second|.
class AbsValue {

public:

   Bool_t operator()( Double_t first, Double_t second ) const
   {
      const Double_t magFirst  = TMath::Abs(first);
      const Double_t magSecond = TMath::Abs(second);
      return magFirst < magSecond;
   }

};
}
#endif
// end of include guard ROOT_TMVA_RuleFitParams