This file is indexed.

/usr/include/root/TMVA/RuleFitParams.h is in libroot-tmva-dev 5.34.30-0ubuntu8.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
// @(#)root/tmva $Id$
// Author: Andreas Hoecker, Joerg Stelzer, Fredrik Tegenfeldt, Helge Voss

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
 * Package: TMVA                                                                  *
 * Class  : RuleFitParams                                                         *
 * Web    : http://tmva.sourceforge.net                                           *
 *                                                                                *
 * Description:                                                                   *
 *      A class doing the actual fitting of a linear model using rules as         *
 *      base functions.                                                           *
 *      Reference paper: 1.Gradient Directed Regularization                       *
 *                         Friedman, Popescu, 2004                                *
 *                       2.Predictive Learning with Rule Ensembles                *
 *                         Friedman, Popescu, 2005                                *
 *                                                                                *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch> - Iowa State U., USA      *
 *      Helge Voss         <Helge.Voss@cern.ch>         - MPI-KP Heidelberg, Ger. *
 *                                                                                *
 * Copyright (c) 2005:                                                            *
 *      CERN, Switzerland                                                         * 
 *      Iowa State U.                                                             *
 *      MPI-K Heidelberg, Germany                                                 * 
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without             *
 * modification, are permitted according to the terms listed in LICENSE           *
 * (http://tmva.sourceforge.net/LICENSE)                                          *
 **********************************************************************************/

#ifndef ROOT_TMVA_RuleFitParams
#define ROOT_TMVA_RuleFitParams

// #if ROOT_VERSION_CODE >= 364802
#ifndef ROOT_TMathBase
#include "TMathBase.h"
#endif
// #else
// #ifndef ROOT_TMath
// #include "TMath.h"
// #endif
// #endif

#ifndef ROOT_TMVA_Event
#include "TMVA/Event.h"
#endif

class TTree;

namespace TMVA {

   class RuleEnsemble;
   class MsgLogger;
   class RuleFit;
   // Fitting engine of the RuleFit method: holds the gradient-directed (GD) path
   // configuration, the per-tau scan buffers and the bookkeeping ntuple variables, and
   // declares the risk / loss / error-rate evaluations used while following the path
   // (see the reference papers listed in the file header).
   //
   // NOTE(review): the class stores raw pointers (fGDNtuple, fNTCoeff, fNTLinCoeff,
   // fLogger) and declares a destructor but no copy constructor / assignment operator.
   // Ownership is not visible from this header - confirm against the implementation
   // before copying instances.
   class RuleFitParams {

   public:

      RuleFitParams();
      virtual ~RuleFitParams();

      // (re)initialisation - implemented out of line
      void Init();

      // set message type
      void SetMsgType( EMsgType t );

      // set RuleFit ptr (stored as-is, no ownership transfer happens here)
      void SetRuleFit( RuleFit *rf )    { fRuleFit = rf; }

      // GD path: set N(path steps)
      void SetGDNPathSteps( Int_t np )  { fGDNPathSteps = np; }

      // GD path: set path step size
      void SetGDPathStep( Double_t s )  { fGDPathStep = s; }

      // GD path: set tau search range.
      // Both limits are clamped to [0,1]; if the upper limit ends up below the
      // lower one it is raised to the lower one, so that fGDTauMin <= fGDTauMax.
      void SetGDTauRange( Double_t t0, Double_t t1 )
      {
         fGDTauMin = (t0>1.0 ? 1.0:(t0<0.0 ? 0.0:t0));
         fGDTauMax = (t1>1.0 ? 1.0:(t1<0.0 ? 0.0:t1));
         if (fGDTauMax<fGDTauMin) fGDTauMax = fGDTauMin;
      }

      // GD path: set number of steps in tau search range
      void SetGDTauScan( UInt_t n )        { fGDTauScan = n; }

      // GD path: set tau
      void SetGDTau( Double_t t ) { fGDTau = t; }

      // GD path: set the error scale (scan stops at error = scale*errmin, see fGDErrScale)
      void SetGDErrScale( Double_t s ) { fGDErrScale = s; }

      // GD path: set the precision in tau; recomputes fGDNTau and resizes fGDTauVec to match
      void SetGDTauPrec( Double_t p )  { fGDTauPrec=p; CalcGDNTau(); fGDTauVec.resize(fGDNTau); }

      // return type such that +1 = signal and -1 = background
      // (implemented out of line; the former inline body was
      //  return (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e) ? 1:-1); )
      Int_t Type( const Event * e ) const;

      // accessors for the event index ranges used for path search / performance evaluation
      UInt_t                            GetPathIdx1() const { return fPathIdx1; }
      UInt_t                            GetPathIdx2() const { return fPathIdx2; }
      UInt_t                            GetPerfIdx1() const { return fPerfIdx1; }
      UInt_t                            GetPerfIdx2() const { return fPerfIdx2; }

      // Loss function; Huber loss eq 33
      Double_t LossFunction( const Event& e ) const;

      // same but using evt idx (faster)
      Double_t LossFunction( UInt_t evtidx ) const;
      // same, for the tau with index itau
      Double_t LossFunction( UInt_t evtidx, UInt_t itau ) const;

      // Empirical risk over the event index range ind1..ind2; neff is passed by the
      // wrappers below as the sum of weights of that range
      Double_t Risk(UInt_t ind1, UInt_t ind2, Double_t neff) const;
      Double_t Risk(UInt_t ind1, UInt_t ind2, Double_t neff, UInt_t itau) const;

      // Risk evaluation for the path range (fPathIdx1,2) and the performance range (fPerfIdx1,2)
      Double_t RiskPath() const { return Risk(fPathIdx1,fPathIdx2,fNEveEffPath); }
      Double_t RiskPerf() const { return Risk(fPerfIdx1,fPerfIdx2,fNEveEffPerf); }
      Double_t RiskPerf( UInt_t itau ) const { return Risk(fPerfIdx1,fPerfIdx2,fNEveEffPerf,itau); }

      // Risk evaluation for all tau
      UInt_t RiskPerfTst();

      // Penalty function; Lasso function (eq 8)
      Double_t Penalty() const;

      // initialize GD path
      void InitGD();

      // find best tau and return the number of scan steps used
      Int_t FindGDTau();

      // make path for binary classification (squared-error ramp, sect 6 in ref 1)
      void MakeGDPath();

   protected:

      // typedef of an Event const iterator
      typedef std::vector<const TMVA::Event *>::const_iterator  EventItr;

      // init ntuple
      void InitNtuple();

      // calculate N(tau) in scan, i.e. trunc(1/fGDTauPrec)+1, limited to 100000.
      // The range check is done in floating point BEFORE the conversion to UInt_t:
      // converting a value that does not fit (precision zero, negative, NaN or
      // extremely small) is undefined behaviour and cannot be repaired by capping
      // afterwards. A non-positive or NaN precision is treated like an
      // arbitrarily fine one and therefore yields the cap.
      void CalcGDNTau()
      {
         const UInt_t maxNTau = 100000;
         // negated comparison so that NaN is rejected as well
         if (!(fGDTauPrec > 0.0)) {
            fGDNTau = maxNTau;
            return;
         }
         const Double_t nSteps = 1.0/fGDTauPrec;
         // nSteps < maxNTau guarantees trunc(nSteps)+1 <= maxNTau, so no second cap is needed
         fGDNTau = (nSteps < static_cast<Double_t>(maxNTau)) ? static_cast<UInt_t>(nSteps)+1 : maxNTau;
      }

      // fill ntuple with coefficient info
      void FillCoefficients();

      // estimate the optimum scoring function
      void CalcFStar();

      // estimate of binary error rate
      Double_t ErrorRateBin();

      // estimate of scale average error rate
      Double_t ErrorRateReg();

      // estimate 1-area under ROC
      Double_t ErrorRateRocRaw( std::vector<Double_t> & sFsig, std::vector<Double_t> & sFbkg );
      Double_t ErrorRateRoc();
      void     ErrorRateRocTst();

      // estimate optimism
      Double_t Optimism();

      // make gradient vector (eq 44 in ref 1)
      void MakeGradientVector();

      // Calculate the direction in parameter space (eq 25, ref 1) and update coeffs (eq 22, ref 1)
      void UpdateCoefficients();

      // calculate average of responses of F
      Double_t CalcAverageResponse();
      Double_t CalcAverageResponseOLD();

      // calculate average of true response (initial estimate of a0)
      Double_t CalcAverageTruth();

      // calculate the average of each variable (avsel) and of each rule (avrul)
      // over the event index range ind1..ind2
      void EvaluateAverage(UInt_t ind1, UInt_t ind2,
                           std::vector<Double_t> &avsel,
                           std::vector<Double_t> &avrul);

      // evaluate using fPathIdx1,2
      void EvaluateAveragePath() { EvaluateAverage( fPathIdx1, fPathIdx2, fAverageSelectorPath, fAverageRulePath ); }

      // evaluate using fPerfIdx1,2
      void EvaluateAveragePerf() { EvaluateAverage( fPerfIdx1, fPerfIdx2, fAverageSelectorPerf, fAverageRulePerf ); }

      // the same as above but for the various tau
      void MakeTstGradientVector();
      void UpdateTstCoefficients();
      void CalcTstAverageResponse();


      RuleFit             * fRuleFit;      // rule fit
      RuleEnsemble        * fRuleEnsemble; // rule ensemble
      //
      UInt_t                fNRules;       // number of rules
      UInt_t                fNLinear;      // number of linear terms
      //
      // Event indices for path/validation - TODO: should let the user decide
      // Now it is just a simple one-fold cross validation.
      //
      UInt_t                fPathIdx1;       // first event index for path search
      UInt_t                fPathIdx2;       // last event index for path search
      UInt_t                fPerfIdx1;       // first event index for performance evaluation
      UInt_t                fPerfIdx2;       // last event index for performance evaluation
      Double_t              fNEveEffPath;    // sum of weights for Path events
      Double_t              fNEveEffPerf;    // idem for Perf events

      std::vector<Double_t> fAverageSelectorPath; // average of each variable over the range fPathIdx1,2
      std::vector<Double_t> fAverageRulePath;     // average of each rule, same range
      std::vector<Double_t> fAverageSelectorPerf; // average of each variable over the range fPerfIdx1,2
      std::vector<Double_t> fAverageRulePerf;     // average of each rule, same range

      std::vector<Double_t> fGradVec;        // gradient vector - dimension = number of rules in ensemble
      std::vector<Double_t> fGradVecLin;     // gradient vector - dimension = number of variables

      std::vector< std::vector<Double_t> > fGradVecTst;    // gradient vector - one per tau
      std::vector< std::vector<Double_t> > fGradVecLinTst; // gradient vector, linear terms - one per tau
      //
      std::vector<Double_t> fGDErrTst;     // error rates per tau
      std::vector<Char_t>   fGDErrTstOK;   // error rate is sufficiently low <--- stores boolean
      std::vector< std::vector<Double_t> > fGDCoefTst;    // rule coeffs - one per tau
      std::vector< std::vector<Double_t> > fGDCoefLinTst; // linear coeffs - one per tau
      std::vector<Double_t> fGDOfsTst;       // offset per tau
      std::vector< Double_t > fGDTauVec;     // the tau's
      UInt_t                fGDNTauTstOK;    // number of tau in the test-phase that are ok
      UInt_t                fGDNTau;         // number of tau-paths - calculated in SetGDTauPrec
      Double_t              fGDTauPrec;      // precision in tau
      UInt_t                fGDTauScan;      // number scan for tau-paths
      Double_t              fGDTauMin;       // min threshold parameter (tau in eq 26, ref 1)
      Double_t              fGDTauMax;       // max threshold parameter (tau in eq 26, ref 1)
      Double_t              fGDTau;          // selected threshold parameter (tau in eq 26, ref 1)
      Double_t              fGDPathStep;     // step size along path (delta nu in eq 22, ref 1)
      Int_t                 fGDNPathSteps;   // number of path steps
      Double_t              fGDErrScale;     // stop scan at error = scale*errmin
      //
      Double_t              fAverageTruth;   // average truth, ie sum(y)/N, y=+-1
      //
      std::vector<Double_t> fFstar;          // vector of F*() - filled in CalcFStar()
      Double_t              fFstarMedian;    // median value of F*()
      //
      TTree                *fGDNtuple;       // Gradient path ntuple, contains params for each step along the path
      Double_t              fNTRisk;         // GD path: risk
      Double_t              fNTErrorRate;    // GD path: error rate (or performance)
      Double_t              fNTNuval;        // GD path: value of nu
      Double_t              fNTCoefRad;      // GD path: 'radius' of all rulecoeffs
      Double_t              fNTOffset;       // GD path: model offset
      Double_t             *fNTCoeff;        // GD path: rule coefficients
      Double_t             *fNTLinCoeff;     // GD path: linear coefficients

      Double_t              fsigave;         // Average of F(sig) (was "Sigma"; presumably a typo, cf. fbkgave - confirm in the implementation)
      Double_t              fsigrms;         // Rms of F(sig)
      Double_t              fbkgave;         // Average of F(bkg)
      Double_t              fbkgrms;         // Rms of F(bkg)

   private:

      mutable MsgLogger*    fLogger;         //! message logger
      // access to the logger; dereferences fLogger without a null check
      MsgLogger& Log() const { return *fLogger; }

   };

   // --------------------------------------------------------

   // Binary predicate ordering two numbers by magnitude: yields true when the
   // absolute value of the first argument is strictly smaller than that of the second.
   class AbsValue {

   public:

      Bool_t operator()( Double_t first, Double_t second ) const
      {
         const Double_t magFirst  = TMath::Abs(first);
         const Double_t magSecond = TMath::Abs(second);
         return magFirst < magSecond;
      }
   };
}


#endif