This file is indexed.

/usr/include/BALL/QSAR/regressionValidation.h is in libball1.4-dev 1.4.1+20111206-3.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
/* regressionValidation.h
 * 
 * Copyright (C) 2009 Marcel Schumann
 * 
 * This file is part of QuEasy -- A Toolbox for Automated QSAR Model
 * Construction and Validation.
 * QuEasy is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or (at
 * your option) any later version.
 * 
 * QuEasy is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

// -*- Mode: C++; tab-width: 2; -*-
// vi: set ts=2:
//
//

#ifndef REGVALIDATION
#define REGVALIDATION

#ifndef QSARDATA
#include <BALL/QSAR/QSARData.h>
#endif

#ifndef VALIDATION
#include <BALL/QSAR/validation.h>
#endif

#include <gsl/gsl_randist.h>
#include <gsl/gsl_cdf.h>
#include <iterator>


#include <BALL/MATHS/LINALG/matrix.h>
#include <BALL/MATHS/LINALG/vector.h>



namespace BALL
{	
	namespace QSAR
	{
		/* Forward declaration: this header only uses RegressionModel through pointers, so the full definition is not required here. */
		class RegressionModel;
		/** Class for validation of QSAR regression models. \n
		Declares k-fold cross validation, bootstrapping and a Y randomization test for a RegressionModel, together with accessors for the resulting quality statistics (R^2, Q^2, F-values, maximal error) and for the standard errors of the model coefficients. */
		class BALL_EXPORT RegressionValidation : public Validation
		{	
			public:
				/** @name Constructors and Destructors
				 */
				//@{
				/** constructor
				@param m pointer to the regression model, which the object of this class should test. \n
				NOTE(review): ownership of m is not visible in this header -- presumably the pointer is only observed, not owned; confirm against the implementation. */
				RegressionValidation(RegressionModel* m);

				/** destructor */
				~RegressionValidation();
				//@}
				
				
				/** @name Accessors
				 */
				//@{
				/** starts k-fold cross validation \n
				@param k number of cross validation folds
				@param restore if restore==1 (the default), Model.descriptor_matrix and RegressionModel.training_result are restored after cross validation */
				void crossValidation(int k, bool restore=1);
				
				/** starts k-fold cross validation \n
				@param k number of cross validation folds
				@param results pointer to vector that should take all matrices RegressionModel.training_result produced during this cross validation run
				@param restore if restore==1 (the default), Model.descriptor_matrix and RegressionModel.training_result are restored after cross validation */
				void crossValidation(int k, vector<BALL::Matrix<double> >* results, bool restore=1);
				
				
				/** starts bootstrapping with k samples \n
				@param k number of bootstrap samples
				@param restore if restore==1 (the default), Model.descriptor_matrix and RegressionModel.training_result are restored after bootstrapping */
				void bootstrap(int k, bool restore=1);
				
				/** starts bootstrapping with k samples \n
				@param k number of bootstrap samples
				@param results pointer to vector that should take all matrices RegressionModel.training_result produced during this bootstrapping
				@param restore if restore==1 (the default), Model.descriptor_matrix and RegressionModel.training_result are restored after bootstrapping */
				void bootstrap(int k, vector<BALL::Matrix<double> >* results, bool restore=1);
				
				/** NOTE(review): undocumented variant with the same parameter list as bootstrap(int, vector<...>*, bool). \n
				How it differs from bootstrap() is not visible in this header -- see the implementation before relying on it. */
				void bootstrap1(int k, vector<BALL::Matrix<double> >* results, bool restore=1);
				
				/** Y randomization test \n
				Randomizes all columns of model.Y, trains the model, runs crossValidation and testInputData and saves the resulting R2 and Q2 values to a matrix with 2 columns; the R2 values make up the first column, the Q2 values the second.
				@param runs this is repeated as often as specified by 'runs'
				@param k NOTE(review): not documented in the original; presumably the number of folds used for the cross validation of each run -- confirm
				@return const reference to a matrix; presumably the 2-column result matrix described above -- confirm */
				const BALL::Matrix<double>& yRandomizationTest(int runs, int k);
				
				/** get the Q^2 value.\n
				If no cross-validation has been done yet, -1 is returned */
				double getQ2();
				
				/** get the R^2 value.\n
				If testInputData() has not been run yet, -1 is returned */
				double getR2();		
				
				/** get the F-value as calculated by testInputData().\n
				If testInputData() has not been run yet, -1 is returned */
				double getFregr();
						
				/** get the F-value as calculated by cross validation.\n
				If crossValidation() has not been run yet, -1 is returned */
				double getFcv();
				
				/** NOTE(review): undocumented; presumably returns the predictive quality obtained by the last cross validation, according to the statistic chosen with selectStat() -- confirm */
				double getCVRes();
				
				/** NOTE(review): undocumented; presumably returns the quality of fit obtained by the last testInputData() call, according to the statistic chosen with selectStat() -- confirm */
				double getFitRes();
				
				/** returns the maximal error of the prediction */
				double getMaxError();
				
				/** NOTE(review): undocumented; presumably the setter counterpart of getCVRes() -- which member it writes is not visible in this header; confirm
				@param d the value to store */
				void setCVRes(double d);
		
				/** set the Q^2 value
				@param d the new Q^2 value */
				void setQ2(double d);
				
				/** Tests the model on the input data. According to the getter documentation in this class, getR2() and getFregr() return -1 until this has been run.
				@param transform defaults to 0. NOTE(review): its meaning is not documented in this header -- confirm against the implementation */
				void testInputData(bool transform=0);
				
				/** select the desired statistic to be used for validating the models
				@param s if (s==1) R^2 and Q^2 are used \n
					if(s==2) F_regr and F_cv are used. */
				void selectStat(int s);
				
				/** calculates standard deviations for all predicted coefficients and saves them to coefficient_stderr_ \n
				(the original comment named 'coefficient_stddev_', which is not declared in this class -- presumably the same member; confirm)
				@param k number of bootstrap samples resp. cross-validation steps
				@param b if b==1 (the default), bootstrapping is used; else: cross-validation */
				void calculateCoefficientStdErrors(int k, bool b=1);
			
				/** returns a const pointer to the matrix containing the standard deviations of all predicted coefficients */
				const BALL::Matrix<double>* getCoefficientStdErrors();
				
				/** sets the standard deviations of the predicted coefficients
				@param stddev pointer to the matrix to take the values from. NOTE(review): presumably its content is copied into coefficient_stderr_ -- confirm */
				void setCoefficientStdErrors(const BALL::Matrix<double>* stddev);
				
				/** NOTE(review): undocumented; presumably writes the validation results held by this object to the given file -- confirm format and content against the implementation
				@param filename name of the output file */
				void saveToFile(string filename) const;
				
				/** NOTE(review): undocumented; presumably writes the explicitly passed validation results (instead of the ones held by this object) to the given file -- confirm
				@param filename name of the output file
				@param r2 R^2 value
				@param q2 Q^2 value
				@param coefficient_stddev matrix with the standard deviations of the coefficients
				@param yRand_results matrix with Y randomization results (see yRandomizationTest()) */
				void saveToFile(string filename, const double& r2, const double& q2, const Matrix<double>& coefficient_stddev, const Matrix<double>& yRand_results) const;
				
				/** NOTE(review): undocumented; presumably reads back validation results written by saveToFile() -- confirm
				@param filename name of the input file */
				void readFromFile(string filename);
				//@}
				
	
			private:
				
				/** Bundle of matrices kept as a backup. \n
				NOTE(review): presumably filled by backupTrainingResults() and written back by restoreTrainingResults(); the field names suggest copies of the corresponding model matrices -- confirm against the implementation. */
				struct BackupData
				{
					Matrix<double> descriptor_matrix;
					Matrix<double> training_result;
					Matrix<double> Y;
					Matrix<double> K;
					Matrix<double> latent_variables;
					Matrix<double> loadings;
					Matrix<double> weights;
				};
				
				
				/** @name Accessors
				 */
				//@{	
				/** Tests the current model with all substances in the (unchanged) test data set
				@param transform NOTE(review): meaning not documented in this header; same parameter name as in testInputData() -- confirm */
				void testAllSubstances(bool transform);
				
				/** NOTE(review): undocumented; presumably saves the current training results into backup_data_ -- confirm */
				void backupTrainingResults();
				
				/** NOTE(review): undocumented; presumably restores the training results from backup_data_ -- confirm */
				void restoreTrainingResults();
				//@}
				
				
				/** @name Attributes
				 */
				//@{
				
				/** NOTE(review): undocumented; by naming convention presumably the regression sum of squares -- confirm */
				double ssR_;
				
				/** NOTE(review): undocumented; by naming convention presumably the error (residual) sum of squares -- confirm */
				double ssE_;
				
				/** the sum of squares of the response */
				double ssY_;
				
				/** standard error */
				double std_err_;
	
				/** Q^2-value as calculated after cross-validation */
				double Q2_;
	
				/** F-value as calculated after cross-validation */
				double F_cv_;
				
				/** F-value as calculated after regression for input data  */
				double F_regr_;
	
				/** NOTE(review): undocumented; presumably the R^2 value returned by getR2() -- confirm */
				double R2_;
				
				/** NOTE(review): undocumented; presumably the maximal prediction error returned by getMaxError() -- confirm */
				double max_error_;
				
				/** the quality as calculated by the last call of testAllSubstances() according to the chosen quality-statistic */
				double quality_;
				
				/** pointer to a member function of this class that takes no arguments and returns a double. \n
				NOTE(review): presumably set by selectStat() to the getter of the chosen predictive statistic -- confirm */
				double (RegressionValidation::* predQualFetcher_)();
				
				/** pointer to a member function of this class that takes no arguments and returns a double. \n
				NOTE(review): presumably set by selectStat() to the getter of the chosen fit statistic -- confirm */
				double (RegressionValidation::* fitQualFetcher_)();
				
				/** NOTE(review): undocumented; the name suggests it calculates a quality-of-fit statistic -- which members it reads and writes is not visible in this header; confirm */
				void calculateQOF();
				
				/** contains the standard deviations of all predicted coefficients in one column for each modelled activity */
				BALL::Matrix<double> coefficient_stderr_;
				
				/** pointer to the regression model, which the object of this class should test */
				RegressionModel* regr_model_;
				
				/** backup storage, see BackupData */
				BackupData backup_data_;
				
				/** pointer to a member function of this class that takes no arguments and returns nothing. \n
				calculateQOF() has a matching signature; whether it is the (only) target is not visible in this header -- confirm */
				void (RegressionValidation::* qualCalculation)();
				//@}				
				
		};
	}
}



#endif // REGVALIDATION