/usr/include/BALL/QSAR/statistics.h is in libball1.4-dev 1.4.1+20111206-3.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
/* statistics.h
*
* Copyright (C) 2009 Marcel Schumann
*
* This file is part of QuEasy -- A Toolbox for Automated QSAR Model
* Construction and Validation.
* QuEasy is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or (at
* your option) any later version.
*
* QuEasy is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
// -*- Mode: C++; tab-width: 2; -*-
// vi: set ts=2:
//
//
#ifndef STATISTICS
#define STATISTICS
#include <vector>
#include <iostream>
#include <BALL/MATHS/LINALG/matrix.h>
#include <cmath>
#include <BALL/MATHS/parsedFunction.h>
#include <set>
namespace BALL
{
namespace QSAR
{
/** Collection of static helper functions for statistical preprocessing of
    input data (centering, scaling) and for basic descriptive statistics
    (mean, variance, standard deviation, covariance, distances and norms).

    All functionality is exposed through static member functions.

    NOTE(review): several functions take an optional, precomputed mean that
    defaults to -1. Presumably -1 means "not supplied, compute it internally";
    the implementation is not visible from this header, so confirm this. If
    so, a genuine mean of exactly -1 cannot be told apart from the default. */
class BALL_EXPORT Statistics
{
	public:
		/** @name Constructors and Destructors
		 */
		//@{

		/** default constructor */
		Statistics() {}

		/** default destructor */
		~Statistics() {}
		//@}


		/** @name static functions
		 */
		//@{

		/** scales the given vector to a variance of 1 */
		static void scaling(std::vector<double>& v);

		/** scales each column of the given vector-based matrix to a variance of 1 */
		static void scaling(std::vector<std::vector<double> >& m);

		/** centers each column of the given vector-based matrix to a mean of 0 and a variance of 1 */
		static void centering(std::vector<std::vector<double> >& m);

		/** centers the given vector to a mean of 0 and a variance of 1 */
		static void centering(std::vector<double>& v);

		/** centers the given vector to a mean of 0 and a variance of 1
		    @param v the vector to be centered
		    @param mean the mean of v will be saved here
		    @param std the standard deviation of v will be saved here */
		static void centering(std::vector<double>& v, double& mean, double& std);

		/** calculates the variance of a given vector
		    @param mean optional precomputed mean of v (see the class note about the default of -1) */
		static double getVariance(const std::vector<double>& v, double mean=-1);

		/** calculates the standard deviation of a given vector
		    @param mean optional precomputed mean of v (see the class note about the default of -1) */
		static double getStddev(const std::vector<double>& v, double mean=-1);

		/** calculates the covariance between two given vectors
		    @param mean1 mean of v1
		    @param mean2 mean of v2 */
		static double getCovariance(const std::vector<double>& v1, const std::vector<double>& v2, double mean1, double mean2);

		/** calculates the mean of a given vector */
		static double getMean(const std::vector<double>& v);


		//--- methods for calculating mean, covar, var of matrix-ROWS ---

		/** calculates the covariance between two rows of the given matrix
		    @param mean1 optional precomputed mean of row1 (see the class note about the default of -1)
		    @param mean2 optional precomputed mean of row2 (see the class note about the default of -1)
		    @param features_to_use if specified, only the contained features are used for calculation of the covariance */
		static double getRowCovariance(const std::vector<std::vector<double> >& v, int row1, int row2, double mean1=-1, double mean2=-1, std::multiset<int>* features_to_use=0);

		/** calculates the mean of a row of the given matrix
		    @param features_to_use if specified, only the contained features are used for calculation of the mean */
		static double getRowMean(const std::vector<std::vector<double> >& v, int row, std::multiset<int>* features_to_use=0);

		/** calculates the variance of a row of the given matrix
		    @param mean optional precomputed mean of the row (see the class note about the default of -1)
		    @param features_to_use if specified, only the contained features are used for calculation of the variance */
		static double getRowVariance(const std::vector<std::vector<double> >& v, int row, double mean=-1, std::multiset<int>* features_to_use=0);

		/** calculates the standard deviation of a row of the given matrix
		    @param mean optional precomputed mean of the row (see the class note about the default of -1)
		    @param features_to_use if specified, only the contained features are used for calculation of the standard deviation */
		static double getRowStddev(const std::vector<std::vector<double> >& v, int row, double mean=-1, std::multiset<int>* features_to_use=0);

		// ------------------------


		/** centers each column of the given matrix.
		    NOTE(review): the previous comment described this as "scales each column
		    to a variance of 1", whereas the vector overloads of centering() are
		    documented as yielding mean 0 and variance 1 -- confirm against the
		    implementation which of the two applies here. */
		static void centering(BALL::Matrix<double>& m);

		/** centers column col of the given matrix m.
		    NOTE(review): the previous comment described this as "scales column col
		    to a variance of 1" -- confirm against the implementation whether the
		    mean is removed as well. */
		static void centering(BALL::Matrix<double>& m, int col);

		/** calculates the mean of column col of matrix m */
		static double getMean(const BALL::Matrix<double>& m, int col);

		/** calculates the variance of column col of matrix m
		    @param mean optional precomputed mean of the column (see the class note about the default of -1) */
		static double getVariance(const BALL::Matrix<double>& m, int col, double mean=-1);

		/** calculates the standard deviation of column col of matrix m
		    @param mean optional precomputed mean of the column (see the class note about the default of -1) */
		static double getStddev(const BALL::Matrix<double>& m, int col, double mean=-1);

		/** calculates the covariance between columns col1 and col2 of matrix m
		    @param mean1 optional precomputed mean of col1 (see the class note about the default of -1)
		    @param mean2 optional precomputed mean of col2 (see the class note about the default of -1) */
		static double getCovariance(const BALL::Matrix<double>& m, int col1, int col2, double mean1=-1, double mean2=-1);

		/** calculates the sum-of-squares of column col of matrix m
		    @param mean optional precomputed mean of the column (see the class note about the default of -1) */
		static double sq(const BALL::Matrix<double>& m, int col, double mean=-1);

		/** calculates the polynomial distance between two rows of matrix m
		    @param p parameter of the polynomial distance */
		static double distance(const BALL::Matrix<double>& m, int& row1, int& row2, double& p);

		/** calculates the polynomial distance between row1 of BALL::Matrix<double> m1 and row2 of BALL::Matrix<double> m2.\n
		    m1 and m2 must therefore have the same number of columns.
		    @param p parameter of the polynomial distance */
		static double distance(const BALL::Matrix<double>& m1, const BALL::Matrix<double>& m2, int& row1, int& row2, double& p);

		/** calculates the distance between row1 of BALL::Matrix<double> m1 and row2 of BALL::Matrix<double> m2 according to the given equations
		    @param f equation f, determines how each element "x1" of row1 is combined with the corresponding element "x2" of row2. Use "x1" and "x2" in the String, e.g. "(x1-x2)^2"
		    @param g equation g, determines what is to be done with the calculated "sum" over all elements (use "sum" in String); e.g. "sum^0.5" => euclidean distance if f=="(x1-x2)^2" */
		static double distance(const BALL::Matrix<double>& m1, const BALL::Matrix<double>& m2, int& row1, int& row2, String& f, String& g);

		/** calculates the euclidean distance between row1 of BALL::Matrix<double> m1 and row2 of BALL::Matrix<double> m2 as \f$ sqrt(\sum_i (row1_i-row2_i)^2) \f$\n
		    m1 and m2 must therefore have the same number of columns.*/
		static double euclDistance(const BALL::Matrix<double>& m1, const BALL::Matrix<double>& m2, int row1, int row2);

		/** returns the euclidean norm of a vector as \f$ sqrt(\sum_i cv_i^2) \f$ */
		static double euclNorm(const Vector<double>& cv);

		/** returns the euclidean distance between two vectors as \f$ sqrt(\sum_i (cv1_i-cv2_i)^2) \f$ */
		static double euclDistance(const Vector<double>& c1, const Vector<double>& c2);

		/** returns the scalar product \f$ cv^T*cv = \sum_i cv_i^2 \f$ */
		static double scalarProduct(const Vector<double>& cv);
		//@}
};
}
}
#endif // STATISTICS
|