/usr/include/shark/Models/NBClassifier.h is in libshark-dev 3.0.1+ds1-2ubuntu1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

//===========================================================================
/*!
 * 
 *
 * \brief       Implementation of Naive Bayes classifier
 * 
 * 
 * 
 *
 * \author      B. Li
 * \date        2012
 *
 *
 * \par Copyright 1995-2015 Shark Development Team
 * 
 * <BR><HR>
 * This file is part of Shark.
 * <http://image.diku.dk/shark/>
 * 
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published 
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================
#ifndef SHARK_MODEL_NB_CLASSIFIER_H
#define SHARK_MODEL_NB_CLASSIFIER_H

#include "shark/Core/Exception.h"
#include "shark/Core/Math.h"
#include "shark/Models/AbstractModel.h"

#include <boost/foreach.hpp>
#include <boost/noncopyable.hpp>
#include <boost/smart_ptr/shared_ptr.hpp>
#include <boost/static_assert.hpp>

#include <boost/type_traits.hpp>

#include <limits>
#include <utility>
namespace shark {

/// @brief Naive Bayes classifier
///
/// This model implements a Naive Bayes classifier. It assumes that the data X are generated by a mixture
/// of class-conditional distributions, i.e., distributions that depend on the value of the class variable Y.
/// The Naive Bayes assumption adds the constraint that, within each of these mixture components, the
/// attribute values Xi are conditionally independent of one another.
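///
/// A minimal usage sketch (illustrative only; the values below are hypothetical and the calls
/// assume the shark namespace). In practice the per-feature distributions are usually fitted
/// to data first, e.g. by a dedicated trainer such as NBClassifierTrainer.
/// \code
/// // Two classes, three real-valued features; each feature starts as a standard Normal distribution.
/// NBClassifier<> nb(2, 3);
/// // Class priors have to be registered in ascending class order.
/// nb.setClassPrior(0, 0.4);
/// nb.setClassPrior(1, 0.6);
/// // Classify a batch holding a single pattern (one row per pattern).
/// NBClassifier<>::BatchInputType patterns(1, 3);
/// patterns(0, 0) = 1.0; patterns(0, 1) = -0.5; patterns(0, 2) = 2.0;
/// NBClassifier<>::BatchOutputType predictions;
/// nb.eval(patterns, predictions);
/// unsigned int predictedClass = predictions(0);
/// \endcode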
template <class InputType = RealVector, class OutputType = unsigned int>
class NBClassifier :
	public AbstractModel<InputType, OutputType>,
	private boost::noncopyable
{
private:

	typedef AbstractModel<InputType, OutputType> base_type;

public:

	typedef typename base_type::BatchInputType BatchInputType;
	typedef typename base_type::BatchOutputType BatchOutputType;

	/// Type of class distribution
	typedef std::vector<double> ClassPriorsType;

	typedef boost::shared_ptr<AbstractDistribution> AbstractDistPtr;

	/// Type of features distribution
	typedef std::vector<std::vector<AbstractDistPtr> > FeatureDistributionsType;

	/// Size of the distribution table, in the format (number of classes, number of features)
	typedef std::pair<std::size_t, std::size_t> DistSizeType;

	/// Ctor
	/// Builds the classifier under the hypothesis that every feature in each class follows a Normal distribution.
	/// @param classSize number of classes
	/// @param featureSize number of features
	NBClassifier(std::size_t classSize, std::size_t featureSize)
	{
		SIZE_CHECK(classSize > 0u);
		SIZE_CHECK(featureSize > 0u);
		for (std::size_t i = 0; i < classSize; ++i)
		{
			std::vector<AbstractDistPtr> featureDist;
			for (std::size_t j = 0; j < featureSize; ++j)
				featureDist.push_back(AbstractDistPtr(new Normal<DefaultRngType>(Rng::globalRng)));
			m_featureDistributions.push_back(featureDist);
		}
	}

	/// Ctor
	/// The distributions for each feature in each class are given by @a featureDists
	/// @param featureDists the distribution of features
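	///
	/// A hypothetical construction sketch (any distribution derived from AbstractDistribution can be used;
	/// Normal is shown here merely as an example):
	/// \code
	/// std::size_t const numClasses = 2, numFeatures = 3;
	/// NBClassifier<>::FeatureDistributionsType dists(numClasses);
	/// for (std::size_t c = 0; c != numClasses; ++c)
	///     for (std::size_t f = 0; f != numFeatures; ++f)
	///         dists[c].push_back(NBClassifier<>::AbstractDistPtr(new Normal<DefaultRngType>(Rng::globalRng)));
	/// NBClassifier<> nb(dists);
	/// \endcode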
	explicit NBClassifier(FeatureDistributionsType const& featureDists)
	: m_featureDistributions(featureDists)
	{
		SIZE_CHECK(m_featureDistributions.size() > 0u);
	}

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "NBClassifier"; }

	/// Get a feature distribution for feature @a featureIndex given class @a classIndex
	/// @param classIndex index of class
	/// @param featureIndex index of feature
	/// @return the distribution for feature @a featureIndex given class @a classIndex
	AbstractDistribution& getFeatureDist(std::size_t classIndex, std::size_t featureIndex) const
	{
		SIZE_CHECK(classIndex < m_featureDistributions.size());
		SIZE_CHECK(featureIndex < m_featureDistributions[0].size());

		AbstractDistPtr const& featureDist = m_featureDistributions[classIndex][featureIndex];
		SHARK_ASSERT(featureDist);
		return *featureDist;
	}

	/// Get the size of the distribution table in the format (number of classes, number of features)
	/// @return the size of the distribution table
	DistSizeType getDistSize() const
	{
		SIZE_CHECK(m_featureDistributions.size() > 0u);
		return std::make_pair(m_featureDistributions.size(), m_featureDistributions[0].size());
	}

	using base_type::eval;
	
	boost::shared_ptr<State> createState()const{
		return boost::shared_ptr<State>(new EmptyState());
	}

	/// see AbstractModel::eval
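	///
	/// For every pattern x the predicted class is the one maximizing
	/// \f$ \log P(Y=y) + \sum_i \log p(x_i \mid Y=y) \f$,
	/// i.e. the log prior plus the sum of the per-feature log likelihoods, which follows from the
	/// Naive Bayes independence assumption.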
	void eval(BatchInputType const& patterns, BatchOutputType& outputs, State& state)const{
		SIZE_CHECK(m_featureDistributions.size() == m_classPriors.size());
		SIZE_CHECK(m_classPriors.size() > 0u);
		SIZE_CHECK(size(patterns) > 0);
		
		outputs.resize(size(patterns));

		for(std::size_t p = 0; p != size(patterns); ++p){
			OutputType bestProbClass = 0; // just initialized to avoid warning 
			double maxLogProb = - std::numeric_limits<double>::max(); // initialized to the most negative finite double

			// For each possible class, accumulate its log probability and keep the class with the largest value
			for(OutputType classIndex = 0; classIndex != m_classPriors.size(); ++classIndex){
				SIZE_CHECK(patterns.size2() == m_featureDistributions[classIndex].size());
				double const classDistribution = m_classPriors[classIndex];
				// Sum up the log probability of each feature and the log prior probability of the current class.
				// Logs are used so that the result stays within the valid range of double even when the probability is very small.
				double currentLogProb = safeLog(classDistribution); 
				std::size_t featureIndex = 0u;
				BOOST_FOREACH(AbstractDistPtr const& featureDistribution, m_featureDistributions[classIndex])
					currentLogProb += featureDistribution->logP(patterns(p,featureIndex++));

				// Record the greater one
				if (currentLogProb > maxLogProb)
				{
					maxLogProb = currentLogProb;
					bestProbClass = classIndex;
				}
			}
			SHARK_ASSERT(maxLogProb != - std::numeric_limits<double>::max());//should never happen!
			outputs(p) = bestProbClass;
		}
	}

	/// Set the prior probability of class @a classToAdd to @a probability
	/// @param classToAdd the class whose prior probability is set; classes must be added in ascending order (0, 1, 2, ...)
	/// @param probability the prior probability of the class
	void setClassPrior(OutputType classToAdd, double probability)
	{
		if (classToAdd == m_classPriors.size())
			m_classPriors.push_back(probability);
		else
			throw SHARKEXCEPTION("[NBClassifier] class probability must be added in ascending order.");
	}
	
	/// This model does not have any parameters.
	RealVector parameterVector() const {
		return RealVector();
	}

	/// This model does not have any parameters
	void setParameterVector(const RealVector& param) {
		SHARK_ASSERT(param.size() == 0);
	}

protected:

	/// Feature and class distributions
	///@{
	FeatureDistributionsType m_featureDistributions;
	ClassPriorsType m_classPriors;
	///@}

	/// The output must be an integral (discrete) type
	BOOST_STATIC_ASSERT(boost::is_integral<OutputType>::value);
};

} // namespace shark

#endif // SHARK_MODEL_NB_CLASSIFIER_H