/usr/include/shogun/statistics/MMDKernelSelectionMedian.h is in libshogun-dev 3.1.1-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 | /*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2013 Heiko Strathmann
*/
#ifndef __MMDKERNELSELECTIONMEDIAN_H_
#define __MMDKERNELSELECTIONMEDIAN_H_
#include <shogun/statistics/MMDKernelSelection.h>
namespace shogun
{
/** @brief Implements MMD kernel selection for a number of Gaussian baseline
* kernels via selecting the one with a bandwidth parameter that is closest to
* the median of all pairwise distances in the underlying data. Therefore, it
* only works for data to which a GaussianKernel can be applied, which are
* grouped under the class CDotFeatures in SHOGUN.
*
* This method works reasonable if distinguishing characteristics of data are not
* hidden at a different length-scale that the overall one. In addition it is
* fast to compute. In other cases, it is a bad choice.
*
* Optimal selection of single kernels can be found in the class
* CMMDKernelSelectionOpt
*
* Described among oher places in
* Gretton, A., Borgwardt, K. M., Rasch, M. J., Schoelkopf, B., & Smola, A.
* (2012).
* A Kernel Two-Sample Test. Journal of Machine Learning Research, 13, 671-721.
*/
class CMMDKernelSelectionMedian: public CMMDKernelSelection
{
public:
/** Default constructor */
CMMDKernelSelectionMedian();
/** Constructor that initialises the underlying MMD instance
*
* @param mmd MMD instance to use. Has to be an MMD based kernel two-sample
* test.
* @param num_data_distance Number of points that is used to compute the
* median distance on. Since the median is stable, this do need need to be
* all data, but a small subset is sufficient.
*/
CMMDKernelSelectionMedian(CKernelTwoSampleTestStatistic* mmd,
index_t num_data_distance=1000);
/** Destructor */
virtual ~CMMDKernelSelectionMedian();
/** @return Throws an error and shoold not be used */
virtual SGVector<float64_t> compute_measures();
/** Returns the baseline kernel whose bandwidth parameter is closest to the
* median of the pairwise distances of the underlyinf data
*
* @return selected kernel (SG_REF'ed)
*/
virtual CKernel* select_kernel();
/** @return name of the SGSerializable */
const char* get_name() const { return "MMDKernelSelectionMedian"; }
private:
/* initialises and registers member variables */
void init();
protected:
/** maximum number of data to be used for median distance computation */
index_t m_num_data_distance;
};
}
#endif /* __MMDKERNELSELECTIONMEDIAN_H_ */
|