/usr/include/shogun/converter/MultidimensionalScaling.h is in libshogun-dev 1.1.0-4ubuntu2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Sergey Lisitsyn
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#ifndef MULTIDIMENSIONALSCALING_H_
#define MULTIDIMENSIONALSCALING_H_
#include <shogun/lib/config.h>
#ifdef HAVE_LAPACK
#include <shogun/converter/EmbeddingConverter.h>
#include <shogun/features/Features.h>
#include <shogun/distance/Distance.h>
namespace shogun
{
/* forward declarations of the types used in the signatures below */
class CFeatures;
class CDistance;
/** @brief The class CMultidimensionalScaling is used to perform
* multidimensional scaling (capable of landmark approximation
* if requested).
*
* A description of the classical embedding is given on p.261
* (Section 12.1) of
* Borg, I., & Groenen, P. J. F. (2005).
* Modern multidimensional scaling: Theory and applications. Springer.
*
* A description of the landmark MDS approximation is given in
*
* Sparse multidimensional scaling using landmark points
* V De Silva, J B Tenenbaum (2004) Technology, p. 1-4
*
* In this converter the LAPACK routine DSYEVR is used for
* solving the eigenproblem. If the ARPACK library is available,
* its routines DSAUPD/DSEUPD are used instead.
*
* Note that the target dimension should be set to a reasonable value
* (using set_target_dim, which is not declared in this class --
* presumably inherited from CEmbeddingConverter). If it is higher than
* the intrinsic dimensionality of the dataset, the 'extra' features of
* the output might be inconsistent (essentially, they correspond to zero
* or negative eigenvalues). In this case a warning is shown.
*
* It is possible to apply multidimensional scaling to any
* given distance using the embed_distance method.
* By default Euclidean distance is used (with its parallel
* instance replaced by the converter's one).
*
* The faster landmark approximation is parallelized using pthreads.
* The number of landmarks should be at least 3 for proper
* triangulation. For reasonable embedding accuracy, greater
* values (30%-50% of the total number of examples) work well for
* most tasks.
*/
class CMultidimensionalScaling: public CEmbeddingConverter
{
public:
/** constructor */
CMultidimensionalScaling();
/** destructor */
virtual ~CMultidimensionalScaling();
/** embed the given distance
* @param distance distance to embed (should be approximately Euclidean
* for a consistent result)
* @return new features whose distances are as similar as possible
* to the given ones
*/
virtual CSimpleFeatures<float64_t>* embed_distance(CDistance* distance);
/** apply the converter to a feature matrix,
* changing the feature matrix to one having the target dimensionality
* @param features features whose feature matrix should be processed
* @return new feature matrix
*/
virtual CFeatures* apply(CFeatures* features);
/** get name
* @return name string
*/
const char* get_name() const;
/** get the eigenvalues of the last embedding
* @return vector with the last eigenvalues
*/
SGVector<float64_t> get_eigenvalues() const;
/** set number of landmarks;
* it should be less than the number of examples and greater than 3
* for a consistent embedding, as triangulation is used
* NOTE(review): the class description says "at least 3" while this
* says "greater than 3" -- confirm the actual bound in the implementation
* @param num number of landmarks to be set
*/
void set_landmark_number(int32_t num);
/** get number of landmarks
* @return current number of landmarks
*/
int32_t get_landmark_number() const;
/** setter for landmark parameter
* @param landmark true if landmark embedding should be used
*/
void set_landmark(bool landmark);
/** getter for landmark parameter
* @return true if landmark embedding is used
*/
bool get_landmark() const;
/// HELPERS
protected:
/** default initialization */
virtual void init();
/** classical embedding
* @param distance_matrix distance matrix to be used for embedding
* @return new feature matrix representing the given distance matrix
*/
SGMatrix<float64_t> classic_embedding(SGMatrix<float64_t> distance_matrix);
/** landmark embedding (approximate; accuracy varies with the
* m_landmark_number parameter)
* @param distance_matrix distance matrix to be used for embedding
* @return new feature matrix representing the given distance matrix
*/
SGMatrix<float64_t> landmark_embedding(SGMatrix<float64_t> distance_matrix);
/** process distance matrix (redefined in Isomap; for MDS it does nothing)
* @param distance_matrix distance matrix
* @return processed distance matrix
*/
virtual SGMatrix<float64_t> process_distance_matrix(SGMatrix<float64_t> distance_matrix);
/// FIELDS
protected:
/** eigenvalues of the last embedding */
SGVector<float64_t> m_eigenvalues;
/** use landmark approximation? */
bool m_landmark;
/** number of landmarks */
int32_t m_landmark_number;
/// STATIC
protected:
/** run a triangulation thread for landmark embedding
* @param p thread parameters
*/
static void* run_triangulation_thread(void* p);
/** subroutine used to pick count shuffled indexes out of total_count
* ones with the Fisher-Yates (also known as Knuth) shuffle algorithm
* @param count number of indexes to be shuffled and returned
* @param total_count total number of indexes
* @return sorted shuffled indexes for landmarks
*/
static SGVector<int32_t> shuffle(int32_t count, int32_t total_count);
};
}
#endif /* HAVE_LAPACK */
#endif /* MULTIDIMENSIONALSCALING_H_ */
|