/usr/include/rdkit/SimDivPickers/HierarchicalClusterPicker.h is in librdkit-dev 201603.5+dfsg-1ubuntu1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 | //
// Copyright (C) 2003-2006 Rational Discovery LLC
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#ifndef _HIERARCHCLUSTERPICKER_H
#define _HIERARCHCLUSTERPICKER_H
#include <RDGeneral/types.h>
#include "DistPicker.h"
namespace RDPickers {
/*! \brief Diversity picker based on hierarchical clustering
*
* This class inherits from DistPicker since it uses the distance matrix
* for diversity picking. The clustering itself is done using the Murtagh
* code in $RDBASE/Code/ML/Cluster/Mutagh/
*/
class HierarchicalClusterPicker : public DistPicker {
public:
/*! \brief The type of hierarchical clustering algorithm to use
*/
typedef enum {
WARD = 1,
SLINK = 2,
CLINK = 3,
UPGMA = 4,
MCQUITTY = 5,
GOWER = 6,
CENTROID = 7
} ClusterMethod;
/*! \brief Constructor - takes a ClusterMethod as an argument
*
* Sets the hierarch clustering method
*/
explicit HierarchicalClusterPicker(ClusterMethod clusterMethod)
: d_method(clusterMethod) {
;
};
/*! \brief This is the function that does the picking
*
* Here is how the algorithm works \n
* FIX: Supply reference
*
* - The entire pool is clustered using the distance matrix using one of the
* hierachical clustering method (specified via the constructor). \n
* - Starting with the individaul items in the pool, clusters are merged based
* on the output from clustering method. \n
* - The merging is stopped when the number of clusters is same as
* the number of picks.
* - For each item in a cluster the sum of square of the distances to the rest
*of
* of the items (in the cluster) is computed. The item with the smallest of
*values is
* picked as a representative of the cluster. Basically trying to pick the
*item closest
* to the centroid of the cluster.
*
*
* \param distMat - distance matrix - a vector of double. It is assumed
*that only the
* lower triangle element of the matrix are supplied in a 1D
*array\n
* NOTE: this matrix WILL BE ALTERED during the picking\n
* \param poolSize - the size of the pool to pick the items from. It is
*assumed that the
* distance matrix above contains the right number of elements;
*i.e.
* poolSize*(poolSize-1) \n
* \param pickSize - the number items to pick from pool (<= poolSize)
*/
RDKit::INT_VECT pick(const double *distMat, unsigned int poolSize,
unsigned int pickSize) const;
/*! \brief This is the function that does the clustering of the items - used
*by the picker
*
* ARGUMENTS:
*
* \param distMat - distance matrix - a vector of double. It is assumed that
*only the
* lower triangle element of the matrix are supplied in a 1D
*array\n
* NOTE: this matrix WILL BE ALTERED during the picking\n
* \param poolSize - the size of the pool to pick the items from. It is
*assumed that the
* distance matrix above contains the right number of elements;
*i.e.
* poolSize*(poolSize-1) \n
* \param pickSize - the number clusters to divide the pool into (<=
*poolSize)
*/
RDKit::VECT_INT_VECT cluster(const double *distMat, unsigned int poolSize,
unsigned int pickSize) const;
private:
ClusterMethod d_method;
};
};
#endif
|