/usr/include/rdkit/SimDivPickers/HierarchicalClusterPicker.h is in librdkit-dev 201503-3.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
//
// Copyright (C) 2003-2006 Rational Discovery LLC
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#ifndef _HIERARCHCLUSTERPICKER_H
#define _HIERARCHCLUSTERPICKER_H
#include <RDGeneral/types.h>
#include "DistPicker.h"
namespace RDPickers {
/*! \brief Diversity picker built on top of hierarchical clustering
 *
 *  The class derives from DistPicker because the picking is driven by a
 *  distance matrix. The clustering step itself is delegated to the Murtagh
 *  code found in $RDBASE/Code/ML/Cluster/Murtagh/
 */
class HierarchicalClusterPicker : public DistPicker {
 public:
  /*! \brief Selects which hierarchical clustering algorithm is applied
   *
   *  The numeric values are spelled out explicitly and start at 1.
   */
  enum ClusterMethod {
    WARD = 1,
    SLINK = 2,
    CLINK = 3,
    UPGMA = 4,
    MCQUITTY = 5,
    GOWER = 6,
    CENTROID = 7
  };

  /*! \brief Constructor - stores the clustering method to be used
   *
   *  \param clusterMethod - the hierarchical clustering method; it is kept
   *                         in d_method for the lifetime of the picker
   */
  explicit HierarchicalClusterPicker(ClusterMethod clusterMethod)
      : d_method(clusterMethod) {}

  /*! \brief Performs the diversity pick
   *
   *  Outline of the algorithm \n
   *  FIX: Supply reference
   *
   *   - The whole pool is clustered from the distance matrix with the
   *     hierarchical clustering method given to the constructor. \n
   *   - Beginning with every pool item as its own cluster, clusters are
   *     merged following the output of the clustering method. \n
   *   - Merging stops once the number of clusters equals the number of
   *     picks.
   *   - Within each cluster, the sum of squared distances from every item to
   *     the remaining items of that cluster is computed. The item with the
   *     smallest sum is chosen to represent the cluster, i.e. the item
   *     closest to the cluster centroid.
   *
   *  \param distMat  - distance matrix stored as a 1D array of doubles; only
   *                    the lower-triangle elements are expected \n
   *                    NOTE: this matrix WILL BE ALTERED during the picking \n
   *                    NOTE(review): the pointer is declared const although
   *                    the matrix is documented as altered - confirm against
   *                    the implementation
   *  \param poolSize - number of items in the pool to pick from. The distance
   *                    matrix is assumed to hold the right number of
   *                    elements; i.e. poolSize*(poolSize-1) \n
   *                    NOTE(review): a lower triangle without the diagonal
   *                    would hold poolSize*(poolSize-1)/2 entries - confirm
   *                    which count the implementation expects
   *  \param pickSize - number of items to pick from the pool (<= poolSize)
   */
  RDKit::INT_VECT pick(const double *distMat,
                       unsigned int poolSize,
                       unsigned int pickSize) const;

  /*! \brief Clusters the pool items - this is what the picker relies on
   *
   *  \param distMat  - distance matrix stored as a 1D array of doubles; only
   *                    the lower-triangle elements are expected \n
   *                    NOTE: this matrix WILL BE ALTERED during the picking \n
   *                    NOTE(review): the pointer is declared const although
   *                    the matrix is documented as altered - confirm against
   *                    the implementation
   *  \param poolSize - number of items in the pool to pick from. The distance
   *                    matrix is assumed to hold the right number of
   *                    elements; i.e. poolSize*(poolSize-1) \n
   *                    NOTE(review): a lower triangle without the diagonal
   *                    would hold poolSize*(poolSize-1)/2 entries - confirm
   *                    which count the implementation expects
   *  \param pickSize - number of clusters to split the pool into
   *                    (<= poolSize)
   */
  RDKit::VECT_INT_VECT cluster(const double *distMat,
                               unsigned int poolSize,
                               unsigned int pickSize) const;

 private:
  ClusterMethod d_method;  //!< clustering method chosen at construction
};
};
#endif
|