/usr/include/rdkit/GraphMol/FMCS/SubstructureCache.h is in librdkit-dev 201603.5-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 | //
// Copyright (C) 2014 Novartis Institutes for BioMedical Research
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#pragma once
#include <list>
#include <vector>
#include <string>
#include <stdexcept>
#include "../RDKitBase.h"
//#include "../Fingerprints/MorganFingerprints.h"
#include "Graph.h"
#include "Seed.h"
#include "DebugTrace.h" // algorithm filter definitions
namespace RDKit {
namespace FMCS {
class SubstructureCache {
public:
#pragma pack(push, 1)
struct KeyNumericMetrics {
typedef unsigned long long TValue;
TValue Value;
public:
KeyNumericMetrics() : Value(0) {}
};
#pragma pack(pop)
struct HashKey {
KeyNumericMetrics NumericMetrics;
public:
void computeKey(const Seed& seed,
const std::vector<unsigned>& queryAtomLabels,
const std::vector<unsigned>& queryBondLabels) {
computeMorganCodeHash(seed, queryAtomLabels, queryBondLabels);
}
private:
void computeMorganCodeHash(const Seed& seed,
const std::vector<unsigned>& queryAtomLabels,
const std::vector<unsigned>& queryBondLabels) {
size_t nv = seed.getNumAtoms();
size_t ne = seed.getNumBonds();
std::vector<unsigned long> currCodes(nv);
std::vector<unsigned long> prevCodes(nv);
size_t nIterations = seed.getNumBonds();
if (nIterations > 5) nIterations = 5;
for (unsigned seedAtomIdx = 0; seedAtomIdx < seed.getNumAtoms();
seedAtomIdx++)
currCodes[seedAtomIdx] =
queryAtomLabels[seed.MoleculeFragment.AtomsIdx[seedAtomIdx]];
for (size_t iter = 0; iter < nIterations; iter++) {
for (size_t i = 0; i < nv; i++) prevCodes[i] = currCodes[i];
for (size_t seedBondIdx = 0; seedBondIdx < ne; seedBondIdx++) {
const Bond* bond = seed.MoleculeFragment.Bonds[seedBondIdx];
unsigned order =
queryBondLabels[seed.MoleculeFragment.BondsIdx[seedBondIdx]];
unsigned atom1 =
seed.MoleculeFragment.SeedAtomIdxMap.find(bond->getBeginAtomIdx())
->second;
unsigned atom2 =
seed.MoleculeFragment.SeedAtomIdxMap.find(bond->getEndAtomIdx())
->second;
unsigned v1 = prevCodes[atom1];
unsigned v2 = prevCodes[atom2];
currCodes[atom1] += v2 * v2 + (v2 + 23) * (order + 1721);
currCodes[atom2] += v1 * v1 + (v1 + 23) * (order + 1721);
}
}
KeyNumericMetrics::TValue result = 0;
for (unsigned seedAtomIdx = 0; seedAtomIdx < nv; seedAtomIdx++) {
unsigned long code = currCodes[seedAtomIdx];
result += code * (code + 6849) + 29;
}
NumericMetrics.Value = result;
}
// not implemented yet:
/*
void computeFingerprint(const Seed& seed)
{
unsigned int radius = seed.getNumBonds();
if (radius > 5)
radius = 5;
ExplicitBitVect *mf =
RDKit::MorganFingerprints::getFingerprintAsBitVect(seed.GraphTopology,
radius); //SLOW !!!
// ...
delete mf;
NumericMetrics.Field.hasFingerprint = 1;
}
*/
};
typedef HashKey TKey;
typedef std::list<FMCS::Graph> TIndexEntry; // hash-key is not unique key
private:
std::vector<TIndexEntry> ValueStorage;
std::map<KeyNumericMetrics::TValue, size_t> NumericIndex; // TIndexEntry
public:
// returns computed key, and pointer to index entry with a set of subgraphs
// corresponding to the key if found.
// then caller must find exactly matched subgraph in the result set with own
// search algorithm,
// including a resolving of collisions of hash key
TIndexEntry* find(const Seed& seed,
const std::vector<unsigned>& queryAtomLabels,
const std::vector<unsigned>& queryBondLabels,
TKey& key) { // compute key and find entry
key.computeKey(seed, queryAtomLabels, queryBondLabels);
std::map<KeyNumericMetrics::TValue, size_t>::const_iterator entryit =
NumericIndex.find(key.NumericMetrics.Value);
if (NumericIndex.end() != entryit) return &ValueStorage[entryit->second];
return NULL; // not found
}
// if find() did not found any entry for this key of seed a new entry will be
// created
void add(const Seed& seed, TKey& key,
TIndexEntry* entry) { // "compute" value and store it in NEW entry
// if not found
if (!entry) {
try {
ValueStorage.push_back(TIndexEntry());
} catch (...) {
return; // not enought memory room to add the item, but it's just a
// cache
}
entry = &ValueStorage.back();
}
entry->push_back(seed.Topology);
if (!NumericIndex.insert(std::pair<KeyNumericMetrics::TValue, size_t>(
key.NumericMetrics.Value,
ValueStorage.size() - 1)).second)
return; // not enought memory room to add the item, but it is just cache
}
size_t keyssize() const { // for statistics only
return ValueStorage.size();
}
size_t fullsize() const { // for statistics only
size_t n = 0;
for (std::vector<TIndexEntry>::const_iterator e = ValueStorage.begin();
e != ValueStorage.end(); e++)
n += e->size();
return n;
}
};
}
}
|