RDKit
Open-source cheminformatics and machine learning.
Catalog.h
Go to the documentation of this file.
1//
2// Copyright (C) 2003-2006 Rational Discovery LLC
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11#include <RDGeneral/export.h>
12#ifndef RD_CATALOG_H
13#define RD_CATALOG_H
14
15// Boost graph stuff
17#include <boost/graph/graph_traits.hpp>
18#include <boost/graph/adjacency_list.hpp>
19#include <boost/version.hpp>
20#if BOOST_VERSION >= 104000
21#include <boost/property_map/property_map.hpp>
22#else
23#include <boost/property_map.hpp>
24#endif
26
27// for some typedefs
28#include <RDGeneral/types.h>
29#include <RDGeneral/StreamOps.h>
30
31namespace RDCatalog {
32const int versionMajor = 1;
33const int versionMinor = 0;
34const int versionPatch = 0;
35const int endianId = 0xDEADBEEF;
36
37//-----------------------------------------------------------------------------
38//! abstract base class for a catalog object
39template <class entryType, class paramType>
40class Catalog {
41 public:
42 typedef entryType entryType_t;
43 typedef paramType paramType_t;
44
45 //------------------------------------
46 Catalog() : dp_cParams(nullptr) {}
47
48 //------------------------------------
49 virtual ~Catalog() { delete dp_cParams; }
50
51 //------------------------------------
52 //! return a serialized form of the Catalog as an std::string
53 virtual std::string Serialize() const = 0;
54
55 //------------------------------------
56 //! adds an entry to the catalog
57 /*!
58
59 \param entry the entry to be added
60 \param updateFPLength (optional) if this is true, our internal
61 fingerprint length will also be updated.
62
63 */
64 virtual unsigned int addEntry(entryType *entry,
65 bool updateFPLength = true) = 0;
66
67 //------------------------------------
68 //! returns a particular entry in the Catalog
69 virtual const entryType *getEntryWithIdx(unsigned int idx) const = 0;
70
71 //------------------------------------
72 //! returns the number of entries
73 virtual unsigned int getNumEntries() const = 0;
74
75 //------------------------------------
76 //! returns the length of our fingerprint
77 unsigned int getFPLength() const { return d_fpLength; }
78
79 //------------------------------------
80 //! sets our fingerprint length
81 void setFPLength(unsigned int val) { d_fpLength = val; }
82
83 //------------------------------------
84 //! sets our parameters by copying the \c params argument
85 virtual void setCatalogParams(const paramType *params) {
86 PRECONDITION(params, "bad parameter object");
87 // if we already have a parameter object throw an exception
89 "A parameter object already exists on the catalog");
90 /*
91 if (dp_cParams) {
92 // we already have parameter object on the catalog
93 // can't overwrite it
94 PRECONDITION(0, "A parameter object already exist on the catalog");
95 }*/
96 dp_cParams = new paramType(*params);
97 }
98
99 //------------------------------------
100 //! returns a pointer to our parameters
101 const paramType *getCatalogParams() const { return dp_cParams; }
102
103 protected:
104 // this is the ID that will be assigned to the next entry
105 // added to the catalog - need not be same as the number of entries
106 // in the catalog and does not correspond with the
107 // id of the entry in the catalog.
108 // this is more along the lines of bitId
109 unsigned int d_fpLength{0}; //!< the length of our fingerprint
110 paramType *dp_cParams; //!< our params object
111};
112
113//-----------------------------------------------------------------------------
114//! A Catalog with a hierarchical structure
115/*!
116
117 The entries of a HierarchCatalog are arranged in a directed graph
118
119 <b>The difference between <i>Indices</i> and <i>Bit Ids</i></b>
120
121 A HierarchCatalog may contain more entries than the user is actually
122 interested in. For example a HierarchCatalog constructed to contain
123 orders 5 through 8 may well contain information about orders 1-5,
124 in order to facilitate some search optimizations.
125
126 - <i>Bit Ids</i> refer to the "interesting" bits.
127 So, in the above example, Bit Id \c 0 will be the first entry
128 with order 5.
129 - <i>Indices</i> refer to the underlying structure of the catalog.
130 So, in the above example, the entry with index \c 0 will be
131 the first entry with order 1.
132
133*/
134template <class entryType, class paramType, class orderType>
135class HierarchCatalog : public Catalog<entryType, paramType> {
136 // the entries in the catalog can be traversed using the edges
137 // in a desired order
138 public:
139 //! used by the BGL to set up the node properties in our graph
141 enum { num = 1003 };
142 typedef boost::vertex_property_tag kind;
143 };
144 typedef boost::property<vertex_entry_t, entryType *> EntryProperty;
145
146 //! the type of the graph itself:
147 typedef boost::adjacency_list<
148 boost::vecS,
149 boost::vecS, // FIX: should be using setS for edges so that parallel
150 // edges are never added (page 225 BGL book)
151 // but that seems result in compile errors
152 boost::bidirectionalS, EntryProperty>
154
155 typedef boost::graph_traits<CatalogGraph> CAT_GRAPH_TRAITS;
156 typedef typename CAT_GRAPH_TRAITS::vertex_iterator VER_ITER;
157 typedef std::pair<VER_ITER, VER_ITER> ENT_ITER_PAIR;
158 typedef typename CAT_GRAPH_TRAITS::adjacency_iterator DOWN_ENT_ITER;
159 typedef std::pair<DOWN_ENT_ITER, DOWN_ENT_ITER> DOWN_ENT_ITER_PAIR;
160
161 //------------------------------------
163
164 //------------------------------------
165 //! Construct by making a copy of the input \c params object
167 : Catalog<entryType, paramType>() {
168 this->setCatalogParams(params);
169 }
170
171 //------------------------------------
172 //! Construct from a \c pickle (a serialized form of the HierarchCatalog)
173 HierarchCatalog<entryType, paramType, orderType>(const std::string &pickle) {
174 this->initFromString(pickle);
175 }
176
177 //------------------------------------
178 ~HierarchCatalog() override { destroy(); }
179
180 //------------------------------------
181 //! serializes this object to a stream
182 void toStream(std::ostream &ss) const {
183 PRECONDITION(this->getCatalogParams(), "NULL parameter object");
184
185 // the i/o header:
190
191 // information about the catalog itself:
192 int tmpUInt;
193 tmpUInt = this->getFPLength();
194 RDKit::streamWrite(ss, tmpUInt);
195 tmpUInt = this->getNumEntries();
196 RDKit::streamWrite(ss, tmpUInt);
197
198 // std::cout << ">>>>-------------------------------" << std::endl;
199 // std::cout << "\tlength: " << getFPLength() << " " << getNumEntries() <<
200 // std::endl;
201
202 // add the params object:
203 this->getCatalogParams()->toStream(ss);
204 // std::cout << "\tparams: " << getCatalogParams()->getLowerFragLength();
205 // std::cout << " " << getCatalogParams()->getUpperFragLength();
206 // std::cout << " " << getCatalogParams()->getNumFuncGroups();
207 // std::cout << std::endl;
208
209 // write the entries in order:
210 for (unsigned int i = 0; i < getNumEntries(); i++) {
211 this->getEntryWithIdx(i)->toStream(ss);
212 }
213
214 // finally the adjacency list:
215 for (unsigned int i = 0; i < getNumEntries(); i++) {
216 RDKit::INT_VECT children = this->getDownEntryList(i);
217 tmpUInt = static_cast<unsigned int>(children.size());
218 RDKit::streamWrite(ss, tmpUInt);
219 for (RDKit::INT_VECT::const_iterator ivci = children.begin();
220 ivci != children.end(); ivci++) {
221 RDKit::streamWrite(ss, *ivci);
222 }
223 }
224 }
225
226 //------------------------------------
227 //! serializes this object and returns the resulting \c pickle
228 std::string Serialize() const override {
229 std::stringstream ss(std::ios_base::binary | std::ios_base::out |
230 std::ios_base::in);
231 this->toStream(ss);
232 return ss.str();
233 }
234
235 //------------------------------------
236 //! fills the contents of this object from a stream containing a \c pickle
237 void initFromStream(std::istream &ss) {
238 int tmpInt;
239 // FIX: at the moment we ignore the header info:
240 RDKit::streamRead(ss, tmpInt);
241 RDKit::streamRead(ss, tmpInt);
242 RDKit::streamRead(ss, tmpInt);
243 RDKit::streamRead(ss, tmpInt);
244
245 unsigned int tmpUInt;
246 RDKit::streamRead(ss, tmpUInt); // fp length
247 this->setFPLength(tmpUInt);
248
249 unsigned int numEntries;
250 RDKit::streamRead(ss, numEntries);
251 // std::cout << "<<<-------------------------------" << std::endl;
252 // std::cout << "\tlength: " << getFPLength() << " " << numEntries <<
253 // std::endl;
254
255 // grab the params:
256 paramType *params = new paramType();
257 params->initFromStream(ss);
258 this->setCatalogParams(params);
259 delete params;
260
261 // std::cout << "\tparams: " << getCatalogParams()->getLowerFragLength();
262 // std::cout << " " << getCatalogParams()->getUpperFragLength();
263 // std::cout << " " << getCatalogParams()->getNumFuncGroups();
264 // std::cout << std::endl;
265
266 // now all of the entries:
267 for (unsigned int i = 0; i < numEntries; i++) {
268 entryType *entry = new entryType();
269 entry->initFromStream(ss);
270 this->addEntry(entry, false);
271 }
272
273 // and, finally, the adjacency list:
274 for (unsigned int i = 0; i < numEntries; i++) {
275 unsigned int nNeighbors;
276 RDKit::streamRead(ss, nNeighbors);
277 for (unsigned int j = 0; j < nNeighbors; j++) {
278 RDKit::streamRead(ss, tmpInt);
279 this->addEdge(i, tmpInt);
280 }
281 }
282 }
283
284 //------------------------------------
285 unsigned int getNumEntries() const override {
286 return static_cast<unsigned int>(boost::num_vertices(d_graph));
287 }
288
289 //------------------------------------
290 //! fills the contents of this object from a string containing a \c pickle
291 void initFromString(const std::string &text) {
292 std::stringstream ss(std::ios_base::binary | std::ios_base::out |
293 std::ios_base::in);
294 // initialize the stream:
295 ss.write(text.c_str(), text.length());
296 // now start reading out values:
297 this->initFromStream(ss);
298 }
299
300 //------------------------------------
301 //! add a new entry to the catalog
302 /*!
303
304 \param entry the entry to be added
305 \param updateFPLength (optional) if this is true, our internal
306 fingerprint length will also be updated.
307
308 */
309 unsigned int addEntry(entryType *entry, bool updateFPLength = true) override {
310 PRECONDITION(entry, "bad arguments");
311 if (updateFPLength) {
312 unsigned int fpl = this->getFPLength();
313 entry->setBitId(fpl);
314 fpl++;
315 this->setFPLength(fpl);
316 }
317 unsigned int eid = static_cast<unsigned int>(
318 boost::add_vertex(EntryProperty(entry), d_graph));
319 orderType etype = entry->getOrder();
320 // REVIEW: this initialization is not required: the STL map, in
321 // theory, will create a new object when operator[] is called
322 // for a new item
323 if (d_orderMap.find(etype) == d_orderMap.end()) {
324 RDKit::INT_VECT nets;
325 d_orderMap[etype] = nets;
326 }
327 d_orderMap[etype].push_back(eid);
328 return eid;
329 }
330
331 //------------------------------------
332 //! adds an edge between two entries in the catalog
333 /*!
334 Since we are using a bidirectional graph - the order in
335 which the ids are supplied here makes a difference
336
337 \param id1 index of the edge's beginning
338 \param id2 index of the edge's end
339
340 */
341 void addEdge(unsigned int id1, unsigned int id2) {
342 unsigned int nents = getNumEntries();
343 URANGE_CHECK(id1, nents);
344 URANGE_CHECK(id2, nents);
345 // FIX: if we boost::setS for the edgeList BGL will
346 // do the checking for duplicity (parallel edges)
347 // But for reasons unknown setS results in compile
348 // errors while using adjacent_vertices.
349 typename CAT_GRAPH_TRAITS::edge_descriptor edge;
350 bool found;
351 boost::tie(edge, found) = boost::edge(boost::vertex(id1, d_graph),
352 boost::vertex(id2, d_graph), d_graph);
353 if (!found) {
354 boost::add_edge(id1, id2, d_graph);
355 }
356 }
357
358 //------------------------------------
359 //! returns a pointer to our entry with a particular index
360 const entryType *getEntryWithIdx(unsigned int idx) const override {
362 int vd = static_cast<int>(boost::vertex(idx, d_graph));
363 typename boost::property_map<CatalogGraph, vertex_entry_t>::const_type
364 pMap = boost::get(vertex_entry_t(), d_graph);
365 return pMap[vd];
366 }
367
368 //------------------------------------
369 //! returns a pointer to our entry with a particular bit ID
370 const entryType *getEntryWithBitId(unsigned int idx) const {
371 URANGE_CHECK(idx, this->getFPLength());
372 typename boost::property_map<CatalogGraph, vertex_entry_t>::const_type
373 pMap = boost::get(vertex_entry_t(), d_graph);
374 const entryType *res = nullptr;
375 for (unsigned int i = idx; i < this->getNumEntries(); i++) {
376 const entryType *e = pMap[i];
377 if (e->getBitId() == static_cast<int>(idx)) {
378 res = e;
379 break;
380 }
381 }
382 return res;
383 }
384
385 //------------------------------------
386 //! returns the index of the entry with a particular bit ID
387 int getIdOfEntryWithBitId(unsigned int idx) const {
388 URANGE_CHECK(idx, this->getFPLength());
389 typename boost::property_map<CatalogGraph, vertex_entry_t>::const_type
390 pMap = boost::get(vertex_entry_t(), d_graph);
391 int res = -1;
392 for (unsigned int i = idx; i < this->getNumEntries(); i++) {
393 const entryType *e = pMap[i];
394 if (static_cast<unsigned int>(e->getBitId()) == idx) {
395 res = i;
396 break;
397 }
398 }
399 return res;
400 }
401
402 //------------------------------------
403 //! returns a list of the indices of entries below the one passed in
404 RDKit::INT_VECT getDownEntryList(unsigned int idx) const {
405 RDKit::INT_VECT res;
406 DOWN_ENT_ITER nbrIdx, endIdx;
407 boost::tie(nbrIdx, endIdx) = boost::adjacent_vertices(idx, d_graph);
408 while (nbrIdx != endIdx) {
409 res.push_back(static_cast<int>(*nbrIdx));
410 nbrIdx++;
411 }
412 // std::cout << res.size() << "\n";
413 return res;
414 }
415
416 //------------------------------------
417 //! returns a list of the indices that have a particular order
418 const RDKit::INT_VECT &getEntriesOfOrder(orderType ord) {
419 return d_orderMap[ord];
420 }
421
422 //------------------------------------
423 //! returns a list of the indices that have a particular order
424 /*!
425 \overload
426 */
427 const RDKit::INT_VECT &getEntriesOfOrder(orderType ord) const {
428 typename std::map<orderType, RDKit::INT_VECT>::const_iterator elem;
429 elem = d_orderMap.find(ord);
431 elem != d_orderMap.end(),
432 " catalog does not contain any entries of the order specified");
433 return elem->second;
434 }
435
436 private:
437 // graphs that store the entries in the catalog in a hierarchical manner
438 CatalogGraph d_graph;
439 // a map that maps the order type of entries in the catalog to
440 // a vector of vertex indices in the graphs above
441 // e.g. for a catalog with molecular fragments, the order of a fragment can
442 // simply be the number of bond in it. The list this oder maps to is all the
443 // vertex ids of these fragment in the catalog that have this many bonds in
444 // them
445 std::map<orderType, RDKit::INT_VECT> d_orderMap;
446
447 //------------------------------------
448 //! clear any memory that we've used
449 void destroy() {
450 ENT_ITER_PAIR entItP = boost::vertices(d_graph);
451 typename boost::property_map<CatalogGraph, vertex_entry_t>::type pMap =
452 boost::get(vertex_entry_t(), d_graph);
453 while (entItP.first != entItP.second) {
454 delete pMap[*(entItP.first++)];
455 }
456 }
457};
458
459} // namespace RDCatalog
460
461#endif
#define CHECK_INVARIANT(expr, mess)
Definition: Invariant.h:101
#define URANGE_CHECK(x, hi)
Definition: Invariant.h:142
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
abstract base class for a catalog object
Definition: Catalog.h:40
virtual std::string Serialize() const =0
return a serialized form of the Catalog as an std::string
virtual unsigned int addEntry(entryType *entry, bool updateFPLength=true)=0
adds an entry to the catalog
unsigned int getFPLength() const
returns the length of our fingerprint
Definition: Catalog.h:77
virtual void setCatalogParams(const paramType *params)
sets our parameters by copying the params argument
Definition: Catalog.h:85
entryType entryType_t
Definition: Catalog.h:42
virtual unsigned int getNumEntries() const =0
returns the number of entries
const paramType * getCatalogParams() const
returns a pointer to our parameters
Definition: Catalog.h:101
virtual ~Catalog()
Definition: Catalog.h:49
paramType * dp_cParams
our params object
Definition: Catalog.h:110
unsigned int d_fpLength
the length of our fingerprint
Definition: Catalog.h:109
void setFPLength(unsigned int val)
sets our fingerprint length
Definition: Catalog.h:81
paramType paramType_t
Definition: Catalog.h:43
virtual const entryType * getEntryWithIdx(unsigned int idx) const =0
returns a particular entry in the Catalog
A Catalog with a hierarchical structure.
Definition: Catalog.h:135
boost::graph_traits< CatalogGraph > CAT_GRAPH_TRAITS
Definition: Catalog.h:155
CAT_GRAPH_TRAITS::vertex_iterator VER_ITER
Definition: Catalog.h:156
CAT_GRAPH_TRAITS::adjacency_iterator DOWN_ENT_ITER
Definition: Catalog.h:158
const entryType * getEntryWithBitId(unsigned int idx) const
returns a pointer to our entry with a particular bit ID
Definition: Catalog.h:370
const RDKit::INT_VECT & getEntriesOfOrder(orderType ord) const
returns a list of the indices that have a particular order
Definition: Catalog.h:427
RDKit::INT_VECT getDownEntryList(unsigned int idx) const
returns a list of the indices of entries below the one passed in
Definition: Catalog.h:404
int getIdOfEntryWithBitId(unsigned int idx) const
returns the index of the entry with a particular bit ID
Definition: Catalog.h:387
unsigned int getNumEntries() const override
returns the number of entries
Definition: Catalog.h:285
const entryType * getEntryWithIdx(unsigned int idx) const override
returns a pointer to our entry with a particular index
Definition: Catalog.h:360
std::string Serialize() const override
serializes this object and returns the resulting pickle
Definition: Catalog.h:228
void initFromStream(std::istream &ss)
fills the contents of this object from a stream containing a pickle
Definition: Catalog.h:237
boost::adjacency_list< boost::vecS, boost::vecS, boost::bidirectionalS, EntryProperty > CatalogGraph
the type of the graph itself:
Definition: Catalog.h:153
std::pair< VER_ITER, VER_ITER > ENT_ITER_PAIR
Definition: Catalog.h:157
const RDKit::INT_VECT & getEntriesOfOrder(orderType ord)
returns a list of the indices that have a particular order
Definition: Catalog.h:418
void initFromString(const std::string &text)
fills the contents of this object from a string containing a pickle
Definition: Catalog.h:291
~HierarchCatalog() override
Definition: Catalog.h:178
boost::property< vertex_entry_t, entryType * > EntryProperty
Definition: Catalog.h:144
std::pair< DOWN_ENT_ITER, DOWN_ENT_ITER > DOWN_ENT_ITER_PAIR
Definition: Catalog.h:159
unsigned int addEntry(entryType *entry, bool updateFPLength=true) override
add a new entry to the catalog
Definition: Catalog.h:309
void addEdge(unsigned int id1, unsigned int id2)
adds an edge between two entries in the catalog
Definition: Catalog.h:341
void toStream(std::ostream &ss) const
serializes this object to a stream
Definition: Catalog.h:182
const int endianId
Definition: Catalog.h:35
const int versionMinor
Definition: Catalog.h:33
const int versionMajor
Definition: Catalog.h:32
const int versionPatch
Definition: Catalog.h:34
RDKIT_CHEMREACTIONS_EXPORT void pickle(const boost::shared_ptr< EnumerationStrategyBase > &enumerator, std::ostream &ss)
pickles a EnumerationStrategy and adds the results to a stream ss
std::vector< int > INT_VECT
Definition: types.h:277
void streamRead(std::istream &ss, T &loc)
does a binary read of an object from a stream
Definition: StreamOps.h:282
void streamWrite(std::ostream &ss, const T &val)
does a binary write of an object to a stream
Definition: StreamOps.h:260
used by the BGL to set up the node properties in our graph
Definition: Catalog.h:140
boost::vertex_property_tag kind
Definition: Catalog.h:142
@ num
Definition: Catalog.h:141