RDKit
Open-source cheminformatics and machine learning.
EnumerationStrategyBase.h
Go to the documentation of this file.
1 //
2 // Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
3 // All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following
13 // disclaimer in the documentation and/or other materials provided
14 // with the distribution.
15 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
16 // nor the names of its contributors may be used to endorse or promote
17 // products derived from this software without specific prior written
18 // permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 #include <RDGeneral/export.h>
33 #ifndef ENUMERATION_STRATEGY_H
34 #define ENUMERATION_STRATEGY_H
35 
36 #include "EnumerateTypes.h"
37 #include "../Reaction.h"
38 #include <utility>
39 #include <vector>
41 #include <cstdint>
42 #ifdef RDK_USE_BOOST_SERIALIZATION
43 #include <boost/serialization/assume_abstract.hpp>
44 #include <boost/serialization/vector.hpp>
45 // the next two includes need to be there for boost 1.56
46 #include <boost/serialization/singleton.hpp>
47 #include <boost/serialization/extended_type_info.hpp>
48 #include <boost/serialization/shared_ptr.hpp>
49 #endif
51 
52 #include <GraphMol/RDKitBase.h>
53 
54 namespace RDKit {
55 
56 //! class for flagging enumeration strategy errors
57 class RDKIT_CHEMREACTIONS_EXPORT EnumerationStrategyException
58  : public std::exception {
59  public:
60  EnumerationStrategyException(const char *msg) : _msg(msg) {}  // copies msg
61  EnumerationStrategyException(std::string msg) : _msg(std::move(msg)) {}
62  const char *what() const noexcept override { return _msg.c_str(); }
63  ~EnumerationStrategyException() noexcept override = default;
64 
65  private:
66  std::string _msg;  // message text returned by what()
67 };
68 
69 //! Return the number of elements per input vector
70 /*! \param bbs vector<vector<T> >
71 
72  \result vector<uint64_t> number of elements in each vector
73  */
74 template <class T>
75 EnumerationTypes::RGROUPS getSizesFromBBs(
76  const std::vector<std::vector<T>> &bbs) {
77  EnumerationTypes::RGROUPS sizes;  // one entry per inner vector of bbs
78  for (size_t i = 0; i < bbs.size(); ++i) {
79  sizes.push_back(bbs[i].size());
80  }
81  return sizes;
82 }
83 
84 //! getSizesFromReactants
85 //! Helper function for enumeration, bbs are stored in a
86 //! std::vector< std::vector<boost::shared_ptr<ROMol> > >
87 //
88 RDKIT_CHEMREACTIONS_EXPORT EnumerationTypes::RGROUPS getSizesFromReactants(
89  const std::vector<MOL_SPTR_VECT> &bbs);
90 
91 //! getReactantsFromRGroups
92 //! Helper function for enumeration, bbs are stored in a
93 //! std::vector< std::vector<boost::shared_ptr<ROMol> > >
94 //
95 RDKIT_CHEMREACTIONS_EXPORT MOL_SPTR_VECT
96 getReactantsFromRGroups(const std::vector<MOL_SPTR_VECT> &bbs,
97  const EnumerationTypes::RGROUPS &rgroups);
98 
99 //! computeNumProducts
100 //! Returns the number of possible product combinations from
101 //! the given numbers of building blocks for each rgroup
102 //! or EnumerationStrategyBase::EnumerationOverflow if the
103 //! number will not fit into the machine's integer type.
104 //! n.b. An overflow simply means there are a lot of products,
105 //! not that they cannot be enumerated
106 RDKIT_CHEMREACTIONS_EXPORT boost::uint64_t computeNumProducts(
107  const EnumerationTypes::RGROUPS &sizes);
108 
109 //! Base Class for enumeration strategies
110 //! Usage:
111 //! EnumerationStrategyBase must be initialized with both a reaction
112 //! and the building block (molecule) vector to be sampled.
113 //!
114 //! \verbatim
115 //! EnumerationStrategyBase &eb = ...
116 //! if(eb) { // can we get another entry
117 //! const EnumerationTypes::RGROUPS &v = eb.next();
118 //! v[0] // RGroup 0 position
119 //! v[1] // RGroup 1 position...
120 //! }
121 //! \endverbatim
122 
124  protected:
125  EnumerationTypes::RGROUPS m_permutation; // where are we currently?
127  m_permutationSizes; // m_permutationSizes num bbs per group
128  boost::uint64_t
129  m_numPermutations{}; // total number of permutations for this group
130  // -1 if > ssize_t::max
131  public:
132  static const boost::uint64_t EnumerationOverflow =
133  static_cast<boost::uint64_t>(-1);
134  EnumerationStrategyBase() : m_permutation(), m_permutationSizes() {}
135 
137 
138  virtual const char *type() const { return "EnumerationStrategyBase"; }
139 
140  //! Initialize the enumerator based on the reaction and the
141  //! supplied building blocks
142  //! This is the standard API point.
143  //! This calls the derived class's initializeStrategy method which must be
144  //! implemented
145  void initialize(const ChemicalReaction &reaction,
146  const EnumerationTypes::BBS &building_blocks) {
147  // default initialization, may be overridden (sets the # reactants
148  // and computes the default # of permutations)
149  m_permutationSizes = getSizesFromBBs(building_blocks);
150  m_permutation.resize(m_permutationSizes.size());
151 
152  m_numPermutations = computeNumProducts(m_permutationSizes);
153  std::fill(m_permutation.begin(), m_permutation.end(), 0);
154 
155  initializeStrategy(reaction, building_blocks);
156  }
157 
158  // ! Initialize derived class. Must exist.
159  // ! EnumerationStrategyBase structures are already initialized:
160  // ! m_permutationSizes - [ length of building blocks for each reactant set ]
161  // ! m_numPermutations - number of possible permutations
162  // ! ( -1 if not computable )
163  // ! m_permutation - the first permutation, always the first supplied
164  // ! reactants
165  virtual void initializeStrategy(
166  const ChemicalReaction &reaction,
167  const EnumerationTypes::BBS &building_blocks) = 0;
168 
169  //! returns true if there are more permutations left
170  //! random enumerators may always return true...
171  virtual operator bool() const = 0;
172 
173  //! The current permutation {r1, r2, ...}
174  virtual const EnumerationTypes::RGROUPS &next() = 0;
175 
176  //! copy the enumeration strategy complete with current state
177  virtual EnumerationStrategyBase *copy() const = 0;
178 
179  //! The current position in the enumeration
180  const EnumerationTypes::RGROUPS &getPosition() const { return m_permutation; }
181 
182  //! a result of EnumerationOverflow indicates that the number of
183  //! permutations is not computable with the current
184  //! rdlonglong size.
185  boost::uint64_t getNumPermutations() const { return m_numPermutations; }
186 
187  //! Returns how many permutations have been processed by this strategy
188  virtual boost::uint64_t getPermutationIdx() const = 0;
189 
190  //! Skip the specified number of permutations (useful for
191  //! resetting state to a known position)
192  bool skip(boost::uint64_t skipCount) {
193  for (boost::uint64_t i = 0; i < skipCount; ++i) {
194  next();
195  }
196  return true;
197  }
198 
199  protected:
200  //! Initialize the internal data structures
201  //! i.e. RGROUPS = {10,40,50};
203  m_permutation.resize(rgroups.size());
204  m_permutationSizes = rgroups;
205  m_numPermutations = computeNumProducts(m_permutationSizes);
206  std::fill(m_permutation.begin(), m_permutation.end(), 0);
207  }
208 
209  private:
210  friend class boost::serialization::access;
211  template <class Archive>
212  void serialize(Archive &ar, const unsigned int /*version*/) {
213  ar &m_permutation;
214  ar &m_permutationSizes;
215  ar &m_numPermutations;
216  }
217 };
218 #ifdef RDK_USE_BOOST_SERIALIZATION
219 BOOST_SERIALIZATION_ASSUME_ABSTRACT(EnumerationStrategyBase)
220 #endif
221 } // namespace RDKit
222 
223 #ifdef RDK_USE_BOOST_SERIALIZATION
224 BOOST_CLASS_VERSION(RDKit::EnumerationStrategyBase, 1)
225 #endif
226 
227 #endif
pulls in the core RDKit functionality
This is a class for storing and applying general chemical reactions.
Definition: Reaction.h:121
virtual EnumerationStrategyBase * copy() const =0
copy the enumeration strategy complete with current state
void initialize(const ChemicalReaction &reaction, const EnumerationTypes::BBS &building_blocks)
const EnumerationTypes::RGROUPS & getPosition() const
The current position in the enumeration.
bool skip(boost::uint64_t skipCount)
virtual const EnumerationTypes::RGROUPS & next()=0
The current permutation {r1, r2, ...}.
virtual const char * type() const
virtual boost::uint64_t getPermutationIdx() const =0
Returns how many permutations have been processed by this strategy.
virtual void initializeStrategy(const ChemicalReaction &reaction, const EnumerationTypes::BBS &building_blocks)=0
EnumerationTypes::RGROUPS m_permutation
void internalInitialize(const EnumerationTypes::RGROUPS &rgroups)
boost::uint64_t getNumPermutations() const
EnumerationTypes::RGROUPS m_permutationSizes
class for flagging enumeration strategy errors
~EnumerationStrategyException() noexcept override=default
const char * what() const noexcept override
#define RDKIT_CHEMREACTIONS_EXPORT
Definition: export.h:49
std::vector< boost::uint64_t > RGROUPS
std::vector< MOL_SPTR_VECT > BBS
Std stuff.
Definition: Abbreviations.h:18
RDKIT_CHEMREACTIONS_EXPORT EnumerationTypes::RGROUPS getSizesFromReactants(const std::vector< MOL_SPTR_VECT > &bbs)
EnumerationTypes::RGROUPS getSizesFromBBs(const std::vector< std::vector< T >> &bbs)
Return the number of elements per input vector.
std::vector< boost::shared_ptr< ROMol > > MOL_SPTR_VECT
Definition: FragCatParams.h:20
RDKIT_CHEMREACTIONS_EXPORT MOL_SPTR_VECT getReactantsFromRGroups(const std::vector< MOL_SPTR_VECT > &bbs, const EnumerationTypes::RGROUPS &rgroups)
RDKIT_CHEMREACTIONS_EXPORT boost::uint64_t computeNumProducts(const EnumerationTypes::RGROUPS &sizes)