RDKit
Open-source cheminformatics and machine learning.
ScaffoldNetwork.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2019 Greg Landrum and T5 Informatics GmbH
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef RD_SCAFFOLDNETWORK_H
12 #define RD_SCAFFOLDNETWORK_H
13 
14 #include <vector>
15 #include <map>
16 #include <string>
17 #include <sstream>
18 #include <memory>
19 #include <iostream>
20 
21 #ifdef RDK_USE_BOOST_SERIALIZATION
22 #include <RDGeneral/Invariant.h>
24 #include <boost/archive/text_oarchive.hpp>
25 #include <boost/archive/text_iarchive.hpp>
26 #include <boost/serialization/vector.hpp>
27 #include <boost/serialization/shared_ptr.hpp>
28 #include <boost/serialization/version.hpp>
30 #endif
31 
32 namespace RDKit {
33 class ROMol;
34 class ChemicalReaction;
35 
36 namespace ScaffoldNetwork {
37 
39  bool includeGenericScaffolds =
40  true; ///< include scaffolds with all atoms replaced by dummies
41  bool includeGenericBondScaffolds =
42  false; ///< include scaffolds with all bonds replaced by single bonds
43  bool includeScaffoldsWithoutAttachments =
44  true; ///< remove attachment points from scaffolds and include the result
45  bool includeScaffoldsWithAttachments =
46  true; ///< Include the version of the scaffold with attachment points
47  bool keepOnlyFirstFragment =
48  true; ///< keep only the first fragment from the bond breaking rule
49  bool pruneBeforeFragmenting =
50  true; ///< Do a pruning/flattening step before starting fragmenting
51  bool flattenIsotopes = true; ///< remove isotopes when flattening
52  bool flattenChirality =
53  true; ///< remove chirality and bond stereo when flattening
54  bool flattenKeepLargest =
55  true; ///< keep only the largest fragment when doing flattening
56  bool collectMolCounts = true; ///< keep track of the number of molecules each
57  ///< scaffold was reached from
58 
59  std::vector<std::shared_ptr<ChemicalReaction>>
60  bondBreakersRxns; ///< the reaction(s) used to fragment. Should expect a
61  ///< single reactant and produce two products
64  {"[!#0;R:1]-!@[!#0:2]>>[*:1]-[#0].[#0]-[*:2]"}} {};
65  ScaffoldNetworkParams(const std::vector<std::string> &bondBreakersSmarts);
66 };
67 
68 enum class RDKIT_SCAFFOLDNETWORK_EXPORT EdgeType {
69  Fragment = 1, ///< molecule -> fragment
70  Generic = 2, ///< molecule -> generic molecule (all atoms are dummies)
71  GenericBond = 3, ///< molecule -> generic bond molecule (all bonds single)
72  RemoveAttachment = 4, ///< molecule -> molecule with no attachment points
73  Initialize = 5 ///< molecule -> flattened molecule
74 };
75 
77  size_t beginIdx;
78  size_t endIdx;
79  EdgeType type;
80  NetworkEdge() : beginIdx(0), endIdx(0), type(EdgeType::Initialize){};
81  NetworkEdge(size_t bi, size_t ei, EdgeType typ)
82  : beginIdx(bi), endIdx(ei), type(typ){};
84  return (beginIdx == o.beginIdx) && (endIdx == o.endIdx) && (type == o.type);
85  }
87  return (beginIdx != o.beginIdx) || (endIdx != o.endIdx) || (type != o.type);
88  }
89 #ifdef RDK_USE_BOOST_SERIALIZATION
90  private:
91  friend class boost::serialization::access;
92  template <class Archive>
93  void serialize(Archive &ar, const unsigned int version) {
94  RDUNUSED_PARAM(version);
95  ar &beginIdx;
96  ar &endIdx;
97  ar &type;
98  }
99 #endif
100 };
101 
103  std::vector<std::string> nodes; ///< SMILES for the scaffolds
104  std::vector<unsigned>
105  counts; ///< number of times each scaffold was encountered
106  std::vector<unsigned>
107  molCounts; ///< number of molecules each scaffold was found in
108  std::vector<NetworkEdge> edges; ///< edges in the network
110 #ifdef RDK_USE_BOOST_SERIALIZATION
111  ScaffoldNetwork(const std::string &pkl) {
112  std::stringstream iss(pkl);
113  boost::archive::text_iarchive ia(iss);
114  ia >> *this;
115  }
116 
117  private:
118  friend class boost::serialization::access;
119  template <class Archive>
120  void serialize(Archive &ar, const unsigned int version) {
121  RDUNUSED_PARAM(version);
122  ar &nodes;
123  ar &counts;
124  if (version > 0) {
125  ar &molCounts;
126  }
127  ar &edges;
128  }
129 #endif
130 };
131 
132 //! update an existing ScaffoldNetwork using a set of molecules
133 template <typename T>
134 void updateScaffoldNetwork(const T &mols, ScaffoldNetwork &network,
135  const ScaffoldNetworkParams &params);
136 
137 //! create a new ScaffoldNetwork for a set of molecules
138 template <typename T>
140  const ScaffoldNetworkParams &params) {
141  ScaffoldNetwork res;
142  updateScaffoldNetwork(mols, res, params);
143  return res;
144 }
145 //! allows nodes to output nicely as strings
146 inline std::ostream &operator<<(std::ostream &ostr,
147  const RDKit::ScaffoldNetwork::EdgeType &e) {
148  switch (e) {
150  ostr << "Fragment";
151  break;
153  ostr << "Generic";
154  break;
156  ostr << "GenericBond";
157  break;
159  ostr << "RemoveAttachment";
160  break;
161  case RDKit::ScaffoldNetwork::EdgeType::Initialize:
162  ostr << "Initialize";
163  break;
164  default:
165  ostr << "UNKNOWN";
166  break;
167  }
168  return ostr;
169 }
170 //! allows edges to output nicely as strings
171 inline std::ostream &operator<<(std::ostream &ostr,
173  ostr << "NetworkEdge( " << e.beginIdx << "->" << e.endIdx
174  << ", type:" << e.type << " )";
175  return ostr;
176 }
177 
178 //! returns parameters for constructing scaffold networks using BRICS
179 //! fragmentation
181 
182 } // namespace ScaffoldNetwork
183 } // namespace RDKit
184 
185 #ifdef RDK_USE_BOOST_SERIALIZATION
186 namespace boost {
187 namespace serialization {
188 template <>
189 struct version<RDKit::ScaffoldNetwork::ScaffoldNetwork> {
190  BOOST_STATIC_CONSTANT(int, value = 1);
191 };
192 } // namespace serialization
193 } // namespace boost
194 #endif
195 #endif
#define RDUNUSED_PARAM(x)
Definition: Invariant.h:196
Fragment
molecule -> fragment
Generic
molecule -> generic molecule (all atoms are dummies)
GenericBond
molecule -> generic bond molecule (all bonds single)
RemoveAttachment
molecule -> molecule with no attachment points
#define RDKIT_SCAFFOLDNETWORK_EXPORT
Definition: export.h:671
ScaffoldNetwork createScaffoldNetwork(const T &mols, const ScaffoldNetworkParams &params)
create a new ScaffoldNetwork for a set of molecules
void updateScaffoldNetwork(const T &mols, ScaffoldNetwork &network, const ScaffoldNetworkParams &params)
update an existing ScaffoldNetwork using a set of molecules
RDKIT_SCAFFOLDNETWORK_EXPORT ScaffoldNetworkParams getBRICSNetworkParams()
Std stuff.
Definition: Abbreviations.h:17
std::ostream & operator<<(std::ostream &oss, const TextAlignType &tat)
Definition: RDLog.h:22
bool operator==(const RDKit::ScaffoldNetwork::NetworkEdge &o) const
NetworkEdge(size_t bi, size_t ei, EdgeType typ)
bool operator!=(const RDKit::ScaffoldNetwork::NetworkEdge &o) const
std::vector< std::shared_ptr< ChemicalReaction > > bondBreakersRxns
ScaffoldNetworkParams(const std::vector< std::string > &bondBreakersSmarts)
std::vector< NetworkEdge > edges
edges in the network
std::vector< unsigned > molCounts
number of molecules each scaffold was found in
std::vector< std::string > nodes
SMILES for the scaffolds.
std::vector< unsigned > counts
number of times each scaffold was encountered