RDKit
Open-source cheminformatics and machine learning.
SubstanceGroup.h
Go to the documentation of this file.
1//
2//
3// Copyright (C) 2018-2020 Greg Landrum and T5 Informatics GmbH
4//
5// @@ All Rights Reserved @@
6// This file is part of the RDKit.
7// The contents are covered by the terms of the BSD license
8// which is included in the file license.txt, found at the root
9// of the RDKit source tree.
10//
11/*! \file SubstanceGroup.h
12
13 \brief Defines the SubstanceGroup class
14
15*/
16#include <RDGeneral/export.h>
17#ifndef _RD_SGROUP_H
18#define _RD_SGROUP_H
19
20#include <utility>
21#include <unordered_map>
22
23#include <Geometry/point.h>
24#include <RDGeneral/types.h>
25#include <RDGeneral/RDProps.h>
26#include <boost/smart_ptr.hpp>
27
28namespace RDKit {
29class ROMol;
30class RWMol;
31class Bond;
32class Atom;
33
34//! used to indicate errors from incorrect sgroup access
36 : public std::runtime_error {
37 public:
38 //! construct with an error message
39 SubstanceGroupException(const char *msg) : std::runtime_error(msg) {}
40 //! construct with an error message
41 SubstanceGroupException(const std::string &msg) : std::runtime_error(msg) {}
42};
43
44//! The class for representing SubstanceGroups
45/*!
46 <b>Notes:</b>
47 - These are inspired by the SGroups in the MDL formats
48 - Implementation is based on 2010 MDL SD specification:
49 http://infochim.u-strasbg.fr/recherche/Download/Fragmentor/MDL_SDF.pdf
50 - See SGroups.md for further, more comprehensive notes.
51
52*/
53
55 public:
56 //! Bond type (see V3000 spec)
57 enum class BondType {
58 XBOND, // External/Crossing bond
59 CBOND, // Internal/Contained bond
60 };
61
62 typedef std::array<RDGeom::Point3D, 3> Bracket;
63
64 //! Data structure for SAP lines (see V3000 spec)
65 //! lvIdx may not be set; this signaled with value -1
66 struct AttachPoint {
67 unsigned int aIdx;
68 int lvIdx;
69 std::string id;
70 bool operator==(const AttachPoint &other) const {
71 return aIdx == other.aIdx && lvIdx == other.lvIdx && id == other.id;
72 }
73 };
74
75 //! See specification for V3000 CSTATE
76 //! vector may or not be considered, depending on TYPE
77 struct CState {
78 unsigned int bondIdx;
80 bool operator==(const CState &other) const {
81 // note that we ignore coordinates for this
82 return bondIdx == other.bondIdx;
83 }
84 };
85
86//! No default constructor
87#ifndef SWIG
88 // Unfortunately, SWIG generated wrapper code uses temporary variables that
89 // require a default ctor not be deleted.
90 SubstanceGroup() = delete;
91#endif // !SWIG
92
93 //! Main Constructor. Ownership is only set on this side of the relationship:
94 //! mol->addSubstanceGroup(sgroup) still needs to be called to get ownership
95 //! on the other side.
96 SubstanceGroup(ROMol *owning_mol, const std::string &type);
97
98 SubstanceGroup(const SubstanceGroup &other) = default;
99 SubstanceGroup &operator=(const SubstanceGroup &other) = default;
100
101 SubstanceGroup(SubstanceGroup &&other) noexcept : RDProps(std::move(other)) {
102 dp_mol = std::exchange(other.dp_mol, nullptr);
103 d_atoms = std::move(other.d_atoms);
104 d_patoms = std::move(other.d_patoms);
105 d_bonds = std::move(other.d_bonds);
106 d_brackets = std::move(other.d_brackets);
107 d_cstates = std::move(other.d_cstates);
108 d_saps = std::move(other.d_saps);
109 }
110
112 if (this == &other) {
113 return *this;
114 }
115 RDProps::operator=(std::move(other));
116 dp_mol = std::exchange(other.dp_mol, nullptr);
117 d_atoms = std::move(other.d_atoms);
118 d_patoms = std::move(other.d_patoms);
119 d_bonds = std::move(other.d_bonds);
120 d_brackets = std::move(other.d_brackets);
121 d_cstates = std::move(other.d_cstates);
122 d_saps = std::move(other.d_saps);
123 return *this;
124 }
125
126 //! Destructor
127 ~SubstanceGroup() = default;
128
129 //! returns whether or not this belongs to a molecule
130 bool hasOwningMol() const { return dp_mol != nullptr; }
131
132 //! Get the molecule that owns this instance
134 PRECONDITION(dp_mol, "no owner");
135 return *dp_mol;
136 }
137
138 //! returns whether or not this group is valid; invalid groups must be
139 //! ignored.
140 bool getIsValid() const { return d_isValid; }
141
142 //! set whether or not this group is valid; invalid groups must be ignored.
143 void setIsValid(bool isValid) { d_isValid = isValid; }
144
145 //! get the index of this sgroup in dp_mol's sgroups vector
146 //! (do not mistake this by the ID!)
147 unsigned int getIndexInMol() const;
148
149 /* Atom and Bond methods */
150 void addAtomWithIdx(unsigned int idx);
151 void addParentAtomWithIdx(unsigned int idx);
152 void addBondWithIdx(unsigned int idx);
153 void addAtomWithBookmark(int mark);
155 void addBondWithBookmark(int mark);
156
157 void addBracket(const Bracket &bracket);
158 void addCState(unsigned int bondIdx, const RDGeom::Point3D &vector);
159 void addAttachPoint(unsigned int aIdx, int lvIdx, const std::string &idStr);
160
161 BondType getBondType(unsigned int bondIdx) const;
162
163 const std::vector<unsigned int> &getAtoms() const { return d_atoms; }
164 const std::vector<unsigned int> &getParentAtoms() const { return d_patoms; }
165 const std::vector<unsigned int> &getBonds() const { return d_bonds; }
166
167 void setAtoms(std::vector<unsigned int> atoms) { d_atoms = std::move(atoms); }
168 void setParentAtoms(std::vector<unsigned int> patoms) {
169 d_patoms = std::move(patoms);
170 }
171 void setBonds(std::vector<unsigned int> bonds) { d_bonds = std::move(bonds); }
172
173 const std::vector<Bracket> &getBrackets() const { return d_brackets; }
174 const std::vector<CState> &getCStates() const { return d_cstates; }
175 const std::vector<AttachPoint> &getAttachPoints() const { return d_saps; }
176
177 std::vector<Bracket> &getBrackets() { return d_brackets; }
178 std::vector<CState> &getCStates() { return d_cstates; }
179 std::vector<AttachPoint> &getAttachPoints() { return d_saps; }
180
181 void clearBrackets() { d_brackets.clear(); }
182 void clearCStates() { d_cstates.clear(); }
183 void clearAttachPoints() { d_saps.clear(); }
184
185 //! adjusts our atom IDs to reflect that an atom has been removed from the
186 //! parent molecule
187 //! decrements all atom IDs that are higher than \c atomIdx
188 //! raises a \c SubstanceGroupException if \c atomIdx is actually part of
189 //! this substance group
190 //! \returns whether or not anything was changed
191 bool adjustToRemovedAtom(unsigned int atomIdx);
192
193 //! \returns whether or not anything the specified atom is part of the
194 //! definition of this substance group
195 bool includesAtom(unsigned int atomIdx) const;
196
197 //! adjusts our bond IDs to reflect that a bond has been removed from the
198 //! parent molecule
199 //! decrements all bond IDs that are higher than \c bondIdx
200 //! raises a \c SubstanceGroupException if \c bondIdx is actually part of
201 //! this substance group
202 //! \returns whether or not anything was changed
203 bool adjustToRemovedBond(unsigned int bondIdx);
204
205 //! \returns whether or not anything the specified bond is part of the
206 //! definition of this substance group
207 bool includesBond(unsigned int bondIdx) const;
208
209 //! Set owning molecule
210 //! This only updates atoms and bonds; parent sgroup has to be updated
211 //! independently, since parent might not exist at the time this is
212 //! called.
213 void setOwningMol(ROMol *mol);
214
215 bool operator==(const SubstanceGroup &other) const {
216 // we ignore brackets and cstates, which involve coordinates
217 return dp_mol == other.dp_mol && d_atoms == other.d_atoms &&
218 d_patoms == other.d_patoms && d_bonds == other.d_bonds &&
219 d_saps == other.d_saps;
220 }
221
222 private:
223 ROMol *dp_mol = nullptr; // owning molecule
224
225 bool d_isValid = true;
226
227 std::vector<unsigned int> d_atoms;
228 std::vector<unsigned int> d_patoms;
229 std::vector<unsigned int> d_bonds;
230
231 std::vector<Bracket> d_brackets;
232 std::vector<CState> d_cstates;
233 std::vector<AttachPoint> d_saps;
234}; // namespace RDKit
235
236namespace SubstanceGroupChecks {
237
238const std::vector<std::string> sGroupTypes = {
239 // polymer sgroups:
240 "SRU", "MON", "COP", "CRO", "GRA", "MOD", "MER", "ANY",
241 // formulations/mixtures:
242 "COM", "MIX", "FOR",
243 // other
244 "SUP", "MUL", "DAT", "GEN"};
245
246const std::vector<std::string> sGroupSubtypes = {"ALT", "RAN", "BLO"};
247const std::vector<std::string> sGroupConnectTypes = {"HH", "HT", "EU"};
248
249RDKIT_GRAPHMOL_EXPORT bool isValidType(const std::string &type);
250
251RDKIT_GRAPHMOL_EXPORT bool isValidSubType(const std::string &type);
252
253RDKIT_GRAPHMOL_EXPORT bool isValidConnectType(const std::string &type);
254
256 unsigned int id);
257
258} // namespace SubstanceGroupChecks
259
260//! \name SubstanceGroups and molecules
261//@{
262
263RDKIT_GRAPHMOL_EXPORT std::vector<SubstanceGroup> &getSubstanceGroups(
264 ROMol &mol);
265RDKIT_GRAPHMOL_EXPORT const std::vector<SubstanceGroup> &getSubstanceGroups(
266 const ROMol &mol);
267
268//! Add a new SubstanceGroup. A copy is added, so we can be sure that no other
269//! references to the SubstanceGroup exist.
270/*!
271 \param sgroup - SubstanceGroup to be added to the molecule.
272*/
274 SubstanceGroup sgroup);
275
276//! Removes SubstanceGroups which reference a particular atom index
277/*!
278 \param mol - molecule to be edited.
279 \param idx - atom index
280*/
282 RWMol &mol, unsigned int idx);
283//! Removes SubstanceGroups which reference a particular bond index
284/*!
285 \param mol - molecule to be edited.
286 \param idx - bond index
287*/
289 RWMol &mol, unsigned int idx);
290//@}
291
292} // namespace RDKit
293
294//! allows SubstanceGroup objects to be dumped to streams
295RDKIT_GRAPHMOL_EXPORT std::ostream &operator<<(std::ostream &target,
296 const RDKit::SubstanceGroup &sg);
297#endif
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
RDKIT_GRAPHMOL_EXPORT std::ostream & operator<<(std::ostream &target, const RDKit::SubstanceGroup &sg)
allows SubstanceGroup objects to be dumped to streams
RDProps & operator=(const RDProps &rhs)
Definition: RDProps.h:24
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:32
used to indicate errors from incorrect sgroup access
SubstanceGroupException(const std::string &msg)
construct with an error message
SubstanceGroupException(const char *msg)
construct with an error message
The class for representing SubstanceGroups.
const std::vector< unsigned int > & getBonds() const
void addBondWithIdx(unsigned int idx)
void setOwningMol(ROMol *mol)
SubstanceGroup & operator=(const SubstanceGroup &other)=default
void setParentAtoms(std::vector< unsigned int > patoms)
void addAttachPoint(unsigned int aIdx, int lvIdx, const std::string &idStr)
void setBonds(std::vector< unsigned int > bonds)
~SubstanceGroup()=default
Destructor.
void setIsValid(bool isValid)
set whether or not this group is valid; invalid groups must be ignored.
const std::vector< unsigned int > & getAtoms() const
void addParentAtomWithBookmark(int mark)
void setAtoms(std::vector< unsigned int > atoms)
bool adjustToRemovedBond(unsigned int bondIdx)
void addCState(unsigned int bondIdx, const RDGeom::Point3D &vector)
ROMol & getOwningMol() const
Get the molecule that owns this instance.
const std::vector< Bracket > & getBrackets() const
SubstanceGroup()=delete
No default constructor.
bool adjustToRemovedAtom(unsigned int atomIdx)
bool operator==(const SubstanceGroup &other) const
BondType
Bond type (see V3000 spec)
SubstanceGroup(const SubstanceGroup &other)=default
SubstanceGroup(SubstanceGroup &&other) noexcept
void addBondWithBookmark(int mark)
const std::vector< unsigned int > & getParentAtoms() const
void addAtomWithBookmark(int mark)
bool includesAtom(unsigned int atomIdx) const
std::vector< Bracket > & getBrackets()
SubstanceGroup(ROMol *owning_mol, const std::string &type)
void addParentAtomWithIdx(unsigned int idx)
void addAtomWithIdx(unsigned int idx)
const std::vector< CState > & getCStates() const
std::array< RDGeom::Point3D, 3 > Bracket
void addBracket(const Bracket &bracket)
const std::vector< AttachPoint > & getAttachPoints() const
std::vector< CState > & getCStates()
bool hasOwningMol() const
returns whether or not this belongs to a molecule
bool includesBond(unsigned int bondIdx) const
std::vector< AttachPoint > & getAttachPoints()
BondType getBondType(unsigned int bondIdx) const
SubstanceGroup & operator=(SubstanceGroup &&other) noexcept
unsigned int getIndexInMol() const
#define RDKIT_GRAPHMOL_EXPORT
Definition: export.h:217
RDKIT_GRAPHMOL_EXPORT bool isValidType(const std::string &type)
RDKIT_GRAPHMOL_EXPORT bool isValidSubType(const std::string &type)
const std::vector< std::string > sGroupConnectTypes
RDKIT_GRAPHMOL_EXPORT bool isSubstanceGroupIdFree(const ROMol &mol, unsigned int id)
RDKIT_GRAPHMOL_EXPORT bool isValidConnectType(const std::string &type)
const std::vector< std::string > sGroupSubtypes
const std::vector< std::string > sGroupTypes
Std stuff.
Definition: Abbreviations.h:18
RDKIT_GRAPHMOL_EXPORT std::vector< SubstanceGroup > & getSubstanceGroups(ROMol &mol)
RDKIT_GRAPHMOL_EXPORT unsigned int addSubstanceGroup(ROMol &mol, SubstanceGroup sgroup)
RDKIT_GRAPHMOL_EXPORT void removeSubstanceGroupsReferencingBond(RWMol &mol, unsigned int idx)
Removes SubstanceGroups which reference a particular bond index.
RDKIT_GRAPHMOL_EXPORT void removeSubstanceGroupsReferencingAtom(RWMol &mol, unsigned int idx)
Removes SubstanceGroups which reference a particular atom index.
bool operator==(const AttachPoint &other) const
bool operator==(const CState &other) const