RDKit
Open-source cheminformatics and machine learning.
MolOps.h
Go to the documentation of this file.
1//
2// Copyright (C) 2001-2021 Greg Landrum and Rational Discovery LLC
3// Copyright (c) 2014, Novartis Institutes for BioMedical Research Inc.
4//
5// @@ All Rights Reserved @@
6// This file is part of the RDKit.
7// The contents are covered by the terms of the BSD license
8// which is included in the file license.txt, found at the root
9// of the RDKit source tree.
10//
11#include <RDGeneral/export.h>
12#ifndef _RD_MOL_OPS_H_
13#define _RD_MOL_OPS_H_
14
15#include <vector>
16#include <map>
17#include <list>
19#include <boost/smart_ptr.hpp>
20#include <boost/dynamic_bitset.hpp>
22#include <RDGeneral/types.h>
23#include "SanitException.h"
24
25RDKIT_GRAPHMOL_EXPORT extern const int ci_LOCAL_INF;
26namespace RDKit {
27class ROMol;
28class RWMol;
29class Atom;
30class Bond;
31class Conformer;
32typedef std::vector<double> INVAR_VECT;  //!< a vector of doubles (NOTE(review): presumably holds invariants, going by the name -- confirm)
33typedef INVAR_VECT::iterator INVAR_VECT_I;  //!< mutable iterator over an INVAR_VECT
34typedef INVAR_VECT::const_iterator INVAR_VECT_CI;  //!< read-only iterator over an INVAR_VECT
35
36//! \brief Groups a variety of molecular query and transformation operations.
37namespace MolOps {
38
39//! return the number of electrons available on an atom to donate for
40/// aromaticity
41/*!
42 The result is determined using the default valency, number of lone pairs,
43 number of bonds and the formal charge. Note that the atom may not donate
44 all of these electrons to a ring for aromaticity (also used in Conjugation
45 and hybridization code).
46
47 \param at the atom of interest
48
49 \return the number of electrons
50*/
52
53//! sums up all atomic formal charges and returns the result
55
56//! returns whether or not the given Atom is involved in a conjugated bond
58
59//! find fragments (disconnected components of the molecular graph)
60/*!
61
62 \param mol the molecule of interest
63 \param mapping used to return the mapping of Atoms->fragments.
64 On return \c mapping will be <tt>mol.getNumAtoms()</tt> long
65 and will contain the fragment assignment for each Atom
66
67 \return the number of fragments found.
68
69*/
70RDKIT_GRAPHMOL_EXPORT unsigned int getMolFrags(const ROMol &mol,
71 std::vector<int> &mapping);
72//! find fragments (disconnected components of the molecular graph)
73/*!
74
75 \param mol the molecule of interest
76 \param frags used to return the Atoms in each fragment
77 On return \c frags will be \c numFrags long, and each entry
78 will contain the indices of the Atoms in that fragment.
79
80 \return the number of fragments found.
81
82*/
84 const ROMol &mol, std::vector<std::vector<int>> &frags);
85
86//! splits a molecule into its component fragments
87/// (disconnected components of the molecular graph)
88/*!
89
90 \param mol the molecule of interest
91 \param sanitizeFrags toggles sanitization of the fragments after
92 they are built
93 \param frags used to return the mapping of Atoms->fragments.
94 if provided, \c frags will be <tt>mol.getNumAtoms()</tt> long
95 on return and will contain the fragment assignment for each Atom
96 \param fragsMolAtomMapping used to return the Atoms in each fragment
97 if provided, \c fragsMolAtomMapping will be \c numFrags long on return,
98 and each entry will contain the indices of the Atoms in that fragment.
99 \param copyConformers toggles copying conformers of the fragments after
100 they are built
101 \return a vector of the fragments as smart pointers to ROMols
102
103*/
104RDKIT_GRAPHMOL_EXPORT std::vector<boost::shared_ptr<ROMol>> getMolFrags(
105 const ROMol &mol, bool sanitizeFrags = true,
106 std::vector<int> *frags = nullptr,
107 std::vector<std::vector<int>> *fragsMolAtomMapping = nullptr,
108 bool copyConformers = true);
109
110//! splits a molecule into pieces based on labels assigned using a query
111/*!
112 \tparam T the type of the labels returned by \c query (also the key type of the returned map)
113 \param mol the molecule of interest
114 \param query the query used to "label" the molecule for fragmentation
115 \param sanitizeFrags toggles sanitization of the fragments after
116 they are built
117 \param whiteList if provided, only labels in the list will be kept
118 \param negateList if true, the white list logic will be inverted: only labels
119 not in the list will be kept
120
121 \return a map of the fragments and their labels
122
123*/
124template <typename T>
125RDKIT_GRAPHMOL_EXPORT std::map<T, boost::shared_ptr<ROMol>>
126getMolFragsWithQuery(const ROMol &mol, T (*query)(const ROMol &, const Atom *),
127 bool sanitizeFrags = true,
128 const std::vector<T> *whiteList = nullptr,
129 bool negateList = false);
130
131#if 0
132 //! finds a molecule's minimum spanning tree (MST) -- currently disabled: this declaration sits inside an "if 0" block
133 /*!
134 \param mol the molecule of interest
135 \param mst used to return the MST as a vector of bond indices
136 */
137 RDKIT_GRAPHMOL_EXPORT void findSpanningTree(const ROMol &mol,std::vector<int> &mst);
138#endif
139
140//! \name Dealing with hydrogens
141//! @{
142
143//! returns a copy of a molecule with hydrogens added in as explicit Atoms
144/*!
145 \param mol the molecule to add Hs to
146 \param explicitOnly (optional) if this is \c true, only explicit Hs will be
147 added
148 \param addCoords (optional) if this is \c true, estimates for the atomic
149 coordinates
150 of the added Hs will be used.
151 \param onlyOnAtoms (optional) if provided, this should be a vector of
152 IDs of the atoms that will be considered for H addition.
153 \param addResidueInfo (optional) if this is \c true, add residue info to
154 hydrogen atoms (useful for PDB files).
155
156 \return the new molecule
157
158 <b>Notes:</b>
159 - it makes no sense to use the \c addCoords option if the molecule's
160 heavy
161 atoms don't already have coordinates.
162 - the caller is responsible for <tt>delete</tt>ing the pointer this
163 returns.
164 */
165RDKIT_GRAPHMOL_EXPORT ROMol *addHs(const ROMol &mol, bool explicitOnly = false,
166 bool addCoords = false,
167 const UINT_VECT *onlyOnAtoms = nullptr,
168 bool addResidueInfo = false);
169//! \overload
170/// modifies the molecule in place instead of returning a copy
171RDKIT_GRAPHMOL_EXPORT void addHs(RWMol &mol, bool explicitOnly = false,
172 bool addCoords = false,
173 const UINT_VECT *onlyOnAtoms = nullptr,
174 bool addResidueInfo = false);
175
176//! Sets Cartesian coordinates for a terminal atom.
177//! Useful for growing an atom off a molecule with sensible
178//! coordinates based on the geometry of the neighbor.
179/*!
180 NOTE: this sets appropriate coordinates in all of the molecule's conformers.
181 \param mol the molecule the atoms belong to
182 \param idx index of the terminal atom whose coordinates are set
183 \param otherIdx index of the bonded neighbor atom
184*/
185
187 unsigned int otherIdx);
188
189//! returns a copy of a molecule with hydrogens removed
190/*!
191 \param mol the molecule to remove Hs from
192 \param implicitOnly (optional) if this is \c true, only implicit Hs will be
193 removed
194 \param updateExplicitCount (optional) If this is \c true, when explicit Hs
195 are removed
196 from the graph, the heavy atom to which they are bound will have its
197 counter of
198 explicit Hs increased.
199 \param sanitize (optional) If this is \c true, the final molecule will be
200 sanitized
201
202 \return the new molecule
203
204 <b>Notes:</b>
205 - Hydrogens which aren't connected to a heavy atom will not be
206 removed. This prevents molecules like <tt>"[H][H]"</tt> from having
207 all atoms removed.
208 - Labelled hydrogen (e.g. atoms with atomic number=1, but mass > 1),
209 will not be removed.
210 - two coordinate Hs, like the central H in C[H-]C, will not be removed
211 - Hs connected to dummy atoms will not be removed
212 - Hs that are part of the definition of double bond Stereochemistry
213 will not be removed
214 - Hs that are not connected to anything else will not be removed
215 - Hs that have a query defined (i.e. hasQuery() returns true) will not
216 be removed
217
218 - the caller is responsible for <tt>delete</tt>ing the pointer this
219 returns.
220*/
221
223 bool implicitOnly = false,
224 bool updateExplicitCount = false,
225 bool sanitize = true);
226//! \overload
227/// modifies the molecule in place instead of returning a copy
228RDKIT_GRAPHMOL_EXPORT void removeHs(RWMol &mol, bool implicitOnly = false,
229 bool updateExplicitCount = false,
230 bool sanitize = true);
232 bool removeDegreeZero = false; /**< hydrogens that have no bonds */
233 bool removeHigherDegrees = false; /**< hydrogens with two (or more) bonds */
234 bool removeOnlyHNeighbors =
235 false; /**< hydrogens with bonds only to other hydrogens */
236 bool removeIsotopes = false; /**< hydrogens with non-default isotopes */
237 bool removeAndTrackIsotopes = false; /**< removes hydrogens with non-default
238 isotopes and keeps track of the heavy atom the isotopes were attached to in
239 the private _isotopicHs atom property, so they are re-added by AddHs() as the
240 original isotopes if possible*/
241 bool removeDummyNeighbors =
242 false; /**< hydrogens with at least one dummy-atom neighbor */
243 bool removeDefiningBondStereo =
244 false; /**< hydrogens defining bond stereochemistry */
245 bool removeWithWedgedBond = true; /**< hydrogens with wedged bonds to them */
246 bool removeWithQuery = false; /**< hydrogens with queries defined */
247 bool removeMapped = true; /**< mapped hydrogens */
248 bool removeInSGroups = true; /**< part of a SubstanceGroup.
249 An H atom will only be removed if it doesn't cause any SGroup to become empty,
250 and if it doesn't play a special role in the SGroup (XBOND, attach point
251 or a CState) */
252 bool showWarnings = true; /**< display warnings for Hs that are not removed */
253 bool removeNonimplicit = true; /**< DEPRECATED equivalent of !implicitOnly */
254 bool updateExplicitCount =
255 false; /**< DEPRECATED equivalent of updateExplicitCount */
256 bool removeHydrides = true; /**< Removing Hydrides */
257 bool removeNontetrahedralNeighbors =
258 false; /**< remove Hs which are bonded to atoms with specified
259 non-tetrahedral stereochemistry */
260};
261//! \overload
262/// modifies the molecule in place
264 bool sanitize = true);
265//! \overload
266/// The caller owns the pointer this returns
268 const RemoveHsParameters &ps,
269 bool sanitize = true);
270
271//! removes all Hs from a molecule, modifying it in place
272RDKIT_GRAPHMOL_EXPORT void removeAllHs(RWMol &mol, bool sanitize = true);
273//! \overload
274/// The caller owns the pointer this returns
276 bool sanitize = true);
277
278//! returns a copy of a molecule with hydrogens removed and added as queries
279//! to the heavy atoms to which they are bound.
280/*!
281 This is really intended to be used with molecules that contain QueryAtoms
282
283 \param mol the molecule to remove Hs from
284
285 \return the new molecule
286
287 <b>Notes:</b>
288 - Atoms that do not already have hydrogen count queries will have one
289 added, other H-related queries will not be touched. Examples:
290 - C[H] -> [C;!H0]
291 - [C;H1][H] -> [C;H1]
292 - [C;H2][H] -> [C;H2]
293 - Hydrogens which aren't connected to a heavy atom will not be
294 removed. This prevents molecules like <tt>"[H][H]"</tt> from having
295 all atoms removed.
296 - the caller is responsible for <tt>delete</tt>ing the pointer this
297 returns.
298 - By default all hydrogens are removed, however if
299 mergeUnmappedOnly is true, any hydrogen participating
300 in an atom map will be retained
301
302*/
304 bool mergeUnmappedOnly = false,
305 bool mergeIsotopes = false);
306//! \overload
307/// modifies the molecule in place
309 bool mergeUnmappedOnly = false,
310 bool mergeIsotopes = false);
311
312typedef enum {
319 ADJUST_IGNOREALL = 0xFFFFFFF
321
322//! Parameters controlling the behavior of MolOps::adjustQueryProperties
323/*!
324
325 Note that some of the options here are either directly contradictory or make
326 no sense when combined with each other. We generally assume that client code
327 is doing something sensible and don't attempt to detect possible conflicts or
328 problems.
329
330*/
332 bool adjustDegree = true; /**< add degree queries */
333 std::uint32_t adjustDegreeFlags = ADJUST_IGNOREDUMMIES | ADJUST_IGNORECHAINS;
334
335 bool adjustRingCount = false; /**< add ring-count queries */
336 std::uint32_t adjustRingCountFlags =
338
339 bool makeDummiesQueries = true; /**< convert dummy atoms without isotope
340 labels to any-atom queries */
341
342 bool aromatizeIfPossible = true; /**< perceive and set aromaticity */
343
344 bool makeBondsGeneric =
345 false; /**< convert bonds to generic queries (any bonds) */
346 std::uint32_t makeBondsGenericFlags = ADJUST_IGNORENONE;
347
348 bool makeAtomsGeneric =
349 false; /**< convert atoms to generic queries (any atoms) */
350 std::uint32_t makeAtomsGenericFlags = ADJUST_IGNORENONE;
351
352 bool adjustHeavyDegree = false; /**< adjust the heavy-atom degree instead of
353 overall degree */
354 std::uint32_t adjustHeavyDegreeFlags =
356
357 bool adjustRingChain = false; /**< add ring-chain queries */
358 std::uint32_t adjustRingChainFlags = ADJUST_IGNORENONE;
359
360 bool useStereoCareForBonds =
361 false; /**< remove stereochemistry info from double bonds that do not have
362 the stereoCare property set */
363
364 bool adjustConjugatedFiveRings =
365 false; /**< sets bond queries in conjugated five-rings to
366 SINGLE|DOUBLE|AROMATIC */
367
368 bool setMDLFiveRingAromaticity =
369 false; /**< uses the 5-ring aromaticity behavior of the (former) MDL
370 software as documented in the Chemical Representation Guide */
371
372 bool adjustSingleBondsToDegreeOneNeighbors =
373 false; /**< sets single bonds between aromatic or conjugated atoms and
374 degree one neighbors to SINGLE|AROMATIC */
375
376 bool adjustSingleBondsBetweenAromaticAtoms =
377 false; /**< sets non-ring single bonds between two aromatic or conjugated
378 atoms to SINGLE|AROMATIC */
379 //! \brief returns an AdjustQueryParameters object with all adjustments
380 //! disabled
383 res.adjustDegree = false;
384 res.makeDummiesQueries = false;
385 res.aromatizeIfPossible = false;
386 return res;
387 }
389};
390
391//! updates an AdjustQueryParameters object from a JSON string
393 MolOps::AdjustQueryParameters &p, const std::string &json);
394
395//! returns a copy of a molecule with query properties adjusted
396/*!
397 \param mol the molecule to adjust
398 \param params controls the adjustments made
399
400 \return the new molecule, the caller owns the memory
401*/
403 const ROMol &mol, const AdjustQueryParameters *params = nullptr);
404//! \overload
405/// modifies the molecule in place
407 RWMol &mol, const AdjustQueryParameters *params = nullptr);
408
409//! returns a copy of a molecule with the atoms renumbered
410/*!
411
412 \param mol the molecule to work with
413 \param newOrder the new ordering of the atoms (should be numAtoms long)
414 for example: if newOrder is [3,2,0,1], then atom 3 in the original
415 molecule will be atom 0 in the new one
416
417 \return the new molecule
418
419 <b>Notes:</b>
420 - the caller is responsible for <tt>delete</tt>ing the pointer this
421 returns.
422
423*/
425 const ROMol &mol, const std::vector<unsigned int> &newOrder);
426
427//! @}
428
429//! \name Sanitization
430//! @{
431
432typedef enum {
444 SANITIZE_ALL = 0xFFFFFFF
446
447//! \brief carries out a collection of tasks for cleaning up a molecule and
448/// ensuring
449//! that it makes "chemical sense"
450/*!
451 This function calls the following in sequence
452 -# MolOps::cleanUp()
453 -# mol.updatePropertyCache()
454 -# MolOps::symmetrizeSSSR()
455 -# MolOps::Kekulize()
456 -# MolOps::assignRadicals()
457 -# MolOps::setAromaticity()
458 -# MolOps::setConjugation()
459 -# MolOps::setHybridization()
460 -# MolOps::cleanupChirality()
461 -# MolOps::adjustHs()
462
463 \param mol : the RWMol to be cleaned
464
465 \param operationThatFailed : the first (if any) sanitization operation that
466 fails is set here.
467 The values are taken from the \c SanitizeFlags
468 enum. On success, the value is \c
469 SanitizeFlags::SANITIZE_NONE
470
471 \param sanitizeOps : the bits here are used to set which sanitization
472 operations are carried out. The elements of the \c
473 SanitizeFlags enum define the operations.
474
475 <b>Notes:</b>
476 - If there is a failure in the sanitization, a \c MolSanitizeException
477 will be thrown.
478 - in general the user of this function should cast the molecule following
479 this function to a ROMol, so that new atoms and bonds cannot be added to
480 the molecule and screw up the sanitizing that has been done here
481*/
483 unsigned int &operationThatFailed,
484 unsigned int sanitizeOps = SANITIZE_ALL);
485//! \overload
487
488//! \brief Identifies chemistry problems (things that don't make chemical
489//! sense) in a molecule
490/*!
491 This function uses the operations in sanitizeMol but does not change
492 the input structure and returns a list of the problems encountered instead
493 of stopping at the first failure.
494
495 The problems this looks for come from the sanitization operations:
496 -# mol.updatePropertyCache() : Unreasonable valences
497 -# MolOps::Kekulize() : Unkekulizable ring systems, aromatic atoms not
498 in rings, aromatic bonds to non-aromatic atoms.
499
500 \param mol : the ROMol to be checked (it is not modified)
501
502 \param sanitizeOps : the bits here are used to set which sanitization
503 operations are carried out. The elements of the \c
504 SanitizeFlags enum define the operations.
505
506 \return a vector of \c std::unique_ptr to \c MolSanitizeException objects
507 that indicate what problems were encountered
508
509*/
511std::vector<std::unique_ptr<MolSanitizeException>> detectChemistryProblems(
512 const ROMol &mol, unsigned int sanitizeOps = SANITIZE_ALL);
513
514//! Possible aromaticity models
515/*!
516- \c AROMATICITY_DEFAULT at the moment always uses \c AROMATICITY_RDKIT
517- \c AROMATICITY_RDKIT is the standard RDKit model (as documented in the RDKit
518Book)
519- \c AROMATICITY_SIMPLE only considers 5- and 6-membered simple rings (it
520does not consider the outer envelope of fused rings)
521- \c AROMATICITY_MDL
522- \c AROMATICITY_CUSTOM uses a caller-provided function
523*/
524typedef enum {
525 AROMATICITY_DEFAULT = 0x0, ///< future proofing
529 AROMATICITY_CUSTOM = 0xFFFFFFF ///< use a function
531
532//! Sets up the aromaticity for a molecule
533/*!
534
535 This is what happens here:
536 -# find all the simple rings by calling the findSSSR function
537 -# loop over all the Atoms in each ring and mark them if they are
538 candidates
539 for aromaticity. A ring atom is a candidate if it can spare electrons
540 to the ring and if it's from the first two rows of the periodic table.
541 -# based on the candidate atoms, mark the rings to be either candidates
542 or non-candidates. A ring is a candidate only if all its atoms are
543 candidates
544 -# apply Hueckel rule to each of the candidate rings to check if the ring
545 can be
546 aromatic
547
548 \param mol the RWMol of interest
549 \param model the aromaticity model to use
550 \param func a custom function for assigning aromaticity (only used when
551 model=\c AROMATICITY_CUSTOM)
552
553 \return >0 on success, <= 0 otherwise
554
555 <b>Assumptions:</b>
556 - Kekulization has been done (i.e. \c MolOps::Kekulize() has already
557 been called)
558
559*/
562 int (*func)(RWMol &) = nullptr);
563
564//! Designed to be called by the sanitizer to handle special cases before
565/// anything is done.
566/*!
567
568 Currently this:
569 - modifies nitro groups, so that the nitrogen does not have an
570 unreasonable valence of 5, as follows:
571 - the nitrogen gets a positive charge
572 - one of the oxygens gets a negative charge and the double bond to
573 this oxygen is changed to a single bond. The net result is that nitro groups
574 can be counted on to be: \c "[N+](=O)[O-]"
575 - modifies halogen-oxygen containing species as follows:
576 \c [Cl,Br,I](=O)(=O)(=O)O -> [X+3]([O-])([O-])([O-])O
577 \c [Cl,Br,I](=O)(=O)O -> [X+3]([O-])([O-])O
578 \c [Cl,Br,I](=O)O -> [X+]([O-])O
579 - converts the substructure [N,C]=P(=O)-* to [N,C]=[P+](-[O-])-*
580
581 \param mol the molecule of interest
582
583*/
585
586//! Called by the sanitizer to assign radical counts to atoms
588
589//! adjust the number of implicit and explicit Hs for special cases
590/*!
591
592 Currently this:
593 - modifies aromatic nitrogens so that, when appropriate, they have an
594 explicit H marked (e.g. so that we get things like \c "c1cc[nH]cc1")
595
596 \param mol the molecule of interest
597
598 <b>Assumptions</b>
599 - this is called after the molecule has been sanitized,
600 aromaticity has been perceived, and the implicit valence of
601 everything has been calculated.
602
603*/
605
606//! Kekulizes the molecule (the molecule is modified in place)
607/*!
608
609 \param mol the molecule of interest
610
611 \param markAtomsBonds if this is set to true, \c isAromatic boolean settings
612 on both the Bonds and Atoms are turned to false following the Kekulization,
613 otherwise they are left alone in their original state.
614
615 \param maxBackTracks the maximum number of attempts at back-tracking. The
616 algorithm uses a back-tracking procedure to revisit a previous setting of
617 a double bond if we hit a wall in the kekulization process.
618
619 <b>Notes:</b>
620 - this does not modify query bonds which have bond type queries (like those
621 which come from SMARTS) or rings containing them.
622 - even if \c markAtomsBonds is \c false the \c BondType for all modified
623 aromatic bonds will be changed from \c RDKit::Bond::AROMATIC to \c
624 RDKit::Bond::SINGLE or RDKit::Bond::DOUBLE during Kekulization.
625
626*/
627RDKIT_GRAPHMOL_EXPORT void Kekulize(RWMol &mol, bool markAtomsBonds = true,
628 unsigned int maxBackTracks = 100);
629//! Kekulizes the molecule if possible. If the kekulization fails the molecule
630//! will not be modified
631/*!
632
633 \param mol the molecule of interest
634
635 \param markAtomsBonds if this is set to true, \c isAromatic boolean settings
636 on both the Bonds and Atoms are turned to false following the Kekulization,
637 otherwise they are left alone in their original state.
638
639 \param maxBackTracks the maximum number of attempts at back-tracking. The
640 algorithm uses a back-tracking procedure to revisit a previous setting of
641 double bond if we hit a wall in the kekulization process
642
643 \returns whether or not the kekulization succeeded
644
645 <b>Notes:</b>
646 - even if \c markAtomsBonds is \c false the \c BondType for all aromatic
647 bonds will be changed from \c RDKit::Bond::AROMATIC to \c
648 RDKit::Bond::SINGLE or RDKit::Bond::DOUBLE during Kekulization.
649
650*/
652 bool markAtomsBonds = true,
653 unsigned int maxBackTracks = 100);
654
655//! flags the molecule's conjugated bonds
657
658//! calculates and sets the hybridization of all a molecule's Atoms
660
661//! @}
662
663//! \name Ring finding and SSSR
664//! @{
665
666//! finds a molecule's Smallest Set of Smallest Rings
667/*!
668 Currently this implements a modified form of Figueras algorithm
669 (JCICS - Vol. 36, No. 5, 1996, 986-991)
670
671 \param mol the molecule of interest
672 \param res used to return the vector of rings. Each entry is a vector with
673 atom indices. This information is also stored in the molecule's
674 RingInfo structure, so this argument is optional (see overload)
675
676 \return number of smallest rings found
677
678 Base algorithm:
679 - The original algorithm starts by finding representative degree 2
680 nodes.
681 - Representative because if a series of deg 2 nodes are found only
682 one of them is picked.
683 - The smallest ring around each of them is found.
684 - The bonds that connect to this degree 2 node are then chopped off,
685 yielding
686 new deg two nodes
687 - The process is repeated on the new deg 2 nodes.
688 - If no deg 2 nodes are found, a deg 3 node is picked. The smallest ring
689 with it is found. A bond from this is "carefully" (look in the paper)
690 selected and chopped, yielding deg 2 nodes. The process is same as
691 above once this is done.
692
693 Our Modifications:
694 - If available, more than one smallest ring around a representative deg 2
695 node will be computed and stored
696 - Typically 3 rings are found around a degree 3 node (when no deg 2s are
697 available)
698 and all the bonds to that node are chopped.
699 - The extra rings that were found in this process are removed after all
700 the nodes have been covered.
701
702 These changes were motivated by several factors:
703 - We believe the original algorithm fails to find the correct SSSR
704 (finds the correct number of them but the wrong ones) on some sample
705 mols
706 - Since SSSR may not be unique, a post-SSSR step to symmetrize may be
707 done. The extra rings this process adds can be quite useful.
708*/
710 std::vector<std::vector<int>> &res);
711//! \overload
713 const ROMol &mol, std::vector<std::vector<int>> *res = nullptr);
714
715//! use a DFS algorithm to identify ring bonds and atoms in a molecule
716/*!
717 \b NOTE: though the RingInfo structure is populated by this function,
718 the only really reliable calls that can be made are to check if
719 mol.getRingInfo().numAtomRings(idx) or mol.getRingInfo().numBondRings(idx)
720 return values >0
721*/
723
725
726//! symmetrize the molecule's Smallest Set of Smallest Rings
727/*!
728 SSSR rings obtained from "findSSSR" can be non-unique in some cases.
729 For example, cubane has five SSSR rings, not six as one would hope.
730
731 This function adds additional rings to the SSSR list if necessary
732 to make the list symmetric, e.g. all atoms in cubane will be part of the
733 same number of SSSRs. This function chooses these extra rings from the extra
734 rings computed and discarded during findSSSR. The new rings are chosen such
735 that:
736 - replacing a same sized ring in the SSSR list with an extra ring yields
737 the same union of bond IDs as the original SSSR list
738
739 \param mol - the molecule of interest
740 \param res used to return the vector of rings. Each entry is a vector with
741 atom indices. This information is also stored in the molecule's
742 RingInfo structure, so this argument is optional (see overload)
743
744 \return the total number of rings = (new rings + old SSSRs)
745
746 <b>Notes:</b>
747 - if no SSSR rings are found on the molecule - MolOps::findSSSR() is called
748 first
749*/
751 std::vector<std::vector<int>> &res);
752//! \overload
754
755//! @}
756
757//! \name Shortest paths and other matrices
758//! @{
759
760//! returns a molecule's adjacency matrix
761/*!
762 \param mol the molecule of interest
763 \param useBO toggles use of bond orders in the matrix
764 \param emptyVal sets the empty value (for non-adjacent atoms)
765 \param force forces calculation of the matrix, even if already
766 computed
767 \param propNamePrefix used to set the cached property name
768
769 \return the adjacency matrix.
770
771 <b>Notes</b>
772 - The result of this is cached in the molecule's local property
773 dictionary, which will handle deallocation. The caller should <b>not</b> \c
774 delete this pointer.
775
776*/
778 const ROMol &mol, bool useBO = false, int emptyVal = 0, bool force = false,
779 const char *propNamePrefix = nullptr,
780 const boost::dynamic_bitset<> *bondsToUse = nullptr);
781
782//! Computes the molecule's topological distance matrix
783/*!
784 Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
785
786 \param mol the molecule of interest
787 \param useBO toggles use of bond orders in the matrix
788 \param useAtomWts sets the diagonal elements of the result to
789 6.0/(atomic number) so that the matrix can be used to calculate
790 Balaban J values. This does not affect the bond weights.
791 \param force forces calculation of the matrix, even if already
792 computed
793 \param propNamePrefix used to set the cached property name
794
795 \return the distance matrix.
796
797 <b>Notes</b>
798 - The result of this is cached in the molecule's local property
799 dictionary, which will handle deallocation. The caller should <b>not</b> \c
800 delete this pointer.
801
802
803*/
805 const ROMol &mol, bool useBO = false, bool useAtomWts = false,
806 bool force = false, const char *propNamePrefix = nullptr);
807
808//! Computes the molecule's topological distance matrix
809/*!
810 Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
811
812 \param mol the molecule of interest
813 \param activeAtoms only elements corresponding to these atom indices
814 will be included in the calculation
815 \param bonds only bonds found in this list will be included in the
816 calculation
817 \param useBO toggles use of bond orders in the matrix
818 \param useAtomWts sets the diagonal elements of the result to
819 6.0/(atomic number) so that the matrix can be used to calculate
820 Balaban J values. This does not affect the bond weights.
821
822 \return the distance matrix.
823
824 <b>Notes</b>
825 - The results of this call are not cached, the caller <b>should</b> \c
826 delete
827 this pointer.
828
829
830*/
832 const ROMol &mol, const std::vector<int> &activeAtoms,
833 const std::vector<const Bond *> &bonds, bool useBO = false,
834 bool useAtomWts = false);
835
836//! Computes the molecule's 3D distance matrix
837/*!
838
839 \param mol the molecule of interest
840 \param confId the conformer to use
841 \param useAtomWts sets the diagonal elements of the result to
842 6.0/(atomic number)
843 \param force forces calculation of the matrix, even if already
844 computed
845 \param propNamePrefix used to set the cached property name
846 (if set to an empty string, the matrix will not be
847 cached)
848
849 \return the distance matrix.
850
851 <b>Notes</b>
852 - If propNamePrefix is not empty the result of this is cached in the
853 molecule's local property dictionary, which will handle deallocation.
854 In other cases the caller is responsible for freeing the memory.
855
856*/
858 const ROMol &mol, int confId = -1, bool useAtomWts = false,
859 bool force = false, const char *propNamePrefix = nullptr);
860//! Find the shortest path between two atoms
861/*!
862 Uses the Bellman-Ford algorithm
863
864 \param mol molecule of interest
865 \param aid1 index of the first atom
866 \param aid2 index of the second atom
867
868 \return a std::list with the indices of the atoms along the shortest
869 path
870
871 <b>Notes:</b>
872 - the starting and end atoms are included in the path
873 - if no path is found, an empty path is returned
874
875*/
876RDKIT_GRAPHMOL_EXPORT std::list<int> getShortestPath(const ROMol &mol, int aid1,
877 int aid2);
878
879//! @}
880
881//! \name Stereochemistry
882//! @{
883
884//! removes bogus chirality markers (those on non-sp3 centers):
886
887//! \brief Uses a conformer to assign ChiralTypes to a molecule's atoms
888/*!
889 \param mol the molecule of interest
890 \param confId the conformer to use
891 \param replaceExistingTags if this flag is true, any existing atomic chiral
892 tags will be replaced
893
894 If the conformer provided is not a 3D conformer, nothing will be done.
895
896
897 NOTE that this does not check to see if atoms are chiral centers (i.e. all
898 substituents are different), it merely sets the chiral type flags based on the
899 coordinates and atom ordering. Use \c assignStereochemistryFrom3D() if you
900 want chiral flags only on actual stereocenters.
901*/
903 ROMol &mol, int confId = -1, bool replaceExistingTags = true);
904
905//! \brief Uses a conformer to assign ChiralTypes to a molecule's atoms and
906//! stereo flags to its bonds
907/*!
908
909 \param mol the molecule of interest
910 \param confId the conformer to use
911 \param replaceExistingTags if this flag is true, any existing info about
912 stereochemistry will be replaced
913
914 If the conformer provided is not a 3D conformer, nothing will be done.
915*/
917 ROMol &mol, int confId = -1, bool replaceExistingTags = true);
918
919//! \brief Use bond directions to assign ChiralTypes to a molecule's atoms and
920//! stereo flags to its bonds
921/*!
922
923 \param mol the molecule of interest
924 \param confId the conformer to use
925 \param replaceExistingTags if this flag is true, any existing info about
926 stereochemistry will be replaced
927*/
929 ROMol &mol, int confId = -1, bool replaceExistingTags = true);
930
931//! \deprecated: this function will be removed in a future release. Use
932//! setDoubleBondNeighborDirections() instead
934 int confId = -1);
935//! Sets bond directions based on double bond stereochemistry
937 ROMol &mol, const Conformer *conf = nullptr);
938//! removes directions from single bonds. Wiggly bonds will have the property
939//! _UnknownStereo set on them
941
942//! Assign CIS/TRANS bond stereochemistry tags based on neighboring
943//! directions
945
946//! Assign stereochemistry tags to atoms (i.e. R/S) and bonds (i.e. Z/E)
947/*!
948 Does the CIP stereochemistry assignment for the molecule's atoms
949 (R/S) and double bonds (Z/E). Chiral atoms will have a property
950 '_CIPCode' indicating their chiral code.
951
952 \param mol the molecule to use
953 \param cleanIt if true, any existing values of the property `_CIPCode`
954 will be cleared, atoms with a chiral specifier that aren't
955 actually chiral (e.g. atoms with duplicate
956 substituents or only 2 substituents, etc.) will have
957 their chiral code set to CHI_UNSPECIFIED. Bonds with
958 STEREOCIS/STEREOTRANS specified that have duplicate
959 substituents based upon the CIP atom ranks will be
960 marked STEREONONE.
961 \param force causes the calculation to be repeated even if it has
962 already been done
963 \param flagPossibleStereoCenters set the _ChiralityPossible property on
964 atoms that are possible stereocenters
965
966 <b>Notes:</b>
967 - Throughout we assume that we're working with a hydrogen-suppressed
968 graph.
969
970*/
972 ROMol &mol, bool cleanIt = false, bool force = false,
973 bool flagPossibleStereoCenters = false);
974//! Removes all stereochemistry information from atoms (i.e. R/S) and bonds
975/// (i.e. Z/E)
976/*!
977
978 \param mol the molecule of interest
979*/
981
982//! \brief finds bonds that could be cis/trans in a molecule and marks them as
983//! Bond::STEREOANY.
984/*!
985 \param mol the molecule of interest
986 \param cleanIt toggles removal of stereo flags from double bonds that can
987 not have stereochemistry
988
989 This function finds any double bonds that can potentially be part of
990 a cis/trans system. No attempt is made here to mark them cis or
991 trans. No attempt is made to detect double bond stereo in ring systems.
992
993 This function is useful in the following situations:
994 - when parsing a mol file; for the bonds marked here, coordinate
995 information on the neighbors can be used to identify cis or trans states
996 - when writing a mol file; bonds that can be cis/trans but not marked as
997 either need to be specially marked in the mol file
998 - finding double bonds with unspecified stereochemistry so they
999 can be enumerated for downstream 3D tools
1000
1001 The CIPranks on the neighboring atoms are checked in this function. The
1002 _CIPCode property is set to any on the double bond.
1003*/
1005 bool cleanIt = false);
1006//! \brief Uses the molParity atom property to assign ChiralType to a molecule's
1007//! atoms
1008/*!
1009 \param mol the molecule of interest
1010 \param replaceExistingTags if this flag is true, any existing atomic chiral
1011 tags will be replaced
1012*/
1014 ROMol &mol, bool replaceExistingTags = true);
1015
1016//! @}
1017
1018//! returns the number of atoms which have a particular property set
1020 const ROMol &mol, std::string prop);
1021
1022//! returns whether or not a molecule needs to have Hs added to it.
1024
1025namespace details {
1026//! not recommended for use in other code
1028 RWMol &mol, const boost::dynamic_bitset<> &atomsToUse,
1029 boost::dynamic_bitset<> bondsToUse, bool markAtomsBonds = true,
1030 unsigned int maxBackTracks = 100);
1031} // namespace details
1032
1033} // namespace MolOps
1034} // namespace RDKit
1035
1036#endif
RDKIT_GRAPHMOL_EXPORT const int ci_LOCAL_INF
The class for representing atoms.
Definition: Atom.h:68
The class for representing 2D or 3D conformation of a molecule.
Definition: Conformer.h:45
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:32
#define RDKIT_GRAPHMOL_EXPORT
Definition: export.h:225
RDKIT_GRAPHMOL_EXPORT void KekulizeFragment(RWMol &mol, const boost::dynamic_bitset<> &atomsToUse, boost::dynamic_bitset<> bondsToUse, bool markAtomsBonds=true, unsigned int maxBackTracks=100)
not recommended for use in other code
RDKIT_GRAPHMOL_EXPORT void cleanUp(RWMol &mol)
RDKIT_GRAPHMOL_EXPORT void assignStereochemistry(ROMol &mol, bool cleanIt=false, bool force=false, bool flagPossibleStereoCenters=false)
Assign stereochemistry tags to atoms (i.e. R/S) and bonds (i.e. Z/E)
RDKIT_GRAPHMOL_EXPORT bool KekulizeIfPossible(RWMol &mol, bool markAtomsBonds=true, unsigned int maxBackTracks=100)
RDKIT_GRAPHMOL_EXPORT ROMol * renumberAtoms(const ROMol &mol, const std::vector< unsigned int > &newOrder)
returns a copy of a molecule with the atoms renumbered
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFromBondDirs(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Use bond directions to assign ChiralTypes to a molecule's atoms and stereo flags to its bonds.
RDKIT_GRAPHMOL_EXPORT int setAromaticity(RWMol &mol, AromaticityModel model=AROMATICITY_DEFAULT, int(*func)(RWMol &)=nullptr)
Sets up the aromaticity for a molecule.
RDKIT_GRAPHMOL_EXPORT void findRingFamilies(const ROMol &mol)
RDKIT_GRAPHMOL_EXPORT bool needsHs(const ROMol &mol)
returns whether or not a molecule needs to have Hs added to it.
RDKIT_GRAPHMOL_EXPORT void fastFindRings(const ROMol &mol)
use a DFS algorithm to identify ring bonds and atoms in a molecule
RDKIT_GRAPHMOL_EXPORT std::map< T, boost::shared_ptr< ROMol > > getMolFragsWithQuery(const ROMol &mol, T(*query)(const ROMol &, const Atom *), bool sanitizeFrags=true, const std::vector< T > *whiteList=nullptr, bool negateList=false)
splits a molecule into pieces based on labels assigned using a query
RDKIT_GRAPHMOL_EXPORT int getFormalCharge(const ROMol &mol)
sums up all atomic formal charges and returns the result
AromaticityModel
Possible aromaticity models.
Definition: MolOps.h:524
@ AROMATICITY_RDKIT
Definition: MolOps.h:526
@ AROMATICITY_MDL
Definition: MolOps.h:528
@ AROMATICITY_CUSTOM
use a function
Definition: MolOps.h:529
@ AROMATICITY_DEFAULT
future proofing
Definition: MolOps.h:525
@ AROMATICITY_SIMPLE
Definition: MolOps.h:527
RDKIT_GRAPHMOL_EXPORT double * getDistanceMat(const ROMol &mol, bool useBO=false, bool useAtomWts=false, bool force=false, const char *propNamePrefix=nullptr)
Computes the molecule's topological distance matrix.
RDKIT_GRAPHMOL_EXPORT void setTerminalAtomCoords(ROMol &mol, unsigned int idx, unsigned int otherIdx)
RDKIT_GRAPHMOL_EXPORT void removeStereochemistry(ROMol &mol)
RDKIT_GRAPHMOL_EXPORT ROMol * adjustQueryProperties(const ROMol &mol, const AdjustQueryParameters *params=nullptr)
returns a copy of a molecule with query properties adjusted
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFromMolParity(ROMol &mol, bool replaceExistingTags=true)
Uses the molParity atom property to assign ChiralType to a molecule's atoms.
RDKIT_GRAPHMOL_EXPORT ROMol * mergeQueryHs(const ROMol &mol, bool mergeUnmappedOnly=false, bool mergeIsotopes=false)
RDKIT_GRAPHMOL_EXPORT unsigned int getMolFrags(const ROMol &mol, std::vector< int > &mapping)
find fragments (disconnected components of the molecular graph)
RDKIT_GRAPHMOL_EXPORT void adjustHs(RWMol &mol)
adjust the number of implicit and explicit Hs for special cases
RDKIT_GRAPHMOL_EXPORT void assignStereochemistryFrom3D(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Uses a conformer to assign ChiralTypes to a molecule's atoms and stereo flags to its bonds.
@ SANITIZE_ALL
Definition: MolOps.h:444
@ SANITIZE_SETAROMATICITY
Definition: MolOps.h:439
@ SANITIZE_NONE
Definition: MolOps.h:433
@ SANITIZE_PROPERTIES
Definition: MolOps.h:435
@ SANITIZE_SETCONJUGATION
Definition: MolOps.h:440
@ SANITIZE_SYMMRINGS
Definition: MolOps.h:436
@ SANITIZE_ADJUSTHS
Definition: MolOps.h:443
@ SANITIZE_CLEANUPCHIRALITY
Definition: MolOps.h:442
@ SANITIZE_FINDRADICALS
Definition: MolOps.h:438
@ SANITIZE_KEKULIZE
Definition: MolOps.h:437
@ SANITIZE_SETHYBRIDIZATION
Definition: MolOps.h:441
@ SANITIZE_CLEANUP
Definition: MolOps.h:434
RDKIT_GRAPHMOL_EXPORT int countAtomElec(const Atom *at)
RDKIT_GRAPHMOL_EXPORT void detectBondStereochemistry(ROMol &mol, int confId=-1)
RDKIT_GRAPHMOL_EXPORT void sanitizeMol(RWMol &mol, unsigned int &operationThatFailed, unsigned int sanitizeOps=SANITIZE_ALL)
carries out a collection of tasks for cleaning up a molecule and ensuring that it makes "chemical sense"
RDKIT_GRAPHMOL_EXPORT void parseAdjustQueryParametersFromJSON(MolOps::AdjustQueryParameters &p, const std::string &json)
updates an AdjustQueryParameters object from a JSON string
RDKIT_GRAPHMOL_EXPORT void removeAllHs(RWMol &mol, bool sanitize=true)
removes all Hs from a molecule
RDKIT_GRAPHMOL_EXPORT void setBondStereoFromDirections(ROMol &mol)
RDKIT_GRAPHMOL_EXPORT double * get3DDistanceMat(const ROMol &mol, int confId=-1, bool useAtomWts=false, bool force=false, const char *propNamePrefix=nullptr)
Computes the molecule's 3D distance matrix.
RDKIT_GRAPHMOL_EXPORT bool atomHasConjugatedBond(const Atom *at)
returns whether or not the given Atom is involved in a conjugated bond
RDKIT_GRAPHMOL_EXPORT void cleanupChirality(RWMol &mol)
removes bogus chirality markers (those on non-sp3 centers):
RDKIT_GRAPHMOL_EXPORT int findSSSR(const ROMol &mol, std::vector< std::vector< int > > &res)
finds a molecule's Smallest Set of Smallest Rings
RDKIT_GRAPHMOL_EXPORT double * getAdjacencyMatrix(const ROMol &mol, bool useBO=false, int emptyVal=0, bool force=false, const char *propNamePrefix=nullptr, const boost::dynamic_bitset<> *bondsToUse=nullptr)
returns a molecule's adjacency matrix
RDKIT_GRAPHMOL_EXPORT void Kekulize(RWMol &mol, bool markAtomsBonds=true, unsigned int maxBackTracks=100)
Kekulizes the molecule.
RDKIT_GRAPHMOL_EXPORT void assignRadicals(RWMol &mol)
Called by the sanitizer to assign radical counts to atoms.
RDKIT_GRAPHMOL_EXPORT std::vector< std::unique_ptr< MolSanitizeException > > detectChemistryProblems(const ROMol &mol, unsigned int sanitizeOps=SANITIZE_ALL)
Identifies chemistry problems (things that don't make chemical sense) in a molecule.
RDKIT_GRAPHMOL_EXPORT void findPotentialStereoBonds(ROMol &mol, bool cleanIt=false)
finds bonds that could be cis/trans in a molecule and marks them as Bond::STEREOANY.
RDKIT_GRAPHMOL_EXPORT int symmetrizeSSSR(ROMol &mol, std::vector< std::vector< int > > &res)
symmetrize the molecule's Smallest Set of Smallest Rings
RDKIT_GRAPHMOL_EXPORT void setHybridization(ROMol &mol)
calculates and sets the hybridization of all a molecule's Atoms
RDKIT_GRAPHMOL_EXPORT unsigned getNumAtomsWithDistinctProperty(const ROMol &mol, std::string prop)
returns the number of atoms which have a particular property set
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFrom3D(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Uses a conformer to assign ChiralTypes to a molecule's atoms.
RDKIT_GRAPHMOL_EXPORT std::list< int > getShortestPath(const ROMol &mol, int aid1, int aid2)
Find the shortest path between two atoms.
RDKIT_GRAPHMOL_EXPORT void clearSingleBondDirFlags(ROMol &mol)
RDKIT_GRAPHMOL_EXPORT void setConjugation(ROMol &mol)
flags the molecule's conjugated bonds
RDKIT_GRAPHMOL_EXPORT void setDoubleBondNeighborDirections(ROMol &mol, const Conformer *conf=nullptr)
Sets bond directions based on double bond stereochemistry.
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
RDKIT_GRAPHMOL_EXPORT ROMol * addHs(const ROMol &mol, bool explicitOnly=false, bool addCoords=false, const UINT_VECT *onlyOnAtoms=nullptr, bool addResidueInfo=false)
returns a copy of a molecule with hydrogens added in as explicit Atoms
AdjustQueryWhichFlags
Definition: MolOps.h:312
@ ADJUST_IGNORERINGS
Definition: MolOps.h:315
@ ADJUST_IGNORENONE
Definition: MolOps.h:313
@ ADJUST_IGNOREMAPPED
Definition: MolOps.h:318
@ ADJUST_IGNORENONDUMMIES
Definition: MolOps.h:317
@ ADJUST_IGNOREDUMMIES
Definition: MolOps.h:316
@ ADJUST_IGNORECHAINS
Definition: MolOps.h:314
@ ADJUST_IGNOREALL
Definition: MolOps.h:319
Std stuff.
Definition: Abbreviations.h:19
std::vector< double > INVAR_VECT
Definition: MolOps.h:32
INVAR_VECT::iterator INVAR_VECT_I
Definition: MolOps.h:33
INVAR_VECT::const_iterator INVAR_VECT_CI
Definition: MolOps.h:34
std::vector< UINT > UINT_VECT
Definition: types.h:297
Parameters controlling the behavior of MolOps::adjustQueryProperties.
Definition: MolOps.h:331
static AdjustQueryParameters noAdjustments()
returns an AdjustQueryParameters object with all adjustments disabled
Definition: MolOps.h:381