RDKit
Open-source cheminformatics and machine learning.
RGroupDecompData.h
Go to the documentation of this file.
1//
2// Copyright (C) 2017 Novartis Institutes for BioMedical Research
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#ifndef RGROUP_DECOMP_DATA
11#define RGROUP_DECOMP_DATA
12
13#include "RGroupCore.h"
14#include "RGroupDecomp.h"
15#include "RGroupMatch.h"
16#include "RGroupScore.h"
18#include "RGroupGa.h"
19#include <vector>
20#include <map>
21
22// #define VERBOSE 1
23
24namespace RDKit {
26 // matches[mol_idx] == vector of potential matches
27 std::map<int, RCore> cores;
28 std::map<std::string, int> newCores; // new "cores" found along the way
30 // this caches the running product of permutations
31 // across calls to process()
33 // this caches the size of the previous matches vector
34 // such that the size of the current chunk can be inferred
36 // the default for Greedy/GreedyChunks is keeping only the best
37 // permutation after each call to process()
38 bool prunePermutations = true;
40
41 std::vector<std::vector<RGroupMatch>> matches;
42 std::set<int> labels;
43 std::vector<size_t> permutation;
44 unsigned int pruneLength = 0U;
46 std::map<int, std::vector<int>> userLabels;
47
48 std::vector<int> processedRlabels;
49
50 std::map<int, int> finalRlabelMapping;
52
53 RGroupDecompData(const RWMol &inputCore,
55 : params(std::move(inputParams)) {
56 cores[0] = RCore(inputCore);
58 }
59
60 RGroupDecompData(const std::vector<ROMOL_SPTR> &inputCores,
62 : params(std::move(inputParams)) {
63 for (size_t i = 0; i < inputCores.size(); ++i) {
64 cores[i] = RCore(*inputCores[i]);
65 }
67 }
68
69 void prepareCores() {
70 for (auto &core : cores) {
71 RWMol *alignCore = core.first ? cores[0].core.get() : nullptr;
72 CHECK_INVARIANT(params.prepareCore(*core.second.core, alignCore),
73 "Could not prepare at least one core");
74 core.second.init();
75 core.second.labelledCore.reset(new RWMol(*core.second.core));
76 }
77 }
78
79 void setRlabel(Atom *atom, int rlabel) {
80 PRECONDITION(rlabel > 0, "RLabels must be >0");
82 atom->setAtomMapNum(rlabel);
83 }
84
86 std::string dLabel = "R" + std::to_string(rlabel);
88 setAtomRLabel(atom, rlabel);
89 }
90
92 atom->setIsotope(rlabel);
93 }
94 }
95
96 int getRlabel(Atom *atom) const {
98 return atom->getAtomMapNum();
99 }
101 return atom->getIsotope();
102 }
103
105 unsigned int label = 0;
107 return label;
108 }
109 }
110
111 CHECK_INVARIANT(0, "no valid r label found");
112 }
113
114 double scoreFromPrunedData(const std::vector<size_t> &permutation,
115 bool reset = true) {
118 "Scoring method is not fingerprint variance!");
119
121 "Illegal permutation prune length");
122 if (permutation.size() < pruneLength * 1.5) {
123 for (unsigned int pos = pruneLength; pos < permutation.size(); ++pos) {
125 pos, permutation[pos], matches, labels);
126 }
127 double score =
129 if (reset) {
130 for (unsigned int pos = pruneLength; pos < permutation.size(); ++pos) {
132 pos, permutation[pos], matches, labels);
133 }
134 } else {
135 pruneLength = permutation.size();
136 }
137 return score;
138 } else {
139 if (reset) {
141 } else {
143 pruneLength = permutation.size();
146 }
147 }
148 }
149
150 void prune() { // prune all but the current "best" permutation of matches
151 PRECONDITION(permutation.size() <= matches.size(),
152 "permutation.size() should be <= matches.size()");
153 size_t offset = matches.size() - permutation.size();
154 for (size_t mol_idx = 0; mol_idx < permutation.size(); ++mol_idx) {
155 std::vector<RGroupMatch> keepVector;
156 size_t mi = mol_idx + offset;
157 keepVector.push_back(matches[mi].at(permutation[mol_idx]));
158 matches[mi] = keepVector;
159 }
160
161 permutation = std::vector<size_t>(permutation.size(), 0);
165 }
166 }
167
168 // Return the RGroups with the current "best" permutation
169 // of matches.
170 std::vector<RGroupMatch> GetCurrentBestPermutation() const {
171 const bool removeAllHydrogenRGroups =
174
175 std::vector<RGroupMatch> results; // std::map<int, RGroup> > result;
176 bool isPruned = (permutation.size() < matches.size());
177 for (size_t i = 0; i < matches.size(); ++i) {
178 size_t pi = (isPruned ? 0 : permutation.at(i));
179 results.push_back(matches[i].at(pi));
180 }
181
182 // * if a dynamically-added RGroup (i.e., when onlyMatchAtRGroups=false)
183 // is all hydrogens, remove it
184 // * if a user-defined RGroup is all hydrogens and either
185 // params.removeAllHydrogenRGroups==true or
186 // params.removeAllHydrogenRGroupsAndLabels==true, remove it
187
188 // This logic is a bit tricky, find all labels that have common cores
189 // and analyze those sets independently.
190 // i.e. if core 1 doesn't have R1 then don't analyze it when looking
191 // at label 1
192 std::map<int, std::set<int>> labelCores; // map from label->cores
193 std::set<int> coresVisited;
194 for (auto &position : results) {
195 int core_idx = position.core_idx;
196 if (coresVisited.find(core_idx) == coresVisited.end()) {
197 coresVisited.insert(core_idx);
198 auto core = cores.find(core_idx);
199 if (core != cores.end()) {
200 for (auto rlabels : getRlabels(*core->second.core)) {
201 int rlabel = rlabels.first;
202 labelCores[rlabel].insert(core_idx);
203 }
204 }
205 }
206 }
207
208 for (int label : labels) {
209 if (label > 0 && !removeAllHydrogenRGroups) {
210 continue;
211 }
212 bool allH = true;
213 for (auto &position : results) {
214 R_DECOMP::const_iterator rgroup = position.rgroups.find(label);
215 bool labelHasCore = labelCores[label].find(position.core_idx) !=
216 labelCores[label].end();
217 if (labelHasCore && rgroup != position.rgroups.end() &&
218 !rgroup->second->is_hydrogen) {
219 allH = false;
220 break;
221 }
222 }
223
224 if (allH) {
225 for (auto &position : results) {
226 position.rgroups.erase(label);
227 }
228 }
229 }
230 return results;
231 }
232
234 public:
235 std::set<int> labels_used;
236 bool add(int rlabel) {
237 if (labels_used.find(rlabel) != labels_used.end()) {
238 return false;
239 }
240 labels_used.insert(rlabel);
241 return true;
242 }
243
244 int next() {
245 int i = 1;
246 while (labels_used.find(i) != labels_used.end()) {
247 ++i;
248 }
249 labels_used.insert(i);
250 return i;
251 }
252 };
253
254 void addCoreUserLabels(const RWMol &core, std::set<int> &userLabels) {
255 auto atoms = getRlabels(core);
256 for (const auto &p : atoms) {
257 if (p.first > 0) {
258 userLabels.insert(p.first);
259 }
260 }
261 }
262
263 void addAtoms(RWMol &mol,
264 const std::vector<std::pair<Atom *, Atom *>> &atomsToAdd) {
265 for (const auto &i : atomsToAdd) {
266 mol.addAtom(i.second, false, true);
267 mol.addBond(i.first, i.second, Bond::SINGLE);
268 if (mol.getNumConformers()) {
269 MolOps::setTerminalAtomCoords(mol, i.second->getIdx(),
270 i.first->getIdx());
271 }
272 }
273 }
274
275 void relabelCore(RWMol &core, std::map<int, int> &mappings,
276 UsedLabels &used_labels, const std::set<int> &indexLabels,
277 const std::map<int, std::vector<int>> &extraAtomRLabels) {
278 // Now remap to proper rlabel ids
279 // if labels are positive, they come from User labels
280 // if they are negative, they come from indices and should be
281 // numbered *after* the user labels.
282 //
283 // Some indices are attached to multiple bonds,
284 // these rlabels should be incrementally added last
285 std::map<int, Atom *> atoms = getRlabels(core);
286 // a core only has one labelled index
287 // a secondary structure extraAtomRLabels contains the number
288 // of bonds between this atom and the side chain
289
290 // a sidechain atom has a vector of the attachments back to the
291 // core that takes the place of numBondsToRlabel
292
293 std::map<int, std::vector<int>> bondsToCore;
294 std::vector<std::pair<Atom *, Atom *>> atomsToAdd; // adds -R if necessary
295
296 // Deal with user supplied labels
297 for (const auto &rlabels : atoms) {
298 int userLabel = rlabels.first;
299 if (userLabel < 0) {
300 continue; // not a user specified label
301 }
302 Atom *atom = rlabels.second;
303 mappings[userLabel] = userLabel;
304 used_labels.add(userLabel);
305
306 if (atom->getAtomicNum() == 0 &&
307 atom->getDegree() == 1) { // add to existing dummy/rlabel
308 setRlabel(atom, userLabel);
309 } else { // adds new rlabel
310 auto *newAt = new Atom(0);
311 setRlabel(newAt, userLabel);
312 atomsToAdd.emplace_back(atom, newAt);
313 }
314 }
315
316 // Deal with non-user supplied labels
317 for (auto newLabel : indexLabels) {
318 auto atm = atoms.find(newLabel);
319 if (atm == atoms.end()) {
320 continue;
321 }
322
323 Atom *atom = atm->second;
324
325 int rlabel;
326 auto mapping = mappings.find(newLabel);
327 if (mapping == mappings.end()) {
328 rlabel = used_labels.next();
329 mappings[newLabel] = rlabel;
330 } else {
331 rlabel = mapping->second;
332 }
333
334 if (atom->getAtomicNum() == 0 &&
336 *atom)) { // add to dummy
337 setRlabel(atom, rlabel);
338 } else {
339 auto *newAt = new Atom(0);
340 setRlabel(newAt, rlabel);
341 atomsToAdd.emplace_back(atom, newAt);
342 }
343 }
344
345 // Deal with multiple bonds to the same label
346 for (const auto &extraAtomRLabel : extraAtomRLabels) {
347 auto atm = atoms.find(extraAtomRLabel.first);
348 if (atm == atoms.end()) {
349 continue; // label not used in the rgroup
350 }
351 Atom *atom = atm->second;
352
353 for (size_t i = 0; i < extraAtomRLabel.second.size(); ++i) {
354 int rlabel = used_labels.next();
355 // Is this necessary?
357 atom->getAtomicNum() > 1,
358 "Multiple attachments to a dummy (or hydrogen) is weird.");
359 auto *newAt = new Atom(0);
360 setRlabel(newAt, rlabel);
361 atomsToAdd.emplace_back(atom, newAt);
362 }
363 }
364
365 addAtoms(core, atomsToAdd);
366 for (const auto &rlabels : atoms) {
367 auto atom = rlabels.second;
368 atom->clearProp(RLABEL);
369 atom->clearProp(RLABEL_TYPE);
370 }
371 core.updatePropertyCache(false); // this was github #1550
372 }
373
374 void relabelRGroup(RGroupData &rgroup, const std::map<int, int> &mappings) {
375 PRECONDITION(rgroup.combinedMol.get(), "Unprocessed rgroup");
376
377 RWMol &mol = *rgroup.combinedMol.get();
378
379 if (rgroup.combinedMol->hasProp(done)) {
380 rgroup.labelled = true;
381 return;
382 }
383
384 mol.setProp(done, true);
385 std::vector<std::pair<Atom *, Atom *>> atomsToAdd; // adds -R if necessary
386 std::map<int, int> rLabelCoreIndexToAtomicWt;
387
388 for (RWMol::AtomIterator atIt = mol.beginAtoms(); atIt != mol.endAtoms();
389 ++atIt) {
390 Atom *atom = *atIt;
391 if (atom->hasProp(SIDECHAIN_RLABELS)) {
392 atom->setIsotope(0);
393 const std::vector<int> &rlabels =
394 atom->getProp<std::vector<int>>(SIDECHAIN_RLABELS);
395 // switch on atom mappings or rlabels....
396
397 for (int rlabel : rlabels) {
398 auto label = mappings.find(rlabel);
399 CHECK_INVARIANT(label != mappings.end(), "Unprocessed mapping");
400
401 if (atom->getAtomicNum() == 0) {
402 setRlabel(atom, label->second);
403 } else if (atom->hasProp(RLABEL_CORE_INDEX)) {
404 atom->setAtomicNum(0);
405 setRlabel(atom, label->second);
406 } else {
407 auto *newAt = new Atom(0);
408 setRlabel(newAt, label->second);
409 atomsToAdd.emplace_back(atom, newAt);
410 }
411 }
412 }
413 if (atom->hasProp(RLABEL_CORE_INDEX)) {
414 // convert to dummy as we don't want to collapse hydrogens onto the core
415 // match
416 auto rLabelCoreIndex = atom->getProp<int>(RLABEL_CORE_INDEX);
417 rLabelCoreIndexToAtomicWt[rLabelCoreIndex] = atom->getAtomicNum();
418 atom->setAtomicNum(0);
419 }
420 }
421
422 addAtoms(mol, atomsToAdd);
423
425 RDLog::LogStateSetter blocker;
426 bool implicitOnly = false;
427 bool updateExplicitCount = false;
428 bool sanitize = false;
429 MolOps::removeHs(mol, implicitOnly, updateExplicitCount, sanitize);
430 }
431
432 mol.updatePropertyCache(false); // this was github #1550
433 rgroup.labelled = true;
434
435 // Restore any core matches that we have set to dummy
436 for (RWMol::AtomIterator atIt = mol.beginAtoms(); atIt != mol.endAtoms();
437 ++atIt) {
438 Atom *atom = *atIt;
439 if (atom->hasProp(RLABEL_CORE_INDEX)) {
440 // don't need to set IsAromatic on atom - that seems to have been saved
441 atom->setAtomicNum(
442 rLabelCoreIndexToAtomicWt[atom->getProp<int>(RLABEL_CORE_INDEX)]);
443 atom->setNoImplicit(true);
445 }
447 }
448
449#ifdef VERBOSE
450 std::cerr << "Relabel Rgroup smiles " << MolToSmiles(mol) << std::endl;
451#endif
452 }
453
454 // relabel the core and sidechains using the specified user labels
455 // if matches exist for non labelled atoms, these are added as well
456 void relabel() {
457 std::vector<RGroupMatch> best = GetCurrentBestPermutation();
458
459 // get the labels used
460 std::set<int> userLabels;
461 std::set<int> indexLabels;
462
463 // Go through all the RGroups and find out which labels were
464 // actually used.
465
466 // some atoms will have multiple attachment points, i.e. cycles
467 // split these up into new rlabels if necessary
468 // These are detected at match time
469 // This vector will hold the extra (new) labels required
470 std::map<int, std::vector<int>> extraAtomRLabels;
471
472 for (auto &it : best) {
473 for (auto &rgroup : it.rgroups) {
474 if (rgroup.first > 0) {
475 userLabels.insert(rgroup.first);
476 }
477 if (rgroup.first < 0 && !params.onlyMatchAtRGroups) {
478 indexLabels.insert(rgroup.first);
479 }
480
481 std::map<int, int> rlabelsUsedInRGroup =
482 rgroup.second->getNumBondsToRlabels();
483 for (auto &numBondsUsed : rlabelsUsedInRGroup) {
484 // Make space for the extra labels
485 if (numBondsUsed.second > 1) { // multiple rgroup bonds to same atom
486 extraAtomRLabels[numBondsUsed.first].resize(numBondsUsed.second -
487 1);
488 }
489 }
490 }
491 }
492
493 // find user labels that are not present in the decomposition
494 for (auto &core : cores) {
495 core.second.labelledCore.reset(new RWMol(*core.second.core));
496 addCoreUserLabels(*core.second.labelledCore, userLabels);
497 }
498
499 // Assign final RGroup labels to the cores and propagate these to
500 // the scaffold
501 finalRlabelMapping.clear();
502
503 UsedLabels used_labels;
504 // Add all the user labels now to prevent an index label being assigned to a
505 // user label when multiple cores are present (e.g. the user label is
506 // present in the second core, but not the first).
507 for (auto userLabel : userLabels) {
508 used_labels.add(userLabel);
509 }
510 for (auto &core : cores) {
511 relabelCore(*core.second.labelledCore, finalRlabelMapping, used_labels,
512 indexLabels, extraAtomRLabels);
513 }
514
515 for (auto &it : best) {
516 for (auto &rgroup : it.rgroups) {
517 relabelRGroup(*rgroup.second, finalRlabelMapping);
518 }
519 }
520
521 std::set<int> uniqueMappedValues;
522 std::transform(finalRlabelMapping.cbegin(), finalRlabelMapping.cend(),
523 std::inserter(uniqueMappedValues, uniqueMappedValues.end()),
524 [](const std::pair<int, int> &p) { return p.second; });
525 CHECK_INVARIANT(finalRlabelMapping.size() == uniqueMappedValues.size(),
526 "Error in uniqueness of final RLabel mapping");
528 uniqueMappedValues.size() == userLabels.size() + indexLabels.size(),
529 "Error in final RMapping size");
530 }
531
532 double score(const std::vector<size_t> &permutation,
533 FingerprintVarianceScoreData *fingerprintVarianceScoreData =
534 nullptr) const {
535 RGroupScore scoreMethod = static_cast<RGroupScore>(params.scoreMethod);
536 switch (scoreMethod) {
537 case Match:
539 break;
542 fingerprintVarianceScoreData);
543 break;
544 default:;
545 }
546 return NAN;
547 }
548
550 bool finalize = false) {
551 if (matches.empty()) {
552 return RGroupDecompositionProcessResult(false, -1);
553 }
554 auto t0 = std::chrono::steady_clock::now();
555 std::unique_ptr<CartesianProduct> iterator;
557
558 if (params.matchingStrategy == GA) {
559 RGroupGa ga(*this, params.timeout >= 0 ? &t0 : nullptr);
560 if (ga.numberPermutations() < 100 * ga.getPopsize()) {
562 } else {
563 if (params.gaNumberRuns > 1) {
564 auto results = ga.runBatch();
565 auto best = max_element(results.begin(), results.end(),
566 [](const GaResult &a, const GaResult &b) {
567 return a.rGroupScorer.getBestScore() <
568 b.rGroupScorer.getBestScore();
569 });
570 rGroupScorer = best->rGroupScorer;
571 } else {
572 auto result = ga.run();
573 rGroupScorer = result.rGroupScorer;
574 }
575 }
576 }
577 size_t offset = 0;
578 if (params.matchingStrategy != GA) {
579 // Exhaustive search, get the MxN matrix
580 // (M = matches.size(): number of molecules
581 // N = iterator.maxPermutations)
582 std::vector<size_t> permutations;
583
584 if (pruneMatches && params.scoreMethod != FingerprintVariance) {
585 offset = previousMatchSize;
586 }
587 previousMatchSize = matches.size();
588 std::transform(
589 matches.begin() + offset, matches.end(),
590 std::back_inserter(permutations),
591 [](const std::vector<RGroupMatch> &m) { return m.size(); });
592 permutation = std::vector<size_t>(permutations.size(), 0);
593
594 // run through all possible matches and score each
595 // set
596 size_t count = 0;
597#ifdef DEBUG
598 std::cerr << "Processing" << std::endl;
599#endif
600 std::unique_ptr<CartesianProduct> it(new CartesianProduct(permutations));
601 iterator = std::move(it);
602 // Iterates through the permutation idx, i.e.
603 // [m1_permutation_idx, m2_permutation_idx, m3_permutation_idx]
604
605 while (iterator->next()) {
606 if (count > iterator->maxPermutations) {
607 throw ValueErrorException("next() did not finish");
608 }
609#ifdef DEBUG
610 std::cerr << "**************************************************"
611 << std::endl;
612#endif
613 double newscore = params.scoreMethod == FingerprintVariance
614 ? scoreFromPrunedData(iterator->permutation)
615 : score(iterator->permutation);
616
617 if (fabs(newscore - rGroupScorer.getBestScore()) <
618 1e-6) { // heuristic to overcome floating point comparison issues
619 rGroupScorer.pushTieToStore(iterator->permutation);
620 } else if (newscore > rGroupScorer.getBestScore()) {
621#ifdef DEBUG
622 std::cerr << " ===> current best:" << newscore << ">"
623 << rGroupScorer.getBestScore() << std::endl;
624#endif
625 rGroupScorer.setBestPermutation(iterator->permutation, newscore);
627 rGroupScorer.pushTieToStore(iterator->permutation);
628 }
629 ++count;
630 }
631 }
632
633 if (rGroupScorer.tieStoreSize() > 1) {
636 } else {
638 }
640 if (pruneMatches || finalize) {
641 prune();
642 }
643
644 if (finalize) {
645 relabel();
646 }
647
649 }
650};
651} // namespace RDKit
652
653#endif
#define CHECK_INVARIANT(expr, mess)
Definition: Invariant.h:101
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
The class for representing atoms.
Definition: Atom.h:68
void setNoImplicit(bool what)
sets our noImplicit flag, indicating whether or not we are allowed to have implicit Hs
Definition: Atom.h:214
void setAtomicNum(int newNum)
sets our atomic number
Definition: Atom.h:122
void setIsotope(unsigned int what)
sets our isotope number
int getAtomicNum() const
returns our atomic number
Definition: Atom.h:120
int getAtomMapNum() const
Definition: Atom.h:382
void setAtomMapNum(int mapno, bool strict=true)
Set the atom map Number of the atom.
Definition: Atom.h:370
unsigned int getIsotope() const
returns our isotope number
Definition: Atom.h:234
unsigned int getDegree() const
@ SINGLE
Definition: Bond.h:58
bool getPropIfPresent(const std::string &key, T &res) const
Definition: RDProps.h:121
void clearProp(const std::string &key) const
clears the value of a property
Definition: RDProps.h:137
void getProp(const std::string &key, T &res) const
allows retrieval of a particular property value
Definition: RDProps.h:107
bool hasProp(const std::string &key) const
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: RDProps.h:126
void setProp(const std::string &key, T val, bool computed=false) const
sets a property value
Definition: RDProps.h:77
vector< GaResult > runBatch()
GaResult run(int runNumber=1)
unsigned int numberPermutations() const
Definition: RGroupGa.h:127
void pushTieToStore(const std::vector< size_t > &permutation)
store the passed tied permutation for subsequent processing
void startProcessing()
called when process() starts to initialize State
void setBestPermutation(const std::vector< size_t > &permutation, double score)
set the passed permutation and score as the best one
void clearTieStore()
clear all stored tied permutations
const std::vector< size_t > & getBestPermutation() const
return the best permutation found so far
Definition: RGroupScore.h:83
void breakTies(const std::vector< std::vector< RGroupMatch > > &matches, const std::set< int > &labels, const std::unique_ptr< CartesianProduct > &iterator, const std::chrono::steady_clock::time_point &t0, double timeout)
find the best permutation across the tied ones that were stored
double matchScore(const std::vector< size_t > &permutation, const std::vector< std::vector< RGroupMatch > > &matches, const std::set< int > &labels)
score the passed permutation of matches
size_t tieStoreSize() const
number of stored tied permutations
Definition: RGroupScore.h:99
double getBestScore() const
return the best score found so far
Definition: RGroupScore.h:101
unsigned int getNumConformers() const
Definition: ROMol.h:542
AtomIterator endAtoms()
get an AtomIterator pointing at the end of our Atoms
void updatePropertyCache(bool strict=true)
calculates any of our lazy properties
AtomIterator beginAtoms()
get an AtomIterator pointing at our first Atom
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:32
unsigned int addAtom(bool updateLabel=true)
adds an empty Atom to our collection
unsigned int addBond(unsigned int beginAtomIdx, unsigned int endAtomIdx, Bond::BondType order=Bond::UNSPECIFIED)
adds a Bond between the indicated Atoms
Class to allow us to throw a ValueError from C++ and have it make it back to Python.
Definition: Exceptions.h:40
static std::string to_string(const Descriptor &desc)
Definition: Descriptor.h:54
RDKIT_GRAPHMOL_EXPORT void setTerminalAtomCoords(ROMol &mol, unsigned int idx, unsigned int otherIdx)
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
RDKIT_RDGENERAL_EXPORT const std::string dummyLabel
RDKIT_RDGENERAL_EXPORT const std::string _MolFileRLabel
Std stuff.
Definition: Abbreviations.h:18
@ FingerprintVariance
Definition: RGroupDecomp.h:63
RDKIT_RGROUPDECOMPOSITION_EXPORT const std::string RLABEL_CORE_INDEX
RDKIT_RGROUPDECOMPOSITION_EXPORT const std::string done
@ Exhaustive
Definition: RGroupDecomp.h:43
RDKIT_GRAPHMOL_EXPORT void setAtomRLabel(Atom *atm, int rlabel)
RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams &params)
returns canonical SMILES for a molecule
RDKIT_RGROUPDECOMPOSITION_EXPORT const std::string RLABEL
RDKIT_RGROUPDECOMPOSITION_EXPORT const std::string SIDECHAIN_RLABELS
@ MDLRGroup
Definition: RGroupDecomp.h:51
@ AtomMap
Definition: RGroupDecomp.h:49
@ Isotope
Definition: RGroupDecomp.h:50
bool checkForTimeout(const std::chrono::steady_clock::time_point &t0, double timeout, bool throwOnTimeout=true)
Definition: RGroupDecomp.h:207
bool isAnyAtomWithMultipleNeighborsOrNotUserRLabel(const Atom &atom)
Definition: RGroupUtils.h:66
RDKIT_RGROUPDECOMPOSITION_EXPORT const std::string RLABEL_TYPE
RDKIT_RGROUPDECOMPOSITION_EXPORT double fingerprintVarianceScore(const std::vector< size_t > &permutation, const std::vector< std::vector< RGroupMatch > > &matches, const std::set< int > &labels, FingerprintVarianceScoreData *fingerprintVarianceScoreData=nullptr)
const unsigned int EMPTY_CORE_LABEL
Definition: RGroupUtils.h:25
std::map< int, Atom * > getRlabels(const RWMol &mol)
Get the RLabels,atom mapping for the current molecule.
iterate through all possible permutations of the rgroups
Definition: RGroupScore.h:20
void addVarianceData(int matchNumber, int permutationNumber, const std::vector< std::vector< RGroupMatch > > &matches, const std::set< int > &labels)
void removeVarianceData(int matchNumber, int permutationNumber, const std::vector< std::vector< RGroupMatch > > &matches, const std::set< int > &labels)
RCore is the core common to a series of molecules.
Definition: RGroupCore.h:24
A single rgroup attached to a given core.
Definition: RGroupData.h:27
boost::shared_ptr< RWMol > combinedMol
Definition: RGroupData.h:28
std::vector< std::vector< RGroupMatch > > matches
FingerprintVarianceScoreData prunedFingerprintVarianceScoreData
RGroupDecompData(const RWMol &inputCore, RGroupDecompositionParameters inputParams)
double score(const std::vector< size_t > &permutation, FingerprintVarianceScoreData *fingerprintVarianceScoreData=nullptr) const
double scoreFromPrunedData(const std::vector< size_t > &permutation, bool reset=true)
void relabelRGroup(RGroupData &rgroup, const std::map< int, int > &mappings)
std::vector< size_t > permutation
std::map< int, std::vector< int > > userLabels
RGroupDecompositionParameters params
std::map< std::string, int > newCores
void addAtoms(RWMol &mol, const std::vector< std::pair< Atom *, Atom * > > &atomsToAdd)
RGroupDecompositionProcessResult process(bool pruneMatches, bool finalize=false)
std::map< int, RCore > cores
void setRlabel(Atom *atom, int rlabel)
std::vector< int > processedRlabels
int getRlabel(Atom *atom) const
std::map< int, int > finalRlabelMapping
void addCoreUserLabels(const RWMol &core, std::set< int > &userLabels)
RGroupDecompData(const std::vector< ROMOL_SPTR > &inputCores, RGroupDecompositionParameters inputParams)
void relabelCore(RWMol &core, std::map< int, int > &mappings, UsedLabels &used_labels, const std::set< int > &indexLabels, const std::map< int, std::vector< int > > &extraAtomRLabels)
std::vector< RGroupMatch > GetCurrentBestPermutation() const
bool onlyMatchAtRGroups
only allow rgroup decomposition at the specified rgroups
Definition: RGroupDecomp.h:84
bool removeAllHydrogenRGroups
remove all user-defined rgroups that only have hydrogens
Definition: RGroupDecomp.h:86
double timeout
timeout in seconds. <=0 indicates no timeout
Definition: RGroupDecomp.h:95
bool removeHydrogensPostMatch
remove all hydrogens from the output molecules
Definition: RGroupDecomp.h:91
bool prepareCore(RWMol &, const RWMol *alignCore)