RDKit
Open-source cheminformatics and machine learning.
new_canon.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2014 Greg Landrum
3 // Adapted from pseudo-code from Roger Sayle
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 
12 #include <RDGeneral/export.h>
13 #include <RDGeneral/hanoiSort.h>
14 #include <GraphMol/ROMol.h>
15 #include <GraphMol/RingInfo.h>
17 #include <cstdint>
18 #include <boost/dynamic_bitset.hpp>
20 #include <cstring>
21 #include <iostream>
22 #include <cassert>
23 #include <cstring>
24 #include <vector>
25 
26 // #define VERBOSE_CANON 1
27 
28 namespace RDKit {
29 namespace Canon {
30 
33  unsigned int bondStereo;
34  unsigned int nbrSymClass{0};
35  unsigned int nbrIdx{0};
36  const std::string *p_symbol{
37  nullptr}; // if provided, this is used to order bonds
38  bondholder() : bondStereo(static_cast<unsigned int>(Bond::STEREONONE)) {}
39  bondholder(Bond::BondType bt, Bond::BondStereo bs, unsigned int ni,
40  unsigned int nsc)
41  : bondType(bt),
42  bondStereo(static_cast<unsigned int>(bs)),
43  nbrSymClass(nsc),
44  nbrIdx(ni) {}
45  bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni,
46  unsigned int nsc)
47  : bondType(bt), bondStereo(bs), nbrSymClass(nsc), nbrIdx(ni) {}
48  bool operator<(const bondholder &o) const {
49  if (p_symbol && o.p_symbol) {
50  return (*p_symbol) < (*o.p_symbol);
51  }
52  if (bondType != o.bondType) {
53  return bondType < o.bondType;
54  }
55  if (bondStereo != o.bondStereo) {
56  return bondStereo < o.bondStereo;
57  }
58  return nbrSymClass < o.nbrSymClass;
59  }
60  static bool greater(const bondholder &lhs, const bondholder &rhs) {
61  if (lhs.p_symbol && rhs.p_symbol && (*lhs.p_symbol) != (*rhs.p_symbol)) {
62  return (*lhs.p_symbol) > (*rhs.p_symbol);
63  }
64  if (lhs.bondType != rhs.bondType) {
65  return lhs.bondType > rhs.bondType;
66  }
67  if (lhs.bondStereo != rhs.bondStereo) {
68  return lhs.bondStereo > rhs.bondStereo;
69  }
70  return lhs.nbrSymClass > rhs.nbrSymClass;
71  }
72 
73  static int compare(const bondholder &x, const bondholder &y,
74  unsigned int div = 1) {
75  if (x.p_symbol && y.p_symbol) {
76  if ((*x.p_symbol) < (*y.p_symbol)) {
77  return -1;
78  } else if ((*x.p_symbol) > (*y.p_symbol)) {
79  return 1;
80  }
81  }
82  if (x.bondType < y.bondType) {
83  return -1;
84  } else if (x.bondType > y.bondType) {
85  return 1;
86  }
87  if (x.bondStereo < y.bondStereo) {
88  return -1;
89  } else if (x.bondStereo > y.bondStereo) {
90  return 1;
91  }
92  return x.nbrSymClass / div - y.nbrSymClass / div;
93  }
94 };
96  public:
97  const Atom *atom{nullptr};
98  int index{-1};
99  unsigned int degree{0};
100  unsigned int totalNumHs{0};
101  bool hasRingNbr{false};
102  bool isRingStereoAtom{false};
103  int *nbrIds{nullptr};
104  const std::string *p_symbol{
105  nullptr}; // if provided, this is used to order atoms
106  std::vector<int> neighborNum;
107  std::vector<int> revistedNeighbors;
108  std::vector<bondholder> bonds;
109 
111 
112  ~canon_atom() { free(nbrIds); }
113 };
114 
116  canon_atom *atoms, std::vector<bondholder> &nbrs);
117 
119  canon_atom *atoms, std::vector<bondholder> &nbrs, unsigned int atomIdx,
120  std::vector<std::pair<unsigned int, unsigned int>> &result);
121 
122 /*
123  * Different types of atom compare functions:
124  *
125  * - SpecialChiralityAtomCompareFunctor: Allows canonizing molecules exhibiting
126  *dependent chirality
127  * - SpecialSymmetryAtomCompareFunctor: Very specialized, allows canonizing
128  *highly symmetrical graphs/molecules
129  * - AtomCompareFunctor: Basic atom compare function which also allows to
130  *include neighbors within the ranking
131  */
132 
134  public:
135  Canon::canon_atom *dp_atoms{nullptr};
136  const ROMol *dp_mol{nullptr};
137  const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
138  *dp_bondsInPlay{nullptr};
139 
142  Canon::canon_atom *atoms, const ROMol &m,
143  const boost::dynamic_bitset<> *atomsInPlay = nullptr,
144  const boost::dynamic_bitset<> *bondsInPlay = nullptr)
145  : dp_atoms(atoms),
146  dp_mol(&m),
147  dp_atomsInPlay(atomsInPlay),
148  dp_bondsInPlay(bondsInPlay) {}
149  int operator()(int i, int j) const {
150  PRECONDITION(dp_atoms, "no atoms");
151  PRECONDITION(dp_mol, "no molecule");
152  PRECONDITION(i != j, "bad call");
153  if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
154  return 0;
155  }
156 
157  if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
158  updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
159  }
160  if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
161  updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
162  }
163  for (unsigned int ii = 0;
164  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
165  int cmp =
166  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
167  if (cmp) {
168  return cmp;
169  }
170  }
171 
172  std::vector<std::pair<unsigned int, unsigned int>> swapsi;
173  std::vector<std::pair<unsigned int, unsigned int>> swapsj;
174  if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
175  updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[i].bonds, i, swapsi);
176  }
177  if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
178  updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[j].bonds, j, swapsj);
179  }
180  for (unsigned int ii = 0; ii < swapsi.size() && ii < swapsj.size(); ++ii) {
181  int cmp = swapsi[ii].second - swapsj[ii].second;
182  if (cmp) {
183  return cmp;
184  }
185  }
186  return 0;
187  }
188 };
189 
191  public:
192  Canon::canon_atom *dp_atoms{nullptr};
193  const ROMol *dp_mol{nullptr};
194  const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
195  *dp_bondsInPlay{nullptr};
196 
199  Canon::canon_atom *atoms, const ROMol &m,
200  const boost::dynamic_bitset<> *atomsInPlay = nullptr,
201  const boost::dynamic_bitset<> *bondsInPlay = nullptr)
202  : dp_atoms(atoms),
203  dp_mol(&m),
204  dp_atomsInPlay(atomsInPlay),
205  dp_bondsInPlay(bondsInPlay) {}
206  int operator()(int i, int j) const {
207  PRECONDITION(dp_atoms, "no atoms");
208  PRECONDITION(dp_mol, "no molecule");
209  PRECONDITION(i != j, "bad call");
210  if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
211  return 0;
212  }
213 
214  if (dp_atoms[i].neighborNum < dp_atoms[j].neighborNum) {
215  return -1;
216  } else if (dp_atoms[i].neighborNum > dp_atoms[j].neighborNum) {
217  return 1;
218  }
219 
220  if (dp_atoms[i].revistedNeighbors < dp_atoms[j].revistedNeighbors) {
221  return -1;
222  } else if (dp_atoms[i].revistedNeighbors > dp_atoms[j].revistedNeighbors) {
223  return 1;
224  }
225 
226  if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
227  updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
228  }
229  if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
230  updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
231  }
232  for (unsigned int ii = 0;
233  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
234  int cmp =
235  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
236  if (cmp) {
237  return cmp;
238  }
239  }
240 
241  if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
242  return -1;
243  } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
244  return 1;
245  }
246  return 0;
247  }
248 };
249 
251  unsigned int getAtomRingNbrCode(unsigned int i) const {
252  if (!dp_atoms[i].hasRingNbr) {
253  return 0;
254  }
255 
256  int *nbrs = dp_atoms[i].nbrIds;
257  unsigned int code = 0;
258  for (unsigned j = 0; j < dp_atoms[i].degree; ++j) {
259  if (dp_atoms[nbrs[j]].isRingStereoAtom) {
260  code += dp_atoms[nbrs[j]].index * 10000 + 1; // j;
261  }
262  }
263  return code;
264  }
265 
266  int basecomp(int i, int j) const {
267  PRECONDITION(dp_atoms, "no atoms");
268  unsigned int ivi, ivj;
269 
270  // always start with the current class:
271  ivi = dp_atoms[i].index;
272  ivj = dp_atoms[j].index;
273  if (ivi < ivj) {
274  return -1;
275  } else if (ivi > ivj) {
276  return 1;
277  }
278  // use the atom-mapping numbers if they were assigned
279  /* boost::any_cast FILED here:
280  std::string molAtomMapNumber_i="";
281  std::string molAtomMapNumber_j="";
282  */
283  int molAtomMapNumber_i = 0;
284  int molAtomMapNumber_j = 0;
285  dp_atoms[i].atom->getPropIfPresent(common_properties::molAtomMapNumber,
286  molAtomMapNumber_i);
287  dp_atoms[j].atom->getPropIfPresent(common_properties::molAtomMapNumber,
288  molAtomMapNumber_j);
289  if (molAtomMapNumber_i < molAtomMapNumber_j) {
290  return -1;
291  } else if (molAtomMapNumber_i > molAtomMapNumber_j) {
292  return 1;
293  }
294  // start by comparing degree
295  ivi = dp_atoms[i].degree;
296  ivj = dp_atoms[j].degree;
297  if (ivi < ivj) {
298  return -1;
299  } else if (ivi > ivj) {
300  return 1;
301  }
302  if (dp_atoms[i].p_symbol && dp_atoms[j].p_symbol) {
303  if (*(dp_atoms[i].p_symbol) < *(dp_atoms[j].p_symbol)) {
304  return -1;
305  } else if (*(dp_atoms[i].p_symbol) > *(dp_atoms[j].p_symbol)) {
306  return 1;
307  } else {
308  return 0;
309  }
310  }
311 
312  // move onto atomic number
313  ivi = dp_atoms[i].atom->getAtomicNum();
314  ivj = dp_atoms[j].atom->getAtomicNum();
315  if (ivi < ivj) {
316  return -1;
317  } else if (ivi > ivj) {
318  return 1;
319  }
320  // isotopes if we're using them
321  if (df_useIsotopes) {
322  ivi = dp_atoms[i].atom->getIsotope();
323  ivj = dp_atoms[j].atom->getIsotope();
324  if (ivi < ivj) {
325  return -1;
326  } else if (ivi > ivj) {
327  return 1;
328  }
329  }
330 
331  // nHs
332  ivi = dp_atoms[i].totalNumHs;
333  ivj = dp_atoms[j].totalNumHs;
334  if (ivi < ivj) {
335  return -1;
336  } else if (ivi > ivj) {
337  return 1;
338  }
339  // charge
340  ivi = dp_atoms[i].atom->getFormalCharge();
341  ivj = dp_atoms[j].atom->getFormalCharge();
342  if (ivi < ivj) {
343  return -1;
344  } else if (ivi > ivj) {
345  return 1;
346  }
347  // chirality if we're using it
348  if (df_useChirality) {
349  // first atom stereochem:
350  ivi = 0;
351  ivj = 0;
352  std::string cipCode;
353  if (dp_atoms[i].atom->getPropIfPresent(common_properties::_CIPCode,
354  cipCode)) {
355  ivi = cipCode == "R" ? 2 : 1;
356  }
357  if (dp_atoms[j].atom->getPropIfPresent(common_properties::_CIPCode,
358  cipCode)) {
359  ivj = cipCode == "R" ? 2 : 1;
360  }
361  if (ivi < ivj) {
362  return -1;
363  } else if (ivi > ivj) {
364  return 1;
365  }
366  // can't actually use values here, because they are arbitrary
367  ivi = dp_atoms[i].atom->getChiralTag() != 0;
368  ivj = dp_atoms[j].atom->getChiralTag() != 0;
369  if (ivi < ivj) {
370  return -1;
371  } else if (ivi > ivj) {
372  return 1;
373  }
374  }
375 
376  if (df_useChiralityRings) {
377  // ring stereochemistry
378  ivi = getAtomRingNbrCode(i);
379  ivj = getAtomRingNbrCode(j);
380  if (ivi < ivj) {
381  return -1;
382  } else if (ivi > ivj) {
383  return 1;
384  } // bond stereo is taken care of in the neighborhood comparison
385  }
386  return 0;
387  }
388 
389  public:
390  Canon::canon_atom *dp_atoms{nullptr};
391  const ROMol *dp_mol{nullptr};
392  const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
393  *dp_bondsInPlay{nullptr};
394  bool df_useNbrs{false};
395  bool df_useIsotopes{true};
396  bool df_useChirality{true};
397  bool df_useChiralityRings{true};
398 
401  const boost::dynamic_bitset<> *atomsInPlay = nullptr,
402  const boost::dynamic_bitset<> *bondsInPlay = nullptr)
403  : dp_atoms(atoms),
404  dp_mol(&m),
405  dp_atomsInPlay(atomsInPlay),
406  dp_bondsInPlay(bondsInPlay),
407  df_useNbrs(false),
408  df_useIsotopes(true),
409  df_useChirality(true),
410  df_useChiralityRings(true) {}
411  int operator()(int i, int j) const {
412  PRECONDITION(dp_atoms, "no atoms");
413  PRECONDITION(dp_mol, "no molecule");
414  PRECONDITION(i != j, "bad call");
415  if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
416  return 0;
417  }
418  int v = basecomp(i, j);
419  if (v) {
420  return v;
421  }
422 
423  if (df_useNbrs) {
424  if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
425  updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
426  }
427  if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
428  updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
429  }
430 
431  for (unsigned int ii = 0;
432  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
433  ++ii) {
434  int cmp =
435  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
436  if (cmp) {
437  return cmp;
438  }
439  }
440 
441  if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
442  return -1;
443  } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
444  return 1;
445  }
446  }
447  return 0;
448  }
449 };
450 
451 /*
452  * A compare function to discriminate chiral atoms, similar to the CIP rules.
453  * This functionality is currently not used.
454  */
455 
456 const unsigned int ATNUM_CLASS_OFFSET = 10000;
458  void getAtomNeighborhood(std::vector<bondholder> &nbrs) const {
459  for (unsigned j = 0; j < nbrs.size(); ++j) {
460  unsigned int nbrIdx = nbrs[j].nbrIdx;
461  if (nbrIdx == ATNUM_CLASS_OFFSET) {
462  // Ignore the Hs
463  continue;
464  }
465  const Atom *nbr = dp_atoms[nbrIdx].atom;
466  nbrs[j].nbrSymClass =
467  nbr->getAtomicNum() * ATNUM_CLASS_OFFSET + dp_atoms[nbrIdx].index + 1;
468  }
469  std::sort(nbrs.begin(), nbrs.end(), bondholder::greater);
470  // FIX: don't want to be doing this long-term
471  }
472 
473  int basecomp(int i, int j) const {
474  PRECONDITION(dp_atoms, "no atoms");
475  unsigned int ivi, ivj;
476 
477  // always start with the current class:
478  ivi = dp_atoms[i].index;
479  ivj = dp_atoms[j].index;
480  if (ivi < ivj) {
481  return -1;
482  } else if (ivi > ivj) {
483  return 1;
484  }
485 
486  // move onto atomic number
487  ivi = dp_atoms[i].atom->getAtomicNum();
488  ivj = dp_atoms[j].atom->getAtomicNum();
489  if (ivi < ivj) {
490  return -1;
491  } else if (ivi > ivj) {
492  return 1;
493  }
494 
495  // isotopes:
496  ivi = dp_atoms[i].atom->getIsotope();
497  ivj = dp_atoms[j].atom->getIsotope();
498  if (ivi < ivj) {
499  return -1;
500  } else if (ivi > ivj) {
501  return 1;
502  }
503 
504  // atom stereochem:
505  ivi = 0;
506  ivj = 0;
507  std::string cipCode;
508  if (dp_atoms[i].atom->getPropIfPresent(common_properties::_CIPCode,
509  cipCode)) {
510  ivi = cipCode == "R" ? 2 : 1;
511  }
512  if (dp_atoms[j].atom->getPropIfPresent(common_properties::_CIPCode,
513  cipCode)) {
514  ivj = cipCode == "R" ? 2 : 1;
515  }
516  if (ivi < ivj) {
517  return -1;
518  } else if (ivi > ivj) {
519  return 1;
520  }
521 
522  // bond stereo is taken care of in the neighborhood comparison
523  return 0;
524  }
525 
526  public:
527  Canon::canon_atom *dp_atoms{nullptr};
528  const ROMol *dp_mol{nullptr};
529  bool df_useNbrs{false};
532  : dp_atoms(atoms), dp_mol(&m), df_useNbrs(false) {}
533  int operator()(int i, int j) const {
534  PRECONDITION(dp_atoms, "no atoms");
535  PRECONDITION(dp_mol, "no molecule");
536  PRECONDITION(i != j, "bad call");
537  int v = basecomp(i, j);
538  if (v) {
539  return v;
540  }
541 
542  if (df_useNbrs) {
543  getAtomNeighborhood(dp_atoms[i].bonds);
544  getAtomNeighborhood(dp_atoms[j].bonds);
545 
546  // we do two passes through the neighbor lists. The first just uses the
547  // atomic numbers (by passing the optional 10000 to bondholder::compare),
548  // the second takes the already-computed index into account
549  for (unsigned int ii = 0;
550  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
551  ++ii) {
552  int cmp = bondholder::compare(
553  dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii], ATNUM_CLASS_OFFSET);
554  if (cmp) {
555  return cmp;
556  }
557  }
558  for (unsigned int ii = 0;
559  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
560  ++ii) {
561  int cmp =
562  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
563  if (cmp) {
564  return cmp;
565  }
566  }
567  if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
568  return -1;
569  } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
570  return 1;
571  }
572  }
573  return 0;
574  }
575 };
576 
577 /*
578  * Basic canonicalization function to organize the partitions which will be
579  * sorted next.
580  * */
581 
582 template <typename CompareFunc>
583 void RefinePartitions(const ROMol &mol, canon_atom *atoms, CompareFunc compar,
584  int mode, int *order, int *count, int &activeset,
585  int *next, int *changed, char *touchedPartitions) {
586  unsigned int nAtoms = mol.getNumAtoms();
587  int partition;
588  int symclass = 0;
589  int *start;
590  int offset;
591  int index;
592  int len;
593  int i;
594  // std::vector<char> touchedPartitions(mol.getNumAtoms(),0);
595 
596  // std::cerr<<"&&&&&&&&&&&&&&&& RP"<<std::endl;
597  while (activeset != -1) {
598  // std::cerr<<"ITER: "<<activeset<<" next: "<<next[activeset]<<std::endl;
599  // std::cerr<<" next: ";
600  // for(unsigned int ii=0;ii<nAtoms;++ii){
601  // std::cerr<<ii<<":"<<next[ii]<<" ";
602  // }
603  // std::cerr<<std::endl;
604  // for(unsigned int ii=0;ii<nAtoms;++ii){
605  // std::cerr<<order[ii]<<" count: "<<count[order[ii]]<<" index:
606  // "<<atoms[order[ii]].index<<std::endl;
607  // }
608 
609  partition = activeset;
610  activeset = next[partition];
611  next[partition] = -2;
612 
613  len = count[partition];
614  offset = atoms[partition].index;
615  start = order + offset;
616  // std::cerr<<"\n\n**************************************************************"<<std::endl;
617  // std::cerr<<" sort - class:"<<atoms[partition].index<<" len: "<<len<<":";
618  // for(unsigned int ii=0;ii<len;++ii){
619  // std::cerr<<" "<<order[offset+ii]+1;
620  // }
621  // std::cerr<<std::endl;
622  // for(unsigned int ii=0;ii<nAtoms;++ii){
623  // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
624  // "<<atoms[order[ii]].index<<std::endl;
625  // }
626  hanoisort(start, len, count, changed, compar);
627  // std::cerr<<"*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*"<<std::endl;
628  // std::cerr<<" result:";
629  // for(unsigned int ii=0;ii<nAtoms;++ii){
630  // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
631  // "<<atoms[order[ii]].index<<std::endl;
632  // }
633  for (int k = 0; k < len; ++k) {
634  changed[start[k]] = 0;
635  }
636 
637  index = start[0];
638  // std::cerr<<" len:"<<len<<" index:"<<index<<"
639  // count:"<<count[index]<<std::endl;
640  for (i = count[index]; i < len; i++) {
641  index = start[i];
642  if (count[index]) {
643  symclass = offset + i;
644  }
645  atoms[index].index = symclass;
646  // std::cerr<<" "<<index+1<<"("<<symclass<<")";
647  // if(mode && (activeset<0 || count[index]>count[activeset]) ){
648  // activeset=index;
649  //}
650  for (unsigned j = 0; j < atoms[index].degree; ++j) {
651  changed[atoms[index].nbrIds[j]] = 1;
652  }
653  }
654  // std::cerr<<std::endl;
655 
656  if (mode) {
657  index = start[0];
658  for (i = count[index]; i < len; i++) {
659  index = start[i];
660  for (unsigned j = 0; j < atoms[index].degree; ++j) {
661  unsigned int nbor = atoms[index].nbrIds[j];
662  touchedPartitions[atoms[nbor].index] = 1;
663  }
664  }
665  for (unsigned int ii = 0; ii < nAtoms; ++ii) {
666  if (touchedPartitions[ii]) {
667  partition = order[ii];
668  if ((count[partition] > 1) && (next[partition] == -2)) {
669  next[partition] = activeset;
670  activeset = partition;
671  }
672  touchedPartitions[ii] = 0;
673  }
674  }
675  }
676  }
677 } // end of RefinePartitions()
678 
679 template <typename CompareFunc>
680 void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar,
681  int mode, int *order, int *count, int &activeset, int *next,
682  int *changed, char *touchedPartitions) {
683  unsigned int nAtoms = mol.getNumAtoms();
684  int partition;
685  int offset;
686  int index;
687  int len;
688  int oldPart = 0;
689 
690  for (unsigned int i = 0; i < nAtoms; i++) {
691  partition = order[i];
692  oldPart = atoms[partition].index;
693  while (count[partition] > 1) {
694  len = count[partition];
695  offset = atoms[partition].index + len - 1;
696  index = order[offset];
697  atoms[index].index = offset;
698  count[partition] = len - 1;
699  count[index] = 1;
700 
701  // test for ions, water molecules with no
702  if (atoms[index].degree < 1) {
703  continue;
704  }
705  for (unsigned j = 0; j < atoms[index].degree; ++j) {
706  unsigned int nbor = atoms[index].nbrIds[j];
707  touchedPartitions[atoms[nbor].index] = 1;
708  changed[nbor] = 1;
709  }
710 
711  for (unsigned int ii = 0; ii < nAtoms; ++ii) {
712  if (touchedPartitions[ii]) {
713  int npart = order[ii];
714  if ((count[npart] > 1) && (next[npart] == -2)) {
715  next[npart] = activeset;
716  activeset = npart;
717  }
718  touchedPartitions[ii] = 0;
719  }
720  }
721  RefinePartitions(mol, atoms, compar, mode, order, count, activeset, next,
722  changed, touchedPartitions);
723  }
724  // not sure if this works each time
725  if (atoms[partition].index != oldPart) {
726  i -= 1;
727  }
728  }
729 } // end of BreakTies()
730 
732  int *order, int *count,
733  canon_atom *atoms);
734 
735 RDKIT_GRAPHMOL_EXPORT void ActivatePartitions(unsigned int nAtoms, int *order,
736  int *count, int &activeset,
737  int *next, int *changed);
738 
740  std::vector<unsigned int> &res,
741  bool breakTies = true,
742  bool includeChirality = true,
743  bool includeIsotopes = true);
744 
746  const ROMol &mol, std::vector<unsigned int> &res,
747  const boost::dynamic_bitset<> &atomsInPlay,
748  const boost::dynamic_bitset<> &bondsInPlay,
749  const std::vector<std::string> *atomSymbols,
750  const std::vector<std::string> *bondSymbols, bool breakTies,
751  bool includeChirality, bool includeIsotope);
752 
753 inline void rankFragmentAtoms(
754  const ROMol &mol, std::vector<unsigned int> &res,
755  const boost::dynamic_bitset<> &atomsInPlay,
756  const boost::dynamic_bitset<> &bondsInPlay,
757  const std::vector<std::string> *atomSymbols = nullptr,
758  bool breakTies = true, bool includeChirality = true,
759  bool includeIsotopes = true) {
760  rankFragmentAtoms(mol, res, atomsInPlay, bondsInPlay, atomSymbols, nullptr,
761  breakTies, includeChirality, includeIsotopes);
762 };
763 
765  std::vector<unsigned int> &res);
766 
768  std::vector<Canon::canon_atom> &atoms,
769  bool includeChirality = true);
770 
771 } // namespace Canon
772 } // namespace RDKit
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
Defines the primary molecule class ROMol as well as associated typedefs.
The class for representing atoms.
Definition: Atom.h:68
int getAtomicNum() const
returns our atomic number
Definition: Atom.h:120
class for representing a bond
Definition: Bond.h:47
BondType
the type of Bond
Definition: Bond.h:56
@ UNSPECIFIED
Definition: Bond.h:57
BondStereo
the nature of the bond's stereochem (for cis/trans)
Definition: Bond.h:95
AtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition: new_canon.h:400
int operator()(int i, int j) const
Definition: new_canon.h:411
ChiralAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m)
Definition: new_canon.h:531
int operator()(int i, int j) const
Definition: new_canon.h:533
SpecialChiralityAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition: new_canon.h:141
SpecialSymmetryAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition: new_canon.h:198
std::vector< bondholder > bonds
Definition: new_canon.h:108
unsigned int degree
Definition: new_canon.h:99
std::vector< int > revistedNeighbors
Definition: new_canon.h:107
std::vector< int > neighborNum
Definition: new_canon.h:106
unsigned int getNumAtoms() const
returns our number of atoms
Definition: ROMol.h:395
#define RDKIT_GRAPHMOL_EXPORT
Definition: export.h:217
RDKIT_GRAPHMOL_EXPORT void updateAtomNeighborNumSwaps(canon_atom *atoms, std::vector< bondholder > &nbrs, unsigned int atomIdx, std::vector< std::pair< unsigned int, unsigned int >> &result)
RDKIT_GRAPHMOL_EXPORT void CreateSinglePartition(unsigned int nAtoms, int *order, int *count, canon_atom *atoms)
RDKIT_GRAPHMOL_EXPORT void initCanonAtoms(const ROMol &mol, std::vector< Canon::canon_atom > &atoms, bool includeChirality=true)
RDKIT_GRAPHMOL_EXPORT void ActivatePartitions(unsigned int nAtoms, int *order, int *count, int &activeset, int *next, int *changed)
const unsigned int ATNUM_CLASS_OFFSET
Definition: new_canon.h:456
void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition: new_canon.h:680
void RefinePartitions(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition: new_canon.h:583
RDKIT_GRAPHMOL_EXPORT void rankFragmentAtoms(const ROMol &mol, std::vector< unsigned int > &res, const boost::dynamic_bitset<> &atomsInPlay, const boost::dynamic_bitset<> &bondsInPlay, const std::vector< std::string > *atomSymbols, const std::vector< std::string > *bondSymbols, bool breakTies, bool includeChirality, bool includeIsotope)
RDKIT_GRAPHMOL_EXPORT void chiralRankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res)
RDKIT_GRAPHMOL_EXPORT void updateAtomNeighborIndex(canon_atom *atoms, std::vector< bondholder > &nbrs)
RDKIT_GRAPHMOL_EXPORT void rankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res, bool breakTies=true, bool includeChirality=true, bool includeIsotopes=true)
RDKIT_RDGENERAL_EXPORT const std::string _CIPCode
RDKIT_RDGENERAL_EXPORT const std::string molAtomMapNumber
Std stuff.
Definition: Abbreviations.h:18
void hanoisort(int *base, int nel, int *count, int *changed, CompareFunc compar)
Definition: hanoiSort.h:151
const std::string * p_symbol
Definition: new_canon.h:36
Bond::BondType bondType
Definition: new_canon.h:32
static bool greater(const bondholder &lhs, const bondholder &rhs)
Definition: new_canon.h:60
bool operator<(const bondholder &o) const
Definition: new_canon.h:48
bondholder(Bond::BondType bt, Bond::BondStereo bs, unsigned int ni, unsigned int nsc)
Definition: new_canon.h:39
bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni, unsigned int nsc)
Definition: new_canon.h:45
unsigned int bondStereo
Definition: new_canon.h:33
static int compare(const bondholder &x, const bondholder &y, unsigned int div=1)
Definition: new_canon.h:73
unsigned int nbrSymClass
Definition: new_canon.h:34