RDKit
Open-source cheminformatics and machine learning.
RDKitFPGenerator.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2018 Boran Adas, Google Summer of Code
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 
11 #include <RDGeneral/export.h>
12 #ifndef RD_RDFINGERPRINTGEN_H_2018_07
13 #define RD_RDFINGERPRINTGEN_H_2018_07
14 
16 
17 namespace RDKit {
18 namespace RDKitFP {
19 
20 template <typename OutputType>
22  : public FingerprintArguments<OutputType> {
23  public:
24  const unsigned int d_minPath;
25  const unsigned int d_maxPath;
26  const bool df_useHs;
27  const bool df_branchedPaths;
28  const bool df_useBondOrder;
29 
30  OutputType getResultSize() const override;
31 
32  std::string infoString() const override;
33 
34  /**
35  \brief Construct a new RDKitFPArguments object
36 
37  \param minPath the minimum path length (in bonds) to be included
38  \param maxPath the maximum path length (in bonds) to be included
39  \param useHs toggles inclusion of Hs in paths (if the molecule has
40  explicit Hs)
41  \param branchedPaths toggles generation of branched subgraphs, not just
42  linear paths
43  \param useBondOrder toggles inclusion of bond orders in the path hashes
44  \param countSimulation if set, use count simulation while
45  generating the fingerprint
46  \param countBounds boundaries for count simulation, corresponding bit will
47  be set if the count is higher than the number provided for that spot
48  \param fpSize size of the generated fingerprint, does not affect the sparse
49  versions
50  \param numBitsPerFeature controls the number of bits that are set for each
51  path/subgraph found
52 
53  */
54  RDKitFPArguments(unsigned int minPath, unsigned int maxPath, bool useHs,
55  bool branchedPaths, bool useBondOrder, bool countSimulation,
56  const std::vector<std::uint32_t> countBounds,
57  std::uint32_t fpSize, std::uint32_t numBitsPerFeature);
58 };
59 
61  : public AtomInvariantsGenerator {
62  public:
63  std::vector<std::uint32_t> *getAtomInvariants(
64  const ROMol &mol) const override;
65 
66  std::string infoString() const override;
67  RDKitFPAtomInvGenerator *clone() const override;
68 };
69 
70 template <typename OutputType>
72  : public AtomEnvironment<OutputType> {
73  const OutputType d_bitId;
74  const boost::dynamic_bitset<> d_atomsInPath;
75  const INT_VECT d_bondPath;
76 
77  public:
79  const std::vector<std::uint32_t> *atomInvariants,
80  const std::vector<std::uint32_t> *bondInvariants,
81  const AdditionalOutput *additionalOutput,
82  bool hashResults = false,
83  const std::uint64_t fpSize = 0) const override;
84 
85  /**
86  \brief Construct a new RDKitFPAtomEnv object
87 
88  \param bitId bitId generated for this environment
89  \param atomsInPath holds atoms in this environment to set additional output
90  \param bondPath the bond path defining the environment
91 
92  */
93  RDKitFPAtomEnv(const OutputType bitId, boost::dynamic_bitset<> atomsInPath,
94  INT_VECT bondPath)
95  : d_bitId(bitId),
96  d_atomsInPath(std::move(atomsInPath)),
97  d_bondPath(std::move(bondPath)) {}
98 };
99 
100 template <typename OutputType>
102  : public AtomEnvironmentGenerator<OutputType> {
103  public:
104  std::vector<AtomEnvironment<OutputType> *> getEnvironments(
105  const ROMol &mol, FingerprintArguments<OutputType> *arguments,
106  const std::vector<std::uint32_t> *fromAtoms,
107  const std::vector<std::uint32_t> *ignoreAtoms, int confId,
108  const AdditionalOutput *additionalOutput,
109  const std::vector<std::uint32_t> *atomInvariants,
110  const std::vector<std::uint32_t> *bondInvariants,
111  bool hashResults = false) const override;
112 
113  std::string infoString() const override;
114 };
115 
116 /**
117  \brief Get a RDKit fingerprint generator with given parameters
118 
119  \tparam OutputType determines the size of the bitIds and the result, can be 32
120  or 64 bit unsigned integer
121  \param minPath the minimum path length (in bonds) to be included
122  \param maxPath the maximum path length (in bonds) to be included
123  \param useHs toggles inclusion of Hs in paths (if the molecule has
124  explicit Hs)
125  \param branchedPaths toggles generation of branched subgraphs, not just
126  linear paths
127  \param useBondOrder toggles inclusion of bond orders in the path hashes
128  \param atomInvariantsGenerator custom atom invariants generator to use
129  \param countSimulation if set, use count simulation while
130  generating the fingerprint
131  \param countBounds boundaries for count simulation, corresponding bit will be
132  set if the count is higher than the number provided for that spot
133  \param fpSize size of the generated fingerprint, does not affect the sparse
134  versions
135  \param numBitsPerFeature controls the number of bits that are set for each
136  path/subgraph found
137  \param ownsAtomInvGen if set, the atom invariants generator is destroyed
138  together with the fingerprint generator
139 
140  \return FingerprintGenerator<OutputType>* that generates RDKit fingerprints
141 
142  This generator supports the following \c AdditionalOutput types:
143  - \c atomToBits : which bits each atom is involved in
144  - \c atomCounts : how many bits each atom sets
145  - \c bitPaths : map from bitId to vectors of bond indices for the individual
146  subgraphs
147 
148  */
149 template <typename OutputType>
151  unsigned int minPath = 1, unsigned int maxPath = 7, bool useHs = true,
152  bool branchedPaths = true, bool useBondOrder = true,
153  AtomInvariantsGenerator *atomInvariantsGenerator = nullptr,
154  bool countSimulation = false,
155  const std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
156  std::uint32_t fpSize = 2048, std::uint32_t numBitsPerFeature = 2,
157  bool ownsAtomInvGen = false);
158 
159 } // namespace RDKitFP
160 } // namespace RDKit
161 
162 #endif
abstract base class that generates atom-environments from a molecule
abstract base class that holds atom-environments that will be hashed to generate the fingerprint
abstract base class for atom invariants generators
Abstract base class that holds molecule independent arguments that are common amongst all fingerprint...
class that generates same fingerprint style for different output formats
std::string infoString() const override
method that returns information string about the fingerprint specific argument set and the arguments ...
RDKitFPArguments(unsigned int minPath, unsigned int maxPath, bool useHs, bool branchedPaths, bool useBondOrder, bool countSimulation, const std::vector< std::uint32_t > countBounds, std::uint32_t fpSize, std::uint32_t numBitsPerFeature)
Construct a new RDKitFPArguments object.
OutputType getResultSize() const override
Returns the size of the fingerprint based on arguments.
RDKitFPAtomEnv(const OutputType bitId, boost::dynamic_bitset<> atomsInPath, INT_VECT bondPath)
Construct a new RDKitFPAtomEnv object.
OutputType getBitId(FingerprintArguments< OutputType > *arguments, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, const AdditionalOutput *additionalOutput, bool hashResults=false, const std::uint64_t fpSize=0) const override
calculates and returns the bit id to be set for this atom-environment
RDKitFPAtomInvGenerator * clone() const override
std::vector< std::uint32_t > * getAtomInvariants(const ROMol &mol) const override
get atom invariants from a molecule
std::string infoString() const override
method that returns information about this AtomInvariantsGenerator and its arguments
std::vector< AtomEnvironment< OutputType > * > getEnvironments(const ROMol &mol, FingerprintArguments< OutputType > *arguments, const std::vector< std::uint32_t > *fromAtoms, const std::vector< std::uint32_t > *ignoreAtoms, int confId, const AdditionalOutput *additionalOutput, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, bool hashResults=false) const override
generate and return all atom-environments from a molecule
std::string infoString() const override
method that returns information about this AtomEnvironmentGenerator and its arguments if any
#define RDKIT_FINGERPRINTS_EXPORT
Definition: export.h:169
RDKIT_FINGERPRINTS_EXPORT FingerprintGenerator< OutputType > * getRDKitFPGenerator(unsigned int minPath=1, unsigned int maxPath=7, bool useHs=true, bool branchedPaths=true, bool useBondOrder=true, AtomInvariantsGenerator *atomInvariantsGenerator=nullptr, bool countSimulation=false, const std::vector< std::uint32_t > countBounds={1, 2, 4, 8}, std::uint32_t fpSize=2048, std::uint32_t numBitsPerFeature=2, bool ownsAtomInvGen=false)
Get a RDKit fingerprint generator with given parameters.
Std stuff.
Definition: Abbreviations.h:18
std::vector< int > INT_VECT
Definition: types.h:277