RDKit
Open-source cheminformatics and machine learning.
FilterCatalog.h
Go to the documentation of this file.
1// Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8// * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10// * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following
12// disclaimer in the documentation and/or other materials provided
13// with the distribution.
14// * Neither the name of Novartis Institutes for BioMedical Research Inc.
15// nor the names of its contributors may be used to endorse or promote
16// products derived from this software without specific prior written
17// permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30//
31
32#include <RDGeneral/export.h>
33#ifndef _RD_FILTER_CATALOG_PARAMS_
34#define _RD_FILTER_CATALOG_PARAMS_
35
36#include <Catalogs/Catalog.h>
38#include "FilterCatalogEntry.h"
39
40namespace RDKit {
41class FilterCatalog;
44 public:
46 PAINS_A = (1u << 1),
47 PAINS_B = (1u << 2),
48 PAINS_C = (1u << 3),
49 PAINS = PAINS_A | PAINS_B | PAINS_C,
50
51 BRENK = (1u << 4),
52 NIH = (1u << 5),
53 ZINC = (1u << 6),
54
55 ALL = PAINS | BRENK | NIH | ZINC
56 };
57
//! Default constructor: no filter catalogs are selected.
/*!
  Default-constructs the RDCatalog::CatalogParams base and sets the type
  string to "Filter Catalog Parameters". d_catalogs is not touched, so it
  starts out as an empty vector.
*/
58 FilterCatalogParams() : RDCatalog::CatalogParams() {
59 setTypeStr("Filter Catalog Parameters");
60 }
61
//! Constructs parameters and immediately adds one FilterCatalogs selection.
/*!
  Sets the type string to "Filter Catalog Parameters" and then forwards
  \c catalogs to addCatalog(). The bool returned by addCatalog() is not
  checked here.

  \param catalogs FilterCatalogs value handed to addCatalog()
*/
62 FilterCatalogParams(FilterCatalogs catalogs) : RDCatalog::CatalogParams() {
63 setTypeStr("Filter Catalog Parameters");
// addCatalog() is declared virtual, but a call made from a constructor binds
// to this class's implementation, never to an override in a derived class.
64 addCatalog(catalogs);
65 }
66
68 : RDCatalog::CatalogParams(other), d_catalogs(other.d_catalogs) {}
69
71
72 //------------------------------------
73 //! Adds an existing FilterCatalog specification to be used in the
74 /// FilterCatalog
75 //
76 /*!
77 Specifies an existing filter catalog to be used.
78
79 \param catalogs One of the enumerated known FilterCatalogs
80 */
81 virtual bool addCatalog(FilterCatalogs catalogs);
82
83 //------------------------------------
84 //! Returns the existing list of FilterCatalogs to be used.
/*!
  \return a const reference to the d_catalogs member (not a copy), so it is
          only usable while this FilterCatalogParams object is alive.
*/
85 const std::vector<FilterCatalogs> &getCatalogs() const { return d_catalogs; }
86 //! Fill a catalog with the appropriate entries
87 virtual void fillCatalog(FilterCatalog &catalog) const;
88
89 //! serializes (pickles) to a stream
90 void toStream(std::ostream &ss) const override;
91 //! returns a string with a serialized (pickled) representation
92 std::string Serialize() const override;
93 //! initializes from a stream pickle
94 void initFromStream(std::istream &ss) override;
95 //! initializes from a string pickle
96 void initFromString(const std::string &text) override;
97
98 private:
99 std::vector<FilterCatalogs> d_catalogs;
100
// Serialization hook, compiled only when RDK_USE_BOOST_SERIALIZATION is
// defined. boost::serialization::access is befriended so that it can reach
// the private serialize() member template below.
101#ifdef RDK_USE_BOOST_SERIALIZATION
102 friend class boost::serialization::access;
// Hands d_catalogs (the only member archived by this function) to the
// archive through its operator&.
//   ar      - archive object; only its operator& is used here
//   version - not used; wrapped in RDUNUSED_PARAM
103 template <class Archive>
104 void serialize(Archive &ar, const unsigned int version) {
105 RDUNUSED_PARAM(version);
106 ar &d_catalogs;
107 }
108#endif
109};
110
113 public:
114 // syntactic sugar for getMatch(es) return values.
115 typedef boost::shared_ptr<FilterCatalogEntry> SENTRY;
116
117 // If boost::python can support shared_ptr of const objects
118 // we can enable support for this feature
119 typedef boost::shared_ptr<const entryType_t> CONST_SENTRY;
120
//! Constructs an empty catalog: default-constructed FCatalog base and an
//! empty d_entries list.
121 FilterCatalog() : FCatalog(), d_entries() {}
122
124 : FCatalog(), d_entries() {
125 paramType_t temp_params(catalogs);
126 setCatalogParams(&temp_params);
127 }
128
//! Constructs a catalog from a FilterCatalogParams object.
/*!
  Starts from a default-constructed FCatalog base and an empty d_entries
  list, then calls setCatalogParams() with the address of \c params.

  \param params parameters passed (by address) to setCatalogParams()
*/
129 FilterCatalog(const FilterCatalogParams &params) : FCatalog(), d_entries() {
// setCatalogParams() is an override in this class; called from a constructor
// it binds to FilterCatalog's own implementation, not to a further override.
130 setCatalogParams(&params);
131 }
132
134 : FCatalog(rhs), d_entries(rhs.d_entries) {}
135
136 FilterCatalog(const std::string &binStr);
137
138 ~FilterCatalog() override;
139
140 std::string Serialize() const override;
141
142 // Adds a new FilterCatalogEntry to the catalog
143 /*!
144 Adds a new FilterCatalogEntry to the catalog. The catalog
145 owns the entry.
146
147 \param entry The FilterCatalogEntry to add.
148 \param updateFPLength unused in the FilterCatalog object.
149 */
150
151 unsigned int addEntry(FilterCatalogEntry *entry,
152 bool updateFPLength = true) override;
153
154 // Adds a new FilterCatalogEntry to the catalog
155 /*!
156 Adds a new FilterCatalogEntry to the catalog. The catalog
157 owns the entry.
158
159 \param entry The shared_ptr of the FilterCatalogEntry to add.
160 \param updateFPLength unused in the FilterCatalog object.
161 */
162
163 virtual unsigned int addEntry(SENTRY entry, bool updateFPLength = true);
164
165 // Removes a FilterCatalogEntry from the catalog by index
166 /*!
167 Removes a FilterCatalogEntry from the catalog.
168
169 \param idx The FilterCatalogEntry index for the entry to remove.
170 n.b. removing an entry may change the indices of other entries.
171 To safely remove entries, remove entries with the highest idx
172 first.
173 */
174 bool removeEntry(unsigned int idx);
176
177 //------------------------------------
178 //! returns a particular FilterCatalogEntry in the Catalog
179 //! required by Catalog.h API
180 const FilterCatalogEntry *getEntryWithIdx(unsigned int idx) const override;
181
182 //------------------------------------
183 //! returns a particular FilterCatalogEntry in the Catalog
184 //! memory safe version of getEntryWithIdx
185 CONST_SENTRY getEntry(unsigned int idx) const;
186
187 //------------------------------------
188 //! returns the idx of the given entry, UINT_MAX if not found.
189
190 unsigned int getIdxForEntry(const FilterCatalogEntry *entry) const;
191 unsigned int getIdxForEntry(CONST_SENTRY entry) const;
192
193 //------------------------------------
194 //! returns the number of entries in the catalog
/*!
  \return the size of d_entries, converted to unsigned int
*/
195 unsigned int getNumEntries() const override {
// d_entries.size() yields the vector's size_type; the explicit cast narrows
// it to the unsigned int return type fixed by the overridden signature.
196 return static_cast<unsigned int>(d_entries.size());
197 }
198
199 //------------------------------------
200 //! Reset the current catalog to match the specified FilterCatalogParameters
201 /*!
202 \param params The new FilterCatalogParams specifying the new state of the
203 catalog
204 */
205 void setCatalogParams(const FilterCatalogParams *params) override;
206
207 //------------------------------------
208 //! Returns true if the molecule matches any entry in the catalog
209 /*!
210 \param mol ROMol to match against the catalog
211 */
212 bool hasMatch(const ROMol &mol) const;
213
214 //------------------------------------
215 //! Returns the first match against the catalog
216 /*!
217 \param mol ROMol to match against the catalog
218 */
220
221 //-------------------------------------------
222 //! Returns all entry matches to the molecule
223 /*!
224 \param mol ROMol to match against the catalog
225 */
226 const std::vector<CONST_SENTRY> getMatches(const ROMol &mol) const;
227
228 //--------------------------------------------
229 //! Returns all FilterMatches for the molecule
230 /*!
231 \param mol ROMol to match against the catalog
232 */
233 const std::vector<FilterMatch> getFilterMatches(const ROMol &mol) const;
234
235 private:
236 void Clear();
237 std::vector<SENTRY> d_entries;
238};
239
241
242//! Run a filter catalog on a set of smiles strings
243/*!
 \param filterCatalog the FilterCatalog to run against each smiles string
244 \param smiles vector of smiles strings to analyze
245 \param numThreads specify the number of threads to use or specify 0 to use all
246 processors [default 1]
247 \returns a vector of vectors. For each input smiles string, returns
248 a vector of boost::shared_ptr<const FilterCatalogEntry> objects.
249 If a molecule matches no filters, the vector will be empty.
250 If a smiles can't be parsed, a 'no valid RDKit molecule'
251 catalog entry is returned.
252
253*/
255std::vector<std::vector<boost::shared_ptr<const FilterCatalogEntry>>>
256RunFilterCatalog(const FilterCatalog &filterCatalog,
257 const std::vector<std::string> &smiles, int numThreads = 1);
258} // namespace RDKit
259
260#endif
#define RDUNUSED_PARAM(x)
Definition: Invariant.h:196
abstract base class for the container used to create a catalog
Definition: CatalogParams.h:18
abstract base class for a catalog object
Definition: Catalog.h:40
paramType paramType_t
Definition: Catalog.h:43
void toStream(std::ostream &ss) const override
serializes (pickles) to a stream
FilterCatalogParams(const FilterCatalogParams &other)
Definition: FilterCatalog.h:67
void initFromStream(std::istream &ss) override
initializes from a stream pickle
std::string Serialize() const override
returns a string with a serialized (pickled) representation
virtual void fillCatalog(FilterCatalog &catalog) const
Fill a catalog with the appropriate entries.
void initFromString(const std::string &text) override
initializes from a string pickle
const std::vector< FilterCatalogs > & getCatalogs() const
Returns the existing list of FilterCatalogs to be used.
Definition: FilterCatalog.h:85
virtual bool addCatalog(FilterCatalogs catalogs)
FilterCatalogParams(FilterCatalogs catalogs)
Definition: FilterCatalog.h:62
FilterCatalog(FilterCatalogParams::FilterCatalogs catalogs)
const std::vector< CONST_SENTRY > getMatches(const ROMol &mol) const
Returns all entry matches to the molecule.
unsigned int getNumEntries() const override
returns the number of entries in the catalog
bool removeEntry(CONST_SENTRY entry)
unsigned int addEntry(FilterCatalogEntry *entry, bool updateFPLength=true) override
boost::shared_ptr< const entryType_t > CONST_SENTRY
~FilterCatalog() override
CONST_SENTRY getEntry(unsigned int idx) const
FilterCatalog(const FilterCatalogParams &params)
const FilterCatalogEntry * getEntryWithIdx(unsigned int idx) const override
unsigned int getIdxForEntry(const FilterCatalogEntry *entry) const
returns the idx of the given entry, UINT_MAX if not found.
const std::vector< FilterMatch > getFilterMatches(const ROMol &mol) const
Returns all FilterMatches for the molecule.
bool removeEntry(unsigned int idx)
unsigned int getIdxForEntry(CONST_SENTRY entry) const
FilterCatalog(const FilterCatalog &rhs)
bool hasMatch(const ROMol &mol) const
Returns true if the molecule matches any entry in the catalog.
void setCatalogParams(const FilterCatalogParams *params) override
Reset the current catalog to match the specified FilterCatalogParameters.
virtual unsigned int addEntry(SENTRY entry, bool updateFPLength=true)
FilterCatalog(const std::string &binStr)
std::string Serialize() const override
return a serialized form of the Catalog as an std::string
boost::shared_ptr< FilterCatalogEntry > SENTRY
CONST_SENTRY getFirstMatch(const ROMol &mol) const
Returns the first match against the catalog.
#define RDKIT_FILTERCATALOG_EXPORT
Definition: export.h:161
Std stuff.
Definition: Abbreviations.h:18
RDCatalog::Catalog< FilterCatalogEntry, FilterCatalogParams > FCatalog
RDKIT_FILTERCATALOG_EXPORT std::vector< std::vector< boost::shared_ptr< const FilterCatalogEntry > > > RunFilterCatalog(const FilterCatalog &filterCatalog, const std::vector< std::string > &smiles, int numThreads=1)
Run a filter catalog on a set of smiles strings.
RDKIT_FILTERCATALOG_EXPORT bool FilterCatalogCanSerialize()