RDKit
Open-source cheminformatics and machine learning.
FileParsers.h
Go to the documentation of this file.
1//
2// Copyright (C) 2002-2013 Greg Landrum, Rational Discovery LLC
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef _RD_FILEPARSERS_H
12#define _RD_FILEPARSERS_H
13
14#include <RDGeneral/types.h>
15#include <GraphMol/RDKitBase.h>
16
17#include <string>
18#include <iostream>
19#include <vector>
20#include <exception>
21
22#include <boost/shared_ptr.hpp>
23
24namespace RDKit {
25const int MOLFILE_MAXLINE = 256;
26RDKIT_FILEPARSERS_EXPORT std::string strip(const std::string &orig);
27
29 : public std::exception {
30 public:
31 //! construct with an error message
32 explicit MolFileUnhandledFeatureException(const char *msg) : _msg(msg) {}
33 //! construct with an error message
34 explicit MolFileUnhandledFeatureException(const std::string msg)
35 : _msg(msg) {}
36 //! get the error message
37 const char *what() const noexcept override { return _msg.c_str(); }
38 ~MolFileUnhandledFeatureException() noexcept override = default;
39
40 private:
41 std::string _msg;
42};
43
44//-----
45// mol files
46//-----
47typedef std::vector<RWMOL_SPTR> RWMOL_SPTR_VECT;
48//! \brief construct a molecule from MDL mol data in a stream
49/*!
50 * \param inStream - stream containing the data
51 * \param line - current line number (used for error reporting)
52 * \param sanitize - toggles sanitization and stereochemistry
53 * perception of the molecule
54 * \param removeHs - toggles removal of Hs from the molecule. H removal
55 * is only done if the molecule is sanitized
57 * \param strictParsing - if set to false, the parser is more lax about
58 * correctness of the contents.
59 *
60 */
62 unsigned int &line,
63 bool sanitize = true,
64 bool removeHs = true,
65 bool strictParsing = true);
66//! \overload
68 unsigned int &line,
69 bool sanitize = true,
70 bool removeHs = true,
71 bool strictParsing = true);
72//! \brief construct a molecule from an MDL mol block
73/*!
74 * \param molBlock - string containing the mol block
75 * \param sanitize - toggles sanitization and stereochemistry
76 * perception of the molecule
77 * \param removeHs - toggles removal of Hs from the molecule. H removal
78 * is only done if the molecule is sanitized
79 * \param strictParsing - if set to false, the parser is more lax about
80 * correctness of the contents.
81 */
82RDKIT_FILEPARSERS_EXPORT RWMol *MolBlockToMol(const std::string &molBlock,
83 bool sanitize = true,
84 bool removeHs = true,
85 bool strictParsing = true);
86
87//! \brief construct a molecule from an MDL mol file
88/*!
89 * \param fName - string containing the file name
90 * \param sanitize - toggles sanitization and stereochemistry
91 * perception of the molecule
92 * \param removeHs - toggles removal of Hs from the molecule. H removal
93 * is only done if the molecule is sanitized
94 * \param strictParsing - if set to false, the parser is more lax about
95 * correctness of the contents.
96 */
98 bool sanitize = true,
99 bool removeHs = true,
100 bool strictParsing = true);
101
102//! \brief generates an MDL mol block for a molecule
103/*!
104 * \param mol - the molecule in question
105 * \param includeStereo - toggles inclusion of stereochemistry information
106 * \param confId - selects the conformer to be used
107 * \param kekulize - triggers kekulization of the molecule before it is
108 * written
109 * \param forceV3000 - force generation of a V3000 mol block (happens
110 * automatically with
111 * more than 999 atoms or bonds)
112 */
114 bool includeStereo = true,
115 int confId = -1,
116 bool kekulize = true,
117 bool forceV3000 = false);
118
119// \brief generates an MDL v3000 mol block for a molecule
120/*!
121 * \param mol - the molecule in question
122 * \param includeStereo - toggles inclusion of stereochemistry information
123 * \param confId - selects the conformer to be used
124 * \param kekulize - triggers kekulization of the molecule before it is
125 * written
126 */
127inline std::string MolToV3KMolBlock(const ROMol &mol, bool includeStereo = true,
128 int confId = -1, bool kekulize = true) {
129 return MolToMolBlock(mol, includeStereo, confId, kekulize, true);
130}
131
132//! \brief Writes a molecule to an MDL mol file
133/*!
134 * \param mol - the molecule in question
135 * \param fName - the name of the file to use
136 * \param includeStereo - toggles inclusion of stereochemistry information
137 * \param confId - selects the conformer to be used
138 * \param kekulize - triggers kekulization of the molecule before it is
139 * written
140 * \param forceV3000 - force generation of a V3000 mol block (happens
141 * automatically with
142 * more than 999 atoms or bonds)
143 */
145 const ROMol &mol, const std::string &fName, bool includeStereo = true,
146 int confId = -1, bool kekulize = true, bool forceV3000 = false);
147
148// \brief Writes a molecule to an MDL V3000 mol file
149/*!
150 * \param mol - the molecule in question
151 * \param fName - the name of the file to use
152 * \param includeStereo - toggles inclusion of stereochemistry information
153 * \param confId - selects the conformer to be used
154 * \param kekulize - triggers kekulization of the molecule before it is
155 * written
156 */
157inline void MolToV3KMolFile(const ROMol &mol, const std::string &fName,
158 bool includeStereo = true, int confId = -1,
159 bool kekulize = true) {
160 MolToMolFile(mol, fName, includeStereo, confId, kekulize, true);
161}
162
164 int confId = -1,
165 bool kekulize = true);
166
168 const std::string &fName,
169 int confId = -1,
170 bool kekulize = true);
171
173 int confId = -1);
174
176 const std::string &fName,
177 int confId = -1);
178
179//-----
180// TPL handling:
181//-----
182
183//! \brief translate TPL data (BioCad format) into a multi-conf molecule
184/*!
185 \param inStream: the stream from which to read
186 \param line: used to track the line number of errors
187 \param sanitize: toggles sanitization and stereochemistry
188 perception of the molecule
189 \param skipFirstConf: according to the TPL format description, the atomic
190 coords in the atom-information block describe the first
191 conformation and the first conf block describes second
192 conformation. The CombiCode, on the other hand, writes
193 the first conformation data both to the atom-information
194 block and to the first conf block. We want to be able to
195 read CombiCode-style tpls, so we'll allow this
196 mis-feature
197 to be parsed when this flag is set.
198*/
200 unsigned int &line,
201 bool sanitize = true,
202 bool skipFirstConf = false);
203
204//! \brief construct a multi-conf molecule from a TPL (BioCad format) file
205/*!
206 \param fName: the name of the file from which to read
207 \param sanitize: toggles sanitization and stereochemistry
208 perception of the molecule
209 \param skipFirstConf: according to the TPL format description, the atomic
210 coords in the atom-information block describe the first
211 conformation and the first conf block describes second
212 conformation. The CombiCode, on the other hand, writes
213 the first conformation data both to the atom-information
214 block and to the first conf block. We want to be able to
215 read CombiCode-style tpls, so we'll allow this
216 mis-feature
217 to be parsed when this flag is set.
218*/
220 bool sanitize = true,
221 bool skipFirstConf = false);
222
224 const ROMol &mol, const std::string &partialChargeProp = "_GasteigerCharge",
225 bool writeFirstConfTwice = false);
227 const ROMol &mol, const std::string &fName,
228 const std::string &partialChargeProp = "_GasteigerCharge",
229 bool writeFirstConfTwice = false);
230
231//-----
232// MOL2 handling
233//-----
234
235typedef enum {
236 CORINA = 0 //! supports output from Corina and some dbtranslate output
238
239//! \brief construct a molecule from a Tripos mol2 file
240/*!
241 *
242 * \param fName - string containing the file name
243 * \param sanitize - toggles sanitization of the molecule
244 * \param removeHs - toggles removal of Hs from the molecule. H removal
245 * is only done if the molecule is sanitized
246 * \param variant - the atom type definitions to use
247 * \param cleanupSubstructures - toggles recognition and cleanup of common
248 * substructures
249 */
251 bool sanitize = true,
252 bool removeHs = true,
253 Mol2Type variant = CORINA,
254 bool cleanupSubstructures = true);
255
256//! \brief construct a molecule from Tripos mol2 data in a stream
257/*!
258 * \param inStream - stream containing the data
259 * \param sanitize - toggles sanitization of the molecule
260 * \param removeHs - toggles removal of Hs from the molecule. H removal
261 * is only done if the molecule is sanitized
262 * \param variant - the atom type definitions to use
263 * \param cleanupSubstructures - toggles recognition and cleanup of common
264 * substructures
265 */
267 std::istream *inStream, bool sanitize = true, bool removeHs = true,
268 Mol2Type variant = CORINA, bool cleanupSubstructures = true);
269//! \overload
271 std::istream &inStream, bool sanitize = true, bool removeHs = true,
272 Mol2Type variant = CORINA, bool cleanupSubstructures = true);
273
274//! \brief construct a molecule from a Tripos mol2 block
275/*!
276 * \param molBlock - string containing the mol block
277 * \param sanitize - toggles sanitization of the molecule
278 * \param removeHs - toggles removal of Hs from the molecule. H removal
279 * is only done if the molecule is sanitized
280 * \param variant - the atom type definitions to use
281 * \param cleanupSubstructures - toggles recognition and cleanup of common
282 * substructures
283 */
285 const std::string &molBlock, bool sanitize = true, bool removeHs = true,
286 Mol2Type variant = CORINA, bool cleanupSubstructures = true);
287
289 bool sanitize = true,
290 bool removeHs = true,
291 unsigned int flavor = 0,
292 bool proximityBonding = true);
293
295 bool sanitize = true,
296 bool removeHs = true,
297 unsigned int flavor = 0,
298 bool proximityBonding = true);
300 std::istream *inStream, bool sanitize = true, bool removeHs = true,
301 unsigned int flavor = 0, bool proximityBonding = true);
303 std::istream &inStream, bool sanitize = true, bool removeHs = true,
304 unsigned int flavor = 0, bool proximityBonding = true);
306 bool sanitize = true,
307 bool removeHs = true,
308 unsigned int flavor = 0,
309 bool proximityBonding = true);
310
311//! \brief generates a PDB block for a molecule
312/*!
313 * \param mol - the molecule in question
314 * \param confId - selects the conformer to be used
315 * \param flavor - controls what gets written:
316 * flavor & 1 : Write MODEL/ENDMDL lines around each record
317 * flavor & 2 : Don't write single CONECT records
318 * flavor & 4 : Write CONECT records in both directions
319 * flavor & 8 : Don't use multiple CONECTs to encode bond order
320 * flavor & 16 : Write MASTER record
321 * flavor & 32 : Write TER record
322 */
324 int confId = -1,
325 unsigned int flavor = 0);
326//! \brief Writes a molecule to a PDB file
327/*!
328 * \param mol - the molecule in question
329 * \param fName - the name of the file to use
330 * \param confId - selects the conformer to be used
331 * \param flavor - controls what gets written:
332 * flavor & 1 : Write MODEL/ENDMDL lines around each record
333 * flavor & 2 : Don't write single CONECT records
334 * flavor & 4 : Write CONECT records in both directions
335 * flavor & 8 : Don't use multiple CONECTs to encode bond order
336 * flavor & 16 : Write MASTER record
337 * flavor & 32 : Write TER record
338 */
340 const std::string &fname,
341 int confId = -1,
342 unsigned int flavor = 0);
343
344//! \brief reads a molecule from the metadata in an RDKit-generated SVG file
345/*!
346 * \param svg - string containing the SVG
347 * \param sanitize - toggles sanitization of the molecule
348 * \param removeHs - toggles removal of Hs from the molecule. H removal
349 * is only done if the molecule is sanitized
350 *
351 * **NOTE** This functionality should be considered beta.
352 */
354 bool sanitize = true,
355 bool removeHs = true);
356/*! \overload
357 */
359 bool sanitize = true,
360 bool removeHs = true);
361
362inline std::unique_ptr<RDKit::RWMol> operator"" _ctab(const char *text,
363 size_t len) {
364 std::string data(text, len);
365 RWMol *ptr = nullptr;
366 try {
367 ptr = MolBlockToMol(data);
368 } catch (const RDKit::MolSanitizeException &) {
369 ptr = nullptr;
370 }
371 return std::unique_ptr<RWMol>(ptr);
372}
373inline std::unique_ptr<RDKit::RWMol> operator"" _mol2(const char *text,
374 size_t len) {
375 std::string data(text, len);
376 RWMol *ptr = nullptr;
377 try {
378 ptr = Mol2BlockToMol(data);
379 } catch (const RDKit::MolSanitizeException &) {
380 ptr = nullptr;
381 }
382 return std::unique_ptr<RWMol>(ptr);
383}
384
385inline std::unique_ptr<RDKit::RWMol> operator"" _pdb(const char *text,
386 size_t len) {
387 std::string data(text, len);
388 RWMol *ptr = nullptr;
389 try {
390 ptr = PDBBlockToMol(data);
391 } catch (const RDKit::MolSanitizeException &) {
392 ptr = nullptr;
393 }
394 return std::unique_ptr<RWMol>(ptr);
395}
396
397} // namespace RDKit
398
399#endif
pulls in the core RDKit functionality
MolFileUnhandledFeatureException(const char *msg)
construct with an error message
Definition: FileParsers.h:32
MolFileUnhandledFeatureException(const std::string msg)
construct with an error message
Definition: FileParsers.h:34
~MolFileUnhandledFeatureException() noexcept override=default
const char * what() const noexcept override
get the error message
Definition: FileParsers.h:37
class for flagging sanitization errors
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:32
#define RDKIT_FILEPARSERS_EXPORT
Definition: export.h:153
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
Std stuff.
Definition: Abbreviations.h:18
RDKIT_FILEPARSERS_EXPORT RWMol * PDBFileToMol(const std::string &fname, bool sanitize=true, bool removeHs=true, unsigned int flavor=0, bool proximityBonding=true)
std::string MolToV3KMolBlock(const ROMol &mol, bool includeStereo=true, int confId=-1, bool kekulize=true)
Definition: FileParsers.h:127
RDKIT_FILEPARSERS_EXPORT std::string strip(const std::string &orig)
RDKIT_FILEPARSERS_EXPORT RWMol * MolDataStreamToMol(std::istream *inStream, unsigned int &line, bool sanitize=true, bool removeHs=true, bool strictParsing=true)
RDKIT_FILEPARSERS_EXPORT void MolToMolFile(const ROMol &mol, const std::string &fName, bool includeStereo=true, int confId=-1, bool kekulize=true, bool forceV3000=false)
RDKIT_FILEPARSERS_EXPORT std::string MolToPDBBlock(const ROMol &mol, int confId=-1, unsigned int flavor=0)
RDKIT_FILEPARSERS_EXPORT RWMol * Mol2FileToMol(const std::string &fName, bool sanitize=true, bool removeHs=true, Mol2Type variant=CORINA, bool cleanupSubstructures=true)
RDKIT_FILEPARSERS_EXPORT std::string MolToXYZBlock(const ROMol &mol, int confId=-1)
RDKIT_FILEPARSERS_EXPORT RWMol * PDBBlockToMol(const char *str, bool sanitize=true, bool removeHs=true, unsigned int flavor=0, bool proximityBonding=true)
RDKIT_FILEPARSERS_EXPORT void MolToXYZFile(const ROMol &mol, const std::string &fName, int confId=-1)
RDKIT_FILEPARSERS_EXPORT RWMol * MolBlockToMol(const std::string &molBlock, bool sanitize=true, bool removeHs=true, bool strictParsing=true)
RDKIT_FILEPARSERS_EXPORT std::string MolToTPLText(const ROMol &mol, const std::string &partialChargeProp="_GasteigerCharge", bool writeFirstConfTwice=false)
RDKIT_FILEPARSERS_EXPORT void MolToPDBFile(const ROMol &mol, const std::string &fname, int confId=-1, unsigned int flavor=0)
RDKIT_FILEPARSERS_EXPORT void MolToCMLFile(const ROMol &mol, const std::string &fName, int confId=-1, bool kekulize=true)
RDKIT_FILEPARSERS_EXPORT RWMol * RDKitSVGToMol(const std::string &svg, bool sanitize=true, bool removeHs=true)
RDKIT_FILEPARSERS_EXPORT RWMol * TPLDataStreamToMol(std::istream *inStream, unsigned int &line, bool sanitize=true, bool skipFirstConf=false)
translate TPL data (BioCad format) into a multi-conf molecule
void MolToV3KMolFile(const ROMol &mol, const std::string &fName, bool includeStereo=true, int confId=-1, bool kekulize=true)
Definition: FileParsers.h:157
RDKIT_FILEPARSERS_EXPORT std::string MolToMolBlock(const ROMol &mol, bool includeStereo=true, int confId=-1, bool kekulize=true, bool forceV3000=false)
RDKIT_FILEPARSERS_EXPORT RWMol * Mol2DataStreamToMol(std::istream *inStream, bool sanitize=true, bool removeHs=true, Mol2Type variant=CORINA, bool cleanupSubstructures=true)
RDKIT_FILEPARSERS_EXPORT std::string MolToCMLBlock(const ROMol &mol, int confId=-1, bool kekulize=true)
@ CORINA
Definition: FileParsers.h:236
RDKIT_FILEPARSERS_EXPORT void MolToTPLFile(const ROMol &mol, const std::string &fName, const std::string &partialChargeProp="_GasteigerCharge", bool writeFirstConfTwice=false)
RDKIT_FILEPARSERS_EXPORT RWMol * Mol2BlockToMol(const std::string &molBlock, bool sanitize=true, bool removeHs=true, Mol2Type variant=CORINA, bool cleanupSubstructures=true)
RDKIT_FILEPARSERS_EXPORT RWMol * MolFileToMol(const std::string &fName, bool sanitize=true, bool removeHs=true, bool strictParsing=true)
RDKIT_FILEPARSERS_EXPORT RWMol * TPLFileToMol(const std::string &fName, bool sanitize=true, bool skipFirstConf=false)
construct a multi-conf molecule from a TPL (BioCad format) file
const int MOLFILE_MAXLINE
Definition: FileParsers.h:25
std::vector< RWMOL_SPTR > RWMOL_SPTR_VECT
Definition: FileParsers.h:47
RDKIT_FILEPARSERS_EXPORT RWMol * PDBDataStreamToMol(std::istream *inStream, bool sanitize=true, bool removeHs=true, unsigned int flavor=0, bool proximityBonding=true)
boost::shared_ptr< RWMol > RWMOL_SPTR
Definition: RWMol.h:217