RDKit
Open-source cheminformatics and machine learning.
MolWriters.h
Go to the documentation of this file.
1//
2// Copyright (C) 2002-2017 Greg Landrum, Rational Discovery LLC
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11#include <RDGeneral/export.h>
12#ifndef _RD_MOLWRITERS_H_
13#define _RD_MOLWRITERS_H_
14
15#include <RDGeneral/types.h>
16
17#include <string>
18#include <iostream>
19#include <GraphMol/ROMol.h>
20
21namespace RDKit {
22
//! Value used as the default \c confId argument by the write()/getText()
//! declarations in this header.
//! NOTE(review): this is a non-const \c static at namespace scope in a header,
//! so every translation unit including the file gets its own copy.
23static int defaultConfId = -1;
25 public:
 // virtual so that writers can be destroyed through a MolWriter pointer
26 virtual ~MolWriter() {}
 //! \brief write a molecule; \c confId defaults to defaultConfId
27 virtual void write(const ROMol &mol, int confId = defaultConfId) = 0;
 //! \brief flush the underlying output
28 virtual void flush() = 0;
 //! \brief close the writer (the writers in this header document that they
 //! cannot be used again after close())
29 virtual void close() = 0;
 //! \brief set the property names to be written out for each molecule
30 virtual void setProps(const STR_VECT &propNames) = 0;
 //! \brief get the number of molecules written so far
31 virtual unsigned int numMols() const = 0;
32};
33
34//! The SmilesWriter is for writing molecules and properties to
35//! delimited text files.
37 /******************************************************************************
38 * A Smiles Table writer - this is how it is used
39 * - create a SmilesWriter with an output file name (or an ostream), a
40 *delimiter,
41 * and a list of properties that need to be written out
42 * - then a call is made to the write function for each molecule that needs
43 *to
44 * be written out
45 ******************************************************************************/
46 public:
47 /*!
48 \param fileName : filename to write to ("-" to write to stdout)
49 \param delimiter : delimiter to use in the text file
50 \param nameHeader : used to label the name column in the output. If this
51 is provided as the empty string, no names will be
52 written.
53 \param includeHeader : toggles inclusion of a header line in the output
54 \param isomericSmiles : toggles generation of isomeric SMILES
55 \param kekuleSmiles : toggles the generation of kekule SMILES
56
57 */
58 SmilesWriter(const std::string &fileName, const std::string &delimiter = " ",
59 const std::string &nameHeader = "Name",
60 bool includeHeader = true, bool isomericSmiles = true,
61 bool kekuleSmiles = false);
62 //! \overload
 //! \param outStream : stream to write to, used instead of a file name
 //! \param takeOwnership : NOTE(review): presumably decides whether the writer
 //! deletes \c outStream when closed (close() only deletes the stream when
 //! df_owner is set) - confirm against the constructor implementation
63 SmilesWriter(std::ostream *outStream, std::string delimiter = " ",
64 std::string nameHeader = "Name", bool includeHeader = true,
65 bool takeOwnership = false, bool isomericSmiles = true,
66 bool kekuleSmiles = false);
67
68 ~SmilesWriter() override;
69
70 //! \brief set a vector of property names that need to be
71 //! written out for each molecule
72 void setProps(const STR_VECT &propNames) override;
73
74 //! \brief write a new molecule to the file
 //! (\c confId defaults to defaultConfId)
75 void write(const ROMol &mol, int confId = defaultConfId) override;
76
77 //! \brief flush the ostream
 //!
 //! The stream pointer is first passed to PRECONDITION (its failure behaviour
 //! is defined in Invariant.h, not here). An exception thrown by the stream's
 //! flush() is caught rather than propagated; if the stream still reports
 //! good() at that point, its badbit is set.
78 void flush() override {
79 PRECONDITION(dp_ostream, "no output stream");
80 try {
81 dp_ostream->flush();
82 } catch (...) {
 // the flush threw: record the failure in the stream state, do not rethrow
83 try {
84 if (dp_ostream->good()) {
85 dp_ostream->setstate(std::ios::badbit);
86 }
87 } catch (const std::runtime_error &) {
 // setstate() can itself throw when exceptions are enabled on the stream;
 // std::runtime_error-derived exceptions are swallowed by this empty handler
88 }
89 }
90 }
91
92 //! \brief close our stream (the writer cannot be used again)
 //!
 //! Flushes the stream if there is one, deletes it only when this writer owns
 //! it (df_owner), then clears the pointer. A second call therefore neither
 //! flushes nor deletes anything.
93 void close() override {
94 if (dp_ostream) {
95 flush();
96 }
97 if (df_owner) {
98 delete dp_ostream;
 // ownership ends with the delete, so a later close() will not delete again
99 df_owner = false;
100 }
101 dp_ostream = nullptr;
102 }
103
104 //! \brief get the number of molecules written so far
105 unsigned int numMols() const override { return d_molid; }
106
107 private:
108 // local initialization
109 void init(const std::string &delimiter, const std::string &nameHeader,
110 bool includeHeader, bool isomericSmiles, bool kekuleSmiles);
111
112 // dumps a header line to the output stream
113 void dumpHeader() const;
114
115 std::ostream *dp_ostream; // output stream; reset to nullptr by close()
116 bool df_owner; // when true, close() deletes dp_ostream
117 bool df_includeHeader; // whether or not to include a title line
118 unsigned int d_molid; // the number of molecules we have written so far
119 std::string d_delim; // delimiter string between various records
120 std::string d_nameHeader; // header for the name column in the output file
121 STR_VECT d_props; // list of property names that need to be written out
122 bool df_isomericSmiles; // whether or not to do isomeric smiles
123 bool df_kekuleSmiles; // whether or not to do kekule smiles
124};
125
126//! The SDWriter is for writing molecules and properties to
127//! SD files
129 /**************************************************************************************
130 * An SD file (or stream) writer - this is how it is used
131 * - create an SDWriter with an output file name (or an ostream),
132 * and a list of properties that need to be written out
133 * - then a call is made to the write function for each molecule that needs
134 *to be written out
135 **********************************************************************************************/
136 public:
137 /*!
138 \param fileName : filename to write to ("-" to write to stdout)
139 */
140 SDWriter(const std::string &fileName);
 //! \overload
 //! \param outStream : stream to write to, used instead of a file name
 //! \param takeOwnership : NOTE(review): presumably decides whether the writer
 //! deletes \c outStream when closed (close() only deletes the stream when
 //! df_owner is set) - confirm against the constructor implementation
141 SDWriter(std::ostream *outStream, bool takeOwnership = false);
142
143 ~SDWriter() override;
144
145 //! \brief set a vector of property names that need to be
146 //! written out for each molecule
147 void setProps(const STR_VECT &propNames) override;
148
149 //! \brief return the text that would be written to the file
 //!
 //! Static, so it can be called without constructing a writer.
 //! NOTE(review): \c kekulize and \c force_V3000 presumably correspond to the
 //! per-writer setKekulize()/setForceV3000() options - confirm in the
 //! implementation.
150 static std::string getText(const ROMol &mol, int confId = defaultConfId,
151 bool kekulize = true, bool force_V3000 = false,
152 int molid = -1, STR_VECT *propNames = nullptr);
153
154 //! \brief write a new molecule to the file
 //! (\c confId defaults to defaultConfId)
155 void write(const ROMol &mol, int confId = defaultConfId) override;
156
157 //! \brief flush the ostream
 //!
 //! The stream pointer is first passed to PRECONDITION (its failure behaviour
 //! is defined in Invariant.h, not here). An exception thrown by the stream's
 //! flush() is caught rather than propagated; if the stream still reports
 //! good() at that point, its badbit is set.
158 void flush() override {
159 PRECONDITION(dp_ostream, "no output stream");
160 try {
161 dp_ostream->flush();
162 } catch (...) {
 // the flush threw: record the failure in the stream state, do not rethrow
163 try {
164 if (dp_ostream->good()) {
165 dp_ostream->setstate(std::ios::badbit);
166 }
167 } catch (const std::runtime_error &) {
 // setstate() can itself throw when exceptions are enabled on the stream;
 // std::runtime_error-derived exceptions are swallowed by this empty handler
168 }
169 }
170 }
171
172 //! \brief close our stream (the writer cannot be used again)
 //!
 //! Flushes the stream if there is one, deletes it only when this writer owns
 //! it (df_owner), then clears the pointer. A second call therefore neither
 //! flushes nor deletes anything.
173 void close() override {
174 if (dp_ostream) {
175 flush();
176 }
177 if (df_owner) {
178 delete dp_ostream;
 // ownership ends with the delete, so a later close() will not delete again
179 df_owner = false;
180 }
181 dp_ostream = nullptr;
182 }
183
184 //! \brief get the number of molecules written so far
185 unsigned int numMols() const override { return d_molid; }
186
 //! \brief set/get whether mol blocks are forced to be written as V3000
187 void setForceV3000(bool val) { df_forceV3000 = val; }
188 bool getForceV3000() const { return df_forceV3000; }
189
 //! \brief set/get whether molecules are kekulized on writing
190 void setKekulize(bool val) { df_kekulize = val; }
191 bool getKekulize() const { return df_kekulize; }
192
193 private:
 // NOTE(review): presumably writes the named property of mol to the stream;
 // the implementation is not visible in this header
194 void writeProperty(const ROMol &mol, const std::string &name);
195
196 std::ostream *dp_ostream; // output stream; reset to nullptr by close()
197 bool df_owner; // when true, close() deletes dp_ostream
198 unsigned int d_molid; // the number of molecules we have written so far
199 STR_VECT d_props; // list of property names that need to be written out
200 bool df_forceV3000; // force writing the mol blocks as V3000
201 bool df_kekulize; // toggle kekulization of molecules on writing
202};
203
204//! The TDTWriter is for writing molecules and properties to
205//! TDT files
207 /**************************************************************************************
208 * A TDT file (or stream) writer - this is how it is used
209 * - create a TDTWriter with an output file name (or an ostream),
210 * and a list of properties that need to be written out
211 * - then a call is made to the write function for each molecule that needs
212 *to be written out
213 **********************************************************************************************/
214 public:
215 /*!
216 \param fileName : filename to write to ("-" to write to stdout)
217 */
218 TDTWriter(const std::string &fileName);
 //! \overload
 //! \param outStream : stream to write to, used instead of a file name
 //! \param takeOwnership : NOTE(review): presumably decides whether the writer
 //! deletes \c outStream when closed (close() only deletes the stream when
 //! df_owner is set) - confirm against the constructor implementation
219 TDTWriter(std::ostream *outStream, bool takeOwnership = false);
220
221 ~TDTWriter() override;
222
223 //! \brief set a vector of property names that need to be
224 //! written out for each molecule
225 void setProps(const STR_VECT &propNames) override;
226
227 //! \brief write a new molecule to the file
 //! (\c confId defaults to defaultConfId)
228 void write(const ROMol &mol, int confId = defaultConfId) override;
229
230 //! \brief flush the ostream
 //!
 //! The stream pointer is first passed to PRECONDITION (its failure behaviour
 //! is defined in Invariant.h, not here). An exception thrown by the stream's
 //! flush() is caught rather than propagated; if the stream still reports
 //! good() at that point, its badbit is set.
231 void flush() override {
232 PRECONDITION(dp_ostream, "no output stream");
233 try {
234 dp_ostream->flush();
235 } catch (...) {
 // the flush threw: record the failure in the stream state, do not rethrow
236 try {
237 if (dp_ostream->good()) {
238 dp_ostream->setstate(std::ios::badbit);
239 }
240 } catch (const std::runtime_error &) {
 // setstate() can itself throw when exceptions are enabled on the stream;
 // std::runtime_error-derived exceptions are swallowed by this empty handler
241 }
242 }
243 }
244
245 //! \brief close our stream (the writer cannot be used again)
 //!
 //! If there is a stream: writes a terminating "|" line when at least one
 //! molecule has been written (d_molid > 0), then flushes. The stream is
 //! deleted only when this writer owns it (df_owner), and the pointer is
 //! cleared, so a second call neither writes, flushes nor deletes anything.
246 void close() override {
247 if (dp_ostream) {
248 // if we've written any mols, finish with a "|" line
249 if (d_molid > 0) {
250 *dp_ostream << "|\n";
251 }
252 flush();
253 }
254 if (df_owner) {
255 delete dp_ostream;
 // ownership ends with the delete, so a later close() will not delete again
256 df_owner = false;
257 }
258 dp_ostream = nullptr;
259 }
260
261 //! \brief get the number of molecules written so far
262 unsigned int numMols() const override { return d_molid; }
263
 //! \brief set/get whether 2D coordinates are written instead of 3D
264 void setWrite2D(bool state = true) { df_write2D = state; }
265 bool getWrite2D() const { return df_write2D; }
266
 //! \brief set/get whether a name record is written for each molecule
267 void setWriteNames(bool state = true) { df_writeNames = state; }
268 bool getWriteNames() const { return df_writeNames; }
269
 //! \brief set/get the number of digits used when writing coordinates
270 void setNumDigits(unsigned int numDigits) { d_numDigits = numDigits; }
271 unsigned int getNumDigits() const { return d_numDigits; }
272
273 private:
 // NOTE(review): presumably writes the named property of mol to the stream;
 // the implementation is not visible in this header
274 void writeProperty(const ROMol &mol, const std::string &name);
275
276 std::ostream *dp_ostream; // output stream; reset to nullptr by close()
277 bool df_owner; // when true, close() deletes dp_ostream
278 unsigned int d_molid; // the number of molecules we wrote so far
279 STR_VECT d_props; // list of property names that need to be written out
280 bool df_write2D; // write 2D coordinates instead of 3D
281 bool df_writeNames; // write a name record for each molecule
282 unsigned int
283 d_numDigits; // number of digits to use in our output of coordinates
284};
285
286//! The PDBWriter is for writing molecules to Brookhaven Protein
287//! DataBank format files.
289 public:
 //! \param fileName : name of the file to write to
 //! \param flavor : option value for the writer; its meaning is not visible
 //! in this header (NOTE(review): document from the implementation)
290 PDBWriter(const std::string &fileName, unsigned int flavor = 0);
 //! \overload
 //! \param outStream : stream to write to, used instead of a file name
 //! \param takeOwnership : NOTE(review): presumably decides whether the writer
 //! deletes \c outStream when closed (close() only deletes the stream when
 //! df_owner is set) - confirm against the constructor implementation
291 PDBWriter(std::ostream *outStream, bool takeOwnership = false,
292 unsigned int flavor = 0);
293 ~PDBWriter() override;
294
295 //! \brief write a new molecule to the file
 //! (\c confId defaults to defaultConfId)
296 void write(const ROMol &mol, int confId = defaultConfId) override;
297
 //! \brief no-op: the property names passed in are ignored by this writer
298 void setProps(const STR_VECT &) override {}
299
300 //! \brief flush the ostream
 //!
 //! The stream pointer is first passed to PRECONDITION (its failure behaviour
 //! is defined in Invariant.h, not here). An exception thrown by the stream's
 //! flush() is caught rather than propagated; if the stream still reports
 //! good() at that point, its badbit is set.
301 void flush() override {
302 PRECONDITION(dp_ostream, "no output stream");
303 try {
304 dp_ostream->flush();
305 } catch (...) {
 // the flush threw: record the failure in the stream state, do not rethrow
306 try {
307 if (dp_ostream->good()) {
308 dp_ostream->setstate(std::ios::badbit);
309 }
310 } catch (const std::runtime_error &) {
 // setstate() can itself throw when exceptions are enabled on the stream;
 // std::runtime_error-derived exceptions are swallowed by this empty handler
311 }
312 }
313 }
314
315 //! \brief close our stream (the writer cannot be used again)
 //!
 //! Flushes the stream if there is one, deletes it only when this writer owns
 //! it (df_owner), then clears the pointer. A second call therefore neither
 //! flushes nor deletes anything.
316 void close() override {
317 if (dp_ostream) {
318 flush();
319 }
320 if (df_owner) {
321 delete dp_ostream;
 // ownership ends with the delete, so a later close() will not delete again
322 df_owner = false;
323 }
324 dp_ostream = nullptr;
325 }
326
327 //! \brief get the number of molecules written so far
328 unsigned int numMols() const override { return d_count; }
329
330 private:
331 std::ostream *dp_ostream; // output stream; reset to nullptr by close()
332 unsigned int d_flavor; // NOTE(review): presumably the ctor's flavor value
333 unsigned int d_count; // value reported by numMols()
334 bool df_owner; // when true, close() deletes dp_ostream
335};
336} // namespace RDKit
337
338#endif
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
Defines the primary molecule class ROMol as well as associated typedefs.
virtual void flush()=0
virtual void write(const ROMol &mol, int confId=defaultConfId)=0
virtual ~MolWriter()
Definition: MolWriters.h:26
virtual void close()=0
virtual void setProps(const STR_VECT &propNames)=0
virtual unsigned int numMols() const =0
PDBWriter(const std::string &fileName, unsigned int flavor=0)
void write(const ROMol &mol, int confId=defaultConfId) override
write a new molecule to the file
void flush() override
flush the ostream
Definition: MolWriters.h:301
PDBWriter(std::ostream *outStream, bool takeOwnership=false, unsigned int flavor=0)
void setProps(const STR_VECT &) override
Definition: MolWriters.h:298
~PDBWriter() override
unsigned int numMols() const override
get the number of molecules written so far
Definition: MolWriters.h:328
void close() override
close our stream (the writer cannot be used again)
Definition: MolWriters.h:316
~SDWriter() override
bool getForceV3000() const
Definition: MolWriters.h:188
unsigned int numMols() const override
get the number of molecules written so far
Definition: MolWriters.h:185
SDWriter(std::ostream *outStream, bool takeOwnership=false)
bool getKekulize() const
Definition: MolWriters.h:191
void flush() override
flush the ostream
Definition: MolWriters.h:158
void setProps(const STR_VECT &propNames) override
set a vector of property names that need to be written out for each molecule
void write(const ROMol &mol, int confId=defaultConfId) override
write a new molecule to the file
static std::string getText(const ROMol &mol, int confId=defaultConfId, bool kekulize=true, bool force_V3000=false, int molid=-1, STR_VECT *propNames=nullptr)
return the text that would be written to the file
void close() override
close our stream (the writer cannot be used again)
Definition: MolWriters.h:173
void setForceV3000(bool val)
Definition: MolWriters.h:187
SDWriter(const std::string &fileName)
void setKekulize(bool val)
Definition: MolWriters.h:190
unsigned int numMols() const override
get the number of molecules written so far
Definition: MolWriters.h:105
SmilesWriter(const std::string &fileName, const std::string &delimiter=" ", const std::string &nameHeader="Name", bool includeHeader=true, bool isomericSmiles=true, bool kekuleSmiles=false)
void write(const ROMol &mol, int confId=defaultConfId) override
write a new molecule to the file
~SmilesWriter() override
SmilesWriter(std::ostream *outStream, std::string delimiter=" ", std::string nameHeader="Name", bool includeHeader=true, bool takeOwnership=false, bool isomericSmiles=true, bool kekuleSmiles=false)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
void setProps(const STR_VECT &propNames) override
set a vector of property names that need to be written out for each molecule
void close() override
close our stream (the writer cannot be used again)
Definition: MolWriters.h:93
void flush() override
flush the ostream
Definition: MolWriters.h:78
~TDTWriter() override
bool getWrite2D() const
Definition: MolWriters.h:265
void setNumDigits(unsigned int numDigits)
Definition: MolWriters.h:270
void setWrite2D(bool state=true)
Definition: MolWriters.h:264
void setProps(const STR_VECT &propNames) override
set a vector of property names that need to be written out for each molecule
unsigned int numMols() const override
get the number of molecules written so far
Definition: MolWriters.h:262
unsigned int getNumDigits() const
Definition: MolWriters.h:271
TDTWriter(std::ostream *outStream, bool takeOwnership=false)
void close() override
close our stream (the writer cannot be used again)
Definition: MolWriters.h:246
TDTWriter(const std::string &fileName)
void write(const ROMol &mol, int confId=defaultConfId) override
write a new molecule to the file
void setWriteNames(bool state=true)
Definition: MolWriters.h:267
void flush() override
flush the ostream
Definition: MolWriters.h:231
bool getWriteNames() const
Definition: MolWriters.h:268
#define RDKIT_FILEPARSERS_EXPORT
Definition: export.h:153
Std stuff.
Definition: Abbreviations.h:18
std::vector< std::string > STR_VECT
Definition: Dict.h:29
static int defaultConfId
Definition: MolWriters.h:23