RDKit
Open-source cheminformatics and machine learning.
MolWriters.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2002-2017 Greg Landrum, Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 
11 #include <RDGeneral/export.h>
12 #ifndef _RD_MOLWRITERS_H_
13 #define _RD_MOLWRITERS_H_
14 
15 #include <RDGeneral/types.h>
16 
17 #include <string>
18 #include <iostream>
19 #include <GraphMol/ROMol.h>
20 
21 namespace RDKit {
22 
23 static int defaultConfId = -1;  // default value of the confId argument of the write()/getText() declarations below; being a namespace-scope static in a header, every translation unit that includes this file gets its own (mutable) copy
25  public:
26  virtual ~MolWriter() {}  // virtual so that a concrete writer can be destroyed through a MolWriter pointer
27  virtual void write(const ROMol &mol, int confId = defaultConfId) = 0;
28  virtual void flush() = 0;
29  virtual void close() = 0;
30  virtual void setProps(const STR_VECT &propNames) = 0;
31  virtual unsigned int numMols() const = 0;
32 };
33 
34 //! The SmilesWriter is for writing molecules and properties to
35 //! delimited text files.
37  /******************************************************************************
38  * A Smiles Table writer - this is how it is used
39  * - create a SmilesWriter with an output file name (or an ostream), a
40  *delimiter,
41  * and a list of properties that need to be written out
42  * - then a call is made to the write function for each molecule that needs
43  *to
44  * be written out
45  ******************************************************************************/
46  public:
47  /*!
48  \param fileName : filename to write to ("-" to write to stdout)
49  \param delimiter : delimiter to use in the text file
50  \param nameHeader : used to label the name column in the output. If this
51  is provided as the empty string, no names will be
52  written.
53  \param includeHeader : toggles inclusion of a header line in the output
54  \param isomericSmiles : toggles generation of isomeric SMILES
55  \param kekuleSmiles : toggles the generation of kekule SMILES
56 
57  */
58  SmilesWriter(const std::string &fileName, const std::string &delimiter = " ",
59  const std::string &nameHeader = "Name",
60  bool includeHeader = true, bool isomericSmiles = true,
61  bool kekuleSmiles = false);
62  //! \overload
63  SmilesWriter(std::ostream *outStream, std::string delimiter = " ",
64  std::string nameHeader = "Name", bool includeHeader = true,
65  bool takeOwnership = false, bool isomericSmiles = true,
66  bool kekuleSmiles = false);
67 
68  ~SmilesWriter() override;
69 
70  //! \brief set a vector of property names that need to be
71  //! written out for each molecule
72  void setProps(const STR_VECT &propNames) override;
73 
74  //! \brief write a new molecule to the file
75  void write(const ROMol &mol, int confId = defaultConfId) override;
76 
77  //! \brief flush the ostream
78  void flush() override {  // flushes dp_ostream; a throwing flush is recorded on the stream rather than propagated
79  PRECONDITION(dp_ostream, "no output stream");  // dp_ostream is nullptr once close() has run; what PRECONDITION does on failure is defined in Invariant.h (not shown here)
80  try {
81  dp_ostream->flush();
82  } catch (...) {  // anything thrown by the stream's flush is swallowed here
83  try {
84  if (dp_ostream->good()) {
85  dp_ostream->setstate(std::ios::badbit);  // make the failure observable if the stream still claims to be good
86  }
87  } catch (const std::runtime_error &) {  // setstate() can itself throw when the stream has exceptions enabled; only std::runtime_error-derived exceptions are ignored
88  }
89  }
90  }
91 
92  //! \brief close our stream (the writer cannot be used again)
93  void close() override {  // flushes, destroys the stream if this writer owns it, then detaches from it
94  if (dp_ostream) {  // a repeated close() skips the flush because the pointer is already null
95  flush();
96  }
97  if (df_owner) {
98  delete dp_ostream;  // the stream is destroyed only when this writer owns it
99  df_owner = false;  // prevents a second delete on a later close()
100  }
101  dp_ostream = nullptr;  // from here on flush()'s PRECONDITION(dp_ostream, ...) no longer holds
102  }
103 
104  //! \brief get the number of molecules written so far
105  unsigned int numMols() const override { return d_molid; }  // reports the d_molid counter; where it is incremented is not visible in this header
106 
107  private:
108  // local initialization
109  void init(const std::string &delimiter, const std::string &nameHeader,
110  bool includeHeader, bool isomericSmiles, bool kekuleSmiles);
111 
112  // dumps a header line to the output stream
113  void dumpHeader() const;
114 
115  std::ostream *dp_ostream;
116  bool df_owner;
117  bool df_includeHeader; // whether or not to include a title line
118  unsigned int d_molid; // the number of the molecules we wrote so far
119  std::string d_delim; // delimiter string between various records
120  std::string d_nameHeader; // header for the name column in the output file
121  STR_VECT d_props; // list of property name that need to be written out
122  bool df_isomericSmiles; // whether or not to do isomeric smiles
123  bool df_kekuleSmiles; // whether or not to do kekule smiles
124 };
125 
126 //! The SDWriter is for writing molecules and properties to
127 //! SD files
129  /**************************************************************************************
130  * An SD file (or stream) writer - this is how it is used
131  * - create an SDWriter with an output file name (or an ostream),
132  * and a list of properties that need to be written out
133  * - then a call is made to the write function for each molecule that needs
134  *to be written out
135  **********************************************************************************************/
136  public:
137  /*!
138  \param fileName : filename to write to ("-" to write to stdout)
139  */
140  SDWriter(const std::string &fileName);
141  SDWriter(std::ostream *outStream, bool takeOwnership = false);
142 
143  ~SDWriter() override;
144 
145  //! \brief set a vector of property names that need to be
146  //! written out for each molecule
147  void setProps(const STR_VECT &propNames) override;
148 
149  //! \brief return the text that would be written to the file
150  static std::string getText(const ROMol &mol, int confId = defaultConfId,
151  bool kekulize = true, bool force_V3000 = false,
152  int molid = -1, STR_VECT *propNames = nullptr);
153 
154  //! \brief write a new molecule to the file
155  void write(const ROMol &mol, int confId = defaultConfId) override;
156 
157  //! \brief flush the ostream
158  void flush() override {  // flushes dp_ostream; a throwing flush is recorded on the stream rather than propagated
159  PRECONDITION(dp_ostream, "no output stream");  // dp_ostream is nullptr once close() has run; what PRECONDITION does on failure is defined in Invariant.h (not shown here)
160  try {
161  dp_ostream->flush();
162  } catch (...) {  // anything thrown by the stream's flush is swallowed here
163  try {
164  if (dp_ostream->good()) {
165  dp_ostream->setstate(std::ios::badbit);  // make the failure observable if the stream still claims to be good
166  }
167  } catch (const std::runtime_error &) {  // setstate() can itself throw when the stream has exceptions enabled; only std::runtime_error-derived exceptions are ignored
168  }
169  }
170  }
171 
172  //! \brief close our stream (the writer cannot be used again)
173  void close() override {  // flushes, destroys the stream if this writer owns it, then detaches from it
174  if (dp_ostream) {  // a repeated close() skips the flush because the pointer is already null
175  flush();
176  }
177  if (df_owner) {
178  delete dp_ostream;  // the stream is destroyed only when this writer owns it
179  df_owner = false;  // prevents a second delete on a later close()
180  }
181  dp_ostream = nullptr;  // from here on flush()'s PRECONDITION(dp_ostream, ...) no longer holds
182  }
183 
184  //! \brief get the number of molecules written so far
185  unsigned int numMols() const override { return d_molid; }  // reports the d_molid counter; where it is incremented is not visible in this header
186 
187  void setForceV3000(bool val) { df_forceV3000 = val; }  // stores the flag that forces mol blocks to be written as V3000
188  bool getForceV3000() const { return df_forceV3000; }  // current value of the force-V3000 flag
189 
190  void setKekulize(bool val) { df_kekulize = val; }  // stores the flag that toggles kekulization of molecules on writing
191  bool getKekulize() const { return df_kekulize; }  // current value of the kekulization flag
192 
193  private:
194  void writeProperty(const ROMol &mol, const std::string &name);
195 
196  std::ostream *dp_ostream;
197  bool df_owner;
198  unsigned int d_molid; // the number of the molecules we wrote so far
199  STR_VECT d_props; // list of property name that need to be written out
200  bool df_forceV3000; // force writing the mol blocks as V3000
201  bool df_kekulize; // toggle kekulization of molecules on writing
202 };
203 
204 //! The TDTWriter is for writing molecules and properties to
205 //! TDT files
207  /**************************************************************************************
208  * A TDT file (or stream) writer - this is how it is used
209  * - create a TDTWriter with an output file name (or an ostream),
210  * and a list of properties that need to be written out
211  * - then a call is made to the write function for each molecule that needs
212  *to be written out
213  **********************************************************************************************/
214  public:
215  /*!
216  \param fileName : filename to write to ("-" to write to stdout)
217  */
218  TDTWriter(const std::string &fileName);
219  TDTWriter(std::ostream *outStream, bool takeOwnership = false);
220 
221  ~TDTWriter() override;
222 
223  //! \brief set a vector of property names that need to be
224  //! written out for each molecule
225  void setProps(const STR_VECT &propNames) override;
226 
227  //! \brief write a new molecule to the file
228  void write(const ROMol &mol, int confId = defaultConfId) override;
229 
230  //! \brief flush the ostream
231  void flush() override {  // flushes dp_ostream; a throwing flush is recorded on the stream rather than propagated
232  PRECONDITION(dp_ostream, "no output stream");  // dp_ostream is nullptr once close() has run; what PRECONDITION does on failure is defined in Invariant.h (not shown here)
233  try {
234  dp_ostream->flush();
235  } catch (...) {  // anything thrown by the stream's flush is swallowed here
236  try {
237  if (dp_ostream->good()) {
238  dp_ostream->setstate(std::ios::badbit);  // make the failure observable if the stream still claims to be good
239  }
240  } catch (const std::runtime_error &) {  // setstate() can itself throw when the stream has exceptions enabled; only std::runtime_error-derived exceptions are ignored
241  }
242  }
243  }
244 
245  //! \brief close our stream (the writer cannot be used again)
246  void close() override {  // writes the terminator line if needed, flushes, destroys the stream if owned, then detaches from it
247  if (dp_ostream) {  // a repeated close() skips terminator and flush because the pointer is already null
248  // if we've written any mols, finish with a "|" line
249  if (d_molid > 0) {
250  *dp_ostream << "|\n";
251  }
252  flush();
253  }
254  if (df_owner) {
255  delete dp_ostream;  // the stream is destroyed only when this writer owns it
256  df_owner = false;  // prevents a second delete on a later close()
257  }
258  dp_ostream = nullptr;  // from here on flush()'s PRECONDITION(dp_ostream, ...) no longer holds
259  }
260 
261  //! \brief get the number of molecules written so far
262  unsigned int numMols() const override { return d_molid; }  // reports the d_molid counter; where it is incremented is not visible in this header
263 
264  void setWrite2D(bool state = true) { df_write2D = state; }  // stores the flag for writing 2D coordinates instead of 3D; calling with no argument turns it on
265  bool getWrite2D() const { return df_write2D; }  // current value of the write-2D flag
266 
267  void setWriteNames(bool state = true) { df_writeNames = state; }  // stores the flag for writing a name record per molecule; calling with no argument turns it on
268  bool getWriteNames() const { return df_writeNames; }  // current value of the write-names flag
269 
270  void setNumDigits(unsigned int numDigits) { d_numDigits = numDigits; }  // stores the number of digits used when coordinates are output; the value is not range-checked here
271  unsigned int getNumDigits() const { return d_numDigits; }  // current number of digits used for coordinate output
272 
273  private:
274  void writeProperty(const ROMol &mol, const std::string &name);
275 
276  std::ostream *dp_ostream;
277  bool df_owner;
278  unsigned int d_molid; // the number of molecules we wrote so far
279  STR_VECT d_props; // list of property name that need to be written out
280  bool df_write2D; // write 2D coordinates instead of 3D
281  bool df_writeNames; // write a name record for each molecule
282  unsigned int
283  d_numDigits; // number of digits to use in our output of coordinates;
284 };
285 
286 //! The PDBWriter is for writing molecules to Brookhaven Protein
287 //! DataBank format files.
289  public:
290  PDBWriter(const std::string &fileName, unsigned int flavor = 0);
291  PDBWriter(std::ostream *outStream, bool takeOwnership = false,
292  unsigned int flavor = 0);
293  ~PDBWriter() override;
294 
295  //! \brief write a new molecule to the file
296  void write(const ROMol &mol, int confId = defaultConfId) override;
297 
298  void setProps(const STR_VECT &) override {}  // intentionally a no-op: the argument is unnamed and ignored, this only satisfies the pure virtual declared at listing line 30
299 
300  //! \brief flush the ostream
301  void flush() override {  // flushes dp_ostream; a throwing flush is recorded on the stream rather than propagated
302  PRECONDITION(dp_ostream, "no output stream");  // dp_ostream is nullptr once close() has run; what PRECONDITION does on failure is defined in Invariant.h (not shown here)
303  try {
304  dp_ostream->flush();
305  } catch (...) {  // anything thrown by the stream's flush is swallowed here
306  try {
307  if (dp_ostream->good()) {
308  dp_ostream->setstate(std::ios::badbit);  // make the failure observable if the stream still claims to be good
309  }
310  } catch (const std::runtime_error &) {  // setstate() can itself throw when the stream has exceptions enabled; only std::runtime_error-derived exceptions are ignored
311  }
312  }
313  }
314 
315  //! \brief close our stream (the writer cannot be used again)
316  void close() override {  // flushes, destroys the stream if this writer owns it, then detaches from it
317  if (dp_ostream) {  // a repeated close() skips the flush because the pointer is already null
318  flush();
319  }
320  if (df_owner) {
321  delete dp_ostream;  // the stream is destroyed only when this writer owns it
322  df_owner = false;  // prevents a second delete on a later close()
323  }
324  dp_ostream = nullptr;  // from here on flush()'s PRECONDITION(dp_ostream, ...) no longer holds
325  }
326 
327  //! \brief get the number of molecules written so far
328  unsigned int numMols() const override { return d_count; }  // reports the d_count counter; where it is incremented is not visible in this header
329 
330  private:
331  std::ostream *dp_ostream;
332  unsigned int d_flavor;
333  unsigned int d_count;
334  bool df_owner;
335 };
336 } // namespace RDKit
337 
338 #endif
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
Defines the primary molecule class ROMol as well as associated typedefs.
virtual void flush()=0
virtual void write(const ROMol &mol, int confId=defaultConfId)=0
virtual ~MolWriter()
Definition: MolWriters.h:26
virtual void close()=0
virtual void setProps(const STR_VECT &propNames)=0
virtual unsigned int numMols() const =0
PDBWriter(const std::string &fileName, unsigned int flavor=0)
void write(const ROMol &mol, int confId=defaultConfId) override
write a new molecule to the file
void flush() override
flush the ostream
Definition: MolWriters.h:301
PDBWriter(std::ostream *outStream, bool takeOwnership=false, unsigned int flavor=0)
void setProps(const STR_VECT &) override
Definition: MolWriters.h:298
~PDBWriter() override
unsigned int numMols() const override
get the number of molecules written so far
Definition: MolWriters.h:328
void close() override
close our stream (the writer cannot be used again)
Definition: MolWriters.h:316
~SDWriter() override
bool getForceV3000() const
Definition: MolWriters.h:188
unsigned int numMols() const override
get the number of molecules written so far
Definition: MolWriters.h:185
SDWriter(std::ostream *outStream, bool takeOwnership=false)
bool getKekulize() const
Definition: MolWriters.h:191
void flush() override
flush the ostream
Definition: MolWriters.h:158
void setProps(const STR_VECT &propNames) override
set a vector of property names that need to be written out for each molecule
void write(const ROMol &mol, int confId=defaultConfId) override
write a new molecule to the file
static std::string getText(const ROMol &mol, int confId=defaultConfId, bool kekulize=true, bool force_V3000=false, int molid=-1, STR_VECT *propNames=nullptr)
return the text that would be written to the file
void close() override
close our stream (the writer cannot be used again)
Definition: MolWriters.h:173
void setForceV3000(bool val)
Definition: MolWriters.h:187
SDWriter(const std::string &fileName)
void setKekulize(bool val)
Definition: MolWriters.h:190
unsigned int numMols() const override
get the number of molecules written so far
Definition: MolWriters.h:105
SmilesWriter(const std::string &fileName, const std::string &delimiter=" ", const std::string &nameHeader="Name", bool includeHeader=true, bool isomericSmiles=true, bool kekuleSmiles=false)
void write(const ROMol &mol, int confId=defaultConfId) override
write a new molecule to the file
~SmilesWriter() override
SmilesWriter(std::ostream *outStream, std::string delimiter=" ", std::string nameHeader="Name", bool includeHeader=true, bool takeOwnership=false, bool isomericSmiles=true, bool kekuleSmiles=false)
This is an overloaded member function, provided for convenience. It differs from the above function o...
void setProps(const STR_VECT &propNames) override
set a vector of property names that need to be written out for each molecule
void close() override
close our stream (the writer cannot be used again)
Definition: MolWriters.h:93
void flush() override
flush the ostream
Definition: MolWriters.h:78
~TDTWriter() override
bool getWrite2D() const
Definition: MolWriters.h:265
void setNumDigits(unsigned int numDigits)
Definition: MolWriters.h:270
void setWrite2D(bool state=true)
Definition: MolWriters.h:264
void setProps(const STR_VECT &propNames) override
set a vector of property names that need to be written out for each molecule
unsigned int numMols() const override
get the number of molecules written so far
Definition: MolWriters.h:262
unsigned int getNumDigits() const
Definition: MolWriters.h:271
TDTWriter(std::ostream *outStream, bool takeOwnership=false)
void close() override
close our stream (the writer cannot be used again)
Definition: MolWriters.h:246
TDTWriter(const std::string &fileName)
void write(const ROMol &mol, int confId=defaultConfId) override
write a new molecule to the file
void setWriteNames(bool state=true)
Definition: MolWriters.h:267
void flush() override
flush the ostream
Definition: MolWriters.h:231
bool getWriteNames() const
Definition: MolWriters.h:268
#define RDKIT_FILEPARSERS_EXPORT
Definition: export.h:153
Std stuff.
Definition: Abbreviations.h:18
std::vector< std::string > STR_VECT
Definition: Dict.h:29
static int defaultConfId
Definition: MolWriters.h:23