RDKit
Open-source cheminformatics and machine learning.
ReactionParser.h
Go to the documentation of this file.
1 //
2 // Copyright (c) 2007-2022, Novartis Institutes for BioMedical Research Inc.
3 // and other RDKit contributors
4 //
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are
9 // met:
10 //
11 // * Redistributions of source code must retain the above copyright
12 // notice, this list of conditions and the following disclaimer.
13 // * Redistributions in binary form must reproduce the above
14 // copyright notice, this list of conditions and the following
15 // disclaimer in the documentation and/or other materials provided
16 // with the distribution.
17 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
18 // nor the names of its contributors may be used to endorse or promote
19 // products derived from this software without specific prior written
20 // permission.
21 //
22 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 //
34 
35 #include <RDGeneral/export.h>
36 #ifndef RD_REACTIONPARSER_H_21Aug2006
37 #define RD_REACTIONPARSER_H_21Aug2006
38 
39 #include <string>
40 #include <iostream>
41 #include <fstream>
42 #include <sstream>
43 #include <utility>
44 #include <boost/format.hpp>
47 
48 namespace RDKit {
49 class ChemicalReaction;
50 
//! used to indicate an error in parsing reaction data
/*!
  The message passed to the constructor is copied (or moved) into the
  exception object; what() returns a pointer into that stored copy.
 */
class RDKIT_CHEMREACTIONS_EXPORT ChemicalReactionParserException
    : public std::exception {
 public:
  //! construct with an error message given as a C string
  explicit ChemicalReactionParserException(const char *msg) : _msg(msg) {}
  //! construct with an error message; taken by value and moved into place
  explicit ChemicalReactionParserException(std::string msg)
      : _msg(std::move(msg)) {}
  //! get the error message
  const char *what() const noexcept override { return _msg.c_str(); }
  ~ChemicalReactionParserException() noexcept override = default;

 private:
  std::string _msg;  //!< owned copy of the error message
};
67 
68 //---------------------------------------------------------------------------
69 //! \name Reaction SMARTS/SMILES Support
70 //@{
71 
72 //! Parse a string containing "Reaction SMARTS" into a ChemicalReaction
73 /*!
74  Our definition of Reaction SMARTS is something that looks a lot like reaction
75  SMILES, except that SMARTS queries are allowed on the reactant side and that
76  atom-map numbers are required (at least for now)
77 
78  \param text the SMARTS to convert
79 
80  \param replacements a string->string map of replacement strings. \see
81  SmilesToMol for more information about replacements
82 
83  \param useSmiles if set, the SMILES parser will be used instead of the
84  SMARTS parserfor the individual components
85 
86  \param allowCXSMILES if set, any CXSMILES extensions present will be
87  parsed, otherwise it will be ignored
88  */
90  const std::string &text,
91  std::map<std::string, std::string> *replacements = nullptr,
92  bool useSmiles = false, bool allowCXSMILES = true);
93 
94 //! returns the reaction SMARTS for a reaction
96  const ChemicalReaction &rxn);
97 
98 //! returns the reaction SMILES for a reaction
100  const ChemicalReaction &rxn, bool canonical = true);
101 //@}
102 
103 //---------------------------------------------------------------------------
104 //! \name Reaction Mol Support
105 //@{
106 
107 //! Parse a ROMol into a ChemicalReaction, RXN role must be set before
108 /*!
109  Alternative to build a reaction from a molecule (fragments) which have RXN
110  roles set as atom properties: common_properties::molRxnRole (1=reactant,
111  2=product, 3=agent)
112 
113  \param mol ROMol with RXN roles set
114  */
116  const ROMol &mol);
117 
118 //! returns a ROMol with RXN roles used to describe the reaction
120  const ChemicalReaction &rxn);
121 //@}
122 
123 //---------------------------------------------------------------------------
124 //! \name MDL rxn Support
125 //@{
126 
127 //! Parse a text block in MDL rxn format into a ChemicalReaction
129  const std::string &rxnBlock, bool sanitize = false, bool removeHs = false,
130  bool strictParsing = true);
131 //! Parse a file in MDL rxn format into a ChemicalReaction
133  const std::string &fileName, bool sanitize = false, bool removeHs = false,
134  bool strictParsing = true);
135 //! Parse a text stream in MDL rxn format into a ChemicalReaction
137  std::istream &rxnStream, unsigned int &line, bool sanitize = false,
138  bool removeHs = false, bool strictParsing = true);
139 //! returns an rxn block for a reaction
140 /*!
141  \param rxn chemical reaction
142 
143  \param separateAgents flag to decide if agents are put in a separate block,
144  otherwise they are included in the reactants block
145  (default)
146 
147  \param forceV3000 flag to cause the V3000 format to be used instead of
148  V2000
149  */
151  const ChemicalReaction &rxn, bool separateAgents = false,
152  bool forceV3000 = false);
153 //! returns an V3000 rxn block for a reaction
154 /*!
155  \param rxn chemical reaction
156 
157  \param separateAgents flag to decide if agents are put in a separate block,
158  otherwise they are included in the reactants block
159  (default)
160 */
162  const ChemicalReaction &rxn, bool separateAgents = false);
163 
164 //@}
165 
166 //---------------------------------------------------------------------------
167 //! \name PNG Support
168 //@{
169 
//! Tags used for PNG metadata
/*!
  The strings themselves are defined outside this header; only the names are
  declared here.
 */
namespace PNGData {
//! metadata tag used for reaction SMILES
RDKIT_CHEMREACTIONS_EXPORT extern const std::string rxnSmilesTag;
//! metadata tag used for reaction SMARTS
RDKIT_CHEMREACTIONS_EXPORT extern const std::string rxnSmartsTag;
//! metadata tag used for an RXN block
RDKIT_CHEMREACTIONS_EXPORT extern const std::string rxnRxnTag;
//! metadata tag used for a reaction pickle
RDKIT_CHEMREACTIONS_EXPORT extern const std::string rxnPklTag;
}  // namespace PNGData
177 
178 //! \brief constructs a ChemicalReaction from the metadata in a PNG stream
179 /*!
180 
181 Looks through the metadata in the PNG to find the first tag that matches one of
182 the tags in \c RDKit::PNGData. A molecule is constructed from this chunk.
183 
184 Throws a \c FileParseException if no suitable tag is found.
185 
186 The caller is responsible for the returned pointer.
187 
188  */
190  std::istream &pngStream);
191 //! \brief constructs a ChemicalReaction from the metadata in a PNG string
192 //! See \c PNGStreamToChemicalReaction() for more details
193 inline ChemicalReaction *PNGStringToChemicalReaction(const std::string &data) {
194  std::stringstream inStream(data);
195  return PNGStreamToChemicalReaction(inStream);
196 };
197 //! \brief constructs a ChemicalReaction from the metadata in a PNG file
198 //! See \c PNGStreamToChemicalReaction() for more details
199 inline ChemicalReaction *PNGFileToChemicalReaction(const std::string &fname) {
200  std::ifstream inStream(fname.c_str(), std::ios::binary);
201  if (!inStream || (inStream.bad())) {
202  throw BadFileException((boost::format("Bad input file %s") % fname).str());
203  }
204  return PNGStreamToChemicalReaction(inStream);
205 };
206 
207 //! \brief adds metadata for a ChemicalReaction to the data from a PNG stream.
208 //! The modified PNG data is returned.
209 /*!
210 
211  \param rxn the reaction to add
212  \param iStream the stream to read from
213  \param includePkl include a reaction pickle
214  \param includeSmiles include reaction SMILES for the reaction
215  \param includeSmarts include reaction SMARTS for the reaction
216  \param includeRxn include an RXN block for the reaction
217 
218 */
220  const ChemicalReaction &rxn, std::istream &iStream, bool includePkl = true,
221  bool includeSmiles = true, bool includeSmarts = false,
222  bool includeRxn = false);
223 //! \brief adds metadata for a ChemicalReaction to the data from a PNG string.
224 //! See addChemicalReactionToPNGStream() for more details.
225 inline std::string addChemicalReactionToPNGString(const ChemicalReaction &rxn,
226  const std::string &pngString,
227  bool includePkl = true,
228  bool includeSmiles = true,
229  bool includeSmarts = false,
230  bool includeRxn = false) {
231  std::stringstream inStream(pngString);
233  rxn, inStream, includePkl, includeSmiles, includeSmarts, includeRxn);
234 }
235 //! \brief adds metadata for a ChemicalReaction to the data from a PNG string.
236 //! See addChemicalReactionToPNGStream() for more details.
237 inline std::string addChemicalReactionToPNGFile(const ChemicalReaction &rxn,
238  const std::string &fname,
239  bool includePkl = true,
240  bool includeSmiles = true,
241  bool includeSmarts = false,
242  bool includeRxn = false) {
243  std::ifstream inStream(fname.c_str(), std::ios::binary);
245  rxn, inStream, includePkl, includeSmiles, includeSmarts, includeRxn);
246 }
247 //@}
248 
249 inline std::unique_ptr<ChemicalReaction> operator"" _rxnsmarts(const char *text,
250  size_t len) {
251  std::string sma(text, len);
252  ChemicalReaction *ptr = nullptr;
253  try {
254  ptr = RxnSmartsToChemicalReaction(sma);
255  } catch (...) {
256  ptr = nullptr;
257  }
258  return std::unique_ptr<ChemicalReaction>(ptr);
259 }
260 inline std::unique_ptr<ChemicalReaction> operator"" _rxnsmiles(const char *text,
261  size_t len) {
262  std::string sma(text, len);
263  ChemicalReaction *ptr = nullptr;
264  try {
265  ptr = RxnSmartsToChemicalReaction(sma, nullptr, true);
266  } catch (...) {
267  ptr = nullptr;
268  }
269  return std::unique_ptr<ChemicalReaction>(ptr);
270 }
271 
272 } // namespace RDKit
273 
274 #endif
used by various file parsing classes to indicate a bad file
used to indicate an error in parsing reaction data
const char * what() const noexcept override
get the error message
ChemicalReactionParserException(std::string msg)
construct with an error message
~ChemicalReactionParserException() noexcept override=default
ChemicalReactionParserException(const char *msg)
construct with an error message
This is a class for storing and applying general chemical reactions.
Definition: Reaction.h:121
#define RDKIT_CHEMREACTIONS_EXPORT
Definition: export.h:49
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
RDKIT_CHEMREACTIONS_EXPORT const std::string rxnRxnTag
RDKIT_CHEMREACTIONS_EXPORT const std::string rxnSmilesTag
RDKIT_CHEMREACTIONS_EXPORT const std::string rxnSmartsTag
RDKIT_CHEMREACTIONS_EXPORT const std::string rxnPklTag
Std stuff.
Definition: Abbreviations.h:18
RDKIT_CHEMREACTIONS_EXPORT std::string ChemicalReactionToRxnSmiles(const ChemicalReaction &rxn, bool canonical=true)
returns the reaction SMILES for a reaction
ChemicalReaction * PNGFileToChemicalReaction(const std::string &fname)
constructs a ChemicalReaction from the metadata in a PNG file See PNGStreamToChemicalReaction() for m...
RDKIT_CHEMREACTIONS_EXPORT std::string ChemicalReactionToRxnBlock(const ChemicalReaction &rxn, bool separateAgents=false, bool forceV3000=false)
returns an rxn block for a reaction
RDKIT_CHEMREACTIONS_EXPORT ChemicalReaction * PNGStreamToChemicalReaction(std::istream &pngStream)
constructs a ChemicalReaction from the metadata in a PNG stream
RDKIT_CHEMREACTIONS_EXPORT ChemicalReaction * RxnSmartsToChemicalReaction(const std::string &text, std::map< std::string, std::string > *replacements=nullptr, bool useSmiles=false, bool allowCXSMILES=true)
Parse a string containing "Reaction SMARTS" into a ChemicalReaction.
RDKIT_CHEMREACTIONS_EXPORT ChemicalReaction * RxnBlockToChemicalReaction(const std::string &rxnBlock, bool sanitize=false, bool removeHs=false, bool strictParsing=true)
Parse a text block in MDL rxn format into a ChemicalReaction.
RDKIT_CHEMREACTIONS_EXPORT std::string addChemicalReactionToPNGStream(const ChemicalReaction &rxn, std::istream &iStream, bool includePkl=true, bool includeSmiles=true, bool includeSmarts=false, bool includeRxn=false)
adds metadata for a ChemicalReaction to the data from a PNG stream. The modified PNG data is returned...
RDKIT_CHEMREACTIONS_EXPORT ROMol * ChemicalReactionToRxnMol(const ChemicalReaction &rxn)
returns a ROMol with RXN roles used to describe the reaction
ChemicalReaction * PNGStringToChemicalReaction(const std::string &data)
constructs a ChemicalReaction from the metadata in a PNG string See PNGStreamToChemicalReaction() for...
RDKIT_CHEMREACTIONS_EXPORT ChemicalReaction * RxnDataStreamToChemicalReaction(std::istream &rxnStream, unsigned int &line, bool sanitize=false, bool removeHs=false, bool strictParsing=true)
Parse a text stream in MDL rxn format into a ChemicalReaction.
RDKIT_CHEMREACTIONS_EXPORT std::string ChemicalReactionToRxnSmarts(const ChemicalReaction &rxn)
returns the reaction SMARTS for a reaction
std::string addChemicalReactionToPNGFile(const ChemicalReaction &rxn, const std::string &fname, bool includePkl=true, bool includeSmiles=true, bool includeSmarts=false, bool includeRxn=false)
adds metadata for a ChemicalReaction to the data from a PNG string. See addChemicalReactionToPNGStrea...
RDKIT_CHEMREACTIONS_EXPORT std::string ChemicalReactionToV3KRxnBlock(const ChemicalReaction &rxn, bool separateAgents=false)
returns an V3000 rxn block for a reaction
RDKIT_CHEMREACTIONS_EXPORT ChemicalReaction * RxnFileToChemicalReaction(const std::string &fileName, bool sanitize=false, bool removeHs=false, bool strictParsing=true)
Parse a file in MDL rxn format into a ChemicalReaction.
RDKIT_CHEMREACTIONS_EXPORT ChemicalReaction * RxnMolToChemicalReaction(const ROMol &mol)
Parse a ROMol into a ChemicalReaction, RXN role must be set before.
std::string addChemicalReactionToPNGString(const ChemicalReaction &rxn, const std::string &pngString, bool includePkl=true, bool includeSmiles=true, bool includeSmarts=false, bool includeRxn=false)
adds metadata for a ChemicalReaction to the data from a PNG string. See addChemicalReactionToPNGStrea...