RDKit
Open-source cheminformatics and machine learning.
SmilesParse.h
Go to the documentation of this file.
1//
2// Copyright (C) 2001-2021 Greg Landrum and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_SMILESPARSE_H
12#define RD_SMILESPARSE_H
13
14#include <GraphMol/RWMol.h>
16#include <string>
17#include <exception>
18#include <map>
19
20namespace RDKit {
21
//! Options controlling how a SMILES string is turned into a molecule.
// NOTE(review): the declaration line of this struct is absent from the listing
// this text was recovered from; the name is taken from its uses elsewhere in
// this header. Upstream may additionally tag the struct with an export macro
// (as SmilesParseException is) - confirm against the real header.
struct SmilesParserParams {
  int debugParse = 0;   /**< enable debugging in the SMILES parser*/
  bool sanitize = true; /**< sanitize the molecule after building it */
  std::map<std::string, std::string> *replacements =
      nullptr;               /**< allows SMILES "macros" */
  bool allowCXSMILES = true; /**< recognize and parse CXSMILES*/
  bool strictCXSMILES =
      true; /**< throw an exception if the CXSMILES parsing fails */
  bool parseName = true; /**< parse (and set) the molecule name as well */
  bool removeHs = true;  /**< remove Hs after constructing the molecule */
  bool useLegacyStereo =
      true; /**< use the legacy stereochemistry perception code */
  bool skipCleanup =
      false; /**< skip the final cleanup stage (for internal use) */
};
38 const SmilesParserParams &params);
39
42
43//! Construct a molecule from a SMILES string
44/*!
45 \param smi the SMILES to convert
46 \param debugParse toggles verbose debugging information from the parser
47 \param sanitize toggles H removal and sanitization of the molecule
48 \param replacements a string->string map of replacement strings. See below
49 for more information about replacements.
50
51 \return a pointer to the new molecule; the caller is responsible for free'ing
52 this.
53
54 The optional replacements map can be used to do string substitution of
55 abbreviations
56 in the input SMILES. The set of substitutions is repeatedly looped through
57 until
58 the string no longer changes. It is the responsibility of the caller to make
59 sure
60 that substitutions results in legal and sensible SMILES.
61
62 Examples of substitutions:
63 \code
64 CC{Q}C with {"{Q}":"OCCO"} -> CCOCCOC
65 C{A}C{Q}C with {"{Q}":"OCCO", "{A}":"C1(CC1)"} -> CC1(CC1)COCCOC
66 C{A}C{Q}C with {"{Q}":"{X}CC{X}", "{A}":"C1CC1", "{X}":"N"} -> CC1CC1CNCCNC
67 \endcode
68
69 */
71 const std::string &smi, int debugParse = 0, bool sanitize = true,
72 std::map<std::string, std::string> *replacements = nullptr) {
73 SmilesParserParams params;
74 params.debugParse = debugParse;
75 params.replacements = replacements;
76 if (sanitize) {
77 params.sanitize = true;
78 params.removeHs = true;
79 } else {
80 params.sanitize = false;
81 params.removeHs = false;
82 }
83 return SmilesToMol(smi, params);
84};
85
//! Options controlling how a SMARTS string is turned into a molecule.
// NOTE(review): the declaration line of this struct is absent from the listing
// this text was recovered from; the name is taken from its uses elsewhere in
// this header. Upstream may additionally tag the struct with an export macro
// (as SmilesParseException is) - confirm against the real header.
struct SmartsParserParams {
  int debugParse = 0; /**< enable debugging in the SMARTS parser*/
  std::map<std::string, std::string> *replacements =
      nullptr;               /**< allows SMARTS "macros" */
  bool allowCXSMILES = true; /**< recognize and parse CXSMILES extensions */
  bool strictCXSMILES =
      true; /**< throw an exception if the CXSMILES parsing fails */
  bool parseName = true; /**< parse (and set) the molecule name as well */
  bool mergeHs =
      true; /**< toggles merging H atoms in the SMARTS into neighboring atoms*/
  bool skipCleanup =
      false; /**< skip the final cleanup stage (for internal use) */
};
100 const SmartsParserParams &ps);
101
102//! Construct a molecule from a SMARTS string
103/*!
104 \param sma the SMARTS to convert
105 \param debugParse toggles verbose debugging information from the parser
106 \param mergeHs toggles merging H atoms in the SMARTS into neighboring
107 atoms
108 \param replacements a string->string map of replacement strings.
109 \see SmilesToMol for more information about replacements
110
111 \return a pointer to the new molecule; the caller is responsible for free'ing
112 this.
113 */
115 const std::string &sma, int debugParse = 0, bool mergeHs = false,
116 std::map<std::string, std::string> *replacements = nullptr) {
118 ps.debugParse = debugParse;
119 ps.mergeHs = mergeHs;
120 ps.replacements = replacements;
121 return SmartsToMol(sma, ps);
122};
123
126
127class RDKIT_SMILESPARSE_EXPORT SmilesParseException : public std::exception {
128 public:
129 SmilesParseException(const char *msg) : _msg(msg) {}
130 SmilesParseException(const std::string msg) : _msg(msg) {}
131 const char *what() const noexcept override { return _msg.c_str(); }
132 ~SmilesParseException() noexcept override = default;
133
134 private:
135 std::string _msg;
136};
137
138inline std::unique_ptr<RDKit::RWMol> operator"" _smiles(const char *text,
139 size_t len) {
140 std::string smi(text, len);
141 RWMol *ptr = nullptr;
142 try {
143 ptr = SmilesToMol(smi);
144 } catch (const RDKit::MolSanitizeException &) {
145 ptr = nullptr;
146 }
147 return std::unique_ptr<RWMol>(ptr);
148}
149inline std::unique_ptr<RDKit::RWMol> operator"" _smarts(const char *text,
150 size_t len) {
151 std::string smi(text, len);
152 // no need for exception handling here: SmartsToMol() doesn't do
153 // sanitization
154 RWMol *ptr = SmartsToMol(smi);
155 return std::unique_ptr<RWMol>(ptr);
156}
157
158} // namespace RDKit
159
160#endif
Defines the editable molecule class RWMol.
The class for representing atoms.
Definition: Atom.h:68
class for representing a bond
Definition: Bond.h:47
class for flagging sanitization errors
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:32
SmilesParseException(const std::string msg)
Definition: SmilesParse.h:130
SmilesParseException(const char *msg)
Definition: SmilesParse.h:129
const char * what() const noexcept override
Definition: SmilesParse.h:131
~SmilesParseException() noexcept override=default
#define RDKIT_SMILESPARSE_EXPORT
Definition: export.h:449
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
Std stuff.
Definition: Abbreviations.h:18
RDKIT_SMILESPARSE_EXPORT RWMol * SmartsToMol(const std::string &sma, const SmartsParserParams &ps)
RDKIT_SMILESPARSE_EXPORT Atom * SmartsToAtom(const std::string &sma)
RDKIT_SMILESPARSE_EXPORT Bond * SmilesToBond(const std::string &smi)
RDKIT_SMILESPARSE_EXPORT RWMol * SmilesToMol(const std::string &smi, const SmilesParserParams &params)
RDKIT_SMILESPARSE_EXPORT Bond * SmartsToBond(const std::string &sma)
RDKIT_SMILESPARSE_EXPORT Atom * SmilesToAtom(const std::string &smi)
std::map< std::string, std::string > * replacements
Definition: SmilesParse.h:88
std::map< std::string, std::string > * replacements
Definition: SmilesParse.h:25