RDKit
Open-source cheminformatics and machine learning.
SmilesWrite.h
Go to the documentation of this file.
1//
2// Copyright (C) 2002-2021 Greg Landrum and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_SMILESWRITE_H_012020
12#define RD_SMILESWRITE_H_012020
13
14#include <string>
15#include <vector>
16#include <memory>
17#include <cstdint>
18#include <limits>
19
20namespace RDKit {
21class Atom;
22class Bond;
23class ROMol;
24
//! \brief Options accepted by the SMILES / CXSMILES writers in this header.
/*!
  Every field carries an in-class default, so a default-constructed object
  asks for isomeric, canonical, non-kekulized output with no explicit bonds
  or hydrogens, no randomization and no forced root atom.

  NOTE(review): the opening line of this type was missing from the listing
  this block was recovered from. The name comes from the overloads that take
  a `const SmilesWriteParams &`; confirm against the upstream header whether
  the declaration also carries an export macro.
*/
struct SmilesWriteParams {
  bool doIsomericSmiles =
      true; /**< include stereochemistry and isotope information */
  bool doKekule = false; /**< kekulize the molecule before generating the SMILES
                              and output single/double bonds. NOTE that the
                              output is not canonical and that this will throw
                              an exception if the molecule cannot be
                              kekulized. */
  bool canonical = true;         /**< generate canonical SMILES */
  bool allBondsExplicit = false; /**< include symbols for all bonds */
  bool allHsExplicit = false; /**< provide hydrogen counts for every atom */
  bool doRandom = false; /**< randomize the output order. The resulting SMILES
                              is not canonical */
  int rootedAtAtom = -1; /**< make sure the SMILES starts at the specified
                              atom. The resulting SMILES is not canonical */
};
40namespace SmilesWrite {
41
//! \brief Bit flags used as the \c flags argument of the CXSMILES writers.
/*!
  Each named field occupies exactly one bit; CX_ALL has bits 0 through 30 set
  and is the default \c flags value of the functions that accept one.

  NOTE(review): no enumerator is listed here for bits 0, 1 and 6, and the
  listing this block was recovered from skips lines at those positions.
  Confirm against the upstream header before assuming this list is complete.
*/
enum CXSmilesFields : std::uint32_t {
  CX_COORDS = 1 << 2,      // bit 2
  CX_RADICALS = 1 << 3,    // bit 3
  CX_ATOM_PROPS = 1 << 4,  // bit 4
  CX_LINKNODES = 1 << 5,   // bit 5
  CX_SGROUPS = 1 << 7,     // bit 7
  CX_POLYMER = 1 << 8,     // bit 8
  CX_BOND_CFG = 1 << 9,    // bit 9
  CX_ALL = 0x7fffffff,     // bits 0..30
};
57
58//! \brief returns the cxsmiles data for a molecule
60 const ROMol &mol, std::uint32_t flags = CXSmilesFields::CX_ALL);
61
62//! \brief returns true if the atom number is in the SMILES organic subset
64
65//! \brief returns the SMILES for an atom
66/*!
67 \param atom : the atom to work with
68 \param doKekule : we're doing kekulized smiles (e.g. don't use
69 lower case for the atom label)
70 \param bondIn : the bond we came into the atom on (unused)
71 \param allHsExplicit : if true, hydrogen counts will be provided for every
72 atom.
73 \param isomericSmiles : if true, isomeric SMILES will be generated
74*/
76 bool doKekule = false,
77 const Bond *bondIn = nullptr,
78 bool allHsExplicit = false,
79 bool isomericSmiles = true);
80
81//! \brief returns the SMILES for a bond
82/*!
83 \param bond : the bond to work with
84 \param atomToLeftIdx : the index of the atom preceding \c bond
85 in the SMILES
86 \param doKekule : we're doing kekulized smiles (e.g. write out
87 bond orders for aromatic bonds)
88 \param allBondsExplicit : if true, symbols will be included for all bonds.
89*/
91 const Bond *bond, int atomToLeftIdx = -1, bool doKekule = false,
92 bool allBondsExplicit = false);
93} // namespace SmilesWrite
94
95//! \brief returns canonical SMILES for a molecule
97 const ROMol &mol, const SmilesWriteParams &params);
98
99//! \brief returns canonical SMILES for a molecule
100/*!
101 \param mol : the molecule in question.
102 \param doIsomericSmiles : include stereochemistry and isotope information
103 in the SMILES
104
105 \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds) NOTE that
106 this will throw an exception if the molecule cannot be kekulized.
107
108 \param rootedAtAtom : make sure the SMILES starts at the specified atom.
109 The resulting SMILES is not, of course, canonical.
110 \param canonical : if false, no attempt will be made to canonicalize the
111 SMILES
112 \param allBondsExplicit : if true, symbols will be included for all bonds.
113 \param allHsExplicit : if true, hydrogen counts will be provided for every
114 atom.
115 */
116inline std::string MolToSmiles(const ROMol &mol, bool doIsomericSmiles = true,
117 bool doKekule = false, int rootedAtAtom = -1,
118 bool canonical = true,
119 bool allBondsExplicit = false,
120 bool allHsExplicit = false,
121 bool doRandom = false) {
123 ps.doIsomericSmiles = doIsomericSmiles;
124 ps.doKekule = doKekule;
125 ps.rootedAtAtom = rootedAtAtom;
126 ps.canonical = canonical;
127 ps.allBondsExplicit = allBondsExplicit;
128 ps.allHsExplicit = allHsExplicit;
129 ps.doRandom = doRandom;
130 return MolToSmiles(mol, ps);
131};
132
133//! \brief returns a vector of random SMILES for a molecule (may contain
134//! duplicates)
135/*!
136 \param mol : the molecule in question.
137 \param numSmiles : the number of SMILES to return
138 \param randomSeed : if >0, will be used to seed the random number generator
139 \param doIsomericSmiles : include stereochemistry and isotope information
140 in the SMILES
141 \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
142 \param allBondsExplicit : if true, symbols will be included for all bonds.
143 \param allHsExplicit : if true, hydrogen counts will be provided for every
144 atom.
145 */
147 const ROMol &mol, unsigned int numSmiles, unsigned int randomSeed = 0,
148 bool doIsomericSmiles = true, bool doKekule = false,
149 bool allBondsExplicit = false, bool allHsExplicit = false);
150
151//! \brief returns canonical SMILES for part of a molecule
153 const ROMol &mol, const SmilesWriteParams &params,
154 const std::vector<int> &atomsToUse,
155 const std::vector<int> *bondsToUse = nullptr,
156 const std::vector<std::string> *atomSymbols = nullptr,
157 const std::vector<std::string> *bondSymbols = nullptr);
158
159//! \brief returns canonical SMILES for part of a molecule
160/*!
161 \param mol : the molecule in question.
162 \param atomsToUse : indices of the atoms in the fragment
163 \param bondsToUse : indices of the bonds in the fragment. If this is not
164 provided,
165 all bonds between the atoms in atomsToUse will be included
166 \param atomSymbols : symbols to use for the atoms in the output SMILES
167 \param bondSymbols : symbols to use for the bonds in the output SMILES
168 \param doIsomericSmiles : include stereochemistry and isotope information
169 in the SMILES
170 \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
171 \param rootedAtAtom : make sure the SMILES starts at the specified atom.
172 The resulting SMILES is not, of course, canonical.
173 \param canonical : if false, no attempt will be made to canonicalize the
174 SMILES
175 \param allBondsExplicit : if true, symbols will be included for all bonds.
176 \param allHsExplicit : if true, hydrogen counts will be provided for every
177 atom.
178 \param doRandom : generate a randomized smiles string by randomly choosing
179 the priority to follow in the DFS traversal. [default false]
180
181 \b NOTE: the bondSymbols are *not* currently used in the canonicalization.
182
183 */
184inline std::string MolFragmentToSmiles(
185 const ROMol &mol, const std::vector<int> &atomsToUse,
186 const std::vector<int> *bondsToUse = nullptr,
187 const std::vector<std::string> *atomSymbols = nullptr,
188 const std::vector<std::string> *bondSymbols = nullptr,
189 bool doIsomericSmiles = true, bool doKekule = false, int rootedAtAtom = -1,
190 bool canonical = true, bool allBondsExplicit = false,
191 bool allHsExplicit = false) {
193 ps.doIsomericSmiles = doIsomericSmiles;
194 ps.doKekule = doKekule;
195 ps.rootedAtAtom = rootedAtAtom;
196 ps.canonical = canonical;
197 ps.allBondsExplicit = allBondsExplicit;
198 ps.allHsExplicit = allHsExplicit;
199 return MolFragmentToSmiles(mol, ps, atomsToUse, bondsToUse, atomSymbols,
200 bondSymbols);
201}
202
203//! \brief returns canonical CXSMILES for a molecule
205 const ROMol &mol, const SmilesWriteParams &ps,
206 std::uint32_t flags = SmilesWrite::CXSmilesFields::CX_ALL);
207
208//! \brief returns canonical CXSMILES for a molecule
209/*!
210 \param mol : the molecule in question.
211 \param doIsomericSmiles : include stereochemistry and isotope information
212 in the SMILES
213 \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
214 \param rootedAtAtom : make sure the SMILES starts at the specified atom.
215 The resulting SMILES is not, of course, canonical.
216 \param canonical : if false, no attempt will be made to canonicalize the
217 SMILES
218 \param allBondsExplicit : if true, symbols will be included for all bonds.
219 \param allHsExplicit : if true, hydrogen counts will be provided for every
220 atom.
221 */
222inline std::string MolToCXSmiles(const ROMol &mol, bool doIsomericSmiles = true,
223 bool doKekule = false, int rootedAtAtom = -1,
224 bool canonical = true,
225 bool allBondsExplicit = false,
226 bool allHsExplicit = false,
227 bool doRandom = false) {
229 ps.doIsomericSmiles = doIsomericSmiles;
230 ps.doKekule = doKekule;
231 ps.rootedAtAtom = rootedAtAtom;
232 ps.canonical = canonical;
233 ps.allBondsExplicit = allBondsExplicit;
234 ps.allHsExplicit = allHsExplicit;
235 ps.doRandom = doRandom;
236 return MolToCXSmiles(mol, ps);
237};
238
239//! \brief returns canonical CXSMILES for part of a molecule
241 const ROMol &mol, const SmilesWriteParams &params,
242 const std::vector<int> &atomsToUse,
243 const std::vector<int> *bondsToUse = nullptr,
244 const std::vector<std::string> *atomSymbols = nullptr,
245 const std::vector<std::string> *bondSymbols = nullptr);
246
247//! \brief returns canonical CXSMILES for part of a molecule
248/*!
249 \param mol : the molecule in question.
250 \param atomsToUse : indices of the atoms in the fragment
251 \param bondsToUse : indices of the bonds in the fragment. If this is not
252 provided,
253 all bonds between the atoms in atomsToUse will be included
254 \param atomSymbols : symbols to use for the atoms in the output SMILES
255 \param bondSymbols : symbols to use for the bonds in the output SMILES
256 \param doIsomericSmiles : include stereochemistry and isotope information
257 in the SMILES
258 \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
259 \param rootedAtAtom : make sure the SMILES starts at the specified atom.
260 The resulting SMILES is not, of course, canonical.
261 \param canonical : if false, no attempt will be made to canonicalize the
262 SMILES
263 \param allBondsExplicit : if true, symbols will be included for all bonds.
264 \param allHsExplicit : if true, hydrogen counts will be provided for every
265 atom.
266
267 \b NOTE: the bondSymbols are *not* currently used in the canonicalization.
268
269 */
270inline std::string MolFragmentToCXSmiles(
271 const ROMol &mol, const std::vector<int> &atomsToUse,
272 const std::vector<int> *bondsToUse = nullptr,
273 const std::vector<std::string> *atomSymbols = nullptr,
274 const std::vector<std::string> *bondSymbols = nullptr,
275 bool doIsomericSmiles = true, bool doKekule = false, int rootedAtAtom = -1,
276 bool canonical = true, bool allBondsExplicit = false,
277 bool allHsExplicit = false) {
279 ps.doIsomericSmiles = doIsomericSmiles;
280 ps.doKekule = doKekule;
281 ps.rootedAtAtom = rootedAtAtom;
282 ps.canonical = canonical;
283 ps.allBondsExplicit = allBondsExplicit;
284 ps.allHsExplicit = allHsExplicit;
285 return MolFragmentToCXSmiles(mol, ps, atomsToUse, bondsToUse, atomSymbols,
286 bondSymbols);
287}
288
289} // namespace RDKit
290#endif
The class for representing atoms.
Definition: Atom.h:68
class for representing a bond
Definition: Bond.h:47
#define RDKIT_SMILESPARSE_EXPORT
Definition: export.h:457
RDKIT_SMILESPARSE_EXPORT bool inOrganicSubset(int atomicNumber)
returns true if the atom number is in the SMILES organic subset
RDKIT_SMILESPARSE_EXPORT std::string GetBondSmiles(const Bond *bond, int atomToLeftIdx=-1, bool doKekule=false, bool allBondsExplicit=false)
returns the SMILES for a bond
RDKIT_SMILESPARSE_EXPORT std::string getCXExtensions(const ROMol &mol, std::uint32_t flags=CXSmilesFields::CX_ALL)
returns the cxsmiles data for a molecule
RDKIT_SMILESPARSE_EXPORT std::string GetAtomSmiles(const Atom *atom, bool doKekule=false, const Bond *bondIn=nullptr, bool allHsExplicit=false, bool isomericSmiles=true)
returns the SMILES for an atom
Std stuff.
Definition: Abbreviations.h:19
RDKIT_SMILESPARSE_EXPORT std::vector< std::string > MolToRandomSmilesVect(const ROMol &mol, unsigned int numSmiles, unsigned int randomSeed=0, bool doIsomericSmiles=true, bool doKekule=false, bool allBondsExplicit=false, bool allHsExplicit=false)
returns a vector of random SMILES for a molecule (may contain duplicates)
RDKIT_SMILESPARSE_EXPORT std::string MolFragmentToSmiles(const ROMol &mol, const SmilesWriteParams &params, const std::vector< int > &atomsToUse, const std::vector< int > *bondsToUse=nullptr, const std::vector< std::string > *atomSymbols=nullptr, const std::vector< std::string > *bondSymbols=nullptr)
returns canonical SMILES for part of a molecule
RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams &params)
returns canonical SMILES for a molecule
RDKIT_SMILESPARSE_EXPORT std::string MolToCXSmiles(const ROMol &mol, const SmilesWriteParams &ps, std::uint32_t flags=SmilesWrite::CXSmilesFields::CX_ALL)
returns canonical CXSMILES for a molecule
RDKIT_SMILESPARSE_EXPORT std::string MolFragmentToCXSmiles(const ROMol &mol, const SmilesWriteParams &params, const std::vector< int > &atomsToUse, const std::vector< int > *bondsToUse=nullptr, const std::vector< std::string > *atomSymbols=nullptr, const std::vector< std::string > *bondSymbols=nullptr)
returns canonical CXSMILES for part of a molecule