RDKit
Open-source cheminformatics and machine learning.
StreamOps.h
Go to the documentation of this file.
1//
2// Copyright (C) 2002-2008 Greg Landrum and Rational Discovery LLC
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10//
11#include <RDGeneral/export.h>
12#ifndef _RD_STREAMOPS_H
13#define _RD_STREAMOPS_H
14
15#include "types.h"
16#include "Invariant.h"
17#include "RDProps.h"
18#include <string>
19#include <sstream>
20#include <iostream>
21#include <boost/cstdint.hpp>
22#include <boost/predef.h>
23
24namespace RDKit {
25// this code block for handling endian problems is adapted from :
26// http://stackoverflow.com/questions/105252/how-do-i-convert-between-big-endian-and-little-endian-values-in-c
27enum EEndian {
30#if defined(BOOST_ENDIAN_LITTLE_BYTE) || defined(BOOST_ENDIAN_LITTLE_WORD)
31 HOST_ENDIAN_ORDER = LITTLE_ENDIAN_ORDER
32#elif defined(BOOST_ENDIAN_BIG_BYTE)
33 HOST_ENDIAN_ORDER = BIG_ENDIAN_ORDER
34#elif defined(BOOST_ENDIAN_BIG_WORD)
35#error "Cannot compile on word-swapped big-endian systems"
36#else
37#error "Failed to determine the system endian value"
38#endif
39};
40
//! Returns a copy of \c value with the order of its first \c size bytes
//! reversed.
/*!
  \tparam T     type of the value to swap; it is copied byte by byte, so it
                should be a plain (trivially copyable) type
  \tparam size  number of bytes to reverse, normally \c sizeof(T)

  Values of fewer than two bytes are returned unchanged.

  The bytes are accessed through <tt>unsigned char</tt> pointers instead of a
  union: reading a union member other than the one last written is undefined
  behaviour in C++, whereas inspecting an object through a character pointer
  is always allowed.
*/
template <class T, unsigned int size>
inline T SwapBytes(T value) {
  static_assert(size <= sizeof(T),
                "SwapBytes: size must not exceed sizeof(T)");
  if (size < 2) {
    return value;
  }

  // start from a copy so that any bytes beyond `size` keep their value
  T swapped = value;
  const unsigned char *src = reinterpret_cast<const unsigned char *>(&value);
  unsigned char *dst = reinterpret_cast<unsigned char *>(&swapped);
  for (unsigned int i = 0; i < size; ++i) {
    dst[i] = src[size - 1 - i];
  }
  return swapped;
}
62
63// Here is the function you will use. Again there is two compile-time assertion
64// that use the boost libraries. You could probably comment them out, but if you
65// do be cautious not to use this function for anything else than integers
66// types. This function need to be called like this :
67//
68// int x = someValue;
69// int i = EndianSwapBytes<HOST_ENDIAN_ORDER, BIG_ENDIAN_ORDER>(x);
70//
71template <EEndian from, EEndian to, class T>
72inline T EndianSwapBytes(T value) {
73 // A : La donnée à swapper à une taille de 2, 4 ou 8 octets
74 BOOST_STATIC_ASSERT(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 ||
75 sizeof(T) == 8);
76 if (sizeof(T) == 1) {
77 return value;
78 }
79
80 // A : La donnée à swapper est d'un type arithmetic
81 // BOOST_STATIC_ASSERT(boost::is_arithmetic<T>::value);
82
83 // Si from et to sont du même type on ne swap pas.
84 if (from == to) {
85 return value;
86 }
87
88 return SwapBytes<T, sizeof(T)>(value);
89}
90template <EEndian from, EEndian to>
91inline char EndianSwapBytes(char value) {
92 return value;
93}
94template <EEndian from, EEndian to>
95inline unsigned char EndianSwapBytes(unsigned char value) {
96 return value;
97}
98template <EEndian from, EEndian to>
99inline signed char EndianSwapBytes(signed char value) {
100 return value;
101}
102// --------------------------------------
103
104//! Packs an integer and outputs it to a stream
105inline void appendPackedIntToStream(std::stringstream &ss,
106 boost::uint32_t num) {
107 int nbytes, bix;
108 unsigned int val, res;
109 char tc;
110
111 res = num;
112 while (1) {
113 if (res < (1 << 7)) {
114 val = (res << 1);
115 nbytes = 1;
116 break;
117 }
118 res -= (1 << 7);
119 if (res < (1 << 14)) {
120 val = ((res << 2) | 1);
121 nbytes = 2;
122 break;
123 }
124 res -= (1 << 14);
125 if (res < (1 << 21)) {
126 val = ((res << 3) | 3);
127 nbytes = 3;
128 break;
129 }
130 res -= (1 << 21);
131 if (res < (1 << 29)) {
132 val = ((res << 3) | 7);
133 nbytes = 4;
134 break;
135 } else {
136 CHECK_INVARIANT(0, "ERROR: Integer too big to pack\n");
137 }
138 }
139 // val = EndianSwapBytes<HOST_ENDIAN_ORDER,LITTLE_ENDIAN_ORDER>(val);
140
141 for (bix = 0; bix < nbytes; bix++) {
142 tc = (char)(val & 255);
143 ss.write(&tc, 1);
144 val >>= 8;
145 }
146}
147
148//! Reads an integer from a stream in packed format and returns the result.
149inline boost::uint32_t readPackedIntFromStream(std::stringstream &ss) {
150 boost::uint32_t val, num;
151 int shift, offset;
152 char tmp;
153 ss.read(&tmp, sizeof(tmp));
154 if (ss.fail()) {
155 throw std::runtime_error("failed to read from stream");
156 }
157
158 val = UCHAR(tmp);
159 offset = 0;
160 if ((val & 1) == 0) {
161 shift = 1;
162 } else if ((val & 3) == 1) {
163 ss.read((char *)&tmp, sizeof(tmp));
164 if (ss.fail()) {
165 throw std::runtime_error("failed to read from stream");
166 }
167
168 val |= (UCHAR(tmp) << 8);
169 shift = 2;
170 offset = (1 << 7);
171 } else if ((val & 7) == 3) {
172 ss.read((char *)&tmp, sizeof(tmp));
173 if (ss.fail()) {
174 throw std::runtime_error("failed to read from stream");
175 }
176
177 val |= (UCHAR(tmp) << 8);
178 ss.read((char *)&tmp, sizeof(tmp));
179 if (ss.fail()) {
180 throw std::runtime_error("failed to read from stream");
181 }
182
183 val |= (UCHAR(tmp) << 16);
184 shift = 3;
185 offset = (1 << 7) + (1 << 14);
186 } else {
187 ss.read((char *)&tmp, sizeof(tmp));
188 if (ss.fail()) {
189 throw std::runtime_error("failed to read from stream");
190 }
191
192 val |= (UCHAR(tmp) << 8);
193 ss.read((char *)&tmp, sizeof(tmp));
194 if (ss.fail()) {
195 throw std::runtime_error("failed to read from stream");
196 }
197
198 val |= (UCHAR(tmp) << 16);
199 ss.read((char *)&tmp, sizeof(tmp));
200 if (ss.fail()) {
201 throw std::runtime_error("failed to read from stream");
202 }
203
204 val |= (UCHAR(tmp) << 24);
205 shift = 3;
206 offset = (1 << 7) + (1 << 14) + (1 << 21);
207 }
208 num = (val >> shift) + offset;
209 // num = EndianSwapBytes<LITTLE_ENDIAN_ORDER,HOST_ENDIAN_ORDER>(num);
210 return num;
211}
212
213//! Reads an integer from a char * in packed format and returns the result.
214//! The argument is advanced
215inline boost::uint32_t pullPackedIntFromString(const char *&text) {
216 boost::uint32_t val, num;
217 int shift, offset;
218 char tmp;
219 tmp = *text;
220 text++;
221 val = UCHAR(tmp);
222 offset = 0;
223 if ((val & 1) == 0) {
224 shift = 1;
225 } else if ((val & 3) == 1) {
226 tmp = *text;
227 text++;
228 val |= (UCHAR(tmp) << 8);
229 shift = 2;
230 offset = (1 << 7);
231 } else if ((val & 7) == 3) {
232 tmp = *text;
233 text++;
234 val |= (UCHAR(tmp) << 8);
235 tmp = *text;
236 text++;
237 val |= (UCHAR(tmp) << 16);
238 shift = 3;
239 offset = (1 << 7) + (1 << 14);
240 } else {
241 tmp = *text;
242 text++;
243 val |= (UCHAR(tmp) << 8);
244 tmp = *text;
245 text++;
246 val |= (UCHAR(tmp) << 16);
247 tmp = *text;
248 text++;
249 val |= (UCHAR(tmp) << 24);
250 shift = 3;
251 offset = (1 << 7) + (1 << 14) + (1 << 21);
252 }
253 num = (val >> shift) + offset;
254 // num = EndianSwapBytes<LITTLE_ENDIAN_ORDER,HOST_ENDIAN_ORDER>(num);
255 return num;
256}
257
258//! does a binary write of an object to a stream
259template <typename T>
260void streamWrite(std::ostream &ss, const T &val) {
261 T tval = EndianSwapBytes<HOST_ENDIAN_ORDER, LITTLE_ENDIAN_ORDER>(val);
262 ss.write((const char *)&tval, sizeof(T));
263}
264
265//! special case for string
266inline void streamWrite(std::ostream &ss, const std::string &what) {
267 unsigned int l = rdcast<unsigned int>(what.length());
268 ss.write((const char *)&l, sizeof(l));
269 ss.write(what.c_str(), sizeof(char) * l);
270};
271
272template <typename T>
273void streamWriteVec(std::ostream &ss, const T &val) {
274 streamWrite(ss, static_cast<boost::uint64_t>(val.size()));
275 for (size_t i = 0; i < val.size(); ++i) {
276 streamWrite(ss, val[i]);
277 }
278}
279
280//! does a binary read of an object from a stream
281template <typename T>
282void streamRead(std::istream &ss, T &loc) {
283 T tloc;
284 ss.read((char *)&tloc, sizeof(T));
285 if (ss.fail()) {
286 throw std::runtime_error("failed to read from stream");
287 }
288 loc = EndianSwapBytes<LITTLE_ENDIAN_ORDER, HOST_ENDIAN_ORDER>(tloc);
289}
290
291//! special case for string
292template <class T>
293void streamRead(std::istream &ss, T &obj, int version) {
294 RDUNUSED_PARAM(version);
295 streamRead(ss, obj);
296}
297
298inline void streamRead(std::istream &ss, std::string &what, int version) {
299 RDUNUSED_PARAM(version);
300 unsigned int l;
301 ss.read((char *)&l, sizeof(l));
302 if (ss.fail()) {
303 throw std::runtime_error("failed to read from stream");
304 }
305 char *buff = new char[l];
306 ss.read(buff, sizeof(char) * l);
307 if (ss.fail()) {
308 throw std::runtime_error("failed to read from stream");
309 }
310 what = std::string(buff, l);
311 delete[] buff;
312};
313
314template <class T>
315void streamReadVec(std::istream &ss, T &val) {
316 boost::uint64_t size;
317 streamRead(ss, size);
318 val.resize(boost::numeric_cast<size_t>(size));
319
320 for (size_t i = 0; i < size; ++i) {
321 streamRead(ss, val[i]);
322 }
323}
324
325inline void streamReadStringVec(std::istream &ss, std::vector<std::string> &val,
326 int version) {
327 boost::uint64_t size;
328 streamRead(ss, size);
329 val.resize(size);
330
331 for (size_t i = 0; i < size; ++i) {
332 streamRead(ss, val[i], version);
333 }
334}
335
//! grabs the next line from an instream and returns it.
/*!
  A single trailing carriage return, if present, is removed from the line.

  \param inStream  pointer to the stream to read from; it is dereferenced
                   unconditionally
  \return the line without its line ending
*/
inline std::string getLine(std::istream *inStream) {
  std::string line;
  std::getline(*inStream, line);
  const bool endsWithCR = !line.empty() && line.back() == '\r';
  if (endsWithCR) {
    line.pop_back();
  }
  return line;
}

//! \overload takes the stream by reference
inline std::string getLine(std::istream &inStream) {
  std::istream *streamPtr = &inStream;
  return getLine(streamPtr);
}
349
// n.b. RDTypeTag can't be used directly here, since those values are
// implementation specific
namespace DTags {
// single values
constexpr unsigned char StringTag = 0;
constexpr unsigned char IntTag = 1;
constexpr unsigned char UnsignedIntTag = 2;
constexpr unsigned char BoolTag = 3;
constexpr unsigned char FloatTag = 4;
constexpr unsigned char DoubleTag = 5;

// vectors of values
constexpr unsigned char VecStringTag = 6;
constexpr unsigned char VecIntTag = 7;
constexpr unsigned char VecUIntTag = 8;
constexpr unsigned char VecBoolTag = 9;
constexpr unsigned char VecFloatTag = 10;
constexpr unsigned char VecDoubleTag = 11;

constexpr unsigned char CustomTag = 0xFE;  // custom data
constexpr unsigned char EndTag = 0xFF;
}  // namespace DTags
369
371 public:
373 virtual const char *getPropName() const = 0;
374 virtual bool canSerialize(const RDValue &value) const = 0;
375 virtual bool read(std::istream &ss, RDValue &value) const = 0;
376 virtual bool write(std::ostream &ss, const RDValue &value) const = 0;
377 virtual CustomPropHandler *clone() const = 0;
378};
379
380typedef std::vector<std::shared_ptr<const CustomPropHandler>>
382
383inline bool isSerializable(const Dict::Pair &pair,
384 const CustomPropHandlerVec &handlers = {}) {
385 switch (pair.val.getTag()) {
392
398 return true;
400 for (auto &handler : handlers) {
401 if (handler->canSerialize(pair.val)) {
402 return true;
403 }
404 }
405 return false;
406 default:
407 return false;
408 }
409}
410
411inline bool streamWriteProp(std::ostream &ss, const Dict::Pair &pair,
412 const CustomPropHandlerVec &handlers = {}) {
413 if (!isSerializable(pair, handlers)) {
414 return false;
415 }
416
417 streamWrite(ss, pair.key);
418 switch (pair.val.getTag()) {
421 streamWrite(ss, rdvalue_cast<std::string>(pair.val));
422 break;
426 break;
430 break;
434 break;
438 break;
442 break;
443
446 streamWriteVec(ss, rdvalue_cast<std::vector<std::string>>(pair.val));
447 break;
450 streamWriteVec(ss, rdvalue_cast<std::vector<double>>(pair.val));
451 break;
454 streamWriteVec(ss, rdvalue_cast<std::vector<float>>(pair.val));
455 break;
458 streamWriteVec(ss, rdvalue_cast<std::vector<int>>(pair.val));
459 break;
462 streamWriteVec(ss, rdvalue_cast<std::vector<unsigned int>>(pair.val));
463 break;
464 default:
465 for (auto &handler : handlers) {
466 if (handler->canSerialize(pair.val)) {
467 // The form of a custom tag is
468 // CustomTag
469 // customPropName (must be unique)
470 // custom serialization
472 streamWrite(ss, std::string(handler->getPropName()));
473 handler->write(ss, pair.val);
474 return true;
475 }
476 }
477
478 return false;
479 }
480 return true;
481}
482
483inline bool streamWriteProps(std::ostream &ss, const RDProps &props,
484 bool savePrivate = false,
485 bool saveComputed = false,
486 const CustomPropHandlerVec &handlers = {}) {
487 STR_VECT propsToSave = props.getPropList(savePrivate, saveComputed);
488 std::set<std::string> propnames(propsToSave.begin(), propsToSave.end());
489
490 const Dict &dict = props.getDict();
491 unsigned int count = 0;
492 for (Dict::DataType::const_iterator it = dict.getData().begin();
493 it != dict.getData().end(); ++it) {
494 if (propnames.find(it->key) != propnames.end()) {
495 if (isSerializable(*it, handlers)) {
496 count++;
497 }
498 }
499 }
500
501 streamWrite(ss, count); // packed int?
502
503 unsigned int writtenCount = 0;
504 for (Dict::DataType::const_iterator it = dict.getData().begin();
505 it != dict.getData().end(); ++it) {
506 if (propnames.find(it->key) != propnames.end()) {
507 if (isSerializable(*it, handlers)) {
508 // note - not all properties are serializable, this may be
509 // a null op
510 if (streamWriteProp(ss, *it, handlers)) {
511 writtenCount++;
512 }
513 }
514 }
515 }
516 POSTCONDITION(count == writtenCount,
517 "Estimated property count not equal to written");
518 return true;
519}
520
521template <class T>
522void readRDValue(std::istream &ss, RDValue &value) {
523 T v;
524 streamRead(ss, v);
525 value = v;
526}
527
528template <class T>
529void readRDVecValue(std::istream &ss, RDValue &value) {
530 std::vector<T> v;
531 streamReadVec(ss, v);
532 value = v;
533}
534
535inline void readRDValueString(std::istream &ss, RDValue &value) {
536 std::string v;
537 int version = 0;
538 streamRead(ss, v, version);
539 value = v;
540}
541
542inline void readRDStringVecValue(std::istream &ss, RDValue &value) {
543 std::vector<std::string> v;
544 int version = 0;
545 streamReadStringVec(ss, v, version);
546 value = v;
547}
548
549inline bool streamReadProp(std::istream &ss, Dict::Pair &pair,
550 bool &dictHasNonPOD,
551 const CustomPropHandlerVec &handlers = {}) {
552 int version = 0;
553 streamRead(ss, pair.key, version);
554
555 unsigned char type;
556 streamRead(ss, type);
557 switch (type) {
558 case DTags::IntTag:
559 readRDValue<int>(ss, pair.val);
560 break;
562 readRDValue<unsigned int>(ss, pair.val);
563 break;
564 case DTags::BoolTag:
565 readRDValue<bool>(ss, pair.val);
566 break;
567 case DTags::FloatTag:
568 readRDValue<float>(ss, pair.val);
569 break;
570 case DTags::DoubleTag:
571 readRDValue<double>(ss, pair.val);
572 break;
573
574 case DTags::StringTag:
575 readRDValueString(ss, pair.val);
576 dictHasNonPOD = true;
577 break;
579 readRDStringVecValue(ss, pair.val);
580 dictHasNonPOD = true;
581 break;
582 case DTags::VecIntTag:
583 readRDVecValue<int>(ss, pair.val);
584 dictHasNonPOD = true;
585 break;
587 readRDVecValue<unsigned int>(ss, pair.val);
588 dictHasNonPOD = true;
589 break;
591 readRDVecValue<float>(ss, pair.val);
592 dictHasNonPOD = true;
593 break;
595 readRDVecValue<double>(ss, pair.val);
596 dictHasNonPOD = true;
597 break;
598 case DTags::CustomTag: {
599 std::string propType;
600 int version = 0;
601 streamRead(ss, propType, version);
602 for (auto &handler : handlers) {
603 if (propType == handler->getPropName()) {
604 handler->read(ss, pair.val);
605 dictHasNonPOD = true;
606 return true;
607 }
608 }
609 return false;
610 }
611
612 default:
613 return false;
614 }
615 return true;
616}
617
618inline unsigned int streamReadProps(std::istream &ss, RDProps &props,
619 const CustomPropHandlerVec &handlers = {}) {
620 unsigned int count;
621 streamRead(ss, count);
622
623 Dict &dict = props.getDict();
624 dict.reset(); // Clear data before repopulating
625 dict.getData().resize(count);
626 for (unsigned index = 0; index < count; ++index) {
627 CHECK_INVARIANT(streamReadProp(ss, dict.getData()[index],
628 dict.getNonPODStatus(), handlers),
629 "Corrupted property serialization detected");
630 }
631
632 return count;
633}
634
635} // namespace RDKit
636
637#endif
#define CHECK_INVARIANT(expr, mess)
Definition: Invariant.h:101
#define POSTCONDITION(expr, mess)
Definition: Invariant.h:117
#define RDUNUSED_PARAM(x)
Definition: Invariant.h:196
virtual bool read(std::istream &ss, RDValue &value) const =0
virtual bool write(std::ostream &ss, const RDValue &value) const =0
virtual const char * getPropName() const =0
virtual CustomPropHandler * clone() const =0
virtual ~CustomPropHandler()
Definition: StreamOps.h:372
virtual bool canSerialize(const RDValue &value) const =0
void reset()
Clears all keys (and values) from the dictionary.
Definition: Dict.h:330
const Dict & getDict() const
gets the underlying Dictionary
Definition: RDProps.h:36
STR_VECT getPropList(bool includePrivate=true, bool includeComputed=true) const
returns a list with the names of our properties
Definition: RDProps.h:45
const unsigned char IntTag
Definition: StreamOps.h:354
const unsigned char VecUIntTag
Definition: StreamOps.h:361
const unsigned char VecBoolTag
Definition: StreamOps.h:362
const unsigned char VecIntTag
Definition: StreamOps.h:360
const unsigned char CustomTag
Definition: StreamOps.h:366
const unsigned char StringTag
Definition: StreamOps.h:353
const unsigned char VecFloatTag
Definition: StreamOps.h:363
const unsigned char DoubleTag
Definition: StreamOps.h:358
const unsigned char VecStringTag
Definition: StreamOps.h:359
const unsigned char EndTag
Definition: StreamOps.h:367
const unsigned char BoolTag
Definition: StreamOps.h:356
const unsigned char VecDoubleTag
Definition: StreamOps.h:364
const unsigned char FloatTag
Definition: StreamOps.h:357
const unsigned char UnsignedIntTag
Definition: StreamOps.h:355
static const boost::uint64_t UnsignedIntTag
static const boost::uint64_t StringTag
static const boost::uint64_t VecStringTag
static const boost::uint64_t VecIntTag
static const boost::uint64_t FloatTag
static const boost::uint64_t VecUnsignedIntTag
static const boost::uint64_t DoubleTag
static const boost::uint64_t IntTag
static const boost::uint64_t AnyTag
static const boost::uint64_t VecFloatTag
static const boost::uint64_t VecDoubleTag
static const boost::uint64_t BoolTag
Std stuff.
Definition: Abbreviations.h:19
std::vector< std::string > STR_VECT
Definition: Dict.h:29
int rdvalue_cast< int >(RDValue_cast_t v)
unsigned char UCHAR
Definition: types.h:276
unsigned int rdvalue_cast< unsigned int >(RDValue_cast_t v)
unsigned int streamReadProps(std::istream &ss, RDProps &props, const CustomPropHandlerVec &handlers={})
Definition: StreamOps.h:618
boost::uint32_t pullPackedIntFromString(const char *&text)
Definition: StreamOps.h:215
double rdvalue_cast< double >(RDValue_cast_t v)
EEndian
Definition: StreamOps.h:27
@ LITTLE_ENDIAN_ORDER
Definition: StreamOps.h:28
@ BIG_ENDIAN_ORDER
Definition: StreamOps.h:29
T SwapBytes(T value)
Definition: StreamOps.h:44
void readRDStringVecValue(std::istream &ss, RDValue &value)
Definition: StreamOps.h:542
void streamRead(std::istream &ss, T &loc)
does a binary read of an object from a stream
Definition: StreamOps.h:282
std::string getLine(std::istream *inStream)
grabs the next line from an instream and returns it.
Definition: StreamOps.h:337
bool streamWriteProps(std::ostream &ss, const RDProps &props, bool savePrivate=false, bool saveComputed=false, const CustomPropHandlerVec &handlers={})
Definition: StreamOps.h:483
void readRDValueString(std::istream &ss, RDValue &value)
Definition: StreamOps.h:535
boost::uint32_t readPackedIntFromStream(std::stringstream &ss)
Reads an integer from a stream in packed format and returns the result.
Definition: StreamOps.h:149
bool isSerializable(const Dict::Pair &pair, const CustomPropHandlerVec &handlers={})
Definition: StreamOps.h:383
void streamReadStringVec(std::istream &ss, std::vector< std::string > &val, int version)
Definition: StreamOps.h:325
void readRDVecValue(std::istream &ss, RDValue &value)
Definition: StreamOps.h:529
void streamWriteVec(std::ostream &ss, const T &val)
Definition: StreamOps.h:273
T rdvalue_cast(RDValue_cast_t v)
void streamReadVec(std::istream &ss, T &val)
Definition: StreamOps.h:315
void readRDValue(std::istream &ss, RDValue &value)
Definition: StreamOps.h:522
T EndianSwapBytes(T value)
Definition: StreamOps.h:72
bool streamReadProp(std::istream &ss, Dict::Pair &pair, bool &dictHasNonPOD, const CustomPropHandlerVec &handlers={})
Definition: StreamOps.h:549
bool streamWriteProp(std::ostream &ss, const Dict::Pair &pair, const CustomPropHandlerVec &handlers={})
Definition: StreamOps.h:411
bool rdvalue_cast< bool >(RDValue_cast_t v)
void streamWrite(std::ostream &ss, const T &val)
does a binary write of an object to a stream
Definition: StreamOps.h:260
void appendPackedIntToStream(std::stringstream &ss, boost::uint32_t num)
Packs an integer and outputs it to a stream.
Definition: StreamOps.h:105
float rdvalue_cast< float >(RDValue_cast_t v)
std::vector< std::shared_ptr< const CustomPropHandler > > CustomPropHandlerVec
Definition: StreamOps.h:381
std::string key
Definition: Dict.h:39
RDValue val
Definition: Dict.h:40
boost::uint64_t getTag() const