RDKit
Open-source cheminformatics and machine learning.
MultithreadedMolSupplier.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2020 Shrey Aryan
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #ifdef RDK_THREADSAFE_SSS
11 #ifndef MULTITHREADED_MOL_SUPPLIER
12 #define MULTITHREADED_MOL_SUPPLIER
13 
18 #include <RDGeneral/RDLog.h>
19 #include <RDGeneral/RDThreads.h>
20 #include <RDGeneral/StreamOps.h>
21 
22 #include <atomic>
23 #include <boost/tokenizer.hpp>
24 
25 #include "FileParsers.h"
26 #include "MolSupplier.h"
27 
28 typedef boost::tokenizer<boost::char_separator<char>> tokenizer;
29 
30 namespace RDKit {
31 class RDKIT_FILEPARSERS_EXPORT MultithreadedMolSupplier : public MolSupplier {
32  //! this is an abstract base class to concurrently supply molecules one at a
33  //! time
34  public:
35  MultithreadedMolSupplier(){};
36  virtual ~MultithreadedMolSupplier();
37  //! pop elements from the output queue
38  ROMol *next();
39  //! returns true when all records have been read from the supplier
40  bool atEnd();
41 
42  //! included for the interface, always returns false
43  bool getEOFHitOnRead() const { return false; }
44 
45  //! returns the record id of the last extracted item
46  //! Note: d_LastRecordId = 0, initially therefore the value 0 is returned
47  //! if and only if the function is called before extracting the first
48  //! record
49  unsigned int getLastRecordId() const;
50  //! returns the text block for the last extracted item
51  std::string getLastItemText() const;
52 
53  protected:
54  //! starts reader and writer threads
55  void startThreads();
56 
57  private:
58  //! reads lines from input stream to populate the input queue
59  void reader();
60  //! parses lines from the input queue converting them to ROMol objects
61  //! populating the output queue
62  void writer();
63  //! finalizes the reader and writer threads
64  void endThreads();
65  //! disable automatic copy constructors and assignment operators
66  //! for this class and its subclasses. They will likely be
67  //! carrying around stream pointers and copying those is a recipe
68  //! for disaster.
69  MultithreadedMolSupplier(const MultithreadedMolSupplier &);
70  MultithreadedMolSupplier &operator=(const MultithreadedMolSupplier &);
71  //! not yet implemented
72  virtual void reset();
73  virtual void init() = 0;
74  virtual bool getEnd() const = 0;
75  //! extracts next record from the input file or stream
76  virtual bool extractNextRecord(std::string &record, unsigned int &lineNum,
77  unsigned int &index) = 0;
78  //! processes the record into an ROMol object
79  virtual ROMol *processMoleculeRecord(const std::string &record,
80  unsigned int lineNum) = 0;
81 
82  private:
83  std::atomic<unsigned int> d_threadCounter{1}; //! thread counter
84  std::vector<std::thread> d_writerThreads; //! vector writer threads
85  std::thread d_readerThread; //! single reader thread
86 
87  protected:
88  unsigned int d_lastRecordId = 0; //! stores last extracted record id
89  std::string d_lastItemText; //! stores last extracted record
90  const unsigned int d_numReaderThread = 1; //! number of reader thread
91  unsigned int d_numWriterThreads; //! number of writer threads
92  size_t d_sizeInputQueue; //! size of input queue
93  size_t d_sizeOutputQueue; //! size of output queue
94 
95  ConcurrentQueue<std::tuple<std::string, unsigned int, unsigned int>>
96  *d_inputQueue; //! concurrent input queue
97  ConcurrentQueue<std::tuple<ROMol *, std::string, unsigned int>>
98  *d_outputQueue; //! concurrent output queue
99 };
100 } // namespace RDKit
101 #endif
102 #endif
#define RDKIT_FILEPARSERS_EXPORT
Definition: export.h:255
Std stuff.
Definition: Abbreviations.h:17