gambit is hosted by Hepforge, IPPP Durham
GAMBIT  v1.5.0-252-gf9a3f78
a Global And Modular Bsm Inference Tool
hdf5printer.hpp
Go to the documentation of this file.
1 // GAMBIT: Global and Modular BSM Inference Tool
2 // *********************************************
16 
17 
18 #ifndef __hdf5printer_hpp__
19 #define __hdf5printer_hpp__
20 
21 // Standard libraries
22 #include <map>
23 #include <vector>
24 #include <algorithm>
25 #include <sstream>
26 #include <iostream>
27 #include <fstream>
28 #include <iomanip>
29 
30 // Gambit
39 #include "gambit/Utils/cats.hpp"
40 #include "gambit/Logs/logger.hpp"
41 
42 // MPI bindings
45 
46 // BOOST_PP
47 #include <boost/preprocessor/seq/for_each_i.hpp>
48 
49 
50 //#define DEBUG_MODE
51 //#define HDEBUG_MODE // "High output" debug mode (info with every single print command)
52 
53 // Code!
54 namespace Gambit
55 {
56  namespace Printers
57  {
58 
59  // Parameter controlling the length of all the standard buffers
60  static const std::size_t BUFFERLENGTH = 100; // Change to 10000 or something. Currently cannot change this dynamically though, sorry.
64  static const unsigned long MAX_PPIDPAIRS = 10*BUFFERLENGTH;
65 
67 
69  typedef std::map<VBIDpair, VertexBufferBase*> BaseBufferMap;
70 
72  template<class T, class U>
73  void error_if_key_exists(const std::map<T,U>& m, const T& key, const std::string& tag)
74  {
75  typename std::map<T,U>::const_iterator it = m.find(key);
76  if ( it == m.end() ) {
77  return;
78  }
79  else {
80  std::ostringstream errmsg;
81  errmsg << "Error! Supplied key for a VertexBuffer already exists in map (tag="<<tag<<")! This is a bug in the HDF5Printer (or Reader) class, please report it.";
82  printer_error().raise(LOCAL_INFO, errmsg.str());
83  }
84  }
85 
86  // forward declaration
87  class HDF5Printer;
88 
90  template<class BuffType>
92  {
93  private:
94  // Buffers local to a print function. Access whichever ones match the IDcode.
95  std::map<VBIDpair, BuffType> local_buffers;
96 
97  // Pointer to "parent" HDF5Printer object
98  // Need to use two-stage initialisation because the automated
99  // declaration of new buffer managers requires a default
100  // constructor
102 
104  // (map is from IDcodes to flags)
105  std::map<VBIDpair,bool> first_print;
106 
110 
112  char access;
113 
114  public:
117  : printer(NULL)
118  , synchronised(true)
119  , access('w')
120  {}
121 
123  void init(HDF5Printer* p, bool synchronised);
124 
126  bool ready() { if(printer==NULL){return false;}else{return true;} }
127 
129  BuffType& get_buffer(const int vID, const unsigned int i, const std::string& label);
130 
131  };
132 
133 
135  class HDF5Printer : public BasePrinter
136  {
137  public:
139  HDF5Printer(const Options&, BasePrinter* const primary = NULL);
140 
142  void common_constructor(const Options&);
143 
145  // Overload the base class virtual destructor
146  ~HDF5Printer();
147 
150 
151  // Initialisation function
152  // Run by dependency resolver, which supplies the functors with a vector of VertexIDs whose requiresPrinting flags are set to true.
153  void initialise(const std::vector<int>&);
154  void flush();
155  void reset(bool force=false);
156  void finalise(bool abnormal=false);
157 
158  // Get options required to construct a reader object that can read
159  // the previous output of this printer.
160  Options resume_reader_options();
161 
163 
165  using BasePrinter::_print; // Tell compiler we are using some of the base class overloads of this on purpose.
166  #define DECLARE_PRINT(r,data,i,elem) void _print(elem const&, const std::string&, const int, const uint, const ulong);
168  #ifndef SCANNER_STANDALONE
170  #endif
171  #undef DECLARE_PRINT
172 
175 
177  bool is_stream_managed(VBIDpair& key) const;
178 
180  hid_t get_location() const;
181  hid_t get_RA_location() const;
182 
184  void insert_buffer(VBIDpair& key, VertexBufferBase& newbuffer);
185 
188  unsigned long get_sync_pos() const { return sync_pos; }
189 
190  private:
191 
193  // Need one for every directly retrievable type, and a specialisation
194  // of 'get_mybuffermanager' to access it. But the latter have to be
195  // defined outside the class declaration, so they can be found below.
196  // Could create all these with a macro, but I am sick of macros so
197  // will just do it the "old-fashioned" way.
198  #define BT(TYPE) VertexBufferNumeric1D_HDF5<TYPE,BUFFERLENGTH>
207 
209  template<class T>
210  H5P_LocalBufferManager<BT(T)>& get_mybuffermanager(ulong, uint);
211  #undef BT
212 
214  void add_PPID_to_list(const PPIDpair&);
215 
217  void reset_PPID_lists();
218 
222  // TODO: DEPRECATED
223  //unsigned long getHighestPointID(const int rank);
224 
226  bool seen_PPID_before(const PPIDpair& ppid);
227 
229  void synchronise_buffers();
230 
235  void check_sync(const std::string& label, const int sync_type, bool checkall);
236 
237  #ifdef WITH_MPI
238 
240  GMPI::Comm& get_Comm() { return myComm; }
241  #endif
242 
246  void empty_sync_buffers(bool force=false);
247 
250  void check_for_new_point(const PPIDpair&);
251 
253  unsigned long get_global_index(const ulong pointID, const uint mpirank);
254 
256  std::string get_printer_name() { return printer_name; }
257 
262  unsigned long get_N_RApointIDs() { return primary_printer->reverse_global_index_lookup.size() + primary_printer->RA_dset_offset; }
263 
265  void increment_sync_pos() { sync_pos+=1; }
266 
268  void clear_previous_points() { std::vector<PPIDpair>().swap(previous_points); } // This technique also shrinks the capacity of the vector, which 'clear' does not do.
269 
272  void prepare_and_combine_tmp_files();
273 
274  //PPIDpair get_highest_PPID_from_HDF5(hid_t group_id);
275  std::map<unsigned long, unsigned long long int> get_highest_PPID_from_HDF5(hid_t group_id);
276 
278  std::vector<std::string> find_temporary_files(const bool error_if_inconsistent=false);
279 
281  void combine_output_py(const std::vector<std::string> tmp_files, const bool finalcombine); // Python version
282  void combine_output(const std::vector<std::string> tmp_files, const bool finalcombine); // Greg version
283 
285 
286 
288  // Used to reduce repetition in definitions of virtual function overloads
289  // (useful since there is no automatic type conversion possible)
290  template<class T>
291  void template_print(T const& value, const std::string& label, const int IDcode, const unsigned int mpirank, const unsigned long pointID)
292  {
293  // Retrieve the buffer manager for buffers with this type
294  auto& buffer_manager = get_mybuffermanager<T>(pointID,mpirank);
295 
296  // Extract a buffer from the manager corresponding to this
297  auto& selected_buffer = buffer_manager.get_buffer(IDcode, 0, label);
298 
299  #ifdef HDEBUG_MODE
300  std::cout<<"rank "<<myRank<<", printer "<<this->get_printer_name()<<": printing "<<typeid(T).name()<<", "<<label<<" = "<<value<<std::endl;
301  std::cout<<"rank "<<myRank<<", printer "<<this->get_printer_name()<<": pointID="<<pointID<<", mpirank="<<mpirank<<std::endl;
302  #endif
303 
304  PPIDpair ppid(pointID,mpirank);
305  if(synchronised)
306  {
307  // Write the data to the selected buffer ("just works" for simple numeric types)
308  selected_buffer.append(value,ppid);
309  }
310  else
311  {
312  // Queue up a desynchronised ("random access") dataset write to previous scan iteration
313  if(not seen_PPID_before(ppid))
314  {
315  add_PPID_to_list(ppid);
316  }
317  selected_buffer.RA_write(value,ppid,primary_printer->global_index_lookup);
318  }
319  }
320 
321  private:
322  // String names for output file and group
323  std::string tmp_comb_file; // temporary combined output filename
324  std::string tmpfile; // temporary filename (unique to each process)
325  std::string finalfile; // Combined results moved here only upon successful scan completion
326  std::string group; // HDF5 group location to store datasets
327 
328  // Handles for HDF5 files and groups containing the datasets
332 
333  // Handle to a location in a HDF5 to which the datasets will be written
334  // i.e. a file or a group.
337 
339  // (if this is an auxiliary printer, else it is "this" //NULL)
340  HDF5Printer* primary_printer = this; //NULL;
341 
343  bool is_primary_printer = false;
344 
346  bool disable_combine_routines = false;
347 
349  // Note: Each buffer contains a bool to indicate whether it has done an "append" for the point "lastPointID"
350  BaseBufferMap all_my_buffers;
351 
353  // Need this so that we can compute when (at least initial) writing to a model point has ceased
355 
357  // i.e. this location in the output dataset is currently the target of print functions
358  unsigned long current_dset_position;
359 
361  // Needed for dataset writes which return to old points.
362  std::map<PPIDpair, unsigned long> global_index_lookup;
363 
364  // Matching vector for the above, for reverse lookup
365  std::vector<PPIDpair> reverse_global_index_lookup;
366 
369  unsigned long RA_dset_offset = 0;
370 
372  std::string printer_name;
373 
375  unsigned int myRank; // Needed even without MPI available, for some default behaviour.
376  unsigned int mpiSize; // " "
377  #ifdef WITH_MPI
378  // Gambit MPI communicator context for use within the hdf5 printer system
379  GMPI::Comm myComm;
380  #endif
381 
385  bool synchronised = true;
386 
388  // i.e. print data will not be associated with parameter space points,
389  // but will be "global" data about the whole scan (e.g. max log likelihood
390  // found, scan statistics, etc.)
391  bool global = false;
392 
395  //unsigned long startpos = 0; // OBSOLETE
396 
398  unsigned long sync_pos = 0;
399 
401  std::vector<PPIDpair> previous_points;
402 
403  protected:
405 
407  // Note: Each buffer contains a bool to indicate whether it has done an "append" for the point "lastPointID"
408  BaseBufferMap all_buffers;
409 
410  };
411 
413  // Need to use it outside the class body, and be sure to typedef
414  // types which can not form part of a valid variable name.
// Expands to the HDF5Printer::get_mybuffermanager<TYPE> specialisation, which
// returns the hdf5_localbufferman_<TYPE> object, initialising it on first use.
#define DEFINE_BUFFMAN_GETTER(TYPE)                                               \
template<>                                                                        \
inline H5P_LocalBufferManager<VertexBufferNumeric1D_HDF5<TYPE,BUFFERLENGTH>>&     \
HDF5Printer::get_mybuffermanager<TYPE>(ulong pointID, uint mpirank)               \
{                                                                                 \
  /* Attach the manager to this printer the first time it is requested */         \
  if( !CAT(hdf5_localbufferman_,TYPE).ready() )                                   \
  {                                                                               \
    CAT(hdf5_localbufferman_,TYPE).init(this,synchronised);                       \
  }                                                                               \
                                                                                  \
  /* Printers in "synchronised" mode also use this call to check whether the      \
     buffers need to be synchronised to a new point. */                           \
  if(synchronised)                                                                \
  {                                                                               \
    check_for_new_point(PPIDpair(pointID, mpirank));                              \
  }                                                                               \
  return CAT(hdf5_localbufferman_,TYPE);                                          \
}
435 
438  DEFINE_BUFFMAN_GETTER(long )
442  DEFINE_BUFFMAN_GETTER(float )
443  DEFINE_BUFFMAN_GETTER(double )
444 
445 
446 
447 
448  template<class BuffType>
450  {
451  /* Set global behaviour flag */
452  synchronised = sync;
453 
454  /* Attempt to attach to printer */
455  if(p==NULL)
456  {
457  std::ostringstream errmsg;
458  errmsg << "Error! Tried to initialise a H5P_LocalBufferManager with a null pointer! Need an actual HDF5Printer object in order to work. This is a bug in the HDF5Printer class, please report it.";
459  printer_error().raise(LOCAL_INFO, errmsg.str());
460  }
461  if(not ready()) {
462  printer = p;
463  } else {
464  std::ostringstream errmsg;
465  errmsg << "Error! Tried to initialise a H5P_LocalBufferManager twice! This is a bug in the HDF5Printer class, please report it.";
466  printer_error().raise(LOCAL_INFO, errmsg.str());
467  }
468  }
469 
470  template<class BuffType>
471  BuffType& H5P_LocalBufferManager<BuffType>::get_buffer(const int vertexID, const unsigned int aux_i, const std::string& label)
472  {
473  if(not ready()) {
474  std::ostringstream errmsg;
475  errmsg << "Error! Tried to retrieve a buffer from a buffer manager without first initialising it! This is a bug in the HDF5Printer class, please report it.";
476  printer_error().raise(LOCAL_INFO, errmsg.str());
477  }
478 
479  VBIDpair key;
480  key.vertexID = vertexID;
481  key.index = aux_i;
482 
483  typename std::map<VBIDpair, BuffType>::iterator it = local_buffers.find(key);
484 
485  if( it == local_buffers.end() )
486  {
487  error_if_key_exists(local_buffers, key, "local_buffers");
488  // No local buffer exists for this output stream yet, so make one
489  // But check first if another printer manager is already handling this
490  // output stream. If so, we relinquish control over it and silence the
491  // new output stream.
492  bool silence = false;
493  #ifdef DEBUG_MODE
494  std::cout<<"Preparing to create new print output stream..."<<std::endl;
495  std::cout<<"...label = "<<label<<std::endl;
496  std::cout<<"...is stream already managed? "<<printer->is_stream_managed(key)<<std::endl;
497  std::cout<<"...from printer with name = "<<printer->get_printer_name()<<std::endl;
498  std::cout<<"...from printer with name = "<<printer->get_printer_name()<<std::endl;
499  #endif
500  if( printer->is_stream_managed(key) )
501  {
502  silence = true;
503  }
504  #ifdef DEBUG_MODE
505  std::cout<<"...is silenced? "<<silence<<std::endl;
506  #endif
507 
508  // Create the new buffer object
509  hid_t loc(-1);
510  if(synchronised)
511  {
512  loc = printer->get_location();
513  }
514  else // write to the RA group
515  {
516  loc = printer->get_RA_location();
517  }
518 
519  local_buffers[key] = BuffType( loc
520  , label/*deconstruct?*/
521  , vertexID
522  , aux_i
523  , synchronised
524  , silence
525  , false /*printer->get_resume() -- In this new version of the HDF5Printer we write temporary files and then combine them at the end of the scan, so each individual buffer no longer needs to be in 'resume' mode, it can just start anew and be combined with the old data later on */
526  , access /* r/w mode. Buffers can now be used for reading also. */
527  );
528 
529  // Get the new (possibly silenced) buffer back out of the map
530  it = local_buffers.find(key);
531 
532  // Add a pointer to the new buffer to the full list as well
533  if(not silence) printer->insert_buffer( key, it->second );
534 
535  // Force increment the buffer to "catch it up" to the current sync
536  // position, in case it has been created "late".
537  // We subtract one because another increment will happen after
538  // the print statement (that triggered the creation of the new
539  // buffer) completes.
540  if(synchronised) it->second.fast_forward(printer->get_sync_pos()-1);
541  }
542 
543  if( it == local_buffers.end() )
544  {
545  std::ostringstream errmsg;
546  errmsg << "Error! Failed to retrieve newly created buffer (label="<<label<<") from local_buffers map! Key was: ("<<vertexID<<","<<aux_i<<")"<<std::endl;
547  printer_error().raise(LOCAL_INFO, errmsg.str());
548  }
549 
550  return it->second;
551  }
552 
554 
555 
556  // Avoid cluttering up "macro namespace"
557  #undef DEFINE_BUFFMAN_GETTER
558  #undef BT
559 
560  // Register printer so it can be constructed via inifile instructions
561  // First argument is string label for inifile access, second is class from which to construct printer
562  LOAD_PRINTER(hdf5_v1, HDF5Printer)
563 
564  } // end namespace Printers
565 
566 } // end namespace Gambit
567 
568 #ifdef DEBUG_MODE
569  #undef DEBUG_MODE
570 #endif
571 
572 #endif
unsigned long get_sync_pos() const
Get the number of pointIDs known to this printer (should correspond to the number of "appends" each ac...
BaseBufferMap all_my_buffers
Map containing pointers to all VertexBuffers contained in this printer.
unsigned int myRank
MPI rank and size.
EXPORT_SYMBOLS error & printer_error()
Printer errors.
hid_t get_location() const
Retrieve pointer to HDF5 location to which datasets are added.
H5P_LocalBufferManager< BT(ulonglong)> hdf5_localbufferman_ulonglong
std::vector< PPIDpair > reverse_global_index_lookup
unsigned long current_dset_position
Current absolute dataset index.
The main printer class for output to HDF5 format.
Declaration of VertexBufferBase class This is the base buffer class used by the HDF5Printer vertex bu...
void init(HDF5Printer *p, bool synchronised)
Initialise the buffer (attach it to a printer and set its behaviour)
#define LOCAL_INFO
Definition: local_info.hpp:34
std::vector< PPIDpair > previous_points
In resume mode: storage for PPIDpairs harvested from previous scan data.
BuffType & get_buffer(const int vID, const unsigned int i, const std::string &label)
Retrieve a buffer for an IDcode/auxiliary-index pair.
void template_print(T const &value, const std::string &label, const int IDcode, const unsigned int mpirank, const unsigned long pointID)
Helper print functions.
PPIDpair lastPointID
ID of the point that this printer is currently working on.
H5P_LocalBufferManager< BT(long)> hdf5_localbufferman_long
void swap(Spectrum &first, Spectrum &second)
Swap resources of two Spectrum objects Note: Not a member function! This is an external function whic...
Definition: spectrum.cpp:57
Logging access header for GAMBIT.
BOOST_PP_SEQ_FOR_EACH_I(GETTYPEID, _, PRINTABLE_TYPES) void printAllTypeIDs(void)
For debugging; print to stdout all the typeIDs for all types.
Definition: baseprinter.cpp:36
Definitions of new MPI datatypes needed by printers.
vertexID / sub-print index pair Identifies individual buffers (I call them VertexBuffer, but actually there can be more than one per vertex)
unsigned long long int ulonglong
H5P_LocalBufferManager< BT(longlong)> hdf5_localbufferman_longlong
bool ready()
Signal whether initialisation has occurred.
Declarations for the YAML options class.
std::map< VBIDpair, BuffType > local_buffers
Definition: hdf5printer.hpp:95
H5P_LocalBufferManager< BT(double)> hdf5_localbufferman_double
Declaration of VertexBufferNumeric1D_HDF5 class.
HDF5_BACKEND_TYPES bool is_stream_managed(VBIDpair &key) const
HDF5Printer-specific functions.
std::string get_printer_name()
Get the name of this printer.
bool synchronised
Flag to trigger treatment of buffers as synchronised or not i.e.
H5P_LocalBufferManager< BT(int)> hdf5_localbufferman_int
DEFINE_BUFFMAN_GETTER(int) DEFINE_BUFFMAN_GETTER(uint) DEFINE_BUFFMAN_GETTER(long) DEFINE_BUFFMAN_GETTER(ulong) DEFINE_BUFFMAN_GETTER(longlong) DEFINE_BUFFMAN_GETTER(ulonglong) DEFINE_BUFFMAN_GETTER(float) DEFINE_BUFFMAN_GETTER(double) template< class BuffType > void H5P_LocalBufferManager< BuffType >
Templated H5P_LocalBufferManager member functions.
#define DECLARE_PRINT(r, data, i, elem)
H5P_LocalBufferManager< BT(float)> hdf5_localbufferman_float
Declaration and definition of printer base class.
void error_if_key_exists(const std::map< T, U > &m, const T &key, const std::string &tag)
Helper function to check if a VertexBuffer key already exists in a map.
Definition: hdf5printer.hpp:73
#define LOAD_PRINTER(tag,...)
Definition: baseprinter.hpp:57
unsigned long int ulong
A simple C++ wrapper for the MPI C bindings.
Declaration for class DataSetInterfaceScalar.
void increment_sync_pos()
Move head dataset sync position.
std::map< PPIDpair, unsigned long > global_index_lookup
Map from pointID,thread pairs to absolute dataset indices.
std::map< VBIDpair, VertexBufferBase * > BaseBufferMap
Helpful typedefs.
Definition: hdf5printer.hpp:69
unsigned long get_N_RApointIDs()
Get the number of RA write locations known to the primary printer NOTE: the meaning of this has chang...
std::string printer_name
Label for printer, mostly for more helpful error messages.
Sequence of all types printable by the HDF5 printer.
char access
Buffer access mode ('r'/'w')
void insert_buffer(VBIDpair &key, VertexBufferBase &newbuffer)
Add a pointer to a new buffer to the global list.
A collection of tools for interacting with HDF5 databases.
Keeps track of vertex buffers local to a print function.
Definition: hdf5printer.hpp:91
#define HDF5_TYPES
Definition: hdf5types.hpp:25
#define HDF5_BACKEND_TYPES
Definition: hdf5types.hpp:41
H5P_LocalBufferManager< BT(ulong)> hdf5_localbufferman_ulong
pointID / process number pair Used to identify a single parameter space point
TODO: see if we can use this one:
Definition: Analysis.hpp:33
A small wrapper object for 'options' nodes.
std::map< VBIDpair, bool > first_print
Flag to check if a print function has been run before.
std::pair< std::vector< std::string >, std::vector< size_t > > find_temporary_files(const std::string &finalfile)
Search for temporary files to be combined.
void clear_previous_points()
Clear previous points list.
BaseBufferMap all_buffers
Things which other printers need access to.
long long int longlong
MPI tag definitions for the VertexBuffer classes.
Concatenation macros.
H5P_LocalBufferManager< BT(uint)> hdf5_localbufferman_uint