gambit is hosted by Hepforge, IPPP Durham
GAMBIT  v1.5.0-2191-ga4742ac
a Global And Modular Bsm Inference Tool
hdf5printer.hpp
Go to the documentation of this file.
1 // GAMBIT: Global and Modular BSM Inference Tool
2 // *********************************************
16 
17 
18 #ifndef __hdf5printer_hpp__
19 #define __hdf5printer_hpp__
20 
21 // Standard libraries
22 #include <map>
23 #include <vector>
24 #include <algorithm>
25 #include <sstream>
26 #include <iostream>
27 #include <fstream>
28 #include <iomanip>
29 
30 // Gambit
39 #include "gambit/Utils/cats.hpp"
40 #include "gambit/Logs/logger.hpp"
41 
42 // MPI bindings
45 
46 // BOOST_PP
47 #include <boost/preprocessor/seq/for_each_i.hpp>
48 
49 
50 //#define DEBUG_MODE
51 //#define HDEBUG_MODE // "High output" debug mode (info with every single print command)
52 
53 // Code!
54 namespace Gambit
55 {
56  namespace Printers
57  {
58 
59  // Parameter controlling the length of all the standard buffers
60  static const std::size_t BUFFERLENGTH = 100; // Change to 10000 or something. Currently cannot change this dynamically though, sorry.
61 
     // Limit expressed as ten times BUFFERLENGTH.
     // NOTE(review): the original doc comment for this constant is not present
     // in this listing -- confirm what exactly it bounds before changing it.
65  static const unsigned long MAX_PPIDPAIRS = 10*BUFFERLENGTH;
66 
68 
     // Helpful typedefs.
     // Map from a VBIDpair key to a pointer to the corresponding VertexBufferBase.
70  typedef std::map<VBIDpair, VertexBufferBase*> BaseBufferMap;
71 
73  template<class T, class U>
74  void error_if_key_exists(const std::map<T,U>& m, const T& key, const std::string& tag)
75  {
76  typename std::map<T,U>::const_iterator it = m.find(key);
77  if ( it == m.end() ) {
78  return;
79  }
80  else {
81  std::ostringstream errmsg;
82  errmsg << "Error! Supplied key for a VertexBuffer already exists in map (tag="<<tag<<")! This is a bug in the HDF5Printer (or Reader) class, please report it.";
83  printer_error().raise(LOCAL_INFO, errmsg.str());
84  }
85  }
86 
87  // forward declaration
88  class HDF5Printer;
89 
// Keeps track of vertex buffers local to a print function.
// NOTE(review): this listing has dropped several lines of the class (the
// class-head line itself, the declarations of the 'printer' and
// 'synchronised' members, and the constructor's name line). Only the
// surviving lines are reproduced below, unchanged.
91  template<class BuffType>
93  {
94  private:
95  // Buffers local to a print function. Access whichever ones match the IDcode.
96  std::map<VBIDpair, BuffType> local_buffers;
97 
98  // Pointer to "parent" HDF5Printer object
99  // Need to use two-stage initialisation because the automated
100  // declaration of new buffer managers requires a default
101  // constructor
103 
      // Flag to check if a print function has been run before.
105  // (map is from IDcodes to flags)
106  std::map<VBIDpair,bool> first_print;
107 
111 
      // Buffer access mode ('r'/'w')
113  char access;
114 
115  public:
      // Default constructor (initialiser list only; its name line is missing
      // from the listing): starts with no printer attached, in synchronised
      // mode, with write access.
118  : printer(NULL)
119  , synchronised(true)
120  , access('w')
121  {}
122 
      // Initialise the buffer (attach it to a printer and set its behaviour)
124  void init(HDF5Printer* p, bool synchronised);
125 
      // Signal whether initialisation has occurred: true once 'printer' is non-null.
127  bool ready() { if(printer==NULL){return false;}else{return true;} }
128 
      // Retrieve a buffer for an IDcode/auxiliary-index pair.
130  BuffType& get_buffer(const int vID, const unsigned int i, const std::string& label);
131 
132  };
133 
134 
// The main printer class for output to HDF5 format.
// NOTE(review): this listing has dropped a number of member lines (for
// example the hdf5_localbufferman_* members referred to by the comments
// below, and several documented data members). Surviving lines are
// reproduced unchanged; only comments have been added.
136  class HDF5Printer : public BasePrinter
137  {
138  public:
      // Construct from printer options; 'primary' defaults to NULL.
140  HDF5Printer(const Options&, BasePrinter* const primary = NULL);
141 
143  void common_constructor(const Options&);
144 
146  // Overload the base class virtual destructor
147  ~HDF5Printer();
148 
151 
152  // Initialisation function
153  // Run by dependency resolver, which supplies the functors with a vector of VertexIDs whose requiresPrinting flags are set to true.
154  void initialise(const std::vector<int>&);
155  void flush();
156  void reset(bool force=false);
157  void finalise(bool abnormal=false);
158 
159  // Get options required to construct a reader object that can read
160  // the previous output of this printer.
161  Options resume_reader_options();
162 
164 
166  using BasePrinter::_print; // Tell compiler we are using some of the base class overloads of this on purpose.
      // Declares one _print overload per element type; the lines that expand
      // this macro over the type lists are missing from this listing.
167  #define DECLARE_PRINT(r,data,i,elem) void _print(elem const&, const std::string&, const int, const uint, const ulong);
169  #ifndef SCANNER_STANDALONE
171  #endif
172  #undef DECLARE_PRINT
173 
176 
      // HDF5Printer-specific functions.
178  bool is_stream_managed(VBIDpair& key) const;
179 
      // Retrieve pointer to HDF5 location to which datasets are added.
181  hid_t get_location() const;
182  hid_t get_RA_location() const;
183 
      // Add a pointer to a new buffer to the global list.
185  void insert_buffer(VBIDpair& key, VertexBufferBase& newbuffer);
186 
      // Get the number of pointIDs known to this printer (returns sync_pos).
189  unsigned long get_sync_pos() const { return sync_pos; }
190 
191  private:
192 
194  // Need one for every directly retrievable type, and a specialisation
195  // of 'get_mybuffermanager' to access it. But the latter have to be
196  // defined outside the class declaration, so they can be found below.
197  // Could create all these with a macro, but I am sick of macros so
198  // will just do it the "old-fashioned" way.
199  #define BT(TYPE) VertexBufferNumeric1D_HDF5<TYPE,BUFFERLENGTH>
208 
      // Accessor for the buffer manager handling type T; only declared here,
      // the per-type specialisations are defined outside the class body.
210  template<class T>
211  H5P_LocalBufferManager<BT(T)>& get_mybuffermanager(ulong, uint);
212  #undef BT
213 
215  void add_PPID_to_list(const PPIDpair&);
216 
218  void reset_PPID_lists();
219 
223  // TODO: DEPRECATED
224  //unsigned long getHighestPointID(const int rank);
225 
227  bool seen_PPID_before(const PPIDpair& ppid);
228 
230  void synchronise_buffers();
231 
236  void check_sync(const std::string& label, const int sync_type, bool checkall);
237 
238  #ifdef WITH_MPI
239 
      // Access this printer's MPI communicator (only available when built WITH_MPI).
241  GMPI::Comm& get_Comm() { return myComm; }
242  #endif
243 
247  void empty_sync_buffers(bool force=false);
248 
251  void check_for_new_point(const PPIDpair&);
252 
254  unsigned long get_global_index(const ulong pointID, const uint mpirank);
255 
      // Get the name of this printer.
257  std::string get_printer_name() { return printer_name; }
258 
      // Get the number of RA write locations known to the primary printer:
      // the size of its reverse_global_index_lookup plus its RA_dset_offset.
263  unsigned long get_N_RApointIDs() { return primary_printer->reverse_global_index_lookup.size() + primary_printer->RA_dset_offset; }
264 
      // Move head dataset sync position (advances sync_pos by one).
266  void increment_sync_pos() { sync_pos+=1; }
267 
      // Clear previous points list.
269  void clear_previous_points() { std::vector<PPIDpair>().swap(previous_points); } // This technique also shrinks the capacity of the vector, which 'clear' does not do.
270 
273  void prepare_and_combine_tmp_files();
274 
275  //PPIDpair get_highest_PPID_from_HDF5(hid_t group_id);
276  std::map<unsigned long, unsigned long long int> get_highest_PPID_from_HDF5(hid_t group_id);
277 
279  std::vector<std::string> find_temporary_files(const bool error_if_inconsistent=false);
280 
      // NOTE(review): both combine functions take the file list by const value,
      // which copies the vector on each call; a const reference would avoid that.
282  void combine_output_py(const std::vector<std::string> tmp_files, const bool finalcombine); // Python version
283  void combine_output(const std::vector<std::string> tmp_files, const bool finalcombine); // Greg version
284 
286 
287 
      // Helper print functions.
289  // Used to reduce repetition in definitions of virtual function overloads
290  // (useful since there is no automatic type conversion possible)
291  template<class T>
292  void template_print(T const& value, const std::string& label, const int IDcode, const unsigned int mpirank, const unsigned long pointID)
293  {
294  // Retrieve the buffer manager for buffers with this type
295  auto& buffer_manager = get_mybuffermanager<T>(pointID,mpirank);
296 
297  // Extract a buffer from the manager corresponding to this
      // IDcode; the auxiliary index passed here is always 0.
298  auto& selected_buffer = buffer_manager.get_buffer(IDcode, 0, label);
299 
300  #ifdef HDEBUG_MODE
301  std::cout<<"rank "<<myRank<<", printer "<<this->get_printer_name()<<": printing "<<typeid(T).name()<<", "<<label<<" = "<<value<<std::endl;
302  std::cout<<"rank "<<myRank<<", printer "<<this->get_printer_name()<<": pointID="<<pointID<<", mpirank="<<mpirank<<std::endl;
303  #endif
304 
305  PPIDpair ppid(pointID,mpirank);
306  if(synchronised)
307  {
308  // Write the data to the selected buffer ("just works" for simple numeric types)
309  selected_buffer.append(value,ppid);
310  }
311  else
312  {
313  // Queue up a desynchronised ("random access") dataset write to previous scan iteration
      // The point is recorded in the PPID list the first time it is seen.
314  if(not seen_PPID_before(ppid))
315  {
316  add_PPID_to_list(ppid);
317  }
      // The write uses the primary printer's global index lookup table.
318  selected_buffer.RA_write(value,ppid,primary_printer->global_index_lookup);
319  }
320  }
321 
322  private:
323  // String names for output file and group
324  std::string tmp_comb_file; // temporary combined output filename
325  std::string tmpfile; // temporary filename (unique to each process)
326  std::string finalfile; // Combined results moved here only upon successful scan completion
327  std::string group; // HDF5 group location to store datasets
328 
329  // Handles for HDF5 files and groups containing the datasets
333 
334  // Handle to a location in a HDF5 to which the datasets will be written
335  // i.e. a file or a group.
338 
      // Pointer to the primary printer; initialised to this object.
340  // (if this is an auxiliary printer, else it is "this" //NULL)
341  HDF5Printer* primary_printer = this; //NULL;
342 
344  bool is_primary_printer = false;
345 
347  bool disable_combine_routines = false;
348 
      // Map containing pointers to all VertexBuffers contained in this printer.
350  // Note: Each buffer contains a bool to indicate whether it has done an "append" for the point "lastPointID"
351  BaseBufferMap all_my_buffers;
352 
354  // Need this so that we can compute when (at least initial) writing to a model point has ceased
356 
      // Current absolute dataset index.
358  // i.e. this location in the output dataset is currently the target of print functions
359  unsigned long current_dset_position;
360 
      // Map from pointID,thread pairs to absolute dataset indices.
362  // Needed for dataset writes which return to old points.
363  std::map<PPIDpair, unsigned long> global_index_lookup;
364 
365  // Matching vector for the above, for reverse lookup
366  std::vector<PPIDpair> reverse_global_index_lookup;
367 
370  unsigned long RA_dset_offset = 0;
371 
      // Label for printer, mostly for more helpful error messages.
373  std::string printer_name;
374 
      // MPI rank and size.
376  unsigned int myRank; // Needed even without MPI available, for some default behaviour.
377  unsigned int mpiSize; // " "
378  #ifdef WITH_MPI
379  // Gambit MPI communicator context for use within the hdf5 printer system
380  GMPI::Comm myComm;
381  #endif
382 
      // Flag to trigger treatment of buffers as synchronised or not.
386  bool synchronised = true;
387 
389  // i.e. print data will not be associated with parameter space points,
390  // but will be "global" data about the whole scan (e.g. max log likelihood
391  // found, scan statistics, etc.)
392  bool global = false;
393 
396  //unsigned long startpos = 0; // OBSOLETE
397 
      // Position counter exposed through get_sync_pos() and advanced by increment_sync_pos().
399  unsigned long sync_pos = 0;
400 
      // In resume mode: storage for PPIDpairs harvested from previous scan data.
402  std::vector<PPIDpair> previous_points;
403 
404  protected:
406 
      // Things which other printers need access to.
408  // Note: Each buffer contains a bool to indicate whether it has done an "append" for the point "lastPointID"
409  BaseBufferMap all_buffers;
410 
411  };
412 
// Generates the explicit specialisation of HDF5Printer::get_mybuffermanager
// for one printable TYPE.
// The macro has to be used outside the class body, and TYPE must be usable
// as part of a variable name (typedef anything that is not), because it is
// concatenated onto 'hdf5_localbufferman_' to name the manager member.
#define DEFINE_BUFFMAN_GETTER(TYPE)                                              \
  template<>                                                                     \
  inline H5P_LocalBufferManager<VertexBufferNumeric1D_HDF5<TYPE,BUFFERLENGTH>>&  \
  HDF5Printer::get_mybuffermanager<TYPE>(ulong pointID, uint mpirank)            \
  {                                                                              \
    auto& manager = CAT(hdf5_localbufferman_,TYPE);                              \
                                                                                 \
    /* Attach the manager to this printer the first time it is requested */      \
    if( not manager.ready() ) manager.init(this,synchronised);                   \
                                                                                 \
    /* Only printers running in "synchronised" mode need to check whether */     \
    /* the buffers have to be synchronised to a new point */                     \
    if(synchronised) check_for_new_point(PPIDpair(pointID, mpirank));            \
                                                                                 \
    return manager;                                                              \
  }
436 
// Define the get_mybuffermanager specialisations for long, float and double.
// NOTE(review): the generated index further down this page lists eight
// invocations (int, uint, long, ulong, longlong, ulonglong, float, double);
// only three survive in this listing -- check against the real header.
439  DEFINE_BUFFMAN_GETTER(long )
443  DEFINE_BUFFMAN_GETTER(float )
444  DEFINE_BUFFMAN_GETTER(double )
445 
446 
447 
448 
// Initialise the buffer (attach it to a printer and set its behaviour).
// NOTE(review): the signature line is missing from this listing; 'p' and
// 'sync' are presumably the HDF5Printer* and bool parameters of
// H5P_LocalBufferManager<BuffType>::init -- confirm against the real header.
// Raises a printer error if 'p' is NULL or if the manager is already attached.
449  template<class BuffType>
451  {
452  /* Set global behaviour flag */
      // Assigned before either check below, so the flag is overwritten even
      // when one of the error branches is taken.
453  synchronised = sync;
454 
455  /* Attempt to attach to printer */
456  if(p==NULL)
457  {
458  std::ostringstream errmsg;
459  errmsg << "Error! Tried to initialise a H5P_LocalBufferManager with a null pointer! Need an actual HDF5Printer object in order to work. This is a bug in the HDF5Printer class, please report it.";
460  printer_error().raise(LOCAL_INFO, errmsg.str());
461  }
      // ready() is true once 'printer' is non-null, so this branch only
      // attaches on the first successful call.
462  if(not ready()) {
463  printer = p;
464  } else {
465  std::ostringstream errmsg;
466  errmsg << "Error! Tried to initialise a H5P_LocalBufferManager twice! This is a bug in the HDF5Printer class, please report it.";
467  printer_error().raise(LOCAL_INFO, errmsg.str());
468  }
469  }
470 
471  template<class BuffType>
472  BuffType& H5P_LocalBufferManager<BuffType>::get_buffer(const int vertexID, const unsigned int aux_i, const std::string& label)
473  {
474  if(not ready()) {
475  std::ostringstream errmsg;
476  errmsg << "Error! Tried to retrieve a buffer from a buffer manager without first initialising it! This is a bug in the HDF5Printer class, please report it.";
477  printer_error().raise(LOCAL_INFO, errmsg.str());
478  }
479 
480  VBIDpair key;
481  key.vertexID = vertexID;
482  key.index = aux_i;
483 
484  typename std::map<VBIDpair, BuffType>::iterator it = local_buffers.find(key);
485 
486  if( it == local_buffers.end() )
487  {
488  error_if_key_exists(local_buffers, key, "local_buffers");
489  // No local buffer exists for this output stream yet, so make one
490  // But check first if another printer manager is already handling this
491  // output stream. If so, we relinquish control over it and silence the
492  // new output stream.
493  bool silence = false;
494  #ifdef DEBUG_MODE
495  std::cout<<"Preparing to create new print output stream..."<<std::endl;
496  std::cout<<"...label = "<<label<<std::endl;
497  std::cout<<"...is stream already managed? "<<printer->is_stream_managed(key)<<std::endl;
498  std::cout<<"...from printer with name = "<<printer->get_printer_name()<<std::endl;
499  std::cout<<"...from printer with name = "<<printer->get_printer_name()<<std::endl;
500  #endif
501  if( printer->is_stream_managed(key) )
502  {
503  silence = true;
504  }
505  #ifdef DEBUG_MODE
506  std::cout<<"...is silenced? "<<silence<<std::endl;
507  #endif
508 
509  // Create the new buffer object
510  hid_t loc(-1);
511  if(synchronised)
512  {
513  loc = printer->get_location();
514  }
515  else // write to the RA group
516  {
517  loc = printer->get_RA_location();
518  }
519 
520  local_buffers[key] = BuffType( loc
521  , label/*deconstruct?*/
522  , vertexID
523  , aux_i
524  , synchronised
525  , silence
526  , false /*printer->get_resume() -- In this new version of the HDF5Printer we write temporary files and then combine them at the end of the scan, so each individual buffer no longer needs to be in 'resume' mode, it can just start anew and be combined with the old data later on */
527  , access /* r/w mode. Buffers can now be used for reading also. */
528  );
529 
530  // Get the new (possibly silenced) buffer back out of the map
531  it = local_buffers.find(key);
532 
533  // Add a pointer to the new buffer to the full list as well
534  if(not silence) printer->insert_buffer( key, it->second );
535 
536  // Force increment the buffer to "catch it up" to the current sync
537  // position, in case it has been created "late".
538  // We subtract one because another increment will happen after
539  // the print statement (that triggered the creation of the new
540  // buffer) completes.
541  if(synchronised) it->second.fast_forward(printer->get_sync_pos()-1);
542  }
543 
544  if( it == local_buffers.end() )
545  {
546  std::ostringstream errmsg;
547  errmsg << "Error! Failed to retrieve newly created buffer (label="<<label<<") from local_buffers map! Key was: ("<<vertexID<<","<<aux_i<<")"<<std::endl;
548  printer_error().raise(LOCAL_INFO, errmsg.str());
549  }
550 
551  return it->second;
552  }
553 
555 
556 
557  // Avoid cluttering up "macro namespace"
558  #undef DEFINE_BUFFMAN_GETTER
      // NOTE(review): BT is already #undef'd inside the HDF5Printer class body
      // in this listing, so this second #undef looks redundant (but harmless).
559  #undef BT
560 
561  // Register printer so it can be constructed via inifile instructions
562  // First argument is string label for inifile access, second is class from which to construct printer
563  LOAD_PRINTER(hdf5_v1, HDF5Printer)
564 
565  } // end namespace Printers
566 
567 } // end namespace Gambit
568 
569 #ifdef DEBUG_MODE
570  #undef DEBUG_MODE
571 #endif
572 
573 #endif
unsigned long get_sync_pos() const
Get the number of pointIDs known to this printer (should correspond to the number of "appends" each ac...
BaseBufferMap all_my_buffers
Map containing pointers to all VertexBuffers contained in this printer.
unsigned int myRank
MPI rank and size.
EXPORT_SYMBOLS error & printer_error()
Printer errors.
hid_t get_location() const
Retrieve pointer to HDF5 location to which datasets are added.
H5P_LocalBufferManager< BT(ulonglong)> hdf5_localbufferman_ulonglong
std::vector< PPIDpair > reverse_global_index_lookup
unsigned long current_dset_position
Current absolute dataset index.
The main printer class for output to HDF5 format.
Declaration of VertexBufferBase class This is the base buffer class used by the HDF5Printer vertex bu...
void init(HDF5Printer *p, bool synchronised)
Initialise the buffer (attach it to a printer and set its behaviour)
#define LOCAL_INFO
Definition: local_info.hpp:34
std::vector< PPIDpair > previous_points
In resume mode: storage for PPIDpairs harvested from previous scan data.
BuffType & get_buffer(const int vID, const unsigned int i, const std::string &label)
Retrieve a buffer for an IDcode/auxiliary-index pair.
void template_print(T const &value, const std::string &label, const int IDcode, const unsigned int mpirank, const unsigned long pointID)
Helper print functions.
PPIDpair lastPointID
ID of the point that this printer is currently working on.
H5P_LocalBufferManager< BT(long)> hdf5_localbufferman_long
void swap(Spectrum &first, Spectrum &second)
Swap resources of two Spectrum objects Note: Not a member function! This is an external function whic...
Definition: spectrum.cpp:57
Logging access header for GAMBIT.
BOOST_PP_SEQ_FOR_EACH_I(GETTYPEID, _, PRINTABLE_TYPES) void printAllTypeIDs(void)
For debugging; print to stdout all the typeIDs for all types.
Definition: baseprinter.cpp:36
Definitions of new MPI datatypes needed by printers.
vertexID / sub-print index pair Identifies individual buffers (I call them VertexBuffer, but actually there can be more than one per vertex)
unsigned long long int ulonglong
H5P_LocalBufferManager< BT(longlong)> hdf5_localbufferman_longlong
bool ready()
Signal whether initialisation has occurred.
Declarations for the YAML options class.
std::map< VBIDpair, BuffType > local_buffers
Definition: hdf5printer.hpp:96
H5P_LocalBufferManager< BT(double)> hdf5_localbufferman_double
Declaration of VertexBufferNumeric1D_HDF5 class.
HDF5_BACKEND_TYPES bool is_stream_managed(VBIDpair &key) const
HDF5Printer-specific functions.
std::string get_printer_name()
Get the name of this printer.
bool synchronised
Flag to trigger treatment of buffers as synchronised or not i.e.
H5P_LocalBufferManager< BT(int)> hdf5_localbufferman_int
DEFINE_BUFFMAN_GETTER(int) DEFINE_BUFFMAN_GETTER(uint) DEFINE_BUFFMAN_GETTER(long) DEFINE_BUFFMAN_GETTER(ulong) DEFINE_BUFFMAN_GETTER(longlong) DEFINE_BUFFMAN_GETTER(ulonglong) DEFINE_BUFFMAN_GETTER(float) DEFINE_BUFFMAN_GETTER(double) template< class BuffType > void H5P_LocalBufferManager< BuffType >
Templated H5P_LocalBufferManager member functions.
#define DECLARE_PRINT(r, data, i, elem)
H5P_LocalBufferManager< BT(float)> hdf5_localbufferman_float
Declaration and definition of printer base class.
void error_if_key_exists(const std::map< T, U > &m, const T &key, const std::string &tag)
Helper function to check if a VertexBuffer key already exists in a map.
Definition: hdf5printer.hpp:74
#define LOAD_PRINTER(tag,...)
Definition: baseprinter.hpp:57
unsigned long int ulong
A simple C++ wrapper for the MPI C bindings.
Declaration for class DataSetInterfaceScalar.
void increment_sync_pos()
Move head dataset sync position.
std::map< PPIDpair, unsigned long > global_index_lookup
Map from pointID,thread pairs to absolute dataset indices.
std::map< VBIDpair, VertexBufferBase * > BaseBufferMap
Helpful typedefs.
Definition: hdf5printer.hpp:70
unsigned long get_N_RApointIDs()
Get the number of RA write locations known to the primary printer NOTE: the meaning of this has chang...
std::string printer_name
Label for printer, mostly for more helpful error messages.
Sequence of all types printable by the HDF5 printer.
char access
Buffer access mode ('r'/'w')
void insert_buffer(VBIDpair &key, VertexBufferBase &newbuffer)
Add a pointer to a new buffer to the global list.
A collection of tools for interacting with HDF5 databases.
Keeps track of vertex buffers local to a print function.
Definition: hdf5printer.hpp:92
#define HDF5_TYPES
Definition: hdf5types.hpp:25
#define HDF5_BACKEND_TYPES
Definition: hdf5types.hpp:46
H5P_LocalBufferManager< BT(ulong)> hdf5_localbufferman_ulong
pointID / process number pair Used to identify a single parameter space point
TODO: see if we can use this one:
Definition: Analysis.hpp:33
A small wrapper object for 'options' nodes.
std::map< VBIDpair, bool > first_print
Flag to check if a print function has been run before.
std::pair< std::vector< std::string >, std::vector< size_t > > find_temporary_files(const std::string &finalfile)
Search for temporary files to be combined.
void clear_previous_points()
Clear previous points list.
BaseBufferMap all_buffers
Things which other printers need access to.
long long int longlong
MPI tag definitions for the VertexBuffer classes.
Concatenation macros.
H5P_LocalBufferManager< BT(uint)> hdf5_localbufferman_uint