gambit is hosted by Hepforge, IPPP Durham
GAMBIT  v1.5.0-2191-ga4742ac
a Global And Modular Bsm Inference Tool
hdf5reader.cpp
Go to the documentation of this file.
1 // GAMBIT: Global and Modular BSM Inference Tool
2 // *********************************************
19 
24 #include "gambit/Logs/logger.hpp"
25 
26 namespace Gambit
27 {
28  namespace Printers
29  {
30 
31  // It's a little clumsy, but need to declare these type checking functions as extern templates here
32  #define externGETTYPEID(r,data,i,elem) extern template std::size_t getTypeID<elem>();
34  #undef externGETTYPEID
35 
36  hid_t openfile_read(std::string file)
37  {
38  hid_t file_id = HDF5::openFile(file);
39  if(file_id<0)
40  {
41  std::ostringstream errmsg;
42  errmsg << "Error opening HDF5 file '"<<file<<"' for reading! See HDF5 error (stderr) for further details.";
43  printer_error().raise(LOCAL_INFO, errmsg.str());
44  }
45  return file_id;
46  }
47 
48  // Post-process output of lsGroup to remove "_isvalid" datasets
49  std::vector<std::string> lsGroup_process(hid_t location_id)
50  {
51  std::vector<std::string> ls_out;
52  std::vector<std::string> ls_all = HDF5::lsGroup(location_id);
53  for(std::vector<std::string>::iterator it = ls_all.begin();
54  it != ls_all.end(); ++it)
55  {
56  if(not Utils::endsWith(*it,"_isvalid")) ls_out.push_back(*it);
57  }
58  return ls_out;
59  }
60 
61  HDF5File::HDF5File(const std::string& file, const std::string& group)
62  : file_id(openfile_read(file))
63  , location_id(HDF5::openGroup(file_id, group, true))
64  {}
65 
67  {
70  }
71 
73  : file( options.getValue<std::string>("file"))
74  , group( options.getValue<std::string>("group") )
75  , H5file(file,group)
76  , all_datasets(lsGroup_process(H5file.location_id))
77  , pointIDs (H5file.location_id, "pointID", true, 'r')
78  , pointIDs_isvalid(H5file.location_id, "pointID_isvalid", true, 'r')
79  , mpiranks (H5file.location_id, "MPIrank", true, 'r')
80  , mpiranks_isvalid(H5file.location_id, "MPIrank_isvalid", true, 'r')
81  , current_dataset_index(0)
82  , current_point(nullpoint)
83  {
84  if(all_datasets.size()<2)
85  {
86  std::ostringstream errmsg;
87  errmsg << "Error opening HDF5 datasets for reading (file="<<file<<", group="<<group<<"). Less than two datasets detected in specified group (number found = "<<all_datasets.size()<<"). Please check that the input file and group are correct." <<std::endl;
88  printer_error().raise(LOCAL_INFO, errmsg.str());
89  }
90 
91  const std::size_t dset_length = pointIDs.dset_length();
92  const std::size_t dset_length2 = pointIDs_isvalid.dset_length();
93  const std::size_t dset_length3 = mpiranks.dset_length();
94  const std::size_t dset_length4 = mpiranks_isvalid.dset_length();
95  if( (dset_length != dset_length2)
96  or (dset_length3 != dset_length4)
97  or (dset_length != dset_length3) )
98  {
99  std::ostringstream errmsg;
100  errmsg << "Error opening HDF5 datasets for reading (file="<<file<<", group="<<group<<"). Unequal dataset lengths detected in pointID and MPIrank datasets:" <<std::endl;
101  errmsg << " pointIDs.dset_length() = " << dset_length << std::endl;
102  errmsg << " pointIDs_isvalid.dset_length() = " << dset_length2 << std::endl;
103  errmsg << " mpiranks.dset_length() = " << dset_length3 << std::endl;
104  errmsg << " mpiranks_isvalid.dset_length() = " << dset_length4 << std::endl;
105  errmsg << "This most likely indicates corruption of the datasets (possibly due to unsafe shutdown).";
106  printer_error().raise(LOCAL_INFO, errmsg.str());
107  }
108  //std::cout<<"Created HDF5 reader object for file "<<file<<std::endl;
109  }
110 
112  {
113  // Need to close the datasets that aren't managed by a buffermanager object
118  //std::cout<<"Deleted HDF5 reader object for file "<<file<<std::endl;
119  }
120 
122 
125  {
128  }
129 
132  {
133  return pointIDs.dset_length();
134  }
135 
138  {
139  // New method; just move dataset index and then try to retrieve the point
142  return current_point;
143  }
144 
147  {
148  if(eoi())
149  {
150  // End of data, return nullpoint;
152  }
153  else
154  {
157  if(pvalid and mvalid)
158  {
159  unsigned long pid = pointIDs.get_entry(current_dataset_index);
161  current_point = PPIDpair(pid,mpirank);
162  }
163  else
164  {
165  // No valid data here! Up to user to check.
167  }
168  }
169  return current_point;
170  }
171 
172  // Get a linear index which corresponds to the current rank/ptID pair in the iterative sense
174  {
175  return current_dataset_index;
176  }
177 
180  {
181  bool result = current_dataset_index >= get_dataset_length();
182  //if(result) std::cout <<"eoi? index="<<current_dataset_index<<", length="<<get_dataset_length()<<std::endl;
183  return result;
184  }
185 
188  std::size_t HDF5Reader::get_type(const std::string& label)
189  {
190  hid_t datatype_id = HDF5::getH5DatasetType(H5file.location_id, label);
191  // Need to match HDF5 datatype to a printer type ID code.
192  // In principle we may like to retrieve a certain type of data in a fancy way,
193  // as with ModelParameters or vectors, however we can't really do that in an
194  // automated way because this higher-level information is lost during output.
195  // So the type matching has to be of a basic sort, i.e. individual ModelParameters
196  // elements will be identified as 'double' and so on. But if they are stored that
197  // way in the output, then we should be able to copy them that way too (which is
198  // the main usage of this get_type function), so this should be ok to do.
199  // Currently we only store data in basic types, so those are all that this
200  // function needs to retrieve.
201 
202  // Matching of HDF5 datatypes to Printer type IDs
203  // Need to use H5Tequal to check if the HDF5 type IDs are equal
204  std::size_t typeID=0;
205  #define GET_TYPE_CASES(r,data,elem) \
206  if( H5Tequal(datatype_id, get_hdf5_data_type<elem>::type()) )\
207  { \
208  typeID = getTypeID<elem>(); \
209  } \
210  else
211  BOOST_PP_SEQ_FOR_EACH(GET_TYPE_CASES, , H5_OUTPUT_TYPES)
212  #undef GET_TYPE_CASES
213  {
214  std::ostringstream err;
215  err << "Did not recognise retrieved HDF5 type for data label '"<<label<<"'! This may indicate a bug in the Reader class you are using, please report it.";
216  printer_error().raise(LOCAL_INFO,err.str());
217  }
218  if(typeID==0)
219  {
220  std::ostringstream err;
221  err << "Did not recognise retrieved Printer type for data label '"<<label<<"'! This may indicate a bug in the Printer system, please report it.";
222  printer_error().raise(LOCAL_INFO,err.str());
223  }
225  HDF5::closeType(datatype_id);
226  return typeID;
227  }
228 
230  std::set<std::string> HDF5Reader::get_all_labels()
231  {
232  std::set<std::string> out(all_datasets.begin(), all_datasets.end());
233  return out;
234  }
235 
237 
239 
242  {
243  ulong out_index=0;
244  if(ppid == current_point)
245  {
246  // Matches current point; send it out
247  out_index = current_dataset_index;
248  }
249  else if(ppid == mem_point)
250  {
251  // Matches stored point; send it out
252  out_index = mem_index;
253  }
254  else
255  {
256  // Gotta search for it.
257  std::ostringstream errmsg;
258  errmsg << "Arbitrary point access has not yet been implemented for the HDF5Reader! Currently only iterated access through a dataset works.";
259  printer_error().raise(LOCAL_INFO, errmsg.str());
260  }
261  mem_point = ppid;
262  mem_index = out_index;
263  return out_index;
264  }
265 
267 
268  }
269 }
virtual void reset()
Base class virtual interface functions.
Definition: hdf5reader.cpp:124
HDF5File(const std::string &file, const std::string &group)
Definition: hdf5reader.cpp:61
EXPORT_SYMBOLS error & printer_error()
Printer errors.
#define externGETTYPEID(r, data, i, elem)
Definition: hdf5reader.cpp:32
void closeDataSet()
Close an open dataset.
virtual ulong get_current_index()
Definition: hdf5reader.cpp:173
#define LOCAL_INFO
Definition: local_info.hpp:34
STL namespace.
hid_t openFile(const std::string &fname, bool overwrite, bool &oldfile, const char access_type='r')
File and group manipulation.
Definition: hdf5tools.cpp:182
Logging access header for GAMBIT.
BOOST_PP_SEQ_FOR_EACH_I(GETTYPEID, _, PRINTABLE_TYPES) void printAllTypeIDs(void)
For debugging; print to stdout all the typeIDs for all types.
Definition: baseprinter.cpp:36
hid_t closeType(hid_t type_id)
Release datatype identifier.
#define PRINTABLE_TYPES
const std::vector< std::string > all_datasets
Definition: hdf5reader.hpp:146
General small utility functions.
EXPORT_SYMBOLS bool endsWith(const std::string &str, const std::string &suffix)
Checks whether 'str' ends with 'suffix'.
virtual std::set< std::string > get_all_labels()
Get labels of all datasets in the linked group.
Definition: hdf5reader.cpp:230
HDF5 interface printer class declaration.
ulong get_index_from_PPID(const PPIDpair)
Private functions.
Definition: hdf5reader.cpp:241
DataSetInterfaceScalar< unsigned long, CHUNKLENGTH > pointIDs
Definition: hdf5reader.hpp:149
virtual bool eoi()
Check if 'current point' is past the end of the datasets (and thus invalid!)
Definition: hdf5reader.cpp:179
hid_t closeFile(hid_t file)
Close hdf5 file.
Definition: hdf5tools.cpp:92
hid_t getH5DatasetType(hid_t group_id, const std::string &dset_name)
Get type of an object in a group.
Definition: hdf5tools.cpp:631
std::vector< std::string > lsGroup_process(hid_t location_id)
Definition: hdf5reader.cpp:49
std::vector< std::string > lsGroup(hid_t group_id)
List object names in a group.
Definition: hdf5tools.cpp:593
virtual PPIDpair get_next_point()
Get next rank/ptID pair in data file.
Definition: hdf5reader.cpp:137
unsigned long int ulong
hid_t openGroup(hid_t file_id, const std::string &name, bool nocreate=false)
Definition: hdf5tools.cpp:512
virtual std::size_t get_type(const std::string &label)
Get type information for a data entry, i.e.
Definition: hdf5reader.cpp:188
HDF5Reader(const Options &options)
Definition: hdf5reader.cpp:72
T get_entry(std::size_t index)
Extract a single entry from a linked dataset.
DataSetInterfaceScalar< int, CHUNKLENGTH > pointIDs_isvalid
Definition: hdf5reader.hpp:150
HDF5 printer retriever class declaration This is a class accompanying the HDF5Printer which takes car...
#define GET_TYPE_CASES(r, data, elem)
EXPORT_SYMBOLS const PPIDpair nullpoint
Define 'nullpoint' const.
virtual PPIDpair get_current_point()
Get current rank/ptID pair in data file.
Definition: hdf5reader.cpp:146
DataSetInterfaceScalar< int, CHUNKLENGTH > mpiranks
Definition: hdf5reader.hpp:151
A collection of tools for interacting with HDF5 databases.
DataSetInterfaceScalar< int, CHUNKLENGTH > mpiranks_isvalid
Definition: hdf5reader.hpp:152
pointID / process number pair Used to identify a single parameter space point
TODO: see if we can use this one:
Definition: Analysis.hpp:33
A small wrapper object for 'options' nodes.
#define H5_OUTPUT_TYPES
Definition: hdf5tools.hpp:244
virtual ulong get_dataset_length()
Get length of input dataset.
Definition: hdf5reader.cpp:131