gambit is hosted by Hepforge, IPPP Durham
GAMBIT  v1.5.0-252-gf9a3f78
a Global And Modular Bsm Inference Tool
DataSetInterfaceBase.hpp
Go to the documentation of this file.
1 // GAMBIT: Global and Modular BSM Inference Tool
2 // *********************************************
25 
26 #ifndef __DataSetInterfaceBase_hpp__
27 #define __DataSetInterfaceBase_hpp__
28 
29 // HDF5 C bindings
30 #include <hdf5.h>
31 
32 // Gambit
34 #include "gambit/Logs/logger.hpp"
35 
36 namespace Gambit {
37 
38  namespace Printers {
39 
41  // Mostly just creates the dataset and holds metadata about it
42  // Would be nice to extend to handle writing as well, but currently
43  // I have to do it differently depending on the RANK.
44  template<class T, std::size_t RECORDRANK, std::size_t CHUNKLENGTH>
46  {
47  private:
48  static const std::size_t DSETRANK = RECORDRANK+1; // Rank of the dataset array
49  hid_t mylocation_id; // handle for where this datasets is located in the hdf5 file
50  std::string myname; // name of the dataset in the hdf5 file
51 
52  // Dimension sizes for each record.
53  // This only needs to be RECORDRANK long, however zero-size arrays are not
54  // standard compliant so we will use arrays one larger than we need and
55  // just ignore the last entry.
56  std::size_t record_dims[DSETRANK];
57 
58  // Dataset and chunk dimensions
59  hsize_t dims[DSETRANK];
60  hsize_t maxdims[DSETRANK];
61  hsize_t chunkdims[DSETRANK];
62  hsize_t slicedims[DSETRANK];
63 
64  // flag to specify whether we should try to access an existing dataset or create a new one
65  bool resume;
66 
67  // flag to record access mode for the dataset
68  char access;
69 
70  protected:
71  // Derived classes need full access to these
72 
73  // HDF5 type handle for datasets
74  static const hid_t hdftype_id;
75 
76  // Handle for wrapped dataset
78 
79  // Safe(r) getter for dataset handle
81  {
82  if(this->dset_id<0)
83  {
84  std::ostringstream errmsg;
85  errmsg << "Error getting handle for dataset with name: \""<<get_myname()<<"\". Handle id is invalid. Dataset wrapping has failed to occur correctly, and problems should have been detected before this, so this is a bug; please fix.";
86  printer_error().raise(LOCAL_INFO, errmsg.str());
87  }
88  return this->dset_id;
89  }
90 
91  // index of the beginning of the next empty slot in the output array
92  // i.e. the offset in dimension 0 of the dataset needed to select the
93  // next output hyperslab.
94  unsigned long dsetnextemptyslab;
95 
96  // currently targeted index in output dataset
97  // "Virtual" because write will not directly go to this position, it
98  // will go into the buffer, but this is where the append should end
99  // up after the buffer is written to file.
100  unsigned long virtualwriteposition;
101 
102  public:
103  // Const public data accessors
104  std::string get_myname() const { return myname; }
105  std::size_t get_dsetrank() const { return DSETRANK; }
106  std::size_t get_chunklength() const { return CHUNKLENGTH; }
107  const hsize_t* get_maxdsetdims() const { return maxdims; }
108  const hsize_t* get_chunkdims() const { return chunkdims; }
109  const hsize_t* get_slicedims() const { return slicedims; }
110  unsigned long get_nextemptyslab() const { return dsetnextemptyslab; }
111  unsigned long dset_length() const { return dims[0]; }
112  char access_mode() const { return access; }
113 
114  // To point "next write" cursor back at the beginning of a dataset, for overwriting everything
115  void reset_nextemptyslab() { dsetnextemptyslab = 0; }
116 
117  // Full accessor needed for dataset dimensions
118  // so that they can be updated when chunks are added
119  hsize_t* dsetdims() { return dims; }
120 
123  DataSetInterfaceBase(hid_t location_id, const std::string& name, const std::size_t rdims[DSETRANK], const bool resume, const char access);
124  virtual ~DataSetInterfaceBase();
125 
127  hid_t createDataSet(hid_t location_id, const std::string& name, const std::size_t rdims[DSETRANK]);
128 
130  hid_t openDataSet(hid_t location_id, const std::string& name, const std::size_t rdims[DSETRANK]);
131 
133  void closeDataSet();
134 
136  void extend_dset(const unsigned long i);
137 
138  };
139 
140 
142 
143  // Define some static members
144  template<class T, std::size_t RR, std::size_t CL>
146 
148  template<class T, std::size_t RR, std::size_t CL>
150  : mylocation_id(-1)
151  , myname()
152  , record_dims()
153  , resume(false)
154  , access('r')
155  , dset_id(-1)
156  , dsetnextemptyslab(0)
157  {}
158 
159  template<class T, std::size_t RR, std::size_t CL>
160  DataSetInterfaceBase<T,RR,CL>::DataSetInterfaceBase(hid_t location_id, const std::string& name, const std::size_t rdims[DSETRANK], const bool r, const char a)
161  : mylocation_id(location_id)
162  , myname(name)
163  , record_dims() /* doh have to copy array element by element */
164  , resume(r)
165  , access(a)
166  , dset_id(-1)
167  , dsetnextemptyslab(0)
168  {
169  if(resume)
170  {
171  dset_id = openDataSet(location_id,name,rdims);
172  }
173  else
174  {
175  dset_id = createDataSet(location_id,name,rdims);
176  }
177  if(dset_id<0)
178  {
179  std::ostringstream errmsg;
180  errmsg << "Error! Failed to attach interface to dataset '"<<this->get_myname()<<"', and problem was not caught during open or create routine! This is a bug, please fix.";
181  printer_error().raise(LOCAL_INFO, errmsg.str());
182  }
183  // copy rdims to record_dims
184  for(std::size_t i=0; i<DSETRANK; i++) { record_dims[i] = rdims[i]; }
185  }
186 
188  template<class T, std::size_t RR, std::size_t CL>
190  {
191  // TODO: Having problems with copied objects sharing dataset identifiers, and closing datasets prematurely on each other.
192  // To fix, will probably need to have a fancy copy constructor or something. Or wrap datasets in an
193  // object which itself has a fancy copy constructor. For now, just leave dataset resources lying around,
194  // probably won't cause any issues.
195  // Or could explicity tell interface to close datasets before the objects are destroyed.
196  //if(this->dset_id>=0)
197  //{
198  // herr_t status = H5Dclose(this->dset_id);
199  // if(status<0)
200  // {
201  // logger() << LogTags::printers << LogTags::err <<LogTags::repeat_to_cerr<< LOCAL_INFO << ": Error destructing DataSetInterfaceBase! Failed to close wrapped dataset! (H5Dclose failed). No exception thrown because this will behave badly when throw from a destructor. (dataset name: "<<myname<<")"<<EOM;
202  // }
203  //}
204  }
205 
207  template<class T, std::size_t RR, std::size_t CL>
209  {
210  if(this->dset_id>=0)
211  {
212  herr_t status = H5Dclose(this->dset_id);
213  if(status<0)
214  {
215  std::ostringstream errmsg;
216  errmsg << "Error closing dataset (with name: \""<<this->get_myname()<<"\") in HDF5 file. H5Dclose failed.";
217  printer_error().raise(LOCAL_INFO, errmsg.str());
218  }
219  }
220  }
221 
223  template<class T, std::size_t RECORDRANK, std::size_t CHUNKLENGTH>
224  hid_t DataSetInterfaceBase<T,RECORDRANK,CHUNKLENGTH>::createDataSet(hid_t location_id, const std::string& name, const std::size_t rdims[DSETRANK])
225  {
226  // I'd like to declare rdims as rdims[RECORDRANK], but apparantly zero length arrays are not allowed,
227  // so this would not compile in the RECORDRANK=0 case, which I need. Irritating.
228 
229  // Compute initial dataspace and chunk dimensions
230  dims[0] = 0; //1*CHUNKLENGTH; // Start off with space for 1 chunk
231  maxdims[0] = H5S_UNLIMITED; // No upper limit on number of records allowed in dataset
232  chunkdims[0] = CHUNKLENGTH;
233  slicedims[0] = 1; // Dimensions of a single record in the data space
234  std::size_t loopsize = RECORDRANK; // Just tricking the compiler so it doesn't complain in the RECORDRANK=0 case.
235  for(std::size_t i=0; i<loopsize; i++)
236  {
237  // Set other dimensions to match record size
238  dims[i+1] = rdims[i];
239  maxdims[i+1] = rdims[i];
240  chunkdims[i+1] = rdims[i];
241  slicedims[i+1] = rdims[i];
242  }
243 
244  // Create the data space
245  //H5::DataSpace dspace(DSETRANK, dims, maxdims);
246  hid_t dspace_id = H5Screate_simple(DSETRANK, dims, maxdims);
247  if(dspace_id<0)
248  {
249  std::ostringstream errmsg;
250  errmsg << "Error creating dataset (with name: \""<<name<<"\") in HDF5 file. H5Screate_simple failed.";
251  printer_error().raise(LOCAL_INFO, errmsg.str());
252  }
253 
254  // Object containing dataset creation parameters
255  //H5::DSetCreatPropList cparms;
256  //cparms.setChunk(DSETRANK, chunkdims);
257  hid_t cparms_id = H5Pcreate(H5P_DATASET_CREATE);
258  if(cparms_id<0)
259  {
260  std::ostringstream errmsg;
261  errmsg << "Error creating dataset (with name: \""<<name<<"\") in HDF5 file. H5Pcreate failed.";
262  printer_error().raise(LOCAL_INFO, errmsg.str());
263  }
264 
265  herr_t status = H5Pset_chunk(cparms_id, DSETRANK, chunkdims);
266  if(status<0)
267  {
268  std::ostringstream errmsg;
269  errmsg << "Error creating dataset (with name: \""<<name<<"\") in HDF5 file. H5Pset_chunk failed.";
270  printer_error().raise(LOCAL_INFO, errmsg.str());
271  }
272 
273  // Check if location id is invalid
274  if(location_id==-1)
275  {
276  std::ostringstream errmsg;
277  errmsg << "Error! Tried to create hdf5 dataset (with name: \""<<myname<<"\") at undefined location (location_id was -1). Please check that calling code supplied a valid location handle. This is a bug, please report it.";
278  printer_error().raise(LOCAL_INFO, errmsg.str());
279  }
280 
281  // Create the dataset
282  hid_t output_dset_id;
283  output_dset_id = H5Dcreate2(location_id, name.c_str(), hdftype_id, dspace_id, H5P_DEFAULT, cparms_id, H5P_DEFAULT);
284  //output = location->createDataSet( name.c_str(), hdf_dtype.type(), dspace, cparms);
285  if(output_dset_id<0)
286  {
287  std::ostringstream errmsg;
288  errmsg << "Error creating dataset (with name: \""<<myname<<"\") in HDF5 file. Dataset with same name may already exist";
289  printer_error().raise(LOCAL_INFO, errmsg.str());
290  }
291  return output_dset_id;
292  }
293 
296  template<class T, std::size_t RECORDRANK, std::size_t CHUNKLENGTH>
297  hid_t DataSetInterfaceBase<T,RECORDRANK,CHUNKLENGTH>::openDataSet(hid_t location_id, const std::string& name, const std::size_t rdims[DSETRANK])
298  {
299  // Open the dataset
300  hid_t out_dset_id = H5Dopen2(location_id, name.c_str(), H5P_DEFAULT);
301  if(out_dset_id<0)
302  {
303  std::ostringstream errmsg;
304  errmsg << "Error opening existing dataset (with name: \""<<name<<"\") in HDF5 file. H5Dopen2 failed." << std::endl
305  << "You may have a corrupt hdf5 file from a previous run. Try using -r, or deleting the old output.";
306  printer_error().raise(LOCAL_INFO, errmsg.str());
307  }
308 
309  // Get dataspace of the dataset.
310  //H5::DataSpace dataspace = dataset.getSpace();
311  hid_t dspace_id = H5Dget_space(out_dset_id);
312  if(dspace_id<0)
313  {
314  std::ostringstream errmsg;
315  errmsg << "Error opening existing dataset (with name: \""<<name<<"\") in HDF5 file. H5Dget_space failed.";
316  printer_error().raise(LOCAL_INFO, errmsg.str());
317  }
318 
319  // Get the number of dimensions in the dataspace.
320  //int rank = dataspace.getSimpleExtentNdims();
321  int rank = H5Sget_simple_extent_ndims(dspace_id);
322  if(rank<0)
323  {
324  std::ostringstream errmsg;
325  errmsg << "Error opening existing dataset (with name: \""<<name<<"\") in HDF5 file. H5Sget_simple_extent_ndims failed.";
326  printer_error().raise(LOCAL_INFO, errmsg.str());
327  }
328  if(rank!=DSETRANK)
329  {
330  std::ostringstream errmsg;
331  errmsg << "Error while accessing existing dataset (with name: \""<<myname<<"\") in HDF5 file. Rank of dataset ("<<rank<<") does not match the expected rank ("<<DSETRANK<<").";
332  printer_error().raise(LOCAL_INFO, errmsg.str());
333  }
334 
335  // Get the dimension size of each dimension in the dataspace
336  // now that we know ndims matches DSETRANK.
337  hsize_t dims_out[DSETRANK];
338  //int ndims = dataspace.getSimpleExtentDims( dims_out, NULL);
339  int ndims = H5Sget_simple_extent_dims(dspace_id, dims_out, NULL);
340  if(ndims<0)
341  {
342  std::ostringstream errmsg;
343  errmsg << "Error while accessing existing dataset (with name: \""<<myname<<"\") in HDF5 file. Failed to retrieve dataset extents (H5Sget_simple_extent_dims failed).";
344  printer_error().raise(LOCAL_INFO, errmsg.str());
345  }
346 
347  // Update parameters to match dataset contents
348  // Compute initial dataspace and chunk dimensions
349  dims[0] = dims_out[0]; // Set to match existing data
350  maxdims[0] = H5S_UNLIMITED; // No upper limit on number of records allowed in dataset
351  chunkdims[0] = CHUNKLENGTH;
352  slicedims[0] = 1; // Dimensions of a single record in the data space
353  std::size_t loopsize = RECORDRANK; // Just tricking the compiler so it doesn't complain in the RECORDRANK=0 case.
354  for(std::size_t i=0; i<loopsize; i++)
355  {
356  // Set other dimensions to match record size
357  dims[i+1] = rdims[i];
358  maxdims[i+1] = rdims[i];
359  chunkdims[i+1] = rdims[i];
360  slicedims[i+1] = rdims[i];
361 
362  // Check that record size matches the existing dataset
363  if(dims[i+1] != dims_out[i+1])
364  {
365  std::ostringstream errmsg;
366  errmsg << "Error while accessing existing dataset (with name: \""<<myname<<"\") in HDF5 file. Size of dimension "<<i+1<<" ("<<dims_out[i+1]<<") does not match the expected size ("<<dims[i+1]<<")";
367  printer_error().raise(LOCAL_INFO, errmsg.str());
368  }
369  }
370 
371  // Update the variables which control where the next write will occur
372  // Index of first element in next target hyperslab (assumes that
373  // existing dataset has been written up to a complete chunk)
374  dsetnextemptyslab = dims[0];
375 
376  return out_dset_id;
377  }
378 
379 
381  template<class T, std::size_t RR, std::size_t CHUNKLENGTH>
382  void DataSetInterfaceBase<T,RR,CHUNKLENGTH>::extend_dset(const unsigned long min_length)
383  {
384  std::size_t current_length = this->dsetdims()[0];
385  if( min_length > current_length )
386  {
387  // Extend the dataset to the nearest multiple of CHUNKLENGTH above min_length,
388  // unless min_length is itself a multiple of CHUNKLENGTH.
389  std::size_t remainder = min_length % CHUNKLENGTH;
390  std::size_t newlength;
391  if(remainder==0) { newlength = min_length; }
392  else { newlength = min_length - remainder + CHUNKLENGTH; }
393  #ifdef HDF5_DEBUG
394  std::cout << "Requested min_length ("<<min_length<<") larger than current dataset length ("<<current_length<<") (dset name="<<this->get_myname()<<")" << std::endl
395  << "Extending dataset to newlength="<<newlength<<std::endl;
396  #endif
397  this->dsetdims()[0] = newlength;
398  //this->my_dataset.extend( this->dsetdims() );
399  herr_t status = H5Dset_extent( this->get_dset_id(), this->dsetdims());
400  if(status<0)
401  {
402  std::cout<<this->get_dset_id()<<std::endl;
403  std::ostringstream errmsg;
404  errmsg << "Failed to extend dataset (with name: \""<<myname<<"\") from length "<<current_length<<" to length "<<newlength<<"!";
405  printer_error().raise(LOCAL_INFO, errmsg.str());
406  }
407  }
408  }
410 
411  }
412 }
413 #endif
414 
hid_t openDataSet(hid_t location_id, const std::string &name, const std::size_t rdims[DSETRANK])
Open an existing dataset.
void extend_dset(const unsigned long i)
Extend dataset to nearest multiple of CHUNKLENGTH above supplied length.
EXPORT_SYMBOLS error & printer_error()
Printer errors.
void closeDataSet()
Close an open dataset.
#define LOCAL_INFO
Definition: local_info.hpp:34
Logging access header for GAMBIT.
Wrapper object to manage a single dataset.
static const hid_t hdftype_id
DataSetInterfaceBase class member definitions.
START_MODEL dNur_CMB r
virtual ~DataSetInterfaceBase()
Do cleanup (close dataset)
Exception objects required for standalone compilation.
hid_t createDataSet(hid_t location_id, const std::string &name, const std::size_t rdims[DSETRANK])
Create a (chunked) dataset.
Base template is left undefined in order to raise a compile error if a specialisation doesn't exist.
Definition: hdf5tools.hpp:77
TODO: see if we can use this one:
Definition: Analysis.hpp:33