gambit is hosted by Hepforge, IPPP Durham
GAMBIT  v1.5.0-2191-ga4742ac
a Global And Modular Bsm Inference Tool
DataSetInterfaceScalar.hpp
Go to the documentation of this file.
1 // GAMBIT: Global and Modular BSM Inference Tool
2 // *********************************************
22 
23 #ifndef __DataSetInterfaceScalar_hpp__
24 #define __DataSetInterfaceScalar_hpp__
25 
26 #include <sstream>
27 #include <iostream>
28 
29 // HDF5 C bindings
30 #include <hdf5.h>
31 
32 // Gambit
35 #include "gambit/Logs/logger.hpp"
36 
37 namespace Gambit {
38 
39  namespace Printers {
40 
43  template<class T, std::size_t CHUNKLENGTH>
44  class DataSetInterfaceScalar : public DataSetInterfaceBase<T,0,CHUNKLENGTH>
45  {
46  private:
47  static const std::size_t empty_rdims[1]; // just to satisfy base class constructor, not used.
48  static const std::size_t DSETRANK=1;
49 
50  std::vector<T> read_buffer; // Buffer to store a chunk of the linked dataset (during read operations)
51  std::size_t read_buffer_start; // Index of start of read buffer
52 
53  public:
56  DataSetInterfaceScalar(hid_t location_id, const std::string& name, const bool resume, const char access);
57 
59  std::pair<hid_t,hid_t> select_chunk(std::size_t offset, std::size_t length) const;
60 
62  void writenewchunk(const T (&chunkdata)[CHUNKLENGTH]);
63 
66  void RA_write(const T (&values)[CHUNKLENGTH], const hsize_t (&coords)[CHUNKLENGTH], std::size_t npoints);
67 
69  void zero();
70 
72 
73  // Extracts the ith chunk of length CHUNKLENGTH from the dataset
74  std::vector<T> get_chunk(std::size_t i, std::size_t length) const;
75 
76  // Extract entry at given index from dataset
77  T get_entry(std::size_t index);
78 
80 
81  };
82 
84 
85  // Define some static members
86  template<class T, std::size_t CL>
87  const std::size_t DataSetInterfaceScalar<T,CL>::empty_rdims[1] = {};
88 
90  template<class T, std::size_t CL>
92  : DataSetInterfaceBase<T,0,CL>()
93  {}
94 
95  template<class T, std::size_t CL>
96  DataSetInterfaceScalar<T,CL>::DataSetInterfaceScalar(hid_t location_id, const std::string& name, const bool resume, const char access)
97  : DataSetInterfaceBase<T,0,CL>(location_id, name, empty_rdims, resume, access)
98  {}
99 
100  template<class T, std::size_t CHUNKLENGTH>
101  void DataSetInterfaceScalar<T,CHUNKLENGTH>::writenewchunk(const T (&chunkdata)[CHUNKLENGTH])
102  {
103  #ifdef HDF5_DEBUG
104  std::cout << "Preparing to write new chunk to dataset "<<this->get_myname()<<std::endl;
105  #endif
106  // Extend the dataset if needed. Usually dataset on disk just becomes 1 chunk larger.
107  this->extend_dset(this->dsetnextemptyslab+CHUNKLENGTH);
108 
109  // Select a hyperslab.
110  std::size_t offset = this->dsetnextemptyslab;
111  std::pair<hid_t,hid_t> selection_ids = select_chunk(offset,CHUNKLENGTH);
112  hid_t memspace_id = selection_ids.first;
113  hid_t dspace_id = selection_ids.second;
114 
115  // Write the data to the hyperslab.
116  herr_t status = H5Dwrite(this->get_dset_id(), this->hdftype_id, memspace_id, dspace_id, H5P_DEFAULT, chunkdata);
117  //this->my_dataset.write( chunkdata, this->hdf_dtype.type(), memspace, filespace );
118  if(status<0)
119  {
120  std::ostringstream errmsg;
121  errmsg << "Error writing new chunk to dataset (with name: \""<<this->get_myname()<<"\") in HDF5 file. H5Dwrite failed." << std::endl;
122  printer_error().raise(LOCAL_INFO, errmsg.str());
123  }
124  #ifdef HDF5_DEBUG
125  std::cout<<"Chunk written to dataset \""<<this->get_myname()<<"\"! Incrementing chunk offset:"
126  <<this->dsetnextemptyslab<<" --> "<<this->dsetnextemptyslab+CHUNKLENGTH<<std::endl;
127  #endif
128  this->dsetnextemptyslab += CHUNKLENGTH;
129  H5Sclose(dspace_id);
130  H5Sclose(memspace_id);
131  }
132 
134  template<class T, std::size_t CHUNKLENGTH>
136  {
140  //std::cout<<"Zeroing dataset "<<this->get_myname()<<std::endl;
141 
142  T zero_buffer[CHUNKLENGTH] = {}; // Should set everything to zero I believe... at least for primitive T.
143 
144  unsigned long orig_nextslab = this->dsetnextemptyslab;
145 
147  //std::size_t Nslabs = this->dsetnextemptyslab / CHUNKLENGTH; //no good for RA datasets
148  std::size_t Nslabs = this->dset_length() / CHUNKLENGTH; //should be ok since length is constrained to multiples of CHUNKLENGTH
149 
155  this->dsetnextemptyslab = 0;
156 
157  for(std::size_t i=0; i<Nslabs; i++)
158  {
159  writenewchunk(zero_buffer);
160  }
161 
162  // hyperslab selector should automatically end up pointing back to the
163  // correct place for sync buffers, but since this should be a RA dataset
164  // we shall put it back to whatever value it had (which should probably
165  // be zero).
166  this->dsetnextemptyslab = orig_nextslab;
167  }
168 
169 
172  template<class T, std::size_t CHUNKLENGTH>
173  void DataSetInterfaceScalar<T,CHUNKLENGTH>::RA_write(const T (&values)[CHUNKLENGTH], const hsize_t (&coords)[CHUNKLENGTH], std::size_t npoints)
174  {
175  if(npoints>CHUNKLENGTH)
176  {
177  std::ostringstream errmsg;
178  errmsg << "Error! Received npoints ("<<npoints<<") greater than CHUNKLENGTH ("<<CHUNKLENGTH<<") while tring to perform RA_write for dataset (name="<<this->get_myname()<<"). The input to this function is therefore invalid.";
179  printer_error().raise(LOCAL_INFO, errmsg.str());
180  }
181  if(npoints==0)
182  {
183  std::ostringstream errmsg;
184  errmsg << "Error! Received npoints=0! This will cause an error when trying to select element for writing, and there is no point calling this function with no points to write anyway. Please review the input to this function (error occurred while tring to perform RA_write for dataset (name="<<this->get_myname()<<"))";
185  printer_error().raise(LOCAL_INFO, errmsg.str());
186  }
187 
188  bool error_occurred = false; // simple error flag
189 
190  // Extend the dataset if needed
191  // To do this need to know largest target coordinate
192  unsigned long max_coord = *std::max_element(coords,coords+npoints);
193 
194  this->extend_dset(max_coord);
195 
196  // Dataset size in memory
197  static const std::size_t MDIM_RANK = 1;
198  hsize_t mdim[] = {npoints};
199 
200  // Dataspace for the output values
201  hid_t dspace = H5Screate_simple(MDIM_RANK, mdim, NULL);
202  if(dspace<0) error_occurred = true;
203 
204  // Get the C interface identifier for a copy of the dataspace
205  // of the dataset (confusing...)
206  hid_t dspace_id = H5Dget_space(this->get_dset_id());
207  if(dspace_id<0) error_occurred = true;
208 
209  // Select the target write points in the file dataspace
210  hid_t errflag = H5Sselect_elements(dspace_id, H5S_SELECT_SET, npoints, coords);
211  if(errflag<0) error_occurred = true;
212 
213  // Get the C interface identifier for the type of the output dataset
214  hid_t expected_dtype = this->hdftype_id;
215 
216  //hid_t dtype = H5::PredType::NATIVE_DOUBLE.getId(); //the above does something like this
217  hid_t dtype = H5Dget_type(this->get_dset_id()); // type with which the dset was created
218  if(not H5Tequal(dtype, expected_dtype))
219  {
220  std::ostringstream errmsg;
221  errmsg << "Error! Tried to write to dataset (name="<<this->get_myname()<<") with type id "<<dtype<<" but expected it to have type id "<<expected_dtype<<". This is a bug in the DataSetInterfaceScalar class, please report it.";
222  printer_error().raise(LOCAL_INFO, errmsg.str());
223  }
224 
225  // Write data to selected points
226  // (H5P_DEFAULT specifies some transfer properties for the I/O
227  // operation. These are the default values, probably are ok.)
228  hid_t errflag2 = H5Dwrite(this->get_dset_id(), dtype, dspace, dspace_id, H5P_DEFAULT, values);
229 
230  if(errflag2<0) error_occurred = true;
231 
232  if(error_occurred)
233  {
234  std::ostringstream errmsg;
235  errmsg << "Error! Failed to write desynchronised buffer data to file! (dataset name="<<this->get_myname()<<")"<<std::endl
236  << "Error flags were:" << std::endl
237  << " dspace : " << dspace << std::endl
238  << " dspace_id: " << dspace_id << std::endl
239  << " errflag : " << errflag << std::endl
240  << " errflag2 : " << errflag2 << std::endl
241  << "Variables:" << std::endl
242  << " dtype = " << dtype;
243  printer_error().raise(LOCAL_INFO, errmsg.str());
244  }
245 
246  H5Tclose(dtype);
247  H5Sclose(dspace_id);
248  H5Sclose(dspace);
249 
250  // hsize_t offsets[DSETRANK];
251 
252  // // Put data into a length 1 array, for writing as a length 1 "slab"
253  // //T data_slice[1];
254  // //data_slice[0] = value;
255 
256  // // Check if dataset needs extending; we may be trying to write to a point that wasn't
257  // // yet written to by a buffer. Can write up to 1 chunk-length beyond the current
258  // // position
259 
260  // // Extend the dataset. Dataset on disk becomes 1 chunk larger.
261  // if( dset_index >= this->dsetdims()[0] )
262  // {
263  // //if( dset_index >= this->dsetdims()[0] + CHUNKLENGTH )
264  // //{
265  // // // Too far; error.
266  // // std::ostringstream errmsg;
267  // // errmsg << "Error writing RA value to dataset (with name: \""<<this->get_myname()<<"\") in HDF5 file. Requested write position ("<<dset_index<<") is more than one chunk-length ("<<CHUNKLENGTH<<") beyond the present end of the dataset ("<<this->dsetdims()[0]<<")";
268  // // printer_error().raise(LOCAL_INFO, errmsg.str());
269  // //}
270  // //
271  // //// Ok to extend.
272  // //this->dsetdims()[0] += CHUNKLENGTH; // extend dataset by 1 chunk
273 
274  // // Extend the dataset to the nearest multiple of CHUNKLENGTH above dset_index
275  // std::size_t remainder = dset_index % CHUNKLENGTH;
276  // this->dsetdims()[0] = dset_index - remainder + CHUNKLENGTH;
277 
278  // // newsize[1] = dims[1]; // don't need: only 1D for now.
279  // this->my_dataset.extend( this->dsetdims() );
280  // }
281 
282  // // Select hyperslab starting at dset_index
283  // H5::DataSpace filespace = this->my_dataset.getSpace();
284  // offsets[0] = dset_index;
285  // //offsets[1] = 0; // don't need: only 1D for now.
286  // filespace.selectHyperslab( H5S_SELECT_SET, this->get_slicedims(), offsets );
287  //
288  // // Define memory space
289  // H5::DataSpace memspace( DSETRANK, this->get_slicedims() );
290  //
291  // #ifdef HDF5_DEBUG
292  // std::cout << "Debug variables:" << std::endl
293  // << " dsetdims()[0] = " << this->dsetdims()[0] << std::endl
294  // << " offsets[0] = " << offsets[0] << std::endl
295  // << " get_slicedims()[0] = " << this->get_slicedims()[0] << std::endl;
296  // #endif
297 
298  // // Write the data to the hyperslab.
299  // try {
300  // this->my_dataset.write( &value, this->hdf_dtype.type(), memspace, filespace );
301  // } catch(const H5::Exception& e) {
302  // std::ostringstream errmsg;
303  // errmsg << "Error writing RA value to dataset (with name: \""<<this->get_myname()<<"\") in HDF5 file. Message was: "<<e.getDetailMsg() << std::endl;
304  // printer_error().raise(LOCAL_INFO, errmsg.str());
305  // }
306 
307  }
308 
311  template<class T, std::size_t CHUNKLENGTH>
312  std::pair<hid_t,hid_t> DataSetInterfaceScalar<T,CHUNKLENGTH>::select_chunk(std::size_t offset, std::size_t length) const
313  {
314  #ifdef HDF5_DEBUG
315  std::cout << "Selecting chunk in dataset "<<this->get_myname()<<" with offset "<<offset<<std::endl;
316  #endif
317 
318  // Make sure that this chunk lies within the dataset extents
319  if(offset + length > this->dset_length())
320  {
321  std::ostringstream errmsg;
322  errmsg << "Error selecting chunk from dataset (with name: \""<<this->get_myname()<<") in HDF5 file. Tried to select a hyperslab which extends beyond the dataset extents:" << std::endl;
323  errmsg << " offset = " << offset << std::endl;
324  errmsg << " offset+length = " << length << std::endl;
325  errmsg << " dset_length() = "<< this->dset_length() << std::endl;
326  printer_error().raise(LOCAL_INFO, errmsg.str());
327  }
328 
329  // Select a hyperslab.
330  //H5::DataSpace filespace = this->my_dataset.getSpace();
331  hid_t dspace_id = H5Dget_space(this->get_dset_id());
332  if(dspace_id<0)
333  {
334  std::ostringstream errmsg;
335  errmsg << "Error selecting chunk from dataset (with name: \""<<this->get_myname()<<"\") in HDF5 file. H5Dget_space failed." << std::endl;
336  printer_error().raise(LOCAL_INFO, errmsg.str());
337  }
338 
339  hsize_t offsets[DSETRANK];
340  offsets[0] = offset;
341  //offsets[1] = 0; // don't need: only 1D for now.
342 
343  hsize_t selection_dims[DSETRANK]; // Set same as output chunks, but may have a different length
344  for(std::size_t i=0; i<DSETRANK; i++) { selection_dims[i] = this->get_chunkdims()[i]; }
345  selection_dims[0] = length; // Adjust chunk length to input specification
346 
347  herr_t err_hs = H5Sselect_hyperslab(dspace_id, H5S_SELECT_SET, offsets, NULL, selection_dims, NULL);
348 
349  if(err_hs<0)
350  {
351  std::ostringstream errmsg;
352  errmsg << "Error selecting chunk from dataset (with name: \""<<this->get_myname()<<"\", offset="<<offset<<", length="<<selection_dims[0]<<") in HDF5 file. H5Sselect_hyperslab failed." << std::endl;
353  printer_error().raise(LOCAL_INFO, errmsg.str());
354  }
355 
356  // Define memory space
357  //H5::DataSpace memspace( DSETRANK, this->get_chunkdims() );
358  hid_t memspace_id = H5Screate_simple(DSETRANK, selection_dims, NULL);
359 
360  #ifdef HDF5_DEBUG
361  std::cout << "Debug variables:" << std::endl
362  << " dsetdims()[0] = " << this->dsetdims()[0] << std::endl
363  << " offsets[0] = " << offsets[0] << std::endl
364  << " CHUNKLENGTH = " << CHUNKLENGTH << std::endl
365  << " selection_dims[0] = " << selection_dims[0] << std::endl;
366  #endif
367 
368  return std::make_pair(memspace_id, dspace_id); // Be sure to close these identifiers after using them!
369  }
370 
372 
374  template<class T, std::size_t CHUNKLENGTH>
375  std::vector<T> DataSetInterfaceScalar<T,CHUNKLENGTH>::get_chunk(std::size_t offset, std::size_t length) const
376  {
377  // Buffer to receive data (and return from function)
378  std::vector<T> chunkdata(length);
379 
380  // Select hyperslab
381  std::pair<hid_t,hid_t> selection_ids = select_chunk(offset,length);
382  hid_t memspace_id = selection_ids.first;
383  hid_t dspace_id = selection_ids.second;
384 
385  // Buffer to receive data
386  void* buffer = &chunkdata[0]; // pointer to contiguous memory within the buffer vector
387 
388  // Get the data from the hyperslab.
389  herr_t err_read = H5Dread(this->get_dset_id(), this->hdftype_id, memspace_id, dspace_id, H5P_DEFAULT, buffer);
390 
391  if(err_read<0)
392  {
393  std::ostringstream errmsg;
394  errmsg << "Error retrieving chunk (offset="<<offset<<", length="<<length<<") from dataset (with name: \""<<this->get_myname()<<"\") in HDF5 file. H5Dread failed." << std::endl;
395  errmsg << " offset+length = "<< length << std::endl;
396  errmsg << " dset_length() = "<< this->dset_length() << std::endl;
397  printer_error().raise(LOCAL_INFO, errmsg.str());
398  }
399 
400  H5Sclose(dspace_id);
401  H5Sclose(memspace_id);
402 
403  return chunkdata;
404  }
405 
407  template<class T, std::size_t CHUNKLENGTH>
409  {
410  // Figure out relevant chunk start index and position of desired entry in the chunk.
411  std::size_t chunk_start = (index / CHUNKLENGTH) * CHUNKLENGTH;
412  std::size_t chunk_relative_index = index % CHUNKLENGTH;
413 
414  #ifdef HDF5_DEBUG
415  std::cout << "index :" << index << std::endl;
416  std::cout << "chunk_start:" << chunk_start << std::endl;
417  std::cout << "chunk_rel :" << chunk_relative_index << std::endl;
418  #endif
419 
420  // Figure out whether entry is already in the read buffer
421  if(read_buffer.size()==0 or read_buffer_start!=chunk_start)
422  {
423  // Nope, don't have it, read in the appropriate chunk
424  #ifdef HDF5_DEBUG
425  std::cout << "extracting new chunk starting from "<<chunk_start<< std::endl;
426  #endif
427  // Make sure we don't try to read past the end of the dataset
428  std::size_t length = CHUNKLENGTH;
429  if(chunk_start+length > this->dset_length())
430  {
431  length = this->dset_length() - chunk_start;
432  }
433  read_buffer = get_chunk(chunk_start, length);
434  read_buffer_start = chunk_start;
435  }
436 
437  return read_buffer.at(chunk_relative_index);
438  }
439 
441 
443  }
444 }
445 
446 
448 
456 
460 
463 
465 
476 
480 
498 
499 
507 
508 
510 
512 
514 
515 
516 
522 
524 
525 
527 
528 #endif
529 
EXPORT_SYMBOLS error & printer_error()
Printer errors.
#define LOCAL_INFO
Definition: local_info.hpp:34
Logging access header for GAMBIT.
Wrapper object to manage a single dataset.
Declaration for class DataSetInterfaceBase.
std::pair< hid_t, hid_t > select_chunk(std::size_t offset, std::size_t length) const
Select a hyperslab chunk in the hosted dataset.
void RA_write(const T(&values)[CHUNKLENGTH], const hsize_t(&coords)[CHUNKLENGTH], std::size_t npoints)
Perform desynchronised ("random access") dataset writes to previous scan iterations from a queue...
void zero()
Set all elements of the dataset to zero.
void writenewchunk(const T(&chunkdata)[CHUNKLENGTH])
Write data to a new chunk in the hosted dataset.
Exception objects required for standalone compilation.
Funk zero(Args... argss)
Definition: daFunk.hpp:604
Derived dataset interface, with methods for writing scalar records (i.e.
T get_entry(std::size_t index)
Extract a single entry from a linked dataset.
static const std::size_t empty_rdims[1]
DataSetInterfaceScalar member definitions.
TODO: see if we can use this one:
Definition: Analysis.hpp:33
std::vector< T > get_chunk(std::size_t i, std::size_t length) const
READ methods (perhaps can generalise to non-scalar case, but this doesn&#39;t exist yet for writing anywa...