// GAMBIT: Global and Modular BSM Inference Tool
// *********************************************
//
// postprocessor_object.cpp

#include "gambit/Logs/logger.hpp"

// Need this to allow Master process to manually check for shutdown signals without calling the likelihood container

namespace Gambit
{
  namespace PostProcessor
  {
    /// Check whether a given dataset index is contained in any of the "done" chunks
    bool point_done(const ChunkSet done_chunks, size_t index)
    {
      bool answer = false;
      for(ChunkSet::const_iterator it=done_chunks.begin();
          it!=done_chunks.end(); ++it)
      {
        if(it->iContain(index))
        {
          answer = true;
          break;
        }
      }
      return answer;
    }

    /// Get 'effective' start and end positions for a processing batch,
    /// i.e. this rank's share of the input dataset
    Chunk get_effective_chunk(const std::size_t total_length, const unsigned int rank, const unsigned int numtasks)
    {
      // Compute which points this process is supposed to process. Divide total
      // by number of MPI tasks.
      unsigned long long my_length = total_length / numtasks;
      unsigned long long r = total_length % numtasks;
      // Offset from beginning for this task, assuming equal lengths in each task
      unsigned long long start = my_length * rank;
      // Divide up the remainder amongst the tasks and adjust offsets to account for these
      if(rank<r)
      {
        my_length++;
        start+=rank;
      }
      else
      {
        start+=r;
      }
      unsigned long long end = start + my_length - 1; // Minus 1 for the zero indexing
      return Chunk(start,end);
    }
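    // Illustrative example (not from the original source): with total_length=10
    // and numtasks=3 we get my_length=3 and r=1, so rank 0 absorbs the remainder:
    //   rank 0 -> Chunk(0,3)   (4 points)
    //   rank 1 -> Chunk(4,6)   (3 points)
    //   rank 2 -> Chunk(7,9)   (3 points)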

    /// Read through pre-existing output and reconstruct which chunks of points have already been processed
    ChunkSet get_done_points(Printers::BaseBaseReader& resume_reader)
    {
      ChunkSet done_chunks;

      // Need to iterate through the pre-existing output and figure out what points it
      // has processed. We cannot tell what points were purposefully skipped (if the user
      // chose not to copy them into the output), but that shouldn't be a big deal since deciding
      // to skip a point doesn't cost much CPU, so we can just do it again.

      // We build up the set of "done" points as chunks.

      std::size_t previous_index = 0;
      bool building_chunk = false;
      std::size_t chunk_start;
      std::size_t chunk_end;
      while(not resume_reader.eoi()) // while not end of input
      {
        std::size_t input_index;
        bool is_valid = resume_reader.retrieve(input_index, "input_dataset_index");

        if(is_valid)
        {
          if(not building_chunk)
          {
            // Not building a chunk, and this point is valid, so start new (will be the first) chunk
            building_chunk = true;
            chunk_start = input_index;
          }
          else if(input_index==(previous_index+1))
          {
            // Point is just an increment by one, so still part of this chunk.
            // Do nothing.
          }
          else if(input_index==previous_index)
          {
            // Reader didn't progress; error.
            std::ostringstream err;
            err << "'resume_reader' object returned the same value for 'input_dataset_index' twice ('"<<input_index<<"')! This means that it either didn't increment properly during this postprocessor run, or the input dataset contains the same point twice! Either case indicates a bug in the postprocessor, please report it.";
            Scanner::scan_error().raise(LOCAL_INFO,err.str());
          }
          else
          {
            // Non-incremental change in input_index! Could be higher or lower; either way, we
            // close the previous chunk and start a new one.
            chunk_end = previous_index;
            done_chunks.insert(Chunk(chunk_start,chunk_end));
            chunk_start = input_index;
          }

          previous_index = input_index;
        }

        resume_reader.get_next_point(); // Move reader to next previously processed point
      }
      // Need to close off last chunk
      if(building_chunk)
      {
        chunk_end = previous_index;
        done_chunks.insert(Chunk(chunk_start,chunk_end));
      }

      return merge_chunks(done_chunks); // Simplify the chunks and return them
    }

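    // Illustrative example (not from the original source): if the input reports
    // input_dataset_index values 0,1,2,7,8 then the reconstructed done chunks are
    // [0,2] and [7,8]; the gap 3..6 will be processed (again) during this run.
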
    /// Simplify a ChunkSet by merging chunks which overlap
    ChunkSet merge_chunks(const ChunkSet& input_chunks)
    {
      ChunkSet merged_chunks;
      if(input_chunks.size()>0)
      {
        Chunk new_chunk;
        std::size_t prev_chunk_end = 0;
        new_chunk.start = input_chunks.begin()->start; // Start of first chunk
        for(ChunkSet::const_iterator it=input_chunks.begin();
            it!=input_chunks.end(); ++it)
        {
          if(prev_chunk_end!=0 and it->start > prev_chunk_end+1)
          {
            // Gap detected; close the existing chunk and start a new one.
            new_chunk.end = prev_chunk_end;
            merged_chunks.insert(new_chunk);
            new_chunk.start = it->start;
          }

          if(it->end > prev_chunk_end)
          {
            prev_chunk_end = it->end;
          }
        }
        // No more chunks, close the last open chunk
        new_chunk.end = prev_chunk_end;
        merged_chunks.insert(new_chunk);
        // Sanity check; starts and ends of merged chunks should match some start/end in the input chunks
        for(ChunkSet::const_iterator it=merged_chunks.begin();
            it!=merged_chunks.end(); ++it)
        {
          bool found_start = false;
          bool found_end = false;
          for(ChunkSet::const_iterator jt=input_chunks.begin();
              jt!=input_chunks.end(); ++jt)
          {
            if(it->start==jt->start) found_start = true;
            if(it->end==jt->end) found_end = true;
          }
          if(not found_start or not found_end)
          {
            std::ostringstream err;
            err << "Error, merged 'done_chunks' are not consistent with the originally input done_chunks! This indicates a bug in the merge_chunks routine of the postprocessor, please report it. Debug output:" << endl;
            err << "Problem merged chunk was ["<<it->start<<","<<it->end<<"]"<<endl;
            Scanner::scan_error().raise(LOCAL_INFO,err.str());
          }
          // else fine, move to next merged chunk
        }
      }
      // else there are no input chunks, just return an empty ChunkSet
      return merged_chunks;
    }

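    // Illustrative example (not from the original source): merging
    // {[0,5],[3,8],[10,12]} yields {[0,8],[10,12]}: the first two chunks overlap
    // so they collapse into one, while the gap at index 9 keeps [10,12] separate.
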
    // Gather a bunch of ints from all processes (COLLECTIVE OPERATION)
    #ifdef WITH_MPI
    std::vector<int> allgather_int(int myval, GMPI::Comm& comm)
    {
      const MPI_Datatype datatype = GMPI::get_mpi_data_type<int>::type(); // datatype for ints
      int sendbuf = myval;
      std::vector<int> all_vals(comm.Get_size(),0);
      MPI_Allgather(
          &sendbuf,     /* send buffer */
          1,            /* send count */
          datatype,     /* send datatype */
          &all_vals[0], /* recv buffer */
          1,            /* recv count */
          datatype,     /* recv datatype */
          *(comm.get_boundcomm()) /* communicator */
      );
      return all_vals;
    }
    #endif

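    // Illustrative usage (not from the original source): every rank contributes
    // one int and receives everyone's value in rank order, e.g. to share
    // per-rank exit codes at the end of a run:
    //   std::vector<int> all_codes = allgather_int(my_exit_code, my_comm);
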
    /// PPDriver member function definitions

    /// Default constructor
    PPDriver::PPDriver()
      : reader(NULL)
      , printer(NULL)
      , LogLike()
      , new_params()
      , req_models()
      , longname()
      , total_length()
      , next_point(0)
      , chunksize()
      , done_chunks()
      , all_params()
      , data_labels()
      , data_labels_copy()
      , add_to_logl()
      , subtract_from_logl()
      , renaming_scheme()
      , cut_less_than()
      , cut_greater_than()
      , discard_points_outside_cuts()
      , update_interval()
      , discard_old_logl()
      , logl_purpose_name()
      , reweighted_loglike_name()
      , root()
      , rank()
      #ifdef WITH_MPI
      , comm(NULL)
      #endif
      , verbose(false)
    {}

    /// Constructor
    PPDriver::PPDriver(
        Printers::BaseBaseReader* const r
      , Printers::BaseBasePrinter* const p
      , Scanner::like_ptr const l
      , const PPOptions& o
      )
      : reader(r)
      , printer(p)
      , LogLike(l)
      , new_params()
      , req_models()
      , longname()
      , total_length(getReader().get_dataset_length())
      , next_point(0)
      , chunksize(o.chunksize)
      , done_chunks()
      , all_params                 (o.all_params                 )
      , data_labels                (o.data_labels                )
      , data_labels_copy           (o.data_labels_copy           )
      , add_to_logl                (o.add_to_logl                )
      , subtract_from_logl         (o.subtract_from_logl         )
      , renaming_scheme            (o.renaming_scheme            )
      , cut_less_than              (o.cut_less_than              )
      , cut_greater_than           (o.cut_greater_than           )
      , discard_points_outside_cuts(o.discard_points_outside_cuts)
      , update_interval            (o.update_interval            )
      , discard_old_logl           (o.discard_old_logl           )
      , logl_purpose_name          (o.logl_purpose_name          )
      , reweighted_loglike_name    (o.reweighted_loglike_name    )
      , root                       (o.root                       )
      , rank                       (o.rank                       )
      #ifdef WITH_MPI
      , comm                       (o.comm                       )
      #endif
      , verbose                    (o.verbose                    )
    {
      // Retrieve "visible" parameter and model names.
      // This will ignore parameters with fixed values in the YAML file,
      // allowing those to be input or overridden manually.
      std::vector<std::string> keys = getLogLike()->getPrior().getShownParameters();

      // Pull the keys apart into model-name, parameter-name pairs
      if(rank==0) std::cout << "Number of model parameters to be retrieved from previous output: "<< keys.size() <<std::endl;
      for(auto it=keys.begin(); it!=keys.end(); ++it)
      {
        if(rank==0) std::cout << "   " << *it << std::endl;
        std::vector<std::string> splitkey = Utils::delimiterSplit(*it, "::");
        std::string model = splitkey[0];
        std::string par   = splitkey[1];
        req_models[model].push_back(par);
        longname[model][par] = *it;
      }
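
      // Illustrative example (not from the original source): a key
      // "NormalDist::mu" is split into model "NormalDist" and parameter "mu",
      // so that req_models["NormalDist"] contains "mu" and
      // longname["NormalDist"]["mu"] == "NormalDist::mu".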
      #ifdef WITH_MPI
      if(comm==NULL)
      {
        std::ostringstream err;
        err << "No MPI communicator supplied to postprocessor driver object! This is a bug in the postprocessor scanner plugin, please report it.";
        Scanner::scan_error().raise(LOCAL_INFO,err.str());
      }
      #endif
    }

    /// Safe accessor for the reader object
    Printers::BaseBaseReader& PPDriver::getReader()
    {
      if(reader==NULL)
      {
        std::ostringstream err;
        err << "Postprocessor tried to access reader object, but found only a NULL pointer! The postprocessor has therefore not been set up correctly, please report this bug.";
        Scanner::scan_error().raise(LOCAL_INFO,err.str());
      }
      return *reader;
    }

    /// Safe accessor for the printer object
    Printers::BaseBasePrinter& PPDriver::getPrinter()
    {
      if(printer==NULL)
      {
        std::ostringstream err;
        err << "Postprocessor tried to access printer object, but found only a NULL pointer! The postprocessor has therefore not been set up correctly, please report this bug.";
        Scanner::scan_error().raise(LOCAL_INFO,err.str());
      }
      return *printer;
    }

    /// Safe accessor for the likelihood container
    Scanner::like_ptr PPDriver::getLogLike()
    {
      // Actually it is a strange Greg-pointer; it seems we can't set it to NULL.
      // if(LogLike==NULL)
      // {
      //    std::ostringstream err;
      //    err << "Postprocessor tried to access LogLike object, but found only a NULL pointer! The postprocessor has therefore not been set up correctly, please report this bug.";
      //    Scanner::scan_error().raise(LOCAL_INFO,err.str());
      // }
      return LogLike;
    }

    /// Check postprocessor settings for consistency and general validity
    void PPDriver::check_settings()
    {
      new_params = all_params; // Parameters not present in the input file; to be deduced here. Start from everything and cut out those in the input file.

      if(not discard_old_logl)
      {
        if(std::find(data_labels.begin(), data_labels.end(), logl_purpose_name)
            != data_labels.end())
        {
          std::ostringstream err;
          err << "Error starting postprocessing run! The 'purpose' name selected for the likelihood to be computed ('"<<logl_purpose_name<<"') collides with an entry in the chosen input data. Please either change the name given in the scanner option 'like', or set 'permit_discard_old_likes' to 'true' to allow the old data to be replaced in the new output.";
          Scanner::scan_error().raise(LOCAL_INFO,err.str());
        }
        if(std::find(data_labels.begin(), data_labels.end(), reweighted_loglike_name)
            != data_labels.end())
        {
          std::ostringstream err;
          err << "Error starting postprocessing run! The label name selected for the result of likelihood weighting ('"<<reweighted_loglike_name<<"') collides with an entry in the chosen input data. Please either change the name given in the scanner option 'reweighted_like', or set 'permit_discard_old_likes' to 'true' to allow the old data to be replaced in the new output.";
          Scanner::scan_error().raise(LOCAL_INFO,err.str());
        }
      }

      // Check that the renaming scheme is valid
      for(std::map<std::string,std::string>::iterator it = renaming_scheme.begin(); it!=renaming_scheme.end(); ++it)
      {
        std::string in_label  = it->first;
        std::string out_label = it->second;

        // Make sure input label actually exists
        if(std::find(data_labels.begin(), data_labels.end(), in_label)
            == data_labels.end())
        {
          // Whoops, could not find this label in the input data
          std::ostringstream err;
          err << "Could not find data labelled '"<<in_label<<"' in the input dataset for postprocessing! In your master YAML file you have requested this data to be relabelled to '"<<out_label<<"', however it could not be found under the specified input label.";
          Scanner::scan_error().raise(LOCAL_INFO,err.str());
        }

        // Make sure chosen output name is not already claimed by the printer
        if(std::find(all_params.begin(), all_params.end(), out_label)
            != all_params.end())
        {
          // Whoops, name already in use by something else!
          std::ostringstream err;
          err << "Cannot rename dataset '"<<in_label<<"' to '"<<out_label<<"'! The requested output label has already been claimed by some other component in the scan. Please choose a different output label for this dataset in the master YAML file, or remove it from the 'rename' options for the postprocessor scanner plugin.";
          Scanner::scan_error().raise(LOCAL_INFO,err.str());
        }

        // Make sure chosen output name doesn't clash with an un-renamed item to be copied
        std::set<std::string>::iterator jt = std::find(data_labels.begin(), data_labels.end(), out_label);
        if(jt != data_labels.end())
        {
          // Potential clash; check if the name is going to be changed
          std::map<std::string,std::string>::iterator kt = renaming_scheme.find(*jt);
          if(kt == renaming_scheme.end())
          {
            // Not getting renamed! Error
            std::ostringstream err;
            err << "Cannot rename dataset '"<<in_label<<"' to '"<<out_label<<"'! The requested output label clashes with an item in the input dataset (which isn't getting renamed). Please choose a different output label for this dataset in the master YAML file, remove it from the 'rename' options for the postprocessor scanner plugin, or request for the conflicting input label to be renamed.";
            Scanner::scan_error().raise(LOCAL_INFO,err.str());
          }
          // Could still be a problem if the renamed name clashes, but we will check for that separately
        }

        // Make sure chosen output name doesn't clash with another renamed name
        for(std::map<std::string,std::string>::iterator jt = renaming_scheme.begin();
            jt!=renaming_scheme.end(); ++jt)
        {
          if((jt->second==it->second) and (jt->first!=it->first))
          {
            // If the out_labels match (and we aren't just clashing with ourselves)
            // then this is forbidden
            std::ostringstream err;
            err << "Cannot rename dataset '"<<in_label<<"' to '"<<out_label<<"'! The requested output label has already been claimed by another item in the renaming scheme (you requested '"<<jt->first<<"' to also be renamed to '"<<jt->second<<"'). Please choose a different output label for one of these datasets in the master YAML file, or remove one of them from the 'rename' options for the postprocessor scanner plugin.";
            Scanner::scan_error().raise(LOCAL_INFO,err.str());
          }
        }

        // Make sure the user isn't trying to rename a protected name (MPIrank, pointID)
        if(in_label=="MPIrank" or in_label=="pointID")
        {
          std::ostringstream err;
          err << "Cannot rename dataset '"<<in_label<<"' to '"<<out_label<<"'! The input dataset has a special purpose so renaming it is forbidden. Please remove it from the 'rename' options for the postprocessor scanner plugin in your master YAML file.";
          Scanner::scan_error().raise(LOCAL_INFO,err.str());
        }
      }

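      // Illustrative example (not from the original source): with
      //   renaming_scheme = { "LogLike" : "LogLike_old" }
      // the input dataset "LogLike" is copied to the new output under the label
      // "LogLike_old", provided "LogLike_old" is not claimed by anything else.
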
      // Check that the cut maps refer to input datasets that actually exist
      for(std::map<std::string,double>::iterator it = cut_less_than.begin(); it!=cut_less_than.end(); ++it)
      {
        std::string in_label = it->first;
        double cut_value = it->second;

        // Make sure input label actually exists
        if(std::find(data_labels.begin(), data_labels.end(), in_label)
            == data_labels.end())
        {
          // Whoops, could not find this label in the input data
          std::ostringstream err;
          err << "Could not find data labelled '"<<in_label<<"' in the input dataset for postprocessing! In your master YAML file you have requested to only postprocess points satisfying the criterion '"<<in_label<<"' <= "<<cut_value<<", however the requested dataset for cutting could not be found under the specified input label. Please fix the label or remove this entry from the 'cut_less_than' list.";
          Scanner::scan_error().raise(LOCAL_INFO,err.str());
        }

        // Make sure it has type 'double'
        if(getReader().get_type(in_label) != Printers::getTypeID<double>())
        {
          std::ostringstream err;
          err << "Type of input dataset '"<<in_label<<"' is not 'double'! In your master YAML file you have requested to only postprocess points satisfying the criterion '"<<in_label<<"' <= "<<cut_value<<", however the requested dataset for cutting cannot be retrieved as type 'double'. Currently cuts can only be applied to datasets stored as doubles, sorry! Please remove this entry from the 'cut_less_than' list.";
          // DEBUG
          err << std::endl << "input type ID:" << getReader().get_type(in_label) << std::endl;
          err << "double type ID:" << Printers::getTypeID<double>() << std::endl;
          Scanner::scan_error().raise(LOCAL_INFO,err.str());
        }
      }
      for(std::map<std::string,double>::iterator it = cut_greater_than.begin(); it!=cut_greater_than.end(); ++it)
      {
        std::string in_label = it->first;
        double cut_value = it->second;

        // Make sure input label actually exists
        if(std::find(data_labels.begin(), data_labels.end(), in_label)
            == data_labels.end())
        {
          // Whoops, could not find this label in the input data
          std::ostringstream err;
          err << "Could not find data labelled '"<<in_label<<"' in the input dataset for postprocessing! In your master YAML file you have requested to only postprocess points satisfying the criterion '"<<in_label<<"' >= "<<cut_value<<", however the requested dataset for cutting could not be found under the specified input label. Please fix the label or remove this entry from the 'cut_greater_than' list.";
          Scanner::scan_error().raise(LOCAL_INFO,err.str());
        }

        // Make sure it has type 'double'
        if(getReader().get_type(in_label) != Printers::getTypeID<double>())
        {
          std::ostringstream err;
          err << "Type of input dataset '"<<in_label<<"' is not 'double'! In your master YAML file you have requested to only postprocess points satisfying the criterion '"<<in_label<<"' >= "<<cut_value<<", however the requested dataset for cutting cannot be retrieved as type 'double'. Currently cuts can only be applied to datasets stored as doubles, sorry! Please remove this entry from the 'cut_greater_than' list.";
          Scanner::scan_error().raise(LOCAL_INFO,err.str());
        }
      }

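      // Illustrative example (not from the original source): with
      //   cut_less_than    = { "mass" : 1000 }
      //   cut_greater_than = { "logL" : -50  }
      // only input points with mass <= 1000 and logL >= -50 are postprocessed.
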

      // Check what data is to be copied and what is to be recomputed
      if(rank==0) std::cout << "Determining which data is to be copied from input file to new output file, and which will be recomputed..." <<std::endl;
      if(rank==0) std::cout << "  Datasets found in input file: " << std::endl;
      for(auto it = data_labels.begin(); it!=data_labels.end(); ++it)
      {
        // Check if any parameters we plan to copy have already been registered by the
        // printer system.
        // This is actually a little tricky, since names of parameters can be modified
        // in the output depending on what printer was used. So far we have kept a certain
        // consistency that can be exploited, but it isn't enforced. Should note this somewhere
        // in the printer documentation.
        // For example, when printing ModelParameters, they have their actual parameter names
        // appended and they are output as separate datasets/columns. Likewise for vector
        // components. But this appending rule is so far consistent, so I think we can just
        // check that no prefix substring of the proposed copy has already been registered.
        // Not sure if there is a danger of observable names accidentally being prefixes of
        // some other name?
        bool is_new = true;
        for(auto jt = all_params.begin(); jt!=all_params.end(); ++jt)
        {
          if( ( (*it)==(*jt) )
              or Gambit::Utils::startsWith(*it,(*jt)+":")
              or Gambit::Utils::startsWith(*it,(*jt)+"[")
              or Gambit::Utils::startsWith(*it,(*jt)+"{")
              or Gambit::Utils::startsWith(*it,(*jt)+"%")
              or Gambit::Utils::startsWith(*it,(*jt)+"#")
            ) // i.e. if [input data label] starts with [existing parameter] plus an appended separator character (used for extra info like a parameter name or index)
          {
            // Then it is not new. Not allowed to copy this; the likelihood container is already printing it anew.
            new_params.erase(*jt);
            is_new = false;
            break;
          }
        }

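        // Illustrative example (not from the original source): if all_params
        // contains "NormalDist" then input labels such as "NormalDist::mu" or
        // "NormalDist[0]" match the prefix rule above and are scheduled for
        // recomputation rather than copying.
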
        if(is_new)
        {
          data_labels_copy.insert(*it); // Not otherwise printed; schedule for copying
          if(rank==0) std::cout << "    copy     : "<< (*it) <<std::endl;
          // Check if it is getting relabelled
          std::map<std::string,std::string>::iterator jt = renaming_scheme.find(*it);
          if(jt != renaming_scheme.end())
          {
            // Yep, getting relabelled
            if(rank==0) std::cout << "      to --> : "<< jt->second <<std::endl;
          }
        }
        else
        {
          if(rank==0) std::cout << "    recompute: "<< (*it) <<std::endl;
          // Check if it is getting relabelled
          std::map<std::string,std::string>::iterator jt = renaming_scheme.find(*it);
          if(jt != renaming_scheme.end())
          {
            // Yep, getting relabelled
            data_labels_copy.insert(*it); // Allowed to copy this after all since the name will be changed
            if(rank==0)
            {
              std::cout << "      with old data copied"<<std::endl;
              std::cout << "      to --> : "<< jt->second <<std::endl;
            }
          }
        }
        // Check if a cut is being applied on this input dataset
        if(rank==0)
        {
          std::map<std::string,double>::iterator jt = cut_less_than.find(*it);
          if(jt != cut_less_than.end())
          {
            std::cout << "      with cut <= "<< jt->second <<std::endl;
          }
          std::map<std::string,double>::iterator kt = cut_greater_than.find(*it);
          if(kt != cut_greater_than.end())
          {
            std::cout << "      with cut >= "<< kt->second <<std::endl;
          }
        }
      }
      // Might as well also list what new stuff is listed for creation
      if(rank==0)
      {
        std::cout << "  New datasets to be added: " << std::endl;
        for(auto it = new_params.begin(); it!=new_params.end(); ++it)
        {
          std::cout << "    " << *it << std::endl;
        }
      }
      if(rank==0) std::cout << "Copy analysis complete." <<std::endl;

      if(not discard_old_logl)
      {
        // Check if any of the likelihood components being added to or subtracted from the likelihood
        // are going to be replaced in the new output. User must set 'permit_discard_old_likes' to explicitly allow this.
        for(auto it=add_to_logl.begin(); it!=add_to_logl.end(); ++it)
        {
          if(std::find(all_params.begin(), all_params.end(), *it)
              != all_params.end())
          {
            std::ostringstream err;
            err << "Error starting postprocessing run! One of the data entries listed in the option 'add_to_like' is scheduled to be recalculated during postprocessing ("<<*it<<"). This is permitted; the old value will be added to 'like' and then discarded and replaced by the new value, however you must explicitly permit this to occur by setting 'permit_discard_old_likes' to 'true'.";
            Scanner::scan_error().raise(LOCAL_INFO,err.str());
          }
        }

        for(auto it=subtract_from_logl.begin(); it!=subtract_from_logl.end(); ++it)
        {
          if(std::find(all_params.begin(), all_params.end(), *it)
              != all_params.end())
          {
            std::ostringstream err;
            err << "Error starting postprocessing run! One of the data entries listed in the option 'subtract_from_like' is scheduled to be recalculated during postprocessing ("<<*it<<"). This is permitted; the old value will be subtracted from 'like' and then discarded and replaced by the new value, however you must explicitly permit this to occur by setting 'permit_discard_old_likes' to 'true'.";
            Scanner::scan_error().raise(LOCAL_INFO,err.str());
          }
        }
      }

    }

    /// The main run loop
    int PPDriver::run_main_loop(const Chunk& mychunk)
    {
      bool quit = false; // Flag to abort 'scan' early.
      std::size_t loopi = getReader().get_current_index(); // track true index of input file
      std::size_t ppi = 0; // track number of points actually processed
      std::size_t n_passed = 0; // Number which have passed any user-specified cuts.
      bool found_chunk_start = false; // Make sure we start processing from the correct place

      //std::cout << "Chunk to process: "<<mychunk.start<<" -> "<<mychunk.end<<std::endl;

      if(mychunk.eff_length==0)
      {
        // Don't bother doing any processing for zero length chunks.
        // Just check whether the calling code wants us to shut down early.
        // NOTE: A trick here is that the Master process never runs the likelihood container
        // in this Master/Slave setup. So we have to manually check for the signal,
        // which is a little clumsy because I ideally wanted to leave this up to the
        // likelihood container. But doing this locks the postprocessor into using
        // the GAMBIT signal handling methods. TODO: is there another way?

        //if(verbose) logger() << LogTags::debug << LogTags::scanner << "Chunk to process has length zero! Will check for shutdown signals and then exit chunk process loop" << EOM;

        if(not quit)
        {
          // Inelegant bit @{
          if(signaldata().check_if_shutdown_begun())
          {
            quit = true;
          }
          // @}
        }

        loopi=mychunk.end; // Set loop counter to end of batch to satisfy checks at end of this function
      }
      else
      {
        PPIDpair current_point = getReader().get_current_point();
        loopi = getReader().get_current_index();

        //if(verbose) logger() << LogTags::debug << LogTags::scanner << "Starting loop over old points ("<<total_length<<" in total)" << std::endl;
        //std::cout << "This task (rank "<<rank<<" of "<<numtasks<<"), will process iterations "<<mychunk.start<<" through to "<<mychunk.end<<", excluding any points that may have already been processed as recorded by resume data. This leaves "<<mychunk.eff_length<<" points for this rank to process."<<std::endl;

        // Disable auto-incrementing of pointIDs in the likelihood container. We will set these manually.
        Printers::auto_increment() = false;

        bool stop_loop = false;

        if(getReader().eoi())
        {
          std::cout << "Postprocessor (rank "<<rank<<") immediately reached end of input file! Skipping execution of main loop, ..."<<std::endl;
          // We should exit with the "unexpected finish" error code if this has happened.
        }

        if(verbose) logger() << LogTags::debug << LogTags::scanner << "Searching dataset for chunk ["<<mychunk.start<<" -> "<<mychunk.end<<"] (effective length "<<mychunk.eff_length<<")"<<EOM;
        ChunkSet::iterator current_done_chunk=done_chunks.begin(); // Used to skip past points that are already done
        while(not stop_loop) // while not end of input
        {
          // std::cout << "Current index: "<<getReader().get_current_index()<<std::endl;
          // std::cout << "Current loopi: "<<loopi<<std::endl;
          // std::cout << "Current printer pointID: "<<Gambit::Printers::get_point_id()<<std::endl;
          // std::cout << "eoi?: "<<getReader().eoi()<<std::endl;

          // Cancel processing of iterations beyond our assigned range
          if(loopi>mychunk.end)
          {
            if(verbose) logger() << LogTags::debug << LogTags::scanner << "Reached the end of our batch, stopping iteration. (loopi:"<<loopi<<", mychunk.end:"<<mychunk.end<<")" << EOM;
            loopi--; // Return counter to the last index that we actually processed.
            break; // Exit the loop
          }

          // Send early quit signal if we unexpectedly hit the end of the input file
          if(getReader().eoi())
          {
            if(verbose) logger() << LogTags::debug << LogTags::scanner << "Unexpectedly hit end of input file!" <<EOM;
            quit = true;
          }

          // Inelegant signal checking. TODO: Think about how this can be shifted over to ScannerBit
          if(not quit)
          {
          }

          if(not quit)
          {
            // Inelegant bit @{
            if(signaldata().check_if_shutdown_begun())
            {
              quit = true;
            }
            // @}
          }

          if(not quit)
          {
            unsigned int MPIrank = current_point.rank;
            unsigned long long pointID = current_point.pointID;

            if(verbose) logger() << LogTags::debug << LogTags::scanner
              << "Current point: "<<MPIrank<<", "<<pointID<<std::endl
              << "Current index: "<<getReader().get_current_index()<<std::endl
              << "Current loopi: "<<loopi<<EOM;

            // Make sure we didn't somehow get desynchronised from the reader's internal index
            if(loopi!=getReader().get_current_index())
            {
              std::ostringstream err;
              err << "The postprocessor has become desynchronised with its assigned reader object! The postprocessor index is "<<loopi<<" but the reader index is "<<getReader().get_current_index()<<"! This indicates a bug in either the postprocessor or the reader, please report it";
              Scanner::scan_error().raise(LOCAL_INFO,err.str());
            }

            // Check whether the calling code wants us to shut down early
            if(quit)
            {
              // Need to save data about which points have been processed, so we
              // can resume processing from here.
              if(verbose) logger() << LogTags::debug << LogTags::scanner << "Postprocessor (rank "<<rank<<") received quit signal! Aborting run." << EOM;
              stop_loop = true;
            }

            // If we have moved past the end of the currently selected batch of "done"
            // points, then select the next batch (if there are any left)
            // if(current_done_chunk!=done_chunks.end()) std::cout << "Rank "<<rank<<": loopi="<<loopi<<", current_done_chunk=["<<current_done_chunk->start<<","<<current_done_chunk->end<<"]"<<std::endl;
            while(current_done_chunk!=done_chunks.end() and loopi > current_done_chunk->end)
            {
              //std::cout<<"Rank "<<rank<<": loopi > current_done_chunk->end ("<<loopi<<" > "<<current_done_chunk->end<<"). Moving to next done chunk..."<<std::endl;
              ++current_done_chunk;
              //std::cout<<"Rank "<<rank<<": ...which is ["<<current_done_chunk->start<<","<<current_done_chunk->end<<"]"<<std::endl;
            }

            // Skip loop ahead to the batch of points we are assigned to process,
            // and skip any points that are already processed
            if(loopi<mychunk.start or (current_done_chunk!=done_chunks.end() and current_done_chunk->iContain(loopi)))
            {
              //std::cout<<"Skipping point (not in our batch)"<<std::endl;
              //std::cout<<"(loopi=="<<loopi<<", mychunk.start="<<mychunk.start<<", current_done_chunk.start="<<current_done_chunk->start<<", current_done_chunk.end="<<current_done_chunk->end<<")"<<std::endl;
              current_point = getReader().get_next_point();
              loopi++;
              continue;
            }

            // Make sure that the first point we *don't* skip is the correct starting point
            if(not found_chunk_start)
            {
              if(loopi==mychunk.start)
              {
                found_chunk_start=true;
              }
              else
              {
                std::ostringstream err;
                err<<"The first point in this batch to be processed does not have the correct index! (mychunk.start="<<mychunk.start<<", but loopi="<<loopi<<"). This is a bug, please report it.";
                Scanner::scan_error().raise(LOCAL_INFO,err.str());
              }
            }

            if(update_interval!=0 and ppi!=0 and (ppi % update_interval) == 0)
            {
              // Progress report
              std::cout << "Rank "<<rank<<" has processed "<<ppi<<" of "<<mychunk.eff_length<<" points ("<<100*ppi/mychunk.eff_length<<"%, with "<<100*n_passed/ppi<<"% passing all cuts)"<<std::endl;
            }
            ppi++; // Processing is go, update counter.

            if(verbose) logger() << LogTags::debug << LogTags::scanner << "Found candidate point for postprocessing:"<<std::endl
              << " loopi = "<<loopi<<", point = ("<<current_point.rank<<", "<<current_point.pointID<<")"<<EOM;

            // Data about current point in input file
            if(current_point == Printers::nullpoint)
            {
              // No valid data here, go to next point
              if(verbose) logger() << LogTags::debug << LogTags::scanner << "Skipping point (no valid data here)"<<EOM;
              current_point = getReader().get_next_point();
              loopi++;
              continue;
            }
            //std::cout << "Rank: "<<rank<<", Ready to process! current iteration: "<<loopi<<", current point:" << MPIrank << ", " << pointID << std::endl;

            // Storage for retrieved parameters
            std::unordered_map<std::string, double> outputMap;

            // Extract the model parameters
            bool valid_modelparams = get_ModelParameters(outputMap);

            // Check if valid model parameters were extracted. If not, something may be wrong with the input file, or we could just be at the end of a buffer (e.g. in HDF5 case). Can't tell the difference, so just skip the point and continue.
            if(not valid_modelparams)
            {
              if(verbose) logger() << LogTags::debug << LogTags::scanner << "Skipping point as it has no valid ModelParameters" <<EOM;
              current_point = getReader().get_next_point();
              loopi++;
              continue;
            }

            // Determine if model point passes the user-requested cuts.
            // This is a little tricky because we don't know the type of the input dataset.
            // For now we will restrict the system so that it only works for datasets with
            // type 'double' (which is most stuff). We check for this earlier, so here we
            // can just assume that the requested datasets have the correct type.

            bool cuts_passed = true; // Will be set to false if any cut is failed, or a required entry is invalid
            for(std::map<std::string,double>::iterator it = cut_less_than.begin();
                it!=cut_less_than.end(); ++it)
            {
              if(cuts_passed)
              {
                std::string in_label = it->first;
                double cut_value = it->second;
                double buffer;
                bool valid = getReader().retrieve(buffer, in_label);
                if(valid)
                {
                  cuts_passed = (buffer <= cut_value);
                }
                else
                {
                  cuts_passed = false;
                }
              }
            }

            for(std::map<std::string,double>::iterator it = cut_greater_than.begin();
                it!=cut_greater_than.end(); ++it)
            {
              if(cuts_passed)
              {
                std::string in_label = it->first;
                double cut_value = it->second;
                double buffer;
                bool valid = getReader().retrieve(buffer, in_label);
                if(valid)
                {
                  cuts_passed = (buffer >= cut_value);
                }
                else
                {
                  cuts_passed = false;
                }
              }
            }

            if(cuts_passed) // Else skip new calculations and go straight to copying old results
            {
              if(verbose) logger() << LogTags::debug << LogTags::scanner << "Point passed all cuts, beginning new observable/likelihood calculations"<<EOM;
              n_passed += 1; // Counter for number of points which have passed all the cuts.
              // Before calling the likelihood function, we need to set up the printer to
              // output correctly. The auto-incrementing of pointIDs cannot be used,
              // because we need to match the old scan results. So we must set it manually.
              // This is currently a little clunky but it works. Make sure to have turned
              // off auto incrementing (see above).
              // The printer should still print to files split according to the actual rank; this
              // should only change the assigned pointID pair tag, which should already be
              // properly unambiguous if the original scan was done properly.
              // Note: This might fail for merged datasets from separate runs. Not sure what the solution
              // for that is.
              getLogLike()->setRank(MPIrank); // For purposes of printing only
              getLogLike()->setPtID(pointID);

              // We feed the unit hypercube and/or transformed parameter map into the likelihood container.
              // ScannerBit interprets the map values as post-transformation values and does not apply a
              // prior to them, and it ensures that the length of the cube plus the number of transformed
              // parameters adds up to the total number of parameters.
              double new_logL = getLogLike()(outputMap); // Here we supply *only* the map; no parameters to transform.

              // Print the index of the point in the input dataset, so that we can easily figure out later which ones
              // were postprocessed
              //std::cout<<"Rank "<<rank<<": Printing new data for point ("<<MPIrank<<", "<<pointID<<")"<<std::endl;
              getPrinter().print(loopi, "input_dataset_index", MPIrank, pointID);

              // Add old likelihood components as requested in the inifile
              if (not add_to_logl.empty() or not subtract_from_logl.empty())
              {

                double combined_logL = new_logL;
                bool is_valid(true);

                for(auto it=add_to_logl.begin(); it!=add_to_logl.end(); ++it)
                {
                  std::string old_logl = *it;
                  if(std::find(data_labels.begin(), data_labels.end(), old_logl)
                      == data_labels.end())
                  {
                    std::ostringstream err;
                    err << "In the input YAML file, you requested to 'add_to_like' the component '"<<old_logl<<"' from your input data file, however this does not match any of the data labels retrieved from the input data file you specified. Please check the spelling, path, etc. and try again.";
                    Scanner::scan_error().raise(LOCAL_INFO,err.str());
                  }
                  if(getReader().get_type(*it) != Gambit::Printers::getTypeID<double>())
                  {
                    std::ostringstream err;
                    err << "In the input YAML file, you requested 'add_to_like' component '"<<old_logl<<"' from your input data file, however this data cannot be retrieved as type 'double', therefore it cannot be used as a likelihood component. Please enter a different data label and try again.";
                    Scanner::scan_error().raise(LOCAL_INFO,err.str());
                  }

                  double old_logl_value;
                  is_valid = is_valid and getReader().retrieve(old_logl_value, old_logl);
                  if(is_valid)
                  {
                    // Combine with the new logL component
                    combined_logL += old_logl_value;
                  }
                  // Else old likelihood value didn't exist for this point; cannot combine with non-existent likelihood, so don't print the reweighted value.
                }

                // Now do the same thing for the components we want to subtract.
                for(auto it=subtract_from_logl.begin(); it!=subtract_from_logl.end(); ++it)
                {
                  std::string old_logl = *it;
                  if(std::find(data_labels.begin(), data_labels.end(), old_logl)
                      == data_labels.end())
                  {
                    std::ostringstream err;
                    err << "In the input YAML file, you requested to 'subtract_from_like' the component '"<<old_logl<<"' from your input data file, however this does not match any of the data labels retrieved from the input data file you specified. Please check the spelling, path, etc. and try again.";
                    Scanner::scan_error().raise(LOCAL_INFO,err.str());
                  }
                  if(getReader().get_type(*it) != Gambit::Printers::getTypeID<double>())
                  {
                    std::ostringstream err;
                    err << "In the input YAML file, you requested 'subtract_from_like' component '"<<old_logl<<"' from your input data file, however this data cannot be retrieved as type 'double', therefore it cannot be used as a likelihood component. Please enter a different data label and try again.";
                    Scanner::scan_error().raise(LOCAL_INFO,err.str());
                  }

                  double old_logl_value;
                  is_valid = is_valid and getReader().retrieve(old_logl_value, old_logl);
                  if(is_valid)
                  {
                    // Combine with the new logL component, subtracting this time
                    combined_logL -= old_logl_value;
                  }
                  // Else old likelihood value didn't exist for this point; cannot combine with non-existent likelihood, so don't print the reweighted value.
                }

                // Output the new reweighted likelihood (if all components were valid)
                if(is_valid) getPrinter().print(combined_logL, reweighted_loglike_name, MPIrank, pointID);

              }

            }
            else if(not discard_points_outside_cuts)
            {
              if(verbose) logger() << LogTags::debug << LogTags::scanner << "Point outside cuts, but discard_points_outside_cuts is false, so we will copy the old data for this point. But no new calculations will be done."<<EOM;

              //std::cout<<"Rank "<<rank<<": Copying existing data for point ("<<MPIrank<<", "<<pointID<<")"<<std::endl;
              getPrinter().print(MPIrank, "MPIrank", MPIrank, pointID);
              getPrinter().print(pointID, "pointID", MPIrank, pointID);

              // Print the index of the point in the input dataset, so that we can easily figure out later which ones
              // were postprocessed
              getPrinter().print(loopi, "input_dataset_index", MPIrank, pointID);

              // Now the modelparameters
              for(auto it=req_models.begin(); it!=req_models.end(); ++it)
              {
                ModelParameters modelparameters;
                std::string model = it->first;
                bool is_valid = getReader().retrieve(modelparameters, model);
                if(is_valid)
                {
                  // Use the OutputName set by the reader to preserve the original naming of the modelparameters.
                  getPrinter().print(modelparameters, modelparameters.getOutputName(), MPIrank, pointID);
                }
              }
            }
            else
            {
              if(verbose) logger() << LogTags::debug << LogTags::scanner << "Skipping point (outside cuts, no data to be copied)"<<EOM;
            }

            if(not cuts_passed and discard_points_outside_cuts)
            {
              // Don't copy in this case, just discard the old data.
              //std::cout<<"Rank "<<rank<<": Discarding old data for point ("<<MPIrank<<", "<<pointID<<") (didn't pass the cuts)"<<std::endl;
            }
            else
            {
              if(verbose) logger() << LogTags::debug << LogTags::scanner << "Copying old data for this point to new output file" << EOM;

              //std::cout<<"Rank "<<rank<<": Copying existing data for point ("<<MPIrank<<", "<<pointID<<")"<<std::endl;

              for(std::set<std::string>::iterator it = data_labels_copy.begin(); it!=data_labels_copy.end(); ++it)
              {
                // Check if this input label has been mapped to a different output label.
                std::string in_label = *it;
                std::map<std::string,std::string>::iterator jt = renaming_scheme.find(in_label);
                if(jt != renaming_scheme.end())
                {
                  // Found match! Do the renaming
                  std::string out_label = jt->second;
                  //std::cout << "Copying data from "<<in_label<<", to output name "<<out_label<<", for point ("<<MPIrank<<", "<<pointID<<")" <<std::endl;
                  getReader().retrieve_and_print(in_label,out_label,getPrinter(), MPIrank, pointID);
                }
                else
                {
                  // No match, keep the old name
                  //std::cout << "Copying data from "<<in_label<<" for point ("<<MPIrank<<", "<<pointID<<")" <<std::endl;
                  getReader().retrieve_and_print(in_label,getPrinter(), MPIrank, pointID);
                }
              }
            }

            if(not stop_loop)
            {
              current_point = getReader().get_next_point();
              loopi++;
            }
          }
          else
          {
            stop_loop = true;
          }
        }
      }

      // Check if we finished because of reaching the end of the input
      if(getReader().eoi() and loopi!=mychunk.end)
      {
        if(verbose) logger() << LogTags::debug << LogTags::scanner << "Postprocessor (rank "<<rank<<") reached the end of the input file! (debug: was this the end of our batch? (loopi="<<loopi<<", mychunk.end="<<mychunk.end<<", total_length = "<<total_length<<")"<<EOM;

        //std::cout << "Postprocessor (rank "<<rank<<") reached the end of the input file! (debug: was this the end of our batch? (loopi="<<loopi<<", mychunk.end="<<mychunk.end<<", total_length = "<<total_length<<")"<<std::endl;
      }

      // We now set the return code to inform the calling code of why we stopped.
      // 0 - Finished processing all the points we were assigned
      // 1 - Saw quit flag and so stopped prematurely
      // 2 - Encountered end of input file unexpectedly
      int exit_code = 0;
      if(quit)
      {
        exit_code = 1;
        if(verbose) logger() << LogTags::debug << LogTags::scanner << "Postprocessor (rank "<<rank<<") received quit signal! Aborting run." << EOM;
      }
      else if(getReader().eoi() and loopi!=mychunk.end)
      {
        exit_code = 2;
      }
      if(exit_code==0)
      {
        // Make sure the exit state makes sense
        if(loopi!=mychunk.end)
        {
          std::ostringstream err;
          err<<"According to the exit code, our batch has supposedly finished correctly, however the loopi counter is not equal to the proper end of our batch (loopi="<<loopi<<", mychunk.end="<<mychunk.end<<")";
          Scanner::scan_error().raise(LOCAL_INFO,err.str());
        }
      }
      return exit_code;
    }

    // Extract model parameters from the reader
    bool PPDriver::get_ModelParameters(std::unordered_map<std::string, double>& outputMap)
    {
      bool valid_modelparams = true;
      for(auto it=req_models.begin(); it!=req_models.end(); ++it)
      {
        ModelParameters modelparameters;
        std::string model = it->first;
        bool is_valid = getReader().retrieve(modelparameters, model);
        if(not is_valid)
        {
          valid_modelparams = false;
          //std::cout << "ModelParameters marked 'invalid' for model "<<model<<"; point will be skipped." << std::endl;
        }

        //std::cout << "Retrieved parameters for model '"<<model<<"' at point:" << std::endl;
        //std::cout << " ("<<MPIrank<<", "<<pointID<<") (rank,pointID)" << std::endl;
        //const std::vector<std::string> names = modelparameters.getKeys();
        //for(std::vector<std::string>::const_iterator
        //    kt = names.begin();
        //    kt!= names.end(); ++kt)
        //{
        //   std::cout << "    " << *kt << " : " << modelparameters[*kt] << std::endl;
        //}

        // Check that all the required parameters were retrieved.
        // Could actually do this in the constructor for the scanner plugin, which would be better, but a little more complicated. TODO: do this later.
        std::vector<std::string> req_pars = it->second;
        std::vector<std::string> retrieved_pars = modelparameters.getKeys();
        for(auto jt = req_pars.begin(); jt != req_pars.end(); ++jt)
        {
          std::string par = *jt;
          if(std::find(retrieved_pars.begin(), retrieved_pars.end(), par)
              == retrieved_pars.end())
          {
            std::ostringstream err;
            err << "Error! Reader could not retrieve the required parameter '"<<par<<"' for the model '"<<model<<"' from the supplied data file! Please check that this parameter indeed exists in that file." << std::endl;
            Scanner::scan_error().raise(LOCAL_INFO,err.str());
          }

          // If it was found, add it to the return map
          outputMap[ longname[model][par] ] = modelparameters[par];
        }
      }
      return valid_modelparams;
    }

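    // Illustrative example (not from the original source): for a model
    // "NormalDist" with parameters mu and sigma, a successful call fills
    // outputMap with entries keyed by the long names, e.g.
    //   { "NormalDist::mu" : 25.0, "NormalDist::sigma" : 3.0 }
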
    // Define the set of points that can be auto-skipped
    void PPDriver::set_done_chunks(const ChunkSet& in_done_chunks)
    {
      done_chunks = in_done_chunks;
    }

    /// Compute start/end indices for the next processing batch, given previous "done_chunk" data
    Chunk PPDriver::get_new_chunk()
    {
      std::size_t chunk_start  = next_point;
      std::size_t chunk_end    = next_point;
      std::size_t chunk_length = 0;
      bool stop = false;
      bool found_start = false;

      if(next_point > total_length)
      {
        // Do nothing, no points left to process. Return special stop-signal chunk.
        chunk_start = 0;
        chunk_end = 0;
      }
      else
      {
        // Build chunk to pre-set size. We select a new chunk by moving forward
        // through the dataset, but skipping points that have already been processed.
        while(not stop)
        {
          bool point_is_done = false;
          for(ChunkSet::const_iterator donechunk=done_chunks.begin();
              donechunk!=done_chunks.end(); ++donechunk)
          {
            // Check if the next scheduled point has been processed previously
            if(donechunk->iContain(next_point)) point_is_done = true;
          }

          if(not point_is_done)
          {
            chunk_length++; // Point needs to be processed, count it towards total processing length
            if(not found_start)
            {
              chunk_start = next_point; // Mark the starting point if this is the first unprocessed point to be found
              found_start = true;
            }
          }

          if(next_point == total_length)
          {
            // Stop early because we hit the end of the dataset
            chunk_end = total_length;
            stop = true;
          }
          else if(chunk_length == chunksize)
          {
            // Chunk contains enough unprocessed points; stop adding more.
            chunk_end = next_point;
            stop = true;
          }
          else if(next_point > total_length)
          {
            std::ostringstream err;
            err << "Error generating chunk to be processed; next_point exceeds total length of dataset. Something has gone wrong for this to happen, please report this as a postprocessor bug." << std::endl;
            Scanner::scan_error().raise(LOCAL_INFO,err.str());
          }
          else if(chunk_length > chunksize)
          {
            std::ostringstream err;
            err << "Error generating chunk to be processed; length of generated chunk exceeds allocated size. Something has gone wrong for this to happen, please report this as a postprocessor bug." << std::endl;
            Scanner::scan_error().raise(LOCAL_INFO,err.str());
          }

          next_point++;
        }
      }

      // Return the chunk to be processed
      //std::cout<<"chunk_start :"<<chunk_start<<std::endl;
      //std::cout<<"chunk_end   :"<<chunk_end<<std::endl;
      //std::cout<<"chunk_length:"<<chunk_length<<std::endl;
      return Chunk(chunk_start,chunk_end,chunk_length);
    }

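    // Illustrative example (not from the original source): with total_length=10,
    // chunksize=3, done_chunks={[2,4]} and next_point=0, the first call returns
    // Chunk(0,5) with eff_length 3: indices 0, 1 and 5 need processing, while
    // 2-4 are skipped as already done.
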
    /// Return index of next point to be distributed for processing (mainly to track progress)
    unsigned long long PPDriver::next_point_index()
    {
      return next_point;
    }

    /// Return total length of input dataset (mainly to track progress)
    unsigned long long PPDriver::get_total_length()
    {
      return total_length;
    }

  }
}