gambit is hosted by Hepforge, IPPP Durham
GAMBIT  v1.5.0-252-gf9a3f78
a Global And Modular Bsm Inference Tool
gambit.cpp
Go to the documentation of this file.
1 // GAMBIT: Global and Modular BSM Inference Tool
2 // *********************************************
15 
16 #include <csignal>
17 
18 #include "gambit/Core/gambit.hpp"
20 
21 
22 using namespace Gambit;
23 using namespace LogTags;
24 
25 #ifdef WITH_MPI
 // Global switch, only compiled in MPI builds. main() overwrites it with the
 // inifile option "use_mpi_abort" (default true) and passes it to
 // GMPI::PrepareForFinalizeWithTimeout() in its error handlers.
26  bool use_mpi_abort = true; // Set later via inifile value
27 #endif
28 
30 void do_cleanup()
31 {
32  Gambit::Scanner::Plugins::plugin_info.dump(); // Also calls printer finalise() routine
33 }
34 
35 
37 int main(int argc, char* argv[])
38 {
39 
40  std::set_terminate(terminator);
41  cout << std::setprecision(Core().get_outprec());
42 
43  // Default exit behaviour in cases where no exceptions are raised
44  int return_value(EXIT_SUCCESS);
45 
46  #ifdef WITH_MPI
47  bool allow_finalize(true);
48  GMPI::Init();
49  #endif
50 
53  #ifdef WITH_MPI
54  {
55  GMPI::Comm temp_comm;
56  int rank = temp_comm.Get_rank();
57  if( getenv("GAMBIT_MPI_DEBUG") != NULL )
58  {
59  fprintf(stderr, "pid %li (rank %i) waiting for debugger \n", (long)getpid(), rank);
60  if( rank==0 )
61  {
62  volatile int i = 0;
63  while(i == 0) { /* change ’i’ in the debugger */ }
64  fprintf(stderr, "Variable 'i' changed externally; resuming execution! \n");
65  }
66  }
67  temp_comm.Barrier();
68  // All processes wait at the barrier until process 0 is "released" by debugger.
69  // If try/catch etc needs to be set for other processes, do those first before
70  // releasing process zero.
71  }
72  #endif
73 
74  { // Scope to ensure that all MPI communicators get destroyed before Finalize is called.
75 
76  // Set up signal handling function
77  // We attempt a clean shutdown on any of these signals
78  signal(SIGTERM, sighandler_soft);
79  signal(SIGINT, sighandler_soft);
80  signal(SIGUSR1, sighandler_soft);
81  signal(SIGUSR2, sighandler_soft);
82 
83  #ifdef WITH_MPI
84  GMPI::Comm errorComm;
86  errorComm.dup(MPI_COMM_WORLD,"errorComm"); // duplicates the COMM_WORLD context
87  const int ERROR_TAG=1; // Tag for error messages
88  errorComm.mytag = ERROR_TAG;
89  signaldata().set_MPI_comm(&errorComm); // Provide a communicator for signal handling routines to use.
91  GMPI::Comm scanComm;
92  scanComm.dup(MPI_COMM_WORLD,"scanComm"); // duplicates the COMM_WORLD context
93  Scanner::Plugins::plugin_info.initMPIdata(&scanComm);
95  int rank = scanComm.Get_rank();
96  int size = scanComm.Get_size();
97  #else
98  int rank = 0;
99  //int size = 0; // Unused if not WITH_MPI
100  #endif
101 
102  // Check number of OpenMP threads used
103  int n_omp_threads = 1;
104  #pragma omp parallel
105  {
106  if(omp_get_thread_num()==0) n_omp_threads = omp_get_num_threads();
107  }
108 
109  try
110  {
111  // Parse command line arguments, launching into the appropriate diagnostic mode
112  // if the argument passed warrants it. Otherwise just get the filename.
113  const str filename = Core().run_diagnostic(argc,argv);
114 
115  if (rank == 0)
116  {
117  cout << endl << "Starting GAMBIT" << endl;
118  cout << "----------" << endl;
119  #ifdef WITH_MPI
120  cout << "Running in MPI-parallel mode with "<<size<<" processes" << endl;
121  #else
122  cout << "WARNING! Running in SERIAL (no MPI) mode! Recompile with -DWITH_MPI=1 for MPI parallelisation" << endl;
123  #endif
124  cout << "----------" << endl;
125  cout << "Running with "<< n_omp_threads << " OpenMP threads per MPI process (set by the environment variable OMP_NUM_THREADS)." << endl;
126  if(Core().found_inifile) cout << "YAML file: "<< filename << endl;
127  }
128 
129  std::vector<std::string> arguments(argv, argv + argc);
130  logger() << core << "Command invoked: ";
131  for(int i=0;i<argc;i++){ logger() << arguments[i] << " "; }
132  logger() << endl;
133  logger() << core << "Starting GAMBIT" << EOM;
134  logger() << core;
135  #ifdef WITH_MPI
136  logger() << "Running in MPI-parallel mode with "<<size<<" processes" << endl;
137  #else
138  logger() << "WARNING! Running in SERIAL (no MPI) mode!" << endl;
139  #endif
140  logger() << "Running with "<< n_omp_threads << " OpenMP threads per MPI process (set by the environment variable OMP_NUM_THREADS)." << EOM;
141  if( Core().resume ) logger() << core << "Attempting to resume scan..." << EOM;
142  logger() << core << "Registered module functors [Core().getModuleFunctors().size()]: ";
143  logger() << Core().getModuleFunctors().size() << endl;
144  logger() << "Registered backend functors [Core().getBackendFunctors().size()]: ";
145  logger() << Core().getBackendFunctors().size() << EOM;
146 
147  // Read YAML file, which also initialises the logger.
148  IniParser::IniFile iniFile;
149  iniFile.readFile(filename);
150 
151  // Check if user wants to disable use of MPI_Abort (since it does not work correctly in all MPI implementations)
152  #ifdef WITH_MPI
153  use_mpi_abort = iniFile.getValueOrDef<bool>(true, "use_mpi_abort");
154  #endif
155 
156  // Initialise the random number generator, letting the RNG class choose its own defaults.
157  Options rng(iniFile.getValueOrDef<YAML::Node>(YAML::Node(), "rng"));
158  str generator = rng.getValueOrDef<str>("default", "generator");
159  int seed = rng.getValueOrDef<int>(-1, "seed");
160  Random::create_rng_engine(generator, seed);
161 
162  // Determine selected model(s)
163  std::set<str> selectedmodels = iniFile.getModelNames();
164 
165  // Activate "primary" model functors
166  Core().registerActiveModelFunctors( Models::ModelDB().getPrimaryModelFunctorsToActivate( selectedmodels, Core().getPrimaryModelFunctors() ) );
167 
168  // Deactivate module functions reliant on classes from missing backends
170 
171  // Set up the printer manager for redirection of scan output.
172  Printers::PrinterManager printerManager(iniFile.getPrinterNode(),Core().resume);
173 
174  // Set up dependency resolver
175  DRes::DependencyResolver dependencyResolver(Core(), Models::ModelDB(), iniFile, Utils::typeEquivalencies(), *(printerManager.printerptr));
176 
177  // Log module function info
178  dependencyResolver.printFunctorList();
179 
180  // Do the dependency resolution
181  if (rank == 0) cout << "Resolving dependencies and backend requirements. Hang tight..." << endl;
182  dependencyResolver.doResolution();
183  if (rank == 0) cout << "...done!" << endl;
184 
185  // Check that all requested models are used for at least one computation
186  Models::ModelDB().checkPrimaryModelFunctorUsage(Core().getActiveModelFunctors());
187 
188  // Report the proposed (output) functor evaluation order
189  dependencyResolver.printFunctorEvalOrder(Core().show_runorder);
190 
191  // If true, bail out (just wanted the run order, not a scan); otherwise, keep going.
192  if (not Core().show_runorder)
193  {
194  //Define the likelihood container object for the scanner
195  Likelihood_Container_Factory factory(Core(), dependencyResolver, iniFile, *(printerManager.printerptr));
196 
197  //Make scanner yaml node
198  YAML::Node scanner_node;
199  scanner_node["Scanner"] = iniFile.getScannerNode();
200  scanner_node["Parameters"] = iniFile.getParametersNode();
201  scanner_node["Priors"] = iniFile.getPriorsNode();
202 
203  //Create the master scan manager
204  Scanner::Scan_Manager scan(scanner_node, &printerManager, &factory);
205 
206  // Set cleanup function to call during premature shutdown
208 
209  // For extra speed with fast likelihood evaluations, disable the logs while the scans runs
210  bool disable_logs_during_scan = iniFile.getValueOrDef<bool>(false, "disable_logs_during_scan");
211  if(disable_logs_during_scan) logger().disable();
212  //Do the scan!
213  logger() << core << "Starting scan." << EOM;
214  if (rank == 0) std::cerr << "Starting scan." << std::endl;
215  scan.Run(); // Note: the likelihood container will unblock signals when it is safe to receive them.
216  logger().enable(); // Turn logs back on (in case they were disabled for speed)
217  // Check why we have exited the scanner; scan may have been terminated early by a signal.
218  // We assume here that because the scanner has exited that it has already down whatever
219  // cleanup it requires, including finalising the printers, i.e. the 'do_cleanup()' function will NOT run.
220  if(signaldata().shutdown_begun())
221  {
222  logger() << "GAMBIT has performed a controlled early shutdown due to early termination of the scanner plugin." << EOM;
223  if (rank == 0) cout << "GAMBIT has performed a controlled early shutdown." << endl << endl;
224  }
225  else
226  {
227  //Scan is done; inform signal handlers
229  logger() << "GAMBIT run completed successfully." << EOM;
230  if (rank == 0) cout << endl << "GAMBIT has finished successfully!" << endl << endl;
231  }
232  }
233 
234  }
235 
238  catch (const SilentShutdownException& e)
239  {
240  // No need to do anything, just let program shut down normally from here
241  }
242 
246  catch (const SoftShutdownException& e)
247  {
248  if (not logger().disabled())
249  {
250  std::ostringstream ss;
251  ss << e.what() << endl;
252  ss << "GAMBIT has performed a controlled early shutdown." << endl;
253  if(rank == 0) cout << ss.str();
254  logger() << ss.str() << signaldata().display_received_signals() << EOM;
255  }
256  // Let program shutdown normally from here
257  }
258 
263  catch (const HardShutdownException& e)
264  {
265  if (not logger().disabled())
266  {
267  std::ostringstream ss;
268  ss << e.what() << endl;
269  ss << "GAMBIT has shutdown (but could not finalise or abort MPI)." << endl;
270  if(rank == 0) cout << ss.str();
271  logger() << ss.str() << signaldata().display_received_signals() << EOM;
272  }
273  return EXIT_SUCCESS;
274  }
275 
277  catch (const MPIShutdownException& e)
278  {
279  if (not logger().disabled())
280  {
281  std::ostringstream ss;
282  ss << e.what() << endl;
283  ss << "GAMBIT has shutdown due to an error on another process." << endl;
284  if(rank == 0) cout << ss.str();
285  logger() << ss.str() << EOM;
286  #ifdef WITH_MPI
287  allow_finalize = GMPI::PrepareForFinalizeWithTimeout(use_mpi_abort);
288  #endif
289  }
290  return_value = EXIT_FAILURE;
291  }
292 
293  catch (const std::exception& e)
294  {
295  if (not logger().disabled())
296  {
297  cerr << endl << " \033[00;31;1mFATAL ERROR\033[00m" << endl << endl;
298  cerr << "GAMBIT has exited with fatal exception: " << e.what() << endl;
299  }
300  #ifdef WITH_MPI
301  signaldata().broadcast_shutdown_signal();
302  allow_finalize = GMPI::PrepareForFinalizeWithTimeout(use_mpi_abort);
303  #endif
304  return_value = EXIT_FAILURE;
305  }
306 
307  catch (str& e)
308  {
309  cout << endl << " \033[00;31;1mFATAL ERROR\033[00m" << endl << endl;
310  cout << "GAMBIT has exited with a fatal and uncaught exception " << endl;
311  cout << "thrown from a backend code. Due to poor code design in " << e << endl;
312  cout << "the backend, the exception has been thrown as a string. " << endl;
313  cout << "If you are the author of the backend, please throw only " << endl;
314  cout << "exceptions that inherit from std::exception. Error string: " << endl;
315  cout << e << endl;
316  #ifdef WITH_MPI
317  signaldata().broadcast_shutdown_signal();
318  allow_finalize = GMPI::PrepareForFinalizeWithTimeout(use_mpi_abort);
319  #endif
320  return_value = EXIT_FAILURE;
321  }
322 
323  #ifdef WITH_MPI
324  // Synchronise all processes before discarding shutdown messages, to make sure that
325  // they have all been sent.
326  if(allow_finalize and signaldata().shutdown_begun()) //signaldata().discard_excess_shutdown_messages();
327  {
328  // Need to clean up excess shutdown messages
329  // Only do this if MPI_Finalize will be called
330  // (it is needed to prevent MPI_Finalize from locking up,
331  // but there is no point doing it if we aren't going to
332  // call MPI_Finalize)
333  signaldata().broadcast_shutdown_signal(SignalData::NO_MORE_MESSAGES); // Tell all other processes that we are done sending messages
334  signaldata().ensure_no_more_shutdown_messages();
335  logger()<<"All shutdown messages successfully Recv'd on this process!"<<EOM;
336 
337  // DEBUG: Check for unreceived messages of any tag
338  // int timeout_sec(10);
339  // errorComm.check_for_unreceived_messages(timeout_sec);
340  // scanComm.check_for_unreceived_messages(0); // No need to wait again
341  }
342 
343  #endif
344 
345  #ifdef WITH_MPI
346  if(rank == 0) cout << "Calling MPI_Finalize..." << endl;
347  #endif
348  } // End main scope; want to destruct all communicators before MPI_Finalize() is called
349 
350  #ifdef WITH_MPI
351  if (allow_finalize)
352  {
353  logger()<<"Calling MPI_Finalize..."<<EOM;
354  GMPI::Finalize();
355  logger()<<"MPI successfully finalized!"<<EOM;
356  }
357  else
358  {
359  logger()<<"MPI_Finalize has been disabled (e.g. due to an error) and will not be called."<<EOM;
360  }
361  #endif
362 
363  return return_value;
364 
365 }
gambit_core & Core()
Core accessor function.
void terminator()
Definition: terminator.cpp:21
Manager class for creating printer objects.
std::string display_received_signals()
Print to string a list of the signals received so far by this process.
void set_shutdown_begun(const sig_atomic_t emergnc=0)
Register that shutdown has begun.
ModelFunctorClaw & ModelDB()
Claw accessor function.
Special exception used during clean exit from diagnostics.
Definition: exceptions.hpp:308
void checkPrimaryModelFunctorUsage(const activemodel_map &) const
Active model functor "usefulness" checker.
Definition: models.cpp:113
TYPE getValueOrDef(TYPE def, const args &... keys) const
void registerActiveModelFunctors(const pmfVec &)
Add entries to the map of activated primary model functors.
Definition: core.cpp:232
virtual void readFile(str filename)
Read in the YAML file.
Definition: yaml_parser.cpp:33
const fVec & getBackendFunctors() const
Get a reference to the list of backend model functors.
Definition: core.cpp:247
void do_cleanup()
Cleanup function.
Definition: gambit.cpp:30
Special exception used during controlled early shutdown.
Definition: exceptions.hpp:318
const fVec & getModuleFunctors() const
Get a reference to the list of module functors.
Definition: core.cpp:241
virtual const char * what() const
Definition: exceptions.cpp:515
Special exception raised when emergency shutdown triggered via MPI.
Definition: exceptions.hpp:336
YAML::Node getPriorsNode() const
Main dependency resolver.
Special exception used during emergency early shutdown.
Definition: exceptions.hpp:327
EXPORT_SYMBOLS SignalData & signaldata()
Retrieve global instance of signal handler options struct.
bool resume
Flag to trigger "resume" mode.
Definition: core.hpp:135
const Logging::endofmessage EOM
Explicit const instance of the end of message struct in Gambit namespace.
Definition: logger.hpp:99
YAML::Node getPrinterNode() const
Logging::LogMaster & logger()
Function to retrieve a reference to the Gambit global log object.
Definition: logger.cpp:95
static void create_rng_engine(str, int=-1)
Choose the engine to use for random number generation, based on the contents of the ini file...
std::string str
Shorthand for a standard string.
Definition: Analysis.hpp:35
void sighandler_soft(int sig)
Signal handler functions.
Header for main GAMBIT executable.
A simple C++ wrapper for the MPI C bindings.
void accountForMissingClasses() const
Tell the module functors which backends are actually present.
Definition: core.cpp:258
YAML::Node getScannerNode() const
virtual const char * what() const
Definition: exceptions.cpp:510
const std::set< str > getModelNames() const
Return list of model names (without "adhoc" model!)
void dump()
Dump contents for resume.
Main inifile class.
Definition: yaml_parser.hpp:89
YAML::Node getParametersNode() const
Getters for key/value section.
virtual const char * what() const
Definition: exceptions.cpp:520
type_equivalency & typeEquivalencies()
Backend info accessor function.
EXPORT_SYMBOLS pluginInfo plugin_info
Access Functor for plugin info.
TODO: see if we can use this one:
Definition: Analysis.hpp:33
A small wrapper object for 'options' nodes.
void set_cleanup(void_func f)
Set cleanup function.
int main(int argc, char *argv[])
Main GAMBIT program.
Definition: gambit.cpp:37
str run_diagnostic(int, char **)
Diagnostics function.
Definition: core.cpp:588