From f1436f7be9910356aaeb9da445ed5ad76bad7df9 Mon Sep 17 00:00:00 2001 From: Robert Izzard <r.izzard@surrey.ac.uk> Date: Mon, 18 Oct 2021 13:49:36 +0100 Subject: [PATCH] add option "save_ensemble_chunks" to make sure the ensemble chunks are saved : I am chasing up a grid that completely froze (and gave segfaults) ... --- binarycpython/utils/grid.py | 67 ++++++++++++++------ binarycpython/utils/grid_options_defaults.py | 5 +- 2 files changed, 51 insertions(+), 21 deletions(-) diff --git a/binarycpython/utils/grid.py b/binarycpython/utils/grid.py index fda9f533b..dee6f9693 100644 --- a/binarycpython/utils/grid.py +++ b/binarycpython/utils/grid.py @@ -1708,7 +1708,7 @@ class Population: if self.grid_options["verbosity"] >= _LOGGER_VERBOSITY_LEVEL: stream_logger.debug(f"Process-{self.process_ID} is finishing.") - #################### + ########################### # Handle ensemble outut # if ensemble==1, then either directly write that data to a file, or combine everything into 1 file. @@ -1727,6 +1727,7 @@ class Population: self.persistent_data_memory_dict[self.process_ID] ) ) + if ensemble_raw_output is None: verbose_print( "Process {}: Warning! Ensemble output is empty. ".format(ID), @@ -1734,32 +1735,47 @@ class Population: 1, ) - # - if self.grid_options["combine_ensemble_with_thread_joining"] is True: - verbose_print( - "Process {}: Extracting ensemble info from raw string".format(ID), - self.grid_options["verbosity"], - 3, - ) + # save the ensemble chunk to a file + if self.grid_options["save_ensemble_chunks"] is True or self.grid_options["combine_ensemble_with_thread_joining"] is False: - ensemble_json["ensemble"] = extract_ensemble_json_from_string( - ensemble_raw_output - ) # Load this into a dict so that we can combine it later - else: - # If we do not allow this, automatically we will export this to the data_dir, in - # some formatted way output_file = os.path.join( self.custom_options["data_dir"], "ensemble_output_{}_{}.json".format( self.grid_options["_population_id"], self.process_ID ), ) + verbose_print( + "Writing process {} JSON ensemble chunk output to {} ".format( + ID, + output_file + ), + self.grid_options["verbosity"], + 1, + ) self.write_ensemble(output_file, ensemble_raw_output) - ######## - # Clean up and return + # combine ensemble chunks + if self.grid_options["combine_ensemble_with_thread_joining"] is True: + verbose_print( + "Process {}: Extracting ensemble info from raw string".format(ID), + self.grid_options["verbosity"], + 1, + ) + + ensemble_json["ensemble"] = extract_ensemble_json_from_string( + ensemble_raw_output + ) # Load this into a dict so that we can combine it later + ########################## + # Clean up and return + verbose_print( + "process {} free memory and return ".format( + ID + ), + self.grid_options["verbosity"], + 1, + ) # free store memory: _binary_c_bindings.free_store_memaddr(self.grid_options["_store_memaddr"]) @@ -1835,7 +1851,7 @@ class Population: f.write(json.dumps(summary_dict, indent=4)) f.close() - # Set status to running + # Set status to finished with open( os.path.join( self.grid_options["tmp_dir"], @@ -1847,16 +1863,29 @@ class Population: f.write("FINISHED") f.close() + verbose_print( + "process {} queue put output_dict ".format( + ID + ), + self.grid_options["verbosity"], + 1, + ) result_queue.put(output_dict) if self.grid_options["verbosity"] >= _LOGGER_VERBOSITY_LEVEL: stream_logger.debug(f"Process-{self.process_ID} is finished.") - # Clean up the interpolators if they exist + # Don't do this : Clean up the interpolators if they exist # TODO: make a cleanup function for the individual threads # TODO: make sure this is necessary. Actually its probably not, because we have a centralised queue - + verbose_print( + "process {} return ".format( + ID + ), + self.grid_options["verbosity"], + 1, + ) return # Single system diff --git a/binarycpython/utils/grid_options_defaults.py b/binarycpython/utils/grid_options_defaults.py index 24feaf9ed..57f06c4e2 100644 --- a/binarycpython/utils/grid_options_defaults.py +++ b/binarycpython/utils/grid_options_defaults.py @@ -34,6 +34,7 @@ grid_options_defaults_dict = { "multiplicity_fraction_function": 0, # Which multiplicity fraction function to use. 0: None, 1: Arenou 2010, 2: Rhagavan 2010, 3: Moe and di Stefano 2017 "tmp_dir": temp_dir(), # Setting the temp dir of the program "_main_pid": -1, # Placeholder for the main process id of the run. + "save_ensemble_chunks" : True, # Force the ensemble chunk to be saved even if we are joining a thread (just in case the joining fails) "combine_ensemble_with_thread_joining": True, # Flag on whether to combine everything and return it to the user or if false: write it to data_dir/ensemble_output_{population_id}_{thread_id}.json "compress_ensemble":False, # compress the ensemble output? "_commandline_input": "", @@ -111,8 +112,8 @@ grid_options_defaults_dict = { "_start_time_evolution": 0, # Start time of the grid "_end_time_evolution": 0, # end time of the grid "_errors_found": False, # Flag whether there are any errors from binary_c - "_errors_exceeded": False, # Flag whether the amt of errors have exceeded the limit - "_failed_count": 0, # amt of failed systems + "_errors_exceeded": False, # Flag whether the number of errors have exceeded the limit + "_failed_count": 0, # number of failed systems "_failed_prob": 0, # Summed probability of failed systems "failed_systems_threshold": 20, # Maximum failed systems per process allowed to fail before the process stops logging the failing systems. "_failed_systems_error_codes": [], # List to store the unique error codes -- GitLab