From f1436f7be9910356aaeb9da445ed5ad76bad7df9 Mon Sep 17 00:00:00 2001
From: Robert Izzard <r.izzard@surrey.ac.uk>
Date: Mon, 18 Oct 2021 13:49:36 +0100
Subject: [PATCH] add option "save_ensemble_chunks" to make sure the ensemble
 chunks are saved : I am chasing up a grid that completely froze (and gave
 segfaults) ...

---
 binarycpython/utils/grid.py                  | 67 ++++++++++++++------
 binarycpython/utils/grid_options_defaults.py |  5 +-
 2 files changed, 51 insertions(+), 21 deletions(-)

diff --git a/binarycpython/utils/grid.py b/binarycpython/utils/grid.py
index fda9f533b..dee6f9693 100644
--- a/binarycpython/utils/grid.py
+++ b/binarycpython/utils/grid.py
@@ -1708,7 +1708,7 @@ class Population:
         if self.grid_options["verbosity"] >= _LOGGER_VERBOSITY_LEVEL:
             stream_logger.debug(f"Process-{self.process_ID} is finishing.")
 
-        ####################
+        ###########################
         # Handle ensemble outut
 
         # if ensemble==1, then either directly write that data to a file, or combine everything into 1 file.
@@ -1727,6 +1727,7 @@ class Population:
                     self.persistent_data_memory_dict[self.process_ID]
                 )
             )
+
             if ensemble_raw_output is None:
                 verbose_print(
                     "Process {}: Warning! Ensemble output is empty. ".format(ID),
@@ -1734,32 +1735,47 @@ class Population:
                     1,
                 )
 
-            #
-            if self.grid_options["combine_ensemble_with_thread_joining"] is True:
-                verbose_print(
-                    "Process {}: Extracting ensemble info from raw string".format(ID),
-                    self.grid_options["verbosity"],
-                    3,
-                )
+            # save the ensemble chunk to a file
+            if self.grid_options["save_ensemble_chunks"] is True or self.grid_options["combine_ensemble_with_thread_joining"] is False:
 
-                ensemble_json["ensemble"] = extract_ensemble_json_from_string(
-                    ensemble_raw_output
-                )  # Load this into a dict so that we can combine it later
-            else:
-                # If we do not allow this, automatically we will export this to the data_dir, in
-                # some formatted way
                 output_file = os.path.join(
                     self.custom_options["data_dir"],
                     "ensemble_output_{}_{}.json".format(
                         self.grid_options["_population_id"], self.process_ID
                     ),
                 )
+                verbose_print(
+                    "Writing process {} JSON ensemble chunk output to {} ".format(
+                        ID,
+                        output_file
+                    ),
+                    self.grid_options["verbosity"],
+                    1,
+                )
                 self.write_ensemble(output_file,
                                     ensemble_raw_output)
 
-        ########
-        # Clean up and return
+            # combine ensemble chunks
+            if self.grid_options["combine_ensemble_with_thread_joining"] is True:
+                verbose_print(
+                    "Process {}: Extracting ensemble info from raw string".format(ID),
+                    self.grid_options["verbosity"],
+                    1,
+                )
+
+                ensemble_json["ensemble"] = extract_ensemble_json_from_string(
+                    ensemble_raw_output
+                )  # Load this into a dict so that we can combine it later
 
+        ##########################
+        # Clean up and return
+        verbose_print(
+            "process {} free memory and return ".format(
+                ID
+            ),
+            self.grid_options["verbosity"],
+            1,
+        )
         # free store memory:
         _binary_c_bindings.free_store_memaddr(self.grid_options["_store_memaddr"])
 
@@ -1835,7 +1851,7 @@ class Population:
             f.write(json.dumps(summary_dict, indent=4))
             f.close()
 
-        # Set status to running
+        # Set status to finished
         with open(
                 os.path.join(
                     self.grid_options["tmp_dir"],
@@ -1847,16 +1863,29 @@ class Population:
             f.write("FINISHED")
             f.close()
 
+        verbose_print(
+            "process {} queue put output_dict ".format(
+                ID
+            ),
+            self.grid_options["verbosity"],
+            1,
+        )
         result_queue.put(output_dict)
 
         if self.grid_options["verbosity"] >= _LOGGER_VERBOSITY_LEVEL:
             stream_logger.debug(f"Process-{self.process_ID} is finished.")
 
-        # Clean up the interpolators if they exist
+        # Don't do this : Clean up the interpolators if they exist
 
         # TODO: make a cleanup function for the individual threads
         # TODO: make sure this is necessary. Actually its probably not, because we have a centralised queue
-
+        verbose_print(
+            "process {} return ".format(
+                ID
+            ),
+            self.grid_options["verbosity"],
+            1,
+        )
         return
 
     # Single system
diff --git a/binarycpython/utils/grid_options_defaults.py b/binarycpython/utils/grid_options_defaults.py
index 24feaf9ed..57f06c4e2 100644
--- a/binarycpython/utils/grid_options_defaults.py
+++ b/binarycpython/utils/grid_options_defaults.py
@@ -34,6 +34,7 @@ grid_options_defaults_dict = {
     "multiplicity_fraction_function": 0,  # Which multiplicity fraction function to use. 0: None, 1: Arenou 2010, 2: Rhagavan 2010, 3: Moe and di Stefano 2017
     "tmp_dir": temp_dir(),  # Setting the temp dir of the program
     "_main_pid": -1,  # Placeholder for the main process id of the run.
+    "save_ensemble_chunks" : True, # Force the ensemble chunk to be saved even if we are joining a thread (just in case the joining fails)
     "combine_ensemble_with_thread_joining": True,  # Flag on whether to combine everything and return it to the user or if false: write it to data_dir/ensemble_output_{population_id}_{thread_id}.json
     "compress_ensemble":False, # compress the ensemble output?
     "_commandline_input": "",
@@ -111,8 +112,8 @@ grid_options_defaults_dict = {
     "_start_time_evolution": 0,  # Start time of the grid
     "_end_time_evolution": 0,  # end time of the grid
     "_errors_found": False,  # Flag whether there are any errors from binary_c
-    "_errors_exceeded": False,  # Flag whether the amt of errors have exceeded the limit
-    "_failed_count": 0,  # amt of failed systems
+    "_errors_exceeded": False,  # Flag whether the number of errors have exceeded the limit
+    "_failed_count": 0,  # number of failed systems
     "_failed_prob": 0,  # Summed probability of failed systems
     "failed_systems_threshold": 20,  # Maximum failed systems per process allowed to fail before the process stops logging the failing systems.
     "_failed_systems_error_codes": [],  # List to store the unique error codes
-- 
GitLab