From 6ccd7c03eae559a8b753c8e86c7623076c5defdb Mon Sep 17 00:00:00 2001
From: Robert Izzard <r.izzard@surrey.ac.uk>
Date: Tue, 9 Nov 2021 14:11:49 +0000
Subject: [PATCH] add CPU time to metadata output, and fix an issue with a
 tuple rather than a string from platform.python_build()

---
 binarycpython/utils/grid.py                  | 65 ++++++++++++++++----
 binarycpython/utils/grid_options_defaults.py |  1 +
 2 files changed, 53 insertions(+), 13 deletions(-)

diff --git a/binarycpython/utils/grid.py b/binarycpython/utils/grid.py
index f7e870b5a..e49e3a50d 100644
--- a/binarycpython/utils/grid.py
+++ b/binarycpython/utils/grid.py
@@ -776,6 +776,23 @@ class Population:
 
         return all_info
 
+    def time_elapsed(self):
+        """Return the seconds between the evolution start and end timestamps."""
+        opts = self.grid_options
+        # A timestamp that is still unset (falsy) is initialised to the current time.
+        for stamp in ("_start_time_evolution", "_end_time_evolution"):
+            if not opts[stamp]:
+                opts[stamp] = time.time()
+        return opts["_end_time_evolution"] - opts["_start_time_evolution"]
+
+    def CPU_time(self):
+        """
+        Return an estimate of the CPU time used, in seconds.
+        """
+        # Estimate: wall-clock time elapsed multiplied by the process count.
+        elapsed = self.time_elapsed()
+        return elapsed * self.grid_options['num_processes']
+
     def export_all_info(
         self,
         use_datadir: bool = True,
@@ -1182,7 +1199,7 @@ class Population:
         if self.grid_options['do_analytics']:
             # Put all interesting stuff in a variable and output that afterwards, as analytics of the run.
             analytics_dict = {
-                "population_name": self.grid_options["_population_id"],
+                "population_id": self.grid_options["_population_id"],
                 "evolution_type": self.grid_options["evolution_type"],
                 "failed_count": self.grid_options["_failed_count"],
                 "failed_prob": self.grid_options["_failed_prob"],
@@ -1195,6 +1212,7 @@ class Population:
                 "total_count": self.grid_options["_count"],
                 "start_timestamp": self.grid_options["_start_time_evolution"],
                 "end_timestamp": self.grid_options["_end_time_evolution"],
+                "time_elapsed" : self.time_elapsed(),
                 "total_mass_run": self.grid_options["_total_mass_run"],
                 "total_probability_weighted_mass_run": self.grid_options[
                     "_total_probability_weighted_mass_run"
@@ -1296,10 +1314,8 @@ class Population:
         self.grid_options["_end_time_evolution"] = time.time()
 
         # Log and print some information
-        dtsecs = (
-            self.grid_options["_end_time_evolution"]
-            - self.grid_options["_start_time_evolution"]
-        )
+        dtsecs = self.time_elapsed()
+
         string1 = "Population-{} finished!\nThe total probability is {:g}.".format(
             self.grid_options["_population_id"], self.grid_options["_probtot"]
         )
@@ -3113,10 +3129,6 @@ class Population:
         else:
             print_freq = 10
 
-        # Calculate amount of time left
-        # calculate amount of time passed
-        # time_passed = time.time() - self.grid_options["_start_time_evolution"]
-
         if run_number % print_freq == 0:
             binary_cmdline_string = self._return_argline(full_system_dict)
             info_string = "{color_part_1} \
@@ -5183,8 +5195,27 @@ eccentricity3=0
 
             if not self.grid_options['slurm_postpone_sbatch']:
                 # call sbatch to launch the jobs
-                cmd = "sbatch " + scriptpath
-                os.system(cmd)
+                cmd = [self.grid_options['slurm_sbatch'], scriptpath]
+                pipes = subprocess.Popen(cmd,
+                                         stdout=subprocess.PIPE,
+                                         stderr=subprocess.PIPE)
+                std_out, std_err = pipes.communicate()
+                # communicate() returns bytes: decode both streams once so no message shows b'...'
+                std_out = std_out.strip().decode('utf-8', errors='replace')
+                std_err = std_err.strip().decode('utf-8', errors='replace')
+                if pipes.returncode != 0:
+                    # sbatch failed: raise with its stderr and return code
+                    err_msg = "{red}{err}\nReturn Code: {code}{reset}".format(
+                        err=std_err, code=pipes.returncode,
+                        red=self.ANSI_colours["red"],
+                        reset=self.ANSI_colours["reset"])
+                    raise Exception(err_msg)
+                elif std_err:
+                    print("{red}{err}{reset}".format(red=self.ANSI_colours["red"],
+                                                     reset=self.ANSI_colours["reset"],
+                                                     err=std_err))
+                print("{yellow}{out}{reset}".format(yellow=self.ANSI_colours["yellow"],
+                                                    reset=self.ANSI_colours["reset"], out=std_out))
             else:
                 # just say we would have (use this for testing)
                 print("Slurm script is at {path} but has not been launched".format(path=scriptpath))
@@ -5304,7 +5335,7 @@ eccentricity3=0
 
         try:
             # special cases:
-            # copy the Xinit and settings: these should just be overridden
+            # copy the settings and Xinit: these should just be overridden
             try:
                 settings = copy.deepcopy(newpop.grid_ensemble_results['metadata']['settings'])
             except:
@@ -5439,7 +5470,7 @@ eccentricity3=0
             self.grid_ensemble_results['metadata']['platform_release'] = platform.release()
             self.grid_ensemble_results['metadata']['platform_version'] = platform.version()
             self.grid_ensemble_results['metadata']['platform_processor'] = platform.processor()
-            self.grid_ensemble_results['metadata']['platform_python_build'] = platform.python_build()
+            self.grid_ensemble_results['metadata']['platform_python_build'] = ' '.join(platform.python_build())
             self.grid_ensemble_results['metadata']['platform_python_version'] = platform.python_version()
         except Exception as e:
             print("platform call failed:",e)
@@ -5451,4 +5482,12 @@ eccentricity3=0
             print("platform call failed:",e)
             pass
 
+        try:
+            self.grid_ensemble_results['metadata']['duration'] = self.time_elapsed()
+            self.grid_ensemble_results['metadata']['CPU_time'] = self.CPU_time()
+        except Exception as e:
+            # best effort: say why the timing metadata could not be added, then carry on
+            print("Failure to calculate time elapsed and/or CPU time consumed:", e)
+
+
         return
diff --git a/binarycpython/utils/grid_options_defaults.py b/binarycpython/utils/grid_options_defaults.py
index 12d2cc9a4..83d207062 100644
--- a/binarycpython/utils/grid_options_defaults.py
+++ b/binarycpython/utils/grid_options_defaults.py
@@ -192,6 +192,7 @@ grid_options_defaults_dict = {
     "slurm_postpone_sbatch": 0,  # if 1: don't submit, just make the script
     "slurm_array": None,  # override for --array, useful for rerunning jobs
     "slurm_extra_settings": {},  # Dictionary of extra settings for Slurm to put in its launch script.
+    "slurm_sbatch": "sbatch", # sbatch command
     ########################################
     # Condor stuff
     ########################################
-- 
GitLab