From 99a016109dfdf39846e07ed4e3b82118fc825b75 Mon Sep 17 00:00:00 2001
From: Robert Izzard <r.izzard@surrey.ac.uk>
Date: Sat, 13 Nov 2021 12:22:37 +0000
Subject: [PATCH] fix issues with CPU_time calculation

Clean up the evolve functions: the 'join' evolution type is now selected before _evolve_population() is called, and joining has its own function, join_previous().
---
 binarycpython/utils/analytics.py |  33 +++++++-
 binarycpython/utils/dataIO.py    |   1 +
 binarycpython/utils/grid.py      | 125 +++++++++++++++----------------
 binarycpython/utils/metadata.py  |   1 -
 binarycpython/utils/version.py   |  26 +++----
 5 files changed, 106 insertions(+), 80 deletions(-)

diff --git a/binarycpython/utils/analytics.py b/binarycpython/utils/analytics.py
index 4e421d1a3..43c557fd9 100644
--- a/binarycpython/utils/analytics.py
+++ b/binarycpython/utils/analytics.py
@@ -50,22 +50,49 @@ class analytics():
 
         return analytics_dict
 
-    def time_elapsed(self):
+    def set_time(self,when):
+        """
+        Timestamp grid_options['_<when>_time_evolution'] (when is 'start' or 'end'); 'end' also refreshes '_time_elapsed'.
+        """
+        now = time.time()
+        key = '_' + when + '_time_evolution'
+        self.grid_options[key] = now
+        if when == 'end':
+            self.grid_options["_time_elapsed"] = self.time_elapsed(force=True)
+
+    def time_elapsed(self,force=False):
         """
         Function to return how long a population object has been running.
+
+        The cached value is returned if it is available; the time elapsed
+        is (re)calculated if it is not cached or if force is True.
         """
         for x in ["_start_time_evolution","_end_time_evolution"]:
             if not self.grid_options[x]:
                 self.grid_options[x] = time.time()
-        return self.grid_options["_end_time_evolution"] - self.grid_options["_start_time_evolution"]
+                print("{} missing : {}".format(x,self.grid_options[x]))
+
+        if force or "_time_elapsed" not in self.grid_options:
+            self.grid_options["_time_elapsed"] = self.grid_options["_end_time_evolution"] - self.grid_options["_start_time_evolution"]
+            print("set time elapsed = {} - {} = {}".format(
+                self.grid_options["_end_time_evolution"],
+                self.grid_options["_start_time_evolution"],
+                self.grid_options["_time_elapsed"],
+            ))
+
+        return self.grid_options["_time_elapsed"]
 
     def CPU_time(self):
         """
         Function to return how much CPU time we've used
         """
-        dt = self.time_elapsed()
+        dt = self.grid_options["_time_elapsed"]
         try:
             ncpus = self.grid_options['num_processes']
         except:
             ncpus = 1
+        print("CPU time : dt={} n={} -> {}".format(
+            dt,
+            ncpus,
+            dt*ncpus))
         return dt * ncpus
diff --git a/binarycpython/utils/dataIO.py b/binarycpython/utils/dataIO.py
index fe6cbf065..39ca476e5 100644
--- a/binarycpython/utils/dataIO.py
+++ b/binarycpython/utils/dataIO.py
@@ -4,6 +4,7 @@
 
 import bz2
 import compress_pickle
+import copy
 import datetime
 import gzip
 import json
diff --git a/binarycpython/utils/grid.py b/binarycpython/utils/grid.py
index 1008893d5..c383e7dca 100644
--- a/binarycpython/utils/grid.py
+++ b/binarycpython/utils/grid.py
@@ -830,7 +830,7 @@ class Population(analytics,
         # Just to make sure we don't have stuff from a previous run hanging around
         self._pre_run_setup()
 
-        if self.grid_options["slurm"]>=1:
+        if self.grid_options["slurm"]>=1 or self.grid_options["condor"]>=1:
             self.grid_options["symlink_latest_gridcode"] = False
 
         if self.grid_options["condor"] >= 1:
@@ -841,8 +841,13 @@ class Population(analytics,
         elif self.grid_options["slurm"] == 1:
             # Slurm setup grid then exit
             self.slurm_grid()
-
             self.exit(code=0)
+
+        elif self.grid_options['evolution_type'] == 'join':
+            # join previously calculated data and return immediately
+            self.join_previous()
+            return
+
         else:
             # Execute population evolution subroutines
             self._evolve_population()
@@ -889,50 +894,16 @@ class Population(analytics,
             - TODO: include options for different ways of generating a population here. (i.e. MC or source file)
         """
 
-        ##
+        ############################################################
         # Prepare code/initialise grid.
         # set custom logging, set up store_memaddr, build grid code. dry run grid code.
         self._setup()
 
-        # special cases
-        if self.grid_options['evolution_type'] == 'join':
-
-            # check that our job has finished
-            status = self.get_slurm_status()
-
-            if status != "finished":
-                # job did not finish : save a snapshot
-                print("This job did not finish (status is {status}) : cannot join".format(status=status))
-                self.exit(code=1)
-            else:
-                # our job has finished
-                joinfiles = self.joinfiles()
-                joiningfile = self.slurmpath('joining')
-                if self.can_join(joinfiles,joiningfile):
-                    # join object files
-                    try:
-                        pathlib.Path(joiningfile).touch(exist_ok=False)
-                        print("can join : all tasks are finished")
-                        try:
-                            self.join_from_files(self,joinfiles)
-                        except Exception as e:
-                            print("Join gave exception",e)
-                        # disable analytics calculations : use the
-                        # values we just loaded
-                        self.grid_options['do_analytics'] = False
-                        return
-                    except:
-                        pass
-                else:
-                    print("cannot join : other tasks are not yet finished\n")
-                    print("Finished this job : exiting")
-                    self.exit(code=1)
-
         ############################################################
         # Evolve systems
-        elif (
-            self.grid_options["evolution_type"]
-            in self.grid_options["_evolution_type_options"]
+        self.set_time("start")
+        if (
+            self.grid_options["evolution_type"] in self.grid_options["_evolution_type_options"]
         ):
             if self.grid_options["evolution_type"] in ["grid", "custom_generator"]:
                 self._evolve_population_grid()
@@ -946,32 +917,33 @@ class Population(analytics,
                     self.grid_options["_evolution_type_options"]
                 )
             )
+        self.set_time("end")
 
-        # finished!
-        self.grid_options["_end_time_evolution"] = time.time()
-
+        ############################################################
         # Log and print some information
-        dtsecs = self.time_elapsed()
-
         string1 = "Population-{} finished!\nThe total probability is {:g}.".format(
             self.grid_options["_population_id"], self.grid_options["_probtot"]
         )
-        string2 = "It took a total of {dtsecs} to run {starcount} systems on {ncores} cores\n = {totaldtsecs} of CPU time.\nMaximum memory use {memuse:.3f} MB".format(
-            dtsecs=timedelta(dtsecs),
+        string2 = "It took a total of {dtsecs} to run {starcount} systems on {ncores} cores\n = {CPUtime} of CPU time.\nMaximum memory use {memuse:.3f} MB".format(
+            dtsecs=timedelta(self.grid_options["_time_elapsed"]),
             starcount=self.grid_options["_count"], # not _total_count! we may have ended the run early...
             ncores=self.grid_options["num_processes"],
-            totaldtsecs=timedelta(dtsecs * self.grid_options["num_processes"]),
+            CPUtime=timedelta(self.CPU_time()),
             memuse=sum(self.shared_memory["max_memory_use_per_thread"]),
         )
 
-
+        ############################################################
         # add warning about a grid that was killed
+        ############################################################
         if self.was_killed():
             string2 += "\n>>> Grid was killed <<<"
             self.set_status("killed")
 
         self.verbose_print(self._boxed(string1, string2), self.grid_options["verbosity"], 0)
 
+        ############################################################
+        # handle errors
+        ############################################################
         if self.grid_options["_errors_found"]:
             # Some information afterwards
             self.verbose_print(
@@ -1008,6 +980,8 @@ class Population(analytics,
                 0,
             )
 
+        return
+
     def _system_queue_filler(self, job_queue, num_processes):
         """
         Function that is responsible for keeping the queue filled.
@@ -2013,9 +1987,6 @@ class Population(analytics,
             self.grid_options[
                 "_probtot"
             ] = 0  # To make sure that the values are reset. TODO: fix this in a cleaner way
-            self.grid_options[
-                "_start_time_evolution"
-            ] = time.time()  # Setting start time of grid
 
             # # Making sure the loaded grid code isn't lingering in the main PID
             # self._generate_grid_code(dry_run=False)
@@ -2061,9 +2032,6 @@ class Population(analytics,
             self.grid_options[
                 "_probtot"
             ] = 0  # To make sure that the values are reset. TODO: fix this in a cleaner way
-            self.grid_options[
-                "_start_time_evolution"
-            ] = time.time()  # Setting start time of grid
 
             #
             # TODO: fix this function
@@ -2074,9 +2042,6 @@ class Population(analytics,
         self.grid_options[
             "_probtot"
         ] = 0  # To make sure that the values are reset. TODO: fix this in a cleaner way
-        self.grid_options[
-            "_start_time_evolution"
-        ] = time.time()  # Setting start time of grid
 
     def _cleanup(self):
         """
@@ -2253,7 +2218,7 @@ class Population(analytics,
 
         if binary_c_output:
             if (binary_c_output.splitlines()[0].startswith("SYSTEM_ERROR")) or (
-                binary_c_output.splitlines()[-1].startswith("SYSTEM_ERROR")
+                    binary_c_output.splitlines()[-1].startswith("SYSTEM_ERROR")
             ):
                 self.verbose_print(
                     "FAILING SYSTEM FOUND",
@@ -2276,8 +2241,8 @@ class Population(analytics,
                     )
 
                     if (
-                        not error_code
-                        in self.grid_options["_failed_systems_error_codes"]
+                            not error_code
+                            in self.grid_options["_failed_systems_error_codes"]
                     ):
                         self.grid_options["_failed_systems_error_codes"].append(
                             error_code
@@ -2291,8 +2256,8 @@ class Population(analytics,
 
                 # Check if we have exceeded the number of errors
                 if (
-                    self.grid_options["_failed_count"]
-                    > self.grid_options["failed_systems_threshold"]
+                        self.grid_options["_failed_count"]
+                        > self.grid_options["failed_systems_threshold"]
                 ):
                     if not self.grid_options["_errors_exceeded"]:
                         self.verbose_print(
@@ -2328,3 +2293,37 @@ class Population(analytics,
                 self.grid_options["verbosity"],
                 3,
             )
+
+    def join_previous(self):
+        """
+        Join previously generated datasets; return on success, otherwise call self.exit(code=1).
+        """
+        # check that our job has finished
+        status = self.get_slurm_status()
+
+        if status != "finished":
+            # job did not finish : report this and fall through to the exit below
+            print("This job did not finish (status is {status}) : cannot join".format(status=status))
+        else:
+            # our job has finished
+            joinfiles = self.joinfiles()
+            joiningfile = self.slurmpath('joining')
+            if self.can_join(joinfiles,joiningfile):
+                # join object files
+                try:
+                    pathlib.Path(joiningfile).touch(exist_ok=False)
+                    print("can join : all tasks are finished")
+                    try:
+                        self.join_from_files(self,joinfiles)
+                    except Exception as e:
+                        print("Join gave exception",e)
+                    # disable analytics calculations : use the
+                    # values we just loaded
+                    self.grid_options['do_analytics'] = False
+                    return
+                except OSError as e:
+                    print("cannot join : could not create {} : {}".format(joiningfile,e))
+            else:
+                print("cannot join : other tasks are not yet finished\n")
+                print("Finished this job : exiting")
+        self.exit(code=1)
diff --git a/binarycpython/utils/metadata.py b/binarycpython/utils/metadata.py
index afea81737..45e6cbf7c 100644
--- a/binarycpython/utils/metadata.py
+++ b/binarycpython/utils/metadata.py
@@ -101,7 +101,6 @@ class metadata():
             self.grid_options[x] = combined_output_dict[x]
         self.grid_options["_failed_systems_error_codes"] = list(set(combined_output_dict["_failed_systems_error_codes"]))
 
-
     def _metadata_keylist(self):
         return ["_failed_count",
                 "_failed_prob",
diff --git a/binarycpython/utils/version.py b/binarycpython/utils/version.py
index 52f63979a..d7de3375e 100644
--- a/binarycpython/utils/version.py
+++ b/binarycpython/utils/version.py
@@ -120,7 +120,7 @@ class version():
         # Isotopes:
         # Split off
         isotopes = {el for el in cleaned if el.startswith("Isotope ")}
-        cleaned = cleaned - isotopes
+        cleaned -= isotopes
 
         isotope_dict = {}
         for el in isotopes:
@@ -157,7 +157,7 @@ class version():
         # Arg pairs:
         # Split off
         argpairs = set([el for el in cleaned if el.startswith("ArgPair")])
-        cleaned = cleaned - argpairs
+        cleaned -= argpairs
 
         argpair_dict = {}
         for el in sorted(argpairs):
@@ -174,7 +174,7 @@ class version():
         # ensembles:
         # Split off
         ensembles = {el for el in cleaned if el.startswith("Ensemble")}
-        cleaned = cleaned - ensembles
+        cleaned -= ensembles
 
         ensemble_dict = {}
         ensemble_filter_dict = {}
@@ -197,7 +197,7 @@ class version():
         # macros:
         # Split off
         macros = {el for el in cleaned if el.startswith("macroxyz")}
-        cleaned = cleaned - macros
+        cleaned -= macros
 
         param_type_dict = {
             "STRING": str,
@@ -256,7 +256,7 @@ class version():
         # Elements:
         # Split off:
         elements = {el for el in cleaned if el.startswith("Element")}
-        cleaned = cleaned - elements
+        cleaned -= elements
 
         # Fill dict:
         elements_dict = {}
@@ -285,7 +285,7 @@ class version():
         # dt_limits:
         # split off
         dt_limits = {el for el in cleaned if el.startswith("DTlimit")}
-        cleaned = cleaned - dt_limits
+        cleaned -= dt_limits
 
         # Fill dict
         dt_limits_dict = {}
@@ -364,32 +364,32 @@ class version():
         misc_dict["git_revision"] = (
             git_revision[0].split("git revision ")[-1].replace('"', "")
         )
-        cleaned = cleaned - set(git_revision)
+        cleaned -= set(git_revision)
 
         # filter out git url
         git_url = [el for el in cleaned if el.startswith("git URL")]
         misc_dict["git_url"] = git_url[0].split("git URL ")[-1].replace('"', "")
-        cleaned = cleaned - set(git_url)
+        cleaned -= set(git_url)
 
         # filter out version
         version = [el for el in cleaned if el.startswith("Version")]
         misc_dict["version"] = str(version[0].split("Version ")[-1])
-        cleaned = cleaned - set(version)
+        cleaned -= set(version)
 
         git_branch = [el for el in cleaned if el.startswith("git branch")]
         misc_dict["git_branch"] = git_branch[0].split("git branch ")[-1].replace('"', "")
-        cleaned = cleaned - set(git_branch)
+        cleaned -= set(git_branch)
 
         build = [el for el in cleaned if el.startswith("Build")]
         misc_dict["build"] = build[0].split("Build: ")[-1].replace('"', "")
-        cleaned = cleaned - set(build)
+        cleaned -= set(build)
 
         email = [el for el in cleaned if el.startswith("Email")]
         misc_dict["email"] = email[0].split("Email ")[-1].split(",")
-        cleaned = cleaned - set(email)
+        cleaned -= set(email)
 
         other_items = set([el for el in cleaned if " is " in el])
-        cleaned = cleaned - other_items
+        cleaned -= other_items
 
         for el in other_items:
             split = el.split(" is ")
-- 
GitLab