From 6ccd7c03eae559a8b753c8e86c7623076c5defdb Mon Sep 17 00:00:00 2001 From: Robert Izzard <r.izzard@surrey.ac.uk> Date: Tue, 9 Nov 2021 14:11:49 +0000 Subject: [PATCH] add CPU time to metadata output, and fix an issue with a tuple rather than a string from platform.python_build() --- binarycpython/utils/grid.py | 65 ++++++++++++++++---- binarycpython/utils/grid_options_defaults.py | 1 + 2 files changed, 53 insertions(+), 13 deletions(-) diff --git a/binarycpython/utils/grid.py b/binarycpython/utils/grid.py index f7e870b5a..e49e3a50d 100644 --- a/binarycpython/utils/grid.py +++ b/binarycpython/utils/grid.py @@ -776,6 +776,23 @@ class Population: return all_info + def time_elapsed(self): + """ + Function to return how long a population object has been running. + """ + for x in ["_start_time_evolution","_end_time_evolution"]: + if not self.grid_options[x]: + self.grid_options[x] = time.time() + return self.grid_options["_end_time_evolution"] - self.grid_options["_start_time_evolution"] + + def CPU_time(self): + """ + Function to return how much CPU time we've used + """ + dt = self.time_elapsed() + ncpus = self.grid_options['num_processes'] + return dt * ncpus + def export_all_info( self, use_datadir: bool = True, @@ -1182,7 +1199,7 @@ class Population: if self.grid_options['do_analytics']: # Put all interesting stuff in a variable and output that afterwards, as analytics of the run. 
analytics_dict = { - "population_name": self.grid_options["_population_id"], + "population_id": self.grid_options["_population_id"], "evolution_type": self.grid_options["evolution_type"], "failed_count": self.grid_options["_failed_count"], "failed_prob": self.grid_options["_failed_prob"], @@ -1195,6 +1212,7 @@ class Population: "total_count": self.grid_options["_count"], "start_timestamp": self.grid_options["_start_time_evolution"], "end_timestamp": self.grid_options["_end_time_evolution"], + "time_elapsed" : self.time_elapsed(), "total_mass_run": self.grid_options["_total_mass_run"], "total_probability_weighted_mass_run": self.grid_options[ "_total_probability_weighted_mass_run" @@ -1296,10 +1314,8 @@ class Population: self.grid_options["_end_time_evolution"] = time.time() # Log and print some information - dtsecs = ( - self.grid_options["_end_time_evolution"] - - self.grid_options["_start_time_evolution"] - ) + dtsecs = self.time_elapsed() + string1 = "Population-{} finished!\nThe total probability is {:g}.".format( self.grid_options["_population_id"], self.grid_options["_probtot"] ) @@ -3113,10 +3129,6 @@ class Population: else: print_freq = 10 - # Calculate amount of time left - # calculate amount of time passed - # time_passed = time.time() - self.grid_options["_start_time_evolution"] - if run_number % print_freq == 0: binary_cmdline_string = self._return_argline(full_system_dict) info_string = "{color_part_1} \ @@ -5183,8 +5195,27 @@ eccentricity3=0 if not self.grid_options['slurm_postpone_sbatch']: # call sbatch to launch the jobs - cmd = "sbatch " + scriptpath - os.system(cmd) + cmd = [self.grid_options['slurm_sbatch'], scriptpath] + pipes = subprocess.Popen(cmd, + stdout = subprocess.PIPE, + stderr = subprocess.PIPE) + std_out, std_err = pipes.communicate() + if pipes.returncode != 0: + # an error happened! 
+ err_msg = "{red}{err}\nReturn Code: {code}{reset}".format(err=std_err.strip(), + code=pipes.returncode, + red=self.ANSI_colours["red"], + reset=self.ANSI_colours["reset"],) + raise Exception(err_msg) + + elif len(std_err): + print("{red}{err}{reset}".format(red=self.ANSI_colours["red"], + reset=self.ANSI_colours["reset"], + err=std_err.strip().decode('utf-8'))) + + print("{yellow}{out}{reset}".format(yellow=self.ANSI_colours["yellow"], + reset=self.ANSI_colours["reset"], + out=std_out.strip().decode('utf-8'))) else: # just say we would have (use this for testing) print("Slurm script is at {path} but has not been launched".format(path=scriptpath)) @@ -5304,7 +5335,7 @@ eccentricity3=0 try: # special cases: - # copy the Xinit and settings: these should just be overridden + # copy the settings and Xinit: these should just be overridden try: settings = copy.deepcopy(newpop.grid_ensemble_results['metadata']['settings']) except: @@ -5439,7 +5470,7 @@ eccentricity3=0 self.grid_ensemble_results['metadata']['platform_release'] = platform.release() self.grid_ensemble_results['metadata']['platform_version'] = platform.version() self.grid_ensemble_results['metadata']['platform_processor'] = platform.processor() - self.grid_ensemble_results['metadata']['platform_python_build'] = platform.python_build() + self.grid_ensemble_results['metadata']['platform_python_build'] = ' '.join(platform.python_build()) self.grid_ensemble_results['metadata']['platform_python_version'] = platform.python_version() except Exception as e: print("platform call failed:",e) @@ -5451,4 +5482,12 @@ eccentricity3=0 print("platform call failed:",e) pass + try: + self.grid_ensemble_results['metadata']['duration'] = self.time_elapsed() + self.grid_ensemble_results['metadata']['CPU_time'] = self.CPU_time() + except Exception as e: + print("Failure to calculate time elapsed and/or CPU time consumed") + pass + + return diff --git a/binarycpython/utils/grid_options_defaults.py 
b/binarycpython/utils/grid_options_defaults.py index 12d2cc9a4..83d207062 100644 --- a/binarycpython/utils/grid_options_defaults.py +++ b/binarycpython/utils/grid_options_defaults.py @@ -192,6 +192,7 @@ grid_options_defaults_dict = { "slurm_postpone_sbatch": 0, # if 1: don't submit, just make the script "slurm_array": None, # override for --array, useful for rerunning jobs "slurm_extra_settings": {}, # Dictionary of extra settings for Slurm to put in its launch script. + "slurm_sbatch": "sbatch", # sbatch command ######################################## # Condor stuff ######################################## -- GitLab