From 2b9f6f3ab179b6800982b8a4b1d1cc41af3deac5 Mon Sep 17 00:00:00 2001 From: Robert Izzard <r.izzard@surrey.ac.uk> Date: Tue, 9 Nov 2021 11:12:50 +0000 Subject: [PATCH] fix metadata addition --- binarycpython/utils/grid.py | 78 ++++++++++++++++++++++++------------- 1 file changed, 52 insertions(+), 26 deletions(-) diff --git a/binarycpython/utils/grid.py b/binarycpython/utils/grid.py index 346e10b1e..95bfa0665 100644 --- a/binarycpython/utils/grid.py +++ b/binarycpython/utils/grid.py @@ -36,6 +36,7 @@ import msgpack import multiprocessing import os import pathlib +import platform import pprint # for debugging only import psutil import py_rinterpolate @@ -57,7 +58,6 @@ from collections.abc import Iterable # drop `.abc` with Python 2.7 or lower from colorama import init as colorama_init colorama_init() from diskcache import Cache -from platform import platform from typing import Union, Any from binarycpython.utils.grid_options_defaults import ( @@ -221,6 +221,9 @@ class Population: # Create location where ensemble results are written to self.grid_ensemble_results = {} + # add metadata + self.add_system_metadata() + ################################################### # Argument functions ################################################### @@ -1076,7 +1079,16 @@ class Population: # set default directory locations - # default status_dir to be tmp_dir if it doesn't exist + # check slurm_dir is set should this be required + if self.grid_options['slurm'] > 0 and \ + self.grid_options['slurm_dir'] is None: + print("You have set slurm=",self.grid_options['slurm'],"but not set slurm_dir. Please set it and try again.") + sys.exit(1) + + # default status_dir to be tmp_dir/status + # NOTE: binary_c-python uses its own status_dir, which is not + # the same dir as Slurm uses (so tmp_dir can be local + # to a Slurm job, while slurm_dir is common to all jobs) if self.grid_options['status_dir'] is None: self.grid_options['status_dir'] = os.path.join(self.grid_options['tmp_dir'], 'status') @@ -1086,12 +1098,6 @@ class Population: self.grid_options['cache_dir'] = os.path.join(self.grid_options['tmp_dir'], 'cache') - # check slurm_dir is set should this be required - if self.grid_options['slurm'] > 0 and \ - self.grid_options['slurm_dir'] is None: - print("You have set slurm=",self.grid_options['slurm'],"but not set slurm_dir. Please set it and try again.") - sys.exit(1) - # make list of directories we want to use dirs = ['tmp_dir','status_dir','cache_dir'] @@ -1199,6 +1205,7 @@ class Population: if 'metadata' in self.grid_ensemble_results: # Add analytics dict to the metadata too: self.grid_ensemble_results["metadata"].update(analytics_dict) + self.add_system_metadata() else: # use existing analytics dict try: @@ -1938,8 +1945,6 @@ class Population: ensemble_output = extract_ensemble_json_from_string(ensemble_raw_output) - ensemble_output = extract_ensemble_json_from_string(ensemble_raw_output) - self.write_ensemble(output_file, ensemble_output) # combine ensemble chunks @@ -3495,7 +3500,6 @@ class Population: indent : number of space characters used in the JSON indent. (Default: 4, passed to json.dumps) """ - # TODO: consider writing this in a formatted structure # get the file type file_type = ensemble_file_type(output_file) @@ -5226,23 +5230,12 @@ eccentricity3=0 # add datestamp object.grid_ensemble_results['metadata']['save_population_time'] = datetime.datetime.now().strftime("%m/%d/%Y %H:%M:%S") - # add platform and build information - try: - object.grid_ensemble_results['metadata']['platform'] = platform() - object.grid_ensemble_results['metadata']['platform_uname'] = list(platform.uname()) - object.grid_ensemble_results['metadata']['platform_machine'] = platform.machine() - object.grid_ensemble_results['metadata']['platform_node'] = platform.node() - object.grid_ensemble_results['metadata']['platform_release'] = platform.release() - object.grid_ensemble_results['metadata']['platform_version'] = platform.version() - object.grid_ensemble_results['metadata']['platform_processor'] = platform.processor() - object.grid_ensemble_results['metadata']['platform_python_build'] = platform.python_build() - object.grid_ensemble_results['metadata']['platform_python_version'] = platform.python_version() - except: - pass + # add extra metadata + object.add_system_metadata() - # add max memory use of the grid + # add max memory use try: - object.grid_ensemble_results['metadata']['max_memory_use'] = copy.deepcopy(sum(shared_memory["max_memory_use_per_thread"])) + self.grid_ensemble_results['metadata']['max_memory_use'] = copy.deepcopy(sum(shared_memory["max_memory_use_per_thread"])) except Exception as e: print("save_population_object : Error: ",e) pass @@ -5419,3 +5412,36 @@ eccentricity3=0 if vb: print("returning True from can_join()") return True + + def add_system_metadata(self): + + # add metadata if it doesn't exist + if not 'metadata' in self.grid_ensemble_results: + self.grid_ensemble_results['metadata'] = {} + + # add date + self.grid_ensemble_results['metadata']['date'] = datetime.datetime.now().strftime("%m/%d/%Y %H:%M:%S") + + # add platform and build information + print("Try to write platform") + try: + self.grid_ensemble_results['metadata']['platform'] = platform.platform() + self.grid_ensemble_results['metadata']['platform_uname'] = list(platform.uname()) + self.grid_ensemble_results['metadata']['platform_machine'] = platform.machine() + self.grid_ensemble_results['metadata']['platform_node'] = platform.node() + self.grid_ensemble_results['metadata']['platform_release'] = platform.release() + self.grid_ensemble_results['metadata']['platform_version'] = platform.version() + self.grid_ensemble_results['metadata']['platform_processor'] = platform.processor() + self.grid_ensemble_results['metadata']['platform_python_build'] = platform.python_build() + self.grid_ensemble_results['metadata']['platform_python_version'] = platform.python_version() + except Exception as e: + print("platform call failed:",e) + pass + + try: + self.grid_ensemble_results['metadata']['hostname'] = platform.uname()[1] + except Exception as e: + print("platform call failed:",e) + pass + + return -- GitLab