diff --git a/binarycpython/utils/dicts.py b/binarycpython/utils/dicts.py
index 68f7c9d33848eea33d25d89ef82cfa264b6a621c..a873117ea2b7a064967b7e7befaf3d636ba51591 100644
--- a/binarycpython/utils/dicts.py
+++ b/binarycpython/utils/dicts.py
@@ -71,23 +71,31 @@ def recursive_change_key_to_float(input_dict: dict) -> dict:
 
     Returns:
         new_dict: dict of which the keys have been turned to float types where possible
+
+    If input_dict is None or empty, returns an empty dict
     """
 
     new_dict = collections.OrderedDict()
 
-    for key in input_dict:
-        if isinstance(input_dict[key], (dict, collections.OrderedDict)):
-            try:
-                num_key = float(key)
-                new_dict[num_key] = recursive_change_key_to_float(input_dict[key])
-            except ValueError:
-                new_dict[key] = recursive_change_key_to_float(input_dict[key])
-        else:
-            try:
-                num_key = float(key)
-                new_dict[num_key] = input_dict[key]
-            except ValueError:
-                new_dict[key] = input_dict[key]
+    # if the input dict is None or empty, return an empty dict
+    if input_dict is None or not input_dict:
+        pass
+
+    else:
+        # dict has keys, loop over them
+        for key in input_dict:
+            if isinstance(input_dict[key], (dict, collections.OrderedDict)):
+                try:
+                    num_key = float(key)
+                    new_dict[num_key] = recursive_change_key_to_float(input_dict[key])
+                except ValueError:
+                    new_dict[key] = recursive_change_key_to_float(input_dict[key])
+            else:
+                try:
+                    num_key = float(key)
+                    new_dict[num_key] = input_dict[key]
+                except ValueError:
+                    new_dict[key] = input_dict[key]
 
     return new_dict
diff --git a/binarycpython/utils/functions.py b/binarycpython/utils/functions.py
index 363180ef603779ebfc5cd5c06b336b815a502ac1..2b3013ad9795af8e446fa33b642cf7cdb43e7c8d 100644
--- a/binarycpython/utils/functions.py
+++ b/binarycpython/utils/functions.py
@@ -143,7 +143,6 @@ def get_ANSI_colours():
         for d, background_colour in background_colours.items():
             colours[c + " on " + d] = foreground_colour + background_colour
     colours["reset"] = Style.RESET_ALL
-
     return colours
 
 
@@ -1210,3 +1209,15 @@ def load_logfile(logfile: str) -> None:  # pragma: no cover
         event_list.append(" ".join(split_line[9:]))
 
     print(event_list)
+
+def quotewrap(list):
+    """
+    Given a list, wrap each item in double quotes and return the new list
+    """
+    return ['"' + _x + '"' for _x in list]
+
+def command_string_from_list(list):
+    """
+    Given a list, turn it into a quoted command string
+    """
+    return ' '.join(quotewrap(list))
diff --git a/binarycpython/utils/grid.py b/binarycpython/utils/grid.py
index c21dc44861fea15032ce49f182ad32207e438068..f685d4b8558a77efdc9155b7115d7e07d4f4530c 100644
--- a/binarycpython/utils/grid.py
+++ b/binarycpython/utils/grid.py
@@ -225,8 +225,8 @@ class Population(
         # non-existant subdicts.
         self.grid_results = AutoVivificationDict()
 
-        # Create location where ensemble results are written to
-        self.grid_ensemble_results = {}
+        # Create grid ensemble data location
+        self.grid_ensemble_results = self._new_grid_ensemble_results()
 
         # add metadata
        self.add_system_metadata()
@@ -237,13 +237,17 @@ class Population(
 
     def jobID(self):
         """
-        Function to return the job ID number of this process
+        Function to return the job ID number of this process as a string.
 
         Normal processes return their process ID (PID)
         HPC processes return whatever HPC_jobID() gives.
         """
         if self.HPC_job():
             jobID = self.HPC_jobID()
+            if not jobID:
+                # fallback: use process ID but with "HPC" prepended
+                # (this should never happen!)
+                jobID = "HPC{}".format(self.process_ID)
         else:
             jobID = "{}".format(self.process_ID)
         return jobID
@@ -401,14 +405,19 @@ class Population(
             old_value = self.grid_options[parameter]
             old_value_found = True
 
-        elif parameter in self.defaults:
-            old_value = self.defaults[parameter]
-            old_value_found = True
-
         elif parameter in self.custom_options:
             old_value = self.custom_options[parameter]
             old_value_found = True
 
+        elif parameter in self.bse_options:
+            old_value = self.bse_options[parameter]
+            old_value_found = True
+
+        elif parameter in self.defaults:
+            # this will always revert to a string type
+            old_value = self.defaults[parameter]
+            old_value_found = True
+
         # (attempt to) convert type
         if old_value_found:
             if old_value is not None:
@@ -728,7 +737,7 @@ class Population(
 
         # empty results
         self.grid_results = AutoVivificationDict()
-        self.grid_ensemble_results = {}
+        self.grid_ensemble_results = self._new_grid_ensemble_results()
 
         # set number of processes/cores we want to use
         self._set_nprocesses()
@@ -811,7 +820,6 @@ class Population(
         # Make sure the subdirs of the tmp dir exist
         subdirs = [
             "failed_systems",
-            "current_system",
             "process_summary",
             "runtime_systems",
             "snapshots",
@@ -827,6 +835,20 @@ class Population(
                 )
                 self.exit(code=1)
 
+        # make sure the arg logging directory exists if we need it
+        if self.grid_options["log_args"]:
+            path = os.path.join(
+                self.grid_options["log_args_dir"]
+            )
+            os.makedirs(path, exist_ok=True)
+            if self.dir_ok(path) is False:
+                print(
+                    "Failed to make directory at {path} for output of system arguments. Please check that this directory is correct and you have write access.".format(
+                        path=path
+                    )
+                )
+                self.exit(code=1)
+
         # restore from existing HPC files
         self.HPC_restore()
 
@@ -1609,17 +1631,17 @@ class Population(
         # that was on, we log each current system to a file (each thread has one).
         # Each new system overrides the previous
         if self.grid_options["log_args"]:
+            argfile = os.path.join(
+                self.grid_options["log_args_dir"],
+                "process_{}.txt".format(self.jobID()),
+            )
             with self.open(
-                os.path.join(
-                    self.grid_options["log_args_dir"],
-                    "current_system",
-                    "process_{}.txt".format(self.process_ID),
-                ),
-                "w",
-                encoding="utf-8",
+                argfile,
+                "w",
+                encoding="utf-8",
             ) as f:
-                binary_cmdline_string = self._return_argline(full_system_dict)
-                f.write(binary_cmdline_string)
+                binary_c_cmdline_string = self._return_argline(full_system_dict)
+                f.write(binary_c_cmdline_string)
                 f.close()
 
         ##############
@@ -1986,6 +2008,7 @@ class Population(
                     self.grid_options["verbosity"],
                     0,
                 )
+                print("BSE", self.bse_options)
                 raise ValueError
 
             if not any(
@@ -2387,3 +2410,13 @@ class Population(
             self.grid_options["verbosity"],
             3,
         )
+
+    def _new_grid_ensemble_results(self):
+        """
+        Function to return a new grid_ensemble_results dict: this should
+        be pre-filled with sub-dicts to prevent later errors.
+        """
+        return {
+            "metadata": {},
+            "ensemble": {}
+        }
diff --git a/binarycpython/utils/population_extensions/HPC.py b/binarycpython/utils/population_extensions/HPC.py
index 18934e137969c4a4f6249a3e01058e0b82f444f3..5a5555a75a3c3ab0522b3c594385b803871fa500 100644
--- a/binarycpython/utils/population_extensions/HPC.py
+++ b/binarycpython/utils/population_extensions/HPC.py
@@ -91,7 +91,7 @@ class HPC(condor, slurm):
         # make the output before checking anything, we do
         # this to remove any asynchronicity
         lines = []
-        for i in range(0, n):
+        for i in self.HPC_job_id_range():
             lines += [
                 os.path.join(
                     prefix, "{hpc_jobid}.{i}.gz\n".format(hpc_jobid=hpc_jobid, i=i)
@@ -304,7 +304,7 @@ class HPC(condor, slurm):
 
     def HPC_jobID(self):
         """
-        Function to return an HPC (Slurm or Condor) job id in the form x.y. Returns None if not an HPC job.
+        Function to return an HPC (Slurm or Condor) job id in the form of a string, x.y. Returns None if not an HPC job.
         """
 
         if self.grid_options["slurm"] > 0:
@@ -558,7 +558,7 @@ class HPC(condor, slurm):
         joinfiles = self.HPC_load_joinfiles_list()
         joiningfile = self.HPC_path("joining")
         print(
-            "Joinfile list n={n} (should be {m}".format(
+            "Joinfile list n={n} (should be {m})".format(
                 n=len(joinfiles), m=self.HPC_njobs()
             )
         )
@@ -680,8 +681,9 @@ class HPC(condor, slurm):
             d["status"][x] = 0
             d["joblist"][x] = []
 
-        for i in range(0, n):
+        for i in self.HPC_job_id_range():
             s = self.HPC_get_status(job_id=_id, job_index=i)
+            #print("HPC get job",_id,':',i," status=",s)
             if s is None:
                 s = "unknown"
             if not s in d["status"]:
@@ -713,8 +714,6 @@ class HPC(condor, slurm):
     def HPC_queue_stats(self):  # pragma: no cover
         """
         Function that returns the queue stats for the HPC grid
-
-        TODO: the slurm_queue_stats doesntt actually return anything
         """
 
         if self.grid_options["slurm"] > 0:
@@ -725,3 +724,13 @@ class HPC(condor, slurm):
             x = None
 
         return x
+
+    def HPC_job_id_range(self):
+        n = self.HPC_njobs()
+        if self.grid_options["slurm"] > 0:
+            return range(1, n+1)
+        elif self.grid_options["condor"] > 0:
+            return range(0, n)
+        else:
+            print("Called HPC_job_id_range() when not running an HPC grid: you cannot do this.")
+            raise ValueError("HPC_job_id_range() requires a Slurm or Condor grid")
diff --git a/binarycpython/utils/population_extensions/analytics.py b/binarycpython/utils/population_extensions/analytics.py
index b6fba1e3df4061841d4d6c150a60da1060005c08..57bbb25d6b469d471c39a0c582a6a9684753944f 100644
--- a/binarycpython/utils/population_extensions/analytics.py
+++ b/binarycpython/utils/population_extensions/analytics.py
@@ -61,6 +61,7 @@ class analytics:
         if "metadata" in self.grid_ensemble_results:
             # Add analytics dict to the metadata too:
             self.grid_ensemble_results["metadata"].update(analytics_dict)
+            print("Added analytics to metadata")
             self.add_system_metadata()
         else:
             # use existing analytics dict
diff --git a/binarycpython/utils/population_extensions/condor.py b/binarycpython/utils/population_extensions/condor.py
index 3ea970fb551e5b9f587b9c5f05163ab5dd36835c..04a6f8b75938fdca36b064af6b22081cd072e86d 100644
--- a/binarycpython/utils/population_extensions/condor.py
+++ b/binarycpython/utils/population_extensions/condor.py
@@ -17,6 +17,10 @@ import pathlib
 
 import datasize
 import lib_programname
+from binarycpython.utils.functions import (
+    command_string_from_list,
+    now,
+)
 
 
 class condor:
@@ -33,7 +37,7 @@ class condor:
 
     def condorID(self, ClusterID=None, Process=None):
         """
-        Function to return a Condor job ID. The ClusterID and Process passed in are used if given, otherwise we default to the condor_ClusterID and condor_Process in grid_options.
+        Function to return a Condor job ID as a string, [ClusterID].[Process]. The ClusterID and Process passed in are used if given, otherwise we default to the condor_ClusterID and condor_Process in grid_options.
         """
         if ClusterID is None:
             ClusterID = self.grid_options["condor_ClusterID"]
@@ -308,7 +312,7 @@ class condor:
             ]
         )
 
-        grid_command = " ".join(grid_command)
+        grid_command = command_string_from_list(grid_command)
 
         # make condor script paths
         submit_script_path = self.condorpath("condor_submit_script")
@@ -340,8 +344,8 @@
 echo "Condor Job Args: $@"
 
 # first two arguments are ClusterID and Process
-export ClusterID=$1
-export Process=$2
+export ClusterID="$1"
+export Process="$2"
 shift 2
 echo "Job ClusterID $ClusterID Process $Process"
 
@@ -352,13 +356,13 @@ export BINARY_C_PYTHON_ORIGINAL_WD=`{pwd}`
 export BINARY_C_PYTHON_ORIGINAL_SUBMISSION_TIME=`{date}`
 
 # set status to \"running\"
-echo \"running\" > {condor_dir}/status/$ClusterID.$ProcessID
+echo \"running\" > "{condor_dir}/status/$ClusterID.$ProcessID"
 
 # make list of files which is checked for joining
-# echo {condor_dir}/results/$ClusterID.$Process.gz >> {condor_dir}/results/$ClusterID.all
+# echo "{condor_dir}/results/$ClusterID.$Process.gz" >> "{condor_dir}/results/$ClusterID.all"
 
 # run grid of stars and, if this returns 0, set status to finished
-{grid_command} condor=2 evolution_type=grid condor_ClusterID=$ClusterID condor_Process=$Process save_population_object={condor_dir}/results/$ClusterID.$Process.gz && echo -n \"finished\" > {condor_dir}/status/$ClusterID.$ProcessID && echo """.format(
+{grid_command} "condor=2" "evolution_type=grid" "condor_ClusterID=$ClusterID" "condor_Process=$Process" "save_population_object={condor_dir}/results/$ClusterID.$Process.gz" && echo -n \"finished\" > "{condor_dir}/status/$ClusterID.$ProcessID" && echo """.format(
             bash=self.grid_options["condor_bash"],
             date=self.grid_options["condor_date"],
             pwd=self.grid_options["condor_pwd"],
@@ -371,7 +375,7 @@ echo \"running\" > {condor_dir}/status/$ClusterID.$ProcessID
             joinfile = "{condor_dir}/results/{ClusterID}.all".format(
                 condor_dir=self.grid_options["condor_dir"], ClusterID=ClusterID
             )
-            condor_job_script += """&& echo \"Checking if we can join...\" && echo && {grid_command} condor=3 evolution_type=join joinlist={joinfile} condor_ClusterID=$ClusterID condor_Process=$Process
+            condor_job_script += """&& echo \"Checking if we can join...\" && echo && {grid_command} "condor=3" "evolution_type=join" "joinlist={joinfile}" "condor_ClusterID=$ClusterID" "condor_Process=$Process"
 """.format(
                 bash=self.grid_options["condor_bash"],
                 grid_command=grid_command,
diff --git a/binarycpython/utils/population_extensions/dataIO.py b/binarycpython/utils/population_extensions/dataIO.py
index c7e4e94db9fccfb1bba21399278166db30eaacca..40d485d20004a2f746f08f43e18bce92076ca7fd 100644
--- a/binarycpython/utils/population_extensions/dataIO.py
+++ b/binarycpython/utils/population_extensions/dataIO.py
@@ -159,7 +159,6 @@ class dataIO:
             except Exception as e:
                 obj = None
                 print("Loading of the compressed object went wrong: {}".format(e))
-
         return obj
 
     def merge_populations(self, refpop, newpop):
@@ -176,10 +175,11 @@ class dataIO:
         Note:
             The file should be saved using save_population_object()
         """
-
+
         # combine data
-        refpop.grid_results = merge_dicts(refpop.grid_results, newpop.grid_results)
-
+        refpop.grid_results = merge_dicts(refpop.grid_results,
+                                          newpop.grid_results)
+
         # special cases
         maxmem = 0
         if "max_memory_use" in refpop.grid_ensemble_results.get(
@@ -210,7 +210,8 @@ class dataIO:
         # merge the ensemble dicts
         refpop.grid_ensemble_results = merge_dicts(
-            refpop.grid_ensemble_results, newpop.grid_ensemble_results
+            refpop.grid_ensemble_results,
+            newpop.grid_ensemble_results
         )
 
         # set special cases
@@ -261,8 +262,9 @@ class dataIO:
             n = newpop.grid_ensemble_results["metadata"]["_count"]
         else:
             n = -1
+
         print("Loaded data from {n} stars".format(n=n))
-
+
         # merge with refpop
         self.merge_populations(refpop, newpop)
diff --git a/binarycpython/utils/population_extensions/grid_options_defaults.py b/binarycpython/utils/population_extensions/grid_options_defaults.py
index 388eaca63c336fa94f6b61777ab4b25717b57d1f..776a4a868aa30a5e19bb9cdb12d9d5430846d61c 100644
--- a/binarycpython/utils/population_extensions/grid_options_defaults.py
+++ b/binarycpython/utils/population_extensions/grid_options_defaults.py
@@ -22,8 +22,8 @@ import shutil
 import sys
 
 from binarycpython.utils.custom_logging_functions import temp_dir
-
 from binarycpython.utils.functions import (
+    command_string_from_list,
     now,
 )
 
@@ -83,7 +83,7 @@ class grid_options_defaults:
             #####################
             # System information
             #####################
-            "command_line": " ".join(sys.argv),
+            "command_line": command_string_from_list(sys.argv),
             "original_command_line": os.getenv("BINARY_C_PYTHON_ORIGINAL_CMD_LINE"),
             "working_diretory": os.getcwd(),
             "original_working_diretory": os.getenv("BINARY_C_PYTHON_ORIGINAL_WD"),
@@ -168,6 +168,7 @@ class grid_options_defaults:
             "_total_probability_weighted_mass_run": 0,  # To count the total mass * probability for each system that thread/process has ran
             "modulo": 1,  # run modulo n of the grid.
             "start_at": 0,  # start at the first model
+            "skip_before": 0,  # skip models before this
             ## Grid type evolution
             "_grid_variables": {},  # grid variables
             "gridcode_filename": None,  # filename of gridcode
@@ -246,7 +247,7 @@ class grid_options_defaults:
             "slurm_jobarrayindex": None,  # slurm job array index (%a)
             "slurm_jobname": "binary_c-python",  # default
             "slurm_partition": None,
-            "slurm_time": 0,  # total time. 0 = infinite time
+            "slurm_time": "0",  # total time. 0 = infinite time
             "slurm_postpone_sbatch": 0,  # if 1: don't submit, just make the script
             "slurm_array": None,  # override for --array, useful for rerunning jobs
             "slurm_array_max_jobs": None,  # override for the max number of concurrent array jobs
diff --git a/binarycpython/utils/population_extensions/slurm.py b/binarycpython/utils/population_extensions/slurm.py
index b19ed1c07a71a1eae8fae960c3976d364ca9591b..1baf701d0337f5b864b29a8078fe4331c23b06e3 100644
--- a/binarycpython/utils/population_extensions/slurm.py
+++ b/binarycpython/utils/population_extensions/slurm.py
@@ -15,6 +15,10 @@ import subprocess
 
 import datasize
 import lib_programname
+from binarycpython.utils.functions import (
+    command_string_from_list,
+    now,
+)
 
 
 class slurm:
@@ -31,7 +35,7 @@ class slurm:
 
     def slurmID(self, jobid=None, jobarrayindex=None):
         """
-        Function to return a Slurm job ID. The jobid and jobarrayindex passed in are used if given, otherwise we default to the jobid and jobarrayindex in grid_options.
+        Function to return a Slurm job ID as a string, [jobid].[jobarrayindex]. The jobid and jobarrayindex passed in are used if given, otherwise we default to the jobid and jobarrayindex in grid_options.
         """
         if jobid is None:
             jobid = self.grid_options["slurm_jobid"]
@@ -110,6 +114,12 @@ class slurm:
             f.close()
             self.NFS_flush_hack(status_file)
 
+        print("Have set status in",status_file,"to",string)
+        with self.open(status_file,"r",encoding="utf-8") as f:
+            print("Contents")
+            print(f.readlines())
+            f.close()
+
     def get_slurm_status(self, jobid=None, jobarrayindex=None, slurm_dir=None):
         """
         Get and return the slurm status string corresponing to the self object, or jobid.jobarrayindex if they are passed in. If no status is found, returns an empty string.
@@ -291,7 +301,7 @@ class slurm:
             ]
             + sys.argv[1:]
             + [
-                "start_at=" + str(jobarrayindex) + "-1",  # do we need the -1?
+                "start_at=$((" + str(jobarrayindex) + "-1))",
                 "modulo=" + str(self.grid_options["slurm_njobs"]),
                 "slurm_njobs=" + str(self.grid_options["slurm_njobs"]),
                 "slurm_dir=" + self.grid_options["slurm_dir"],
@@ -300,7 +310,8 @@ class slurm:
             ]
         )
 
-        grid_command = " ".join(grid_command)
+        # wrap command arguments in quotes
+        grid_command = command_string_from_list(grid_command)
 
         # make slurm script
         scriptpath = self.slurmpath("slurm_script")
@@ -350,13 +361,13 @@ export BINARY_C_PYTHON_ORIGINAL_WD=`{pwd}`
 export BINARY_C_PYTHON_ORIGINAL_SUBMISSION_TIME=`{date}`
 
 # set status to \"running\"
-echo \"running\" > {slurm_dir}/status/$SLURM_ARRAY_JOB_ID.$SLURM_ARRAY_TASK_ID
+echo \"running\" > "{slurm_dir}/status/$SLURM_ARRAY_JOB_ID.$SLURM_ARRAY_TASK_ID"
 
 # make list of files which is checked for joining
-# echo {slurm_dir}/results/$SLURM_ARRAY_JOB_ID.$SLURM_ARRAY_TASK_ID.gz >> {slurm_dir}/results/$SLURM_ARRAY_JOB_ID.all
+# echo "{slurm_dir}/results/$SLURM_ARRAY_JOB_ID.$SLURM_ARRAY_TASK_ID.gz" >> "{slurm_dir}/results/$SLURM_ARRAY_JOB_ID.all"
 
 # run grid of stars and, if this returns 0, set status to finished
-{grid_command} slurm=2 evolution_type=grid slurm_jobid=$SLURM_ARRAY_JOB_ID slurm_jobarrayindex=$SLURM_ARRAY_TASK_ID save_population_object={slurm_dir}/results/$SLURM_ARRAY_JOB_ID.$SLURM_ARRAY_TASK_ID.gz && echo -n \"finished\" > {slurm_dir}/status/$SLURM_ARRAY_JOB_ID.$SLURM_ARRAY_TASK_ID && echo """.format(
+{grid_command} "slurm=2" "evolution_type=grid" "slurm_jobid=$SLURM_ARRAY_JOB_ID" "slurm_jobarrayindex=$SLURM_ARRAY_TASK_ID" "save_population_object={slurm_dir}/results/$SLURM_ARRAY_JOB_ID.$SLURM_ARRAY_TASK_ID.gz" && echo -n \"finished\" > "{slurm_dir}/status/$SLURM_ARRAY_JOB_ID.$SLURM_ARRAY_TASK_ID" && echo """.format(
             slurm_dir=self.grid_options["slurm_dir"],
             grid_command=grid_command,
             cmdline=repr(self.grid_options["command_line"]),
@@ -365,7 +376,7 @@ echo \"running\" > {slurm_dir}/status/$SLURM_ARRAY_JOB_ID.$SLURM_ARRAY_TASK_ID
         )
 
         if not self.grid_options["slurm_postpone_join"]:
-            slurmscript += """&& echo \"Checking if we can join...\" && echo && {grid_command} slurm=3 evolution_type=join joinlist={slurm_dir}/results/$SLURM_ARRAY_JOB_ID.all slurm_jobid=$SLURM_ARRAY_JOB_ID slurm_jobarrayindex=$SLURM_ARRAY_TASK_ID
+            slurmscript += """&& echo \"Checking if we can join...\" && echo && {grid_command} "slurm=3" "evolution_type=join" "joinlist={slurm_dir}/results/$SLURM_ARRAY_JOB_ID.all" "slurm_jobid=$SLURM_ARRAY_JOB_ID" "slurm_jobarrayindex=$SLURM_ARRAY_TASK_ID"
 """.format(
                 slurm_dir=self.grid_options["slurm_dir"],
                 grid_command=grid_command,
@@ -447,7 +458,7 @@ echo \"running\" > {slurm_dir}/status/$SLURM_ARRAY_JOB_ID.$SLURM_ARRAY_TASK_ID
         """
         Function to XXX
 
-        TODO: is this function finished?
+        TODO
         """
         return None
 
diff --git a/binarycpython/utils/population_extensions/spacing_functions.py b/binarycpython/utils/population_extensions/spacing_functions.py
index 242fe7c0ecfd7e1542d51486b95818237843a65d..36edd02e41f009195db45af17274ab93cfbde615 100644
--- a/binarycpython/utils/population_extensions/spacing_functions.py
+++ b/binarycpython/utils/population_extensions/spacing_functions.py
@@ -20,7 +20,7 @@ import numpy as np
 import cachetools
 import diskcache
 
-# from binarycpython.utils.grid import Population
+
 import py_rinterpolate
 
 
@@ -345,13 +345,14 @@ class spacing_functions:
                 return func(*args, **kwargs)
 
             return wrapped
-
-        if cache:
+
+        if cache is not None:
             __decorator = cache.memoize
         else:
             __decorator = __dummy_decorator
 
-        @__decorator
+        #@cache.memoize()
+        @__decorator()  # note: () works with python3.9+, maybe not for 3.8
         def _const_dt(
             cachedir=None,
             num_cores=None,
@@ -392,6 +393,7 @@ class spacing_functions:
             if max_evolution_time:
                 bse_options["max_evolution_time"] = max_evolution_time
 
+            from binarycpython.utils.grid import Population
             lifetime_population = Population()
             lifetime_population.bse_options = bse_options
 
diff --git a/install.sh b/install.sh
index f204a7922dcdba3ad27786cf0edd7ce43b1ac4a6..b9171547d5d648dbac19ef826961578c60e85de5 100755
--- a/install.sh
+++ b/install.sh
@@ -5,14 +5,14 @@ VERSION_NUMBER=$(cat "VERSION")
 echo "installing binarcpython version $VERSION_NUMBER"
 
 # Clean up all the stuff from before
-python setup.py clean --all
+python3 setup.py clean --all
 
 # Go into a directory that doesnt contain 'binarycpython' so pip will uninstall the one in the venv, not the local one.
 cd src
-pip uninstall -y binarycpython
+pip3 uninstall -y binarycpython
 cd ../
 
 # Create build, sdist and install it into the venv
-python setup.py build --force
-python setup.py sdist
-pip install -v dist/binarycpython-$VERSION_NUMBER.tar.gz
+python3 setup.py build --force
+python3 setup.py sdist
+pip3 install -v dist/binarycpython-$VERSION_NUMBER.tar.gz
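Reviewer note (illustrative, not part of the patch): two conventions introduced above are worth keeping in mind when reading the Slurm/Condor changes. First, grid command-line arguments are now individually double-quoted via the new quotewrap()/command_string_from_list() helpers before being embedded in the job scripts, which keeps arguments such as start_at=$((N-1)) as single shell words. Second, HPC_job_id_range() encodes the fact that Slurm array task IDs run 1..n while Condor process IDs run 0..n-1. The sketch below is a standalone Python illustration of those two points only: quotewrap() and command_string_from_list() mirror the helpers added to binarycpython/utils/functions.py, while job_id_range() is a hypothetical stand-in for the new HPC_job_id_range() method, which in the real class reads the backend and job count from grid_options.

# illustrative sketch only -- not part of the patch
def quotewrap(items):
    # wrap each item of a list in double quotes (mirrors the new functions.py helper)
    return ['"' + _x + '"' for _x in items]

def command_string_from_list(items):
    # join a list into a single, quote-wrapped command string
    return " ".join(quotewrap(items))

def job_id_range(backend, n):
    # hypothetical stand-in for HPC_job_id_range():
    # Slurm array task IDs run 1..n, Condor process IDs run 0..n-1
    if backend == "slurm":
        return range(1, n + 1)
    if backend == "condor":
        return range(0, n)
    raise ValueError("job indices are only defined for Slurm or Condor runs")

if __name__ == "__main__":
    # quoting keeps arguments such as start_at=$((10-1)) as single shell words
    print(command_string_from_list(["binary_c-python", "start_at=$((10-1))", "modulo=4"]))
    # -> "binary_c-python" "start_at=$((10-1))" "modulo=4"
    print(list(job_id_range("slurm", 3)))   # [1, 2, 3]
    print(list(job_id_range("condor", 3)))  # [0, 1, 2]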