diff --git a/.gitignore b/.gitignore index 2d6b5c906e652e652cc7d411e24f013daa96d230..d4c3a5e5cdae73765c19baaecc84ef376722ce5d 100644 --- a/.gitignore +++ b/.gitignore @@ -161,3 +161,5 @@ media/ db.sqlite3 *.swp + +*~ \ No newline at end of file diff --git a/CHANGELOG b/CHANGELOG index d8c9186dee06429c74c35f9d4804a4ef4bec0912..b0ba123c2a733de07c5cc4f83789649096dbac1d 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -29,3 +29,9 @@ https://keepachangelog.com/en/0.3.0/ - dictionary utility functions put into dicts.py - some new functions to parse recursively dict keys and/or values that are faster than David's, but probably not as flexible. When these are all you need, they're more efficient. - added a few new Jupyter notebooks + +feature/HPC: +- added Slurm support +- Fixed JSON load(s) to not convert to ASCII hence preserve UTF-8 and hopefully be faster. +- Made sure file loads/saves use UTF-8 +- Fixed version dict to include units section and cleaned up a few other unit handlers diff --git a/README.md b/README.md index 4d0dc5b92ef07bdad8ee37fd78372c5f4c512691..7ad8f6a3c0a41cd123f13c77c91a790acff0285a 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ Docstring coverage: Test coverage:  -Binary population synthesis code that interfaces with binary_c. Based on a original work by Jeff Andrews (can be found in old_solution/ directory). Updated and extended for Python3 by David Hendriks, Robert Izzard. +Binary population synthesis code that interfaces with binary_c. Based on a original work by Jeff Andrews. Updated and extended for Python3 by David Hendriks, Robert Izzard. The current release is version [version](VERSION), make sure to use that version number when installing! diff --git a/binarycpython/utils/HPC.py b/binarycpython/utils/HPC.py new file mode 100644 index 0000000000000000000000000000000000000000..4cc6fccac55db9e489ba25cf99060e428585c272 --- /dev/null +++ b/binarycpython/utils/HPC.py @@ -0,0 +1,705 @@ +""" +Module containing the functions to HPC functionality + +These functions form a single API through which you can access HPC resources. + +Generally, you should call an HPC function rather than the Slurm or Condor interface +directly. The HPC function then decides which interface to use, so that all the +other modules can use a single API rather than have to choose to use the Slurm or +Condor API. + +This class object is an extension to the population grid object +""" + +# pylint: disable=E1101 + +import glob +import json +import os + +from binarycpython.utils.slurm import slurm +from binarycpython.utils.condor import condor + + +class HPC(condor, slurm): + """ + Extension to the population grid object that contains functionality to handle handle the Moe & distefano distributions + """ + + def __init__(self, **kwargs): + condor.__init__(self) + slurm.__init__(self) + + def HPC_njobs(self): + """ + Function to return the number of jobs this HPC jobs will use, as an int. + """ + if self.grid_options["slurm"] > 0: + n = self.grid_options["slurm_njobs"] + elif self.grid_options["condor"] > 0: + n = self.grid_options["condor_njobs"] + else: + n = None + return int(n) + + def HPC_make_joiningfile( + self, hpc_jobid=None, hpc_dir=None, n=None, overwrite=False, error_on_overwrite=False + ): + """ + Function to make the joiningfile file that contains the filenames of results from each job. When all these exist, we can join. + + Note: you normally don't need to set any of the option arguments. + + Args: + hpc_jobid : the job ID number, or self.HPC_jobID_tuple()[0] if None (default=None). 
+ hpc_dir : the HPC directory, or self.HPC_dir() if None (default=None). + n : the number of jobs, or self.HPC_njobs() if None (default=None). + overwrite : if True, overwrite an existing joiningfile (default=False) + error_on_overwite : if True, and we try to overwrite, issue and error and exit (default=False) + + Returns: + True if the file is made, False otherwise. + + """ + + # defaults + if hpc_dir is None: + hpc_dir = self.HPC_dir() + if n is None: + n = self.HPC_njobs() + if hpc_jobid is None: + hpc_jobid = self.HPC_jobID_tuple()[0] + + # make path and filename + prefix = os.path.join(hpc_dir, "results") + file = os.path.join(prefix, hpc_jobid + ".all") + + # make the output before checking anything, we do + # this to remove any asynchronicity + lines = [] + for i in range(0, n): + lines += [os.path.join(prefix, "{hpc_jobid}.{i}.gz\n".format(hpc_jobid=hpc_jobid, i=i))] + string = "".join(lines) + + # check the joiningfile doesn't exist + if not overwrite and os.path.isfile(file): + # file already exists + print( + "Cannot make joiningfile at {file} because it already exists, instead I am waiting for it to be unlocked.".format( + file=file + ) + ) + self.wait_for_unlock(file) + joinfiles = self.HPC_load_joinfiles_list(joinlist=file) + print( + "Unlocked and got {} should be {}".format( + len(joinfiles), self.HPC_njobs() + ) + ) + # perhaps exit here? (e.g. for debugging) + if error_on_overwrite: + self.exit(code=1) + x = False + else: + # open the file, but locked so we have first unique access + (f, lock) = self.locked_open_for_write(file) + + # write to it if we are first to obtain unique access + if lock and f: + print("Making joiningfile list range (0,{}) at {}".format(n, file)) + f.write(string) + f.flush() + os.fsync(f.fileno()) + x = True + self.locked_close(f, lock) + os.sync() + self.NFS_flush_hack(file) + + print( + "Checking joiningfile {} length (size = {})".format( + file, os.path.getsize(file) + ) + ) + joinfiles = self.HPC_load_joinfiles_list(joinlist=file) + print("Got {} should be {}".format(len(joinfiles), self.HPC_njobs())) + + else: + x = False + print("Joiningfile failed to get lock: waiting for it to be unlocked") + self.wait_for_unlock(file) + return x + + def HPC_joinlist(self, joinlist=None): + """ + Function to return the default HPC joinlist file. + """ + + if joinlist is None: + joinlist = self.grid_options["joinlist"] + return joinlist + + def HPC_load_joinfiles_list(self, joinlist=None): + """ + Function to load in the list of files we should join, and return it. + + If grid_options['HPC_rebuild_joinlist'] is True, we rebuild it. 
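+ Rebuilding ignores the joinlist file and instead globs the HPC results directory for *.gz files.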
+ """ + + prefix = os.path.join(self.HPC_dir(), "results") + + if self.grid_options["HPC_rebuild_joinlist"] == 1: + # we should rebuild the joinlist from the + # files we find at the prefix directory + print("Rebuild joinlist from existing files") + joinlist = glob.glob(str(prefix) + "/*.gz") + return joinlist + + joinlist = self.HPC_joinlist(joinlist=joinlist) + try: + self.wait_for_unlock(joinlist) + f = self.open(joinlist, "r", encoding="utf-8") + joinlist = f.read().splitlines() + f.close() + + if False: + print( + "HPC_load_joinfiles_list read joinlist {joinlist} -> gave file joinlist of length {len_joinlist} with contents {joinlist}".format( + joinlist=joinlist, len_joinlist=len(joinlist) + ) + ) + except Exception as e: + print("Failed to open joinlist at {joinlist} : {e}".format(joinlist=joinlist, e=e)) + self.exit(code=1) + + return joinlist + + def HPC_join_from_files(self, newobj, joinfiles): + """ + Merge the results from the list joinfiles into newobj. + """ + for file in joinfiles: + print("Join data in", file) + self.merge_populations_from_file(newobj, file) + return newobj + + def HPC_can_join(self, joinfiles, joiningfile, vb=False): + """ + Check the joinfiles to make sure they all exist + and their .saved equivalents also exist + """ + + print("HPC check if we can join at {}".format(self.now())) + + if self.grid_options["HPC_force_join"] == 0 and os.path.exists(joiningfile): + if vb: + print( + "cannot join : joiningfile exists at {} (check 1)".format( + joiningfile + ) + ) + return False + else: + if vb: + print("joiningfile (at {}) does not exist".format(joiningfile)) + + for file in joinfiles: + if vb: + print("check for {}".format(file)) + + if os.path.exists(file) is False: + if vb: + print('cannot join : file "{}" does not exist'.format(file)) + return False + + savedfile = file + ".saved" + if vb: + print("check for {}".format(savedfile)) + + if os.path.exists(savedfile) is False: + if vb: + print( + 'cannot join : savedfile "{}" does not exist'.format(savedfile) + ) + return False + + # found both files + if vb: + print("found {} and {}".format(file, savedfile)) + + # check for joiningfile again + if self.grid_options["HPC_force_join"] == 1: + print("Forcing join because HPC_force_join is set") + x = True + elif os.path.exists(joiningfile): + if vb: + print( + "cannot join: joiningfile exists at {} (check 2)".format( + joiningfile + ) + ) + x = False + elif vb: + print("joiningfile at {} does not exist : can join".format(joiningfile)) + x = True + + if vb: + print("returning {} from HPC_can_join()".format(x)) + + return x + + def HPC_job(self): + """ + Function to return True if we're running an HPC (Slurm or Condor) job, False otherwise. + """ + + return bool(self.grid_options["slurm"] > 0 or self.grid_options["condor"] > 0) + + def HPC_job_task(self): + """ + Function to return the HPC task number, which is 1 when setting + up and running the scripts, 2 when joining data. + """ + if self.grid_options["slurm"] > 0: + x = self.grid_options["slurm"] + elif self.grid_options["condor"] > 0: + x = self.grid_options["condor"] + else: + x = 0 + return x + + def HPC_job_type(self): + """ + Function to return a string telling us the type of an HPC job, i.e. + "slurm", "condor" or "None". + """ + if self.grid_options["slurm"] > 0: + hpc_type = "slurm" + elif self.grid_options["condor"] > 0: + hpc_type = "condor" + else: + hpc_type = "None" + return hpc_type + + def HPC_jobID(self): + """ + Function to return an HPC (Slurm or Condor) job id in the form x.y. 
Returns None if not an HPC job. + """ + if self.grid_options["slurm"] > 0: + hpc_id = self.slurmID() + elif self.grid_options["condor"] > 0: + hpc_id = self.condorID() + else: + # not an HPC job + hpc_id = None + return hpc_id + + def HPC_jobID_tuple(self): + """ + Return the job ID as a tuple of ints, (x,y), or (None,None) on failure + """ + hpc_jobid = self.HPC_jobID() + + if hpc_jobid is None or id.startswith("None"): + t = [None, None] + elif self.HPC_job(): + print("JOBID", id) + t = id.split(".") + if not t[0]: + t[0] = None + if not t[1]: + t[1] = None + return tuple(t) + + def HPC_set_status(self, string): + """ + Set the appropriate HPC job (Condor or Slurm) status file to whatever is given in string. + + Arguments: + string : the new contents of the status file + + Returns: + True if the status was set, False otherwise. + (As returned by either the appropriate Condor or Slurm function) + """ + + if self.grid_options["slurm"] > 0: + status = self.set_slurm_status(string) + elif self.grid_options["condor"] > 0: + status = self.set_condor_status(string) + else: + status = None + + return status + + def HPC_get_status(self, job_id=None, job_index=None, hpc_dir=None): + """ + Get and return the appropriate HPC job (Condor or Slurm) status string for this job (or, if given, the job at id.index) + + Args: + hpc_dir : optional HPC run directory. If not set, the default (e.g. slurm_dir or condor_dir) + is used. + job_id,job_index : the id and index of the job to be queried + """ + + if self.grid_options["slurm"] > 0: + status = self.get_slurm_status( + jobid=job_id, jobarrayindex=job_index, slurm_dir=hpc_dir + ) + elif self.grid_options["condor"] > 0: + status = self.get_condor_status( + ClusterID=job_id, Process=job_index, condor_dir=hpc_dir + ) + else: + status = None + + return status + + def HPC_dirs(self): + """ + Function to return a list of directories required for this HPC job. + """ + if self.grid_options["slurm"] > 0: + dirs = self.slurm_dirs() + elif self.grid_options["condor"] > 0: + dirs = self.condor_dirs() + else: + dirs = [] + return dirs + + def HPC_grid(self, makejoiningfile=True): + """ + Function to call the appropriate HPC grid function + (e.g. Slurm or Condor) and return what it returns. + + Args: + makejoiningfile : if True, and we're the first job with self.HPC_task() == 2, we build the joiningfile. (default=True) This option exists in case you don't want to overwrite an existing joiningfile, or want to build it in another way (e.g. in the HPC scripts). + """ + + jobid = self.HPC_jobID_tuple()[0] + + # give some current status about the HPC run + self.HPC_dump_status("HPC grid before") + + if makejoiningfile and self.HPC_job_task() == 2 and jobid is not None: + self.HPC_make_joiningfile() + + if self.grid_options["slurm"] > 0: + x = self.slurm_grid() + elif self.grid_options["condor"] > 0: + x = self.condor_grid() + else: + x = None # should not happen + + # give some current status about the HPC run + self.HPC_dump_status("HPC grid after") + + return x + + def HPC_check_requirements(self): + """ + Function to check HPC option requirements have been met. Returns a tuple: (True,"") if all is ok, (False,<warning string>) otherwise. + """ + if self.grid_options["slurm"] > 0: + t = self.slurm_check_requirements() + elif self.grid_options["condor"] > 0: + t = self.condor_check_requirements() + else: + t = (True, "") + return t + + def HPC_id_filename(self): + """ + HPC jobs have a filename in their directory which specifies the job id. 
This function returns the contents of that file as a string, or None on failure. + """ + + if self.grid_options["slurm"] > 0: + filename = "jobid" + elif self.grid_options["condor"] > 0: + filename = "ClusterID" + else: + filename = None + return filename + + def HPC_id_from_dir(self, hpc_dir): + """ + Function to return the ID of an HPC run given its (already existing) directory. + """ + + filename = self.HPC_id_filename() + if not filename: + return None + + file = os.path.join(hpc_dir, filename) + f = self.open(file, "r", encoding="utf-8") + if not f: + print( + "Error: could not open {file} to read the HPC jobid of the directory {hpc_dir}".format( + file=file, hpc_dir=hpc_dir + ) + ) + self.exit(code=1) + + oldjobid = f.read().strip() + if not oldjobid: + print("Error: could not find jobid in {hpc_dir}".format(hpc_dir=hpc_dir)) + self.exit(code=1) + else: + f.close() + return oldjobid + + def HPC_restore(self): + """ + Set grid_options['restore_from_snapshot_file'] so that we restore data from existing + an HPC run if self.grid_options[hpc_job_type+'_restart_dir'], where hpc_job_type is "slurm" or "condor", + is provided, otherwise do nothing. This only works if grid_options[hpc_job_type] == self.HPC_job_task() == 2, which is + the run-grid stage of the process. + """ + + hpc_job_type = self.HPC_job_type() + if hpc_job_type is None: + return + + key = hpc_job_type + "_restart_dir" + if key not in self.grid_options: + return + + # get restart directory + hpc_dir = self.grid_options[hpc_job_type + "_restart_dir"] + if hpc_dir is None: + return + + # get HPC job index + index = self.HPC_jobID_tuple()[1] + if index is None: + return + + if self.HPC_job_task() == 2: # (same as) self.grid_options[hpc_job_type] == 2: + old_id = self.HPC_id_from_dir(hpc_dir) + print( + "Restart from hpc_dir {hpc_dir} which was has (old) ID {old_id}, we are job index {index}".format( + hpc_dir=hpc_dir, old_id=old_id, index=index + ) + ) + + # check status: if "finished", we don't have to do anything + status = self.HPC_get_status(hpc_dir=hpc_dir) + + if status == "finished": + print("Status is finished, cannot and do not need to restart.") + self.exit(code=0) + + file = os.path.join( + dir, "snapshots", "{id}.{index}.gz".format(id=old_id, index=index) + ) + + if os.path.exists(file): + # have data from which we can restore, set it in + # the appropriate grid option + print("Restore this run from snapshot {file}".format(file=file)) + self.grid_options["restore_from_snapshot_file"] = file + else: + # no snapshot: so no need to restore, just exit + print( + "Expected snapshot at {file} but none was found".format(file=file) + ) + self.exit(code=0) + return + + def HPC_join_previous(self): + """ + Function to join previously generated datasets. + """ + # check that our job has finished + status = self.HPC_get_status() + print("Job status", status) + + if self.grid_options["HPC_force_join"] == 0 and status != "finished": + # job did not finish : save a snapshot + print( + "This job did not finish (status is {status}) : cannot join".format( + status=status + ) + ) + else: + # our job has finished + HPC_status = self.HPC_status() + + # HPC_queue_stats = self.HPC_queue_stats() + + if HPC_status["status"]["finished"] != HPC_status["njobs"]: + print( + "HPC_status reports {} finished jobs out of {}. We cannot join because not all the jobs are finished. 
Exiting.".format( + HPC_status["status"]["finished"], HPC_status["njobs"] + ) + ) + self.exit(1) + + joinfiles = self.HPC_load_joinfiles_list() + joiningfile = self.HPC_path("joining") + print( + "Joinfile list n={n} (should be {m}".format( + n=len(joinfiles), m=self.HPC_njobs() + ) + ) + print("Joingingfile path : ", joiningfile) + + if len(joinfiles) != self.HPC_njobs(): + print("Number of joinfiles != njobs : this is wrong, exiting.") + self.exit(1) + + if self.HPC_can_join(joinfiles, joiningfile, vb=True): + # join object files + print("We can join") + try: + # touch joiningfile + if self.grid_options["HPC_force_join"] == 0: + print("Making joiningfile at {}".format(joiningfile)) + self.HPC_touch(joiningfile) + try: + print("Calling HPC_join_from_files()") + self.HPC_join_from_files(self, joinfiles) + except Exception as e: + print("Join gave exception", e) + # disable analytics calculations : use the + # values we just loaded + self.grid_options["do_analytics"] = False + return + except Exception as e: + print("pass {}", e) + pass + else: + print("cannot join : other tasks are not yet finished\n") + print("Finished this job : exiting") + self.exit(code=1) + + def HPC_path(self, path): + """ + Function to file the filename of this HPC job's file at path. + """ + if self.grid_options["slurm"] > 0: + p = self.slurmpath(path) + elif self.grid_options["condor"] > 0: + p = self.condorpath(path) + else: + p = None + return p + + def HPC_snapshot_filename(self): + """ + Function to return an HPC job's snapshot filename. + """ + if self.HPC_job(): + file = os.path.join(self.HPC_dir, "snapshots", self.HPC_jobID() + ".gz") + else: + file = None + return file + + def HPC_dir(self): + """ + Function to return an HPC job's directory. + """ + if self.grid_options["slurm"] > 0: + d = self.grid_options["slurm_dir"] + elif self.grid_options["condor"] > 0: + d = self.grid_options["condor_dir"] + else: + d = None + return d + + def HPC_touch(self, filename, string=None): + """ + Function to touch the file at filename, put into it the job number + and (if given) the string passed in. + """ + + try: + f = self.open(filename, "w", encoding="utf-8") + + if f: + job = self.HPC_jobID() + jobtype = self.HPC_job_type() + if job: + s = str(job) + if jobtype: + s += " " + str(jobtype) + f.write(s + "\n") + if string: + f.write(string) + f.flush() + f.close() + + self.NFS_flush_hack(filename) + except: + pass + + def HPC_status(self): + """ + Return a dict of useful information about the current status + of this HPC run. + """ + d = {} # returned + _id, _index = self.HPC_jobID_tuple() + d["job_id"] = _id + d["job_index"] = _index + if _id and _index: + n = self.HPC_njobs() + d["njobs"] = n + d["job_task"] = self.HPC_job_task() + d["job_type"] = self.HPC_job_type() + d["job_status"] = self.HPC_get_status() + d["dir"] = self.HPC_dir() + d["dirs"] = self.HPC_dirs() + + # get fellow jobs' status + d["status"] = {} + d["joblist"] = {} + + # default types + for x in ["running", "starting", "finishing", "finished", "killed"]: + d["status"][x] = 0 + d["joblist"][x] = [] + + for i in range(0, n): + s = self.HPC_get_status(job_id=_id, job_index=i) + if s is None: + s = "unknown" + if not s in d["status"]: + d["status"][s] = 1 + else: + d["status"][s] += 1 + if not s in d["joblist"]: + d["joblist"][s] = [str(_id) + "." + str(i)] + else: + d["joblist"][s] += [str(_id) + "." 
+ str(i)] + + return d + + def HPC_dump_status(self, string=None): + """ + Function to print the status of the HPC grid + """ + + if not string: + string = "" + + d = self.HPC_status() + + print("############################################################") + print("HPC job status " + string) + print(json.dumps(d, indent=4)) + print("############################################################") + + def HPC_queue_stats(self): + """ + Function that returns the queue stats for the HPC grid + TODO: the slurm_queue_stats doesntt actually return anything + """ + + if self.grid_options["slurm"] > 0: + x = self.slurm_queue_stats() + elif self.grid_options["condor"] > 0: + x = self.condor_queue_stats() + else: + x = None + + return x diff --git a/binarycpython/utils/Moe_di_Stefano_2017.py b/binarycpython/utils/Moe_di_Stefano_2017.py new file mode 100644 index 0000000000000000000000000000000000000000..d15b9e9e2c6734595f6c5428d0801192fb449374 --- /dev/null +++ b/binarycpython/utils/Moe_di_Stefano_2017.py @@ -0,0 +1,1226 @@ +""" +Module containing the functions to handle the Moe&Distrefano data + +This class object is an extension to the population grid object +""" + +# pylint: disable=E1101 + +import os +import gc +import copy +import json + +import py_rinterpolate + +from binarycpython.utils.functions import ( + verbose_print, +) +from binarycpython.utils.dicts import update_dicts, normalize_dict +from binarycpython.utils.distribution_functions import ( + Moecache, + LOG_LN_CONVERTER, +) +from binarycpython.utils.grid_options_defaults import ( + _MOE2017_VERBOSITY_LEVEL, +) +from binarycpython.utils import moe_di_stefano_2017_data + + + +class Moe_di_Stefano_2017: + """ + Extension to the population grid object that contains functionality to handle handle the Moe & distefano distributions + """ + + def __init__(self, **kwargs): + # don't do anything: we just inherit from this class + return + + def set_moe_di_stefano_settings(self, options=None): + """ + Function to set user input configurations for the Moe & di Stefano methods + + If nothing is passed then we just use the default options + """ + + if not options: + options = {} + + # Take the option dictionary that was given and override. + options = update_dicts(self.grid_options["Moe2017_options"], options) + self.grid_options["Moe2017_options"] = copy.deepcopy(options) + + # Write options to a file + os.makedirs( + os.path.join(self.grid_options["tmp_dir"], "moe_distefano"), + exist_ok=True, + ) + with open( + os.path.join( + os.path.join(self.grid_options["tmp_dir"], "moe_distefano"), + "moeopts.dat", + ), + "w", + encoding="utf-8", + ) as f: + f.write( + json.dumps( + self.grid_options["Moe2017_options"], indent=4, ensure_ascii=False + ) + ) + f.close() + + def _load_moe_di_stefano_data(self): + """ + Function to load the moe & di stefano data + """ + + # Only if the grid is loaded and Moecache contains information + if not self.grid_options["_loaded_Moe2017_data"]: # and not Moecache: + + if self.grid_options["_Moe2017_JSON_data"]: + # Use the existing (perhaps modified) JSON data + json_data = self.grid_options["_Moe2017_JSON_data"] + + else: + # Load the JSON data from a file + json_data = self.get_moe_di_stefano_dataset( + self.grid_options["Moe2017_options"], + verbosity=self.grid_options["verbosity"], + ) + + # entry of log10M1 is a list containing 1 dict. 
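+ # (illustrative, assumed layout: {'log10M1': [{'<log10 mass>': {...}}]} which is unwrapped below to {'log10M1': {'<log10 mass>': {...}}})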
+ # We can take the dict out of the list + if isinstance(json_data["log10M1"], list): + json_data["log10M1"] = json_data["log10M1"][0] + + # save this data in case we want to modify it later + self.grid_options["_Moe2017_JSON_data"] = json_data + + # Get all the masses + logmasses = sorted(json_data["log10M1"].keys()) + if not logmasses: + msg = "The table does not contain masses." + verbose_print( + "\tMoe_di_Stefano_2017: {}".format(msg), + self.grid_options["verbosity"], + 0, + ) + raise ValueError(msg) + + # Write to file + os.makedirs( + os.path.join(self.grid_options["tmp_dir"], "moe_distefano"), + exist_ok=True, + ) + with open( + os.path.join( + os.path.join(self.grid_options["tmp_dir"], "moe_distefano"), + "moe.log", + ), + "w", + encoding="utf-8", + ) as logfile: + logfile.write("logâ‚â‚€Masses(M☉) {}\n".format(logmasses)) + + # Get all the periods and see if they are all consistently present + logperiods = [] + for logmass in logmasses: + if not logperiods: + logperiods = sorted(json_data["log10M1"][logmass]["logP"].keys()) + dlog10P = float(logperiods[1]) - float(logperiods[0]) + + current_logperiods = sorted(json_data["log10M1"][logmass]["logP"]) + if logperiods != current_logperiods: + msg = ( + "Period values are not consistent throughout the dataset logperiods = " + + " ".join(str(x) for x in logperiods) + + "\nCurrent periods = " + + " ".join(str(x) for x in current_logperiods) + ) + verbose_print( + "\tMoe_di_Stefano_2017: {}".format(msg), + self.grid_options["verbosity"], + 0, + ) + raise ValueError(msg) + + ############################################################ + # log10period binwidth : of course this assumes a fixed + # binwidth, so we check for this too. + for i in range(len(current_logperiods) - 1): + if not dlog10P == ( + float(current_logperiods[i + 1]) - float(current_logperiods[i]) + ): + msg = "Period spacing is not consistent throughout the dataset" + verbose_print( + "\tMoe_di_Stefano_2017: {}".format(msg), + self.grid_options["verbosity"], + 0, + ) + raise ValueError(msg) + + # save the logperiods list in the cache: + # this is used in the renormalization integration + Moecache["logperiods"] = logperiods + + # Write to file + os.makedirs( + os.path.join(self.grid_options["tmp_dir"], "moe_distefano"), + exist_ok=True, + ) + with open( + os.path.join(self.grid_options["tmp_dir"], "moe_distefano", "moe.log"), + "a", + encoding="utf-8", + ) as logfile: + logfile.write("logâ‚â‚€Periods(days) {}\n".format(logperiods)) + + # Fill the global dict + for logmass in logmasses: + # Create the multiplicity table + if not Moecache.get("multiplicity_table", None): + Moecache["multiplicity_table"] = [] + + # multiplicity as a function of primary mass + Moecache["multiplicity_table"].append( + [ + float(logmass), + json_data["log10M1"][logmass]["f_multi"], + json_data["log10M1"][logmass]["single star fraction"], + json_data["log10M1"][logmass]["binary star fraction"], + json_data["log10M1"][logmass]["triple/quad star fraction"], + ] + ) + + ############################################################ + # a small log10period which we can shift just outside the + # table to force integration out there to zero + epslog10P = 1e-8 * dlog10P + + ############################################################ + # loop over either binary or triple-outer periods + first = 1 + + # Go over the periods + for logperiod in logperiods: + ############################################################ + # distributions of binary and triple star fractions + # as a function of mass, period. 
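+ # (each row appended to Moecache['period_distributions'] below is [log10(M1), log10(P), binary fraction / dlog10P, triple+quad fraction / dlog10P])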
+ # + # Note: these should be per unit log10P, hence we + # divide by dlog10P + + if first: + first = 0 + + # Create the multiplicity table + if not Moecache.get("period_distributions", None): + Moecache["period_distributions"] = [] + + ############################################################ + # lower bound the period distributions to zero probability + Moecache["period_distributions"].append( + [ + float(logmass), + float(logperiod) - 0.5 * dlog10P - epslog10P, + 0.0, + 0.0, + ] + ) + Moecache["period_distributions"].append( + [ + float(logmass), + float(logperiod) - 0.5 * dlog10P, + json_data["log10M1"][logmass]["logP"][logperiod][ + "normed_bin_frac_p_dist" + ] + / dlog10P, + json_data["log10M1"][logmass]["logP"][logperiod][ + "normed_tripquad_frac_p_dist" + ] + / dlog10P, + ] + ) + + Moecache["period_distributions"].append( + [ + float(logmass), + float(logperiod), + json_data["log10M1"][logmass]["logP"][logperiod][ + "normed_bin_frac_p_dist" + ] + / dlog10P, + json_data["log10M1"][logmass]["logP"][logperiod][ + "normed_tripquad_frac_p_dist" + ] + / dlog10P, + ] + ) + + ############################################################ + # distributions as a function of mass, period, q + # + # First, get a list of the qs given by Moe + # + qs = sorted(json_data["log10M1"][logmass]["logP"][logperiod]["q"]) + + # Fill the data and 'normalise' + qdata = self.fill_data( + qs, json_data["log10M1"][logmass]["logP"][logperiod]["q"] + ) + + # Create the multiplicity table + if not Moecache.get("q_distributions", None): + Moecache["q_distributions"] = [] + + for q in qs: + Moecache["q_distributions"].append( + [float(logmass), float(logperiod), float(q), qdata[q]] + ) + + ############################################################ + # eccentricity distributions as a function of mass, period, ecc + eccs = sorted(json_data["log10M1"][logmass]["logP"][logperiod]["e"]) + + # Fill the data and 'normalise' + ecc_data = self.fill_data( + eccs, json_data["log10M1"][logmass]["logP"][logperiod]["e"] + ) + + # Create the multiplicity table + if not Moecache.get("ecc_distributions", None): + Moecache["ecc_distributions"] = [] + + for ecc in eccs: + Moecache["ecc_distributions"].append( + [ + float(logmass), + float(logperiod), + float(ecc), + ecc_data[ecc], + ] + ) + + ############################################################ + # upper bound the period distributions to zero probability + Moecache["period_distributions"].append( + [ + float(logmass), + float(logperiods[-1]) + 0.5 * dlog10P, # TODO: why this shift? to center it? 
+ json_data["log10M1"][logmass]["logP"][logperiods[-1]][ + "normed_bin_frac_p_dist" + ] + / dlog10P, + json_data["log10M1"][logmass]["logP"][logperiods[-1]][ + "normed_tripquad_frac_p_dist" + ] + / dlog10P, + ] + ) + Moecache["period_distributions"].append( + [ + float(logmass), + float(logperiods[-1]) + 0.5 * dlog10P + epslog10P, + 0.0, + 0.0, + ] + ) + + verbose_print( + "\tMoe_di_Stefano_2017: Length period_distributions table: {}".format( + len(Moecache["period_distributions"]) + ), + self.grid_options["verbosity"], + _MOE2017_VERBOSITY_LEVEL, + ) + verbose_print( + "\tMoe_di_Stefano_2017: Length multiplicity table: {}".format( + len(Moecache["multiplicity_table"]) + ), + self.grid_options["verbosity"], + _MOE2017_VERBOSITY_LEVEL, + ) + verbose_print( + "\tMoe_di_Stefano_2017: Length q table: {}".format( + len(Moecache["q_distributions"]) + ), + self.grid_options["verbosity"], + _MOE2017_VERBOSITY_LEVEL, + ) + verbose_print( + "\tMoe_di_Stefano_2017: Length ecc table: {}".format( + len(Moecache["ecc_distributions"]) + ), + self.grid_options["verbosity"], + _MOE2017_VERBOSITY_LEVEL, + ) + + # Write to log file + os.makedirs( + os.path.join(self.grid_options["tmp_dir"], "moe_distefano"), + exist_ok=True, + ) + with open( + os.path.join( + os.path.join(self.grid_options["tmp_dir"], "moe_distefano"), + "moecache.json", + ), + "w", + encoding="utf-8", + ) as cache_filehandle: + cache_filehandle.write( + json.dumps(Moecache, indent=4, ensure_ascii=False) + ) + + # Signal that the data has been loaded + self.grid_options["_loaded_Moe2017_data"] = True + + def _set_moe_di_stefano_distributions(self): + """ + Function to set the Moe & di Stefano distribution + """ + + ############################################################ + # first, the multiplicity, this is 1,2,3,4, ... + # for singles, binaries, triples, quadruples, ... 
+ + max_multiplicity = self.get_max_multiplicity( + self.grid_options["Moe2017_options"]["multiplicity_modulator"] + ) + verbose_print( + "\tMoe_di_Stefano_2017: Max multiplicity = {}".format(max_multiplicity), + self.grid_options["verbosity"], + _MOE2017_VERBOSITY_LEVEL, + ) + ###### + # Setting up the grid variables + + # Multiplicity + self.add_grid_variable( + name="multiplicity", + parameter_name="multiplicity", + longname="multiplicity", + valuerange=[1, max_multiplicity], + samplerfunc="self.const_int(1, {n}, {n})".format(n=max_multiplicity), + precode='self.grid_options["multiplicity"] = multiplicity; self.bse_options["multiplicity"] = multiplicity; options={}'.format( + self.grid_options["Moe2017_options"] + ), + condition="({}[int(multiplicity)-1] > 0)".format( + str(self.grid_options["Moe2017_options"]["multiplicity_modulator"]) + ), + gridtype="discrete", + probdist=1, + ) + + ############################################################ + # always require M1, for all systems + # + # log-spaced m1 with given resolution + self.add_grid_variable( + name="lnM_1", + parameter_name="M_1", + longname="Primary mass", + samplerfunc=self.grid_options["Moe2017_options"]["samplerfuncs"]["M"][0] + or "self.const_linear(np.log({}), np.log({}), {})".format( + self.grid_options["Moe2017_options"]["ranges"]["M"][0], + self.grid_options["Moe2017_options"]["ranges"]["M"][1], + self.grid_options["Moe2017_options"]["resolutions"]["M"][0], + ), + valuerange=[ + "np.log({})".format( + self.grid_options["Moe2017_options"]["ranges"]["M"][0] + ), + "np.log({})".format( + self.grid_options["Moe2017_options"]["ranges"]["M"][1] + ), + ], + gridtype="centred", + dphasevol="dlnM_1", + precode='M_1 = np.exp(lnM_1); options["M_1"]=M_1', + probdist="self.Moe_di_Stefano_2017_pdf({{{}, {}, {}}}, verbosity=self.grid_options['verbosity'])['total_probdens'] if multiplicity == 1 else 1".format( + str(dict(self.grid_options["Moe2017_options"]))[1:-1], + "'multiplicity': multiplicity", + "'M_1': M_1", + ), + ) + + # Go to higher multiplicities + if max_multiplicity >= 2: + # binaries: period + self.add_grid_variable( + name="log10per", + parameter_name="orbital_period", + longname="log10(Orbital_Period)", + probdist=1.0, + condition='(self.grid_options["multiplicity"] >= 2)', + branchpoint=1 + if max_multiplicity > 1 + else 0, # Signal here to put a branchpoint if we have a max multiplicity higher than 1. 
+ gridtype="centred", + dphasevol="({} * dlog10per)".format(LOG_LN_CONVERTER), + valuerange=[ + self.grid_options["Moe2017_options"]["ranges"]["logP"][0], + self.grid_options["Moe2017_options"]["ranges"]["logP"][1], + ], + samplerfunc=self.grid_options["Moe2017_options"]["samplerfuncs"][ + "logP" + ][0] + or "self.const_linear({}, {}, {})".format( + self.grid_options["Moe2017_options"]["ranges"]["logP"][0], + self.grid_options["Moe2017_options"]["ranges"]["logP"][1], + self.grid_options["Moe2017_options"]["resolutions"]["logP"][0], + ), + precode="""orbital_period = 10.0**log10per +qmin={}/M_1 +qmax=maximum_mass_ratio_for_RLOF(M_1, orbital_period) +""".format( + self.grid_options["Moe2017_options"]["Mmin"] + ), + ) # TODO: change the maximum_mass_ratio_for_RLOF + + # binaries: mass ratio + self.add_grid_variable( + name="q", + parameter_name="M_2", + longname="Mass ratio", + valuerange=[ + self.grid_options["Moe2017_options"]["ranges"]["q"][0] + if self.grid_options["Moe2017_options"] + .get("ranges", {}) + .get("q", None) + else "options['Mmin']/M_1", + self.grid_options["Moe2017_options"]["ranges"]["q"][1] + if self.grid_options["Moe2017_options"] + .get("ranges", {}) + .get("q", None) + else "qmax", + ], + probdist=1, + gridtype="centred", + dphasevol="dq", + precode=""" +M_2 = q * M_1 +sep = calc_sep_from_period(M_1, M_2, orbital_period) + """, + samplerfunc=self.grid_options["Moe2017_options"]["samplerfuncs"]["M"][1] + or "self.const_linear({}, {}, {})".format( + self.grid_options["Moe2017_options"]["ranges"]["q"][0] + if self.grid_options["Moe2017_options"] + .get("ranges", {}) + .get("q", [None, None])[0] + else "{}/M_1".format(self.grid_options["Moe2017_options"]["Mmin"]), + self.grid_options["Moe2017_options"]["ranges"]["q"][1] + if self.grid_options["Moe2017_options"] + .get("ranges", {}) + .get("q", [None, None])[1] + else "qmax", + self.grid_options["Moe2017_options"]["resolutions"]["M"][1], + ), + ) + + # (optional) binaries: eccentricity + if self.grid_options["Moe2017_options"]["resolutions"]["ecc"][0] > 0: + self.add_grid_variable( + name="ecc", + parameter_name="eccentricity", + longname="Eccentricity", + probdist=1, + gridtype="centred", + dphasevol="decc", + precode="eccentricity=ecc", + valuerange=[ + self.grid_options["Moe2017_options"]["ranges"]["ecc"][ + 0 + ], # Just fail if not defined. + self.grid_options["Moe2017_options"]["ranges"]["ecc"][1], + ], + samplerfunc=self.grid_options["Moe2017_options"]["samplerfuncs"][ + "ecc" + ][0] + or "self.const_linear({}, {}, {})".format( + self.grid_options["Moe2017_options"]["ranges"]["ecc"][ + 0 + ], # Just fail if not defined. + self.grid_options["Moe2017_options"]["ranges"]["ecc"][1], + self.grid_options["Moe2017_options"]["resolutions"]["ecc"][0], + ), + ) + + # Now for triples and quadruples + if max_multiplicity >= 3: + # Triple: period + self.add_grid_variable( + name="log10per2", + parameter_name="orbital_period_triple", + longname="log10(Orbital_Period2)", + probdist=1.0, + condition='(self.grid_options["multiplicity"] >= 3)', + branchpoint=2 + if max_multiplicity > 2 + else 0, # Signal here to put a branchpoint if we have a max multiplicity higher than 1. 
+ gridtype="centred", + dphasevol="({} * dlog10per2)".format(LOG_LN_CONVERTER), + valuerange=[ + self.grid_options["Moe2017_options"]["ranges"]["logP"][0], + self.grid_options["Moe2017_options"]["ranges"]["logP"][1], + ], + samplerfunc=self.grid_options["Moe2017_options"]["samplerfuncs"][ + "logP" + ][1] + or "self.const_linear({}, {}, {})".format( + self.grid_options["Moe2017_options"]["ranges"]["logP"][0], + self.grid_options["Moe2017_options"]["ranges"]["logP"][1], + self.grid_options["Moe2017_options"]["resolutions"]["logP"][1], + ), + precode="""orbital_period_triple = 10.0**log10per2 +q2min={}/(M_1+M_2) +q2max=maximum_mass_ratio_for_RLOF(M_1+M_2, orbital_period_triple) + """.format( + self.grid_options["Moe2017_options"]["Mmin"] + ), + ) + + # Triples: mass ratio + # Note, the mass ratio is M_outer/M_inner + self.add_grid_variable( + name="q2", + parameter_name="M_3", + longname="Mass ratio outer/inner", + valuerange=[ + self.grid_options["Moe2017_options"]["ranges"]["q"][0] + if self.grid_options["Moe2017_options"] + .get("ranges", {}) + .get("q", None) + else "options['Mmin']/(M_1+M_2)", + self.grid_options["Moe2017_options"]["ranges"]["q"][1] + if self.grid_options["Moe2017_options"] + .get("ranges", {}) + .get("q", None) + else "q2max", + ], + probdist=1, + gridtype="centred", + dphasevol="dq2", + precode=""" +M_3 = q2 * (M_1 + M_2) +sep2 = calc_sep_from_period((M_1+M_2), M_3, orbital_period_triple) +eccentricity2=0 +""", + samplerfunc=self.grid_options["Moe2017_options"]["samplerfuncs"][ + "M" + ][2] + or "self.const_linear({}, {}, {})".format( + self.grid_options["Moe2017_options"]["ranges"]["q"][0] + if self.grid_options["Moe2017_options"] + .get("ranges", {}) + .get("q", None) + else "options['Mmin']/(M_1+M_2)", + self.grid_options["Moe2017_options"]["ranges"]["q"][1] + if self.grid_options["Moe2017_options"] + .get("ranges", {}) + .get("q", None) + else "q2max", + self.grid_options["Moe2017_options"]["resolutions"]["M"][2], + ), + ) + + # (optional) triples: eccentricity + if self.grid_options["Moe2017_options"]["resolutions"]["ecc"][1] > 0: + self.add_grid_variable( + name="ecc2", + parameter_name="eccentricity2", + longname="Eccentricity of the triple", + probdist=1, + gridtype="centred", + dphasevol="decc2", + precode="eccentricity2=ecc2", + valuerange=[ + self.grid_options["Moe2017_options"]["ranges"]["ecc"][ + 0 + ], # Just fail if not defined. + self.grid_options["Moe2017_options"]["ranges"]["ecc"][1], + ], + samplerfunc=self.grid_options["Moe2017_options"][ + "samplerfuncs" + ]["ecc"][1] + or "self.const_linear({}, {}, {})".format( + self.grid_options["Moe2017_options"]["ranges"]["ecc"][ + 0 + ], # Just fail if not defined. + self.grid_options["Moe2017_options"]["ranges"]["ecc"][1], + self.grid_options["Moe2017_options"]["resolutions"]["ecc"][ + 1 + ], + ), + ) + + if max_multiplicity == 4: + # Quadruple: period + self.add_grid_variable( + name="log10per3", + parameter_name="orbital_period_quadruple", + longname="log10(Orbital_Period3)", + probdist=1.0, + condition='(self.grid_options["multiplicity"] >= 4)', + branchpoint=3 + if max_multiplicity > 3 + else 0, # Signal here to put a branchpoint if we have a max multiplicity higher than 1. 
+ gridtype="centred", + dphasevol="({} * dlog10per3)".format(LOG_LN_CONVERTER), + valuerange=[ + self.grid_options["Moe2017_options"]["ranges"]["logP"][0], + self.grid_options["Moe2017_options"]["ranges"]["logP"][1], + ], + samplerfunc=self.grid_options["Moe2017_options"][ + "samplerfuncs" + ]["logP"][2] + or "self.const_linear({}, {}, {})".format( + self.grid_options["Moe2017_options"]["ranges"]["logP"][0], + self.grid_options["Moe2017_options"]["ranges"]["logP"][1], + self.grid_options["Moe2017_options"]["resolutions"]["logP"][ + 2 + ], + ), + precode="""orbital_period_quadruple = 10.0**log10per3 +q3min={}/(M_3) +q3max=maximum_mass_ratio_for_RLOF(M_3, orbital_period_quadruple) + """.format( + self.grid_options["Moe2017_options"]["Mmin"] + ), + ) + + # Quadruple: mass ratio : M_outer / M_inner + self.add_grid_variable( + name="q3", + parameter_name="M_4", + longname="Mass ratio outer low/outer high", + valuerange=[ + self.grid_options["Moe2017_options"]["ranges"]["q"][0] + if self.grid_options["Moe2017_options"] + .get("ranges", {}) + .get("q", None) + else "options['Mmin']/(M_3)", + self.grid_options["Moe2017_options"]["ranges"]["q"][1] + if self.grid_options["Moe2017_options"] + .get("ranges", {}) + .get("q", None) + else "q3max", + ], + probdist=1, + gridtype="centred", + dphasevol="dq3", + precode=""" +M_4 = q3 * M_3 +sep3 = calc_sep_from_period((M_3), M_4, orbital_period_quadruple) +eccentricity3=0 +""", + samplerfunc=self.grid_options["Moe2017_options"][ + "samplerfuncs" + ]["M"][3] + or "self.const_linear({}, {}, {})".format( + self.grid_options["Moe2017_options"]["ranges"]["q"][0] + if self.grid_options["Moe2017_options"] + .get("ranges", {}) + .get("q", None) + else "options['Mmin']/(M_3)", + self.grid_options["Moe2017_options"]["ranges"]["q"][1] + if self.grid_options["Moe2017_options"] + .get("ranges", {}) + .get("q", None) + else "q3max", + self.grid_options["Moe2017_options"]["resolutions"]["M"][2], + ), + ) + + # (optional) triples: eccentricity + if ( + self.grid_options["Moe2017_options"]["resolutions"]["ecc"][2] + > 0 + ): + self.add_grid_variable( + name="ecc3", + parameter_name="eccentricity3", + longname="Eccentricity of the triple+quadruple/outer binary", + probdist=1, + gridtype="centred", + dphasevol="decc3", + precode="eccentricity3=ecc3", + valuerange=[ + self.grid_options["Moe2017_options"]["ranges"]["ecc"][ + 0 + ], # Just fail if not defined. + self.grid_options["Moe2017_options"]["ranges"]["ecc"][ + 1 + ], + ], + samplerfunc=self.grid_options["Moe2017_options"][ + "samplerfuncs" + ]["ecc"][2] + or "self.const_linear({}, {}, {})".format( + self.grid_options["Moe2017_options"]["ranges"]["ecc"][ + 0 + ], # Just fail if not defined. + self.grid_options["Moe2017_options"]["ranges"]["ecc"][ + 1 + ], + self.grid_options["Moe2017_options"]["resolutions"][ + "ecc" + ][2], + ), + ) + + # Now we are at the last part. + # Here we should combine all the information that we calculate and update the options + # dictionary. This will then be passed to the Moe_di_Stefano_2017_pdf to calculate + # the real probability. 
The trick we use is to strip the options_dict as a string + # and add some keys to it: + + updated_options = "{{{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}}}".format( + str(dict(self.grid_options["Moe2017_options"]))[1:-1], + '"multiplicity": multiplicity', + '"M_1": M_1', + '"M_2": M_2', + '"M_3": M_3', + '"M_4": M_4', + '"P": orbital_period', + '"P2": orbital_period_triple', + '"P3": orbital_period_quadruple', + '"ecc": eccentricity', + '"ecc2": eccentricity2', + '"ecc3": eccentricity3', + ) + + probdist_addition = "self.Moe_di_Stefano_2017_pdf({}, verbosity=self.grid_options['verbosity'])['total_probdens']".format( + updated_options + ) + + # and finally the probability calculator + self.grid_options["_grid_variables"][self._last_grid_variable()][ + "probdist" + ] = probdist_addition + + verbose_print( + "\tMoe_di_Stefano_2017: Added final call to the pdf function", + self.grid_options["verbosity"], + _MOE2017_VERBOSITY_LEVEL, + ) + + # Signal that the MOE2017 grid has been set + self.grid_options["_set_Moe2017_grid"] = True + + ################################################################################################ + def Moe_di_Stefano_2017(self, options=None): + """ + Function to handle setting the user input settings, + set up the data and load that into interpolators and + then set the distribution functions + + Takes a dictionary as its only argument + """ + + default_options = { + "apply settings": True, + "setup grid": True, + "load data": True, + "clean cache": False, + "clean load flag": False, + "clean all": False, + } + + if not options: + options = {} + options = update_dicts(default_options, options) + + # clean cache? + if options["clean all"] or options["clean cache"]: + Moecache.clear() + + if options["clean all"] or options["clean load flag"]: + self.grid_options["_loaded_Moe2017_data"] = False + + # Set the user input + if options["apply settings"]: + self.set_moe_di_stefano_settings(options=options) + + # Load the data + if options["load data"]: + self._load_moe_di_stefano_data() + + # construct the grid here + if options["setup grid"]: + self._set_moe_di_stefano_distributions() + + def _clean_interpolators(self): + """ + Function to clean up the interpolators after a run + + We look in the Moecache global variable for items that are interpolators. + Should be called by the general cleanup function AND the thread cleanup function + """ + + interpolator_keys = [] + for key, value in Moecache.items(): + if isinstance(value, py_rinterpolate.Rinterpolate): + interpolator_keys.append(key) + + for key in interpolator_keys: + Moecache[key].destroy() + del Moecache[key] + gc.collect() + + def _calculate_multiplicity_fraction(self, system_dict): + """ + Function to calculate multiplicity fraction + + Makes use of the self.bse_options['multiplicity'] value. If its not set, it will raise an error + + grid_options['multiplicity_fraction_function'] will be checked for the choice + + TODO: add option to put a manual binary fraction in here (solve via negative numbers being the functions) + """ + + # Just return 1 if no option has been chosen + if self.grid_options["multiplicity_fraction_function"] in [0, "None"]: + verbose_print( + "_calculate_multiplicity_fraction: Chosen not to use any multiplicity fraction.", + self.grid_options["verbosity"], + 3, + ) + + return 1 + + # Raise an error if the multiplicity is not set + if not system_dict.get("multiplicity", None): + msg = "Multiplicity value has not been set. 
When using a specific multiplicity fraction function please set the multiplicity" + raise ValueError(msg) + + # Go over the chosen options + if self.grid_options["multiplicity_fraction_function"] in [1, "Arenou2010"]: + # Arenou 2010 will be used + verbose_print( + "_calculate_multiplicity_fraction: Using Arenou 2010 to calculate multiplicity fractions", + self.grid_options["verbosity"], + 3, + ) + + binary_fraction = self.Arenou2010_binary_fraction(system_dict["M_1"]) + multiplicity_fraction_dict = { + 1: 1 - binary_fraction, + 2: binary_fraction, + 3: 0, + 4: 0, + } + + elif self.grid_options["multiplicity_fraction_function"] in [2, "Raghavan2010"]: + # Raghavan 2010 will be used + verbose_print( + "_calculate_multiplicity_fraction: Using Raghavan (2010) to calculate multiplicity fractions", + self.grid_options["verbosity"], + 3, + ) + + binary_fraction = self.raghavan2010_binary_fraction(system_dict["M_1"]) + multiplicity_fraction_dict = { + 1: 1 - binary_fraction, + 2: binary_fraction, + 3: 0, + 4: 0, + } + + elif self.grid_options["multiplicity_fraction_function"] in [3, "Moe2017"]: + # We need to check several things now here: + + # First, are the options for the MOE2017 grid set? On start it is filled with the default settings + if not self.grid_options["Moe2017_options"]: + msg = "The MOE2017 options do not seem to be set properly. The value is {}".format( + self.grid_options["Moe2017_options"] + ) + raise ValueError(msg) + + # Second: is the Moecache filled. + if not Moecache: + verbose_print( + "_calculate_multiplicity_fraction: Moecache is empty. It needs to be filled with the data for the interpolators. Loading the data now", + self.grid_options["verbosity"], + 3, + ) + + # Load the data + self._load_moe_di_stefano_data() + + # record the prev value + prev_M1_value_ms = self.grid_options["Moe2017_options"].get("M_1", None) + + # Set value of M1 of the current system + self.grid_options["Moe2017_options"]["M_1"] = system_dict["M_1"] + + # Calculate the multiplicity fraction + multiplicity_fraction_list = ( + self.Moe_di_Stefano_2017_multiplicity_fractions( + self.grid_options["Moe2017_options"], self.grid_options["verbosity"] + ) + ) + + # Turn into dict + multiplicity_fraction_dict = { + el + 1: multiplicity_fraction_list[el] + for el in range(len(multiplicity_fraction_list)) + } + + # Set the prev value back + self.grid_options["Moe2017_options"]["M_1"] = prev_M1_value_ms + + # we don't know what to do next + else: + msg = "Chosen value for the multiplicity fraction function is not known." + raise ValueError(msg) + + # To make sure we normalize the dictionary + multiplicity_fraction_dict = normalize_dict( + multiplicity_fraction_dict, verbosity=self.grid_options["verbosity"] + ) + + verbose_print( + "Multiplicity: {} multiplicity_fraction: {}".format( + system_dict["multiplicity"], + multiplicity_fraction_dict[system_dict["multiplicity"]], + ), + self.grid_options["verbosity"], + 3, + ) + + return multiplicity_fraction_dict[system_dict["multiplicity"]] + + def get_moe_di_stefano_dataset(self, options, verbosity=0): + """ + Function to get the default Moe and di Stefano dataset or accept a user input. + + Returns a dict containing the (JSON) data. 
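+ The 'JSON' option takes precedence over the 'file' option; if neither yields data, the packaged 2017 dataset is used.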
+ """ + + json_data = None + + if "JSON" in options: + # use the JSON data passed in + json_data = options["JSON"] + + elif "file" in options: + # use the file passed in, if provided + if not os.path.isfile(options["file"]): + verbose_print( + "The provided 'file' Moe and de Stefano JSON file does not seem to exist at {}".format( + options["file"] + ), + verbosity, + 1, + ) + + raise ValueError + if not options["file"].endswith(".json"): + verbose_print( + "Provided filename is not a json file", + verbosity, + 1, + ) + + else: + # Read input data and Clean up the data if there are white spaces around the keys + with open(options["file"], "r", encoding="utf-8") as data_filehandle: + datafile_data = data_filehandle.read() + datafile_data = datafile_data.replace('" ', '"') + datafile_data = datafile_data.replace(' "', '"') + datafile_data = datafile_data.replace(' "', '"') + json_data = json.loads(datafile_data) + + if not json_data: + # no JSON data or filename given, use the default 2017 dataset + verbose_print( + "Using the default Moe and de Stefano 2017 datafile", + verbosity, + 1, + ) + json_data = copy.deepcopy(moe_di_stefano_2017_data.moe_di_stefano_2017_data) + + return json_data + + def get_Moe_di_Stefano_2017_default_options(self): + """ + Function that returns the default options for the Moe & diStefano grid options + """ + + return { + # place holder for the JSON data to be used if a file + # isn't specified + "JSON": None, + # resolution data + "resolutions": { + "M": [ + 20, # M1 + 20, # M2 (i.e. q) + 0, # M3 currently unused + 0, # M4 currently unused + ], + "logP": [ + 20, # P2 (binary period) + 0, # P3 (triple period) currently unused + 0, # P4 (quadruple period) currently unused + ], + "ecc": [ + 10, # e (binary eccentricity) + 0, # e2 (triple eccentricity) currently unused + 0, # e3 (quadruple eccentricity) currently unused + ], + }, + "samplerfuncs": { + "M": [None, None, None, None], + "logP": [None, None, None], + "ecc": [None, None, None], + }, + "ranges": { + # stellar masses (Msun) + "M": [ + self.minimum_stellar_mass() + * 1.05, # 0.08 is a tad bit above the minimum mass. Don't sample at 0.07, otherwise the first row of q values will have a phasevol of 0. Anything higher is fine. + 80.0, # (rather arbitrary) upper mass cutoff + ], + "q": [ + None, # artificial qmin : set to None to use default + None, # artificial qmax : set to None to use default + ], + "logP": [0.0, 8.0], # 0 = log10(1 day) # 8 = log10(10^8 days) + "ecc": [0.0, 0.99], + }, + # minimum stellar mass + "Mmin": self.minimum_stellar_mass(), # We take the value that binary_c has set as the default + # multiplicity model (as a function of log10M1) + # + # You can use 'Poisson' which uses the system multiplicity + # given by Moe and maps this to single/binary/triple/quad + # fractions. + # + # Alternatively, 'data' takes the fractions directly + # from the data, but then triples and quadruples are + # combined (and there are NO quadruples). + "multiplicity_model": "Poisson", + # multiplicity modulator: + # [single, binary, triple, quadruple] + # + # e.g. [1,0,0,0] for single stars only + # [0,1,0,0] for binary stars only + # + # defaults to [1,1,0,0] i.e. 
all types + # + "multiplicity_modulator": [ + 1, # single + 1, # binary + 0, # triple + 0, # quadruple + ], + # given a mix of multiplicities, you can either (noting that + # here (S,B,T,Q) = appropriate modulator * model(S,B,T,Q) ) + # + # 'norm' : normalise so the whole population is 1.0 + # after implementing the appropriate fractions + # S/(S+B+T+Q), B/(S+B+T+Q), T/(S+B+T+Q), Q/(S+B+T+Q) + # + # 'raw' : stick to what is predicted, i.e. + # S/(S+B+T+Q), B/(S+B+T+Q), T/(S+B+T+Q), Q/(S+B+T+Q) + # without normalisation + # (in which case the total probability < 1.0 unless + # all you use single, binary, triple and quadruple) + # + # 'merge' : e.g. if you only have single and binary, + # add the triples and quadruples to the binaries, so + # binaries represent all multiple systems + # ... + # *** this is canonical binary population synthesis *** + # + # Note: if multiplicity_modulator == [1,1,1,1] this + # option does nothing (equivalent to 'raw'). + # + # + # note: if you only set one multiplicity_modulator + # to 1, and all the others to 0, then normalising + # will mean that you effectively have the same number + # of stars as single, binary, triple or quad (whichever + # is non-zero) i.e. the multiplicity fraction is ignored. + # This is probably not useful except for + # testing purposes or comparing to old grids. + "normalize_multiplicities": "merge", + # q extrapolation (below 0.15 and above 0.9) method. We can choose from ['flat', 'linear', 'plaw2', 'nolowq'] + "q_low_extrapolation_method": "linear", + "q_high_extrapolation_method": "linear", + } + + def get_Moe_di_Stefano_2017_default_options_description(self): + """ + Function to return the descriptions for all the Moe & diStefano grid options + """ + + return { + "resolutions": "", + "ranges": "", + "Mmin": "Minimum stellar mass", + "multiplicity_model": """ + multiplicity model (as a function of log10M1) + + You can use 'Poisson' which uses the system multiplicity + given by Moe and maps this to single/binary/triple/quad + fractions. + + Alternatively, 'data' takes the fractions directly + from the data, but then triples and quadruples are + combined (and there are NO quadruples). + """, + "multiplicity_modulator": """ + [single, binary, triple, quadruple] + + e.g. [1,0,0,0] for single stars only + [0,1,0,0] for binary stars only + + defaults to [1,1,0,0] i.e. singles and binaries + """, + "normalize_multiplicities": """ + 'norm': normalise so the whole population is 1.0 + after implementing the appropriate fractions + S/(S+B+T+Q), B/(S+B+T+Q), T/(S+B+T+Q), Q/(S+B+T+Q) + given a mix of multiplicities, you can either (noting that + here (S,B,T,Q) = appropriate modulator * model(S,B,T,Q) ) + note: if you only set one multiplicity_modulator + to 1, and all the others to 0, then normalising + will mean that you effectively have the same number + of stars as single, binary, triple or quad (whichever + is non-zero) i.e. the multiplicity fraction is ignored. + This is probably not useful except for + testing purposes or comparing to old grids. + + 'raw' : stick to what is predicted, i.e. + S/(S+B+T+Q), B/(S+B+T+Q), T/(S+B+T+Q), Q/(S+B+T+Q) + without normalisation + (in which case the total probability < 1.0 unless + all you use single, binary, triple and quadruple) + + 'merge' : e.g. if you only have single and binary, + add the triples and quadruples to the binaries, so + binaries represent all multiple systems + ... + *** this is canonical binary population synthesis *** + + It only takes the maximum multiplicity into account, + i.e. 
it doesn't multiply the resulting array by the multiplicity modulator again. + This prevents the resulting array to always be 1 if only 1 multiplicity modulator element is nonzero + + Note: if multiplicity_modulator == [1,1,1,1]. this option does nothing (equivalent to 'raw'). + """, + "q_low_extrapolation_method": """ + q extrapolation (below 0.15) method + none + flat + linear2 + plaw2 + nolowq + """, + "q_high_extrapolation_method": "Same as q_low_extrapolation_method", + } diff --git a/binarycpython/utils/analytics.py b/binarycpython/utils/analytics.py new file mode 100644 index 0000000000000000000000000000000000000000..2e8e1b8676a898828b2382b8c6be354aea4b16cc --- /dev/null +++ b/binarycpython/utils/analytics.py @@ -0,0 +1,113 @@ +""" +The class extension for the population object that contains analytics functionality +""" + +# pylint: disable=E1101 + +import time + +class analytics: + """ + Extension for the Population class containing the functions for analytics + """ + + def __init__(self, **kwargs): + return + + ####################### + # time used functions + ####################### + + def make_analytics_dict(self): + """ + Function to create the analytics dictionary + """ + + print("Do analytics") + + analytics_dict = {} + + if self.grid_options["do_analytics"]: + # Put all interesting stuff in a variable and output that afterwards, as analytics of the run. + analytics_dict = { + "population_id": self.grid_options["_population_id"], + "evolution_type": self.grid_options["evolution_type"], + "failed_count": self.grid_options["_failed_count"], + "failed_prob": self.grid_options["_failed_prob"], + "failed_systems_error_codes": self.grid_options[ + "_failed_systems_error_codes" + ].copy(), + "errors_exceeded": self.grid_options["_errors_exceeded"], + "errors_found": self.grid_options["_errors_found"], + "total_probability": self.grid_options["_probtot"], + "total_count": self.grid_options["_count"], + "start_timestamp": self.grid_options["_start_time_evolution"], + "end_timestamp": self.grid_options["_end_time_evolution"], + "time_elapsed": self.time_elapsed(), + "total_mass_run": self.grid_options["_total_mass_run"], + "total_probability_weighted_mass_run": self.grid_options[ + "_total_probability_weighted_mass_run" + ], + "zero_prob_stars_skipped": self.grid_options[ + "_zero_prob_stars_skipped" + ], + } + + if "metadata" in self.grid_ensemble_results: + # Add analytics dict to the metadata too: + self.grid_ensemble_results["metadata"].update(analytics_dict) + self.add_system_metadata() + else: + # use existing analytics dict + analytics_dict = self.grid_ensemble_results.get("metadata", {}) + + return analytics_dict + + def set_time(self, when): + """ + Function to set the timestamp at when, where when is 'start' or 'end'. + + If when == end, we also calculate the time elapsed. + """ + self.grid_options["_" + when + "_time_evolution"] = time.time() + if when == "end": + self.grid_options["_time_elapsed"] = self.time_elapsed(force=True) + + def time_elapsed(self, force=False): + """ + Function to return how long a population object has been running. 
+ + We return the cached value if it's available, and calculate + the time elapsed if otherwise or if force is True + """ + for x in ["_start_time_evolution", "_end_time_evolution"]: + if not self.grid_options[x]: + self.grid_options[x] = time.time() + print("{} missing : {}".format(x, self.grid_options[x])) + + if force or "_time_elapsed" not in self.grid_options: + self.grid_options["_time_elapsed"] = ( + self.grid_options["_end_time_evolution"] + - self.grid_options["_start_time_evolution"] + ) + print( + "set time elapsed = {} - {} = {}".format( + self.grid_options["_end_time_evolution"], + self.grid_options["_start_time_evolution"], + self.grid_options["_time_elapsed"], + ) + ) + + return self.grid_options["_time_elapsed"] + + def CPU_time(self): + """ + Function to return how much CPU time we've used + """ + dt = self.grid_options["_time_elapsed"] + + ncpus = self.grid_options.get("num_processes", 1) + + print("CPU time : dt={} n={} -> {}".format(dt, ncpus, dt * ncpus)) + + return dt * ncpus diff --git a/binarycpython/utils/cache.py b/binarycpython/utils/cache.py new file mode 100644 index 0000000000000000000000000000000000000000..f515324bb6b53ee9d96924dffc39d49dd0ee2a59 --- /dev/null +++ b/binarycpython/utils/cache.py @@ -0,0 +1,330 @@ +""" +File containing the class extension for the population object that contains cache functionality + +Module containing (e.g. LRU) cache functionality for binary_c-python. + +We use cachetools when possible because this allows us to set up the +cache of the appropriate size for the task in the grid_options dict. +Please see the LRU_* options in there. +""" + +# pylint: disable=E1101 + +import os +import time +import tempfile +import importlib +import getpass +import contextlib + +import cachetools + + +class cache: + """ + Class extension for the population object that contains cache functionality + """ + + def __init__(self, **kwargs): + # don't do anything: we just inherit from this class + return + + def default_cache_dir(self): + """ + Return a default cache directory path for binary_c-python, or None if we cannot find one. This is used in grid_options_defaults.py + """ + error_string = "__*ERR*__" # string that cannot be a path + for path in [ + os.path.join(os.environ.get("HOME", error_string), ".cache"), + os.path.join(os.environ.get("TMP", error_string), "cache"), + os.path.join("var", "tmp", getpass.getuser(), "cache"), + ]: + if not error_string in path and os.path.isdir(path): + return os.path.join(path, "binary_c") + return None + + class NullCache(cachetools.Cache): + """ + A cachetools cache object that does as little as possible and never matches. + """ + + def __init__(self, *args, **kwargs): + return None + + def popitem(self): + return # do nothing + + def __getitem__(self, key): + return self.__missing__(key) + + def __setitem__(self, key, value): + return + + def __delitem__(self, key): + return + + def setup_function_cache(self, vb=False, cachetype=None): + """ + Function to wrap binary_c-python's functions in function cache. + + The functions listed in self.grid_options['function_cache_functions'] are + given caches of size self.grid_options['function_cache_size'][func] + + Args: None + """ + + # add our custom NullCache to the cachetools selection + setattr(cachetools, "NullCache", self.NullCache) + + if not self.grid_options["function_cache"]: + # no function cache: set all to NullCache + # TODO: This cachetype(Nullcache) is wrong. 
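+            # Note (assumption, not verified): the loop below presumably
+            # intends self.function_cache[func] = self.NullCache(), i.e. an
+            # instance of the NullCache class defined above; the name
+            # cachetype is not bound at this point in the function.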
+ for func in self.grid_options["function_cache_functions"].keys(): + self.function_cache[func] = cachetype(NullCache) + + for func in self.grid_options["function_cache_functions"].keys(): + (maxsize, cachetype, testargs) = self.grid_options[ + "function_cache_functions" + ].get(func) + + # which cache should we use? + if cachetype: + # use type passed in, if given + usecachetype = cachetype + elif not self.grid_options["function_cache"]: + # function cache is disabled, use NoCache + usecachetype = "NoCache" + else: + if cachetype is None: + # use the default type + usecachetype = self.grid_options["function_cache_default_type"] + else: + # use type passed in + usecachetype = cachetype + + if vb: + print( + "Setup cache for func {func} : maxsize={maxsize}, cachetype={cachetype}, testargs={testargs}-> use {usecachetype}".format( + func=func, + maxsize=maxsize, + cachetype=cachetype, + testargs=testargs, + usecachetype=usecachetype, + ) + ) + + if usecachetype == "TTLCache": + extra_cacheargs = [self.grid_options["function_cache_TTL"]] + else: + extra_cacheargs = [] + + # detect if the function is already wrapped + x = func.split(".") + modulename = "binarycpython.utils." + x[0] + _ = importlib.import_module(modulename) + _method = eval("module.{}.{}".format(x[0], x[1])) + _wrapped = getattr(_method, "__wrapped__", False) + + # if function is wrapped... + if _wrapped and id(_method) != id(_wrapped): + # save the wrapped function (this calls the cache) + if func not in self.cached_function_cache: + self.cached_function_cache[func] = _method + self.original_function_cache[func] = _wrapped + + if usecachetype == "NoCache": + # unwrap if we're after NoCache + _code = "module.{}.{} = _wrapped".format(x[0], x[1]) + exec(_code) + else: + # function isn't wrapped, which means it was previously + # unwrapped, so rewrap it if not using NoCache + if usecachetype != "NoCache" and func in self.cached_function_cache: + _code = 'module.{}.{} = self.cached_function_cache["{}"]'.format( + x[0], x[1], func + ) + exec(_code) + + # check we're not still wrapped + _method = eval("module" + "." + x[0] + "." + x[1]) + _wrapped = getattr(_method, "__wrapped__", False) + + # if NoCache (explicity use no cache), just use NullCache + # (it's never actually set) + if usecachetype == "NoCache": + cachetools_func = getattr(cachetools, "NullCache") + else: + cachetools_func = getattr(cachetools, usecachetype) + + if maxsize == 0: + maxsize = self.grid_options["function_cache_default_maxsize"] + + if vb: + print( + "Make function cache for func {func}, maxsize {maxsize}".format( + func=func, maxsize=maxsize + ) + ) + + # set up cache function args + if maxsize is None: + args = [2] + else: + args = [maxsize] + args += extra_cacheargs + + # clear any existing cache + if func in self.caches: + try: + self.caches[func].cache_clear() + except: + pass + del self.caches[func] + + # set up new cache using the appropriate cachetools function + if usecachetype != "NoCache": + self.caches[func] = cachetools_func(*args) + + def test_caches(self, dt=5.0): + """ + Function to test cache speeds of the functions that binary_c-python automatically caches. + + Args: + dt (default 5) in seconds the length of each test. Long is more accurate, but takes longer. 
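+
+        Example (illustrative; assumes pop is an already-configured
+        Population object):
+
+            pop.test_caches(dt=2.0)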
+ """ + + # loop lists + cachetypes = ("NoCache", "NullCache", "FIFOCache", "LRUCache", "TTLCache") + functions = self.grid_options["function_cache_functions"].keys() + maxsizes = (0, 1, 2, 4, 8, 16, 32, 64, 128, 256) + + self.grid_options["function_cache"] = True + for n, func in enumerate(functions): + print("Cache speed test of function {func}".format(func=func)) + print("{:18s}".format(""), end="") + for x, maxsize in enumerate(maxsizes): + print("{:>9s}".format(str(maxsize)), end="") + print("") + + best = 0 + best_type = None + best_maxsize = None + for y, type in enumerate(cachetypes): + print("{:18s}".format(type), end="") + self.grid_options["function_cache_default_type"] = type + self.setup_function_cache() + (maxsize, cachetype, testargs) = self.grid_options[ + "function_cache_functions" + ].get(func) + + x = func.split(".") + modulename = "binarycpython.utils." + x[0] + module = importlib.import_module(modulename) + _method = eval("module.{}.{}".format(x[0], x[1])) + if testargs: + + def _func_wrap(*args, **kwargs): + return (args, kwargs) + + args, kwargs = eval("_func_wrap({})".format(testargs)) + for x, maxsize in enumerate(maxsizes): + if type == "NoCache" and maxsize > 0: + continue + + # redirect stdout to prevent lots of output + with contextlib.redirect_stdout(None): + + # loop for dt seconds + tfin = dt + time.time() + count = 0 + try: + while time.time() < tfin: + _method(self, *args, **kwargs) + count += 1 + # TODO: specify the exception + except Exception as e: + print("Cache call failed:", e) + self.exit(1) + + if count < 99999: + print("{:9d}".format(count), end="") + else: + print("{:9.2e}".format(float(count)), end="") + + if count > best: + best = count + best_type = type + best_maxsize = maxsize + print("") + + print( + "Best cache type {type} with maxsize {maxsize}\n".format( + type=best_type, maxsize=best_maxsize + ) + ) + + + """ +Cache speed test of function distribution_functions.powerlaw_constant + 0 1 2 4 8 16 32 64 128 256 +NoCache 6.28e+07 +NullCache 6.39e+07 6.40e+07 6.41e+07 6.39e+07 6.44e+07 6.43e+07 6.37e+07 6.40e+07 6.38e+07 6.40e+07 +FIFOCache 6.41e+07 6.37e+07 6.40e+07 6.39e+07 6.40e+07 6.37e+07 6.41e+07 6.40e+07 6.41e+07 6.40e+07 +LRUCache 6.42e+07 6.41e+07 6.42e+07 6.41e+07 6.38e+07 6.43e+07 6.41e+07 6.43e+07 6.40e+07 6.41e+07 +TTLCache 6.41e+07 6.35e+07 6.37e+07 6.39e+07 6.37e+07 6.42e+07 6.39e+07 6.38e+07 6.37e+07 6.38e+07 +Best cache type NullCache with maxsize 8 + +Cache speed test of function distribution_functions.calculate_constants_three_part_powerlaw + 0 1 2 4 8 16 32 64 128 256 +NoCache 1.44e+07 +NullCache 9.13e+06 9.18e+06 9.20e+06 9.21e+06 9.20e+06 9.12e+06 9.18e+06 9.18e+06 9.15e+06 9.12e+06 +FIFOCache 2.53e+07 2.52e+07 2.51e+07 2.50e+07 2.51e+07 2.52e+07 2.52e+07 2.52e+07 2.52e+07 2.51e+07 +LRUCache 1.62e+07 1.62e+07 1.62e+07 1.62e+07 1.62e+07 1.62e+07 1.62e+07 1.62e+07 1.62e+07 1.62e+07 +TTLCache 1.43e+07 1.43e+07 1.43e+07 1.43e+07 1.43e+07 1.44e+07 1.42e+07 1.43e+07 1.43e+07 1.43e+07 +Best cache type FIFOCache with maxsize 0 + +Cache speed test of function distribution_functions.gaussian_normalizing_const + 0 1 2 4 8 16 32 64 128 256 +NoCache 64183 +NullCache 64340 64339 64544 64260 64491 64382 64400 63974 63954 64338 +FIFOCache 2.62e+07 2.62e+07 2.62e+07 2.61e+07 2.61e+07 2.59e+07 2.61e+07 2.59e+07 2.57e+07 2.59e+07 +LRUCache 1.66e+07 1.66e+07 1.65e+07 1.66e+07 1.65e+07 1.65e+07 1.64e+07 1.65e+07 1.64e+07 1.65e+07 +TTLCache 1.42e+07 1.44e+07 1.42e+07 1.44e+07 1.43e+07 1.43e+07 1.42e+07 1.44e+07 1.42e+07 1.44e+07 +Best cache 
type FIFOCache with maxsize 1 + +Cache speed test of function spacing_functions.const_linear + 0 1 2 4 8 16 32 64 128 256 +NoCache 1.22e+06 +NullCache 1.05e+06 1.05e+06 1.06e+06 1.05e+06 1.05e+06 1.06e+06 1.05e+06 1.05e+06 1.05e+06 1.05e+06 +FIFOCache 2.85e+07 2.85e+07 2.86e+07 2.85e+07 2.84e+07 2.85e+07 2.84e+07 2.84e+07 2.85e+07 2.81e+07 +LRUCache 1.77e+07 1.79e+07 1.73e+07 1.73e+07 1.76e+07 1.79e+07 1.76e+07 1.74e+07 1.74e+07 1.72e+07 +TTLCache 1.46e+07 1.49e+07 1.50e+07 1.53e+07 1.51e+07 1.53e+07 1.52e+07 1.51e+07 1.47e+07 1.50e+07 +Best cache type FIFOCache with maxsize 2 + +Cache speed test of function spacing_functions.const_int + 0 1 2 4 8 16 32 64 128 256 +NoCache 4.23e+07 +NullCache 1.65e+07 1.66e+07 1.65e+07 1.64e+07 1.66e+07 1.65e+07 1.59e+07 1.59e+07 1.65e+07 1.64e+07 +FIFOCache 2.86e+07 2.86e+07 2.87e+07 2.86e+07 2.84e+07 2.86e+07 2.81e+07 2.79e+07 2.78e+07 2.85e+07 +LRUCache 1.78e+07 1.78e+07 1.77e+07 1.75e+07 1.77e+07 1.78e+07 1.78e+07 1.78e+07 1.74e+07 1.75e+07 +TTLCache 1.55e+07 1.54e+07 1.55e+07 1.54e+07 1.55e+07 1.49e+07 1.52e+07 1.51e+07 1.52e+07 1.54e+07 +Best cache type NoCache with maxsize 0 + +Cache speed test of function spacing_functions.const_ranges + 0 1 2 4 8 16 32 64 128 256 +NoCache 2.54e+05 +NullCache 2.25e+05 2.25e+05 2.24e+05 2.25e+05 2.25e+05 2.25e+05 2.25e+05 2.26e+05 2.25e+05 2.26e+05 +FIFOCache 2.58e+07 2.55e+07 2.53e+07 2.54e+07 2.56e+07 2.57e+07 2.56e+07 2.57e+07 2.58e+07 2.58e+07 +LRUCache 1.62e+07 1.63e+07 1.62e+07 1.62e+07 1.61e+07 1.62e+07 1.62e+07 1.62e+07 1.61e+07 1.63e+07 +TTLCache 1.41e+07 1.43e+07 1.42e+07 1.42e+07 1.40e+07 1.42e+07 1.42e+07 1.43e+07 1.40e+07 1.43e+07 +Best cache type FIFOCache with maxsize 128 + +Cache speed test of function spacing_functions.gaussian_zoom + 0 1 2 4 8 16 32 64 128 256 +NoCache 24703 +NullCache 24872 24935 24927 24896 24968 24964 24882 24840 24873 24913 +FIFOCache 2.54e+07 2.54e+07 2.54e+07 2.54e+07 2.53e+07 2.52e+07 2.53e+07 2.51e+07 2.52e+07 2.52e+07 +LRUCache 1.63e+07 1.63e+07 1.63e+07 1.64e+07 1.63e+07 1.64e+07 1.63e+07 1.63e+07 1.63e+07 1.63e+07 +TTLCache 1.43e+07 1.43e+07 1.42e+07 1.42e+07 1.43e+07 1.42e+07 1.43e+07 1.43e+07 1.43e+07 1.43e+07 +Best cache type FIFOCache with maxsize 0 + """ diff --git a/binarycpython/utils/condor.py b/binarycpython/utils/condor.py new file mode 100644 index 0000000000000000000000000000000000000000..b6746b7f8ae35b3b8b13baa19d7bb90c24972e96 --- /dev/null +++ b/binarycpython/utils/condor.py @@ -0,0 +1,550 @@ +""" +The class extension for the population object that contains the Condor functionality + +TODO: there are many uses of $<variable name> in this file but this is not perl and we should replace them by actual format placeholders +""" + +# pylint: disable=E1101 + +import os +import re +import sys +import stat +import time +import subprocess + +import pathlib + +import datasize +import lib_programname + +class condor: + """ + Extension for the Population class containing the code for Condor grid runs + """ + + def __init__(self, **kwargs): + # don't do anything: we just inherit from this class + return + + def condorID(self, ClusterID=None, Process=None): + """ + Function to return a Condor job ID. The ClusterID and Process passed in are used if given, otherwise we default to the condor_ClusterID and condor_Process in grid_options. 
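+
+        Example (illustrative):
+
+            self.condorID(ClusterID=2345, Process=17)   # returns "2345.17"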
+ """ + if ClusterID is None: + ClusterID = self.grid_options["condor_ClusterID"] + if Process is None: + Process = self.grid_options["condor_Process"] + return "{ClusterID}.{Process}".format(ClusterID=ClusterID, Process=Process) + + def condorpath(self, path, condor_dir=None): + """ + Function to return the full condor directory path. + """ + + if condor_dir is None: + condor_dir = self.grid_options["condor_dir"] + return os.path.abspath(os.path.join(condor_dir, path)) + + def condor_status_file(self, ClusterID=None, Process=None, condor_dir=None): + """ + Return the condor status file corresponding to the ClusterID and Process, which default to grid_options condor_ClusterID and condor_Process, respectively. + """ + return os.path.join( + self.condorpath("status", condor_dir=condor_dir), self.condorID(ClusterID, Process) + ) + + def condor_check_requirements(self): + """ + Function to check whether the condor parameters in grid_options have been set appropriately. + """ + if self.grid_options["condor"] > 0 and self.grid_options["condor_dir"] is None: + return ( + False, + "You have set condor={condor} but not set condor_dir (which is {condor_dir}). Please set it and try again.".format( + condor=self.grid_options["condor"], + condor_dir=self.grid_options["condor_dir"], + ), + ) + return (True, "") + + def condor_dirs(self): + """ + Directories associated specifically with this condor job. + """ + + return ["condor_dir"] + + def set_condor_status(self, string, condor_dir=None): + """ + Set the condor status corresponing to the self object, which should have condor_ClusterID and condor_Process set. + + Args: + string : the status string to be set + dir : the directory in which the status directory is held. If not set, this defaults to the HPC directory (e.g. slurm_dir or condor_dir). + """ + # save condor ClusterID to file + + if condor_dir is None: + condor_dir = self.grid_options["condor_dir"] + + idfile = os.path.join(condor_dir, "ClusterID") + if not os.path.exists(idfile): + with self.open(idfile, "w", encoding="utf-8") as fClusterID: + fClusterID.write( + "{ClusterID}\n".format( + ClusterID=self.grid_options["condor_ClusterID"] + ) + ) + fClusterID.close() + self.NFS_flush_hack(idfile) + + # save condor status + file = self.condor_status_file(condor_dir=condor_dir) + if file: + with self.open(file, "w", encoding="utf-8") as f: + f.write(string) + f.close() + self.NFS_flush_hack(file) + + def get_condor_status(self, ClusterID=None, Process=None, condor_dir=None): + """ + Get and return the condor status corresponing to the self object, or ClusterID.Process if they are passed in. If no status is found, returns an empty string.. + """ + if ClusterID is None: + ClusterID = self.grid_options["condor_ClusterID"] + if Process is None: + Process = self.grid_options["condor_Process"] + if ClusterID is None or Process is None: + return None + # print("get_condor_status {}.{}".format(ClusterID,Process)) + + try: + path = pathlib.Path( + self.condor_status_file(condor_dir=condor_dir, ClusterID=ClusterID, Process=Process) + ) + # print("path={}".format(path)) + # print("size={}".format(path.stat().st_size)) + if path: + s = path.read_text().strip() + # print("contains {}".format(s)) + return s + # print("path empty") + return "" + + # NOTE: What is the actual exception that can occur here? 
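+        # Probable answer (assumption, not verified): pathlib.Path.read_text()
+        # raises FileNotFoundError (a subclass of OSError) when the status
+        # file has not been written yet, which is common on a freshly
+        # started job.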
+ # TODO: We should specify that exception + except: + # print("read failed") + return "" + + def condor_outfile(self, condor_dir=None): + """ + return a standard filename for the condor chunk files + """ + file = "{id}.gz".format(id=self.condorID()) + if condor_dir is None: + condor_dir = self.grid_options["condor_dir"] + return os.path.abspath(os.path.join(condor_dir, "results", file)) + + def make_condor_dirs(self, condor_dir=None): + """ + Function to make the condor directories + """ + + # make the condor directories + if condor_dir is None: + condor_dir = self.grid_options["condor_dir"] + if not condor_dir: + print( + "You must set self.grid_options['condor_dir'] (or pass condor_dir=whatever to make_condor_dirs()) to a directory which we can use to set up binary_c-python's Condor files. This should be unique to your set of grids." + ) + os.exit() + + # make a list of directories, these contain the various condor + # output, status files, etc. + dirs = [] + for d in ["stdout", "stderr", "log", "results", "status", "snapshots"]: + dirs.append(self.condorpath(d, condor_dir=condor_dir)) + + # make the directories: we do not allow these to already exist + # as the condor directory should be a fresh location for each set of jobs + for d in dirs: + try: + pathlib.Path(self.condorpath(d, condor_dir=condor_dir)).mkdir( + exist_ok=False, parents=True + ) + # TODO: specify the actual exception + # TODO: is this try-except necessary? Especially having the code fail here, instead of earlier, if the directories exist already. Otherwise we can also just do exist_ok=True? + except: + print( + "Tried to make the directory {d} but it already exists. When you launch a set of binary_c jobs on Condor, you need to set your condor_dir to be a fresh directory with no contents.".format( + d=d + ) + ) + self.exit(code=1) + + # check that they have been made and exist: we need this + # because on network mounts (NFS) there's often a delay between the mkdir + # above and the actual directory being made. This shouldn't be too long... + fail = True + count = 0 + count_warn = 10 + while fail is True: + fail = False + count += 1 + if count > count_warn: + print( + "Warning: Have been waiting about {} seconds for Condor directories to be made, there seems to be significant delay...".format( + count + ) + ) + for d in dirs: + if os.path.isdir(d) is False: + fail = True + time.sleep(1) + break + + def condor_grid(self): + """ + function to be called when running grids when grid_options['condor']>=1 + + if grid_options['condor']==1, we set up the condor script and launch the jobs, then return True to exit. + if grid_options['condor']==2, we run the stars, which means we return False to continue. + if grid_options['condor']==3, we are being called from the jobs to run the grids, return False to continue. 
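+
+        Illustrative workflow sketch (assumes your population script is
+        called my_grid.py and that condor_dir points at a fresh directory):
+
+            python my_grid.py condor=1 condor_njobs=100 condor_dir=/scratch/condor_run1
+
+        Each queued job then re-runs the same script with condor=2 to evolve
+        its share of systems, and a final pass with condor=3 joins the
+        per-job results.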
+ """ + + if self.grid_options["condor"] == 3: + # joining : set the evolution type to "join" + # + # return False to continue + self.grid_options["evolution_type"] = "join" + return False + + if self.grid_options["condor"] == 2: + # run a grid of stars only, leaving the results + # in the appropriate outfile + # + # return False to actually run the stars + self.grid_options["evolution_type"] = "grid" + return False + + if self.grid_options["condor"] == 1: + # if condor=1, we should have no evolution type, we + # set up the Condor scripts and get them evolving + # in a Condor queue + self.grid_options["evolution_type"] = None + + # make dirs + self.make_condor_dirs() + + # check we're not using too much RAM + if datasize.DataSize( + self.grid_options["condor_memory"] + ) > datasize.DataSize(self.grid_options["condor_warn_max_memory"]): + print( + "WARNING: you want to use {} MB of RAM : this is unlikely to be correct. If you believe it is, set condor_warn_max_memory to something very large (it is currently {} MB)\n".format( + self.grid_options["condor_memory"], + self.grid_options["condor_warn_max_memory"], + ) + ) + self.exit(code=1) + + # get job id (might be passed in) + ClusterID = ( + self.grid_options["condor_ClusterID"] + if self.grid_options["condor_ClusterID"] != "" + else "$ClusterID" + ) + + # get job array index + Process = ( + self.grid_options["condor_Process"] + if self.grid_options["condor_Process"] != "" + else "$Process" + ) + + if self.grid_options["condor_njobs"] == 0: + print( + "binary_c-python Condor : You must set grid_option condor_njobs to be non-zero" + ) + self.exit(code=1) + + # find the path to the Python script that we are running + pyscriptpath = str(lib_programname.get_path_executed_script()) + + # set the condor initial dir to be our current working directory + if not self.grid_options["condor_initial_dir"]: + self.grid_options["condor_initial_dir"] = os.getcwd() + + # build the grid command + grid_command = ( + [ + str(self.grid_options["condor_env"]), + sys.executable, + pyscriptpath, + ] + + sys.argv[1:] + + [ + "start_at=$Process", # Process is 0,1,2... 
which is what we want + "modulo=" + str(self.grid_options["condor_njobs"]), + "condor_njobs=" + str(self.grid_options["condor_njobs"]), + "condor_dir=" + self.grid_options["condor_dir"], + "verbosity=" + str(self.grid_options["verbosity"]), + "num_cores=" + str(self.grid_options["num_processes"]), + ] + ) + + grid_command = " ".join(grid_command) + + # make condor script paths + submit_script_path = self.condorpath("condor_submit_script") + job_script_path = self.condorpath("condor_job_script") + + # open the files + try: + submit_script = self.open(submit_script_path, "w", encoding="utf-8") + except IOError: + print( + "Could not open Condor script at {path} for writing: please check you have set {condor_dir} correctly (it is currently {condor_dir} and can write to this directory.".format( + path=submit_script_path, + condor_dir=self.grid_options["condor_dir"], + ) + ) + try: + job_script = self.open(job_script_path, "w", encoding="utf-8") + except IOError: + print( + "Could not open Condor script at {path} for writing: please check you have set {condor_dir} correctly (it is currently {condor_dir} and can write to this directory.".format( + path=job_script_path, condor_dir=self.grid_options["condor_dir"] + ) + ) + + ############################################################ + # The condor job script calls your binary_c-pthyon script + ############################################################ + condor_job_script = """#!{bash} +echo "Condor Job Args: $@" + +# first two arguments are ClusterID and Process +export ClusterID=$1 +export Process=$2 +shift 2 + +echo "Job ClusterID $ClusterID Process $Process" + +# Set binary_c startup conditions +export BINARY_C_PYTHON_ORIGINAL_CMD_LINE={cmdline} +export BINARY_C_PYTHON_ORIGINAL_WD=`{pwd}` +export BINARY_C_PYTHON_ORIGINAL_SUBMISSION_TIME=`{date}` + +# set status to \"running\" +echo \"running\" > {condor_dir}/status/$ClusterID.$ProcessID + +# make list of files which is checked for joining +# echo {condor_dir}/results/$ClusterID.$Process.gz >> {condor_dir}/results/$ClusterID.all + +# run grid of stars and, if this returns 0, set status to finished +{grid_command} condor=2 evolution_type=grid condor_ClusterID=$ClusterID condor_Process=$Process save_population_object={condor_dir}/results/$ClusterID.$Process.gz && echo -n \"finished\" > {condor_dir}/status/$ClusterID.$ProcessID && echo """.format( + bash=self.grid_options["condor_bash"], + date=self.grid_options["condor_date"], + pwd=self.grid_options["condor_pwd"], + cmdline=repr(self.grid_options["command_line"]), + grid_command=grid_command, + condor_dir=self.grid_options["condor_dir"], + ) + + if not self.grid_options["condor_postpone_join"]: + joinfile = "{condor_dir}/results/{ClusterID}.all".format( + condor_dir=self.grid_options["condor_dir"], ClusterID=ClusterID + ) + condor_job_script += """&& echo \"Checking if we can join...\" && echo && {grid_command} condor=3 evolution_type=join joinlist={joinfile} condor_ClusterID=$ClusterID condor_Process=$Process + """.format( + bash=self.grid_options["condor_bash"], + grid_command=grid_command, + joinfile=joinfile, + ) + + ############################################################ + # The Condor submit script is sent to condor_submit + # In here we know $(Cluster) and $(Process) which identify + # each job + ############################################################ + extra_settings = "" + if self.grid_options["condor_extra_settings"]: + for key in self.grid_options["condor_extra_settings"]: + extra_settings += "{key} = {value}\n".format( + 
key=key, + value=self.grid_options["condor_extra_settings"][key], + ) + + jobid = "$(Cluster).$(Process)" + condor_submit_script = """ +executable = {usr_bin_env} +arguments = {bash} {job_script_path} $(Cluster) $(Process) +universe = {universe} +getenv = {getenv} +initial_dir = {initial_dir} +output = {outfile} +error = {errfile} +log = {logfile} +stream_output = {stream_output} +stream_error = {stream_error} +request_memory = {request_memory} +request_cpus = {request_cpus} +should_transfer_files = {should_transfer_files} +when_to_transfer_output = {when_to_transfer_output} +requirements = {requirements} +JobBatchName = {batchname} +kill_sig = {kill_sig} +{extra_settings} +queue {njobs} + """.format( + usr_bin_env=self.grid_options["condor_env"], + bash=self.grid_options["condor_bash"], + job_script_path=job_script_path, + universe=self.grid_options["condor_universe"], + getenv=self.grid_options["condor_getenv"], + initial_dir=self.grid_options["condor_initial_dir"], + outfile=os.path.abspath( + os.path.join(self.grid_options["condor_dir"], "stdout", jobid) + ), + errfile=os.path.abspath( + os.path.join(self.grid_options["condor_dir"], "stderr", jobid) + ), + logfile=os.path.abspath( + os.path.join(self.grid_options["condor_dir"], "log", jobid) + ), + stream_output=self.grid_options["condor_stream_output"], + stream_error=self.grid_options["condor_stream_error"], + request_memory=self.grid_options["condor_memory"], + request_cpus=self.grid_options["num_processes"], + should_transfer_files=self.grid_options[ + "condor_should_transfer_files" + ], + when_to_transfer_output=self.grid_options[ + "condor_when_to_transfer_output" + ], + requirements=self.grid_options["condor_requirements"], + batchname=self.grid_options["condor_batchname"], + kill_sig=self.grid_options["condor_kill_sig"], + extra_settings=extra_settings, + njobs=self.grid_options["condor_njobs"], + ) + + # write the scripts, close them and make them executable by + # all (so the condor user can pick it up) + for file, contents in [ + (submit_script, condor_submit_script), + (job_script, condor_job_script), + ]: + path = file.name + file.writelines(contents) + file.close() + os.chmod( + path, + stat.S_IREAD + | stat.S_IWRITE + | stat.S_IEXEC + | stat.S_IRGRP + | stat.S_IXGRP + | stat.S_IROTH + | stat.S_IXOTH, + ) + + if not self.grid_options["condor_postpone_submit"]: + # call sbatch to launch the condor jobs + cmd = [self.grid_options["condor_submit"], submit_script_path] + with subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) as pipes: + std_out, std_err = pipes.communicate() + + if pipes.returncode != 0: + # an error happened! 
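+                    # colour condor_submit's stderr red and raise, so a
+                    # failed submission is not silently ignored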
+ err_msg = "{red}{err}\nReturn Code: {code}{reset}".format( + err=std_err.strip(), + code=pipes.returncode, + red=self.ANSI_colours["red"], + reset=self.ANSI_colours["reset"], + ) + raise Exception(err_msg) + + + if len(std_err) > 0: + print( + "{red}{err}{reset}".format( + red=self.ANSI_colours["red"], + reset=self.ANSI_colours["reset"], + err=std_err.strip().decode("utf-8"), + ) + ) + + print( + "{yellow}{out}{reset}".format( + yellow=self.ANSI_colours["yellow"], + reset=self.ANSI_colours["reset"], + out=std_out.strip().decode("utf-8"), + ) + ) + else: + # just say we would have (use this for testing) + print( + "Condor script is at {path} but has not been launched".format( + path=submit_script_path + ) + ) + + # some messages to the user, then return + if self.grid_options["condor_postpone_submit"] == 1: + print( + "Condor script written, to {path}, but launching the jobs with sbatch was postponed.".format( + path=submit_script_path + ) + ) + else: + print("Condor jobs launched.") + print("All done in condor_grid().") + + # return True so we exit immediately + return True + + def condor_queue_stats(self): + """ + Return condor queue statistics for this job + """ + + _id = self.grid_options["condor_ClusterID"] + if not _id: + return None + + cmd = "{} {} 2>&1".format( + "/usr/bin/condor_q", _id # self.grid_options["condor_q"], + ) + print("Q cmd", cmd) + + with subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) as subp: + result = subp.stdout.read() + print("Q result ", result) + + if not result: + return None + + d = {} + for x in [ + "jobs", + "completed", + "removed", + "idle", + "running", + "held", + "suspended", + ]: + print("Q x ", x) + m = re.search("(\d+)\s+{}".format(x), result) + print("Q m ", m) + if m: + d[x] = m.group(0) + + print("Q d ", d) + return d diff --git a/binarycpython/utils/custom_logging_functions.py b/binarycpython/utils/custom_logging_functions.py index 12d7632135c03d2081f0f486b73e391dee459c84..7298ce86c853a9583bf61a0f2ab1f64acf95cf66 100644 --- a/binarycpython/utils/custom_logging_functions.py +++ b/binarycpython/utils/custom_logging_functions.py @@ -4,12 +4,14 @@ The functions here make it possible for the user to define binaryc output logs o """ import os +import uuid +import ctypes +import socket import textwrap import subprocess -import socket -import ctypes -import uuid -from typing import Union, Tuple, Optional + +from typing import Tuple, Optional + from binarycpython.utils.functions import temp_dir, remove_file, verbose_print @@ -242,7 +244,7 @@ def return_compilation_dict(verbosity: int = 0) -> dict: else: raise NameError("Envvar BINARY_C doesnt exist") - # TODO: make more options for the compiling + # cc = from_binary_c_config(BINARY_C_CONFIG, "cc") # Check for binary_c @@ -251,7 +253,7 @@ def return_compilation_dict(verbosity: int = 0) -> dict: print("We require binary_c executable; have you built binary_c?") raise NameError("BINARY_C executable doesnt exist") - # TODO: debug + # libbinary_c = "-lbinary_c" binclibs = from_binary_c_config(BINARY_C_CONFIG, "libs") libdirs = "{} -L{}".format( @@ -321,7 +323,6 @@ def compile_shared_lib( """ Function to write the custom logging code to a file and then compile it. 
- TODO: nicely put in the -fPIC TODO: consider returning a status Args: diff --git a/binarycpython/utils/dataIO.py b/binarycpython/utils/dataIO.py new file mode 100644 index 0000000000000000000000000000000000000000..01d27fed6af07dfed83d8929134ee95962fb5cf6 --- /dev/null +++ b/binarycpython/utils/dataIO.py @@ -0,0 +1,786 @@ +""" +File containing the class extension for the population object that contains data input-output (IO) functions +""" + +# pylint: disable=E1101 + +import os +import bz2 +import gzip +import copy +import json +import time +import datetime +import subprocess +from typing import Union + +import msgpack +import flufl.lock +import compress_pickle + +from binarycpython.utils.ensemble import ( + ensemble_file_type, +) +from binarycpython.utils.dicts import ( + merge_dicts, +) +from binarycpython.utils.functions import ( + verbose_print, +) + + +class dataIO: + """ + Class extension for the population object that contains data input-output (IO) functions + """ + + def __init__(self, **kwargs): + # don't do anything: we just inherit from this class + return + + def dir_ok(self, directory): + """ + Function to test if we can read and write to a directory that must exist. Return True if all is ok, False otherwise. + """ + + return os.access(directory, os.F_OK) and os.access(directory, os.R_OK | os.W_OK) + + def save_population_object( + self, population_object=None, filename=None, confirmation=True, compression="gzip" + ): + """ + Save pickled Population object to file at filename or, if filename is None, whatever is set at self.grid_options['save_population_object'] + + Args: + population_object : the object to be saved to the file. If population_object is None, use self. + filename : the name of the file to be saved. If not set, use self.grid_options['save_population_object'] + confirmation : if True, a file "filename.saved" is touched just after the dump, so we know it is finished. TODO: fix this + compression (optional, default = "gzip"): TODO: fix this + + Compression is performed according to the filename, as stated in the + compress_pickle documentation at + https://lucianopaz.github.io/compress_pickle/html/ + + Shared memory, stored in the population_object.shared_memory dict, is not saved. 
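+
+        Example (illustrative sketch; assumes pop is a Population instance):
+
+            pop.save_population_object(filename="results/run1.gz")
+            restored = pop.load_population_object("results/run1.gz")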
+ """ + + if population_object is None: + # default to using self + population_object = self + + if filename is None: + # get filename from self + filename = self.grid_options["save_population_object"] + + if filename: + print( + "Save population {id}, probtot {probtot} to pickle in {filename}".format( + id=self.grid_options["_population_id"], + probtot=population_object.grid_options["_probtot"], + filename=filename, + ) + ) + + # Some parts of the population_object cannot be pickled: + # remove them, and restore them after pickling + + # remove shared memory + shared_memory = population_object.shared_memory + population_object.shared_memory = None + + # delete system generator + system_generator = population_object.grid_options["_system_generator"] + population_object.grid_options["_system_generator"] = None + + # delete _store_memaddr + _store_memaddr = population_object.grid_options["_store_memaddr"] + population_object.grid_options["_store_memaddr"] = None + + # delete persistent_data_memory_dict + persistent_data_memory_dict = population_object.persistent_data_memory_dict + population_object.persistent_data_memory_dict = None + + # add metadata if it doesn't exist + if not "metadata" in population_object.grid_ensemble_results: + population_object.grid_ensemble_results["metadata"] = {} + + # add datestamp + population_object.grid_ensemble_results["metadata"][ + "save_population_time" + ] = self.now() + + # add extra metadata + population_object.add_system_metadata() + + # add max memory use + try: + self.grid_ensemble_results["metadata"][ + "max_memory_use" + ] = copy.deepcopy(sum(shared_memory["max_memory_use_per_thread"])) + except Exception as e: + print("save_population_object : Error: ", e) + raise Exception(e) from e + + # dump pickle file + compress_pickle.dump(population_object, filename, pickler_method="dill") + + # restore data + population_object.shared_memory = shared_memory + population_object.grid_options["_system_generator"] = system_generator + del population_object.grid_ensemble_results["metadata"]["save_population_time"] + population_object.grid_options["store_memaddr"] = _store_memaddr + population_object.persistent_data_memory_dict = persistent_data_memory_dict + + self.NFS_flush_hack(filename) + + # touch 'saved' file + saved = filename + ".saved" + self.HPC_touch(saved) + + def load_population_object(self, filename): + """ + returns the Population object loaded from filename + """ + + self.NFS_flush_hack(filename) + if filename is None: + obj = None + else: + try: + obj = compress_pickle.load(filename, pickler_method="dill") + except Exception as e: + obj = None + print("Loading of the compressed object went wrong: {}".format(e)) + + return obj + + def merge_populations(self, refpop, newpop): + """ + merge newpop's results data into refpop's results data + + Args: + refpop : the original "reference" Population object to be added to + newpop : Population object containing the new data + + Returns: + nothing + + Note: + The file should be saved using save_population_object() + """ + + # combine data + refpop.grid_results = merge_dicts(refpop.grid_results, newpop.grid_results) + + # special cases + maxmem = 0 + if "max_memory_use" in refpop.grid_ensemble_results.get("metadata", {}) and "max_memory_use" in newpop.grid_ensemble_results.get("metadata", {}): + maxmem = max( + refpop.grid_ensemble_results["metadata"]["max_memory_use"], + newpop.grid_ensemble_results["metadata"]["max_memory_use"], + ) + + try: + # special cases: + # copy the settings + settings = None + if 
"settings" in newpop.grid_ensemble_results.get("metadata", {}): + settings = copy.deepcopy( + newpop.grid_ensemble_results["metadata"]["settings"] + ) + if settings: + refpop.grid_ensemble_results["metadata"]["settings"] = settings + + # Copy the Xinit + Xinit = None + if "Xinit" in newpop.grid_ensemble_results.get("ensemble", {}): + Xinit = copy.deepcopy(newpop.grid_ensemble_results["ensemble"]["Xinit"]) + if Xinit: + refpop.grid_ensemble_results["ensemble"]["Xinit"] = Xinit + + # merge the ensemble dicts + refpop.grid_ensemble_results = merge_dicts( + refpop.grid_ensemble_results, newpop.grid_ensemble_results + ) + + # set special cases + refpop.grid_ensemble_results["metadata"]["max_memory_use"] = maxmem + + except Exception as e: + print("Error merging grid_ensemble_results:", e) + raise Exception(e) from e + + for key in ["_probtot"]: + refpop.grid_options[key] += newpop.grid_options[key] + + refpop.grid_options["_killed"] |= newpop.grid_options["_killed"] + + def merge_populations_from_file(self, refpop, filename): + """ + Wrapper for merge_populations so it can be done directly + from a file. + + Args: + refpop : the original "reference" Population object to be added to + filename : file containing the Population object containing the new data + + Note: + The file should be saved using save_population_object() + """ + + mtime = time.localtime(os.path.getmtime(filename)) + modtime = time.strftime("%a, %d %b %Y %H:%M:%S", mtime) + print( + "Load data from {filename} : size {size}, modtime {modtime}".format( + filename=filename, + size=os.path.getsize(filename), + modtime=modtime, + ) + ) + + newpop = self.load_population_object(filename) + + if 'total_count' in newpop.grid_options: + n = newpop.grid_options["total_count"] + elif "_count" in newpop.grid_options: + n = newpop.grid_options["_count"] + elif 'metadata' in newpop.grid_ensemble_results and "_count" in newpop.grid_ensemble_results["metadata"]: + n = newpop.grid_ensemble_results["metadata"]["_count"] + else: + n = -1 + print("Loaded data from {n} stars".format(n=n)) + + # merge with refpop + self.merge_populations(refpop, newpop) + + def snapshot_filename(self): + """ + Automatically choose the snapshot filename. + """ + if self.HPC_job(): + return self.HPC_snapshot_filename() + + file = os.path.join(self.grid_options["tmp_dir"], "snapshot.gz") + return file + + def load_snapshot(self, file): + """ + Load a snapshot from file and set it in the preloaded_population placeholder. + """ + newpop = self.load_population_object(file) + + # unset the _killed flag, in case it was set + newpop.grid_options["_killed"] = False + + # set in preloaded_population for later merge + self.preloaded_population = newpop + + # set the start position for new stars + self.grid_options["start_at"] = newpop.grid_options["start_at"] + + print( + "Loaded from snapshot at {file} : {nstars} stars, start at star {nstart}".format( + file=file, + nstars=0, # self.grid_options[''], + nstart=self.grid_options["start_at"], + ) + ) + return + + def save_snapshot(self, file=None): + """ + Save the population object to a snapshot file, automatically choosing the filename if none is given. + """ + if file is None: + file = self.snapshot_filename() + + if "_count" in self.grid_options: + n = self.grid_options["_count"] + else: + n = "?" 
+ + print("Saving snapshot containing {} stars to {}".format(n, file)) + self.save_population_object(object=self, filename=file) + + def write_ensemble( + self, + output_file, + data=None, + sort_keys=True, + indent=4, + encoding="utf-8", + ensure_ascii=False, + ): + """ + write_ensemble : Write ensemble results to a file. + + Args: + output_file : the output filename. + + If the filename has an extension that we recognise, + e.g. .gz or .bz2, we compress the output appropriately. + + The filename should contain .json or .msgpack, the two + currently-supported formats. + + Usually you'll want to output to JSON, but we can + also output to msgpack. + + data : the data dictionary to be converted and written to the file. + If not set, this defaults to self.grid_ensemble_results. + + sort_keys : if True, and output is to JSON, the keys will be sorted. + (default: True, passed to json.dumps) + + indent : number of space characters used in the JSON indent. (Default: 4, + passed to json.dumps) + + encoding : file encoding method, usually defaults to 'utf-8' + + ensure_ascii : the ensure_ascii flag passed to json.dump and/or json.dumps + (Default: False) + """ + + # get the file type + file_type = ensemble_file_type(output_file) + + # default to using grid_ensemble_results if no data is given + if data is None: + data = self.grid_ensemble_results + + if not file_type: + print( + "Unable to determine file type from ensemble filename {} : it should be .json or .msgpack.".format(output_file) + ) + self.exit(code=1) + else: + f = self.open(output_file, "wt", encoding=encoding) + if file_type == "JSON": + # JSON output + f.write( + json.dumps( + data, + sort_keys=sort_keys, + indent=indent, + ensure_ascii=ensure_ascii, + ) + ) + elif file_type == "msgpack": + # msgpack output + msgpack.dump(data, f) + f.close() + + print( + "Thread {thread}: Wrote ensemble results to file: {colour}{file}{reset} (file type {file_type})".format( + thread=self.process_ID, + file=output_file, + colour=self.ANSI_colours["green"], + reset=self.ANSI_colours["reset"], + file_type=file_type, + ) + ) + + def write_binary_c_calls_to_file( + self, + output_dir: Union[str, None] = None, + output_filename: Union[str, None] = None, + include_defaults: bool = False, + encoding="utf-8", + ) -> None: + """ + Function that loops over the grid code and writes the generated parameters to a file. + In the form of a command line call + + Only useful when you have a variable grid as system_generator. MC wouldn't be that useful + + Also, make sure that in this export there are the basic parameters + like m1,m2,sep, orb-per, ecc, probability etc. + + On default this will write to the datadir, if it exists + + Args: + output_dir: (optional, default = None) directory where to write the file to. If custom_options['data_dir'] is present, then that one will be used first, and then the output_dir + output_filename: (optional, default = None) filename of the output. If not set it will be called "binary_c_calls.txt" + include_defaults: (optional, default = None) whether to include the defaults of binary_c in the lines that are written. Beware that this will result in very long lines, and it might be better to just export the binary_c defaults and keep them in a separate file. + + Returns: + filename: filename that was used to write the calls to + """ + + # Check if there is no compiled grid yet. If not, lets try to build it first. 
+ if not self.grid_options["_system_generator"]: + + ## check the settings: + if self.bse_options.get("ensemble", None): + if self.bse_options["ensemble"] == 1: + if not self.bse_options.get("ensemble_defer", 0) == 1: + verbose_print( + "Error, if you want to run an ensemble in a population, the output needs to be deferred", + self.grid_options["verbosity"], + 0, + ) + raise ValueError + + # Put in check + if len(self.grid_options["_grid_variables"]) == 0: + print("Error: you haven't defined any grid variables! Aborting") + raise ValueError + + # + self._generate_grid_code(dry_run=False) + + # + self._load_grid_function() + + # then if the _system_generator is present, we go through it + if self.grid_options["_system_generator"]: + # Check if there is an output dir configured + if self.custom_options.get("data_dir", None): + binary_c_calls_output_dir = self.custom_options["data_dir"] + # otherwise check if there's one passed to the function + else: + if not output_dir: + print( + "Error. No data_dir configured and you gave no output_dir. Aborting" + ) + raise ValueError + binary_c_calls_output_dir = output_dir + + # check if there's a filename passed to the function + if output_filename: + binary_c_calls_filename = output_filename + # otherwise use default value + else: + binary_c_calls_filename = "binary_c_calls.txt" + + binary_c_calls_full_filename = os.path.join( + binary_c_calls_output_dir, binary_c_calls_filename + ) + print("Writing binary_c calls to {}".format(binary_c_calls_full_filename)) + + # Write to file + with self.open( + binary_c_calls_full_filename, "w", encoding=encoding + ) as file: + # Get defaults and clean them, then overwrite them with the set values. + if include_defaults: + # TODO: make sure that the defaults here are cleaned up properly + cleaned_up_defaults = self.cleaned_up_defaults + full_system_dict = cleaned_up_defaults.copy() + full_system_dict.update(self.bse_options.copy()) + else: + full_system_dict = self.bse_options.copy() + + for system in self.grid_options["_system_generator"](self): + # update values with current system values + full_system_dict.update(system) + + binary_cmdline_string = self._return_argline(full_system_dict) + file.write(binary_cmdline_string + "\n") + else: + print("Error. No grid function found!") + raise ValueError + + return binary_c_calls_full_filename + + def set_status(self, string, format_statment="process_{}.txt", ID=None): + """ + Function to set the status string in its appropriate file + """ + + if ID is None: + ID = self.process_ID + + if self.grid_options["status_dir"]: + path = os.path.join( + self.grid_options["status_dir"], + format_statment.format(ID), + ) + with self.open(path, "w", encoding="utf-8") as f: + f.write(string) + f.close() + self.NFS_flush_hack(path) + + # custom logging functions for HPC jobs + if self.HPC_job(): + self.HPC_set_status(string) + + def locked_close(self, file, lock): + """ + Partner function to locked_open_for_write() + + Closes and unlocks the file + """ + if file: + file.close() + if lock: + lock.unlock() + if file: + self.NFS_flush_hack(file.name) + + def wait_for_unlock(self, filename, lock_suffix=".lock"): + """ + Companion to locked_open_for_write that waits for a filename + to a) exist and b) be unlocked. + + This should work because the lock file is created before the file + is created. 
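+
+        Example (illustrative): block until another process has finished
+        writing, and has unlocked, a shared results file:
+
+            self.wait_for_unlock("/tmp/hpc_run/results/1234.all")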
+ """ + while not os.path.isfile(filename): + time.sleep(0.25) + while os.path.isfile(filename + lock_suffix): + time.sleep(0.25) + + def locked_open_for_write( + self, + filename, + encoding="utf-8", + lock_suffix=".lock", + lock_timeout=5, + lock_lifetime=60, + exists_ok=False, + fatal_open_errors=True, + vb=False, + **kwargs, + ): + """ + Wrapper for Python's open(filename) which opens a file at + filename for writing (mode "w") and locks it. + + We check whether the file's lockfile already exists, in which + case just return (None,None), and if we cannot obtain a + lock on the file we also return (None,None). + + If the file does not exist, we keep trying to lock until it does. + + To do the locking, we use flufl.lock which is NFS safe. + + Args: + lock_lifetime: (passed to flufl.lock.Lock()) default 60 seconds. + It should take less than this time to write the file. + lock_timeout: (passed to flufl.lock.Lock()) default 5 seconds. + This should be non-zero. + fatal_open_errors: if open() fails and fatal_open_errors is True, exit. + exists_ok: if False and the file at filename exists, return (None,None) (default False) + vb: verbose logging if True, defaults to False + + Returns: + (file_object, lock_object) tuple. + If the file was not opened, returns (None,None). + """ + + if exists_ok is False and os.path.isfile(filename): + if vb: + print("File at {} already exists: cannot write to it".format(filename)) + return (None, None) + + # set the lockfile path: this should be the same + # for all processes, so it's just the original file + # plus the lock_suffix + lockfilename = filename + lock_suffix + if vb: + print("lockfile={}".format(lockfilename)) + + while True: + # if the file exists, just return + if os.path.isfile(lockfilename): + if vb: + print( + "lockfile at {} already exists (corresponding to file at {})".format( + lockfilename, filename + ) + ) + return (None, None) + + # make the lock object by opening the lockfile + lock = flufl.lock.Lock(lockfilename, default_timeout=lock_timeout) + if vb: + print("post-lock: {}".format(lock)) + + if lock: + # we have the lockfile, so set the lifetime and try to lock it + lock.lifetime = datetime.timedelta(seconds=lock_lifetime) + try: + if vb: + print("try to lock {}".format(lock)) + lock.lock() + if vb: + if lock.is_locked: + print("locked {}".format(lock)) + else: + print("failed to lock {}".format(lock)) + except: + pass + + # if we acquired the lock, try to open the file + if lock.is_locked: + if vb: + print( + "{} is locked by {} to {}".format( + filename, lock, lockfilename + ) + ) + + if exists_ok is False and os.path.isfile(filename): + if vb: + print( + "File at {} already exists (2): cannot write to it, unlocking and returning (None,None)".format( + filename + ) + ) + lock.unlock() + return (None, None) + + # All is apparently ok: file is locked + try: + if vb: + print("Try to open file at {}".format(filename)) + f = self.open(filename, mode="w", encoding=encoding, **kwargs) + if vb: + print("Return locked file {}, {}".format(f, lock)) + return (f, lock) + + # error on open should be fatal + except Exception as e: + print("Error in locked_open_for_write() : {}".format(e)) + if fatal_open_errors: + if vb: + print("fatal exit on open") + self.exit(1) + else: + if vb: + print("unlock {}".format(lock)) + lock.unlock() + if vb: + print("unlocked {} return None,None".format(lock)) + return (None, None) + + # failed to lock this time, keep trying + # (we shouldn't lock up the CPU because the timeout is non-zero) + continue + + def 
NFS_flush_hack(self, filename): + """ + Use opendir()/closedir() to flush NFS access to a file. + Note: this may or may not work! + """ + os.sync() + dirname = os.path.dirname(filename) + os.scandir(dirname) + + def compression_type(self, filename): + """ + Return the compression type of the ensemble file, based on its filename extension. + """ + + if filename.endswith(".bz2"): + return "bzip2" + if filename.endswith(".gz"): + return "gzip" + + return None + + def open( + self, + file, + mode="r", + buffering=-1, + encoding=None, + errors=None, + newline=None, + closefd=True, + opener=None, + compression=None, + compresslevel=None, + vb=False, + ): + """ + Wrapper for open() with automatic compression based on the file extension. + """ + + if compression is None: + compression = self.compression_type(file) + + if vb: + print( + 'open() file at "{file}" with mode = {mode}, compression {compression}, compresslevel {compresslevel}'.format( + file=file, + compression=compression, + compresslevel=compresslevel, + mode=mode, + ) + ) + + if compression: + if compresslevel is None: + compresslevel = 9 + if "b" not in mode: + # if we don't specify binary-mode, the gzip module + # defaults to binary, which isn't compatible with JSON, + # so default to text if not specified otherwise + mode += "t" + if vb: + print("open() adding text mode") + else: + encoding = None + errors = None + newline = None + if vb: + print("open() setting encoding=errors=newline=None") + if compression == "bzip2": + file_object = bz2.open( + file, + mode=mode, + compresslevel=compresslevel, + encoding=encoding, + errors=errors, + newline=newline, + ) + elif compression == "gzip": + file_object = gzip.open( + file, + mode=mode, + compresslevel=compresslevel, + encoding=encoding, + errors=errors, + newline=newline, + ) + else: + file_object = open( + file, + mode=mode, + buffering=buffering, + encoding=encoding, + errors=errors, + newline=newline, + closefd=closefd, + opener=opener, + ) + + if vb: + print("open() return file_object =", file_object) + return file_object + + def NFSpath(self, path): + """ + Test path to see if it's on an NFS mount. + + Args: + path : the path to be tested + + Returns: + True : if on an NFS mount point. + False : if not. + None : if the path does not exist. + """ + + if os.path.exists(path): + cmd = 'stat -f -L -c %T "' + path + '"' + return ( + "nfs" + in subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) + .stdout.read() + .decode() + ) + + return None diff --git a/binarycpython/utils/dicts.py b/binarycpython/utils/dicts.py index 63a2209e53b7e64175b8cf2fa6012094c6c6b20c..419091e221c66516d11e8e964085e9ef5430b17e 100644 --- a/binarycpython/utils/dicts.py +++ b/binarycpython/utils/dicts.py @@ -1,21 +1,20 @@ """ -Module containing functions that binary_c-python uses to modify -dictionaries. +Module containing functions that binary_c-python uses to modify dictionaries. """ -import copy + import collections + import astropy.units as u import numpy as np -from collections import ( - OrderedDict, -) - -# we need to convert keys to floats: -# this is ~ a factor 10 faster than David's -# recursive_change_key_to_float routine, -# probably because this version only does -# the float conversion, nothing else. 
+ def keys_to_floats(json_data): + """ + Function to convert all the keys of the dictionary to float to float + + we need to convert keys to floats: + this is ~ a factor 10 faster than David's recursive_change_key_to_float routine, probably because this version only does the float conversion, nothing else. + """ + # assumes nested dicts ... # new_data = {} @@ -28,21 +27,29 @@ def keys_to_floats(json_data): new_data = type(json_data)() for k, v in json_data.items(): + # convert key to a float, if we can + # otherwise leave as is + try: + newkey = float(k) + except ValueError: + newkey = k + + # act on value(s) if isinstance(v, list): - v = [ + # list data + new_data[newkey] = [ keys_to_floats(item) if isinstance(item, collections.abc.Mapping) else item for item in v ] elif isinstance(v, collections.abc.Mapping): - # dict, ordereddict, etc. - v = keys_to_floats(v) - try: - f = float(k) - new_data[f] = json_data[k] - except: - new_data[k] = v + # dict, ordereddict, etc. data + new_data[newkey] = keys_to_floats(v) + else: + # assume all other data are scalars + new_data[newkey] = v + return new_data @@ -54,19 +61,15 @@ def recursive_change_key_to_float(input_dict): Does not work with lists as values """ - new_dict = collections.OrderedDict() # TODO: check if this still works + new_dict = collections.OrderedDict() for key in input_dict: if isinstance(input_dict[key], (dict, collections.OrderedDict)): try: num_key = float(key) - new_dict[num_key] = recursive_change_key_to_float( - input_dict[key] - ) + new_dict[num_key] = recursive_change_key_to_float(input_dict[key]) except ValueError: - new_dict[key] = recursive_change_key_to_float( - input_dict[key] - ) + new_dict[key] = recursive_change_key_to_float(input_dict[key]) else: try: num_key = float(key) @@ -77,27 +80,23 @@ def recursive_change_key_to_float(input_dict): return new_dict -def recursive_change_key_to_string(input_dict): +def recursive_change_key_to_string(input_dict, custom_format="{:g}"): """ Function to recursively change the key back to a string but this time in a format that we decide """ - new_dict = collections.OrderedDict() # TODO: check if this still works + new_dict = collections.OrderedDict() for key in input_dict: if isinstance(input_dict[key], (dict, collections.OrderedDict)): if isinstance(key, (int, float)): - string_key = "{:g}".format(key) - new_dict[string_key] = recursive_change_key_to_string( - input_dict[key] - ) + string_key = custom_format.format(key) + new_dict[string_key] = recursive_change_key_to_string(input_dict[key]) else: - new_dict[key] = recursive_change_key_to_string( - input_dict[key] - ) + new_dict[key] = recursive_change_key_to_string(input_dict[key]) else: if isinstance(key, (int, float)): - string_key = "{:g}".format(key) + string_key = custom_format.format(key) new_dict[string_key] = input_dict[key] else: new_dict[key] = input_dict[key] @@ -134,13 +133,14 @@ def _recursive_normalize_floats(path, d, const, parent=None, ignore=None): # must be a float, multiply by the constant _nested_set(parent, path, v * const) path.pop() - elif isinstance(v, str) or isinstance(v, int): + elif isinstance(v, (str, int)): path.append(k) # do nothing to strings or ints path.pop() elif v is None: path.append(k) path.pop() + # dicts # note: isinstance isn't enough, we need to check the Mapping elif isinstance(v, collections.abc.Mapping): @@ -265,13 +265,12 @@ def subtract_dicts(dict_1: dict, dict_2: dict) -> dict: del new_dict[key] else: - msg = "Error key: {} value: {} type: {} and key: {} value: {} type: {} are not 
of the same type and cannot be merged".format( - key, - dict_1[key], - type(dict_1[key]), - key, - dict_2[key], - type(dict_2[key]), + msg = "Error key: {key} value: {value1} type: {type} and key: {key} value: {value2} type: {type2} are not of the same type and cannot be merged".format( + key=key, + value1=dict_1[key], + type=type(dict_1[key]), + value2=dict_2[key], + type2=type(dict_2[key]), ) print(msg) @@ -291,7 +290,6 @@ def subtract_dicts(dict_1: dict, dict_2: dict) -> dict: new_dict[key] = subtract_dicts(dict_1[key], dict_2[key]) # Remove entry if it results in an empty dict - # TODO: write test to prevent empty dicts from showing up if not new_dict[key]: del new_dict[key] else: @@ -355,7 +353,7 @@ def inspect_dict( type(input_dict[key]) (except if the value is a dict) """ - structure_dict = collections.OrderedDict() # TODO: check if this still works + structure_dict = collections.OrderedDict() # for key, value in input_dict.items(): @@ -398,6 +396,10 @@ def merge_dicts(dict_1: dict, dict_2: dict) -> dict: - dictionaries will be merged by calling recursively calling this function again - numbers will be added - (opt) lists will be appended + - booleans are merged with logical OR + - identical strings are just set to the string + - non-identical strings are concatenated + - NoneTypes are set to None - In the case that the instances do not match: for now I will raise an error Args: @@ -410,7 +412,7 @@ def merge_dicts(dict_1: dict, dict_2: dict) -> dict: """ # Set up new dict - new_dict = collections.OrderedDict() # TODO: check if this still necessary + new_dict = collections.OrderedDict() # keys_1 = dict_1.keys() @@ -449,16 +451,51 @@ def merge_dicts(dict_1: dict, dict_2: dict) -> dict: ): new_dict[key] = merge_dicts(dict_1[key], dict_2[key]) + # string-int clash : convert both to ints and save + elif ( + isinstance(dict_1[key], str) + and isinstance(dict_2[key], int) + or isinstance(dict_1[key], int) + and isinstance(dict_2[key], str) + ): + try: + new_dict[key] = int(dict_1[key]) + int(dict_2[key]) + except: + print( + "key", + key, + ': Failed to convert string, one of "{}" or "{}" to an int'.format( + dict_1[key], dict_2[key] + ), + ) + + # string-float clash : convert both to floats and save + elif ( + isinstance(dict_1[key], str) + and isinstance(dict_2[key], float) + or isinstance(dict_1[key], float) + and isinstance(dict_2[key], str) + ): + try: + new_dict[key] = float(dict_1[key]) + float(dict_2[key]) + except: + print( + "key", + key, + ': Failed to convert string, one of "{}" or "{}" to a float'.format( + dict_1[key], dict_2[key] + ), + ) + # If the above cases have not dealt with it, then we should raise an error else: print( - "Error key: {} value: {} type: {} and key: {} value: {} type: {} are not of the same type and cannot be merged".format( - key, - dict_1[key], - type(dict_1[key]), - key, - dict_2[key], - type(dict_2[key]), + "merge_dicts error: key: {key} value: {value1} type: {type1} and key: {key} value: {value2} type: {type2} are not of the same type and cannot be merged".format( + key=key, + value1=dict_1[key], + type1=type(dict_1[key]), + value2=dict_2[key], + type2=type(dict_2[key]), ) ) raise ValueError @@ -493,6 +530,19 @@ def merge_dicts(dict_1: dict, dict_2: dict) -> dict: elif isinstance(dict_1[key], dict) and isinstance(dict_2[key], dict): new_dict[key] = merge_dicts(dict_1[key], dict_2[key]) + # strings + elif isinstance(dict_1[key], str) and isinstance(dict_2[key], str): + if dict_1[key] == dict_2[key]: + # same strings + new_dict[key] = dict_1[key] + 
else: + # different strings: just concatenate them + new_dict[key] = dict_1[key] + dict_2[key] + + # None types + elif dict_1[key] is None and dict_2[key] is None: + new_dict[key] = None + else: print( "Object types {}: {} ({}), {} ({}) not supported.".format( @@ -531,14 +581,9 @@ def update_dicts(dict_1: dict, dict_2: dict) -> dict: """ # Set up new dict of the same type as dict_1 - # - # Note: setting directly to OrderedDict fails in some cases - # so instead we take a (shallow) copy of dict_1 which will - # have the same type as dict_1, then clear it. (There must - # be a better way to do this...) - new_dict = dict_1.copy() # OrderedDict() # TODO: check if this still works - new_dict.clear() + new_dict = dict_1.__class__() + # Get keys keys_1 = dict_1.keys() keys_2 = dict_2.keys() @@ -569,13 +614,12 @@ def update_dicts(dict_1: dict, dict_2: dict) -> dict: else: print( - "Error key: {} value: {} type: {} and key: {} value: {} type: {} are not of the same type and cannot be merged".format( - key, - dict_1[key], - type(dict_1[key]), - key, - dict_2[key], - type(dict_2[key]), + "Error key: {key} value: {value1} type: {type1} and key: {key} value: {value2} type: {type2} are not of the same type and cannot be merged".format( + key=key, + value1=dict_1[key], + type1=type(dict_1[key]), + value2=dict_2[key], + type2=type(dict_2[key]), ) ) raise ValueError @@ -639,6 +683,7 @@ def custom_sort_dict(input_dict): # If there are multiple types, then we loop over them and do a piece wise sort if len(all_types_keys) > 1: msg = "Different types in the same dictionary key set" + print(msg) # Create a string repr of the type name to sort them afterwards str_types = {repr(el): el for el in all_types_keys} @@ -660,6 +705,7 @@ def custom_sort_dict(input_dict): new_dict[key] = custom_sort_dict(input_dict[key]) return new_dict + return input_dict @@ -701,3 +747,61 @@ def filter_dict_through_values(arg_dict: dict, filter_list: list) -> dict: new_dict[key] = arg_dict[key] return new_dict + + +def prepare_dict(global_dict: dict, list_of_sub_keys: list) -> None: + """ + Function that makes sure that the global dict is prepared to have a value set there. + This dictionary will store values and factors for the distribution functions, + so that they don't have to be calculated each time. + + Args: + global_dict: globally accessible dictionary where factors are stored in + list_of_sub_keys: List of keys that must become be(come) present in the global_dict + """ + + internal_dict_value = global_dict + + # This loop almost mimics a recursive loop into the dictionary. + # It checks whether the first key of the list is present, if not; set it with an empty dict. + # Then it overrides itself to be that (new) item, and goes on to do that again, until the list + # exhausted + for k in list_of_sub_keys: + # If the sub key doesnt exist then make an empty dict + if not internal_dict_value.get(k, None): + internal_dict_value[k] = {} + internal_dict_value = internal_dict_value[k] + + +def set_opts(opts: dict, newopts: dict) -> dict: + """ + Function to take a default dict and override it with newer values. 
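The merging rules laid out above for merge_dicts (numbers are added, booleans are combined with logical OR, identical strings are kept as-is, differing strings are concatenated, nested dicts are merged recursively) are easiest to see on a tiny pair of dicts. A hedged usage sketch, assuming binarycpython is importable and using only keys present in both inputs:

from binarycpython.utils.dicts import merge_dicts

d1 = {"count": 2, "flag": False, "label": "run", "nested": {"mass": 1.0}}
d2 = {"count": 3, "flag": True, "label": "run", "nested": {"mass": 0.5}}

# Per the rules above we expect the counts summed, the flag OR'd, the identical
# string kept, and the nested masses added:
# {'count': 5, 'flag': True, 'label': 'run', 'nested': {'mass': 1.5}}
print(merge_dicts(d1, d2))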
+ + # TODO: consider changing this to just a dict.update + + Args: + opts: dictionary with default values + newopts: dictionary with new values + + Returns: + returns an updated dictionary + """ + + if newopts: + for opt in newopts.keys(): + if opt in opts.keys(): + opts[opt] = newopts[opt] + + return opts + + +def normalize_dict(result_dict): + """ + Function to normalise a dictionary + """ + + sum_result = sum(list(result_dict.values())) + for key in result_dict.keys(): + result_dict[key] = result_dict[key] / sum_result + + return result_dict diff --git a/binarycpython/utils/distribution_functions.py b/binarycpython/utils/distribution_functions.py index b0369aa0a68ab48d5b0ca20db6913828baf44fe7..92f26cbc1127a25fe278b059e7c79fc0faaacc05 100644 --- a/binarycpython/utils/distribution_functions.py +++ b/binarycpython/utils/distribution_functions.py @@ -4,6 +4,9 @@ Module containing the predefined distribution functions The user can use any of these distribution functions to generate probability distributions for sampling populations +To add custom functions you can take any function and add it to the class instance before running the code. +See https://stackoverflow.com/a/28060251 for some tips on how to do that + There are distributions for the following parameters: - mass - period @@ -20,16 +23,16 @@ Tasks: - TODO: make an n-part power law that's general enough to fix the three part and the 4 part """ -import functools +# pylint: disable=E1101, R0201 + import math import json - -import traceback -import sys - from typing import Union import numpy as np +import cachetools + +import py_rinterpolate from binarycpython.utils.useful_funcs import calc_period_from_sep, calc_sep_from_period from binarycpython.utils.functions import verbose_print @@ -37,6 +40,10 @@ from binarycpython.utils.grid_options_defaults import ( _MOE2017_VERBOSITY_LEVEL, _MOE2017_VERBOSITY_INTERPOLATOR_LEVEL, ) +from binarycpython.utils.dicts import prepare_dict + +# Global dictinary to store values in +Moecache = {} ### # File containing probability distributions @@ -45,1390 +52,1239 @@ LOG_LN_CONVERTER = 1.0 / math.log(10.0) distribution_constants = {} # To store the constants in -def prepare_dict(global_dict: dict, list_of_sub_keys: list) -> None: - """ - Function that makes sure that the global dict is prepared to have a value set there. - This dictionary will store values and factors for the distribution functions, - so that they don't have to be calculated each time. - - Args: - global_dict: globally accessible dictionary where factors are stored in - list_of_sub_keys: List of keys that must become be(come) present in the global_dict - """ - - internal_dict_value = global_dict - - # This loop almost mimics a recursive loop into the dictionary. - # It checks whether the first key of the list is present, if not; set it with an empty dict. - # Then it overrides itself to be that (new) item, and goes on to do that again, until the list - # exhausted - for k in list_of_sub_keys: - # If the sub key doesnt exist then make an empty dict - if not internal_dict_value.get(k, None): - internal_dict_value[k] = {} - internal_dict_value = internal_dict_value[k] - - -def set_opts(opts: dict, newopts: dict) -> dict: - """ - Function to take a default dict and override it with newer values. 
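On the TODO above about replacing set_opts with a plain dict.update: the two are not quite equivalent, because set_opts only overrides keys that already exist in the defaults, whereas dict.update would also introduce any unknown keys. A small sketch of the difference (the key names here are made up):

from binarycpython.utils.dicts import set_opts

defaults = {"m0": 0.1, "mmax": 100}
new = {"mmax": 80, "unknown_key": 1}

print(set_opts(dict(defaults), new))  # {'m0': 0.1, 'mmax': 80} -- unknown_key is ignored

updated = dict(defaults)
updated.update(new)
print(updated)                        # {'m0': 0.1, 'mmax': 80, 'unknown_key': 1}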
- - # TODO: consider changing this to just a dict.update - - Args: - opts: dictionary with default values - newopts: dictionary with new values - - Returns: - returns an updated dictionary - """ - - if newopts: - for opt in newopts.keys(): - if opt in opts.keys(): - opts[opt] = newopts[opt] - - return opts - - -def flat() -> float: - """ - Dummy distribution function that returns 1 - - Returns: - a flat uniform distribution: 1 - """ - - return 1.0 - - -def number(value: Union[int, float]) -> Union[int, float]: - """ - Dummy distribution function that returns the input - - Args: - value: the value that will be returned by this function. - - Returns: - the value that was provided - """ - - return value - - -def const( - min_bound: Union[int, float], max_bound: Union[int, float], val: float = None -) -> Union[int, float]: - """ - a constant distribution function between min=min_bound and max=max_bound. - - Args: - min_bound: lower bound of the range - max_bound: upper bound of the range - - Returns: - returns the value of 1/(max_bound-min_bound). If val is provided, it will check whether min_bound < val <= max_bound. if not: returns 0 - """ - - if val: - if not min_bound < val <= max_bound: - print("out of bounds") - prob = 0 - return prob - prob = 1.0 / (max_bound - min_bound) - return prob - - -@functools.lru_cache(maxsize=16) -def powerlaw_constant( - min_val: Union[int, float], max_val: Union[int, float], k: Union[int, float] -) -> Union[int, float]: - """ - Function that returns the constant to normalise a power law - - TODO: what if k is -1? - - Args: - min_val: lower bound of the range - max_val: upper bound of the range - k: power law slope - - Returns: - constant to normalise the given power law between the min_val and max_val range +class distribution_functions: """ - - k1 = k + 1.0 - # print( - # "Powerlaw consts from {} to {}, k={} where k1={}".format( - # min_val, max_val, k, k1 - # ) - # ) - - powerlaw_const = k1 / (max_val ** k1 - min_val ** k1) - return powerlaw_const - - -def powerlaw( - min_val: Union[int, float], - max_val: Union[int, float], - k: Union[int, float], - x: Union[int, float], -) -> Union[int, float]: + Extension for the Population grid object that contains the distribution functions """ - Single power law with index k at x from min to max - Args: - min_val: lower bound of the power law - max_val: upper bound of the power law - k: slope of the power law - x: position at which we want to evaluate + def __init__(self, **kwargs): + # don't do anything: we just inherit from this class + return - Returns: - `probability` at the given position(x) - """ + def flat(self) -> float: + """ + Dummy distribution function that returns 1 - # Handle faulty value - if k == -1: - msg = "wrong value for k" - raise ValueError(msg) - - if (x < min_val) or (x > max_val): - print("input value is out of bounds!") - return 0 - - powerlaw_const = powerlaw_constant(min_val, max_val, k) - - # power law - prob = powerlaw_const * (x ** k) - # print( - # "Power law from {} to {}: const = {}, y = {}".format( - # min_val, max_val, const, y - # ) - # ) - return prob - - -@functools.lru_cache(maxsize=16) -def calculate_constants_three_part_powerlaw( - m0: Union[int, float], - m1: Union[int, float], - m2: Union[int, float], - m_max: Union[int, float], - p1: Union[int, float], - p2: Union[int, float], - p3: Union[int, float], -) -> Union[int, float]: - """ - Function to calculate the constants for a three-part power law + Returns: + a flat uniform distribution: 1 + """ - TODO: use the power 
law_constant function to calculate all these values + return 1.0 - Args: - m0: lower bound mass - m1: second boundary, between the first slope and the second slope - m2: third boundary, between the second slope and the third slope - m_max: upper bound mass - p1: first slope - p2: second slope - p3: third slope + def number(self, value: Union[int, float]) -> Union[int, float]: + """ + Dummy distribution function that returns the input - Returns: - array of normalisation constants - """ + Args: + value: the value that will be returned by this function. - # print("Initialising constants for the three-part powerlaw: m0={} m1={} m2={}\ - # m_max={} p1={} p2={} p3={}\n".format(m0, m1, m2, m_max, p1, p2, p3)) + Returns: + the value that was provided + """ + return value - array_constants_three_part_powerlaw = [0, 0, 0] + def const_distribution( + self, + min_bound: Union[int, float], + max_bound: Union[int, float], + val: float = None, + ) -> Union[int, float]: + """ + a constant distribution function between min=min_bound and max=max_bound. - array_constants_three_part_powerlaw[1] = ( - ((m1 ** p2) * (m1 ** (-p1))) - * (1.0 / (1.0 + p1)) - * (m1 ** (1.0 + p1) - m0 ** (1.0 + p1)) - ) - array_constants_three_part_powerlaw[1] += ( - (m2 ** (1.0 + p2) - m1 ** (1.0 + p2)) - ) * (1.0 / (1.0 + p2)) - array_constants_three_part_powerlaw[1] += ( - ((m2 ** p2) * (m2 ** (-p3))) - * (1.0 / (1.0 + p3)) - * (m_max ** (1.0 + p3) - m2 ** (1.0 + p3)) - ) - array_constants_three_part_powerlaw[1] = 1.0 / ( - array_constants_three_part_powerlaw[1] + 1e-50 - ) + Args: + min_bound: lower bound of the range + max_bound: upper bound of the range - array_constants_three_part_powerlaw[0] = array_constants_three_part_powerlaw[1] * ( - (m1 ** p2) * (m1 ** (-p1)) - ) - array_constants_three_part_powerlaw[2] = array_constants_three_part_powerlaw[1] * ( - (m2 ** p2) * (m2 ** (-p3)) - ) + Returns: + returns the value of 1/(max_bound-min_bound). If val is provided, it will check whether min_bound < val <= max_bound. if not: returns 0 + """ - return array_constants_three_part_powerlaw - # $$array[1]=(($m1**$p2)*($m1**(-$p1)))* - # (1.0/(1.0+$p1))* - # ($m1**(1.0+$p1)-$m0**(1.0+$p1))+ - # (($m2**(1.0+$p2)-$m1**(1.0+$p2)))* - # (1.0/(1.0+$p2))+ - # (($m2**$p2)*($m2**(-$p3)))* - # (1.0/(1.0+$p3))* - # ($mmax**(1.0+$p3)-$m2**(1.0+$p3)); - # $$array[1]=1.0/($$array[1]+1e-50); - # $$array[0]=$$array[1]*$m1**$p2*$m1**(-$p1); - # $$array[2]=$$array[1]*$m2**$p2*$m2**(-$p3); - # #print "ARRAY SET @_ => @$array\n"; - # $threepart_powerlaw_consts{"@_"}=[@$array]; - - -def three_part_powerlaw( - m: Union[int, float], - m0: Union[int, float], - m1: Union[int, float], - m2: Union[int, float], - m_max: Union[int, float], - p1: Union[int, float], - p2: Union[int, float], - p3: Union[int, float], -) -> Union[int, float]: - """ - Generalised three-part power law, usually used for mass distributions - - Args: - m: mass at which we want to evaluate the distribution. 
- m0: lower bound mass - m1: second boundary, between the first slope and the second slope - m2: third boundary, between the second slope and the third slope - m_max: upper bound mass - p1: first slope - p2: second slope - p3: third slope - - Returns: - 'probability' at given mass m - """ + if val and not min_bound < val <= max_bound: + prob = 0.0 + else: + prob = 1.0 / (max_bound - min_bound) + return prob + + def powerlaw_constant_nocache( + self, + min_val: Union[int, float], + max_val: Union[int, float], + k: Union[int, float], + ) -> Union[int, float]: + """ + Function that returns the constant to normalise a power law + + TODO: what if k is -1? + + Args: + min_val: lower bound of the range + max_val: upper bound of the range + k: power law slope + + Returns: + constant to normalise the given power law between the min_val and max_val range + """ + + k1 = k + 1.0 + # print( + # "Powerlaw consts from {} to {}, k={} where k1={}".format( + # min_val, max_val, k, k1 + # ) + # ) - # TODO: add check on whether the values exist + powerlaw_const = k1 / (max_val ** k1 - min_val ** k1) + return powerlaw_const - three_part_powerlaw_constants = calculate_constants_three_part_powerlaw( - m0, m1, m2, m_max, p1, p2, p3 + @cachetools.cachedmethod( + lambda self: self.caches["distribution_functions.powerlaw_constant"] ) + def powerlaw_constant( + self, + min_val: Union[int, float], + max_val: Union[int, float], + k: Union[int, float], + ) -> Union[int, float]: + """ + Function that returns the constant to normalise a power law + + TODO: what if k is -1? + + Args: + min_val: lower bound of the range + max_val: upper bound of the range + k: power law slope + + Returns: + constant to normalise the given power law between the min_val and max_val range + """ + + k1 = k + 1.0 + # print( + # "Powerlaw consts from {} to {}, k={} where k1={}".format( + # min_val, max_val, k, k1 + # ) + # ) - if m < m0: - prob = 0.0 # Below lower bound - elif m <= m1: - prob = three_part_powerlaw_constants[0] * (m ** p1) # Between m0 and m1 - elif m <= m2: - prob = three_part_powerlaw_constants[1] * (m ** p2) # Between m1 and m2 - elif m <= m_max: - prob = three_part_powerlaw_constants[2] * (m ** p3) # Between m2 and m_max - else: - prob = 0 # Above m_max - - return prob - - -@functools.lru_cache(maxsize=16) -def gaussian_normalizing_const( - mean: Union[int, float], - sigma: Union[int, float], - gmin: Union[int, float], - gmax: Union[int, float], -) -> Union[int, float]: - """ - Function to calculate the normalisation constant for the Gaussian - - Args: - mean: mean of the Gaussian - sigma: standard deviation of the Gaussian - gmin: lower bound of the range to calculate the probabilities in - gmax: upper bound of the range to calculate the probabilities in - - Returns: - normalisation constant for the Gaussian distribution(mean, sigma) between gmin and gmax - """ - - # First time; calculate multiplier for given mean and sigma - ptot = 0 - resolution = 1000 - d = (gmax - gmin) / resolution - - for i in range(resolution): - y = gmin + i * d - ptot += d * gaussian_func(y, mean, sigma) - - # TODO: Set value in global - return ptot - - -def gaussian_func( - x: Union[int, float], mean: Union[int, float], sigma: Union[int, float] -) -> Union[int, float]: - """ - Function to evaluate a Gaussian at a given point, but this time without any boundaries. 
- - Args: - x: location at which to evaluate the distribution - mean: mean of the Gaussian - sigma: standard deviation of the Gaussian - - Returns: - value of the Gaussian at x - """ - gaussian_prefactor = 1.0 / math.sqrt(2.0 * math.pi) - - r = 1.0 / (sigma) - y = (x - mean) * r - return gaussian_prefactor * r * math.exp(-0.5 * y ** 2) - - -def gaussian( - x: Union[int, float], - mean: Union[int, float], - sigma: Union[int, float], - gmin: Union[int, float], - gmax: Union[int, float], -) -> Union[int, float]: - """ - Gaussian distribution function. used for e.g. Duquennoy + Mayor 1991 - - Args: - x: location at which to evaluate the distribution - mean: mean of the Gaussian - sigma: standard deviation of the Gaussian - gmin: lower bound of the range to calculate the probabilities in - gmax: upper bound of the range to calculate the probabilities in - - Returns: - 'probability' of the Gaussian distribution between the boundaries, evaluated at x - """ - - # # location (X value), mean and sigma, min and max range - # my ($x,$mean,$sigma,$gmin,$gmax) = @_; - - if (x < gmin) or (x > gmax): - prob = 0 - else: - # normalise over given range - # TODO: add loading into global var - normalisation = gaussian_normalizing_const(mean, sigma, gmin, gmax) - prob = gaussian_func(x, mean, sigma) / normalisation - - return prob - - -##### -# Mass distributions -##### - + powerlaw_const = k1 / (max_val ** k1 - min_val ** k1) + return powerlaw_const + + def powerlaw( + self, + min_val: Union[int, float], + max_val: Union[int, float], + k: Union[int, float], + x: Union[int, float], + ) -> Union[int, float]: + """ + Single power law with index k at x from min to max + + Args: + min_val: lower bound of the power law + max_val: upper bound of the power law + k: slope of the power law + x: position at which we want to evaluate + + Returns: + `probability` at the given position(x) + """ + + # Handle faulty value + if k == -1: + msg = "wrong value for k" + raise ValueError(msg) -def Kroupa2001(m: Union[int, float], newopts: dict = None) -> Union[int, float]: - """ - Probability distribution function for Kroupa 2001 IMF, where the default values to the - three_part_powerlaw are: default = {"m0": 0.1, "m1": 0.5, "m2": 1, "mmax": 100, "p1": -1.3, "p2": -2.3,"p3": -2.3} + if (x < min_val) or (x > max_val): + print("input value is out of bounds!") + return 0 - Args: - m: mass to evaluate the distribution at - newopts: optional dict to override the default values. 
+ powerlaw_const = self.powerlaw_constant(min_val, max_val, k) - Returns: - 'probability' of distribution function evaluated at m - """ + # power law + prob = powerlaw_const * (x ** k) + return prob - # Default parameters and override them - default = { - "m0": 0.1, - "m1": 0.5, - "m2": 1, - "mmax": 100, - "p1": -1.3, - "p2": -2.3, - "p3": -2.3, - } - - value_dict = default.copy() - - if newopts: - value_dict.update(newopts) - - return three_part_powerlaw( - m, - value_dict["m0"], - value_dict["m1"], - value_dict["m2"], - value_dict["mmax"], - value_dict["p1"], - value_dict["p2"], - value_dict["p3"], + @cachetools.cachedmethod( + lambda self: self.caches[ + "distribution_functions.calculate_constants_three_part_powerlaw" + ] ) + def calculate_constants_three_part_powerlaw( + self, + m0: Union[int, float], + m1: Union[int, float], + m2: Union[int, float], + m_max: Union[int, float], + p1: Union[int, float], + p2: Union[int, float], + p3: Union[int, float], + ) -> Union[int, float]: + """ + Function to calculate the constants for a three-part power law + + TODO: use the power law_constant function to calculate all these values + + Args: + m0: lower bound mass + m1: second boundary, between the first slope and the second slope + m2: third boundary, between the second slope and the third slope + m_max: upper bound mass + p1: first slope + p2: second slope + p3: third slope + + Returns: + array of normalisation constants + """ + + array_constants_three_part_powerlaw = [0, 0, 0] + + array_constants_three_part_powerlaw[1] = ( + ((m1 ** p2) * (m1 ** (-p1))) + * (1.0 / (1.0 + p1)) + * (m1 ** (1.0 + p1) - m0 ** (1.0 + p1)) + ) + array_constants_three_part_powerlaw[1] += ( + (m2 ** (1.0 + p2) - m1 ** (1.0 + p2)) + ) * (1.0 / (1.0 + p2)) + array_constants_three_part_powerlaw[1] += ( + ((m2 ** p2) * (m2 ** (-p3))) + * (1.0 / (1.0 + p3)) + * (m_max ** (1.0 + p3) - m2 ** (1.0 + p3)) + ) + array_constants_three_part_powerlaw[1] = 1.0 / ( + array_constants_three_part_powerlaw[1] + 1e-50 + ) + array_constants_three_part_powerlaw[0] = array_constants_three_part_powerlaw[ + 1 + ] * ((m1 ** p2) * (m1 ** (-p1))) + array_constants_three_part_powerlaw[2] = array_constants_three_part_powerlaw[ + 1 + ] * ((m2 ** p2) * (m2 ** (-p3))) + + return array_constants_three_part_powerlaw + + def three_part_powerlaw( + self, + m: Union[int, float], + m0: Union[int, float], + m1: Union[int, float], + m2: Union[int, float], + m_max: Union[int, float], + p1: Union[int, float], + p2: Union[int, float], + p3: Union[int, float], + ) -> Union[int, float]: + """ + Generalised three-part power law, usually used for mass distributions + + Args: + m: mass at which we want to evaluate the distribution. 
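As a quick sanity check on the power-law normalisation used above, the constant (k + 1) / (max^(k+1) - min^(k+1)) (valid for k != -1) makes the distribution integrate to one over [min, max]. A standalone sketch with a crude midpoint quadrature, re-implementing the formula purely for illustration rather than calling the cached class method:

import numpy as np

def powerlaw_pdf(x, min_val, max_val, k):
    # same normalisation as powerlaw_constant above; k must not be -1
    k1 = k + 1.0
    return k1 / (max_val ** k1 - min_val ** k1) * x ** k

min_val, max_val, k = 0.1, 100.0, -2.3
edges = np.linspace(min_val, max_val, 200001)
mid = 0.5 * (edges[1:] + edges[:-1])
dx = edges[1] - edges[0]
print(np.sum(powerlaw_pdf(mid, min_val, max_val, k)) * dx)  # ~1.0 up to quadrature error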
+ m0: lower bound mass + m1: second boundary, between the first slope and the second slope + m2: third boundary, between the second slope and the third slope + m_max: upper bound mass + p1: first slope + p2: second slope + p3: third slope + + Returns: + 'probability' at given mass m + """ + + three_part_powerlaw_constants = self.calculate_constants_three_part_powerlaw( + m0, m1, m2, m_max, p1, p2, p3 + ) -def ktg93(m: Union[int, float], newopts: dict = None) -> Union[int, float]: - """ - Probability distribution function for KTG93 IMF, where the default values to the three_part_powerlaw are: default = {"m0": 0.1, "m1": 0.5, "m2": 1, "mmax": 80, "p1": -1.3, "p2": -2.2,"p3": -2.7} + if m < m0: + prob = 0.0 # Below lower bound + elif m <= m1: + prob = three_part_powerlaw_constants[0] * (m ** p1) # Between m0 and m1 + elif m <= m2: + prob = three_part_powerlaw_constants[1] * (m ** p2) # Between m1 and m2 + elif m <= m_max: + prob = three_part_powerlaw_constants[2] * (m ** p3) # Between m2 and m_max + else: + prob = 0 # Above m_max - Args: - m: mass to evaluate the distribution at - newopts: optional dict to override the default values. + return prob - Returns: - 'probability' of distribution function evaluated at m - """ - # TODO: ask rob what this means - - # if($m eq 'uncertainties') - # { - # # return (pointer to) the uncertainties hash - # return { - # m0=>{default=>0.1, - # fixed=>1}, - # m1=>{default=>0.5, - # fixed=>1}, - # m2=>{default=>1.0, - # fixed=>1}, - # mmax=>{default=>80.0, - # fixed=>1}, - # p1=>{default=>-1.3, - # low=>-1.3, - # high=>-1.3}, - # p2=>{default=>-2.2, - # low=>-2.2, - # high=>-2.2}, - # p3=>{default=>-2.7, - # low=>-2.7, - # high=>-2.7} - # }; - # } - - # set options - # opts = set_opts({'m0':0.1, 'm1':0.5, 'm2':1.0, 'mmax':80, 'p1':-1.3, 'p2':-2.2, 'p3':-2.7}, - # newopts) - - defaults = { - "m0": 0.1, - "m1": 0.5, - "m2": 1.0, - "mmax": 80, - "p1": -1.3, - "p2": -2.2, - "p3": -2.7, - } - value_dict = defaults.copy() - - if newopts: - value_dict.update(newopts) - - return three_part_powerlaw( - m, - value_dict["m0"], - value_dict["m1"], - value_dict["m2"], - value_dict["mmax"], - value_dict["p1"], - value_dict["p2"], - value_dict["p3"], + @cachetools.cachedmethod( + lambda self: self.caches["distribution_functions.gaussian_normalizing_const"] ) + def gaussian_normalizing_const( + self, + mean: Union[int, float], + sigma: Union[int, float], + gmin: Union[int, float], + gmax: Union[int, float], + ) -> Union[int, float]: + """ + Function to calculate the normalisation constant for the Gaussian + + Args: + mean: mean of the Gaussian + sigma: standard deviation of the Gaussian + gmin: lower bound of the range to calculate the probabilities in + gmax: upper bound of the range to calculate the probabilities in + + Returns: + normalisation constant for the Gaussian distribution(mean, sigma) between gmin and gmax + """ + + # First time; calculate multiplier for given mean and sigma + ptot = 0 + resolution = 1000 + d = (gmax - gmin) / resolution + + for i in range(resolution): + y = gmin + i * d + ptot += d * self.gaussian_func(y, mean, sigma) + + return ptot + + def gaussian_func( + self, x: Union[int, float], mean: Union[int, float], sigma: Union[int, float] + ) -> Union[int, float]: + """ + Function to evaluate a Gaussian at a given point, but this time without any boundaries. 
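The cached gaussian_normalizing_const above is a 1000-point Riemann sum of the unit Gaussian over [gmin, gmax]; it can be cross-checked against the closed form in terms of the error function. A standalone sketch (not the class method), using the Duquennoy & Mayor 1991 log-period parameters quoted later in this module:

import math

def gaussian_func(x, mean, sigma):
    # unit-normalised Gaussian, same prefactor as in the module above
    return math.exp(-0.5 * ((x - mean) / sigma) ** 2) / (sigma * math.sqrt(2.0 * math.pi))

def riemann_norm(mean, sigma, gmin, gmax, resolution=1000):
    d = (gmax - gmin) / resolution
    return sum(d * gaussian_func(gmin + i * d, mean, sigma) for i in range(resolution))

def erf_norm(mean, sigma, gmin, gmax):
    z = lambda v: (v - mean) / (sigma * math.sqrt(2.0))
    return 0.5 * (math.erf(z(gmax)) - math.erf(z(gmin)))

# Duquennoy & Mayor 1991: mean 4.8, sigma 2.3, on log10(P) in [-2, 12]
print(riemann_norm(4.8, 2.3, -2, 12), erf_norm(4.8, 2.3, -2, 12))  # the two agree closely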
+ + Args: + x: location at which to evaluate the distribution + mean: mean of the Gaussian + sigma: standard deviation of the Gaussian + + Returns: + value of the Gaussian at x + """ + gaussian_prefactor = 1.0 / math.sqrt(2.0 * math.pi) + + r = 1.0 / (sigma) + y = (x - mean) * r + return gaussian_prefactor * r * math.exp(-0.5 * y ** 2) + + def gaussian( + self, + x: Union[int, float], + mean: Union[int, float], + sigma: Union[int, float], + gmin: Union[int, float], + gmax: Union[int, float], + ) -> Union[int, float]: + """ + Gaussian distribution function. used for e.g. Duquennoy + Mayor 1991 + + Args: + x: location at which to evaluate the distribution + mean: mean of the Gaussian + sigma: standard deviation of the Gaussian + gmin: lower bound of the range to calculate the probabilities in + gmax: upper bound of the range to calculate the probabilities in + + Returns: + 'probability' of the Gaussian distribution between the boundaries, evaluated at x + """ + + # # location (X value), mean and sigma, min and max range + # my ($x,$mean,$sigma,$gmin,$gmax) = @_; + + if (x < gmin) or (x > gmax): + prob = 0 + else: + # normalise over given range + normalisation = self.gaussian_normalizing_const(mean, sigma, gmin, gmax) + prob = self.gaussian_func(x, mean, sigma) / normalisation + + return prob + + ##### + # Mass distributions + ##### + + def Kroupa2001( + self, m: Union[int, float], newopts: dict = None + ) -> Union[int, float]: + """ + Probability distribution function for Kroupa 2001 IMF, + where the default values to the three_part_powerlaw are: + default = { + "m0": 0.1, + "m1": 0.5, + "m2": 1, + "mmax": 100, + "p1": -1.3, + "p2": -2.3, + "p3": -2.3 + } + + Args: + m: mass to evaluate the distribution at + newopts: optional dict to override the default values. + + Returns: + 'probability' of distribution function evaluated at m + """ + + # Default parameters and override them + default = { + "m0": 0.1, + "m1": 0.5, + "m2": 1, + "mmax": 100, + "p1": -1.3, + "p2": -2.3, + "p3": -2.3, + } + + value_dict = default.copy() + + if newopts: + value_dict.update(newopts) + + return self.three_part_powerlaw( + m, + value_dict["m0"], + value_dict["m1"], + value_dict["m2"], + value_dict["mmax"], + value_dict["p1"], + value_dict["p2"], + value_dict["p3"], + ) + def ktg93(self, m: Union[int, float], newopts: dict = None) -> Union[int, float]: + """ + Probability distribution function for KTG93 IMF, where the default values to the three_part_powerlaw are: default = {"m0": 0.1, "m1": 0.5, "m2": 1, "mmax": 80, "p1": -1.3, "p2": -2.2,"p3": -2.7} + + Args: + m: mass to evaluate the distribution at + newopts: optional dict to override the default values. 
+ + Returns: + 'probability' of distribution function evaluated at m + """ + + defaults = { + "m0": 0.1, + "m1": 0.5, + "m2": 1.0, + "mmax": 80, + "p1": -1.3, + "p2": -2.2, + "p3": -2.7, + } + value_dict = defaults.copy() + + if newopts: + value_dict.update(newopts) + + return self.three_part_powerlaw( + m, + value_dict["m0"], + value_dict["m1"], + value_dict["m2"], + value_dict["mmax"], + value_dict["p1"], + value_dict["p2"], + value_dict["p3"], + ) -# sub ktg93_lnspace -# { -# # wrapper for KTG93 on a ln(m) grid -# my $m=$_[0]; -# return ktg93(@_) * $m; -# } - - -def imf_tinsley1980(m: Union[int, float]) -> Union[int, float]: - """ - Probability distribution function for Tinsley 1980 IMF (defined up until 80Msol): three_part_powerlaw(m, 0.1, 2.0, 10.0, 80.0, -2.0, -2.3, -3.3) - - Args: - m: mass to evaluate the distribution at - - Returns: - 'probability' of distribution function evaluated at m - """ - - return three_part_powerlaw(m, 0.1, 2.0, 10.0, 80.0, -2.0, -2.3, -3.3) - - -def imf_scalo1986(m: Union[int, float]) -> Union[int, float]: - """ - Probability distribution function for Scalo 1986 IMF (defined up until 80Msol): three_part_powerlaw(m, 0.1, 1.0, 2.0, 80.0, -2.35, -2.35, -2.70) - - Args: - m: mass to evaluate the distribution at - - Returns: - 'probability' of distribution function evaluated at m - """ - return three_part_powerlaw(m, 0.1, 1.0, 2.0, 80.0, -2.35, -2.35, -2.70) - - -def imf_scalo1998(m: Union[int, float]) -> Union[int, float]: - """ - From Scalo 1998 + def imf_tinsley1980(self, m: Union[int, float]) -> Union[int, float]: + """ + Probability distribution function for Tinsley 1980 IMF (defined up until 80Msol): self.three_part_powerlaw(m, 0.1, 2.0, 10.0, 80.0, -2.0, -2.3, -3.3) - Probability distribution function for Scalo 1998 IMF (defined up until 80Msol): three_part_powerlaw(m, 0.1, 1.0, 10.0, 80.0, -1.2, -2.7, -2.3) + Args: + m: mass to evaluate the distribution at - Args: - m: mass to evaluate the distribution at + Returns: + 'probability' of distribution function evaluated at m + """ - Returns: - 'probability' of distribution function evaluated at m - """ + return self.three_part_powerlaw(m, 0.1, 2.0, 10.0, 80.0, -2.0, -2.3, -3.3) - return three_part_powerlaw(m, 0.1, 1.0, 10.0, 80.0, -1.2, -2.7, -2.3) + def imf_scalo1986(self, m: Union[int, float]) -> Union[int, float]: + """ + Probability distribution function for Scalo 1986 IMF (defined up until 80Msol): self.three_part_powerlaw(m, 0.1, 1.0, 2.0, 80.0, -2.35, -2.35, -2.70) + Args: + m: mass to evaluate the distribution at -def imf_chabrier2003(m: Union[int, float]) -> Union[int, float]: - """ - Probability distribution function for IMF of Chabrier 2003 PASP 115:763-795 + Returns: + 'probability' of distribution function evaluated at m + """ + return self.three_part_powerlaw(m, 0.1, 1.0, 2.0, 80.0, -2.35, -2.35, -2.70) - Args: - m: mass to evaluate the distribution at + def imf_scalo1998(self, m: Union[int, float]) -> Union[int, float]: + """ + From Scalo 1998 - Returns: - 'probability' of distribution function evaluated at m - """ + Probability distribution function for Scalo 1998 IMF (defined up until 80Msol): self.three_part_powerlaw(m, 0.1, 1.0, 10.0, 80.0, -1.2, -2.7, -2.3) - chabrier_logmc = math.log10(0.079) - chabrier_sigma2 = 0.69 * 0.69 - chabrier_a1 = 0.158 - chabrier_a2 = 4.43e-2 - chabrier_x = -1.3 - if m <= 0: - msg = "below bounds" - raise ValueError(msg) - if 0 < m < 1.0: - A = 0.158 - dm = math.log10(m) - chabrier_logmc - prob = chabrier_a1 * math.exp(-(dm ** 2) / (2.0 * chabrier_sigma2)) - 
else: - prob = chabrier_a2 * (m ** chabrier_x) - prob = prob / (0.1202462 * m * math.log(10)) - return prob - - -######################################################################## -# Binary fractions -######################################################################## - - -def Arenou2010_binary_fraction(m: Union[int, float]) -> Union[int, float]: - """ - Arenou 2010 function for the binary fraction as f(M1) + Args: + m: mass to evaluate the distribution at - GAIA-C2-SP-OPM-FA-054 - www.rssd.esa.int/doc_fetch.php?id=2969346 + Returns: + 'probability' of distribution function evaluated at m + """ - Args: - m: mass to evaluate the distribution at + return self.three_part_powerlaw(m, 0.1, 1.0, 10.0, 80.0, -1.2, -2.7, -2.3) - Returns: - binary fraction at m - """ + def imf_chabrier2003(self, m: Union[int, float]) -> Union[int, float]: + """ + Probability distribution function for IMF of Chabrier 2003 PASP 115:763-795 - return 0.8388 * math.tanh(0.688 * m + 0.079) + Args: + m: mass to evaluate the distribution at + Returns: + 'probability' of distribution function evaluated at m + """ -# print(Arenou2010_binary_fraction(0.4)) + chabrier_logmc = math.log10(0.079) + chabrier_sigma2 = 0.69 * 0.69 + chabrier_a1 = 0.158 + chabrier_a2 = 4.43e-2 + chabrier_x = -1.3 + if m <= 0: + msg = "below bounds" + raise ValueError(msg) -def raghavan2010_binary_fraction(m: Union[int, float]) -> Union[int, float]: - """ - Fit to the Raghavan 2010 binary fraction as a function of - spectral type (Fig 12). Valid for local stars (Z=Zsolar). + if 0 < m < 1.0: + dm = math.log10(m) - chabrier_logmc + prob = chabrier_a1 * math.exp(-(dm ** 2) / (2.0 * chabrier_sigma2)) + else: + prob = chabrier_a2 * (m ** chabrier_x) - The spectral type is converted mass by use of the ZAMS - effective temperatures from binary_c/BSE (at Z=0.02) - and the new "long_spectral_type" function of binary_c - (based on Jaschek+Jaschek's Teff-spectral type table). + prob = prob / (0.1202462 * m * math.log(10)) + return prob - Rob then fitted the result + ######################################################################## + # Binary fractions + ######################################################################## - Args: - m: mass to evaluate the distribution at + def Arenou2010_binary_fraction(self, m: Union[int, float]) -> Union[int, float]: + """ + Arenou 2010 function for the binary fraction as f(M1) - Returns: - binary fraction at m - """ + GAIA-C2-SP-OPM-FA-054 + www.rssd.esa.int/doc_fetch.php?id=2969346 - return min( - 1.0, - max( - (m ** 0.1) * (5.12310e-01) + (-1.02070e-01), - (1.10450e00) * (m ** (4.93670e-01)) + (-6.95630e-01), - ), - ) + Args: + m: mass to evaluate the distribution at + Returns: + binary fraction at m + """ -# print(raghavan2010_binary_fraction(2)) + return 0.8388 * math.tanh(0.688 * m + 0.079) -######################################################################## -# Period distributions -######################################################################## + def raghavan2010_binary_fraction(self, m: Union[int, float]) -> Union[int, float]: + """ + Fit to the Raghavan 2010 binary fraction as a function of + spectral type (Fig 12). Valid for local stars (Z=Zsolar). + The spectral type is converted mass by use of the ZAMS + effective temperatures from binary_c/BSE (at Z=0.02) + and the new "long_spectral_type" function of binary_c + (based on Jaschek+Jaschek's Teff-spectral type table). 
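The Arenou (2010) fit added above is a single tanh in the primary mass: the implied binary fraction is about 0.8388 * tanh(0.079) ~ 0.066 as m -> 0 and saturates towards the 0.8388 prefactor for massive stars, since tanh -> 1. A short evaluation sketch (values are printed, not asserted):

import math

def arenou2010_binary_fraction(m):
    # same fit as the class method above
    return 0.8388 * math.tanh(0.688 * m + 0.079)

for m in (0.1, 1.0, 10.0, 50.0):
    print(m, arenou2010_binary_fraction(m))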
-def duquennoy1991(logper: Union[int, float]) -> Union[int, float]: - """ - Period distribution from Duquennoy + Mayor 1991. Evaluated the function gaussian(logper, 4.8, 2.3, -2, 12) + Rob then fitted the result - Args: - logper: logarithm of period to evaluate the distribution at + Args: + m: mass to evaluate the distribution at - Returns: - 'probability' at gaussian(logper, 4.8, 2.3, -2, 12) - """ - return gaussian(logper, 4.8, 2.3, -2, 12) - - -def sana12( - M1: Union[int, float], - M2: Union[int, float], - a: Union[int, float], - P: Union[int, float], - amin: Union[int, float], - amax: Union[int, float], - x0: Union[int, float], - x1: Union[int, float], - p: Union[int, float], -) -> Union[int, float]: - """ - distribution of initial orbital periods as found by Sana et al. (2012) - which is a flat distribution in ln(a) and ln(P) respectively for stars - * less massive than 15Msun (no O-stars) - * mass ratio q=M2/M1<0.1 - * log(P)<0.15=x0 and log(P)>3.5=x1 - and is be given by dp/dlogP ~ (logP)^p for all other binary configurations (default p=-0.55) - - arguments are M1, M2, a, Period P, amin, amax, x0=log P0, x1=log P1, p - - example args: 10, 5, sep(M1, M2, P), sep, ?, -2, 12, -0.55 - - # TODO: Fix this function! - - Args: - M1: Mass of primary - M2: Mass of secondary - a: separation of binary - P: period of binary - amin: minimum separation of the distribution (lower bound of the range) - amax: maximum separation of the distribution (upper bound of the range) - x0: log of minimum period of the distribution (lower bound of the range) - x1: log of maximum period of the distribution (upper bound of the range) - p: slope of the distribution - - Returns: - 'probability' of orbital period P given the other parameters - """ + Returns: + binary fraction at m + """ - res = 0 - if (M1 < 15.0) or (M2 / M1 < 0.1): - res = 1.0 / (math.log(amax) - math.log(amin)) - else: - p1 = 1.0 + p - - # For more details see the LyX document of binary_c for this distribution - # where the variables and normalisations are given - # we use the notation x=log(P), xmin=log(Pmin), x0=log(P0), ... to determine the - x = LOG_LN_CONVERTER * math.log(P) - xmin = LOG_LN_CONVERTER * math.log(calc_period_from_sep(M1, M2, amin)) - xmax = LOG_LN_CONVERTER * math.log(calc_period_from_sep(M1, M2, amax)) - - # print("M1 M2 amin amax P x xmin xmax") - # print(M1, M2, amin, amax, P, x, xmin, xmax) - # my $x0 = 0.15; - # my $x1 = 3.5; - - A1 = 1.0 / ( - x0 ** p * (x0 - xmin) + (x1 ** p1 - x0 ** p1) / p1 + x1 ** p * (xmax - x1) + return min( + 1.0, + max( + (m ** 0.1) * (5.12310e-01) + (-1.02070e-01), + (1.10450e00) * (m ** (4.93670e-01)) + (-6.95630e-01), + ), ) - A0 = A1 * x0 ** p - A2 = A1 * x1 ** p - if x < x0: - res = 3.0 / 2.0 * LOG_LN_CONVERTER * A0 - elif x > x1: - res = 3.0 / 2.0 * LOG_LN_CONVERTER * A2 + ######################################################################## + # Period distributions + ######################################################################## + + def duquennoy1991(self, logper: Union[int, float]) -> Union[int, float]: + """ + Period distribution from Duquennoy + Mayor 1991. 
Evaluated the function self.gaussian(logper, 4.8, 2.3, -2, 12) + + Args: + logper: logarithm of period to evaluate the distribution at + + Returns: + 'probability' at self.gaussian(logper, 4.8, 2.3, -2, 12) + """ + return self.gaussian(logper, 4.8, 2.3, -2, 12) + + def sana12( + self, + M1: Union[int, float], + M2: Union[int, float], + a: Union[int, float], + P: Union[int, float], + amin: Union[int, float], + amax: Union[int, float], + x0: Union[int, float], + x1: Union[int, float], + p: Union[int, float], + ) -> Union[int, float]: + """ + distribution of initial orbital periods as found by Sana et al. (2012) + which is a flat distribution in ln(a) and ln(P) respectively for stars + * less massive than 15Msun (no O-stars) + * mass ratio q=M2/M1<0.1 + * log(P)<0.15=x0 and log(P)>3.5=x1 + and is be given by dp/dlogP ~ (logP)^p for all other binary configurations (default p=-0.55) + + arguments are M1, M2, a, Period P, amin, amax, x0=log P0, x1=log P1, p + + example args: 10, 5, sep(M1, M2, P), sep, ?, -2, 12, -0.55 + + # TODO: Fix this function! Half of the input here can be taken out and calculated within the function itself. + + Args: + M1: Mass of primary + M2: Mass of secondary + a: separation of binary + P: period of binary + amin: minimum separation of the distribution (lower bound of the range) + amax: maximum separation of the distribution (upper bound of the range) + x0: log of minimum period of the distribution (lower bound of the range) + x1: log of maximum period of the distribution (upper bound of the range) + p: slope of the distribution + + Returns: + 'probability' of orbital period P given the other parameters + """ + + res = 0 + if (M1 < 15.0) or (M2 / M1 < 0.1): + res = 1.0 / (math.log(amax) - math.log(amin)) else: - res = 3.0 / 2.0 * LOG_LN_CONVERTER * A1 * x ** p - - return res - - -# print(sana12(10, 2, 10, 100, 1, 1000, math.log(10), math.log(1000), 6)) + p1 = 1.0 + p + + # For more details see the LyX document of binary_c for this distribution + # where the variables and normalisations are given + # we use the notation x=log(P), xmin=log(Pmin), x0=log(P0), ... to determine the + x = LOG_LN_CONVERTER * math.log(P) + xmin = LOG_LN_CONVERTER * math.log(calc_period_from_sep(M1, M2, amin)) + xmax = LOG_LN_CONVERTER * math.log(calc_period_from_sep(M1, M2, amax)) + + A1 = 1.0 / ( + x0 ** p * (x0 - xmin) + + (x1 ** p1 - x0 ** p1) / p1 + + x1 ** p * (xmax - x1) + ) + A0 = A1 * x0 ** p + A2 = A1 * x1 ** p + if x < x0: + res = A0 + elif x > x1: + res = A2 + else: + res = A1 * x ** p + res *= 3.0 / 2.0 * LOG_LN_CONVERTER + return res -def interpolate_in_mass_izzard2012( - M: Union[int, float], high: Union[int, float], low: Union[int, float] -) -> Union[int, float]: - """ - Function to interpolate in mass + def interpolate_in_mass_izzard2012( + self, M: Union[int, float], high: Union[int, float], low: Union[int, float] + ) -> Union[int, float]: + """ + Function to interpolate in mass - TODO: fix this function. - TODO: describe the args - high: at M=16.3 - low: at 1.15 + TODO: fix this function. 
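interpolate_in_mass_izzard2012 interpolates a fit parameter linearly in primary mass between an anchor value "low" at M = 1.15 Msun and "high" at M = 16.3 Msun; the log branch is currently switched off. A standalone sketch of the linear branch, using the mu anchors (-17.8, 5.03) that Izzard2012_period_distribution passes in, showing that the endpoints are recovered:

def interpolate_in_mass(M, high, low):
    # linear branch of interpolate_in_mass_izzard2012 (illustrative standalone copy):
    # low is anchored at M = 1.15, high at M = 16.3
    return (high - low) / (16.3 - 1.15) * (M - 1.15) + low

print(interpolate_in_mass(1.15, high=-17.8, low=5.03))  # 5.03, the low-mass anchor
print(interpolate_in_mass(16.3, high=-17.8, low=5.03))  # -17.8 (up to float rounding)
print(interpolate_in_mass(8.0, high=-17.8, low=5.03))   # somewhere in between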
+ TODO: describe the args + high: at M=16.3 + low: at 1.15 - Args: - M: mass - high: - low: + Args: + M: mass + high: + low: - Returns: + Returns: - """ + """ - log_interpolation = False + log_interpolation = False - if log_interpolation: - return (high - low) / (math.log10(16.3) - math.log10(1.15)) * ( - math.log10(M) - math.log10(1.15) - ) + low - else: + if log_interpolation: + return (high - low) / (math.log10(16.3) - math.log10(1.15)) * ( + math.log10(M) - math.log10(1.15) + ) + low return (high - low) / (16.3 - 1.15) * (M - 1.15) + low + def Izzard2012_period_distribution( + self, + P: Union[int, float], + M1: Union[int, float], + log10Pmin: Union[int, float] = -1.0, + ) -> Union[int, float]: + """ + period distribution which interpolates between + Duquennoy and Mayor 1991 at low mass (G/K spectral type <~1.15Msun) + and Sana et al 2012 at high mass (O spectral type >~16.3Msun) + + This gives dN/dlogP, i.e. DM/Raghavan's Gaussian in log10P at low mass + and Sana's power law (as a function of logP) at high mass + + TODO: fix this function + + Args: + P: period + M1: Primary star mass + log10Pmin: minimum period in base log10 (optional) + + Returns: + 'probability' of interpolated distribution function at P and M1 + + """ + + # Check if there is input and force it to be at least 1 + log10Pmin = max(-1.0, log10Pmin) + + # save mass input and limit mass used (M1 from now on) to fitted range + Mwas = M1 + M1 = max(1.15, min(16.3, M1)) + + # Calculate the normalisations + # need to normalise the distribution for this mass + # (and perhaps secondary mass) + prepare_dict(distribution_constants, ["Izzard2012", M1]) + if not distribution_constants["Izzard2012"][M1].get(log10Pmin): + distribution_constants["Izzard2012"][M1][ + log10Pmin + ] = 1 # To prevent this loop from going recursive + N = 200.0 # Resolution for normalisation. I hope 1000 is enough + dlP = (10.0 - log10Pmin) / N + C = 0 # normalisation constant. + for lP in np.arange(log10Pmin, 10, dlP): + C += dlP * self.Izzard2012_period_distribution(10 ** lP, M1, log10Pmin) + + distribution_constants["Izzard2012"][M1][log10Pmin] = 1.0 / C + + lP = math.log10(P) + # log period + + # # fits + mu = self.interpolate_in_mass_izzard2012(M1, -17.8, 5.03) + sigma = self.interpolate_in_mass_izzard2012(M1, 9.18, 2.28) + K = self.interpolate_in_mass_izzard2012(M1, 6.93e-2, 0.0) + nu = self.interpolate_in_mass_izzard2012(M1, 0.3, -1) + g = 1.0 + 1e-30 ** (lP - nu) + lPmu = lP - mu + + if (lP < log10Pmin) or (lP > 10.0): + return 0 -def Izzard2012_period_distribution( - P: Union[int, float], M1: Union[int, float], log10Pmin: Union[int, float] = -1.0 -) -> Union[int, float]: - """ - period distribution which interpolates between - Duquennoy and Mayor 1991 at low mass (G/K spectral type <~1.15Msun) - and Sana et al 2012 at high mass (O spectral type >~16.3Msun) - - This gives dN/dlogP, i.e. 
DM/Raghavan's Gaussian in log10P at low mass - and Sana's power law (as a function of logP) at high mass - - TODO: fix this function + return ( + distribution_constants["Izzard2012"][M1][log10Pmin] + * (math.exp(-lPmu * lPmu / (2.0 * sigma * sigma)) + K / max(0.1, lP)) + / g + ) - Args: - P: period - M1: Primary star mass - log10Pmin: minimum period in base log10 (optional) + ######################################################################## + # Mass ratio distributions + ######################################################################## + def flatsections(self, x: float, opts: dict) -> Union[float, int]: + """ + Function to generate flat distributions, possibly in multiple sections + + Args: + x: mass ratio value + opts: list containing the flat sections. Which are themselves dictionaries, with keys "max": upper bound, "min": lower bound and "height": value + + Returns: + probability of that mass ratio. + """ + + c = 0 + y = 0 + for opt in opts: + dc = (opt["max"] - opt["min"]) * opt["height"] + c += dc + if opt["min"] <= x <= opt["max"]: + y = opt["height"] + y /= c + return y + + ######################################################################## + # Eccentricity distributions + ######################################################################## + + ######################################################################## + # Star formation histories + ######################################################################## + + def cosmic_SFH_madau_dickinson2014(self, z): + """ + Cosmic star formation history distribution from Madau & Dickonson 2014 (https://arxiv.org/pdf/1403.0007.pdf) + + Args: + z: redshift + + Returns: + Cosmic star formation rate in Solar mass year^-1 mega parsec^-3 + """ + + CSFH = 0.015 * ((1 + z) ** 2.7) / (1 + (((1 + z) / 2.9) ** 5.6)) + return CSFH + + ######################################################################## + # Metallicity distributions + ######################################################################## + + ######################################################################## + # Moe & DiStefano 2017 functions + # + # The code below are functions that are used to set up and interpolate + # on the Moe & DiStefano 2017 data. The interpolators take the last + # known value if we try to interpolate outside of the tables. + # There are still some open tasks and improvements that can be made: + # + # TODO: Parallelize the setting up of the interpolators + # TODO: Generalise the code such that we can input other/newer tables. 
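flatsections above normalises a piecewise-flat mass-ratio distribution by the total area of its sections, so the returned probability integrates to ~1 regardless of the chosen heights. A standalone check with arbitrarily chosen illustrative sections:

def flatsections(x, opts):
    # same logic as the class method above, without the class plumbing
    c = 0.0
    y = 0.0
    for opt in opts:
        c += (opt["max"] - opt["min"]) * opt["height"]
        if opt["min"] <= x <= opt["max"]:
            y = opt["height"]
    return y / c

sections = [
    {"min": 0.1, "max": 0.8, "height": 1.0},
    {"min": 0.8, "max": 1.0, "height": 2.0},
]

# crude midpoint integral over q in [0.1, 1.0]: should come out very close to 1
n = 100000
dq = (1.0 - 0.1) / n
print(sum(flatsections(0.1 + (i + 0.5) * dq, sections) * dq for i in range(n)))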
+ ######################################################################## - Returns: - 'probability' of interpolated distribution function at P and M1 + def poisson(self, lambda_val, n, nmax=None): + """ + Function that calculates the Poisson value and normalises + TODO: improve the description + """ - """ + cachekey = "{} {} {}".format(lambda_val, n, nmax) - # Check if there is input and force it to be at least 1 - log10Pmin = max(-1.0, log10Pmin) - - # save mass input and limit mass used (M1 from now on) to fitted range - Mwas = M1 - M1 = max(1.15, min(16.3, M1)) - # print("Izzard2012 called for M={} (truncated to {}), P={}\n".format(Mwas, M1, P)) - - # Calculate the normalisations - # need to normalise the distribution for this mass - # (and perhaps secondary mass) - prepare_dict(distribution_constants, ["Izzard2012", M1]) - if not distribution_constants["Izzard2012"][M1].get(log10Pmin): - distribution_constants["Izzard2012"][M1][ - log10Pmin - ] = 1 # To prevent this loop from going recursive - N = 200.0 # Resolution for normalisation. I hope 1000 is enough - dlP = (10.0 - log10Pmin) / N - C = 0 # normalisation constant. - # print("LOOP",log10Pmin) - for lP in np.arange(log10Pmin, 10, dlP): - C += dlP * Izzard2012_period_distribution(10 ** lP, M1, log10Pmin) - - distribution_constants["Izzard2012"][M1][log10Pmin] = 1.0 / C - # print( - # "Normalisation constant for Izzard2012 M={} (log10Pmin={}) is\ - # {}\n".format( - # M1, log10Pmin, distribution_constants["Izzard2012"][M1][log10Pmin] - # ) - # ) + if distribution_constants.get("poisson_cache", None): + if distribution_constants["poisson_cache"].get(cachekey, None): + p_val = distribution_constants["poisson_cache"][cachekey] + return p_val - lP = math.log10(P) - # log period + # Poisson distribution : note, n can be zero + # + # nmax is the truncation : if set, we normalise + # correctly. + p_val = self._poisson(lambda_val, n) + + if nmax: + I_poisson = 0 + for i in range(nmax + 1): + I_poisson += self._poisson(lambda_val, i) + p_val /= I_poisson + + # Add to cache + if not distribution_constants.get("poisson_cache", None): + distribution_constants["poisson_cache"] = {} + distribution_constants["poisson_cache"][cachekey] = p_val + + return p_val + + def _poisson(self, lambda_val, n): + """ + Function to return the Poisson value + """ + return (lambda_val ** n) * np.exp(-lambda_val) / (1.0 * math.factorial(n)) + + def get_max_multiplicity(self, multiplicity_array): + """ + Function to get the maximum multiplicity + """ + max_multiplicity = 0 + for n in range(4): + if multiplicity_array[n] > 0: + max_multiplicity = n + 1 + return max_multiplicity + + def merge_multiplicities(self, result_array, max_multiplicity, verbosity=0): + """ + Function to fold the multiplicities higher than the max_multiplicity onto the max_multiplicity + + if max_multiplicity == 1: + All the multiplicities are folded onto multiplicity == 1. This will always total to 1 + if max_multiplicity == 2: + The multiplicity fractions of the triple and quadruples are folded onto that of the binary multiplicity fraction + if max_multiplicity == 3: + The multiplicity fractions of the quadruples are folded onto that of the triples + """ + + if not max_multiplicity in range(1, 5): + msg = "\tMoe and di Stefano 2017: merge_multiplicities: max_multiplicity has to be between 1 and 4. 
It is {} now".format( + max_multiplicity + ) + verbose_print( + msg, + verbosity, + 0, + ) + raise ValueError(msg) - # # fits - mu = interpolate_in_mass_izzard2012(M1, -17.8, 5.03) - sigma = interpolate_in_mass_izzard2012(M1, 9.18, 2.28) - K = interpolate_in_mass_izzard2012(M1, 6.93e-2, 0.0) - nu = interpolate_in_mass_izzard2012(M1, 0.3, -1) - g = 1.0 / (1.0 + 1e-30 ** (lP - nu)) + # Fold multiplicities: + verbose_print( + "\tMoe and di Stefano 2017: merge_multiplicities: Merging multiplicities with initial array {} and max multiplicity {}".format( + result_array, max_multiplicity + ), + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) + for i in range(max_multiplicity, len(result_array))[::-1]: + result_array[i - 1] += result_array[i] + result_array[i] = 0 + verbose_print( + "\tMoe and di Stefano 2017: merge_multiplicities: Merging multiplicities to new array {}".format( + result_array + ), + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) - lPmu = lP - mu - # print( - # "M={} ({}) P={} : mu={} sigma={} K={} nu={} norm=%g\n".format( - # Mwas, M1, P, mu, sigma, K, nu - # ) - # ) + return result_array - # print "FUNC $distdata{Izzard2012}{$M}{$log10Pmin} * (exp(- (x-$mu)**2/(2.0*$sigma*$sigma) ) + $K/MAX(0.1,$lP)) * $g;\n"; + def Moe_di_Stefano_2017_multiplicity_fractions(self, options, verbosity=0): + """ + Function that creates a list of probability fractions and + normalises and merges them according to the users choice. - if (lP < log10Pmin) or (lP > 10.0): - return 0 + TODO: make an extrapolation functionality in this. log10(1.6e1) + is quite low. - else: - return ( - distribution_constants["Izzard2012"][M1][log10Pmin] - * (math.exp(-lPmu * lPmu / (2.0 * sigma * sigma)) + K / max(0.1, lP)) - * g - ) + The default result that is returned when sampling the mass outside + of the mass range is now the last known value + Returns a list of multiplicity fractions for a given input of mass + """ -######################################################################## -# Mass ratio distributions -######################################################################## + # Use the global Moecache + global Moecache + multiplicity_modulator_array = np.array( + options["multiplicity_modulator"] + ) # Modulator array -def flatsections(x: float, opts: dict) -> Union[float, int]: - """ - Function to generate flat distributions, possibly in multiple sections + # Check for length + if len(multiplicity_modulator_array) != 4: + msg = "Multiplicity modulator has to have 4 elements. Now it is {}, len: {}".format( + multiplicity_modulator_array, len(multiplicity_modulator_array) + ) + verbose_print( + msg, + verbosity, + 0, + ) + raise ValueError(msg) - Args: - x: mass ratio value - opts: list containing the flat sections. Which are themselves dictionaries, with keys "max": upper bound, "min": lower bound and "height": value + # Set up some arrays + full_fractions_array = np.zeros(4) # Meant to contain the real fractions + weighted_fractions_array = np.zeros( + 4 + ) # Meant to contain the fractions multiplied by the multiplicity modulator + + # Get max multiplicity + max_multiplicity = self.get_max_multiplicity(multiplicity_modulator_array) + + # ... 
it's better to interpolate the multiplicity and then + # use a Poisson distribution to calculate the fractions + # (this is more accurate) + + # Set up the multiplicity interpolator + if not Moecache.get("rinterpolator_multiplicity", None): + Moecache["rinterpolator_multiplicity"] = py_rinterpolate.Rinterpolate( + table=Moecache["multiplicity_table"], # Contains the table of data + nparams=1, # logM1 + ndata=4, # The number of datapoints (the parameters that we want to interpolate) + verbosity=verbosity - (_MOE2017_VERBOSITY_INTERPOLATOR_LEVEL - 1), + ) - Returns: - probability of that mass ratio. - """ + if options["multiplicity_model"] == "Poisson": + multiplicity = Moecache["rinterpolator_multiplicity"].interpolate( + [np.log10(options["M_1"])] + )[0] - c = 0 - y = 0 + # Fill the multiplicity array + for n in range(4): + full_fractions_array[n] = self.poisson(multiplicity, n, 3) - for opt in opts: - dc = (opt["max"] - opt["min"]) * opt["height"] - # print("added flatsection ({}-{})*{} = {}\n".format( - # opt['max'], opt['min'], opt['height'], dc)) - c += dc - if opt["min"] <= x <= opt["max"]: - y = opt["height"] - # print("Use this\n") + # Normalize it so it fills to one when taking all the multiplicities: + full_fractions_array = full_fractions_array / np.sum(full_fractions_array) - c = 1.0 / c - y = y * c + verbose_print( + "\tMoe and di Stefano 2017: Moe_di_Stefano_2017_multiplicity_fractions: using model {}: full_fractions_array: {}".format( + "Poisson", full_fractions_array + ), + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) - # print("flatsections gives C={}: y={}\n",c,y) - return y + elif options["multiplicity_model"] == "data": + # use the fractions calculated from Moe's data directly + # + # note that in this case, there are no quadruples: these + # are combined with triples + # Fill with the raw values + for n in range(3): + full_fractions_array[n] = Moecache[ + "rinterpolator_multiplicity" + ].interpolate([np.log10(options["M_1"])])[n + 1] -# print(flatsections(1, [{'min': 0, 'max': 2, 'height': 3}])) + # Set last value + full_fractions_array[3] = 0.0 # no quadruples + verbose_print( + "\tMoe and di Stefano 2017: Moe_di_Stefano_2017_multiplicity_fractions: using model {}: full_fractions_array: {}".format( + "data", full_fractions_array + ), + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) -######################################################################## -# Eccentricity distributions -######################################################################## + # Normalisation: + if options["normalize_multiplicities"] == "raw": + # Don't multiply by the multiplicity_array, but do give a fractions array + verbose_print( + "\tMoe and di Stefano 2017: Moe_di_Stefano_2017_multiplicity_fractions: Not normalising (using raw results): results: {}".format( + full_fractions_array + ), + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) + result = full_fractions_array -######################################################################## -# Star formation histories -######################################################################## + elif options["normalize_multiplicities"] == "norm": + # Multiply the full_multiplicity_fraction array by the multiplicity_multiplier_array, creating a weighted fractions array + weighted_fractions_array = ( + full_fractions_array * multiplicity_modulator_array + ) + # Normalise this so it is in total 1: + result = weighted_fractions_array / np.sum(weighted_fractions_array) -def cosmic_SFH_madau_dickinson2014(z): - """ - Cosmic star formation history 
distribution from Madau & Dickonson 2014 (https://arxiv.org/pdf/1403.0007.pdf) + verbose_print( + "\tMoe and di Stefano 2017: Moe_di_Stefano_2017_multiplicity_fractions: Normalising with {}. result: {}".format( + "norm", result + ), + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) - Args: - z: redshift + elif options["normalize_multiplicities"] == "merge": + # We first take the full multiplicity array + # (i.e. not multiplied by multiplier) and do the merging + result = self.merge_multiplicities( + full_fractions_array, max_multiplicity, verbosity=verbosity + ) - Returns: - Cosmic star formation rate in Solar mass year^-1 mega parsec^-3 - """ + # Then normalise to be sure + result = result / np.sum(result) - CSFH = 0.015 * ((1 + z) ** 2.7) / (1 + (((1 + z) / 2.9) ** 5.6)) + verbose_print( + "\tMoe and di Stefano 2017: Moe_di_Stefano_2017_multiplicity_fractions: Normalising with {}, max_multiplicity={} result={}".format( + "merge", max_multiplicity, result + ), + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) - return CSFH + verbose_print( + "\tMoe and di Stefano 2017: Moe_di_Stefano_2017_multiplicity_fractions: {}".format( + str(result) + ), + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) + # return array reference + return result -######################################################################## -# Metallicity distributions -######################################################################## + def build_q_table(self, options, m, p, verbosity=0): + """ + Build an interpolation table for q, given a mass and + orbital period. + m and p are labels which determine which system(s) + to look up from Moe's data: -######################################################################## -# Moe & DiStefano 2017 functions -# -# The code below are functions that are used to set up and interpolate -# on the Moe & DiStefano 2017 data. The interpolators take the last -# known value if we try to interpolate outside of the tables. -# There are still some open tasks and improvements that can be made: -# -# TODO: Solve the memory issues that are present. -# Are the interpolators not cleaned? -# TODO: Parallelize the setting up of the interpolators -# TODO: Generalise the code such that we can input other/newer tables + m can be M1, M2, M3, M4, or if set M1+M2 etc. + p can be P, P2, P3 -######################################################################## + The actual values are in $opts: -import py_rinterpolate + mass is in $opts->{m} + period is $opts->{p} -# Global dictionary to store values in -Moecache = {} + Since the information from the table for Moe and di Stefano 2017 is independent of any choice we make, + we need to take into account that for example our choice of minimum mass leads to + a minimum q_min that is not the same as in the table + We should ignore those parts of the table and renormalise. + If we are below the lowest value of qmin in the table we need to extrapolate the data + Anyway, the goal of this function is to provide some extrapolated values for q when we should sample outside of the boundaries + TODO: fix description to be correct for python + """ -def poisson(lambda_val, n, nmax=None, verbosity=0): - """ - Function that calculates the Poisson value and normalises - TODO: improve the description - """ + # We can check if we have a cached value for this already: + # TODO: fix this cache check. 
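The cache check that follows keys the stored q table on the (mass, period) pair it was built for, so the expensive table construction is skipped while those values stay the same. As a loose, stand-alone illustration of that pattern (the names and structure below are invented for the sketch, not the module's own):

# Sketch of a metadata-keyed cache: rebuild an expensive table only when the
# (mass, period) pair it was built for changes. Illustrative names only.
_cache = {"table": None, "meta": {}}

def get_q_table(m_value, p_value, build):
    meta = _cache["meta"]
    if _cache["table"] is not None and meta.get("m") == m_value and meta.get("p") == p_value:
        return _cache["table"]                     # cache hit: same m and p as before
    _cache["table"] = build(m_value, p_value)      # cache miss: rebuild the table
    _cache["meta"] = {"m": m_value, "p": p_value}
    return _cache["table"]

# build is only called when (m, p) changes; the second call is served from the cache
t1 = get_q_table(1.0, 100.0, lambda m, p: {"built_for": (m, p)})
t2 = get_q_table(1.0, 100.0, lambda m, p: {"built_for": (m, p)})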
+ incache = False + if Moecache.get("rinterpolator_q_metadata", None): + if (Moecache["rinterpolator_q_metadata"].get(m, None)) and ( + Moecache["rinterpolator_q_metadata"].get(p, None) + ): + if (Moecache["rinterpolator_q_metadata"][m] == options[m]) and ( + Moecache["rinterpolator_q_metadata"][p] == options[p] + ): + incache = True - cachekey = "{} {} {}".format(lambda_val, n, nmax) + verbose_print( + "\tMoe and di Stefano 2017: build_q_table: Found cached values for m={} p={}".format( + options[m], options[p] + ), + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) + else: + verbose_print( + "\tMoe and di Stefano 2017: build_q_table: Cached values for different m={} p={}. Freeing current table and making new table".format( + options[m], options[p] + ), + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) - if distribution_constants.get("poisson_cache", None): - if distribution_constants["poisson_cache"].get(cachekey, None): - p_val = distribution_constants["poisson_cache"][cachekey] + # + if not incache: + # trim and/or expand the table to the range qmin to qmax. + # qmin is set by the minimum stellar mass + qmin = options["Mmin"] / options["M_1"] verbose_print( - "\tMoe and di Stefano 2017: found cached value for poisson({}, {}, {}): {}".format( - lambda_val, n, nmax, p_val + "\tMoe and di Stefano 2017: build_q_table qmin: {}".format( + qmin, ), verbosity, _MOE2017_VERBOSITY_LEVEL, ) - return p_val + # qmax = maximum_mass_ratio_for_RLOF(options[m], options[p]) + # TODO: change this to the above + qmax = 1 - # Poisson distribution : note, n can be zero - # - # nmax is the truncation : if set, we normalise - # correctly. - p_val = _poisson(lambda_val, n) - - if nmax: - I_poisson = 0 - for i in range(nmax + 1): - I_poisson += _poisson(lambda_val, i) - p_val = p_val / I_poisson - - # Add to cache - if not distribution_constants.get("poisson_cache", None): - distribution_constants["poisson_cache"] = {} - distribution_constants["poisson_cache"][cachekey] = p_val - - verbose_print( - "\tMoe and di Stefano 2017: Poisson({}, {}, {}): {}".format( - lambda_val, n, nmax, p_val - ), - verbosity, - _MOE2017_VERBOSITY_LEVEL, - ) - return p_val + # qdata contains the table that we modify: we get + # the original data by interpolating Moe's table + qdata = {} + can_renormalize = 1 + qeps = 1e-8 # small number but such that qeps+1 != 1 + if qeps + 1 == 1.0: + verbose_print( + "\tMoe and di Stefano 2017: build_q_table: qeps (= {}) +1 == 1. Make qeps larger".format( + qeps, + ), + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) -def _poisson(lambda_val, n): - """ - Function to return the Poisson value - """ + if qmin >= qmax: + # there may be NO binaries in this part of the parameter space: + # in which case, set up a table with lots of zero in it - return (lambda_val ** n) * np.exp(-lambda_val) / (1.0 * math.factorial(n)) + qdata = {0: 0, 1: 0} + can_renormalize = 0 + else: + # qmin and qmax mean we'll get something non-zero + can_renormalize = 1 -def get_max_multiplicity(multiplicity_array): - """ - Function to get the maximum multiplicity - """ - - max_multiplicity = 0 - for n in range(4): - if multiplicity_array[n] > 0: - max_multiplicity = n + 1 - return max_multiplicity - - -def merge_multiplicities(result_array, max_multiplicity, verbosity=0): - """ - Function to fold the multiplicities higher than the max_multiplicity onto the max_multiplicity - - if max_multiplicity == 1: - All the multiplicities are folded onto multiplicity == 1. 
This will always total to 1 - if max_multiplicity == 2: - The multiplicity fractions of the triple and quadruples are folded onto that of the binary multiplicity fraction - if max_multiplicity == 3: - The multiplicity fractions of the quadruples are folded onto that of the triples - """ - - if not max_multiplicity in range(1, 5): - msg = "\tMoe and di Stefano 2017: merge_multiplicities: max_multiplicity has to be between 1 and 4. It is {} now".format( - max_multiplicity - ) - verbose_print( - msg, - verbosity, - 0, - ) - raise ValueError(msg) - - # Fold multiplicities: - verbose_print( - "\tMoe and di Stefano 2017: merge_multiplicities: Merging multiplicities with initial array {} and max multiplicity {}".format( - result_array, max_multiplicity - ), - verbosity, - _MOE2017_VERBOSITY_LEVEL, - ) - for i in range(max_multiplicity, len(result_array))[::-1]: - result_array[i - 1] += result_array[i] - result_array[i] = 0 - verbose_print( - "\tMoe and di Stefano 2017: merge_multiplicities: Merging multiplicities to new array {}".format( - result_array - ), - verbosity, - _MOE2017_VERBOSITY_LEVEL, - ) - - return result_array - - -def normalize_dict(result_dict, verbosity=0): - """ - Function to normalise a dictionary - """ - - sum_result = sum([result_dict[key] for key in result_dict.keys()]) - for key in result_dict.keys(): - result_dict[key] = result_dict[key] / sum_result - return result_dict - - -def Moe_di_Stefano_2017_multiplicity_fractions(options, verbosity=0): - """ - Function that creates a list of probability fractions and - normalises and merges them according to the users choice. - - TODO: make an extrapolation functionality in this. log10(1.6e1) - is low, we can probably go a bit further - - The default result that is returned when sampling the mass outside - of the mass range is now the last known value - - Returns a list of multiplicity fractions for a given input of mass - """ - - # Use the global Moecache - global Moecache - - multiplicity_modulator_array = np.array( - options["multiplicity_modulator"] - ) # Modulator array - - # Check for length - if not len(multiplicity_modulator_array) == 4: - msg = "Multiplicity modulator has to have 4 elements. Now it is {}, len: {}".format( - multiplicity_modulator_array, len(multiplicity_modulator_array) - ) - verbose_print( - msg, - verbosity, - 0, - ) - raise ValueError(msg) - - # Set up some arrays - full_fractions_array = np.zeros(4) # Meant to contain the real fractions - weighted_fractions_array = np.zeros( - 4 - ) # Meant to contain the fractions multiplied by the multiplicity modulator - - # Get max multiplicity - max_multiplicity = get_max_multiplicity(multiplicity_modulator_array) - - # ... 
it's better to interpolate the multiplicity and then - # use a Poisson distribution to calculate the fractions - # (this is more accurate) - - # Set up the multiplicity interpolator - if not Moecache.get("rinterpolator_multiplicity", None): - Moecache["rinterpolator_multiplicity"] = py_rinterpolate.Rinterpolate( - table=Moecache["multiplicity_table"], # Contains the table of data - nparams=1, # logM1 - ndata=4, # The number of datapoints (the parameters that we want to interpolate) - verbosity=verbosity - (_MOE2017_VERBOSITY_INTERPOLATOR_LEVEL - 1), - ) - - if options["multiplicity_model"] == "Poisson": - multiplicity = Moecache["rinterpolator_multiplicity"].interpolate( - [np.log10(options["M_1"])] - )[0] - - # Fill the multiplicity array - for n in range(4): - full_fractions_array[n] = poisson(multiplicity, n, 3, verbosity) - - # Normalize it so it fills to one when taking all the multiplicities: - full_fractions_array = full_fractions_array / np.sum(full_fractions_array) - - verbose_print( - "\tMoe and di Stefano 2017: Moe_di_Stefano_2017_multiplicity_fractions: using model {}: full_fractions_array: {}".format( - "Poisson", full_fractions_array - ), - verbosity, - _MOE2017_VERBOSITY_LEVEL, - ) - - elif options["multiplicity_model"] == "data": - # use the fractions calculated from Moe's data directly - # - # note that in this case, there are no quadruples: these - # are combined with triples - - # Fill with the raw values - for n in range(3): - full_fractions_array[n] = Moecache[ - "rinterpolator_multiplicity" - ].interpolate([np.log10(options["M_1"])])[n + 1] - - # Set last value - full_fractions_array[3] = 0.0 # no quadruples - verbose_print( - "\tMoe and di Stefano 2017: Moe_di_Stefano_2017_multiplicity_fractions: using model {}: full_fractions_array: {}".format( - "data", full_fractions_array - ), - verbosity, - _MOE2017_VERBOSITY_LEVEL, - ) - - # Normalisation: - if options["normalize_multiplicities"] == "raw": - # Don't multiply by the multiplicity_array, but do give a fractions array - verbose_print( - "\tMoe and di Stefano 2017: Moe_di_Stefano_2017_multiplicity_fractions: Not normalising (using raw results): results: {}".format( - full_fractions_array - ), - verbosity, - _MOE2017_VERBOSITY_LEVEL, - ) - result = full_fractions_array - - elif options["normalize_multiplicities"] == "norm": - # Multiply the full_multiplicity_fraction array by the multiplicity_multiplier_array, creating a weighted fractions array - weighted_fractions_array = full_fractions_array * multiplicity_modulator_array - - # Normalise this so it is in total 1: - result = weighted_fractions_array / np.sum(weighted_fractions_array) - - verbose_print( - "\tMoe and di Stefano 2017: Moe_di_Stefano_2017_multiplicity_fractions: Normalising with {}. result: {}".format( - "norm", result - ), - verbosity, - _MOE2017_VERBOSITY_LEVEL, - ) - - elif options["normalize_multiplicities"] == "merge": - # We first take the full multiplicity array - # (i.e. 
not multiplied by multiplier) and do the merging - result = merge_multiplicities( - full_fractions_array, max_multiplicity, verbosity=verbosity - ) - - # Then normalise to be sure - result = result / np.sum(result) - - verbose_print( - "\tMoe and di Stefano 2017: Moe_di_Stefano_2017_multiplicity_fractions: Normalising with {}, max_multiplicity={} result={}".format( - "merge", max_multiplicity, result - ), - verbosity, - _MOE2017_VERBOSITY_LEVEL, - ) - - verbose_print( - "\tMoe and di Stefano 2017: Moe_di_Stefano_2017_multiplicity_fractions: {}".format( - str(result) - ), - verbosity, - _MOE2017_VERBOSITY_LEVEL, - ) - - # return array reference - return result - - -def build_q_table(options, m, p, verbosity=0): - ############################################################ - # - # Build an interpolation table for q, given a mass and - # orbital period. - # - # $m and $p are labels which determine which system(s) - # to look up from Moe's data: - # - # $m can be M1, M2, M3, M4, or if set M1+M2 etc. - # $p can be P, P2, P3 - # - # The actual values are in $opts: - # - # mass is in $opts->{$m} - # period is $opts->{$p} - # - # Since the information from the table for Moe and di Stefano 2017 is independent of any choice we make, - # we need to take into account that for example our choice of minimum mass leads to - # a minimum q_min that is not the same as in the table - # We should ignore those parts of the table and renormalise. - # If we are below the lowest value of qmin in the table we need to extrapolate the data - # - # Anyway, the goal of this function is to provide some extrapolated values for q when we should sample outside of the boundaries - ############################################################ - - # We can check if we have a cached value for this already: - # TODO: fix this cache check. - incache = False - if Moecache.get("rinterpolator_q_metadata", None): - if (Moecache["rinterpolator_q_metadata"].get(m, None)) and ( - Moecache["rinterpolator_q_metadata"].get(p, None) - ): - if (Moecache["rinterpolator_q_metadata"][m] == options[m]) and ( - Moecache["rinterpolator_q_metadata"][p] == options[p] - ): - incache = True - - verbose_print( - "\tMoe and di Stefano 2017: build_q_table: Found cached values for m={} p={}".format( - options[m], options[p] - ), - verbosity, - _MOE2017_VERBOSITY_LEVEL, - ) - else: - verbose_print( - "\tMoe and di Stefano 2017: build_q_table: Cached values for different m={} p={}. Freeing current table and making new table".format( - options[m], options[p] - ), - verbosity, - _MOE2017_VERBOSITY_LEVEL, - ) + # require extrapolation sets whether we need to extrapolate + # at the low and high ends + require_extrapolation = {} - # - if not incache: - # trim and/or expand the table to the range $qmin to $qmax. - - # qmin is set by the minimum stellar mass : below this - # the companions are planets - # qmin = options["ranges"]["M"][ - # 0 - # ] # TODO: this lower range must not be lower than Mmin. 
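In other words, because q = M2/M1 and the companion must itself be at least the minimum stellar mass, the lower bound on q follows directly from Mmin. A quick numeric illustration (the masses below are made up):

M_min = 0.07    # minimum stellar mass in Msun (illustrative value)
M_1 = 1.4       # primary mass in Msun (illustrative value)

q_min = M_min / M_1    # lowest mass ratio that still gives a stellar companion (~0.05)
q_max = 1.0            # upper limit used here until an RLOF-based qmax is implemented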
- - qmin = options["Mmin"] / options["M_1"] - verbose_print( - "\tMoe and di Stefano 2017: build_q_table qmin: {}".format( - qmin, - ), - verbosity, - _MOE2017_VERBOSITY_LEVEL, - ) - - # qmax = maximum_mass_ratio_for_RLOF(options[m], options[p]) - # TODO: change this to the above - qmax = 1 - - # qdata contains the table that we modify: we get - # the original data by interpolating Moe's table - qdata = {} - can_renormalize = 1 - - qeps = 1e-8 # small number but such that qeps+1 != 1 - if qeps + 1 == 1.0: - verbose_print( - "\tMoe and di Stefano 2017: build_q_table: qeps (= {}) +1 == 1. Make qeps larger".format( - qeps, - ), - verbosity, - _MOE2017_VERBOSITY_LEVEL, - ) - - if qmin >= qmax: - # there may be NO binaries in this part of the parameter space: - # in which case, set up a table with lots of zero in it - - qdata = {0: 0, 1: 0} - can_renormalize = 0 - - else: - # qmin and qmax mean we'll get something non-zero - can_renormalize = 1 - - # require extrapolation sets whether we need to extrapolate - # at the low and high ends - require_extrapolation = {} - - if qmin >= 0.15: - # qmin is inside Moe's table : this is easy, - # we just keep points from qmin at the low - # end to qmax at the high end. - require_extrapolation["low"] = 0 - require_extrapolation[ - "high" - ] = 1 # TODO: shouldn't the extrapolation need to happen if qmax > 0.95 - qdata[qmin] = Moecache["rinterpolator_q"].interpolate( - [np.log10(options[m]), np.log10(options[p]), qmin] - )[0] + if qmin >= 0.15: + # qmin is inside Moe's table : this is easy, + # we just keep points from qmin at the low + # end to qmax at the high end. + require_extrapolation["low"] = 0 + require_extrapolation[ + "high" + ] = 1 # TODO: shouldn't the extrapolation need to happen if qmax > 0.95 + qdata[qmin] = Moecache["rinterpolator_q"].interpolate( + [np.log10(options[m]), np.log10(options[p]), qmin] + )[0] - for q in np.arange(0.15, 0.950001, 0.1): - if (q >= qmin) and (q <= qmax): - qdata[q] = Moecache["rinterpolator_q"].interpolate( - [np.log10(options[m]), np.log10(options[p]), q] - )[0] - else: - require_extrapolation["low"] = 1 - require_extrapolation["high"] = 1 - if qmax < 0.15: - # qmax < 0.15 which is off the edge - # of the table. In this case, choose - # two points at q=0.15 and 0.16 and interpolate - # at these in case we want to extrapolate. - for q in [0.15, 0.16]: - qdata[q] = Moecache["rinterpolator_q"].interpolate( - [np.log10(options[m]), np.log10(options[p]), q] - )[0] + for q in np.arange(0.15, 0.950001, 0.1): + if qmin<= q <= qmax: + qdata[q] = Moecache["rinterpolator_q"].interpolate( + [np.log10(options[m]), np.log10(options[p]), q] + )[0] else: - # qmin < 0.15 and qmax > 0.15, so we - # have to generate Moe's table for - # q = 0.15 (i.e. 0.1 to 0.2) to 0.95 (0.9 to 1) - # as a function of M1 and orbital period, - # to obtain the q distribution data. - - for q in np.arange(0.15, np.min([0.950001, qmax + 0.0001]), 0.1): - val = Moecache["rinterpolator_q"].interpolate( - [np.log10(options[m]), np.log10(options[p]), q] - )[0] - qdata[q] = val - - # just below qmin, if qmin>qeps, we want nothing - if qmin - 0.15 > qeps: - q = qmin - qeps + require_extrapolation["low"] = 1 + require_extrapolation["high"] = 1 + if qmax < 0.15: + # qmax < 0.15 which is off the edge + # of the table. In this case, choose + # two points at q=0.15 and 0.16 and interpolate + # at these in case we want to extrapolate. 
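The table trimming above keeps only the q grid points of Moe's table (0.15 to 0.95 in steps of 0.1) that fall inside [qmin, qmax], plus a point at qmin itself when it lies inside the table. A compact stand-alone sketch of that selection, with a placeholder for the actual table lookup (not the module's interpolator API):

import numpy as np

def select_q_points(qmin, qmax, lookup):
    """Keep the tabulated q points inside [qmin, qmax]; lookup(q) stands in for
    the interpolation of Moe's q table (illustrative only)."""
    qdata = {}
    if qmin >= 0.15:
        qdata[qmin] = lookup(qmin)                 # qmin itself sits inside the table
    for q in np.arange(0.15, 0.950001, 0.1):
        if qmin <= q <= qmax:
            qdata[q] = lookup(q)
    return qdata

# e.g. with qmin = 0.3 this keeps 0.3 plus the grid points 0.35, 0.45, ..., 0.95
print(sorted(select_q_points(0.3, 1.0, lambda q: 1.0 - q)))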
+ for q in [0.15, 0.16]: + qdata[q] = Moecache["rinterpolator_q"].interpolate( + [np.log10(options[m]), np.log10(options[p]), q] + )[0] + else: + # qmin < 0.15 and qmax > 0.15, so we + # have to generate Moe's table for + # q = 0.15 (i.e. 0.1 to 0.2) to 0.95 (0.9 to 1) + # as a function of M1 and orbital period, + # to obtain the q distribution data. + + for q in np.arange( + 0.15, np.min([0.950001, qmax + 0.0001]), 0.1 + ): + val = Moecache["rinterpolator_q"].interpolate( + [np.log10(options[m]), np.log10(options[p]), q] + )[0] + qdata[q] = val + + # just below qmin, if qmin>qeps, we want nothing + if qmin - 0.15 > qeps: + q = qmin - qeps + qdata[q] = 0 + require_extrapolation["low"] = 0 + + # just above qmax, if qmax<1, we want nothing + if qmax < 0.95: + q = qmax + qeps qdata[q] = 0 - require_extrapolation["low"] = 0 - - # just above qmax, if qmax<1, we want nothing - if qmax < 0.95: - q = qmax + qeps - qdata[q] = 0 - require_extrapolation["high"] = 0 - - # sorted list of qs - qs = sorted(qdata.keys()) - - if len(qs) == 0: - msg = "No qs found error" - raise ValueError(msg) + require_extrapolation["high"] = 0 + + # sorted list of qs + qs = sorted(qdata.keys()) + + if len(qs) == 0: + msg = "No qs found error" + raise ValueError(msg) + + if len(qs) == 1: + # only one q value : pretend there are two + # with a flat distribution up to 1.0. + if qs[0] == 1.0: + qs[0] = 1.0 - 1e-6 + qs.append(1) + qdata[qs[0]] = 1 + qdata[qs[1]] = 1 + else: + qs.append(1) + qdata[qs[1]] = qs[0] - elif len(qs) == 1: - # only one q value : pretend there are two - # with a flat distribution up to 1.0. - if qs[0] == 1.0: - qs[0] = 1.0 - 1e-6 - qs.append(1) - qdata[qs[0]] = 1 - qdata[qs[1]] = 1 + # We actually should do the extrapolation now. else: - qs.append(1) - qdata[qs[1]] = qs[0] + # Loop over both the lower end and the upper end + for pre in ["low", "high"]: + if require_extrapolation[pre] == 0: + continue - # We actually should do the extrapolation now. 
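Just above, the distribution is explicitly forced to zero immediately outside [qmin, qmax] by adding points at qmin - qeps and qmax + qeps, which is what truncates the interpolated q distribution to the allowed range. A small sketch of that truncation step with invented table values:

qeps = 1e-8                                  # tiny offset, chosen so that 1 + qeps != 1
qmin, qmax = 0.2, 0.8
qdata = {0.25: 2.0, 0.35: 1.8, 0.45: 1.5}    # made-up dn/dq values

qdata[qmin - qeps] = 0        # force the distribution to zero just below qmin
if qmax < 0.95:
    qdata[qmax + qeps] = 0    # and to zero just above qmax

qs = sorted(qdata)            # sorted q points later fed into the interpolation table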
- else: - # Loop over both the lower end and the upper end - for pre in ["low", "high"]: - if require_extrapolation[pre] == 0: - continue - else: sign = -1 if pre == "low" else 1 end_index = 0 if pre == "low" else len(qs) - 1 indices = ( @@ -1450,25 +1306,25 @@ def build_q_table(options, m, p, verbosity=0): # truncate the distribution qdata[max(0.0, min(1.0, qlimit + sign * qeps))] = 0 - if method == None: + if method is None: # no extrapolation : just interpolate between 0.10 and 0.95 verbose_print( - "\tMoe and di Stefano 2017: build_q_table: using no extrapolations".format(), + "\tMoe and di Stefano 2017: build_q_table: using no extrapolations", verbosity, _MOE2017_VERBOSITY_LEVEL, ) continue - elif method == "flat": + if method == "flat": # use the end value and extrapolate it # with zero slope qdata[qlimit] = qdata[qs[end_index]] verbose_print( - "\tMoe and di Stefano 2017: build_q_table: using constant extrapolation".format(), + "\tMoe and di Stefano 2017: build_q_table: using constant extrapolation", verbosity, _MOE2017_VERBOSITY_LEVEL, ) elif method == "linear": - qdata[qlimit] = linear_extrapolation_q( + qdata[qlimit] = self.linear_extrapolation_q( qs=qs, indices=indices, qlimit=qlimit, @@ -1478,7 +1334,7 @@ def build_q_table(options, m, p, verbosity=0): ) verbose_print( - "\tMoe and di Stefano 2017: build_q_table: using linear extrapolation".format(), + "\tMoe and di Stefano 2017: build_q_table: using linear extrapolation", verbosity, _MOE2017_VERBOSITY_LEVEL, ) @@ -1496,12 +1352,15 @@ def build_q_table(options, m, p, verbosity=0): ) elif method == "plaw2": - qdata[qlimit] = powerlaw_extrapolation_q( - qs=qs, indices=indices, qdata=qdata, verbosity=verbosity + qdata[qlimit] = self.powerlaw_extrapolation_q( + qs=qs, + indices=indices, + qdata=qdata, + verbosity=verbosity, ) verbose_print( - "\tMoe and di Stefano 2017: build_q_table: using powerlaw extrapolation".format(), + "\tMoe and di Stefano 2017: build_q_table: using powerlaw extrapolation", verbosity, _MOE2017_VERBOSITY_LEVEL, ) @@ -1509,7 +1368,7 @@ def build_q_table(options, m, p, verbosity=0): newq = 0.05 qdata[newq] = 0 verbose_print( - "\tMoe and di Stefano 2017: build_q_table: setting lowq to 0".format(), + "\tMoe and di Stefano 2017: build_q_table: setting lowq to 0", verbosity, _MOE2017_VERBOSITY_LEVEL, ) @@ -1530,108 +1389,110 @@ def build_q_table(options, m, p, verbosity=0): ) raise ValueError(msg) - # regenerate qs in new table. This is now the updated list of qs where we have some extrapolated numbers - tmp_table = [] - for q in sorted(qdata.keys()): - tmp_table.append([q, qdata[q]]) - - # Make an interpolation table to contain our modified data - q_interpolator = py_rinterpolate.Rinterpolate( - table=tmp_table, - nparams=1, - ndata=1, # Contains the table of data # q # - verbosity=verbosity - (_MOE2017_VERBOSITY_INTERPOLATOR_LEVEL - 1), - ) - verbose_print( - "\tMoe and di Stefano 2017: build_q_table: Created a new Q table", - verbosity, - _MOE2017_VERBOSITY_LEVEL, - ) + # regenerate qs in new table. 
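For reference, a generic two-point power-law extrapolation, which is the idea behind the "plaw2" option above, can be written as follows. This is a stand-alone sketch in linear space and is not copied from the module's powerlaw_extrapolation_q:

import math

def powerlaw_extrapolate(q1, y1, q2, y2, q_new):
    """Fit y = A * q**k through (q1, y1) and (q2, y2) and evaluate at q_new.
    Generic illustration of a power-law edge extrapolation."""
    if y1 <= 0.0 or y2 <= 0.0:
        return 0.0                     # a power law needs two positive anchor values
    k = (math.log10(y2) - math.log10(y1)) / (math.log10(q2) - math.log10(q1))
    A = y1 / q1 ** k
    return A * q_new ** k

# extrapolate invented table values at q = 0.15 and q = 0.25 down to q = 0.05
print(powerlaw_extrapolate(0.15, 2.0, 0.25, 1.5, 0.05))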
This is now the updated list of qs where we have some extrapolated numbers + tmp_table = [] + for q in sorted(qdata.keys()): + tmp_table.append([q, qdata[q]]) - if can_renormalize: + # Make an interpolation table to contain our modified data + q_interpolator = py_rinterpolate.Rinterpolate( + table=tmp_table, + nparams=1, + ndata=1, # Contains the table of data # q # + verbosity=verbosity - (_MOE2017_VERBOSITY_INTERPOLATOR_LEVEL - 1), + ) verbose_print( - "\tMoe and di Stefano 2017: build_q_table: Renormalising table", + "\tMoe and di Stefano 2017: build_q_table: Created a new Q table", verbosity, _MOE2017_VERBOSITY_LEVEL, ) - # now we integrate and renormalise (if the table is not all zero) - I = get_integration_constant_q( - q_interpolator, tmp_table, qdata, verbosity=verbosity - ) - - if I > 0: - # normalise to 1.0 by dividing the data by 1.0/$I - q_interpolator.multiply_table_column(1, 1.0 / I) + if can_renormalize: + verbose_print( + "\tMoe and di Stefano 2017: build_q_table: Renormalising table", + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) - # test this - new_I = get_integration_constant_q( + # now we integrate and renormalise (if the table is not all zero) + I = self.get_integration_constant_q( q_interpolator, tmp_table, qdata, verbosity=verbosity ) - # fail if error in integral > 1e-6 (should be ~ machine precision) - if abs(1.0 - new_I) > 1e-6: - verbose_print( - "\tMoe and di Stefano 2017: build_q_table: Error: > 1e-6 in q probability integral: {}".format( - I - ), - verbosity, - _MOE2017_VERBOSITY_LEVEL, + if I > 0: + # normalise to 1.0 by dividing the data by 1.0/$I + q_interpolator.multiply_table_column(1, 1.0 / I) + + # test this + new_I = self.get_integration_constant_q( + q_interpolator, tmp_table, qdata, verbosity=verbosity ) - # set this new table in the cache - Moecache["rinterpolator_q_given_{}_log10{}".format(m, p)] = q_interpolator - verbose_print( - "\tMoe and di Stefano 2017: build_q_table: stored q_interpolater as {}".format( - "rinterpolator_q_given_{}_log10{}".format(m, p) - ), - verbosity, - _MOE2017_VERBOSITY_LEVEL, - ) - # Store the values for which this table was set up in the dict - if not Moecache.get("rinterpolator_q_metadata", None): - Moecache["rinterpolator_q_metadata"] = {} - Moecache["rinterpolator_q_metadata"][m] = options[m] - Moecache["rinterpolator_q_metadata"][p] = options[p] + # fail if error in integral > 1e-6 (should be ~ machine precision) + if abs(1.0 - new_I) > 1e-6: + verbose_print( + "\tMoe and di Stefano 2017: build_q_table: Error: > 1e-6 in q probability integral: {}".format( + I + ), + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) + # set this new table in the cache + Moecache["rinterpolator_q_given_{}_log10{}".format(m, p)] = q_interpolator + verbose_print( + "\tMoe and di Stefano 2017: build_q_table: stored q_interpolater as {}".format( + "rinterpolator_q_given_{}_log10{}".format(m, p) + ), + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) + # Store the values for which this table was set up in the dict + if not Moecache.get("rinterpolator_q_metadata", None): + Moecache["rinterpolator_q_metadata"] = {} + Moecache["rinterpolator_q_metadata"][m] = options[m] + Moecache["rinterpolator_q_metadata"][p] = options[p] -def powerlaw_extrapolation_q(qdata, qs, indices, verbosity=0): - """ - Function to do the power law extrapolation at the lower end of the q range - """ - newq = 0.05 + def powerlaw_extrapolation_q(self, qdata, qs, indices): + """ + Function to do the power-law extrapolation at the lower end of the q range + """ + newq = 0.05 - # use a 
power-law extrapolation down to q=0.05, if possible - if (qdata[qs[indices[0]]] == 0.0) and (qdata[qs[indices[1]]] == 0.0): - # not possible - return 0 + # use a power-law extrapolation down to q=0.05, if possible + if (qdata[qs[indices[0]]] == 0.0) and (qdata[qs[indices[1]]] == 0.0): + # not possible + return 0 - else: - slope = (np.log10(qdata[qs[indices[1]]]) - np.log10(qdata[qs[indices[0]]])) / ( - np.log10(qs[indices[1]]) - np.log10(qs[indices[0]]) - ) - intercept = np.log10(qdata[qs[indices[0]]]) - slope * np.log10(qs[indices[0]]) + else: + slope = ( + np.log10(qdata[qs[indices[1]]]) - np.log10(qdata[qs[indices[0]]]) + ) / (np.log10(qs[indices[1]]) - np.log10(qs[indices[0]])) + intercept = np.log10(qdata[qs[indices[0]]]) - slope * np.log10( + qs[indices[0]] + ) - return slope * newq + intercept + return slope * newq + intercept + def linear_extrapolation_q( + self, qs, indices, qlimit, qdata, end_index, verbosity=0 + ): + """ + Function to do the linear extrapolation for q. + """ -def linear_extrapolation_q(qs, indices, qlimit, qdata, end_index, verbosity=0): - """ - Function to do the linear extrapolation for q. - """ + # linear extrapolation + dq = qs[indices[1]] - qs[indices[0]] - # linear extrapolation - dq = qs[indices[1]] - qs[indices[0]] + if dq == 0: + verbose_print( + "\tMoe and di Stefano 2017: build_q_table: linear dq=0", + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) + # No change + return qs[end_index] - if dq == 0: - verbose_print( - "\tMoe and di Stefano 2017: build_q_table: linear dq=0".format(), - verbosity, - _MOE2017_VERBOSITY_LEVEL, - ) - # No change - return qs[end_index] - else: slope = (qdata[qs[indices[1]]] - qdata[qs[indices[0]]]) / dq intercept = qdata[qs[indices[0]]] - slope * qs[indices[0]] @@ -1643,634 +1504,554 @@ def linear_extrapolation_q(qs, indices, qlimit, qdata, end_index, verbosity=0): verbosity, _MOE2017_VERBOSITY_LEVEL, ) - return max(0.0, slope * qlimit + intercept) + return max(0.0, slope * qlimit + intercept) -def get_integration_constant_q(q_interpolator, tmp_table, qdata, verbosity=0): - """ - Function to integrate the q interpolator and return the integration constant - """ + def get_integration_constant_q(self, q_interpolator, tmp_table, qdata, verbosity=0): + """ + Function to integrate the q interpolator and return the integration constant + """ - dq = 1e-3 # resolution of the integration/renormalisation - I = 0 + dq = 1e-3 # resolution of the integration/renormalisation + I = 0 + + # integrate: note that the value of the integral is + # meaningless to within a factor (which depends on $dq) + for q in np.arange(0, 1 + 2e-6, dq): + x = q_interpolator.interpolate([q]) + + if len(x) == 0: + msg = "\tMoe and di Stefano 2017: get_integration_constant_q: Q interpolator table interpolation failed.\n\t\ttmp_table = {}\n\t\tq_data = {}".format( + str(tmp_table), str(qdata) + ) + verbose_print( + msg, + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) + raise ValueError(msg) - # integrate: note that the value of the integral is - # meaningless to within a factor (which depends on $dq) - for q in np.arange(0, 1 + 2e-6, dq): - x = q_interpolator.interpolate([q]) - if len(x) == 0: - msg = "\tMoe and di Stefano 2017: build_q_table: Q interpolator table interpolation failed.\n\t\ttmp_table = {}\n\t\tq_data = {}".format( - str(tmp_table), str(qdata) - ) - verbose_print( - msg, - verbosity, - _MOE2017_VERBOSITY_LEVEL, - ) - raise ValueError(msg) - else: I += x[0] * dq - # verbose_print( - # "\tMoe and di Stefano 2017: build_q_table: dn/dq ({}) = {} I -> = 
{}".format(q, x[0], I), - # verbosity, - # _MOE2017_VERBOSITY_LEVEL, - # ) - return I + return I + def fill_data(self, sample_values, data_dict): + """ + Function that returns the normalised array of values for given logmass and logperiod + used for the e and q values -def fill_data(sample_values, data_dict): - """ - Function that returns the normalised array of values for given logmass and logperiod - used for the e and q values + TODO: make sure we do the correct thing with the dstep + """ - TODO: make sure we do the correct thing with the dstep - """ + data = {} + I = 0 - data = {} - I = 0 + dstep = float(sample_values[1]) - float(sample_values[0]) + + # Read out the data + for sample_value in sample_values: + val = data_dict[sample_value] + data[sample_value] = val + I += val + + # Normalise the data + for sample_value in sample_values: + data[sample_value] = data[sample_value] / I + + return data + + def calc_e_integral( + self, + options, + integrals_string, + interpolator_name, + mass_string, + period_string, + verbosity=0, + ): + """ + Function to calculate the e integral + + We need to renormalise this because min_per > 0, and not all periods should be included + """ + + global Moecache + min_ecc = 0 + max_ecc = 0.9999 + + mass_period_string = "{}_{}".format( + options[mass_string], options[period_string] + ) - dstep = float(sample_values[1]) - float(sample_values[0]) + # Check if the dict exists + if not Moecache.get(integrals_string, None): + Moecache[integrals_string] = {} + + # Check for cached value. If it doesn't exist: calculate + if not Moecache[integrals_string].get(mass_period_string, None): + I = 0 + decc = 1e-3 + + for ecc in np.arange(min_ecc, max_ecc, decc): + # Loop over all the values in the table, between the min and max P + dp_decc = Moecache[interpolator_name].interpolate( + [ + np.log10(options[mass_string]), + np.log10(options[period_string]), + ecc, + ] + )[0] - # Read out the data - for sample_value in sample_values: - val = data_dict[sample_value] - data[sample_value] = val - I += val + I += dp_decc * decc - # Normalise the data - for sample_value in sample_values: - data[sample_value] = data[sample_value] / I + # Set the integral value in the dict + Moecache[integrals_string][mass_period_string] = I + verbose_print( + "\tMoe and di Stefano 2017: calc_ecc_integral: min_ecc: {} max ecc: {} integrals_string: {} interpolator_name: {} mass_string: {} period_string: {} mass: {} period: {} I: {}".format( + min_ecc, + max_ecc, + integrals_string, + interpolator_name, + mass_string, + period_string, + options[mass_string], + options[period_string], + I, + ), + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) + else: + verbose_print( + "\tMoe and di Stefano 2017: calc_ecc_integral: Found cached value for min_ecc: {} max ecc: {} integrals_string: {} interpolator_name: {} mass_string: {} period_string: {} mass: {} period: {} I: {}".format( + min_ecc, + max_ecc, + integrals_string, + interpolator_name, + mass_string, + period_string, + options[mass_string], + options[period_string], + Moecache[integrals_string][mass_period_string], + ), + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) - return data + def calc_P_integral( + self, + options, + min_logP, + max_logP, + integrals_string, + interpolator_name, + mass_string, + verbosity=0, + ): + """ + Function to calculate the P integral + + We need to renormalise this because min_per > 0, and not all periods should be included + """ + + global Moecache + + # Check if the dict exists + if not Moecache.get(integrals_string, None): + 
Moecache[integrals_string] = {} + + # Check for cached value. If it doesn't exist: calculate + if not Moecache[integrals_string].get(options[mass_string], None): + I = 0 + dlogP = 1e-3 + + for logP in np.arange(min_logP, max_logP, dlogP): + # Loop over all the values in the table, between the min and max P + dp_dlogP = Moecache[interpolator_name].interpolate( + [np.log10(options[mass_string]), logP] + )[0] + I += dp_dlogP * dlogP -def calc_e_integral( - options, - integrals_string, - interpolator_name, - mass_string, - period_string, - verbosity=0, -): - """ - Function to calculate the P integral + # Set the integral value in the dict + Moecache[integrals_string][options[mass_string]] = I + verbose_print( + "\tMoe and di Stefano 2017: calc_P_integral: min_logP: {} integrals_string: {} interpolator_name: {} mass_string: {} mass: {} I: {}".format( + min_logP, + integrals_string, + interpolator_name, + mass_string, + options[mass_string], + I, + ), + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) + else: + verbose_print( + "\tMoe and di Stefano 2017: calc_P_integral: Found cached value for min_logP: {} integrals_string: {} interpolator_name: {} mass_string: {} mass: {} I: {}".format( + min_logP, + integrals_string, + interpolator_name, + mass_string, + options[mass_string], + Moecache[integrals_string][options[mass_string]], + ), + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) - We need to renormalise this because min_per > 0, and not all periods should be included - """ + def calc_total_probdens(self, prob_dict): + """ + Function to calculate the total probability density + """ - global Moecache - min_ecc = 0 - max_ecc = 0.9999 + total_probdens = 1 + for key in prob_dict: + total_probdens *= prob_dict[key] + prob_dict["total_probdens"] = total_probdens - mass_period_string = "{}_{}".format(options[mass_string], options[period_string]) + return prob_dict - # Check if the dict exists - if not Moecache.get(integrals_string, None): - Moecache[integrals_string] = {} + def Moe_di_Stefano_2017_pdf(self, options, verbosity=0): + """ + Moe & diStefano function to calculate the probability density. - # Check for cached value. 
If it doesn't exist: calculate - if not Moecache[integrals_string].get(mass_period_string, None): - I = 0 - decc = 1e-3 + takes a dictionary as input (in options) with options: - for ecc in np.arange(min_ecc, max_ecc, decc): - # Loop over all the values in the table, between the min and max P - dp_decc = Moecache[interpolator_name].interpolate( - [np.log10(options[mass_string]), np.log10(options[period_string]), ecc] - )[0] + M1, M2, M3, M4 => masses (Msun) [M1 required, rest optional] + P, P2, P3 => periods (days) [number: none=binary, 2=triple, 3=quadruple] + ecc, ecc2, ecc3 => eccentricities [numbering as for P above] - I += dp_decc * decc + mmin => minimum allowed stellar mass (default 0.07) + mmax => maximum allowed stellar mass (default 80.0) + """ - # Set the integral value in the dict - Moecache[integrals_string][mass_period_string] = I - verbose_print( - "\tMoe and di Stefano 2017: calc_ecc_integral: min_ecc: {} max ecc: {} integrals_string: {} interpolator_name: {} mass_string: {} period_string: {} mass: {} period: {} I: {}".format( - min_ecc, - max_ecc, - integrals_string, - interpolator_name, - mass_string, - period_string, - options[mass_string], - options[period_string], - I, - ), - verbosity, - _MOE2017_VERBOSITY_LEVEL, - ) - else: verbose_print( - "\tMoe and di Stefano 2017: calc_ecc_integral: Found cached value for min_ecc: {} max ecc: {} integrals_string: {} interpolator_name: {} mass_string: {} period_string: {} mass: {} period: {} I: {}".format( - min_ecc, - max_ecc, - integrals_string, - interpolator_name, - mass_string, - period_string, - options[mass_string], - options[period_string], - Moecache[integrals_string][mass_period_string], + "\tMoe_di_Stefano_2017_pdf with options:\n\t\t{}".format( + json.dumps(options, ensure_ascii=False) ), verbosity, _MOE2017_VERBOSITY_LEVEL, ) + prob_dict = ( + {} + ) # Dictionary containing all the pdf values for the different parameters -def calc_P_integral( - options, - min_logP, - max_logP, - integrals_string, - interpolator_name, - mass_string, - verbosity=0, -): - """ - Function to calculate the P integral - - We need to renormalise this because min_per > 0, and not all periods should be included - """ - - global Moecache - - # Check if the dict exists - if not Moecache.get(integrals_string, None): - Moecache[integrals_string] = {} + # Get the multiplicity from the options, and if its not there, calculate it based on the + # TODO: the function below makes no sense. We NEED to pass the multiplicity in the + if not options.get("multiplicity", None): + msg = "\tMoe_di_Stefano_2017_pdf: Did not find a multiplicity value in the options dictionary" + verbose_print( + msg, + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) + raise ValueError(msg) - # Check for cached value. 
If it doesn't exist: calculate - if not Moecache[integrals_string].get(options[mass_string], None): - I = 0 - dlogP = 1e-3 + multiplicity = options["multiplicity"] - for logP in np.arange(min_logP, max_logP, dlogP): - # Loop over all the values in the table, between the min and max P - dp_dlogP = Moecache[interpolator_name].interpolate( - [np.log10(options[mass_string]), logP] - )[0] + # Immediately return 0 if the multiplicity modulator is 0 + if options["multiplicity_modulator"][int(multiplicity) - 1] == 0: + verbose_print( + "\tMoe_di_Stefano_2017_pdf: returning 0 because of the multiplicity modulator being 0", + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) + return 0 - I += dp_dlogP * dlogP + ############################################################ + # multiplicity fraction + # Calculate the probability, or rather, fraction, of stars that belong to this mass - # Set the integral value in the dict - Moecache[integrals_string][options[mass_string]] = I + multiplicity_probability = self.Moe_di_Stefano_2017_multiplicity_fractions( + options, verbosity + )[int(multiplicity) - 1] + prob_dict["multiplicity"] = multiplicity_probability verbose_print( - "\tMoe and di Stefano 2017: calc_P_integral: min_logP: {} integrals_string: {} interpolator_name: {} mass_string: {} mass: {} I: {}".format( - min_logP, - integrals_string, - interpolator_name, - mass_string, - options[mass_string], - I, + "\tMoe_di_Stefano_2017_pdf: Appended multiplicity (mass1 = {}) probability ({}) to the prob dict ({})".format( + options["M_1"], prob_dict["multiplicity"], prob_dict ), verbosity, _MOE2017_VERBOSITY_LEVEL, ) - else: - verbose_print( - "\tMoe and di Stefano 2017: calc_P_integral: Found cached value for min_logP: {} integrals_string: {} interpolator_name: {} mass_string: {} mass: {} I: {}".format( - min_logP, - integrals_string, - interpolator_name, - mass_string, - options[mass_string], - Moecache[integrals_string][options[mass_string]], - ), - verbosity, - _MOE2017_VERBOSITY_LEVEL, - ) - - -def calc_total_probdens(prob_dict): - """ - Function to calculate the total probability density - """ - total_probdens = 1 - for key in prob_dict: - total_probdens *= prob_dict[key] - prob_dict["total_probdens"] = total_probdens - - return prob_dict - - -def Moe_di_Stefano_2017_pdf(options, verbosity=0): - """ - Moe & diStefano function to calculate the probability density. - - takes a dictionary as input (in options) with options: - - M1, M2, M3, M4 => masses (Msun) [M1 required, rest optional] - P, P2, P3 => periods (days) [number: none=binary, 2=triple, 3=quadruple] - ecc, ecc2, ecc3 => eccentricities [numbering as for P above] - - mmin => minimum allowed stellar mass (default 0.07) - mmax => maximum allowed stellar mass (default 80.0) - """ - - verbose_print( - "\tMoe_di_Stefano_2017_pdf with options:\n\t\t{}".format(json.dumps(options)), - verbosity, - _MOE2017_VERBOSITY_LEVEL, - ) - - prob_dict = ( - {} - ) # Dictionary containing all the pdf values for the different parameters - - # Get the multiplicity from the options, and if its not there, calculate it based on the - # TODO: the function below makes no sense. 
We NEED to pass the multiplicity in the - if not options.get("multiplicity", None): - msg = "\tMoe_di_Stefano_2017_pdf: Did not find a multiplicity value in the options dictionary" - verbose_print( - msg, - verbosity, - _MOE2017_VERBOSITY_LEVEL, - ) - raise ValueError(msg) - # multiplicity = 1 - # for n in range(2, 5): - # multiplicity += 1 if options.get("M{}".format(n), None) else 0 - else: - multiplicity = options["multiplicity"] + ############################################################ + # always require an IMF for the primary star + # + # NB multiply by M1 to convert dN/dM to dN/dlnM + # (dlnM = dM/M, so 1/dlnM = M/dM) - # Immediately return 0 if the multiplicity modulator is 0 - if options["multiplicity_modulator"][int(multiplicity) - 1] == 0: + # TODO: Create an n-part-powerlaw method that can have breakpoints and slopes. I'm using a three-part power law now. + # TODO: is this actually the correct way? putting the M1 in there? Do we sample in log space? + M1_probability = self.Kroupa2001(options["M_1"]) * options["M_1"] + prob_dict["M_1"] = M1_probability verbose_print( - "\tMoe_di_Stefano_2017_pdf: returning 0 because of the multiplicity modulator being 0", + "\tMoe_di_Stefano_2017_pdf: Appended Mass (m={}) probability ({}) to the prob dict ({})".format( + options["M_1"], prob_dict["M_1"], prob_dict + ), verbosity, _MOE2017_VERBOSITY_LEVEL, ) - return 0 - - ############################################################ - # multiplicity fraction - # Calculate the probability, or rather, fraction, of stars that belong to this mass - - multiplicity_probability = Moe_di_Stefano_2017_multiplicity_fractions( - options, verbosity - )[int(multiplicity) - 1] - prob_dict["multiplicity"] = multiplicity_probability - verbose_print( - "\tMoe_di_Stefano_2017_pdf: Appended multiplicity (mass1 = {}) probability ({}) to the prob dict ({})".format( - options["M_1"], prob_dict["multiplicity"], prob_dict - ), - verbosity, - _MOE2017_VERBOSITY_LEVEL, - ) - - ############################################################ - # always require an IMF for the primary star - # - # NB multiply by M1 to convert dN/dM to dN/dlnM - # (dlnM = dM/M, so 1/dlnM = M/dM) - - # TODO: Create an n-part-powerlaw method that can have breakpoints and slopes. I'm using a three-part power law now. - # TODO: is this actually the correct way? putting the M1 in there? Do we sample in log space? - M1_probability = Kroupa2001(options["M_1"]) * options["M_1"] - prob_dict["M_1"] = M1_probability - verbose_print( - "\tMoe_di_Stefano_2017_pdf: Appended Mass (m={}) probability ({}) to the prob dict ({})".format( - options["M_1"], prob_dict["M_1"], prob_dict - ), - verbosity, - _MOE2017_VERBOSITY_LEVEL, - ) - # if M1_probability == 0: # If the probability is 0 then we don't have to calculate more - # calc_total_probdens(prob_dict) - # return prob_dict - - """ - From here we go through the multiplicities. 
- """ - if multiplicity >= 2: - # If the multiplicity is higher than 1, we will need to construct the following tables: - # - period distribution table - # - q distribution table - # - eccentricity distribution table - - # Set up the interpolator for the periods - if not Moecache.get("rinterpolator_log10P", None): - Moecache["rinterpolator_log10P"] = py_rinterpolate.Rinterpolate( - table=Moecache["period_distributions"], # Contains the table of data - nparams=2, # log10M, log10P - ndata=2, # binary, triple - verbosity=verbosity - (_MOE2017_VERBOSITY_INTERPOLATOR_LEVEL - 1), - ) - verbose_print( - "\tMoe_di_Stefano_2017_pdf: Created new period interpolator: {}".format( - Moecache["rinterpolator_log10P"] - ), - verbosity, - _MOE2017_VERBOSITY_LEVEL, - ) + # if M1_probability == 0: # If the probability is 0 then we don't have to calculate more + # calc_total_probdens(prob_dict) + # return prob_dict - # Make a table storing Moe's data for q distributions - if ( - options.get("M_2", None) - or options.get("M_3", None) - or options.get("M_4", None) - ): - if not Moecache.get("rinterpolator_q", None): - Moecache["rinterpolator_q"] = py_rinterpolate.Rinterpolate( - table=Moecache["q_distributions"], # Contains the table of data - nparams=3, # log10M, log10P, q - ndata=1, # + """ + From here we go through the multiplicities. + """ + if multiplicity >= 2: + # If the multiplicity is higher than 1, we will need to construct the following tables: + # - period distribution table + # - q distribution table + # - eccentricity distribution table + + # Set up the interpolator for the periods + if not Moecache.get("rinterpolator_log10P", None): + Moecache["rinterpolator_log10P"] = py_rinterpolate.Rinterpolate( + table=Moecache[ + "period_distributions" + ], # Contains the table of data + nparams=2, # log10M, log10P + ndata=2, # binary, triple verbosity=verbosity - (_MOE2017_VERBOSITY_INTERPOLATOR_LEVEL - 1), ) verbose_print( - "\tMoe_di_Stefano_2017_pdf: Created new q interpolator: {}".format( - Moecache["rinterpolator_q"] + "\tMoe_di_Stefano_2017_pdf: Created new period interpolator: {}".format( + Moecache["rinterpolator_log10P"] ), verbosity, _MOE2017_VERBOSITY_LEVEL, ) - # Make a table storing Moe's data for q distributions, but only if the ecc is actually sampled - if "ecc" in options: - if not options["ecc"] == None: - if not Moecache.get("rinterpolator_e", None): - Moecache["rinterpolator_e"] = py_rinterpolate.Rinterpolate( - table=Moecache[ - "ecc_distributions" - ], # Contains the table of data - nparams=3, # log10M, log10P, e + # Make a table storing Moe's data for q distributions + if ( + options.get("M_2", None) + or options.get("M_3", None) + or options.get("M_4", None) + ): + if not Moecache.get("rinterpolator_q", None): + Moecache["rinterpolator_q"] = py_rinterpolate.Rinterpolate( + table=Moecache["q_distributions"], # Contains the table of data + nparams=3, # log10M, log10P, q ndata=1, # verbosity=verbosity - (_MOE2017_VERBOSITY_INTERPOLATOR_LEVEL - 1), ) verbose_print( - "\tMoe_di_Stefano_2017_pdf: Created new e interpolator: {}".format( - Moecache["rinterpolator_e"] + "\tMoe_di_Stefano_2017_pdf: Created new q interpolator: {}".format( + Moecache["rinterpolator_q"] ), verbosity, _MOE2017_VERBOSITY_LEVEL, ) - ############### - # Calculation for period of the binary + # Make a table storing Moe's data for q distributions, but only if the ecc is actually sampled + if "ecc" in options: + if not options["ecc"] is None: + if not Moecache.get("rinterpolator_e", None): + Moecache["rinterpolator_e"] = 
py_rinterpolate.Rinterpolate( + table=Moecache[ + "ecc_distributions" + ], # Contains the table of data + nparams=3, # log10M, log10P, e + ndata=1, # + verbosity=verbosity + - (_MOE2017_VERBOSITY_INTERPOLATOR_LEVEL - 1), + ) + verbose_print( + "\tMoe_di_Stefano_2017_pdf: Created new e interpolator: {}".format( + Moecache["rinterpolator_e"] + ), + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) - if options.get("M_2", None): - # Separation of the inner binary - options["sep"] = calc_sep_from_period( - options["M_1"], options["M_2"], options["P"] - ) - # TODO: add check for min_logP with instant RLOF? - # TODO: Actually use the value above. - # Total mass inner binary: - options["M_1+M_2"] = options["M_1"] + options["M_2"] - - # Calculate P integral or use cached value - - # get the periods from the Moecahe - min_logP = float(Moecache["logperiods"][0]) - max_logP = float(Moecache["logperiods"][-1]) - - calc_P_integral( - options, - min_logP, - max_logP, - "P_integrals", - "rinterpolator_log10P", - "M_1", - verbosity, - ) + ############### + # Calculation for period of the binary - # Set probabilty for P1 - p_val = Moecache["rinterpolator_log10P"].interpolate( - [np.log10(options["M_1"]), np.log10(options["P"])] - )[0] - p_val = p_val / Moecache["P_integrals"][options["M_1"]] - prob_dict["P"] = p_val - verbose_print( - "\tMoe_di_Stefano_2017_pdf: Appended period (m={}, P={}) probability ({}) to the prob list ({})".format( - options["M_1"], options["P"], prob_dict["P"], prob_dict - ), - verbosity, - _MOE2017_VERBOSITY_LEVEL, - ) - # if prob_dict['P'] == 0: # If the probability is 0 then we don't have to calculate more - # calc_total_probdens(prob_dict) - # return prob_dict + if options.get("M_2", None): + # Separation of the inner binary + options["sep"] = calc_sep_from_period( + options["M_1"], options["M_2"], options["P"] + ) + # TODO: add check for min_logP with instant RLOF? + # TODO: Actually use the value above. 
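calc_sep_from_period is defined elsewhere in the module and is not shown in this hunk; assuming it encodes the usual Kepler relation, an equivalent stand-alone conversion in solar units would look roughly like the sketch below (the unit choices are assumptions, not the module's actual implementation):

def sep_from_period_au(m1_msun, m2_msun, period_days):
    """Kepler's third law in solar units: a[AU]**3 = Mtot[Msun] * P[yr]**2.
    Stand-alone sketch; the module's calc_sep_from_period may use other units."""
    period_years = period_days / 365.25
    return ((m1_msun + m2_msun) * period_years ** 2) ** (1.0 / 3.0)

# e.g. a 1 + 1 Msun binary with a one-year period sits at about 1.26 AU
print(sep_from_period_au(1.0, 1.0, 365.25))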
+ # Total mass inner binary: + options["M_1+M_2"] = options["M_1"] + options["M_2"] - ############################################################ - # mass ratio (0 < q = M2/M1 < qmax) - # - # we need to construct the q table for the given M1 - # subject to qmin = Mmin/M1 - - if options.get("M_2", None): - # Build the table for q - primary_mass = options["M_1"] - secondary_mass = options["M_2"] - m_label = "M_1" - p_label = "P" - - # Construct the q table - build_q_table(options, m_label, p_label, verbosity=verbosity) - verbose_print( - "\tMoe_di_Stefano_2017_pdf: Created q_table ({}) for m={} p={}".format( - Moecache[ - "rinterpolator_q_given_{}_log10{}".format(m_label, p_label) - ], - options[m_label], - options[p_label], - ), + # Calculate P integral or use cached value + + # get the periods from the Moecahe + min_logP = float(Moecache["logperiods"][0]) + max_logP = float(Moecache["logperiods"][-1]) + + self.calc_P_integral( + options, + min_logP, + max_logP, + "P_integrals", + "rinterpolator_log10P", + "M_1", verbosity, - _MOE2017_VERBOSITY_LEVEL, ) - # Add probability for the mass ratio - q_prob = Moecache[ - "rinterpolator_q_given_{}_log10{}".format(m_label, p_label) - ].interpolate([secondary_mass / primary_mass])[0] - prob_dict["q"] = q_prob + # Set probabilty for P1 + p_val = Moecache["rinterpolator_log10P"].interpolate( + [np.log10(options["M_1"]), np.log10(options["P"])] + )[0] + p_val = p_val / Moecache["P_integrals"][options["M_1"]] + prob_dict["P"] = p_val verbose_print( - "\tMoe_di_Stefano_2017_pdf: appended mass ratio (M={} P={} q={}) probability ({}) to the prob list ({}) ".format( - options["M_1"], - options["P"], - options["M_2"] / options["M_1"], - prob_dict["q"], - prob_dict, + "\tMoe_di_Stefano_2017_pdf: Appended period (m={}, P={}) probability ({}) to the prob list ({})".format( + options["M_1"], options["P"], prob_dict["P"], prob_dict ), verbosity, _MOE2017_VERBOSITY_LEVEL, ) - # if prob_dict['q'] == 0: # If the probability is 0 then we don't have to calculate more + # if prob_dict['P'] == 0: # If the probability is 0 then we don't have to calculate more # calc_total_probdens(prob_dict) # return prob_dict - ############################################################ - # Eccentricity - # TODO: ask rob if the eccentricity requires an extrapolation as well. - - # Only do this if the eccentricity is sampled - if "ecc" in options: - if not options["ecc"] == None: - # Calculate ecc integral or use cached value - calc_e_integral( - options, "ecc_integrals", "rinterpolator_e", "M_1", "P", verbosity - ) - mass_period_string = "{}_{}".format(options["M_1"], options["P"]) - - # Set probability for ecc - ecc_val = Moecache["rinterpolator_e"].interpolate( - [np.log10(options["M_1"]), np.log10(options["P"]), options["ecc"]] - )[0] - ecc_val = ecc_val / Moecache["ecc_integrals"][mass_period_string] - prob_dict["ecc"] = ecc_val - verbose_print( - "\tMoe_di_Stefano_2017_pdf: Appended eccentricity (m={}, P={}, ecc={}) probability ({}) to the prob list ({})".format( - options["M_1"], - options["P"], - options["ecc"], - prob_dict["ecc"], - prob_dict, - ), - verbosity, - _MOE2017_VERBOSITY_LEVEL, - ) - # if prob_dict['ecc'] == 0: # If the probability is 0 then we don't have to calculate more - # calc_total_probdens(prob_dict) - # return prob_dict - - # Calculations for when multiplicity is bigger than 3 - # BEWARE: binary_c does not evolve these systems actually and the code below should be revised for when binary_c actually evolves triples. 
- # For that reason, I would not advise to use things with multiplicity > 3 - if multiplicity >= 3: - ############################################################ - # orbital period 2 = - # orbital period of star 3 (multiplicity==3) or - # the star3+star4 binary (multiplicity==4) + # mass ratio (0 < q = M2/M1 < qmax) # - # we assume the same period distribution for star 3 - # (or stars 3 and 4) but with a separation that is >10*a*(1+e) - # where 10*a*(1+e) is the maximum apastron separation of - # stars 1 and 2 - - # TODO: Is this a correct assumption? - max_sep = 10.0 * options["sep"] * (1.0 + options["ecc"]) - min_P2 = calc_period_from_sep(options["M_1+M_2"], options["mmin"], max_sep) - min_logP2 = math.log10(min_P2) - # max_logP2 = 10.0 - # min_logP = Moecache['logperiods'][0] - max_logP2 = float(Moecache["logperiods"][-1]) - - if options["P2"] < min_P2: - # period is too short : system is not hierarchical - prob_dict["P2"] = 0 + # we need to construct the q table for the given M1 + # subject to qmin = Mmin/M1 + + if options.get("M_2", None): + # Build the table for q + primary_mass = options["M_1"] + secondary_mass = options["M_2"] + m_label = "M_1" + p_label = "P" + + # Construct the q table + self.build_q_table(options, m_label, p_label, verbosity=verbosity) verbose_print( - "\tMoe_di_Stefano_2017_pdf: period2 is too short: {} < {}, system is not hierarchical. Added 0 to probability list".format( - options["P1"], min_P2 + "\tMoe_di_Stefano_2017_pdf: Created q_table ({}) for m={} p={}".format( + Moecache[ + "rinterpolator_q_given_{}_log10{}".format(m_label, p_label) + ], + options[m_label], + options[p_label], ), verbosity, _MOE2017_VERBOSITY_LEVEL, ) - # if prob_dict['P2'] == 0: # If the probability is 0 then we don't have to calculate more - # calc_total_probdens(prob_dict) - # return prob_dict - - else: - # period is long enough that the system is hierarchical - # hence the separation between the outer star - # and inner binary - options["sep2"] = calc_sep_from_period( - options["M_3"], options["M_1+M_2"], options["P2"] - ) - # Check for cached value of P integral or calculate - calc_P_integral( - options, - min_logP2, - max_logP2, - "P2_integrals", - "rinterpolator_log10P", - "M_1+M_2", - verbosity, - ) - - # Add the probability - p_val = Moecache["rinterpolator_log10P"].interpolate( - [np.log10(options["M_1+M_2"]), np.log10(options["P2"])] - )[0] - p_val = p_val / Moecache["P2_integrals"][options["M_1+M_2"]] - prob_dict["P2"] = p_val + # Add probability for the mass ratio + q_prob = Moecache[ + "rinterpolator_q_given_{}_log10{}".format(m_label, p_label) + ].interpolate([secondary_mass / primary_mass])[0] + prob_dict["q"] = q_prob verbose_print( - "\tMoe_di_Stefano_2017_pdf: Appended period2 (m1={} m2={}, P2={}) probability ({}) to the prob list ({})".format( + "\tMoe_di_Stefano_2017_pdf: appended mass ratio (M={} P={} q={}) probability ({}) to the prob list ({}) ".format( options["M_1"], - options["M_2"], - options["P2"], - prob_dict["P2"], + options["P"], + options["M_2"] / options["M_1"], + prob_dict["q"], prob_dict, ), verbosity, _MOE2017_VERBOSITY_LEVEL, ) - # if prob_dict['P2'] == 0: # If the probability is 0 then we don't have to calculate more + # if prob_dict['q'] == 0: # If the probability is 0 then we don't have to calculate more # calc_total_probdens(prob_dict) # return prob_dict - ############################################################ - # mass ratio 2 = q2 = M3 / (M1+M2) - # - # we need to construct the q table for the given M1 - # subject to qmin = Mmin/(M1+M2) 
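The outer mass ratio follows the same pattern as the inner one, with the inner binary's total mass playing the role of the primary. A quick numeric illustration with invented masses:

M_1, M_2, M_3 = 1.5, 1.0, 0.3    # Msun, illustrative values
M_min = 0.07                      # minimum stellar mass, illustrative value

q2 = M_3 / (M_1 + M_2)            # outer mass ratio q2 = M3 / (M1 + M2), here 0.12
q2_min = M_min / (M_1 + M_2)      # lowest allowed outer mass ratio, here 0.028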
- - # Set the variables for the masses and their names - primary_mass = options["M_1+M_2"] - secondary_mass = options["M_3"] - m_label = "M_1+M_2" - p_label = "P2" + ############################################################ + # Eccentricity + # TODO: ask rob if the eccentricity requires an extrapolation as well. + + # Only do this if the eccentricity is sampled + if "ecc" in options: + if not options["ecc"] is None: + # Calculate ecc integral or use cached value + self.calc_e_integral( + options, + "ecc_integrals", + "rinterpolator_e", + "M_1", + "P", + verbosity, + ) + mass_period_string = "{}_{}".format(options["M_1"], options["P"]) + + # Set probability for ecc + ecc_val = Moecache["rinterpolator_e"].interpolate( + [ + np.log10(options["M_1"]), + np.log10(options["P"]), + options["ecc"], + ] + )[0] + ecc_val = ecc_val / Moecache["ecc_integrals"][mass_period_string] + prob_dict["ecc"] = ecc_val + verbose_print( + "\tMoe_di_Stefano_2017_pdf: Appended eccentricity (m={}, P={}, ecc={}) probability ({}) to the prob list ({})".format( + options["M_1"], + options["P"], + options["ecc"], + prob_dict["ecc"], + prob_dict, + ), + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) + # if prob_dict['ecc'] == 0: # If the probability is 0 then we don't have to calculate more + # calc_total_probdens(prob_dict) + # return prob_dict - # Build q table - build_q_table(options, m_label, p_label, verbosity=verbosity) - verbose_print( - "\tMoe_di_Stefano_2017_pdf: Called build_q_table", - verbosity, - _MOE2017_VERBOSITY_LEVEL, - ) + # Calculations for when multiplicity is bigger than 3 + # BEWARE: binary_c does not evolve these systems actually and the code below should be revised for when binary_c actually evolves triples. + # For that reason, I would not advise to use things with multiplicity > 3 + if multiplicity >= 3: - # Add the probability - q2_val = Moecache[ - "rinterpolator_q_given_{}_log10{}".format(m_label, p_label) - ].interpolate([secondary_mass / primary_mass])[0] - prob_dict["q2"] = q2_val - verbose_print( - "\tMoe_di_Stefano_2017_pdf: appended mass ratio (M_1+M_2={} M_3={} P={} q={}) probability ({}) to the prob list ({}) ".format( - options["M_1+M_2"], - options["M_3"], - options["P"], - secondary_mass / primary_mass, - prob_dict["q2"], - prob_dict, - ), - verbosity, - _MOE2017_VERBOSITY_LEVEL, + ############################################################ + # orbital period 2 = + # orbital period of star 3 (multiplicity==3) or + # the star3+star4 binary (multiplicity==4) + # + # we assume the same period distribution for star 3 + # (or stars 3 and 4) but with a separation that is >10*a*(1+e) + # where 10*a*(1+e) is the maximum apastron separation of + # stars 1 and 2 + + # TODO: Is this a correct assumption? + max_sep = 10.0 * options["sep"] * (1.0 + options["ecc"]) + min_P2 = calc_period_from_sep( + options["M_1+M_2"], options["mmin"], max_sep ) - # if prob_dict['q2'] == 0: # If the probability is 0 then we don't have to calculate more - # calc_total_probdens(prob_dict) - # return prob_dict - - # TODO: Implement ecc2 calculation - if multiplicity == 4: - # quadruple system. - # TODO: Ask Rob about the structure of the quadruple. Is this only double binary quadruples? 
+ min_logP2 = math.log10(min_P2) + # max_logP2 = 10.0 + # min_logP = Moecache['logperiods'][0] + max_logP2 = float(Moecache["logperiods"][-1]) + + if options["P2"] < min_P2: + # period is too short : system is not hierarchical + prob_dict["P2"] = 0 + verbose_print( + "\tMoe_di_Stefano_2017_pdf: period2 is too short: {} < {}, system is not hierarchical. Added 0 to probability list".format( + options["P1"], min_P2 + ), + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) + # if prob_dict['P2'] == 0: # If the probability is 0 then we don't have to calculate more + # calc_total_probdens(prob_dict) + # return prob_dict - ############################################################ - # orbital period 3 - # - # we assume the same period distribution for star 4 - # as for any other stars but Pmax must be such that - # sep3 < sep2 * 0.2 - - # TODO: fix this here - max_sep3 = 0.2 * options["sep2"] * (1.0 + options["ecc2"]) - max_per3 = calc_period_from_sep( - options["M_1+M_2"], options["mmin"], max_sep3 + else: + # period is long enough that the system is hierarchical + # hence the separation between the outer star + # and inner binary + options["sep2"] = calc_sep_from_period( + options["M_3"], options["M_1+M_2"], options["P2"] ) - # Calculate P integral or use the cached value - # TODO: Make sure we use the correct period idea here. - calc_P_integral( + # Check for cached value of P integral or calculate + self.calc_P_integral( options, min_logP2, max_logP2, @@ -2280,145 +2061,247 @@ def Moe_di_Stefano_2017_pdf(options, verbosity=0): verbosity, ) - # Set probability + # Add the probability p_val = Moecache["rinterpolator_log10P"].interpolate( [np.log10(options["M_1+M_2"]), np.log10(options["P2"])] )[0] p_val = p_val / Moecache["P2_integrals"][options["M_1+M_2"]] - prob_dict["P3"] = p_val + prob_dict["P2"] = p_val verbose_print( - "\tMoe_di_Stefano_2017_pdf: Appended period2 (M=4) (M_1={} M_2={}, P2={}) probability ({}) to the prob list ({})".format( + "\tMoe_di_Stefano_2017_pdf: Appended period2 (m1={} m2={}, P2={}) probability ({}) to the prob list ({})".format( options["M_1"], options["M_2"], options["P2"], - prob_dict["P3"], + prob_dict["P2"], prob_dict, ), verbosity, _MOE2017_VERBOSITY_LEVEL, ) - # if prob_dict['P3'] == 0: # If the probability is 0 then we don't have to calculate more + # if prob_dict['P2'] == 0: # If the probability is 0 then we don't have to calculate more # calc_total_probdens(prob_dict) # return prob_dict ############################################################ - # mass ratio 2 + # mass ratio 2 = q2 = M3 / (M1+M2) # # we need to construct the q table for the given M1 # subject to qmin = Mmin/(M1+M2) - # Make a table storing Moe's data for q distributions - # Build the table for q2 + # Set the variables for the masses and their names primary_mass = options["M_1+M_2"] secondary_mass = options["M_3"] m_label = "M_1+M_2" p_label = "P2" - # Calculate new q table - build_q_table(options, m_label, p_label, verbosity=verbosity) + # Build q table + self.build_q_table(options, m_label, p_label, verbosity=verbosity) verbose_print( - "\tMoe_di_Stefano_2017_pdf: Created q_table ".format(), + "\tMoe_di_Stefano_2017_pdf: Called build_q_table", verbosity, _MOE2017_VERBOSITY_LEVEL, ) # Add the probability - q3_prob = Moecache[ + q2_val = Moecache[ "rinterpolator_q_given_{}_log10{}".format(m_label, p_label) ].interpolate([secondary_mass / primary_mass])[0] - prob_dict["q3"] = q3_prob + prob_dict["q2"] = q2_val verbose_print( "\tMoe_di_Stefano_2017_pdf: appended mass ratio (M_1+M_2={} M_3={} 
P={} q={}) probability ({}) to the prob list ({}) ".format( options["M_1+M_2"], options["M_3"], options["P"], secondary_mass / primary_mass, - prob_dict["q3"], + prob_dict["q2"], prob_dict, ), verbosity, _MOE2017_VERBOSITY_LEVEL, ) - # if prob_dict['q3'] == 0: # If the probability is 0 then we don't have to calculate more + # if prob_dict['q2'] == 0: # If the probability is 0 then we don't have to calculate more # calc_total_probdens(prob_dict) # return prob_dict - # TODO ecc 3 + # TODO: Implement ecc2 calculation + if multiplicity == 4: + # quadruple system. + # TODO: Ask Rob about the structure of the quadruple. Is this only double binary quadruples? + + ############################################################ + # orbital period 3 + # + # we assume the same period distribution for star 4 + # as for any other stars but Pmax must be such that + # sep3 < sep2 * 0.2 + + # TODO: fix this here + max_sep3 = 0.2 * options["sep2"] * (1.0 + options["ecc2"]) + max_per3 = calc_period_from_sep( + options["M_1+M_2"], options["mmin"], max_sep3 + ) - # check for input of multiplicity - elif multiplicity not in range(1, 5): - msg = "\tMoe_di_Stefano_2017_pdf: Unknown multiplicity {}".format(multiplicity) - verbose_print( - msg, - verbosity, - _MOE2017_VERBOSITY_LEVEL, - ) - raise ValueError(msg) + # Calculate P integral or use the cached value + # TODO: Make sure we use the correct period idea here. + self.calc_P_integral( + options, + min_logP2, + max_logP2, + "P2_integrals", + "rinterpolator_log10P", + "M_1+M_2", + verbosity, + ) + + # Set probability + p_val = Moecache["rinterpolator_log10P"].interpolate( + [np.log10(options["M_1+M_2"]), np.log10(options["P2"])] + )[0] + p_val = p_val / Moecache["P2_integrals"][options["M_1+M_2"]] + prob_dict["P3"] = p_val + verbose_print( + "\tMoe_di_Stefano_2017_pdf: Appended period2 (M=4) (M_1={} M_2={}, P2={}) probability ({}) to the prob list ({})".format( + options["M_1"], + options["M_2"], + options["P2"], + prob_dict["P3"], + prob_dict, + ), + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) + # if prob_dict['P3'] == 0: # If the probability is 0 then we don't have to calculate more + # calc_total_probdens(prob_dict) + # return prob_dict + + ############################################################ + # mass ratio 2 + # + # we need to construct the q table for the given M1 + # subject to qmin = Mmin/(M1+M2) + # Make a table storing Moe's data for q distributions + + # Build the table for q2 + primary_mass = options["M_1+M_2"] + secondary_mass = options["M_3"] + m_label = "M_1+M_2" + p_label = "P2" + + # Calculate new q table + self.build_q_table( + options, m_label, p_label, verbosity=verbosity + ) + verbose_print( + "\tMoe_di_Stefano_2017_pdf: Created q_table ", + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) - # Calculate total probdens: - prob_dict = calc_total_probdens(prob_dict) + # Add the probability + q3_prob = Moecache[ + "rinterpolator_q_given_{}_log10{}".format(m_label, p_label) + ].interpolate([secondary_mass / primary_mass])[0] + prob_dict["q3"] = q3_prob + verbose_print( + "\tMoe_di_Stefano_2017_pdf: appended mass ratio (M_1+M_2={} M_3={} P={} q={}) probability ({}) to the prob list ({}) ".format( + options["M_1+M_2"], + options["M_3"], + options["P"], + secondary_mass / primary_mass, + prob_dict["q3"], + prob_dict, + ), + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) + # if prob_dict['q3'] == 0: # If the probability is 0 then we don't have to calculate more + # calc_total_probdens(prob_dict) + # return prob_dict - # Some info - if multiplicity == 1: - 
verbose_print( - "\tMoe_di_Stefano_2017_pdf: M_1={} q=N/A log10P=N/A ({}): {} -> {}\n".format( - options["M_1"], - len(prob_dict), - str(prob_dict), - prob_dict["total_probdens"], - ), - verbosity, - _MOE2017_VERBOSITY_LEVEL, - ) - elif multiplicity == 2: - verbose_print( - "\tMoe_di_Stefano_2017_pdf: M_1={} q={} log10P={} ecc={} ({}): {} -> {}\n".format( - options["M_1"], - options["M_2"] / options["M_1"] if options.get("M_2", None) else "N/A", - np.log10(options["P"]), - options["ecc"] if options.get("ecc", None) else "N/A", - len(prob_dict), - str(prob_dict), - prob_dict["total_probdens"], - ), - verbosity, - _MOE2017_VERBOSITY_LEVEL, - ) - elif multiplicity == 3: - verbose_print( - "\tMoe_di_Stefano_2017_pdf: M_1={} q={} log10P={} ecc={} M_3={} log10P2={} ecc2={} ({}): {} -> {}".format( - options["M_1"], - options["M_2"] / options["M_1"] if options.get("M_2", None) else "N/A", - np.log10(options["P"]), - options["ecc"] if options.get("ecc", None) else "N/A", - options["M_3"], - np.log10(options["P2"]), - options["ecc2"] if options.get("ecc2", None) else "N/A", - len(prob_dict), - str(prob_dict), - prob_dict["total_probdens"], - ), - verbosity, - _MOE2017_VERBOSITY_LEVEL, - ) - elif multiplicity == 4: - verbose_print( - "Moe_di_Stefano_2017_pdf: M_1={} q={} log10P={} ecc={} M_3={} log10P2={} ecc2={} M_4={} log10P3={} ecc3={} ({}) : {} -> {}".format( - options["M_1"], - options["M_2"] / options["M_1"] if options.get("M_2", None) else "N/A", - np.log10(options["P"]), - options["ecc"] if options.get("ecc", None) else "N/A", - options["M_3"], - np.log10(options["P2"]), - options["ecc2"] if options.get("ecc2", None) else "N/A", - options["M_4"], - np.log10(options["P3"]), - options["ecc3"] if options.get("ecc3", None) else "N/A", - len(prob_dict), - str(prob_dict), - prob_dict["total_probdens"], - ), - verbosity, - _MOE2017_VERBOSITY_LEVEL, - ) - return prob_dict + # TODO: ecc 3 + + # check for input of multiplicity + elif multiplicity not in range(1, 5): + msg = "\tMoe_di_Stefano_2017_pdf: Unknown multiplicity {}".format( + multiplicity + ) + verbose_print( + msg, + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) + raise ValueError(msg) + + # Calculate total probdens: + prob_dict = self.calc_total_probdens(prob_dict) + + # Some info + if multiplicity == 1: + verbose_print( + "\tMoe_di_Stefano_2017_pdf: M_1={} q=N/A log10P=N/A ({}): {} -> {}\n".format( + options["M_1"], + len(prob_dict), + str(prob_dict), + prob_dict["total_probdens"], + ), + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) + elif multiplicity == 2: + verbose_print( + "\tMoe_di_Stefano_2017_pdf: M_1={} q={} log10P={} ecc={} ({}): {} -> {}\n".format( + options["M_1"], + options["M_2"] / options["M_1"] + if options.get("M_2", None) + else "N/A", + np.log10(options["P"]), + options["ecc"] if options.get("ecc", None) else "N/A", + len(prob_dict), + str(prob_dict), + prob_dict["total_probdens"], + ), + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) + elif multiplicity == 3: + verbose_print( + "\tMoe_di_Stefano_2017_pdf: M_1={} q={} log10P={} ecc={} M_3={} log10P2={} ecc2={} ({}): {} -> {}".format( + options["M_1"], + options["M_2"] / options["M_1"] + if options.get("M_2", None) + else "N/A", + np.log10(options["P"]), + options["ecc"] if options.get("ecc", None) else "N/A", + options["M_3"], + np.log10(options["P2"]), + options["ecc2"] if options.get("ecc2", None) else "N/A", + len(prob_dict), + str(prob_dict), + prob_dict["total_probdens"], + ), + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) + elif multiplicity == 4: + verbose_print( + 
"Moe_di_Stefano_2017_pdf: M_1={} q={} log10P={} ecc={} M_3={} log10P2={} ecc2={} M_4={} log10P3={} ecc3={} ({}) : {} -> {}".format( + options["M_1"], + options["M_2"] / options["M_1"] + if options.get("M_2", None) + else "N/A", + np.log10(options["P"]), + options["ecc"] if options.get("ecc", None) else "N/A", + options["M_3"], + np.log10(options["P2"]), + options["ecc2"] if options.get("ecc2", None) else "N/A", + options["M_4"], + np.log10(options["P3"]), + options["ecc3"] if options.get("ecc3", None) else "N/A", + len(prob_dict), + str(prob_dict), + prob_dict["total_probdens"], + ), + verbosity, + _MOE2017_VERBOSITY_LEVEL, + ) + return prob_dict diff --git a/binarycpython/utils/ensemble.py b/binarycpython/utils/ensemble.py index 72a0ced7d9879a9398135c70adb028e729b447d5..80328c299e415b5fd6c9a4b65344534fb1d49bc9 100644 --- a/binarycpython/utils/ensemble.py +++ b/binarycpython/utils/ensemble.py @@ -13,15 +13,12 @@ import gc import gzip import inspect - from halo import Halo import msgpack import py_rinterpolate import simplejson -# import orjson - from binarycpython.utils.dicts import ( keys_to_floats, recursive_change_key_to_float, @@ -72,21 +69,21 @@ def ensemble_setting(ensemble, parameter_name): return value -def open_ensemble(filename): +def open_ensemble(filename, encoding="utf-8"): """ Function to open an ensemble at filename for reading and decompression if required. """ compression = ensemble_compression(filename) - if ensemble_file_type(filename) is "msgpack": + if ensemble_file_type(filename) == "msgpack": flags = "rb" else: flags = "rt" - if compression is "bzip2": - file_object = bz2.open(filename, flags) - elif compression is "gzip": - file_object = gzip.open(filename, flags) + if compression == "bzip2": + file_object = bz2.open(filename, flags, encoding=encoding) + elif compression == "gzip": + file_object = gzip.open(filename, flags, encoding=encoding) else: - file_object = open(filename, flags) + file_object = open(filename, flags, encoding=encoding) return file_object @@ -94,12 +91,12 @@ def ensemble_compression(filename): """ Return the compression type of the ensemble file, based on its filename extension. """ + if filename.endswith(".bz2"): return "bzip2" - elif filename.endswith(".gz"): + if filename.endswith(".gz"): return "gzip" - else: - return None + return None def ensemble_file_type(filename): @@ -115,7 +112,9 @@ def ensemble_file_type(filename): return filetype -def load_ensemble(filename, convert_float_keys=True, select_keys=None, timing=False): +def load_ensemble( + filename, convert_float_keys=True, select_keys=None, timing=False, flush=False +): """ Function to load an ensemeble file, even if it is compressed, and return its contents to as a Python dictionary. @@ -128,7 +127,7 @@ def load_ensemble(filename, convert_float_keys=True, select_keys=None, timing=Fa # open the file # load with some info to the terminal - print("Loading JSON...") + print("Loading JSON...", flush=flush) # open the ensemble and get the file type file_object = open_ensemble(filename) @@ -136,7 +135,8 @@ def load_ensemble(filename, convert_float_keys=True, select_keys=None, timing=Fa if not filetype or not file_object: print( - "Unknown filetype : your ensemble should be saved either as JSON or msgpack data." 
+ "Unknown filetype : your ensemble should be saved either as JSON or msgpack data.", + flush=flush, ) sys.exit() @@ -146,7 +146,7 @@ def load_ensemble(filename, convert_float_keys=True, select_keys=None, timing=Fa def _hook(obj): nonlocal _loaded - if _loaded == False: + if _loaded is False: _loaded = True print( "\nLoaded {} data, now putting in a dictionary".format(filetype), @@ -154,7 +154,7 @@ def load_ensemble(filename, convert_float_keys=True, select_keys=None, timing=Fa ) return obj - if filetype is "JSON": + if filetype == "JSON": # orjson promises to be fast, but it doesn't seem to be # and fails on "Infinity"... oops # data = orjson.loads(file_object.read()) @@ -167,7 +167,7 @@ def load_ensemble(filename, convert_float_keys=True, select_keys=None, timing=Fa # on the big Moe set takes 42s # data = json.load(file_object, # object_hook=_hook) - elif filetype is "msgpack": + elif filetype == "msgpack": data = msgpack.load(file_object, object_hook=_hook) if timing: @@ -261,8 +261,6 @@ def handle_ensemble_string_to_json(raw_output): json.loads(raw_output, cls=binarycDecoder) """ - - # return json.loads(json.dumps(ast.literal_eval(raw_output)), cls=binarycDecoder) return json.loads(raw_output, cls=binarycDecoder) diff --git a/binarycpython/utils/functions.py b/binarycpython/utils/functions.py index 0a1211067f0daadbddeed7281ba6a2f67b21630a..524ae85e4a70827d9883695837a9676bf88d9407 100644 --- a/binarycpython/utils/functions.py +++ b/binarycpython/utils/functions.py @@ -8,52 +8,62 @@ Tasks: - TODO: change all prints to verbose_prints """ - -import bz2 -import collections -from colorama import Fore, Back, Style -import copy -import datetime as dt -import gc -import gzip -from halo import Halo -import h5py -import humanize -import inspect -from io import StringIO -import json -import msgpack -import numpy as np import os -import psutil -import py_rinterpolate -import re -import resource import sys -import subprocess -import tempfile import time +import json +import datetime +import collections +import resource +import tempfile +import subprocess import types -from typing import Union, Any - -import simplejson -# import orjson +from typing import Union +from io import StringIO -import astropy.units as u -import binarycpython.utils.moe_di_stefano_2017_data as moe_di_stefano_2017_data +import h5py +import humanize +import numpy as np +import psutil +from colorama import Fore, Back, Style from binarycpython import _binary_c_bindings -from binarycpython.utils.dicts import filter_dict, filter_dict_through_values - +from binarycpython.utils.dicts import filter_dict_through_values ######################################################## # Unsorted ######################################################## +def now(now_object=None, style=None, specifier=None): + """ + convenience function to return a string of the current time, + using the format "%m/%d/%Y %H:%M:%S" + + Args: + style : if "nospace" then return the date/time with the format + "%Y%m%d_%H%M%S" + + specifier: if set, uses this as a specifier rather than whatever is set by default or in the style variable + """ + if not now_object: + now_object = datetime.datetime.now() + if not specifier: + if style == "nospace": + # special case + specifier = "%Y%m%d_%H%M%S" + else: + # our default + specifier = "%m/%d/%Y %H:%M:%S" + + return datetime.datetime.strftime(now_object, specifier) + + def format_number(number): - # compact number formatter + """ + Function to take a number, express format it in scientific notation, and remove the trailing 0 if 
the exponent is 0 + """ string = "{number:.2g}".format(number=number) string = string.replace("e+0", "e+") @@ -67,12 +77,7 @@ def check_if_in_shell(): Function to check whether the script is running from a shell """ - if sys.stdin and sys.stdin.isatty(): - in_shell = True - else: - in_shell = False - - return in_shell + return bool(sys.stdin and sys.stdin.isatty()) def timedelta(delta): @@ -82,7 +87,7 @@ def timedelta(delta): """ # currently use the humanize module to do this t = humanize.time.precisedelta( - dt.timedelta(seconds=delta), + datetime.timedelta(seconds=delta), format="%0.2f", minimum_unit="milliseconds", suppress=["milliseconds"], @@ -100,6 +105,10 @@ def timedelta(delta): def get_ANSI_colours(): + """ + Function that returns a dictionary with text-colors in ANSI formatting + """ + # ANSI colours dictionary foreground_colours = { "red": Fore.RED, @@ -110,6 +119,7 @@ def get_ANSI_colours(): "magenta": Fore.MAGENTA, "white": Fore.WHITE, "black": Fore.BLACK, + "bold": Style.BRIGHT, } background_colours = { @@ -126,10 +136,10 @@ def get_ANSI_colours(): default_style = Style.BRIGHT colours = {} - for c in foreground_colours: - colours[c] = default_style + foreground_colours[c] - for d in background_colours: - colours[c + " on " + d] = foreground_colours[c] + background_colours[d] + for c, foreground_colour in foreground_colours.items(): + colours[c] = default_style + foreground_colour + for d, background_colour in background_colours.items(): + colours[c + " on " + d] = foreground_colour + background_colour colours["reset"] = Style.RESET_ALL return colours @@ -217,69 +227,44 @@ def get_size(obj, seen=None): return size -def get_moe_di_stefano_dataset(options, verbosity=0): +def imports(): """ - Function to get the default Moe and di Stefano dataset or accept a user input. - - Returns a dict containing the (JSON) data. + Generator that generates the names of all the modules that are loaded in the globals """ - json_data = None - - if "JSON" in options: - # use the JSON data passed in - json_data = options["JSON"] - - elif "file" in options: - # use the file passed in, if provided - if not os.path.isfile(options["file"]): - verbose_print( - "The provided 'file' Moe and de Stefano JSON file does not seem to exist at {}".format( - options["file"] - ), - verbosity, - 1, - ) + for _, val in globals().items(): + if isinstance(val, types.ModuleType): + yield val.__name__ - raise ValueError - if not options["file"].endswith(".json"): - verbose_print( - "Provided filename is not a json file", - verbosity, - 1, - ) - else: - # Read input data and Clean up the data if there are white spaces around the keys - with open(options["file"], "r") as data_filehandle: - datafile_data = data_filehandle.read() - datafile_data = datafile_data.replace('" ', '"') - datafile_data = datafile_data.replace(' "', '"') - datafile_data = datafile_data.replace(' "', '"') - json_data = json.loads(datafile_data) - - if not json_data: - # no JSON data or filename given, use the default 2017 dataset - verbose_print( - "Using the default Moe and de Stefano 2017 datafile", - verbosity, - 1, - ) - json_data = copy.deepcopy(moe_di_stefano_2017_data.moe_di_stefano_2017_data) +def isfloat(x): + """ + Return True if the "number" x, which could be a string, is a float, otherwise return False. + """ + try: + _ = float(x) + return True + except ValueError: + return False - return json_data +def isint(x): + """ + Return True if the "number" x, which could be a string, is an int, otherwise return False. 
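The helpers added to functions.py above (now(), isfloat()/isint(), and the new newline argument of verbose_print()) are small conveniences; a short usage sketch, where the printed values are only illustrative:

from binarycpython.utils.functions import now, isfloat, verbose_print

now()                         # e.g. "07/15/2021 13:45:12"
now(style="nospace")          # e.g. "20210715_134512"
now(specifier="%Y-%m-%d")     # e.g. "2021-07-15"

isfloat("3.14")               # True
isfloat("not a number")       # False

# newline="\x0d" (carriage return) lets repeated status messages
# overwrite a single terminal line instead of scrolling
verbose_print("progress 10%", verbosity=1, minimal_verbosity=1, newline="\x0d")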
+ """ -def imports(): - for name, val in globals().items(): - if isinstance(val, types.ModuleType): - yield val.__name__ + try: + _ = int(x) + return True + except ValueError: + return False def convfloat(x): """ Convert scalar x to a float if we can, in which case return the float, otherwise just return x without changing it. Usually, x is a string, but could be anything that float() can handle without failure. """ + try: y = float(x) return y @@ -321,19 +306,19 @@ def pad_output_distribution(dist: dict, binwidth: float): return dist -class catchtime(object): +class catchtime(): """ Context manager to calculate time spent """ def __enter__(self): """On entry we start the clock""" - self.t = time.clock() + self.t = time.process_time() return self - def __exit__(self, type, value, traceback): + def __exit__(self, exc_type, exc_val, exc_tb): """On exit we stop the clock and measure the time spent""" - self.t = time.clock() - self.t + self.t = time.process_time() - self.t print("Took {}s".format(self.t)) @@ -399,7 +384,9 @@ def call_binary_c_config(argument): ######################################################## -def verbose_print(message: str, verbosity: int, minimal_verbosity: int) -> None: +def verbose_print( + message: str, verbosity: int, minimal_verbosity: int, newline: str = "\n" +) -> None: """ Function that decides whether to print a message based on the current verbosity and its minimum verbosity @@ -410,10 +397,14 @@ def verbose_print(message: str, verbosity: int, minimal_verbosity: int) -> None: message: message to print verbosity: current verbosity level minimal_verbosity: threshold verbosity above which to print + newline: newline character (or set of characters), defaults to "\n" but "\x0d" (carriage return) might be useful. """ if verbosity >= minimal_verbosity: - print(message) + if newline == "\n": + print(message) + else: + print(message, newline, sep="", end="") sys.stdout.flush() @@ -449,9 +440,15 @@ def remove_file(file: str, verbosity: int = 0) -> None: 1, ) + def get_username(): + """ + Function to get the username of the user that spawned the current process + """ + return psutil.Process().username() + def temp_dir(*args: str) -> str: """ Function to create directory within the TMP directory of the file system @@ -504,21 +501,23 @@ def create_hdf5(data_dir: str, name: str) -> None: content_data_dir = os.listdir(data_dir) # Settings - if any([file.endswith("_settings.json") for file in content_data_dir]): + if any(file.endswith("_settings.json") for file in content_data_dir): print("Adding settings to HDF5 file") settings_file = os.path.join( data_dir, [file for file in content_data_dir if file.endswith("_settings.json")][0], ) - with open(settings_file, "r") as settings_file: + with open(settings_file, "r", encoding="utf-8") as settings_file: settings_json = json.load(settings_file) # Create settings group settings_grp = hdf5_file.create_group("settings") # Write version_string to settings_group - settings_grp.create_dataset("used_settings", data=json.dumps(settings_json)) + settings_grp.create_dataset( + "used_settings", data=json.dumps(settings_json, ensure_ascii=False) + ) # Get data files data_files = [el for el in content_data_dir if el.endswith(".dat")] @@ -549,325 +548,6 @@ def create_hdf5(data_dir: str, name: str) -> None: hdf5_file.close() -######################################################## -# version_info functions -######################################################## - - -def return_binary_c_version_info(parsed: bool = True) -> Union[str, 
dict]: - """ - Function that returns the version information of binary_c. This function calls the function - _binary_c_bindings.return_version_info() - - Args: - parsed: Boolean flag whether to parse the version_info output of binary_c. default = False - - Returns: - Either the raw string of binary_c or a parsed version of this in the form of a nested - dictionary - """ - - found_prev = False - if "BINARY_C_MACRO_HEADER" in os.environ: - # the env var is already present. lets save that and put that back later - found_prev = True - prev_value = os.environ["BINARY_C_MACRO_HEADER"] - - # - os.environ["BINARY_C_MACRO_HEADER"] = "macroxyz" - - # Get version_info - version_info = _binary_c_bindings.return_version_info().strip() - - # parse if wanted - if parsed: - version_info = parse_binary_c_version_info(version_info) - - # delete value - del os.environ["BINARY_C_MACRO_HEADER"] - - # put stuff back if we found a previous one - if found_prev: - os.environ["BINARY_C_MACRO_HEADER"] = prev_value - - return version_info - - -def parse_binary_c_version_info(version_info_string: str) -> dict: - """ - Function that parses the binary_c version info. Long function with a lot of branches - - TODO: fix this function. stuff is missing: isotopes, macros, nucleosynthesis_sources - - Args: - version_info_string: raw output of version_info call to binary_c - - Returns: - Parsed version of the version info, which is a dictionary containing the keys: 'isotopes' for isotope info, 'argpairs' for argument pair info (TODO: explain), 'ensembles' for ensemble settings/info, 'macros' for macros, 'elements' for atomic element info, 'DTlimit' for (TODO: explain), 'nucleosynthesis_sources' for nucleosynthesis sources, and 'miscellaneous' for all those that were not caught by the previous groups. 'git_branch', 'git_build', 'revision' and 'email' are also keys, but its clear what those contain. - """ - - version_info_dict = {} - - # Clean data and put in correct shape - splitted = version_info_string.strip().splitlines() - cleaned = {el.strip() for el in splitted if not el == ""} - - ########################## - # Network: - # Split off all the networks and parse the info. 
- - networks = {el for el in cleaned if el.startswith("Network ")} - cleaned = cleaned - networks - - networks_dict = {} - for el in networks: - network_dict = {} - split_info = el.split("Network ")[-1].strip().split("==") - - network_number = int(split_info[0]) - network_dict["network_number"] = network_number - - network_info_split = split_info[1].split(" is ") - - shortname = network_info_split[0].strip() - network_dict["shortname"] = shortname - - if not network_info_split[1].strip().startswith(":"): - network_split_info_extra = network_info_split[1].strip().split(":") - - longname = network_split_info_extra[0].strip() - network_dict["longname"] = longname - - implementation = ( - network_split_info_extra[1].strip().replace("implemented in", "") - ) - if implementation: - network_dict["implemented_in"] = implementation.strip().split() - - networks_dict[network_number] = copy.deepcopy(network_dict) - version_info_dict["networks"] = networks_dict if networks_dict else None - - ########################## - # Isotopes: - # Split off - isotopes = {el for el in cleaned if el.startswith("Isotope ")} - cleaned = cleaned - isotopes - - isotope_dict = {} - for el in isotopes: - split_info = el.split("Isotope ")[-1].strip().split(" is ") - - isotope_info = split_info[-1] - name = isotope_info.split(" ")[0].strip() - - # Get details - mass_g = float( - isotope_info.split(",")[0].split("(")[1].split("=")[-1][:-2].strip() - ) - mass_amu = float( - isotope_info.split(",")[0].split("(")[-1].split("=")[-1].strip() - ) - mass_mev = float( - isotope_info.split(",")[-3].split("=")[-1].replace(")", "").strip() - ) - A = int(isotope_info.split(",")[-1].strip().split("=")[-1].replace(")", "")) - Z = int(isotope_info.split(",")[-2].strip().split("=")[-1]) - - # - isotope_dict[int(split_info[0])] = { - "name": name, - "Z": Z, - "A": A, - "mass_mev": mass_mev, - "mass_g": mass_g, - "mass_amu": mass_amu, - } - version_info_dict["isotopes"] = isotope_dict if isotope_dict else None - - ########################## - # Arg pairs: - # Split off - argpairs = set([el for el in cleaned if el.startswith("ArgPair")]) - cleaned = cleaned - argpairs - - argpair_dict = {} - for el in sorted(argpairs): - split_info = el.split("ArgPair ")[-1].split(" ") - - if not argpair_dict.get(split_info[0], None): - argpair_dict[split_info[0]] = {split_info[1]: split_info[2]} - else: - argpair_dict[split_info[0]][split_info[1]] = split_info[2] - - version_info_dict["argpairs"] = argpair_dict if argpair_dict else None - - ########################## - # ensembles: - # Split off - ensembles = {el for el in cleaned if el.startswith("Ensemble")} - cleaned = cleaned - ensembles - - ensemble_dict = {} - ensemble_filter_dict = {} - for el in ensembles: - split_info = el.split("Ensemble ")[-1].split(" is ") - - if len(split_info) > 1: - if not split_info[0].startswith("filter"): - ensemble_dict[int(split_info[0])] = split_info[-1] - else: - filter_no = int(split_info[0].replace("filter ", "")) - ensemble_filter_dict[filter_no] = split_info[-1] - - version_info_dict["ensembles"] = ensemble_dict if ensemble_dict else None - version_info_dict["ensemble_filters"] = ( - ensemble_filter_dict if ensemble_filter_dict else None - ) - - ########################## - # macros: - # Split off - macros = {el for el in cleaned if el.startswith("macroxyz")} - cleaned = cleaned - macros - - param_type_dict = { - "STRING": str, - "FLOAT": float, - "MACRO": str, - "INT": int, - "LONG_INT": int, - "UINT": int, - } - - macros_dict = {} - for el in macros: - split_info = 
el.split("macroxyz ")[-1].split(" : ") - param_type = split_info[0] - - new_split = "".join(split_info[1:]).split(" is ") - param_name = new_split[0] - param_value = " is ".join(new_split[1:]) - - # Sometimes the macros have extra information behind it. Needs an update in outputting by binary_c - try: - macros_dict[param_name] = param_type_dict[param_type](param_value) - except ValueError: - macros_dict[param_name] = str(param_value) - version_info_dict["macros"] = macros_dict if macros_dict else None - - ########################## - # Elements: - # Split off: - elements = {el for el in cleaned if el.startswith("Element")} - cleaned = cleaned - elements - - # Fill dict: - elements_dict = {} - for el in elements: - split_info = el.split("Element ")[-1].split(" : ") - name_info = split_info[0].split(" is ") - - # get isotope info - isotopes = {} - if not split_info[-1][0] == "0": - isotope_string = split_info[-1].split(" = ")[-1] - isotopes = { - int(split_isotope.split("=")[0]): split_isotope.split("=")[1] - for split_isotope in isotope_string.split(" ") - } - - elements_dict[int(name_info[0])] = { - "name": name_info[-1], - "atomic_number": int(name_info[0]), - "amt_isotopes": len(isotopes), - "isotopes": isotopes, - } - version_info_dict["elements"] = elements_dict if elements_dict else None - - ########################## - # dt_limits: - # split off - dt_limits = {el for el in cleaned if el.startswith("DTlimit")} - cleaned = cleaned - dt_limits - - # Fill dict - dt_limits_dict = {} - for el in dt_limits: - split_info = el.split("DTlimit ")[-1].split(" : ") - dt_limits_dict[split_info[1].strip()] = { - "index": int(split_info[0]), - "value": float(split_info[-1]), - } - - version_info_dict["dt_limits"] = dt_limits_dict if dt_limits_dict else None - - ########################## - # Nucleosynthesis sources: - # Split off - nucsyn_sources = {el for el in cleaned if el.startswith("Nucleosynthesis")} - cleaned = cleaned - nucsyn_sources - - # Fill dict - nucsyn_sources_dict = {} - for el in nucsyn_sources: - split_info = el.split("Nucleosynthesis source")[-1].strip().split(" is ") - nucsyn_sources_dict[int(split_info[0])] = split_info[-1] - - version_info_dict["nucleosynthesis_sources"] = ( - nucsyn_sources_dict if nucsyn_sources_dict else None - ) - - ########################## - # miscellaneous: - # All those that I didn't catch with the above filters. Could try to get some more out though. - # TODO: filter a bit more. 
- - misc_dict = {} - - # Filter out git revision - git_revision = [el for el in cleaned if el.startswith("git revision")] - misc_dict["git_revision"] = ( - git_revision[0].split("git revision ")[-1].replace('"', "") - ) - cleaned = cleaned - set(git_revision) - - # filter out git url - git_url = [el for el in cleaned if el.startswith("git URL")] - misc_dict["git_url"] = git_url[0].split("git URL ")[-1].replace('"', "") - cleaned = cleaned - set(git_url) - - # filter out version - version = [el for el in cleaned if el.startswith("Version")] - misc_dict["version"] = str(version[0].split("Version ")[-1]) - cleaned = cleaned - set(version) - - git_branch = [el for el in cleaned if el.startswith("git branch")] - misc_dict["git_branch"] = git_branch[0].split("git branch ")[-1].replace('"', "") - cleaned = cleaned - set(git_branch) - - build = [el for el in cleaned if el.startswith("Build")] - misc_dict["build"] = build[0].split("Build: ")[-1].replace('"', "") - cleaned = cleaned - set(build) - - email = [el for el in cleaned if el.startswith("Email")] - misc_dict["email"] = email[0].split("Email ")[-1].split(",") - cleaned = cleaned - set(email) - - other_items = set([el for el in cleaned if " is " in el]) - cleaned = cleaned - other_items - - for el in other_items: - split = el.split(" is ") - key = split[0].strip() - val = " is ".join(split[1:]).strip() - misc_dict[key] = val - - misc_dict["uncaught"] = list(cleaned) - - version_info_dict["miscellaneous"] = misc_dict if misc_dict else None - return version_info_dict - - ######################################################## # binary_c output functions ######################################################## @@ -1084,9 +764,6 @@ def get_help( This function reads out that structure and catches the different components of this output - Tasks: - - TODO: consider not returning None, but return empty dict - Args: param_name: name of the parameter that you want info from. Will get checked whether its a valid parameter name @@ -1164,20 +841,20 @@ def get_help( help_info_dict["macros"] = macros if print_help: - for key in help_info_dict: - print("{}:\n\t{}".format(key, help_info_dict[key])) + for key, value in help_info_dict.items(): + print("{}:\n\t{}".format(key, value)) return help_info_dict - else: - if not fail_silently: - print( - "{} is not a valid parameter name. Please choose from the \ - following parameters:\n\t{}".format( - param_name, list(available_arg_keys) - ) + if not fail_silently: + print( + "{} is not a valid parameter name. Please choose from the \ + following parameters:\n\t{}".format( + param_name, list(available_arg_keys) ) - return None + ) + + return {} def get_help_all(print_help: bool = True) -> dict: @@ -1206,11 +883,11 @@ def get_help_all(print_help: bool = True) -> dict: help_all_dict = {} # Select the section name and the contents of that section. Note, not all sections have content! 
- for i in range(len(section_nums)): + for i, section_num in enumerate(section_nums): if not i == len(section_nums) - 1: - params = cleaned[section_nums[i] + 1 : section_nums[i + 1]] + params = cleaned[section_num + 1 : section_nums[i + 1]] else: - params = cleaned[section_nums[i] + 1 : len(cleaned)] + params = cleaned[section_num + 1 : len(cleaned)] section_name = ( cleaned[section_nums[i]] .lstrip("#####") @@ -1308,21 +985,14 @@ def get_help_super(print_help: bool = False, fail_silently: bool = True) -> dict # Get help_all information help_all_dict = get_help_all(print_help=False) - for section_name in help_all_dict: - section = help_all_dict[section_name] - - # print(section_name) - # for parameter_name in section["parameters"].keys(): - # print("\t", parameter_name) + # help_all_super_dict = help_all_dict.copy() # Loop over all sections and stuff - for section_name in help_all_dict: + for section_name, section in help_all_dict.items(): # Skipping the section i/o because that one shouldn't be available to python anyway if not section_name == "i/o": - section = help_all_dict[section_name] - for parameter_name in section["parameters"].keys(): parameter = section["parameters"][parameter_name] @@ -1337,7 +1007,7 @@ def get_help_super(print_help: bool = False, fail_silently: bool = True) -> dict # check whether the descriptions of help_all and detailed help are the same if not fail_silently: if not parameter["description"] == detailed_help["description"]: - print(json.dumps(parameter, indent=4)) + print(json.dumps(parameter, indent=4, ensure_ascii=False)) ## put values into help all super dict # input type @@ -1349,13 +1019,13 @@ def get_help_super(print_help: bool = False, fail_silently: bool = True) -> dict parameter["default"] = detailed_help["default"] # macros - if "macros" in detailed_help.keys(): + if "macros" in detailed_help: parameter["macros"] = detailed_help["macros"] section["parameters"][parameter_name] = parameter if print_help: - print(json.dumps(help_all_super_dict, indent=4)) + print(json.dumps(help_all_super_dict, indent=4, ensure_ascii=False)) return help_all_super_dict @@ -1388,11 +1058,8 @@ def write_binary_c_parameter_descriptions_to_rst_file(output_file: str) -> None: """ Function that calls the get_help_super() to get the help text/descriptions for all the parameters available in that build. - Writes the results to a .rst file that can be included in the docs. - Tasks: - - TODO: add the specific version git branch, git build, git commit, and binary_c version to - this document + Writes the results to a .rst file that can be included in the docs. 
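The get_help() change above returns an empty dict instead of None for an unknown parameter name, so callers can iterate over the result without a None check. A small usage sketch; the parameter name is illustrative:

from binarycpython.utils.functions import get_help

info = get_help("M_1", print_help=False)
for key, value in info.items():
    # the loop body is simply skipped if the parameter is not recognised
    print("{}: {}".format(key, value))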
Args: output_file: name of the output .rst file containing the ReStructuredText formatted output @@ -1405,10 +1072,9 @@ def write_binary_c_parameter_descriptions_to_rst_file(output_file: str) -> None: build_info = make_build_text() if not output_file.endswith(".rst"): - print("Filename doesn't end with .rst, please provide a proper filename") - return None + raise ValueError("Filename ({}) doesn't end with .rst, please provide a proper filename.".format(output_file)) - with open(output_file, "w") as f: + with open(output_file, "w", encoding="utf-8") as f: print("Binary\\_c parameters", file=f) print("{}".format("=" * len("Binary\\_c parameters")), file=f) @@ -1423,7 +1089,6 @@ def write_binary_c_parameter_descriptions_to_rst_file(output_file: str) -> None: for el in arguments_dict.keys(): print("Section: {}".format(el), file=f) print("{}\n".format("-" * len("Section: {}".format(el))), file=f) - # print(arguments_dict[el]['parameters'].keys()) for arg in arguments_dict[el]["parameters"].keys(): argdict = arguments_dict[el]["parameters"][arg] @@ -1458,7 +1123,7 @@ def load_logfile(logfile: str) -> None: This function is not finished and shouldn't be used yet. Tasks: - - TODO: + - TODO: fix this function Args: - logfile: filename of the log file you want to parse @@ -1467,7 +1132,7 @@ def load_logfile(logfile: str) -> None: """ - with open(logfile, "r") as file: + with open(logfile, "r", encoding="utf-8") as file: logfile_data = file.readlines() time_list = [] diff --git a/binarycpython/utils/grid.py b/binarycpython/utils/grid.py index d4f2f830f2da13bbffeb8e777a9ef9e158999fed..815a53fe18c81332c906d1a950535556dc6419cb 100644 --- a/binarycpython/utils/grid.py +++ b/binarycpython/utils/grid.py @@ -15,117 +15,89 @@ Tasks: - TODO: consider spreading the functions over more files. - TODO: type the private functions - TODO: fix the correct object types for the default values of the bse_options - - TODO: uncomment and implement the HPC functionality - TODO: think of a clean and nice way to unload and remove the custom_logging_info library from memory (and from disk) - TODO: think of a nice way to remove the loaded grid_code/ generator from memory. 
""" -import argparse -import bz2 -import copy -import datetime -import json -import gc -import gzip -import importlib.util -import logging -import msgpack -import multiprocessing import os -import py_rinterpolate -import re -import resource -import setproctitle -import strip_ansi +import gc import sys import time +import copy +import json import uuid +import queue +import signal +import datetime +import functools +import traceback +import multiprocessing -_count = 0 -from typing import Union, Any from collections import ( OrderedDict, ) from collections.abc import Iterable # drop `.abc` with Python 2.7 or lower +from typing import Union, Any +import psutil import setproctitle -import py_rinterpolate - +import str2bool from colorama import init as colorama_init -colorama_init() - -from binarycpython.utils.grid_options_defaults import ( - grid_options_defaults_dict, - moe_di_stefano_default_options, - _MOE2017_VERBOSITY_LEVEL, - _CUSTOM_LOGGING_VERBOSITY_LEVEL, - _LOGGER_VERBOSITY_LEVEL, -) - -from binarycpython.utils.custom_logging_functions import ( - autogen_C_logging_code, - binary_c_log_code, - create_and_load_logging_function, -) - from binarycpython.utils.functions import ( - get_defaults, - remove_file, + check_if_in_shell, filter_arg_dict, + get_ANSI_colours, + get_defaults, get_help_all, - return_binary_c_version_info, - verbose_print, - get_moe_di_stefano_dataset, - trem, - conv_time_units, mem_use, - get_ANSI_colours, - check_if_in_shell, - format_number, timedelta, + now ) from binarycpython.utils.ensemble import ( binaryc_json_serializer, - ensemble_compression, - ensemble_file_type, extract_ensemble_json_from_string, format_ensemble_results, ) from binarycpython.utils.dicts import ( AutoVivificationDict, - custom_sort_dict, merge_dicts, - multiply_values_dict, - recursive_change_key_to_float, - recursive_change_key_to_string, - update_dicts, + keys_to_floats, ) -# from binarycpython.utils.hpc_functions import ( -# get_condor_version, -# get_slurm_version, -# create_directories_hpc, -# path_of_calling_script, -# get_python_details, -# ) - -from binarycpython.utils.distribution_functions import ( - Moecache, - LOG_LN_CONVERTER, - fill_data, - get_max_multiplicity, - Arenou2010_binary_fraction, - raghavan2010_binary_fraction, - Moe_di_Stefano_2017_multiplicity_fractions, - normalize_dict, -) +from binarycpython.utils.analytics import analytics +from binarycpython.utils.cache import cache +from binarycpython.utils.dataIO import dataIO +from binarycpython.utils.distribution_functions import distribution_functions +from binarycpython.utils.grid_logging import grid_logging +from binarycpython.utils.grid_options_defaults import grid_options_defaults +from binarycpython.utils.gridcode import gridcode +from binarycpython.utils.HPC import HPC +from binarycpython.utils.metadata import metadata +from binarycpython.utils.Moe_di_Stefano_2017 import Moe_di_Stefano_2017 +from binarycpython.utils.spacing_functions import spacing_functions +from binarycpython.utils.version_info import version_info + from binarycpython import _binary_c_bindings -secs_per_day = 86400 # probably needs to go somewhere more sensible +# Initialise the colorama stuff +colorama_init() -class Population: +class Population( + analytics, + cache, + dataIO, + distribution_functions, + grid_logging, + grid_options_defaults, + gridcode, + HPC, + metadata, + Moe_di_Stefano_2017, + spacing_functions, + version_info, +): """ Population Object. 
Contains all the necessary functions to set up, run and process a population of systems @@ -136,8 +108,26 @@ class Population: Initialisation function of the population class """ - # Different sections of options + # Initialise the parent classes + analytics.__init__(self) + cache.__init__(self) + dataIO.__init__(self) + distribution_functions.__init__(self) + grid_logging.__init__(self) + grid_options_defaults.__init__(self) + gridcode.__init__(self) + HPC.__init__(self) + metadata.__init__(self) + Moe_di_Stefano_2017.__init__(self) + spacing_functions.__init__(self) + version_info.__init__(self) + + # caches + self.caches = {} + self.cached_function_cache = {} + self.original_function_cache = {} + # Different sections of options # get binary_c defaults and create a cleaned up dict # Setting stuff will check against the defaults to see if the input is correct. self.defaults = get_defaults() @@ -146,42 +136,54 @@ class Population: self.special_params = [ el for el in list(self.defaults.keys()) if el.endswith("%d") ] + self.preloaded_population = None + self.signal_count = {} # make the input dictionary self.bse_options = {} # bse_options is just empty. # Grid options - self.grid_options = copy.deepcopy(grid_options_defaults_dict) + self.grid_options = copy.deepcopy(self.get_grid_options_defaults_dict()) # Custom options - self.custom_options = {} + self.custom_options = { + "save_snapshot": False, + } # grid code generation self.indent_depth = 0 self.indent_string = " " self.code_string = "" + # cached value of minimum stellar mass + self._minimum_stellar_mass = None + + # logging levels + self._LOGGER_VERBOSITY_LEVEL = 1 + self._CUSTOM_LOGGING_VERBOSITY_LEVEL = 2 + # Set the options that are passed at creation of the object self.set(**kwargs) # Load Moe and di Stefano options self.grid_options["Moe2017_options"] = copy.deepcopy( - moe_di_stefano_default_options + self.get_Moe_di_Stefano_2017_default_options() ) # Write MOE2017 options to a file. NOTE: not sure why i put this here anymore os.makedirs( os.path.join(self.grid_options["tmp_dir"], "moe_distefano"), exist_ok=True ) - with open( + with self.open( os.path.join( os.path.join(self.grid_options["tmp_dir"], "moe_distefano"), "moeopts.dat", ), "w", ) as f: - f.write(json.dumps(self.grid_options["Moe2017_options"], indent=4)) - f.close() + json.dump( + self.grid_options["Moe2017_options"], f, indent=4, ensure_ascii=False + ) # Argline dict self.argline_dict = {} @@ -197,7 +199,7 @@ class Population: # ANSI colours: use them if in a shell self.ANSI_colours = get_ANSI_colours() - if self.in_shell == False: + if self.in_shell is False: for c in self.ANSI_colours: self.ANSI_colours[c] = "" @@ -208,12 +210,54 @@ class Population: self.process_ID = 0 # Create location to store results. Users should write to this dictionary. - # The AutoVivificationDict allows for perls method of accessing possibly non-existant subdicts + # The AutoVivificationDict allows for Perl-like addition of possibly + # non-existant subdicts. self.grid_results = AutoVivificationDict() # Create location where ensemble results are written to self.grid_ensemble_results = {} + # add metadata + self.add_system_metadata() + + def jobID(self): + """ + Function to return the job ID number of this process + + Normal processes return their process ID (PID) + HPC processes return whatever HPC_jobID() gives. 
+ """ + if self.HPC_job(): + jobID = self.HPC_jobID() + else: + jobID = "{}".format(self.process_ID) + return jobID + + def exit(self, code=None, message=True, flush=True, stacktrace=False): + """ + Exit function: use this to exit from a Population object. + Really it's just a wrapper for sys.exit() to return the correct exit code, + but also to post a message (if message is True, default is True) + and perhaps a stacktrace (if stacktrace is True, default is False). + """ + # if we've been killed, set exit code to 1 + if self.grid_options["exit_code"] == 0 and self.grid_options["_killed"]: + self.grid_options["exit_code"] = 1 + # but override with code passed in + if code: + self.grid_options["exit_code"] = code + if message: + print( + "exit from binary_c-python Population with code {}".format( + self.grid_options["exit_code"] + ) + ) + if flush: + sys.stdout.flush() + if stacktrace or self.grid_options["print_stack_on_exit"]: + traceback.print_stack() + sys.exit(self.grid_options["exit_code"]) + ################################################### # Argument functions ################################################### @@ -253,24 +297,21 @@ class Population: # Go over all the input for key in kwargs: # Filter out keys for the bse_options - if key in self.defaults.keys(): - verbose_print( + if key in self.defaults: + self.verbose_print( "adding: {}={} to BSE_options".format(key, kwargs[key]), self.grid_options["verbosity"], - 1, + 2, ) self.bse_options[key] = kwargs[key] # Extra check to check if the key fits one of parameter names that end with %d + # TODO: abstract this function elif any( - [ - True - if (key.startswith(param[:-2]) and len(param[:-2]) < len(key)) - else False - for param in self.special_params - ] + bool(key.startswith(param[:-2]) and len(param[:-2]) < len(key)) + for param in self.special_params ): - verbose_print( + self.verbose_print( "adding: {}={} to BSE_options by catching the %d".format( key, kwargs[key] ), @@ -281,7 +322,7 @@ class Population: # Filter out keys for the grid_options elif key in self.grid_options.keys(): - verbose_print( + self.verbose_print( "adding: {}={} to grid_options".format(key, kwargs[key]), self.grid_options["verbosity"], 1, @@ -290,7 +331,7 @@ class Population: # The of the keys go into a custom_options dict else: - verbose_print( + self.verbose_print( "<<<< Warning: Key does not match previously known parameter: \ adding: {}={} to custom_options >>>>".format( key, kwargs[key] @@ -320,7 +361,7 @@ class Population: cmdline_args = sys.argv[1:] if cmdline_args: - verbose_print( + self.verbose_print( "Found cmdline args. 
Parsing them now", self.grid_options["verbosity"], 1, @@ -334,47 +375,93 @@ class Population: cmdline_dict = {} for cmdline_arg in cmdline_args: split = cmdline_arg.split("=") - parameter = split[0] - value = split[1] - old_value_found = False - - # Find an old value - if parameter in self.grid_options: - old_value = self.grid_options[parameter] - old_value_found = True - - elif parameter in self.defaults: - old_value = self.defaults[parameter] - old_value_found = True - - elif parameter in self.custom_options: - old_value = self.custom_options[parameter] - old_value_found = True - - # (attempt to) convert - if old_value_found: - try: - verbose_print( - "Converting type of {} from {} to {}".format( - parameter, type(value), type(old_value) - ), - self.grid_options["verbosity"], - 2, - ) - value = type(old_value)(value) - verbose_print("Success!", self.grid_options["verbosity"], 2) - except ValueError: - verbose_print( - "Tried to convert the given parameter {}/value {} to its correct type {} (from old value {}). But that wasn't possible.".format( - parameter, value, type(old_value), old_value - ), - self.grid_options["verbosity"], - 0, - ) + if len(split) == 2: + parameter = split[0] + value = split[1] + old_value_found = False + + # Find an old value + if parameter in self.grid_options: + old_value = self.grid_options[parameter] + old_value_found = True + + elif parameter in self.defaults: + old_value = self.defaults[parameter] + old_value_found = True + + elif parameter in self.custom_options: + old_value = self.custom_options[parameter] + old_value_found = True + + # (attempt to) convert type + if old_value_found: + if old_value is not None: + try: + self.verbose_print( + "Converting type of {} from {} to {}".format( + parameter, type(value), type(old_value) + ), + self.grid_options["verbosity"], + 3, + ) + try: + if isinstance(old_value, bool): + value = str2bool.str2bool(value) + else: + value = type(old_value)(value) + self.verbose_print( + "Success!", self.grid_options["verbosity"], 2 + ) + except Exception as e: + print( + "Failed to convert {param} value with type {type}: old_value is '{old}', new value is '{new}', {e}".format( + param=parameter, + old=old_value, + type=type(old_value), + new=split[1], + e=e, + ) + ) + self.exit(code=1) + + except ValueError: + + # might be able to eval the parameter, e.g. + # an expression like "2-1" can eval to "1" + # which would be valid + try: + evaled = eval(value) + value = type(old_value)(evaled) + self.verbose_print( + "Success! (evaled)", + self.grid_options["verbosity"], + 2, + ) + + except ValueError: + self.verbose_print( + "Tried to convert the given parameter {}/value {} to its correct type {} (from old value {}). 
But that wasn't possible.".format( + parameter, value, type(old_value), old_value + ), + self.grid_options["verbosity"], + 0, + ) + # Add to dict + self.verbose_print( + "setting {} = {} ".format(parameter, value), + self.grid_options["verbosity"], + 3, + ) + cmdline_dict[parameter] = value - # Add to dict - cmdline_dict[parameter] = value + else: + print( + "Error: I do not know how to process", + cmdline_arg, + " : cmdline args should be in the format x=y, yours appears not to be.", + ) + self.exit(1) # unpack the dictionary into the setting function that handles where the values are set self.set(**cmdline_dict) @@ -392,253 +479,8 @@ class Population: for param_name in sorted(parameter_dict): argline += "{} {} ".format(param_name, parameter_dict[param_name]) argline = argline.strip() - return argline - - def _last_grid_variable(self): - """ - Function that returns the last grid variable - (i.e. the one with the highest grid_variable_number) - """ - - number = len(self.grid_options["_grid_variables"]) - for grid_variable in self.grid_options["_grid_variables"]: - if ( - self.grid_options["_grid_variables"][grid_variable][ - "grid_variable_number" - ] - == number - 1 - ): - return grid_variable - - def update_grid_variable(self, name: str, **kwargs) -> None: - """ - Function to update the values of a grid variable. - - Args: - name: - name of the grid variable to be changed. - **kwargs: - key-value pairs to override the existing grid variable data. See add_grid_variable for these names. - """ - - grid_variable = None - try: - grid_variable = self.grid_options["_grid_variables"][name] - except KeyError: - msg = "Unknown grid variable {} - please create it with the add_grid_variable() method.".format( - name - ) - raise KeyError(msg) - - for key, value in kwargs.items(): - grid_variable[key] = value - verbose_print( - "Updated grid variable: {}".format(json.dumps(grid_variable, indent=4)), - self.grid_options["verbosity"], - 1, - ) - - def delete_grid_variable( - self, - name: str, - ) -> None: - try: - del self.grid_options["_grid_variables"][name] - verbose_print( - "Deleted grid variable: {}".format(name), - self.grid_options["verbosity"], - 1, - ) - except: - msg = "Failed to remove grid variable {} : please check it exists.".format( - name - ) - raise ValueError(msg) - - def rename_grid_variable(self, oldname: str, newname: str) -> None: - """ - Function to rename a grid variable. - - note: this does NOT alter the order - of the self.grid_options["_grid_variables"] dictionary. 
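The command-line handling above accepts settings as "x=y" pairs and coerces each value to the type of the option it overrides, using str2bool for booleans and an eval() fallback for simple expressions. The standalone sketch below only illustrates that coercion rule; it is not a helper that exists in this diff:

import str2bool

def coerce_like(old_value, new_string):
    # convert new_string to the type of old_value, as the parser above does
    if isinstance(old_value, bool):
        return str2bool.str2bool(new_string)
    try:
        return type(old_value)(new_string)
    except ValueError:
        # expressions such as "2-1" can still evaluate to a valid value
        return type(old_value)(eval(new_string))

coerce_like(4, "8")        # -> 8
coerce_like(True, "no")    # -> False
coerce_like(1.0, "2-1")    # -> 1.0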
- - The order in which the grid variables are loaded into the grid is based on their - `grid_variable_number` property - - Args: - oldname: - old name of the grid variable - newname: - new name of the grid variable - """ - - try: - self.grid_options["_grid_variables"][newname] = self.grid_options[ - "_grid_variables" - ].pop(oldname) - self.grid_options["_grid_variables"][newname]["name"] = newname - verbose_print( - "Rename grid variable: {} to {}".format(oldname, newname), - self.grid_options["verbosity"], - 1, - ) - except: - msg = "Failed to rename grid variable {} to {}.".format(oldname, newname) - raise ValueError(msg) - def add_grid_variable( - self, - name: str, - parameter_name: str, - longname: str, - valuerange: Union[list, str], - samplerfunc: str, - probdist: str, - dphasevol: Union[str, int] = -1, - gridtype: str = "centred", - branchpoint: int = 0, - branchcode: Union[str, None] = None, - precode: Union[str, None] = None, - postcode: Union[str, None] = None, - topcode: Union[str, None] = None, - bottomcode: Union[str, None] = None, - condition: Union[str, None] = None, - ) -> None: - """ - Function to add grid variables to the grid_options. - - The execution of the grid generation will be through a nested for loop. - Each of the grid variables will get create a deeper for loop. - - The real function that generates the numbers will get written to a new file in the TMP_DIR, - and then loaded imported and evaluated. - beware that if you insert some destructive piece of code, it will be executed anyway. - Use at own risk. - - Tasks: - - TODO: Fix this complex function. - - Args: - name: - name of parameter used in the grid Python code. - This is evaluated as a parameter and you can use it throughout - the rest of the function - - Examples: - name = 'lnm1' - - parameter_name: - name of the parameter in binary_c - - This name must correspond to a Python variable of the same name, - which is automatic if parameter_name == name. - - Note: if parameter_name != name, you must set a - variable in "precode" or "postcode" to define a Python variable - called parameter_name - - longname: - Long name of parameter - - Examples: - longname = 'Primary mass' - range: - Range of values to take. Does not get used really, the samplerfunc is used to - get the values from - - Examples: - range = [math.log(m_min), math.log(m_max)] - samplerfunc: - Function returning a list or numpy array of samples spaced appropriately. - You can either use a real function, or a string representation of a function call. - - Examples: - samplerfunc = "const(math.log(m_min), math.log(m_max), {})".format(resolution['M_1']) - - precode: - Extra room for some code. This code will be evaluated within the loop of the - sampling function (i.e. a value for lnm1 is chosen already) - - Examples: - precode = 'M_1=math.exp(lnm1);' - postcode: - Code executed after the probability is calculated. - probdist: - Function determining the probability that gets assigned to the sampled parameter - - Examples: - probdist = 'Kroupa2001(M_1)*M_1' - dphasevol: - part of the parameter space that the total probability is calculated with. Put to -1 - if you want to ignore any dphasevol calculations and set the value to 1 - Examples: - dphasevol = 'dlnm1' - condition: - condition that has to be met in order for the grid generation to continue - Examples: - condition = 'self.grid_options['binary']==1' - gridtype: - Method on how the value range is sampled. 
Can be either 'edge' (steps starting at - the lower edge of the value range) or 'centred' - (steps starting at lower edge + 0.5 * stepsize). - - topcode: - Code added at the very top of the block. - - bottomcode: - Code added at the very bottom of the block. - """ - - # check parameters - if False and dphasevol != -1.0 and gridtype == 'discrete': - print("Error making grid: you have set the phasevol to be not -1 and gridtype to discrete, but a discrete grid has no phasevol calculation. You should only set the gridtype to discrete and not set the phasevol in this case.") - sys.exit() - - # Add grid_variable - grid_variable = { - "name": name, - "parameter_name": parameter_name, - "longname": longname, - "valuerange": valuerange, - # "resolution": 0, - "samplerfunc": samplerfunc, - "precode": precode, - "postcode": postcode, - "probdist": probdist, - "dphasevol": dphasevol, - "condition": condition, - "gridtype": gridtype, - "branchpoint": branchpoint, - "branchcode": branchcode, - "topcode": topcode, - "bottomcode": bottomcode, - "grid_variable_number": len(self.grid_options["_grid_variables"]), - } - - # Check for gridtype input - allowed_gridtypes = [ - "edge", - "right", - "right edge", - "left", - "left edge", - "centred", - "centre", - "center", - "discrete" - ] - if not gridtype in allowed_gridtypes: - msg = "Unknown gridtype {gridtype}. Please choose one of: ".format(gridtype=gridtype) + ','.join(allowed_gridtypes) - raise ValueError(msg) - - # Load it into the grid_options - self.grid_options["_grid_variables"][grid_variable["name"]] = grid_variable - - verbose_print( - "Added grid variable: {}".format(json.dumps(grid_variable, indent=4)), - self.grid_options["verbosity"], - 1, - ) + return argline ################################################### # Return functions @@ -661,15 +503,6 @@ class Population: return options - def return_binary_c_version_info(self, parsed=False): - """ - Function that returns the version information of binary_c - """ - - version_info = return_binary_c_version_info(parsed=parsed) - - return version_info - def return_binary_c_defaults(self): """ Function that returns the defaults of the binary_c version that is used. @@ -717,7 +550,7 @@ class Population: all_info["binary_c_defaults"] = binary_c_defaults if include_binary_c_version_info: - binary_c_version_info = return_binary_c_version_info(parsed=True) + binary_c_version_info = self.return_binary_c_version_info(parsed=True) all_info["binary_c_version_info"] = binary_c_version_info if include_binary_c_help_all: @@ -734,14 +567,13 @@ class Population: include_binary_c_defaults: bool = True, include_binary_c_version_info: bool = True, include_binary_c_help_all: bool = True, + ensure_ascii: str = False, + indent: int = 4, ) -> Union[str, None]: """ Function that exports the all_info to a JSON file Tasks: - - TODO: if any of the values in the dicts here is of a not-serialisable form, then we - need to change that to a string or something so, use a recursive function that - goes over the all_info dict and finds those that fit - TODO: Fix to write things to the directory. which options do which etc - TODO: there's flawed logic here. rewrite this part pls - TODO: consider actually just removing the whole 'output to file' part and let the @@ -761,6 +593,9 @@ class Population: <custom_options["base_filename"]>_settings.json. 
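For reference, a call to the add_grid_variable() method documented above might look like the sketch below. The sampler and probability-distribution names (const, Kroupa2001), the resolution dictionary and the mass limits are taken from the docstring examples; they are assumed to be available in the calling scope, and this is an illustrative configuration rather than a prescribed one.

import math

# assuming binarycpython is installed and Population is importable like this
from binarycpython.utils.grid import Population

pop = Population()

m_min, m_max = 0.1, 100.0   # user-chosen mass limits
resolution = {"M_1": 40}    # user-chosen resolution

pop.add_grid_variable(
    name="lnm1",                               # variable used inside the generated grid code
    parameter_name="M_1",                      # corresponding binary_c parameter
    longname="Primary mass",
    valuerange=[math.log(m_min), math.log(m_max)],
    samplerfunc="const({}, {}, {})".format(
        math.log(m_min), math.log(m_max), resolution["M_1"]
    ),
    precode="M_1=math.exp(lnm1)",              # map the sampled ln-mass back to a mass
    probdist="Kroupa2001(M_1)*M_1",            # weight taken from the docstring example
    dphasevol="dlnm1",
    gridtype="centred",
)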
Otherwise a file called simulation_<date+time>_settings.json will be created outfile: if use_datadir is false, a custom filename will be used + ensure_ascii: the ensure_ascii flag passed to json.dump and/or json.dumps + (Default: False) + indent: indentation passed to json.dump and/or json.dumps (default 4) """ all_info = self.return_all_info( @@ -776,17 +611,14 @@ class Population: if use_datadir: if self.custom_options.get("data_dir", None): if not self.custom_options.get("base_filename", None): - base_name = "simulation_{}".format( - datetime.datetime.strftime( - datetime.datetime.now(), "%Y%m%d_%H%M%S" - ) - ) + base_name = "simulation_{}".format(now(style="nospace")) else: base_name = os.path.splitext(self.custom_options["base_filename"])[ 0 ] - settings_name = base_name + "_settings.json" + # save settings as gzipped JSON + settings_name = base_name + "_settings.json.gz" # Check directory, make if necessary os.makedirs(self.custom_options["data_dir"], exist_ok=True) @@ -795,185 +627,97 @@ class Population: self.custom_options["data_dir"], settings_name ) - verbose_print( - "Writing settings to {}".format(settings_fullname), - self.grid_options["verbosity"], - 1, - ) - # if not outfile.endswith('json'): - with open(settings_fullname, "w") as file: - file.write( - json.dumps( - all_info_cleaned, - indent=4, - default=binaryc_json_serializer, - ) + print("ok") + + # open locked settings file, then output if we get the lock + (f, lock) = self.locked_open_for_write(settings_fullname, vb=True) + print("ok") + + if lock and f: + self.verbose_print( + "Writing settings to {}".format(settings_fullname), + self.grid_options["verbosity"], + 1, + ) + json.dump( + all_info_cleaned, + f, + indent=indent, + default=binaryc_json_serializer, + ensure_ascii=ensure_ascii, ) + print("ok pre") + self.locked_close(f, lock) + print("ok ret") return settings_fullname - else: - msg = "Exporting all info without passing a value for `outfile` requires custom_options['data_dir'] to be present. That is not the cause. Either set the `data_dir` or pass a value for `outfile` " - raise ValueError + + # TODO: turn it around and have the exception be within the if statement + msg = "Exporting all info without passing a value for `outfile` requires custom_options['data_dir'] to be present. That is not the cause. Either set the `data_dir` or pass a value for `outfile` " + raise ValueError(msg) else: - verbose_print( + self.verbose_print( "Writing settings to {}".format(outfile), self.grid_options["verbosity"], 1, ) if not outfile.endswith("json"): - verbose_print( + self.verbose_print( "Error: outfile ({}) must end with .json".format(outfile), self.grid_options["verbosity"], 0, ) raise ValueError - with open(outfile, "w") as file: - file.write( - json.dumps( - all_info_cleaned, indent=4, default=binaryc_json_serializer - ) + with self.open(outfile, "w") as file: + json.dump( + all_info_cleaned, + file, + indent=indent, + default=binaryc_json_serializer, + ensure_ascii=ensure_ascii, ) return outfile - def _boxed(self, *list, colour="yellow on black", boxchar="*", separator="\n"): - """ - Function to output a list of strings in a single box. - - Args: - list = a list of strings to be output. If these contain the separator - (see below) these strings are split by it. 
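The settings export above now writes a gzipped "_settings.json.gz" file with ensure_ascii=False so that UTF-8 content survives, through the population's locked file handling. A stand-alone sketch of the same round trip is given below, without the locking and with default=str standing in for binaryc_json_serializer; the function names and example dictionary are illustrative.

import gzip
import json

def write_settings_gz(path, settings, indent=4, ensure_ascii=False):
    """Write a settings dict as gzipped, UTF-8 encoded JSON."""
    with gzip.open(path, "wt", encoding="utf-8") as f:
        # default=str plays the role of a fallback serialiser for non-JSON types
        json.dump(settings, f, indent=indent, ensure_ascii=ensure_ascii, default=str)

def read_settings_gz(path):
    """Read the gzipped JSON settings back into a dict."""
    with gzip.open(path, "rt", encoding="utf-8") as f:
        return json.load(f)

name = "simulation_20210101_120000_settings.json.gz"
write_settings_gz(name, {"num_cores": 4, "métallicité": 0.02})
print(read_settings_gz(name))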
- separator = strings are split on this, default "\n" - colour = the colour to be used, usually this is 'yellow on black' - as set in the ANSI_colours dict - boxchar = the character used to make the box, '*' by default - - Note: handles tabs (\t) badly, do not use them! - """ - strlen = 0 - strings = [] - lengths = [] - - # make a list of strings - if separator: - for l in list: - strings += l.split(sep=separator) - else: - strings = list - - # get lengths without ANSI codes - for string in strings: - lengths.append(len(strip_ansi.strip_ansi(string))) - - # hence the max length - strlen = max(lengths) - strlen += strlen % 2 - header = boxchar * (4 + strlen) - - # start output - out = self.ANSI_colours[colour] + header + "\n" - - # loop over strings to output, padding as required - for n, string in enumerate(strings): - if lengths[n] % 2 == 1: - string = " " + string - pad = " " * int((strlen - lengths[n]) / 2) - out = out + boxchar + " " + pad + string + pad + " " + boxchar + "\n" - # close output and return - out = out + header + "\n" + self.ANSI_colours["reset"] - return out - - def _set_custom_logging(self): - """ - Function/routine to set all the custom logging so that the function memory pointer - is known to the grid. + ################################################### + # Evolution functions + ################################################### - When the memory adress is loaded and the library file is set we'll skip rebuilding the library + def _set_nprocesses(self): """ + Function to set the number of processes used in multiprocessing. - # Only if the values are the 'default' unset values - if ( - self.grid_options["custom_logging_func_memaddr"] == -1 - and self.grid_options["_custom_logging_shared_library_file"] is None - ): - verbose_print( - "Creating and loading custom logging functionality", - self.grid_options["verbosity"], - 1, - ) - # C_logging_code gets priority of C_autogen_code - if self.grid_options["C_logging_code"]: - # Generate entire shared lib code around logging lines - custom_logging_code = binary_c_log_code( - self.grid_options["C_logging_code"], - verbosity=self.grid_options["verbosity"] - - (_CUSTOM_LOGGING_VERBOSITY_LEVEL - 1), - ) - - # Load memory address - ( - self.grid_options["custom_logging_func_memaddr"], - self.grid_options["_custom_logging_shared_library_file"], - ) = create_and_load_logging_function( - custom_logging_code, - verbosity=self.grid_options["verbosity"] - - (_CUSTOM_LOGGING_VERBOSITY_LEVEL - 1), - custom_tmp_dir=self.grid_options["tmp_dir"], - ) - - elif self.grid_options["C_auto_logging"]: - # Generate real logging code - logging_line = autogen_C_logging_code( - self.grid_options["C_auto_logging"], - verbosity=self.grid_options["verbosity"] - - (_CUSTOM_LOGGING_VERBOSITY_LEVEL - 1), - ) + If grid_options['num_cores'] <= 0, set automatically - # Generate entire shared lib code around logging lines - custom_logging_code = binary_c_log_code( - logging_line, - verbosity=self.grid_options["verbosity"] - - (_CUSTOM_LOGGING_VERBOSITY_LEVEL - 1), - ) + If grid_options['num_cores'] is 0, we use as many as we have available + """ + # backwards compatibility + if "amt_cores" in self.grid_options: + self.grid_options["num_processes"] = self.grid_options["amt_cores"] + self.grid_options["num_cores"] = self.grid_options["amt_cores"] - # Load memory address - ( - self.grid_options["custom_logging_func_memaddr"], - self.grid_options["_custom_logging_shared_library_file"], - ) = create_and_load_logging_function( - custom_logging_code, - 
verbosity=self.grid_options["verbosity"] - - (_CUSTOM_LOGGING_VERBOSITY_LEVEL - 1), - custom_tmp_dir=self.grid_options["tmp_dir"], - ) + if self.grid_options["num_cores"] == 0: + # use all logical cores available to us + self.grid_options["num_processes"] = max(1, psutil.cpu_count(logical=True)) + elif self.grid_options["num_cores"] == -1: + # use all physical cores available to us + self.grid_options["num_processes"] = max(1, psutil.cpu_count(logical=False)) else: - verbose_print( - "Custom logging library already loaded. Not setting them again.", - self.grid_options["verbosity"], - 1, - ) - - ################################################### - # Ensemble functions - ################################################### + # manually specify number of cores made available + self.grid_options["num_processes"] = self.grid_options["num_cores"] - # Now they are stored in the _process_run_population thing. - # Needed less code since they all - - ################################################### - # Evolution functions - ################################################### - - def _pre_run_cleanup(self) -> None: + def _pre_run_setup(self) -> None: """ Function to clean up some stuff in the grid before a run (like results, ensemble results etc) """ # empty results - self.grid_options["results"] = {} self.grid_results = AutoVivificationDict() self.grid_ensemble_results = {} + # set number of processes/cores we want to use + self._set_nprocesses() + # Reset the process ID (should not have a value initially, but can't hurt if it does) self.process_ID = 0 @@ -985,7 +729,6 @@ class Population: # set previous logging time _t = time.time() - self.shared_memory["prev_log_time"] = multiprocessing.Array( "d", [_t] * self.grid_options["n_logging_stats"] ) @@ -997,85 +740,154 @@ class Population: # arrays to store memory and max memory use per-thread mem = 1.0 * mem_use() - self.shared_memory["memory_use_per_thread"] = multiprocessing.Array( - "d", [mem] * self.grid_options["num_cores"] - ) - self.shared_memory["max_memory_use_per_thread"] = multiprocessing.Array( - "d", [mem] * self.grid_options["num_cores"] - ) + for x in ["", "max_"]: + self.shared_memory[x + "memory_use_per_thread"] = multiprocessing.Array( + "d", [mem] * self.grid_options["num_processes"] + ) + + ############################################################ + # set and check default directory locations + ############################################################ + + # check tmp_dir exists + if self.grid_options["tmp_dir"] is None or not os.path.isdir( + self.grid_options["tmp_dir"] + ): + print( + "grid_options['tmp_dir'] is not set or it is not a directory : this should point to a temporary directory location, preferably local to your CPUs" + ) + self.exit(code=1) + + # check any HPC requirements are met + if self.HPC_job() and not self.HPC_check_requirements()[0]: + print(self.HPC_check_requirements()[1]) + self.exit(code=1) + + # default status_dir and cache_dir to be in tmp_dir + # + # NOTE: binary_c-python uses its own status_dir, which is not + # the same dir as HPC jobs use (so tmp_dir can be local + # to an HPC job, while the HPC status dir is common to + # all jobs) + for x in ["status", "cache"]: + if self.grid_options[x + "_dir"] is None: + self.grid_options[x + "_dir"] = os.path.join( + self.grid_options["tmp_dir"], x + ) + + # make list of directories we want to use + dirs = ["tmp_dir", "status_dir", "cache_dir"] + self.HPC_dirs() + + for dir in dirs: + # try to make directories if they don't exist + path = 
self.grid_options[dir] + if path is not None: + os.makedirs(path, exist_ok=True) + + # check directories exist and can be written to + if path is not None and self.dir_ok(path) is False: + print( + "Directory {dir} currently set to {path} cannot be written to. Please check that this directory is correct and you have write access.".format( + dir=dir, path=path + ) + ) + self.exit(code=1) + + # Make sure the subdirs of the tmp dir exist + subdirs = [ + "failed_systems", + "current_system", + "process_summary", + "runtime_systems", + "snapshots", + ] + for subdir in subdirs: + path = os.path.join(self.grid_options["tmp_dir"], subdir) + os.makedirs(path, exist_ok=True) + if self.dir_ok(path) is False: + print( + "Sub-Directory {subdir} (in tmp_dir) currently set to {path} cannot be written to. Please check that this directory is correct and you have write access.".format( + subdir=subdir, path=path + ) + ) + self.exit(code=1) + + # restore from existing HPC files + self.HPC_restore() + + # set up function cache + self.setup_function_cache() + + return def clean(self) -> None: """ Clean the contents of the population object so it can be reused. - Calling _pre_run_cleanup() + Calling _pre_run_setup() TODO: decide to deprecate this function """ - self._pre_run_cleanup() + self._pre_run_setup() def evolve(self) -> None: """ Entry point function of the whole object. From here, based on the settings, - we set up a SLURM or CONDOR grid, or if no setting is given we go straight - to evolving the population. + we set up a grid and (probably) evolve the population. - There are no direct arguments to this function, rather it is based on the grid_options settings: - grid_options['slurm']: integer Boolean whether to use a slurm_grid evolution - grid_options['condor']: integer Boolean whether to use a condor_grid evolution + There are no direct arguments to this function, the grid_options + contain all the relevant settings. - If neither of the above is set, we continue without using HPC routines - (that doesn't mean this cannot be run on a server with many cores) - - Returns an dictionary containing the analytics of the run - - TODO: change the way this is done. Slurm & CONDOR should probably do this differently - NOTE: SLURM and CONDOR options are not working properly yet + Returns: + a dictionary containing the analytics of the run. """ # Just to make sure we don't have stuff from a previous run hanging around - self._pre_run_cleanup() - - # Check which type: - if self.grid_options["slurm"] == 1: - # Execute Slurm subroutines - # self._slurm_grid() - raise ValueError("Slurm evolution not available at this moment") - - elif self.grid_options["condor"] == 1: - # Execute condor subroutines - # self._condor_grid() - raise ValueError("Condor evolution not available at this moment") + self._pre_run_setup() + + if self.HPC_job(): + # run HPC grid: if this returns True, then exit immediately + self.grid_options["symlink_latest_gridcode"] = False + if self.HPC_grid(): + self.exit(code=0) + + if self.grid_options["evolution_type"] == "join": + # join previously calculated data and return immediately + self.HPC_join_previous() + return + + # Execute population evolution subroutines + self._evolve_population() + + # make analytics information + analytics_dict = self.make_analytics_dict() + + if self.HPC_job(): + self.HPC_dump_status("HPC grid after analytics") + + if self.custom_options["save_snapshot"]: + # we must save a snapshot, not the population object + # ... 
also save the new starting point: this has to take into + # account where we originally started, and that the modulo may + # not be == 1. + self.grid_options["start_at"] = ( + self.grid_options["start_at"] + + self.grid_options["_count"] * self.grid_options["modulo"] + ) + # then save the snapshot + self.save_snapshot() + exitcode = 1 if self.was_killed() else 0 + self.exit(code=exitcode) - else: - # Execute population evolution subroutines - self._evolve_population() - - # Put all interesting stuff in a variable and output that afterwards, as analytics of the run. - analytics_dict = { - "population_name": self.grid_options["_population_id"], - "evolution_type": self.grid_options["evolution_type"], - "failed_count": self.grid_options["_failed_count"], - "failed_prob": self.grid_options["_failed_prob"], - "failed_systems_error_codes": self.grid_options[ - "_failed_systems_error_codes" - ].copy(), - "errors_exceeded": self.grid_options["_errors_exceeded"], - "errors_found": self.grid_options["_errors_found"], - "total_probability": self.grid_options["_probtot"], - "total_count": self.grid_options["_count"], - "start_timestamp": self.grid_options["_start_time_evolution"], - "end_timestamp": self.grid_options["_end_time_evolution"], - "total_mass_run": self.grid_options["_total_mass_run"], - "total_probability_weighted_mass_run": self.grid_options[ - "_total_probability_weighted_mass_run" - ], - "zero_prob_stars_skipped": self.grid_options["_zero_prob_stars_skipped"], - } + # Save object to a pickle file + elif self.grid_options["save_population_object"]: + self.save_population_object() - # Add analytics dict to the metadata too: - self.grid_ensemble_results["metadata"].update(analytics_dict) + # if we're running an HPC grid, exit here + # unless we're joining + if self.HPC_job() and self.grid_options["evolution_type"] != "join": + self.exit() ## # Clean up code: remove files, unset values, unload interpolators etc. This is placed in the general evolve function, @@ -1097,22 +909,20 @@ class Population: - TODO: include options for different ways of generating a population here. (i.e. MC or source file) """ - ## + ############################################################ # Prepare code/initialise grid. # set custom logging, set up store_memaddr, build grid code. dry run grid code. - self._setup() + if self._setup() is False: + return - ## - # Evolve systems: via grid_options one can choose to do this linearly, or - # multiprocessing method. + ############################################################ + # Evolve systems + self.set_time("start") if ( self.grid_options["evolution_type"] in self.grid_options["_evolution_type_options"] ): - if self.grid_options["evolution_type"] == "grid": - self._evolve_population_grid() - elif self.grid_options["evolution_type"] == "custom_generator": - # Use the same as the normal grid evolution but just a different generator + if self.grid_options["evolution_type"] in ["grid", "custom_generator"]: self._evolve_population_grid() # elif self.grid_options["evolution_type"] == "mc": @@ -1124,30 +934,40 @@ class Population: self.grid_options["_evolution_type_options"] ) ) + self.set_time("end") - # finished! 
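The start_at update shown above has to account for both the original starting point and the modulo, because a run with modulo > 1 only touches every modulo-th system. A small worked example with made-up numbers:

# resume-point bookkeeping, mirroring start_at += _count * modulo above
start_at = 10    # system number this run originally started from
modulo = 4       # this run only took every 4th system
count = 250      # systems actually run before the snapshot was taken

new_start_at = start_at + count * modulo
print(new_start_at)  # -> 1010: where the restored run should begin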
- self.grid_options["_end_time_evolution"] = time.time() - + ############################################################ # Log and print some information - dtsecs = ( - self.grid_options["_end_time_evolution"] - - self.grid_options["_start_time_evolution"] - ) string1 = "Population-{} finished!\nThe total probability is {:g}.".format( self.grid_options["_population_id"], self.grid_options["_probtot"] ) - string2 = "It took a total of {dtsecs} to run {starcount} systems on {ncores} cores\n = {totaldtsecs} of CPU time.\nMaximum memory use {memuse:.3f} MB".format( - dtsecs=timedelta(dtsecs), - starcount=self.grid_options["_total_starcount"], - ncores=self.grid_options["num_cores"], - totaldtsecs=timedelta(dtsecs * self.grid_options["num_cores"]), + string2 = "It took a total of {dtsecs} to run {starcount} systems on {ncores} cores\n = {CPUtime} of CPU time.\nMaximum memory use {memuse:.3f} MB".format( + dtsecs=timedelta(self.grid_options["_time_elapsed"]), + starcount=self.grid_options[ + "_count" + ], # not _total_count! we may have ended the run early... + ncores=self.grid_options["num_processes"], + CPUtime=timedelta(self.CPU_time()), memuse=sum(self.shared_memory["max_memory_use_per_thread"]), ) - verbose_print(self._boxed(string1, string2), self.grid_options["verbosity"], 0) + ############################################################ + # add warning about a grid that was killed + ############################################################ + if self.was_killed(): + string2 += "\n>>> Grid was killed <<<" + self.set_status("killed") + + self.verbose_print( + self._boxed(string1, string2), self.grid_options["verbosity"], 0 + ) + + ############################################################ + # handle errors + ############################################################ if self.grid_options["_errors_found"]: # Some information afterwards - verbose_print( + self.verbose_print( self._boxed( "During the run {} failed systems were found\nwith a total probability of {:g}\nwith the following unique error codes: {} ".format( self.grid_options["_failed_count"], @@ -1159,7 +979,7 @@ class Population: 0, ) # Some information afterwards - verbose_print( + self.verbose_print( "The full argline commands for {} these systems have been written to {}".format( "ALL" if not self.grid_options["_errors_exceeded"] @@ -1175,36 +995,26 @@ class Population: 0, ) else: - verbose_print( - "There were no errors found in this run.", + self.verbose_print( + "No failed systems were found in this run.", self.grid_options["verbosity"], 0, ) - def _get_stream_logger(self, level=logging.DEBUG): - """Return logger with configured StreamHandler.""" - stream_logger = logging.getLogger("stream_logger") - stream_logger.handlers = [] - stream_logger.setLevel(level) - sh = logging.StreamHandler() - sh.setLevel(level) - fmt = "[%(asctime)s %(levelname)-8s %(processName)s] --- %(message)s" - formatter = logging.Formatter(fmt) - sh.setFormatter(formatter) - stream_logger.addHandler(sh) - - return stream_logger - - def _system_queue_filler(self, job_queue, num_cores): + return + + def _system_queue_filler(self, job_queue, num_processes): """ Function that is responsible for keeping the queue filled. This will generate the systems until it is full, and then keeps trying to fill it. Will have to play with the size of this. + + This function is called as part of the parent process. 
""" stream_logger = self._get_stream_logger() - if self.grid_options["verbosity"] >= _LOGGER_VERBOSITY_LEVEL: + if self.grid_options["verbosity"] >= self._LOGGER_VERBOSITY_LEVEL: stream_logger.debug(f"setting up the system_queue_filler now") # Setup of the generator @@ -1220,35 +1030,73 @@ class Population: self, print_results=False ) - # TODO: build in method to handle with the HPC. - # Continuously fill the queue + # start_at can be an expression : we should eval it + # prior to running the loop + self.grid_options["start_at"] = eval(str(self.grid_options["start_at"])) + if self.grid_options["start_at"] > 0: + print("Starting at model {} ".format(self.grid_options["start_at"])) + + # Continuously fill the queue while we are allowed to for system_number, system_dict in enumerate(generator): + if self.grid_options["stop_queue"]: + break + + # skip systems before start_at + elif system_number < self.grid_options["start_at"]: + self.verbose_print( + "skip system {n} because < start_at = {start}".format( + n=system_number, start=self.grid_options["start_at"] + ), + self.grid_options["verbosity"], + 3, + ) + continue - # skip systems before start_at, and apply modulo + # apply modulo if not ( - system_number >= self.grid_options["start_at"] - and (system_number - self.grid_options["start_at"]) + (system_number - self.grid_options["start_at"]) % self.grid_options["modulo"] == 0 ): + self.verbose_print( + "skip system {n} because modulo {mod} == {donemod}".format( + n=system_number, + mod=self.grid_options["modulo"], + donemod=(system_number - self.grid_options["start_at"]) + % self.grid_options["modulo"], + ), + self.grid_options["verbosity"], + 3, + ) + continue # Put job in queue - job_queue.put((system_number, system_dict)) + if self.grid_options["stop_queue"]: + break + else: + try: + job_queue.put((system_number, system_dict), block=True) + except Exception as e: + # error on queueing : stop the queue + self.grid_options["stop_queue"] = True + + # Print some info + self.verbose_print( + "Queue produced system {}".format(system_number), + self.grid_options["verbosity"], + 3, + ) - # Print some info - verbose_print( - "Queue produced system {}".format(system_number), - self.grid_options["verbosity"], - 3, - ) + self.grid_options["_queue_done"] = True # Send closing signal to workers. When they receive this they will terminate - if self.grid_options["verbosity"] >= _LOGGER_VERBOSITY_LEVEL: + if self.grid_options["verbosity"] >= self._LOGGER_VERBOSITY_LEVEL: stream_logger.debug(f"Signalling processes to stop") # DEBUG - for _ in range(num_cores): - job_queue.put("STOP") + if True: # not self.grid_options['stop_queue']: + for _ in range(num_processes): + job_queue.put("STOP") def _evolve_population_grid(self): """ @@ -1256,7 +1104,7 @@ class Population: First we set up the multiprocessing manager and the job and result queue. - Then we spawn <self.grid_options["num_cores"]> number of process instances, + Then we spawn <self.grid_options["num_processes"]> number of process instances, and signal them to start. While the processes are waiting for their instructions, we start the queue filler, @@ -1267,25 +1115,35 @@ class Population: When all the systems have been put in the queue we pass a STOP signal that will make the processes wrap up. 
+ We then add any previous population + We read out the information in the result queue and store them in the grid object """ # Set process name setproctitle.setproctitle("binarycpython parent process") + # if max_queue_size is zero, calculate automatically + # to be double the number of processes - you don't want to + # make the queue too large because when it's killed you + # want to end quickly + if self.grid_options["max_queue_size"] == 0: + self.grid_options["max_queue_size"] = 2 * self.grid_options["num_processes"] + # Set up the manager object that can share info between processes manager = multiprocessing.Manager() job_queue = manager.Queue(maxsize=self.grid_options["max_queue_size"]) + result_queue = manager.Queue(maxsize=self.grid_options["num_processes"]) - # backwards compatibility - if "amt_cores" in self.grid_options: - self.grid_options["num_cores"] = self.grid_options["amt_cores"] + # data to be sent to signal handlers + signal_data = { + "where": "_evolve_population_grid", + "queue": job_queue, + } - result_queue = manager.Queue(maxsize=self.grid_options["num_cores"]) - - # Create process instances + # Create process instances to run the stars processes = [] - for ID in range(self.grid_options["num_cores"]): + for ID in range(self.grid_options["num_processes"]): processes.append( multiprocessing.Process( target=self._process_run_population_grid, @@ -1297,34 +1155,102 @@ class Population: for p in processes: p.start() - # Set up the system_queue - self._system_queue_filler(job_queue, num_cores=self.grid_options["num_cores"]) + # activate signal handlers + # * the child processes ignore these signals + # * the parent will be in _system_queue_filler when these are caught + signal.signal( + signal.SIGTERM, functools.partial(self._parent_signal_handler, signal_data) + ) + signal.signal( + signal.SIGINT, functools.partial(self._parent_signal_handler, signal_data) + ) + + # Set up the system_queue in the parent process + self._system_queue_filler( + job_queue, num_processes=self.grid_options["num_processes"] + ) # Join the processes + print("Do join of subprocesses ...") for p in processes: p.join() + print("Joined subprocesses.") + + # todo: error codes # Handle the results by merging all the dictionaries. How that merging happens exactly is # described in the merge_dicts description. 
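The per-process result dictionaries are combined with merge_dicts (after keys_to_floats), as noted above. Purely as an illustration of the idea, and not the actual merge_dicts implementation, a recursive merge that descends into nested dictionaries and sums numeric leaves could look like this:

def merge_result_dicts(a, b):
    """Recursively merge b into a copy of a, summing numeric values on shared keys."""
    merged = dict(a)
    for key, value in b.items():
        if key in merged and isinstance(merged[key], dict) and isinstance(value, dict):
            merged[key] = merge_result_dicts(merged[key], value)
        elif key in merged and isinstance(merged[key], (int, float)) and isinstance(value, (int, float)):
            merged[key] = merged[key] + value
        else:
            merged[key] = value
    return merged

print(merge_result_dicts({"results": {"mass": {1.0: 2}}, "_count": 10},
                         {"results": {"mass": {1.0: 3, 2.0: 1}}, "_count": 5}))
# -> {'results': {'mass': {1.0: 5, 2.0: 1}}, '_count': 15}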
- combined_output_dict = OrderedDict() + # + # If there is a preloaded_population, we add this first, + # then we add the populations run just now + + # 1) + # use preloaded population's data as a basis + # for our combined_output_dict + if self.preloaded_population: + combined_output_dict = { + "ensemble_results": keys_to_floats( + self.preloaded_population.grid_ensemble_results + ), + "results": keys_to_floats(self.preloaded_population.grid_results), + } + + for x in self._metadata_keylist(): + try: + combined_output_dict[x] = self.preloaded_population.grid_options[x] + except Exception as e: + print( + "Tried to set combined_output_dict key", + x, + "from preloaded_popuation, but this failed:", + e, + ) + print( + "Pre-loaded data from {} stars".format(combined_output_dict["_count"]) + ) + + # do not propagate _killed + # combined_output_dict['results']['_killed'] = False + # combined_output_dict['_killed'] = False + self.preloaded_population = None + gc.collect() + else: + # new empty combined output + combined_output_dict = OrderedDict() + combined_output_dict["ensemble_results"] = OrderedDict() + combined_output_dict["results"] = OrderedDict() + + # 2) + # combine the dicts that were output from our + # subprocesses sentinel = object() for output_dict in iter(result_queue.get, sentinel): - combined_output_dict = merge_dicts(combined_output_dict, output_dict) + if output_dict: + # don't let Xinit be added + if ( + "ensemble_results" in combined_output_dict + and "ensemble" in combined_output_dict["ensemble_results"] + and "Xinit" in combined_output_dict["ensemble_results"]["ensemble"] + ): + del combined_output_dict["ensemble_results"]["ensemble"]["Xinit"] + + # merge dicts + combined_output_dict = merge_dicts( + combined_output_dict, keys_to_floats(output_dict) + ) if result_queue.empty(): break # Extra ensemble result manipulation: - combined_output_dict["ensemble_results"]["ensemble"] = format_ensemble_results( - combined_output_dict["ensemble_results"].get("ensemble", {}) - ) + if "ensemble_results" in combined_output_dict: + combined_output_dict["ensemble_results"][ + "ensemble" + ] = format_ensemble_results( + combined_output_dict["ensemble_results"].get("ensemble", {}) + ) gc.collect() - # Take into account that we run this on multiple cores - combined_output_dict[ - "_total_probability_weighted_mass_run" - ] = combined_output_dict["_total_probability_weighted_mass_run"] - # Put the values back as object properties self.grid_results = combined_output_dict["results"] @@ -1335,57 +1261,11 @@ class Population: ] # Ensemble results are also passed as output from that dictionary # Add metadata - self.grid_ensemble_results["metadata"] = {} - self.grid_ensemble_results["metadata"]["population_id"] = self.grid_options[ - "_population_id" - ] - self.grid_ensemble_results["metadata"][ - "total_probability_weighted_mass" - ] = combined_output_dict["_total_probability_weighted_mass_run"] - self.grid_ensemble_results["metadata"][ - "factored_in_probability_weighted_mass" - ] = False - if self.grid_options["ensemble_factor_in_probability_weighted_mass"]: - multiply_values_dict( - self.grid_ensemble_results["ensemble"], - 1 - / self.grid_ensemble_results["metadata"][ - "total_probability_weighted_mass" - ], - ) - self.grid_ensemble_results["metadata"][ - "factored_in_probability_weighted_mass" - ] = True - - # Add settings of the populations - all_info = self.return_all_info( - include_population_settings=True, - include_binary_c_defaults=True, - include_binary_c_version_info=True, - 
include_binary_c_help_all=True, - ) - self.grid_ensemble_results["metadata"]["settings"] = json.loads( - json.dumps(all_info, default=binaryc_json_serializer) - ) + self.add_ensemble_metadata(combined_output_dict) - ############################## - # Update grid options - self.grid_options["_failed_count"] = combined_output_dict["_failed_count"] - self.grid_options["_failed_prob"] = combined_output_dict["_failed_prob"] - self.grid_options["_failed_systems_error_codes"] = list( - set(combined_output_dict["_failed_systems_error_codes"]) - ) - self.grid_options["_errors_exceeded"] = combined_output_dict["_errors_exceeded"] - self.grid_options["_errors_found"] = combined_output_dict["_errors_found"] - self.grid_options["_probtot"] = combined_output_dict["_probtot"] - self.grid_options["_count"] = combined_output_dict["_count"] - self.grid_options["_total_mass_run"] = combined_output_dict["_total_mass_run"] - self.grid_options[ - "_total_probability_weighted_mass_run" - ] = combined_output_dict["_total_probability_weighted_mass_run"] - self.grid_options["_zero_prob_stars_skipped"] = combined_output_dict[ - "_zero_prob_stars_skipped" - ] + # if we were killed, save snapshot + if self.grid_options["save_snapshots"] and self.grid_options["_killed"]: + self.custom_options["save_snapshot"] = True def _evolve_system_mp(self, full_system_dict): """ @@ -1425,6 +1305,84 @@ class Population: self.custom_options["parameter_dict"] = full_system_dict self.grid_options["parse_function"](self, out) + return + + def _parent_signal_handler(self, signal_data, signum, frame): + """ + Signal handling function for the parent process. + """ + + # this function is called by both queues when they + # catch a signal + sigstring = signal.Signals(signum).name + + if sigstring in self.signal_count: + self.signal_count[sigstring] += 1 + else: + self.signal_count[sigstring] = 1 + + if self.signal_count[sigstring] > 3: + print("caught > 3 times : exit") + self.exit(code=2) + + # tell the user what has happened + print( + "Parent signal {} caught (count {}) handler set in {} [ keys {} ]".format( + sigstring, + self.signal_count[sigstring], + signal_data["where"], + ",".join(signal_data.keys()), + ) + ) + + # set status files + self.set_status("signal {sig}".format(sig=sigstring)) + + if signum == signal.SIGINT: + # caught SIGINT: e.g. CTRL-C or HPC job manager + # shutting us down + print("Parent set stop_queue to True") + self.grid_options["stop_queue"] = True + self.custom_options["save_snapshot"] = True + self.grid_options["_killed"] = True + return + else: + # what to do? + return + + def _child_signal_handler(self, signal_data, signum, frame): + """ + Signal handler for child processes. 
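The parent and child handlers above are installed with functools.partial so that extra context (a "where" label, the job queue) travels with the handler, a per-signal count forces an exit after more than three repeats, and a child forwards the signal to its parent with os.kill. A compressed stand-alone sketch of that pattern follows; the handler and signal_count names are illustrative and POSIX signal behaviour is assumed.

import functools
import os
import signal
import sys

signal_count = {}

def handler(signal_data, signum, frame):
    """Count signals, report where the handler was installed, exit after 3 repeats."""
    name = signal.Signals(signum).name
    signal_count[name] = signal_count.get(name, 0) + 1
    print("caught {} (count {}), handler set in {}".format(
        name, signal_count[name], signal_data["where"]))
    if signal_count[name] > 3:
        sys.exit(2)
    # a child process would forward the signal to its parent here, e.g.
    # os.kill(parent_pid, signum)

signal.signal(signal.SIGINT, functools.partial(handler, {"where": "example"}))
signal.signal(signal.SIGTERM, functools.partial(handler, {"where": "example"}))

# send ourselves a SIGINT to show the handler firing once
os.kill(os.getpid(), signal.SIGINT)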
+ """ + sigstring = signal.Signals(signum).name + + if sigstring in self.signal_count: + self.signal_count[sigstring] += 1 + else: + self.signal_count[sigstring] = 1 + + # if we receive the signal three times, exit + if self.signal_count[sigstring] > 3: + print("caught > 3 times : exit") + self.exit(code=2) + + print( + "Child signal {} caught (count {}) handler set in {} [ keys {} ]".format( + sigstring, + self.signal_count[sigstring], + signal_data["where"], + ",".join(signal_data.keys()), + ) + ) + + # SIGINT should stop the queue nicely + if signum == signal.SIGINT: + self.grid_options["stop_queue"] = True + self.grid_options["_killed"] = True + + # propagate signal to parent + os.kill(self.grid_options["_main_pid"], signum) + def _process_run_population_grid(self, job_queue, result_queue, ID): """ Worker process that gets items from the job_queue and runs those systems. @@ -1437,43 +1395,46 @@ class Population: """ + # ignore SIGINT and SIGTERM : these are + # handled by our parent process (hence in + # _evolve_population_grid) + signal.signal( + signal.SIGTERM, + functools.partial( + self._child_signal_handler, {"where": "_process_run_population_grid"} + ), + ) + signal.signal( + signal.SIGINT, + functools.partial( + self._child_signal_handler, {"where": "_process_run_population_grid"} + ), + ) + # set start timer start_process_time = datetime.datetime.now() - # - self.process_ID = ( - ID # Store the ID as a object property again, lets see if that works. - ) + # set the process ID + self.process_ID = ID stream_logger = self._get_stream_logger() - if self.grid_options["verbosity"] >= _LOGGER_VERBOSITY_LEVEL: + if self.grid_options["verbosity"] >= self._LOGGER_VERBOSITY_LEVEL: stream_logger.debug(f"Setting up processor: process-{self.process_ID}") # Set the process names - name = "binarycpython population thread {}".format(ID) name_proc = "binarycpython population process {}".format(ID) setproctitle.setproctitle(name_proc) - # setproctitle.setthreadtitle(name) # Set to starting up - with open( - os.path.join( - self.grid_options["tmp_dir"], - "process_status", - "process_{}.txt".format(self.process_ID), - ), - "w", - ) as f: - f.write("STARTING") - f.close() + self.set_status("starting") # lets try out making stores for all the grids: self.grid_options["_store_memaddr"] = _binary_c_bindings.return_store_memaddr() - verbose_print( + self.verbose_print( "Process {} started at {}.\tUsing store memaddr {}".format( ID, - datetime.datetime.now().isoformat(), + now(), self.grid_options["_store_memaddr"], ), self.grid_options["verbosity"], @@ -1491,7 +1452,7 @@ class Population: self.process_ID: persistent_data_memaddr } - verbose_print( + self.verbose_print( "\tUsing persistent_data memaddr: {}".format(persistent_data_memaddr), self.grid_options["verbosity"], 3, @@ -1505,12 +1466,10 @@ class Population: 0 # counter for the probability of the actual systems this tread ran ) number_of_systems_run = ( - 0 # counter for the actual amt of systems this thread ran + 0 # counter for the actual number of systems this thread ran ) zero_prob_stars_skipped = 0 - total_time_calling_binary_c = 0 - total_mass_run = 0 total_probability_weighted_mass_run = 0 @@ -1521,23 +1480,24 @@ class Population: ) next_mem_update_time = start_grid_time + self.grid_options["log_dt"] + # Set status to running + self.set_status("running") + + ############################################################ + # Run stellar systems in the queue ############################################################ - # Go over the queue for 
system_number, system_dict in iter(job_queue.get, "STOP"): - # At the first system set the status of the thread to running - if localcounter == 0: - # Set status to running - with open( - os.path.join( - self.grid_options["tmp_dir"], - "process_status", - "process_{}.txt".format(self.process_ID), - ), - "w", - ) as f: - f.write("RUNNING") - f.close() + if False: + print( + "Child: Job Queue system_number = {}, dict={}, n={} check {}".format( + system_number, + system_dict, + number_of_systems_run, + self.grid_options["stop_queue"], + ) + ) + sys.stdout.flush() # Combine that with the other settings full_system_dict = self.bse_options.copy() @@ -1568,16 +1528,6 @@ class Population: ) raise ValueError(msg) - # self._print_info( - # i + 1, self.grid_options["_total_starcount"], full_system_dict - # ) - - # verbose_print( - # "Process {} is handling system {}".format(ID, system_number), - # self.grid_options["verbosity"], - # 1, - # ) - ###################### # Print status of runs # save the current time (used often) @@ -1644,13 +1594,14 @@ class Population: # that was on, we log each current system to a file (each thread has one). # Each new system overrides the previous if self.grid_options["log_args"]: - with open( + with self.open( os.path.join( self.grid_options["log_args_dir"], "current_system", "process_{}.txt".format(self.process_ID), ), "w", + encoding="utf-8", ) as f: binary_cmdline_string = self._return_argline(full_system_dict) f.write(binary_cmdline_string) @@ -1685,13 +1636,14 @@ class Population: # Debug line: logging all the lines if self.grid_options["log_runtime_systems"] == 1: - with open( + with self.open( os.path.join( self.grid_options["tmp_dir"], "runtime_systems", "process_{}.txt".format(self.process_ID), ), "a+", + encoding="utf-8", ) as f: binary_cmdline_string = self._return_argline(full_system_dict) f.write( @@ -1723,19 +1675,22 @@ class Population: total_mass_system * full_system_dict.get("probability", 1) ) + if self.grid_options["stop_queue"]: + print("Child: Stop queue at system {n}".format(n=number_of_systems_run)) + break + + if self.grid_options["stop_queue"]: + # any remaining jobs should be ignored + try: + while True: + job_queue.get_nowait() + except queue.Empty: + pass + # Set status to finishing - with open( - os.path.join( - self.grid_options["tmp_dir"], - "process_status", - "process_{}.txt".format(self.process_ID), - ), - "w", - ) as f: - f.write("FINISHING") - f.close() + self.set_status("finishing") - if self.grid_options["verbosity"] >= _LOGGER_VERBOSITY_LEVEL: + if self.grid_options["verbosity"] >= self._LOGGER_VERBOSITY_LEVEL: stream_logger.debug(f"Process-{self.process_ID} is finishing.") ########################### @@ -1744,7 +1699,7 @@ class Population: # if ensemble==1, then either directly write that data to a file, or combine everything into 1 file. ensemble_json = {} # Make sure it exists already if self.bse_options.get("ensemble", 0) == 1: - verbose_print( + self.verbose_print( "Process {}: is freeing ensemble output (using persistent_data memaddr {})".format( ID, self.persistent_data_memory_dict[self.process_ID] ), @@ -1759,11 +1714,15 @@ class Population: ) if ensemble_raw_output is None: - verbose_print( + self.verbose_print( "Process {}: Warning! Ensemble output is empty. 
".format(ID), self.grid_options["verbosity"], 1, ) + ensemble_output = None + else: + # convert ensemble_raw_output to a dictionary + ensemble_output = extract_ensemble_json_from_string(ensemble_raw_output) # save the ensemble chunk to a file if ( @@ -1777,7 +1736,7 @@ class Population: self.grid_options["_population_id"], self.process_ID ), ) - verbose_print( + self.verbose_print( "Writing process {} JSON ensemble chunk output to {} ".format( ID, output_file ), @@ -1786,24 +1745,20 @@ class Population: ) ensemble_output = extract_ensemble_json_from_string(ensemble_raw_output) - self.write_ensemble(output_file, ensemble_output) # combine ensemble chunks if self.grid_options["combine_ensemble_with_thread_joining"] is True: - verbose_print( + self.verbose_print( "Process {}: Extracting ensemble info from raw string".format(ID), self.grid_options["verbosity"], 1, ) - - ensemble_json["ensemble"] = extract_ensemble_json_from_string( - ensemble_raw_output - ) # Load this into a dict so that we can combine it later + ensemble_json["ensemble"] = ensemble_output ########################## # Clean up and return - verbose_print( + self.verbose_print( "process {} free memory and return ".format(ID), self.grid_options["verbosity"], 1, @@ -1827,15 +1782,18 @@ class Population: "_total_mass_run": total_mass_run, "_total_probability_weighted_mass_run": total_probability_weighted_mass_run, "_zero_prob_stars_skipped": zero_prob_stars_skipped, + "_killed": self.grid_options["_killed"], } end_process_time = datetime.datetime.now() + killed = self.was_killed() + # thread end message colour = "cyan on black" - verbose_print( + self.verbose_print( self._boxed( - "{colour}Process {ID} finished:\ngenerator started at {start}\ngenerator finished at {end}\ntotal: {timesecs}\nof which {binary_c_secs} with binary_c\nRan {nsystems} systems\nwith a total probability of {psystems:g}\n{failcolour}This thread had {nfail} failing systems{colour}\n{failcolour}with a total failed probability of {pfail}{colour}\n{zerocolour}Skipped a total of {nzero} zero-probability systems{zeroreset}\n".format( + "{colour}Process {ID} finished:\ngenerator started at {start}\ngenerator finished at {end}\ntotal: {timesecs}\nof which {binary_c_secs} with binary_c\nRan {nsystems} systems\nwith a total probability of {psystems:g}\n{failcolour}This thread had {nfail} failing systems{colour}\n{failcolour}with a total failed probability of {pfail}{colour}\n{zerocolour}Skipped a total of {nzero} zero-probability systems{zeroreset}\n{failednotice}".format( colour=self.ANSI_colours[colour], ID=ID, start=start_process_time.isoformat(), @@ -1861,6 +1819,7 @@ class Population: zeroreset=self.ANSI_colours[colour] if zero_prob_stars_skipped > 0 else "", + failednotice=">>> Process was killed <<<\n" if killed else "", ), colour=colour, ), @@ -1884,44 +1843,35 @@ class Population: ], "zero_prob_stars_skipped": zero_prob_stars_skipped, } - with open( + with self.open( os.path.join( self.grid_options["tmp_dir"], "process_summary", "process_{}.json".format(self.process_ID), ), "w", + encoding="utf-8", ) as f: - f.write(json.dumps(summary_dict, indent=4)) - f.close() + json.dump(summary_dict, f, indent=4, ensure_ascii=False) # Set status to finished - with open( - os.path.join( - self.grid_options["tmp_dir"], - "process_status", - "process_{}.txt".format(self.process_ID), - ), - "w", - ) as f: - f.write("FINISHED") - f.close() + if self.was_killed(): + self.set_status("killed") + else: + self.set_status("finished") - verbose_print( + self.verbose_print( "process 
{} queue put output_dict ".format(ID), self.grid_options["verbosity"], 1, ) + result_queue.put(output_dict) - if self.grid_options["verbosity"] >= _LOGGER_VERBOSITY_LEVEL: + if self.grid_options["verbosity"] >= self._LOGGER_VERBOSITY_LEVEL: stream_logger.debug(f"Process-{self.process_ID} is finished.") - # Don't do this : Clean up the interpolators if they exist - - # TODO: make a cleanup function for the individual threads - # TODO: make sure this is necessary. Actually its probably not, because we have a centralised queue - verbose_print( + self.verbose_print( "process {} return ".format(ID), self.grid_options["verbosity"], 1, @@ -1951,7 +1901,7 @@ class Population: # Get argument line and argline = self._return_argline(self.bse_options) - verbose_print( + self.verbose_print( "Running {}".format(argline), self.grid_options["verbosity"], 1 ) @@ -1989,6 +1939,10 @@ class Population: Since we have different methods of running a population, this setup function will do different things depending on different settings + Returns: + True if we want to continue. + False if we should return to the original calling script. + Tasks: TODO: Make other kinds of populations possible. i.e, read out type of grid, and set up accordingly @@ -1996,22 +1950,9 @@ class Population: function """ - # Make sure the subdirs of the tmp dir exist: - os.makedirs( - os.path.join(self.grid_options["tmp_dir"], "failed_systems"), exist_ok=True - ) - os.makedirs( - os.path.join(self.grid_options["tmp_dir"], "current_system"), exist_ok=True - ) - os.makedirs( - os.path.join(self.grid_options["tmp_dir"], "process_status"), exist_ok=True - ) - os.makedirs( - os.path.join(self.grid_options["tmp_dir"], "process_summary"), exist_ok=True - ) - os.makedirs( - os.path.join(self.grid_options["tmp_dir"], "runtime_systems"), exist_ok=True - ) + # Check for restore + if self.grid_options["restore_from_snapshot_file"]: + self.load_snapshot(self.grid_options["restore_from_snapshot_file"]) # Check for parse function if not self.grid_options["parse_function"]: @@ -2025,7 +1966,7 @@ class Population: ## check the settings and set all the warnings. if self.bse_options.get("ensemble", None): if not self.bse_options.get("ensemble_defer", 0) == 1: - verbose_print( + self.verbose_print( "Error, if you want to run an ensemble in a population, the output needs to be deferred. 
Please set 'ensemble_defer' to 1", self.grid_options["verbosity"], 0, @@ -2035,14 +1976,14 @@ class Population: if not any( [key.startswith("ensemble_filter_") for key in self.bse_options] ): - verbose_print( + self.verbose_print( "Warning: Running the ensemble without any filter requires a lot of available RAM", self.grid_options["verbosity"], 0, ) if self.bse_options.get("ensemble_filters_off", 0) != 1: - verbose_print( + self.verbose_print( "Warning: Running the ensemble without any filter requires a lot of available RAM", self.grid_options["verbosity"], 0, @@ -2050,7 +1991,7 @@ class Population: if self.grid_options["combine_ensemble_with_thread_joining"] == False: if not self.custom_options.get("data_dir", None): - verbose_print( + self.verbose_print( "Error: chosen to write the ensemble output directly to files but data_dir isn't set", self.grid_options["verbosity"], 0, @@ -2063,15 +2004,22 @@ class Population: ## Check which type of population generation # grid type if self.grid_options["evolution_type"] == "grid": - ####################### - # Dry run and getting starcount + ################################################## + # Grid run + ############################################################ + # Set up LRU cache + self.setup_function_cache() + ############################################################ + # Dry run and getting starcount + ############################################################ # Put in check if len(self.grid_options["_grid_variables"]) == 0: print("Error: you haven't defined any grid variables! Aborting") raise ValueError # Set up the grid code with a dry run option to see total probability + print("Do dry run? {}".format(self.grid_options["do_dry_run"])) if self.grid_options["do_dry_run"]: print("Doing dry run to calculate total starcount and probability") self._generate_grid_code(dry_run=True) @@ -2082,9 +2030,10 @@ class Population: # Do a dry run self._dry_run() - verbose_print( + self.verbose_print( self._boxed( - "Total starcount for this run is {starcount}".format( + "Dry run", + "Total starcount is {starcount}".format( starcount=self.grid_options["_total_starcount"] ), "Total probability is {probtot:g}".format( @@ -2095,16 +2044,25 @@ class Population: 0, ) if self.grid_options["exit_after_dry_run"]: - sys.exit() + print( + "Exiting after dry run {}".format( + self.grid_options["exit_after_dry_run"] + ) + ) + self.exit(code=0) + elif self.grid_options["return_after_dry_run"]: + print( + "Returning after dry run {}".format( + self.grid_options["exit_after_dry_run"] + ) + ) + return False ####################### # Reset values and prepare the grid function self.grid_options[ "_probtot" ] = 0 # To make sure that the values are reset. TODO: fix this in a cleaner way - self.grid_options[ - "_start_time_evolution" - ] = time.time() # Setting start time of grid # # Making sure the loaded grid code isn't lingering in the main PID # self._generate_grid_code(dry_run=False) @@ -2140,7 +2098,7 @@ class Population: self._dry_run_source_file() print( - "Total starcount for this run will be: {}".format( + "Total starcount will be: {}".format( self.grid_options["_total_starcount"] ) ) @@ -2150,9 +2108,6 @@ class Population: self.grid_options[ "_probtot" ] = 0 # To make sure that the values are reset. 
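The dry run invoked above executes the generated grid code only to count systems and accumulate the total probability; nothing is passed to binary_c. A toy version of that bookkeeping, with an ordinary generator standing in for the generated grid code:

def dry_run_count(system_generator):
    """Walk a system generator without evolving anything, accumulating count and probability."""
    total_starcount = 0
    total_probability = 0.0
    for system_dict in system_generator:
        total_starcount += 1
        total_probability += system_dict.get("probability", 1)
    return total_starcount, total_probability

# toy generator standing in for the generated grid code
systems = ({"M_1": 10 + i, "probability": 0.1} for i in range(10))
print(dry_run_count(systems))  # -> (10, ~1.0)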
TODO: fix this in a cleaner way - self.grid_options[ - "_start_time_evolution" - ] = time.time() # Setting start time of grid # # TODO: fix this function @@ -2163,9 +2118,8 @@ class Population: self.grid_options[ "_probtot" ] = 0 # To make sure that the values are reset. TODO: fix this in a cleaner way - self.grid_options[ - "_start_time_evolution" - ] = time.time() # Setting start time of grid + + return True def _cleanup(self): """ @@ -2179,2815 +2133,242 @@ class Population: """ # Reset values - self.grid_options["_count"] = 0 - self.grid_options["_probtot"] = 0 + for x in [ + "_count", + "_probtot", + "_failed_count", + "_failed_prob", + "_total_mass_run", + "_total_probability_weighted_mass_run", + ]: + self.grid_options[x] = 0 + for x in ["_errors_found", "_errors_exceeded"]: + self.grid_options[x] = False self.grid_options["_system_generator"] = None - self.grid_options["_failed_count"] = 0 - self.grid_options["_failed_prob"] = 0 - self.grid_options["_errors_found"] = False - self.grid_options["_errors_exceeded"] = False self.grid_options["_failed_systems_error_codes"] = [] - self.grid_options["_total_mass_run"] = 0 - self.grid_options["_total_probability_weighted_mass_run"] = 0 - # Remove files - # TODO: remove files - - # Unload functions - # TODO: unload functions + def _dry_run(self): + """ + Function to dry run the grid and know how many stars it will run - # Unload/free custom_logging_code - # TODO: cleanup custom logging code. + Requires the grid to be built as a dry run grid + """ + self.verbose_print( + "Doing a dry run of the grid.", self.grid_options["verbosity"], 1 + ) + system_generator = self.grid_options["_system_generator"] + total_starcount = system_generator(self) + self.grid_options["_total_starcount"] = total_starcount ################################################### - # Grid code functions + # Population from file functions # - # Function below are used to run populations with - # a variable grid + # Functions below are used to run populations from + # a file containing binary_c calls ################################################### - - def _add_code(self, *args, indent=0): - """ - Function to add code to the grid code string - - add code to the code_string - - indent (=0) is added once at the beginning - mindent (=0) is added for every line - - don't use both! - """ - - indent_block = self._indent_block(indent) - for thing in args: - self.code_string += indent_block + thing - - def _indent_block(self, n=0): + def _dry_run_source_file(self): """ - return an indent block, with n extra blocks in it + Function to go through the source_file and count the number of lines and the total probability """ - return (self.indent_depth + n) * self.indent_string + system_generator = self.grid_options["_system_generator"] + total_starcount = 0 - def _increment_indent_depth(self, delta): - """ - increment the indent indent_depth by delta - """ + for _ in system_generator: + total_starcount += 1 - self.indent_depth += delta + total_starcount = system_generator(self) + self.grid_options["_total_starcount"] = total_starcount - def _generate_grid_code(self, dry_run=False): + def _load_source_file(self, check=False): """ - Function that generates the code from which the population will be made. - - dry_run: when True, it will return the starcount at the end so that we know - what the total number of systems is. 
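As the docstring above notes, the real sampling function is written as Python source to a file in the temporary directory and then imported and executed. A minimal sketch of that generate, write and import round trip with a trivial generated function; the file name, module name and generated body are illustrative, and the real grid_code also takes the population object as its first argument.

import importlib.util
import os
import tempfile

# Python source for a trivial generator-style function
code = (
    "def grid_code(print_results=True):\n"
    "    for i in range(5):\n"
    "        yield {'M_1': 10 + i}\n"
)

# write it to a temporary directory, as the grid does with its tmp_dir
tmp_dir = tempfile.mkdtemp()
path = os.path.join(tmp_dir, "example_grid_code.py")
with open(path, "w", encoding="utf-8") as f:
    f.write(code)

# import the freshly written module and call the generated function
spec = importlib.util.spec_from_file_location("example_grid_code", path)
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)

for system in module.grid_code():
    print(system)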
- - The phasevol values are handled by generating a second array - - # TODO: Add correct logging everywhere - # TODO: add part to handle separation if orbital_period is added. Idea. use default values - # for orbital parameters and possibly overwrite those or something. - # TODO: add sensible description to this function. - # TODO: Check whether all the probability and phasevol values are correct. - # TODO: import only the necessary packages/functions - # TODO: Put all the masses, eccentricities and periods in there already - # TODO: Put the certain blocks that are repeated in some sub functions - # TODO: make sure running systems with multiplicity 3+ is also possible. - - Results in a generated file that contains a system_generator function. + Function that loads the source_file that contains a binary_c calls """ - verbose_print("Generating grid code", self.grid_options["verbosity"], 1) - - total_grid_variables = len(self.grid_options["_grid_variables"]) - - self._add_code( - # Import packages - "import math\n", - "import numpy as np\n", - "from collections import OrderedDict\n", - "from binarycpython.utils.distribution_functions import *\n", - "from binarycpython.utils.spacing_functions import *\n", - "from binarycpython.utils.useful_funcs import *\n", - "\n\n", - # Make the function - "def grid_code(self, print_results=True):\n", - ) + if not os.path.isfile(self.grid_options["source_file_filename"]): + self.verbose_print( + "Source file doesnt exist", self.grid_options["verbosity"], 0 + ) - # Increase indent_depth - self._increment_indent_depth(+1) - - self._add_code( - # Write some info in the function - "# Grid code generated on {}\n".format(datetime.datetime.now().isoformat()), - "# This function generates the systems that will be evolved with binary_c\n\n" - # Set some values in the generated code: - "# Setting initial values\n", - "_total_starcount = 0\n", - "starcounts = [0 for i in range({})]\n".format(total_grid_variables + 1), - "probabilities = {}\n", - "probabilities_list = [0 for i in range({})]\n".format( - total_grid_variables + 1 - ), - "probabilities_sum = [0 for i in range({})]\n".format( - total_grid_variables + 1 + self.verbose_print( + message="Loading source file from {}".format( + self.grid_options["gridcode_filename"] ), - "parameter_dict = {}\n", - "phasevol = 1\n", - ) - - # Set up the system parameters - self._add_code( - "M_1 = None\n", - "M_2 = None\n", - "M_3 = None\n", - "M_4 = None\n", - "orbital_period = None\n", - "orbital_period_triple = None\n", - "orbital_period_quadruple = None\n", - "eccentricity = None\n", - "eccentricity2 = None\n", - "eccentricity3 = None\n", - "\n", - # Prepare the probability - "# setting probability lists\n", + verbosity=self.grid_options["verbosity"], + minimal_verbosity=1, ) - for grid_variable_el in sorted( - self.grid_options["_grid_variables"].items(), - key=lambda x: x[1]["grid_variable_number"], - ): - # Make probabilities dict - grid_variable = grid_variable_el[1] - self._add_code('probabilities["{}"] = 0\n'.format(grid_variable["name"])) - - ################################################################################# - # Start of code generation - ################################################################################# - self._add_code("\n") - - # turn vb to True to have debugging output - vb = False - - # Generate code - print("Generating grid code") - for loopnr, grid_variable_el in enumerate( - sorted( - self.grid_options["_grid_variables"].items(), - key=lambda x: x[1]["grid_variable_number"], - ) - 
): - verbose_print( - "Constructing/adding: {}".format(grid_variable_el[0]), - self.grid_options["verbosity"], - 2, + # We can choose to perform a check on the source file, which checks if the lines start with 'binary_c' + if check: + source_file_check_filehandle = self.open( + self.grid_options["source_file_filename"], "r", encoding="utf-8" ) - grid_variable = grid_variable_el[1] - - #################### - # top code - if grid_variable["topcode"]: - self._add_code(grid_variable["topcode"]) - - ######################### - # Setting up the for loop - # Add comment for for loop - self._add_code( - "# for loop for variable {name} gridtype {gridtype}".format( - name=grid_variable["name"], - gridtype=grid_variable["gridtype"], - ) + "\n", - "sampled_values_{} = {}".format( - grid_variable["name"], grid_variable["samplerfunc"] - ) - + "\n") - - if False: - self._add_code( - "print('samples','{name}',':',np.exp(sampled_values_{name}))\n".format( - name=grid_variable["name"], - ) + for line in source_file_check_filehandle: + if not line.startswith("binary_c"): + failed = True + break + if failed: + self.verbose_print( + "Error, sourcefile contains lines that do not start with binary_c", + self.grid_options["verbosity"], + 0, ) + raise ValueError - if vb: - self._add_code( - "print('sample {name} from',sampled_values_{name})".format( - name=grid_variable["name"] - ) - + "\n" - ) + source_file_filehandle = self.open( + self.grid_options["source_file_filename"], "r", encoding="utf-8" + ) - # calculate number of values and starting location - # - # if we're sampling a continuous variable, we - # have one fewer grid point than the length of the - # sampled_values list - if ( - grid_variable["gridtype"] == "centred" - or grid_variable["gridtype"] == "centre" - or grid_variable["gridtype"] == "center" - or grid_variable["gridtype"] == "edge" - or grid_variable["gridtype"] == "left edge" - or grid_variable["gridtype"] == "left" - or grid_variable["gridtype"] == "right" - or grid_variable["gridtype"] == "right edge" - ): - offset = -1 - elif grid_variable["gridtype"] == "discrete": - # discrete variables sample all the points - offset = 0 - - start = 0 - - # for loop over the variable - if vb: - self._add_code( - "print(\"var {name} values \",sampled_values_{name},\" len \",len(sampled_values_{name})+{offset},\" gridtype {gridtype} offset {offset}\\n\")\n".format( - name=grid_variable["name"], - offset=offset, - gridtype=grid_variable['gridtype'], - ) - ) - self._add_code( - "for {name}_sample_number in range({start},len(sampled_values_{name})+{offset}):".format( - name=grid_variable["name"], - offset=offset, - start=start - ) - + "\n" - ) + self.grid_options["_system_generator"] = source_file_filehandle - self._increment_indent_depth(+1) + self.verbose_print("Source file loaded", self.grid_options["verbosity"], 1) - # {}_this_index is this grid point's index - # {}_prev_index and {}_next_index are the previous and next grid points, - # (which can be None if there is no previous or next, or if - # previous and next should not be used: this is deliberate) - # + def _dict_from_line_source_file(self, line): + """ + Function that creates a dict from a binary_c arg line + """ + if line.startswith("binary_c "): + line = line.replace("binary_c ", "") - if grid_variable["gridtype"] == "discrete": - # discrete grids only care about this, - # both prev and next should be None to - # force errors where they are used - self._add_code( - "{name}_this_index = {name}_sample_number ".format( - name=grid_variable["name"], 
- ), - ) - self._add_code( - "\n", - "{name}_prev_index = None if {name}_this_index == 0 else ({name}_this_index - 1) ".format( - name=grid_variable["name"], - ), - "\n", - ) - self._add_code( - "\n", - "{name}_next_index = None if {name}_this_index >= (len(sampled_values_{name})+{offset} - 1) else ({name}_this_index + 1)".format( - name=grid_variable["name"], - offset=offset - ), - "\n", - ) + split_line = line.split() + arg_dict = {} - elif (grid_variable["gridtype"] == "centred" - or grid_variable["gridtype"] == "centre" - or grid_variable["gridtype"] == "center" - or grid_variable["gridtype"] == "edge" - or grid_variable["gridtype"] == "left" - or grid_variable["gridtype"] == "left edge"): - - # left and centred grids - self._add_code("if {}_sample_number == 0:\n".format(grid_variable["name"])) - self._add_code("{}_this_index = 0;\n".format(grid_variable["name"]), indent=1) - self._add_code("else:\n") - self._add_code("{name}_this_index = {name}_sample_number ".format(name=grid_variable["name"]),indent=1) - self._add_code("\n") - self._add_code("{name}_prev_index = ({name}_this_index - 1) if {name}_this_index > 0 else None ".format(name=grid_variable["name"])) - self._add_code("\n") - self._add_code("{name}_next_index = {name}_this_index + 1".format(name=grid_variable["name"])) - self._add_code("\n") - - elif(grid_variable["gridtype"] == "right" or - grid_variable["gridtype"] == "right edge"): - - # right edged grid - self._add_code("if {name}_sample_number == 0:\n".format(name=grid_variable["name"])) - self._add_code("{name}_this_index = 1;\n".format(name=grid_variable["name"]),indent=1) - self._add_code("else:\n") - self._add_code("{name}_this_index = {name}_sample_number + 1 ".format(name=grid_variable["name"],),indent=1) - self._add_code("\n") - self._add_code("{name}_prev_index = {name}_this_index - 1".format(name=grid_variable["name"])) - self._add_code("\n") - self._add_code("{name}_next_index = ({name}_this_index + 1) if {name}_this_index < len(sampled_values_{name}) else None".format(name=grid_variable["name"])) - self._add_code("\n") - - # calculate phase volume - if(grid_variable["dphasevol"] == -1): - # no phase volume required so set it to 1.0 - self._add_code("dphasevol_{name} = 1.0 # 666\n".format(name=grid_variable["name"])) - - elif(grid_variable["gridtype"] == "right" or - grid_variable["gridtype"] == "right edge"): - # right edges always have this and prev defined - self._add_code( - "dphasevol_{name} = (sampled_values_{name}[{name}_this_index] - sampled_values_{name}[{name}_prev_index])".format(name=grid_variable["name"]) - + "\n" - ) - elif grid_variable["gridtype"] == "discrete": - # discrete might have next defined, use it if we can, - # otherwise use prev - self._add_code( - "dphasevol_{name} = (sampled_values_{name}[{name}_next_index] - sampled_values_{name}[{name}_this_index]) if {name}_next_index else (sampled_values_{name}[{name}_this_index] - sampled_values_{name}[{name}_prev_index])".format(name=grid_variable["name"]) - + "\n" - ) + for i in range(0, len(split_line), 2): + if "." 
in split_line[i + 1]: + arg_dict[split_line[i]] = float(split_line[i + 1]) else: - # left and centred always have this and next defined - self._add_code( - "dphasevol_{name} = (sampled_values_{name}[{name}_next_index] - sampled_values_{name}[{name}_this_index])".format(name=grid_variable["name"]) - + "\n" - ) + arg_dict[split_line[i]] = int(split_line[i + 1]) + return arg_dict - ############## - # Add phasevol check: - self._add_code("if dphasevol_{name} <= 0:\n".format(name=grid_variable["name"])) - - # TODO: We might actually want to add the starcount and probability to the totals regardless. - # n that case we need another local variable which will prevent it from being run but will track those parameters - # Add phasevol check action: - self._add_code( - 'print("Grid generator: dphasevol_{name} <= 0! (this=",{name}_this_index,"=",sampled_values_{name}[{name}_this_index],", next=",{name}_next_index,"=",sampled_values_{name}[{name}_next_index],") Skipping current sample.")'.format(name=grid_variable["name"]) - + "\n", - "continue\n", - indent=1, - ) + ################################################### + # Unordered functions + # + # Functions that aren't ordered yet + ################################################### - if vb: - self._add_code( - "print('sample {name} from ',sampled_values_{name},' at this=',{name}_this_index,', next=',{name}_next_index)".format(name=grid_variable["name"]) - + "\n" - ) + def _cleanup_defaults(self): + """ + Function to clean up the default values: - # select sampled point location based on gridtype (left, centre or right) - if ( - grid_variable["gridtype"] == "edge" - or grid_variable["gridtype"] == "left" - or grid_variable["gridtype"] == "left edge" - or grid_variable["gridtype"] == "right" - or grid_variable["gridtype"] == "right edge" - or grid_variable['gridtype'] == 'discrete' - ): - self._add_code( - "{name} = sampled_values_{name}[{name}_this_index]".format( - name=grid_variable["name"]) - + "\n" - ) - elif ( - grid_variable["gridtype"] == "centred" - or grid_variable["gridtype"] == "centre" - or grid_variable["gridtype"] == "center" - ): - self._add_code( - "{name} = 0.5 * (sampled_values_{name}[{name}_next_index] + sampled_values_{name}[{name}_this_index])".format(name=grid_variable["name"]) - + "\n" - ) - else: - msg = "Unknown gridtype value {type}.".format(type=grid_variable['gridtype']) - raise ValueError(msg) - - if vb: - self._add_code( - "print('hence {name} = ',{name})\n".format( - name=grid_variable["name"] - ) - ) - - ################################################################################# - # Check condition and generate for loop - - # If the grid variable has a condition, write the check and the action - if grid_variable["condition"]: - self._add_code( - # Add comment - "# Condition for {name}\n".format(name=grid_variable["name"]), - - # Add condition check - "if not {condition}:\n".format(condition=grid_variable["condition"]), - indent=0, - ) - - # Add condition failed action: - if self.grid_options["verbosity"] >= 3: - self._add_code( - 'print("Grid generator: Condition for {name} not met!")'.format( - name=grid_variable["name"] - ) - + "\n", - "continue" + "\n", - indent=1, - ) - else: - self._add_code( - "continue" + "\n", - indent=1, - ) - # Add some whitespace - self._add_code("\n") - - # Add some whitespace - self._add_code("\n") - - ######################### - # Setting up pre-code and value in some cases - # Add pre-code - if grid_variable["precode"]: - self._add_code( - "{precode}".format( - 
precode=grid_variable["precode"].replace( - "\n", "\n" + self._indent_block(0) - ) - ) - + "\n" - ) - - # Set phasevol - self._add_code( - "phasevol *= dphasevol_{name}\n".format( - name=grid_variable["name"], - ) - ) - - ####################### - # Probabilities - # Calculate probability - self._add_code( - "\n", - "# Setting probabilities\n", - "d{name} = dphasevol_{name} * ({probdist})".format( - name=grid_variable["name"], - probdist=grid_variable["probdist"], - ) - + "\n", - # Save probability sum - "probabilities_sum[{n}] += d{name}".format( - n=grid_variable["grid_variable_number"], - name=grid_variable["name"] - ) - + "\n", - ) - - if grid_variable["grid_variable_number"] == 0: - self._add_code( - "probabilities_list[0] = d{name}".format(name=grid_variable["name"]) + "\n" - ) - else: - self._add_code( - "probabilities_list[{this}] = probabilities_list[{prev}] * d{name}".format( - this=grid_variable["grid_variable_number"], - prev=grid_variable["grid_variable_number"] - 1, - name=grid_variable["name"], - ) - + "\n" - ) - - ############## - # postcode - if grid_variable["postcode"]: - self._add_code( - "{postcode}".format( - postcode=grid_variable["postcode"].replace( - "\n", "\n" + self._indent_block(0) - ) - ) - + "\n" - ) - - ####################### - # Increment starcount for this parameter - self._add_code( - "\n", - "# Increment starcount for {name}\n".format(name=grid_variable["name"]), - "starcounts[{n}] += 1".format( - n=grid_variable["grid_variable_number"], - ) - + "\n", - # Add value to dict - 'parameter_dict["{name}"] = {name}'.format( - name=grid_variable["parameter_name"] - ) - + "\n", - "\n", - ) - - self._increment_indent_depth(-1) - - # The final parts of the code, where things are returned, are within the deepest loop, - # but in some cases code from a higher loop needs to go under it again - # SO I think its better to put an if statement here that checks - # whether this is the last loop. - if loopnr == len(self.grid_options["_grid_variables"]) - 1: - self._write_gridcode_system_call( - grid_variable, - dry_run, - grid_variable["branchpoint"], - grid_variable["branchcode"], - ) - - # increment indent_depth - self._increment_indent_depth(+1) - - #################### - # bottom code - if grid_variable["bottomcode"]: - self._add_code(grid_variable["bottomcode"]) - - self._increment_indent_depth(-1) - self._add_code("\n") - - # Write parts to write below the part that yield the results. - # this has to go in a reverse order: - # Here comes the stuff that is put after the deepest nested part that calls returns stuff. - # Here we will have a - reverse_sorted_grid_variables = sorted( - self.grid_options["_grid_variables"].items(), - key=lambda x: x[1]["grid_variable_number"], - reverse=True, - ) - for loopnr, grid_variable_el in enumerate(reverse_sorted_grid_variables): - grid_variable = grid_variable_el[1] - - self._increment_indent_depth(+1) - self._add_code( - "#" * 40 + "\n", - "# Code below is for finalising the handling of this iteration of the parameter {name}\n".format( - name=grid_variable["name"] - ), - ) - - # Set phasevol - # TODO: fix. this isn't supposed to be the value that we give it here. discuss - self._add_code("phasevol /= dphasevol_{name}\n\n".format(name=grid_variable["name"])) - - self._increment_indent_depth(-2) - - # Check the branchpoint part here. 
The branchpoint makes sure that we can construct - # a grid with several multiplicities and still can make the system calls for each - # multiplicity without reconstructing the grid each time - if grid_variable["branchpoint"] > 0: - - self._increment_indent_depth(+1) - - self._add_code( - # Add comment - "# Condition for branchpoint at {}".format( - reverse_sorted_grid_variables[loopnr + 1][1]["name"] - ) - + "\n", - # # Add condition check - # "if not {}:".format(grid_variable["condition"]) - # + "\n" - # Add branchpoint - "if multiplicity=={}:".format(grid_variable["branchpoint"]) + "\n", - ) - - self._write_gridcode_system_call( - reverse_sorted_grid_variables[loopnr + 1][1], - dry_run, - grid_variable["branchpoint"], - grid_variable["branchcode"], - ) - self._increment_indent_depth(-1) - self._add_code("\n") - - ############################### - # Finalising print statements - # - self._increment_indent_depth(+1) - self._add_code("\n", "#" * 40 + "\n", "if print_results:\n") - self._add_code( - "print('Grid has handled {starcount} stars with a total probability of {probtot:g}'.format(starcount=_total_starcount,probtot=self.grid_options['_probtot']))\n", - indent=1, - ) - - ################ - # Finalising return statement for dry run. - # - if dry_run: - self._add_code("return _total_starcount\n") - - self._increment_indent_depth(-1) - ################################################################################# - # Stop of code generation. Here the code is saved and written - - # Save the grid code to the grid_options - verbose_print( - "Saving grid code to grid_options", self.grid_options["verbosity"], 1 - ) - - self.grid_options["code_string"] = self.code_string - - # Write to file - gridcode_filename = os.path.join( - self.grid_options["tmp_dir"], - "binary_c_grid_{id}.py".format(id=self.grid_options["_population_id"]), - ) - self.grid_options["gridcode_filename"] = gridcode_filename - - verbose_print( - "{blue}Writing grid code to {file} [dry_run = {dry}]{reset}".format( - blue=self.ANSI_colours["blue"], - file=gridcode_filename, - dry=dry_run, - reset=self.ANSI_colours["reset"], - ), - self.grid_options["verbosity"], - 1, - ) - - with open(gridcode_filename, "w") as file: - file.write(self.code_string) - - # perhaps create symlink - if self.grid_options["symlink latest gridcode"]: - global _count - symlink = os.path.join( - self.grid_options["tmp_dir"], "binary_c_grid-latest" + str(_count) - ) - _count += 1 - if os.path.exists(symlink): - os.unlink(symlink) - - try: - os.symlink(gridcode_filename, symlink) - verbose_print( - "{blue}Symlinked grid code to {symlink} {reset}".format( - blue=self.ANSI_colours["blue"], - symlink=symlink, - reset=self.ANSI_colours["reset"] - ), - self.grid_options["verbosity"], - 1, - ) - except OSError: - print("symlink failed") - - def _write_gridcode_system_call( - self, grid_variable, dry_run, branchpoint, branchcode - ): - ################################################################################# - # Here are the calls to the queuing or other solution. 
this part is for every system - # Add comment - self._increment_indent_depth(+1) - self._add_code("#" * 40 + "\n") - - if branchcode: - self._add_code("# Branch code\nif {branchcode}:\n".format(branchcode=branchcode)) - - if branchpoint: - self._add_code( - "# Code below will get evaluated for every system at this level of multiplicity (last one of that being {name})\n".format( - name=grid_variable["name"] - ) - ) - else: - self._add_code( - "# Code below will get evaluated for every generated system\n" - ) - - # Factor in the custom weight input - self._add_code( - "\n", - "# Weigh the probability by a custom weighting factor\n", - 'probability = self.grid_options["weight"] * probabilities_list[{n}]'.format( - n=grid_variable["grid_variable_number"] - ) - + "\n", - # Take into account the multiplicity fraction: - "\n", - "# Factor the multiplicity fraction into the probability\n", - "probability = probability * self._calculate_multiplicity_fraction(parameter_dict)" - + "\n", - # Add division by number of repeats - "\n", - "# Divide the probability by the number of repeats\n", - 'probability = probability / self.grid_options["repeat"]' + "\n", - # Now we yield the system self.grid_options["repeat"] times. - "\n", - "# Loop over the repeats\n", - 'for _ in range(self.grid_options["repeat"]):' + "\n", - ) - self._add_code( - "_total_starcount += 1\n", - # set probability and phasevol values into the system dict - 'parameter_dict["{p}"] = {p}'.format(p="probability") + "\n", - 'parameter_dict["{v}"] = {v}'.format(v="phasevol") + "\n", - # Increment total probability - "self._increment_probtot(probability)\n", - indent=1, - ) - - if not dry_run: - # Handling of what is returned, or what is not. - self._add_code("yield(parameter_dict)\n", indent=1) - - # If its a dry run, dont do anything with it - else: - self._add_code("pass\n", indent=1) - - self._add_code("#" * 40 + "\n") - - self._increment_indent_depth(-1) - - return self.code_string - - def _load_grid_function(self): - """ - Function that loads the script containing the grid code. - - TODO: Update this description - Test function to run grid stuff. mostly to test the import - """ - - # Code to load the - verbose_print( - message="Loading grid code function from {file}".format( - file=self.grid_options["gridcode_filename"] - ), - verbosity=self.grid_options["verbosity"], - minimal_verbosity=1, - ) - - spec = importlib.util.spec_from_file_location( - "binary_c_python_grid", - os.path.join(self.grid_options["gridcode_filename"]), - ) - grid_file = importlib.util.module_from_spec(spec) - spec.loader.exec_module(grid_file) - generator = grid_file.grid_code - - self.grid_options["_system_generator"] = generator - - verbose_print("Grid code loaded", self.grid_options["verbosity"], 1) - - def _dry_run(self): - """ - Function to dry run the grid and know how many stars it will run - - Requires the grid to be built as a dry run grid - """ - verbose_print("Dry run of the grid", self.grid_options["verbosity"], 1) - system_generator = self.grid_options["_system_generator"] - total_starcount = system_generator(self) - self.grid_options["_total_starcount"] = total_starcount - - def _print_info(self, run_number, total_systems, full_system_dict): - """ - Function to print info about the current system and the progress of the grid. 
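As a concrete illustration of the coloured progress output described here, a minimal sketch of the escape-code string being assembled (1;32;41 is bold green on a red background, 1;32;42 bold green on green, 0m resets; the command line shown is made up):

def progress_line(run_number, total_systems, binary_cmdline_string):
    # counter in green-on-red, binary_c call in green-on-green
    return "\033[1;32;41m{}/{}\033[0m \033[1;32;42m{}\033[0m".format(
        run_number, total_systems, binary_cmdline_string
    )

print(progress_line(42, 1000, "binary_c M_1 1.5 M_2 0.9 orbital_period 100"))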
- - # color info tricks from https://ozzmaker.com/add-colour-to-text-in-python/ - https://stackoverflow.com/questions/287871/how-to-print-colored-text-in-terminal-in-python - """ - - # Define frequency - if self.grid_options["verbosity"] == 1: - print_freq = 1 - else: - print_freq = 10 - - # Calculate amount of time left - # calculate amount of time passed - # time_passed = time.time() - self.grid_options["_start_time_evolution"] - - if run_number % print_freq == 0: - binary_cmdline_string = self._return_argline(full_system_dict) - info_string = "{color_part_1} \ - {text_part_1}{end_part_1}{color_part_2} \ - {text_part_2}{end_part_2}".format( - color_part_1="\033[1;32;41m", - text_part_1="{}/{}".format(run_number, total_systems), - end_part_1="\033[0m", - color_part_2="\033[1;32;42m", - text_part_2="{}".format(binary_cmdline_string), - end_part_2="\033[0m", - ) - print(info_string) - - ################################################### - # Monte Carlo functions - # - # Functions below are used to run populations with - # Monte Carlo - ################################################### - - ################################################### - # Population from file functions - # - # Functions below are used to run populations from - # a file containing binary_c calls - ################################################### - def _dry_run_source_file(self): - """ - Function to go through the source_file and count the number of lines and the total probability - """ - - system_generator = self.grid_options["_system_generator"] - - total_starcount = 0 - total_probability = 0 - - contains_probability = False - - for line in system_generator: - total_starcount += 1 - - total_starcount = system_generator(self) - self.grid_options["_total_starcount"] = total_starcount - - def _load_source_file(self, check=False): - """ - Function that loads the source_file that contains a binary_c calls - """ - - if not os.path.isfile(self.grid_options["source_file_filename"]): - verbose_print("Source file doesnt exist", self.grid_options["verbosity"], 0) - - verbose_print( - message="Loading source file from {}".format( - self.grid_options["gridcode_filename"] - ), - verbosity=self.grid_options["verbosity"], - minimal_verbosity=1, - ) - - # We can choose to perform a check on the source file, which checks if the lines start with 'binary_c' - if check: - source_file_check_filehandle = open( - self.grid_options["source_file_filename"], "r" - ) - for line in source_file_check_filehandle: - if not line.startswith("binary_c"): - failed = True - break - if failed: - verbose_print( - "Error, sourcefile contains lines that do not start with binary_c", - self.grid_options["verbosity"], - 0, - ) - raise ValueError - - source_file_filehandle = open(self.grid_options["source_file_filename"], "r") - - self.grid_options["_system_generator"] = source_file_filehandle - - verbose_print("Source file loaded", self.grid_options["verbosity"], 1) - - def _dict_from_line_source_file(self, line): - """ - Function that creates a dict from a binary_c arg line - """ - - if line.startswith("binary_c "): - line = line.replace("binary_c ", "") - - split_line = line.split() - arg_dict = {} - - for i in range(0, len(split_line), 2): - if "." 
in split_line[i + 1]: - arg_dict[split_line[i]] = float(split_line[i + 1]) - else: - arg_dict[split_line[i]] = int(split_line[i + 1]) - - return arg_dict - - ################################################### - # SLURM functions - # - # subroutines to run SLURM grids - ################################################### - - # def _slurm_grid(self): - # """ - # Main function that manages the SLURM setup. - - # Has three stages: - - # - setup - # - evolve - # - join - - # Which stage is used is determined by the value of grid_options['slurm_command']: - - # <empty>: the function will know its the user that executed the script and - # it will set up the necessary condor stuff - - # 'evolve': evolve_population is called to evolve the population of stars - - # 'join': We will attempt to join the output - # """ - - # # Check version - # # TODO: Put in function - # slurm_version = get_slurm_version() - # if not slurm_version: - # verbose_print( - # "SLURM: Error: No installation of slurm found", - # self.grid_options["verbosity"], - # 0, - # ) - # else: - # major_version = int(slurm_version.split(".")[0]) - # minor_version = int(slurm_version.split(".")[1]) - - # if major_version > 17: - # verbose_print( - # "SLURM: Found version {} which is new enough".format(slurm_version), - # self.grid_options["verbosity"], - # 1, - # ) - # else: - # verbose_print( - # "SLURM: Found version {} which is too old (we require 17+)".format( - # slurm_version - # ), - # self.grid_options["verbosity"], - # 0, - # ) - - # verbose_print( - # "SLURM: Running slurm grid. command={}".format( - # self.grid_options["slurm_command"] - # ), - # self.grid_options["verbosity"], - # 1, - # ) - - # if not self.grid_options["slurm_command"]: - # # Setting up - # verbose_print( - # "SLURM: Main controller script. 
Setting up", - # self.grid_options["verbosity"], - # 1, - # ) - - # # Set up working directories: - # verbose_print( - # "SLURM: creating working directories", self.grid_options["verbosity"], 1 - # ) - # create_directories_hpc(self.grid_options["slurm_dir"]) - - # # Create command - # python_details = get_python_details() - # scriptname = path_of_calling_script() - # command = "{} {}".format(python_details["executable"], scriptname) - # command += '{}'.format( - # " ".join( - # [ - # "{}".format(self.grid_options["_commandline_input"]), - # "offset=$jobarrayindex", - # "modulo={}".format(self.grid_options["slurm_njobs"]), - # "vb={}".format(self.grid_options["verbosity"]), - # "slurm_jobid=$jobid", - # "slurm_jobarrayindex=$jobarrayindex", - # "slurm_jobname='binary_grid_'$jobid'.'$jobarrayindex", - # "slurm_njobs={}".format(self.grid_options["slurm_njobs"]), - # "slurm_dir={}".format(self.grid_options["slurm_dir"]), - # "rungrid=1", - # "slurm_command=evolve", - # ] - # ).strip() - # ) - - # # Construct dict with settings for the script while checking the settings at the same time - # # Check settings: - # # TODO: check settings - # # Create SLURM_DIR script: - # slurm_script_options = {} - # slurm_script_options["n"] = self.grid_options["slurm_njobs"] - # slurm_script_options["njobs"] = self.grid_options["slurm_njobs"] - # slurm_script_options["dir"] = self.grid_options["slurm_dir"] - # slurm_script_options["memory"] = self.grid_options["slurm_memory"] - # slurm_script_options["working_dir"] = self.grid_options[ - # "slurm_dir" - # ] # TODO: check this - # slurm_script_options["command"] = command - # # slurm_script_options['streams'] = self.grid_options['streams'] - - # # Construct the script - # slurm_script_contents = "" - # slurm_script_contents += "#!/bin/bash\n" - # slurm_script_contents += "# Slurm file for binary_grid and slurm\n" - # slurm_script_contents += "#SBATCH --error={}/stderr/%A.%a\n".format( - # self.grid_options["slurm_dir"] - # ) - # slurm_script_contents += "#SBATCH --output={}/stdout/%A.%a\n".format( - # self.grid_options["slurm_dir"] - # ) - # slurm_script_contents += "#SBATCH --job-name={}\n".format( - # self.grid_options["slurm_jobname"] - # ) - # slurm_script_contents += "#SBATCH --partition={}\n".format( - # self.grid_options["slurm_partition"] - # ) - # slurm_script_contents += "#SBATCH --time={}\n".format( - # self.grid_options["slurm_time"] - # ) - # slurm_script_contents += "#SBATCH --mem={}\n".format( - # self.grid_options["slurm_memory"] - # ) - # slurm_script_contents += "#SBATCH --ntasks={}\n".format( - # self.grid_options["slurm_ntasks"] - # ) - # slurm_script_contents += "#SBATCH --array={}\n".format( - # self.grid_options["slurm_array"] - # ) - # slurm_script_contents += "\n" - - # if self.grid_options["slurm_extra_settings"]: - # slurm_script_contents += "# Extra settings by user:" - # slurm_script_contents += "\n".join( - # [ - # "--{}={}".format( - # key, self.grid_options["slurm_extra_settings"][key] - # ) - # for key in self.grid_options["slurm_extra_settings"] - # ] - # ) - - # slurm_script_contents += '# set status to "running"\n' - # slurm_script_contents += ( - # 'echo "running" > {}/status/$jobid.$jobarrayindex\n\n'.format( - # self.grid_options["slurm_dir"] - # ) - # ) - # slurm_script_contents += "# run grid of stars\n" - # slurm_script_contents += "{}\n\n".format(command) - # slurm_script_contents += '# set status to "finished"\n' - # slurm_script_contents += ( - # 'echo "finished" > {}/status/$jobid.$jobarrayindex\n'.format( - # 
self.grid_options["slurm_dir"] - # ) - # ) - # slurm_script_contents += "\n" - - # if self.grid_options["slurm_postpone_join"]: - # slurm_script_contents += "{} rungrid=0 results_hash_dumpfile={}/results/$jobid.all slurm_command=join\n".format( - # command, self.grid_options["slurm_dir"] - # ) - - # # Write script to file - # slurm_script_filename = os.path.join( - # self.grid_options["slurm_dir"], "slurm_script" - # ) - # with open(slurm_script_filename, "w") as slurm_script_file: - # slurm_script_file.write(slurm_script_contents) - - # # Execute or postpone - # if self.grid_options["slurm_postpone_sbatch"]: - # # Execute or postpone the real call to sbatch - # sbatch_command = "sbatch {}".format(slurm_script_filename) - # verbose_print( - # "running slurm script {}".format(slurm_script_filename), - # self.grid_options["verbosity"], - # 0, - # ) - # # subprocess.Popen(sbatch_command, close_fds=True) - # # subprocess.Popen(sbatch_command, creationflags=subprocess.DETACHED_PROCESS) - # verbose_print("Submitted scripts.", self.grid_options["verbosity"], 0) - # else: - # verbose_print( - # "Slurm script is in {} but hasnt been executed".format( - # slurm_script_filename - # ), - # self.grid_options["verbosity"], - # 0, - # ) - - # verbose_print("all done!", self.grid_options["verbosity"], 0) - # sys.exit() - - # elif self.grid_options["slurm_command"] == "evolve": - # # Part to evolve the population. - # # TODO: decide how many CPUs - # verbose_print( - # "SLURM: Evolving population", self.grid_options["verbosity"], 1 - # ) - - # # - # self.evolve_population() - - # elif self.grid_options["slurm_command"] == "join": - # # Joining the output. - # verbose_print("SLURM: Joining results", self.grid_options["verbosity"], 1) - - ################################################### - # CONDOR functions - # - # subroutines to run CONDOR grids - ################################################### - - # def _condor_grid(self): - # """ - # Main function that manages the CONDOR setup. - - # Has three stages: - - # - setup - # - evolve - # - join - - # Which stage is used is determined by the value of grid_options['condor_command']: - - # <empty>: the function will know its the user that executed the script and - # it will set up the necessary condor stuff - - # 'evolve': evolve_population is called to evolve the population of stars - - # 'join': We will attempt to join the output - # """ - - # # TODO: Put in function - # condor_version = get_condor_version() - # if not condor_version: - # verbose_print( - # "CONDOR: Error: No installation of condor found", - # self.grid_options["verbosity"], - # 0, - # ) - # else: - # major_version = int(condor_version.split(".")[0]) - # minor_version = int(condor_version.split(".")[1]) - - # if (major_version == 8) and (minor_version > 4): - # verbose_print( - # "CONDOR: Found version {} which is new enough".format( - # condor_version - # ), - # self.grid_options["verbosity"], - # 0, - # ) - # elif major_version > 9: - # verbose_print( - # "CONDOR: Found version {} which is new enough".format( - # condor_version - # ), - # self.grid_options["verbosity"], - # 0, - # ) - # else: - # verbose_print( - # "CONDOR: Found version {} which is too old (we require 8.3/8.4+)".format( - # condor_version - # ), - # self.grid_options["verbosity"], - # 0, - # ) - - # verbose_print( - # "Running Condor grid. 
command={}".format( - # self.grid_options["condor_command"] - # ), - # self.grid_options["verbosity"], - # 1, - # ) - # if not self.grid_options["condor_command"]: - # # Setting up - # verbose_print( - # "CONDOR: Main controller script. Setting up", - # self.grid_options["verbosity"], - # 1, - # ) - - # # Set up working directories: - # verbose_print( - # "CONDOR: creating working directories", - # self.grid_options["verbosity"], - # 1, - # ) - # create_directories_hpc(self.grid_options["condor_dir"]) - - # # Create command - # current_workingdir = os.getcwd() - # python_details = get_python_details() - # scriptname = path_of_calling_script() - # # command = "".join([ - # # "{}".python_details['executable'], - # # "{}".scriptname, - # # "offset=$jobarrayindex", - # # "modulo={}".format(self.grid_options['condor_njobs']), - # # "vb={}".format(self.grid_options['verbosity']) - - # # "results_hash_dumpfile=$self->{_grid_options}{slurm_dir}/results/$jobid.$jobarrayindex", - # # 'slurm_jobid='.$jobid, - # # 'slurm_jobarrayindex='.$jobarrayindex, - # # 'slurm_jobname=binary_grid_'.$jobid.'.'.$jobarrayindex, - # # "slurm_njobs=$njobs", - # # "slurm_dir=$self->{_grid_options}{slurm_dir}", - # # ); - - # # Create directory with info for the condor script. By creating this directory we also check whether all the values are set correctly - # # TODO: create the condor script. - # condor_script_options = {} - # # condor_script_options['n'] = - # condor_script_options["njobs"] = self.grid_options["condor_njobs"] - # condor_script_options["dir"] = self.grid_options["condor_dir"] - # condor_script_options["memory"] = self.grid_options["condor_memory"] - # condor_script_options["working_dir"] = self.grid_options[ - # "condor_working_dir" - # ] - # condor_script_options["command"] = self.grid_options["command"] - # condor_script_options["streams"] = self.grid_options["streams"] - - # # TODO: condor works with running an executable. 
- - # # Create script contents - # condor_script_contents = "" - # condor_script_contents += """ - # ################################################# - # # - # # Condor script to run a binary_grid via python - # # - # ################################################# - # """ - # condor_script_contents += "Executable\t= {}".format(executable) - # condor_script_contents += "arguments\t= {}".format(arguments) - # condor_script_contents += "environment\t= {}".format(environment) - # condor_script_contents += "universe\t= {}".format( - # self.grid_options["condor_universe"] - # ) - # condor_script_contents += "\n" - # condor_script_contents += "output\t= {}/stdout/$id\n".format( - # self.grid_options["condor_dir"] - # ) - # condor_script_contents += "error\t={}/sterr/$id".format( - # self.grid_options["condor_dir"] - # ) - # condor_script_contents += "log\t={}\n".format( - # self.grid_options["condor_dir"] - # ) - # condor_script_contents += "initialdir\t={}\n".format(current_workingdir) - # condor_script_contents += "remote_initialdir\t={}\n".format( - # current_workingdir - # ) - # condor_script_contents += "\n" - # condor_script_contents += "steam_output\t={}".format(stream) - # condor_script_contents += "steam_error\t={}".format(stream) - # condor_script_contents += "+WantCheckpoint = False" - # condor_script_contents += "\n" - # condor_script_contents += "request_memory\t={}".format( - # self.grid_options["condor_memory"] - # ) - # condor_script_contents += "ImageSize\t={}".format( - # self.grid_options["condor_memory"] - # ) - # condor_script_contents += "\n" - - # if self.grid_options["condor_extra_settings"]: - # slurm_script_contents += "# Extra settings by user:" - # slurm_script_contents += "\n".join( - # [ - # "{}\t={}".format( - # key, self.grid_options["condor_extra_settings"][key] - # ) - # for key in self.grid_options["condor_extra_settings"] - # ] - # ) - - # condor_script_contents += "\n" - - # # request_memory = $_[0]{memory} - # # ImageSize = $_[0]{memory} - - # # Requirements = (1) \&\& (". 
- # # $self->{_grid_options}{condor_requirements}.")\n"; - - # # - # # file name: my_program.condor - # # Condor submit description file for my_program - # # Executable = my_program - # # Universe = vanilla - # # Error = logs/err.$(cluster) - # # Output = logs/out.$(cluster) - # # Log = logs/log.$(cluster) - - # # should_transfer_files = YES - # # when_to_transfer_output = ON_EXIT - # # transfer_input_files = files/in1,files/in2 - - # # Arguments = files/in1 files/in2 files/out1 - # # Queue - - # # Write script contents to file - # if self.grid_options["condor_postpone_join"]: - # condor_script_contents += "{} rungrid=0 results_hash_dumpfile={}/results/$jobid.all condor_command=join\n".format( - # command, self.grid_options["condor_dir"] - # ) - - # condor_script_filename = os.path.join( - # self.grid_options["condor_dir"], "condor_script" - # ) - # with open(condor_script_filename, "w") as condor_script_file: - # condor_script_file.write(condor_script_contents) - - # if self.grid_options["condor_postpone_sbatch"]: - # # Execute or postpone the real call to sbatch - # submit_command = "condor_submit {}".format(condor_script_filename) - # verbose_print( - # "running condor script {}".format(condor_script_filename), - # self.grid_options["verbosity"], - # 0, - # ) - # # subprocess.Popen(sbatch_command, close_fds=True) - # # subprocess.Popen(sbatch_command, creationflags=subprocess.DETACHED_PROCESS) - # verbose_print("Submitted scripts.", self.grid_options["verbosity"], 0) - # else: - # verbose_print( - # "Condor script is in {} but hasnt been executed".format( - # condor_script_filename - # ), - # self.grid_options["verbosity"], - # 0, - # ) - - # verbose_print("all done!", self.grid_options["verbosity"], 0) - # sys.exit() - - # elif self.grid_options["condor_command"] == "evolve": - # # TODO: write this function - # # Part to evolve the population. - # # TODO: decide how many CPUs - # verbose_print( - # "CONDOR: Evolving population", self.grid_options["verbosity"], 1 - # ) - - # # - # self.evolve_population() - - # elif self.grid_options["condor_command"] == "join": - # # TODO: write this function - # # Joining the output. - # verbose_print("CONDOR: Joining results", self.grid_options["verbosity"], 1) - - # pass - ################################################### - # Unordered functions - # - # Functions that aren't ordered yet - ################################################### - - def write_ensemble(self, output_file, data=None, sort_keys=True, indent=4): - """ - write_ensemble : Write ensemble results to a file. - - Args: - output_file : the output filename. - - If the filename has an extension that we recognise, - e.g. .gz or .bz2, we compress the output appropriately. - - The filename should contain .json or .msgpack, the two - currently-supported formats. - - Usually you'll want to output to JSON, but we can - also output to msgpack. - - data : the data dictionary to be converted and written to the file. - If not set, this defaults to self.grid_ensemble_results. - - sort_keys : if True, and output is to JSON, the keys will be sorted. - (default: True, passed to json.dumps) - - indent : number of space characters used in the JSON indent. 
(Default: 4, - passed to json.dumps) - """ - # TODO: consider writing this in a formatted structure - - # get the file type - file_type = ensemble_file_type(output_file) - - # choose compression algorithm based on file extension - compression = ensemble_compression(output_file) - - # default to using grid_ensemble_results if no data is given - if data is None: - data = self.grid_ensemble_results - - if not file_type: - print( - "Unable to determine file type from ensemble filename {} : it should be .json or .msgpack." - ).format(output_file) - sys.exit() - elif file_type is "JSON": - # JSON output - if compression == "gzip": - # gzip - f = gzip.open(output_file, "wt") - elif compression == "bzip2": - # bzip2 - f = bz2.open(output_file, "wt") - else: - # raw output (not compressed) - f = open(output_file, "wt") - f.write(json.dumps(data, sort_keys=sort_keys, indent=indent)) - - elif file_type is "msgpack": - # msgpack output - if compression == "gzip": - f = gzip.open(output_file, "wb") - elif compression == "bzip2": - f = bz2.open(output_file, "wb") - else: - f = open(output_file, "wb") - msgpack.dump(data, f) - f.close() - - print( - "Thread {thread}: Wrote ensemble results to file: {colour}{file}{reset} (file type {file_type}, compression {compression})".format( - thread=self.process_ID, - file=output_file, - colour=self.ANSI_colours["green"], - reset=self.ANSI_colours["reset"], - file_type=file_type, - compression=compression, - ) - ) - - ############################################################ - def write_binary_c_calls_to_file( - self, - output_dir: Union[str, None] = None, - output_filename: Union[str, None] = None, - include_defaults: bool = False, - ) -> None: - """ - Function that loops over the grid code and writes the generated parameters to a file. - In the form of a command line call - - Only useful when you have a variable grid as system_generator. MC wouldn't be that useful - - Also, make sure that in this export there are the basic parameters - like m1,m2,sep, orb-per, ecc, probability etc. - - On default this will write to the datadir, if it exists - - Tasks: - - TODO: test this function - - TODO: make sure the binary_c_python .. output file has a unique name - - Args: - output_dir: (optional, default = None) directory where to write the file to. If custom_options['data_dir'] is present, then that one will be used first, and then the output_dir - output_filename: (optional, default = None) filename of the output. If not set it will be called "binary_c_calls.txt" - include_defaults: (optional, default = None) whether to include the defaults of binary_c in the lines that are written. Beware that this will result in very long lines, and it might be better to just export the binary_c defaults and keep them in a separate file. - - Returns: - filename: filename that was used to write the calls to - """ - - # Check if there is no compiled grid yet. If not, lets try to build it first. - if not self.grid_options["_system_generator"]: - - ## check the settings: - if self.bse_options.get("ensemble", None): - if self.bse_options["ensemble"] == 1: - if not self.bse_options.get("ensemble_defer", 0) == 1: - verbose_print( - "Error, if you want to run an ensemble in a population, the output needs to be deferred", - self.grid_options["verbosity"], - 0, - ) - raise ValueError - - # Put in check - if len(self.grid_options["_grid_variables"]) == 0: - print("Error: you haven't defined any grid variables! 
Aborting") - raise ValueError - - # - self._generate_grid_code(dry_run=False) - - # - self._load_grid_function() - - # then if the _system_generator is present, we go through it - if self.grid_options["_system_generator"]: - # Check if there is an output dir configured - if self.custom_options.get("data_dir", None): - binary_c_calls_output_dir = self.custom_options["data_dir"] - # otherwise check if there's one passed to the function - else: - if not output_dir: - print( - "Error. No data_dir configured and you gave no output_dir. Aborting" - ) - raise ValueError - binary_c_calls_output_dir = output_dir - - # check if there's a filename passed to the function - if output_filename: - binary_c_calls_filename = output_filename - # otherwise use default value - else: - binary_c_calls_filename = "binary_c_calls.txt" - - binary_c_calls_full_filename = os.path.join( - binary_c_calls_output_dir, binary_c_calls_filename - ) - print("Writing binary_c calls to {}".format(binary_c_calls_full_filename)) - - # Write to file - with open(binary_c_calls_full_filename, "w") as file: - # Get defaults and clean them, then overwrite them with the set values. - if include_defaults: - # TODO: make sure that the defaults here are cleaned up properly - cleaned_up_defaults = self.cleaned_up_defaults - full_system_dict = cleaned_up_defaults.copy() - full_system_dict.update(self.bse_options.copy()) - else: - full_system_dict = self.bse_options.copy() - - for system in self.grid_options["_system_generator"](self): - # update values with current system values - full_system_dict.update(system) - - binary_cmdline_string = self._return_argline(full_system_dict) - file.write(binary_cmdline_string + "\n") - else: - print("Error. No grid function found!") - raise ValueError - - return binary_c_calls_full_filename - - def _cleanup_defaults(self): - """ - Function to clean up the default values: - - from a dictionary, removes the entries that have the following values: - - "NULL" - - "" - - "Function" - - Uses the function from utils.functions - - TODO: Rethink this functionality. seems a bit double, could also be just outside of the class - """ - - binary_c_defaults = self.return_binary_c_defaults().copy() - cleaned_dict = filter_arg_dict(binary_c_defaults) - - return cleaned_dict - - def _clean_up_custom_logging(self, evol_type): - """ - Function to clean up the custom logging. - Has two types: - 'single': - - removes the compiled shared library - (which name is stored in grid_options['_custom_logging_shared_library_file']) - - TODO: unloads/frees the memory allocated to that shared library - (which is stored in grid_options['custom_logging_func_memaddr']) - - sets both to None - 'multiple': - - TODO: make this and design this - """ - - if evol_type == "single": - verbose_print( - "Cleaning up the custom logging stuff. type: single", - self.grid_options["verbosity"], - 1, - ) - - # TODO: Explicitly unload the library - - # Reset the memory adress location - self.grid_options["custom_logging_func_memaddr"] = -1 - - # remove shared library files - if self.grid_options["_custom_logging_shared_library_file"]: - remove_file( - self.grid_options["_custom_logging_shared_library_file"], - self.grid_options["verbosity"], - ) - self.grid_options["_custom_logging_shared_library_file"] = None - - if evol_type == "population": - verbose_print( - "Cleaning up the custom logging stuffs. type: population", - self.grid_options["verbosity"], - 1, - ) - - # TODO: make sure that these also work. not fully sure if necessary tho. 
- # whether its a single file, or a dict of files/mem addresses - - if evol_type == "MC": - pass - - def _increment_probtot(self, prob): - """ - Function to add to the total probability. For now not used - """ - - self.grid_options["_probtot"] += prob - - def _increment_count(self): - """ - Function to add to the total number of stars. For now not used - """ - self.grid_options["_count"] += 1 - - def _set_loggers(self): - """ - Function to set the loggers for the execution of the grid - """ - - # Set log file - binary_c_logfile = self.grid_options["log_file"] - - # Create directory - os.makedirs(os.path.dirname(binary_c_logfile), exist_ok=True) - - # Set up logger - self.logger = logging.getLogger("binary_c_python_logger") - self.logger.setLevel(self.grid_options["verbosity"]) - - # Reset handlers - self.logger.handlers = [] - - # Set formatting of output - log_formatter = logging.Formatter( - "%(asctime)s - %(name)s - %(levelname)s - %(message)s" - ) - - # Make and add file handlers - # make handler for output to file - handler_file = logging.FileHandler(filename=os.path.join(binary_c_logfile)) - handler_file.setFormatter(log_formatter) - handler_file.setLevel(logging.INFO) - - # Make handler for output to stdout - handler_stdout = logging.StreamHandler(sys.stdout) - handler_stdout.setFormatter(log_formatter) - handler_stdout.setLevel(logging.INFO) - - # Add the loggers - self.logger.addHandler(handler_file) - self.logger.addHandler(handler_stdout) - - def _check_binary_c_error(self, binary_c_output, system_dict): - """ - Function to check whether binary_c throws an error and handle accordingly. - """ - - if binary_c_output: - if (binary_c_output.splitlines()[0].startswith("SYSTEM_ERROR")) or ( - binary_c_output.splitlines()[-1].startswith("SYSTEM_ERROR") - ): - verbose_print( - "FAILING SYSTEM FOUND", - self.grid_options["verbosity"], - 0, - ) - - # Keep track of the amount of failed systems and their error codes - self.grid_options["_failed_prob"] += system_dict.get("probability", 1) - self.grid_options["_failed_count"] += 1 - self.grid_options["_errors_found"] = True - - # Try catching the error code and keep track of the unique ones. - try: - error_code = int( - binary_c_output.splitlines()[0] - .split("with error code")[-1] - .split(":")[0] - .strip() - ) - - if ( - not error_code - in self.grid_options["_failed_systems_error_codes"] - ): - self.grid_options["_failed_systems_error_codes"].append( - error_code - ) - except ValueError: - verbose_print( - "Failed to extract the error-code", - self.grid_options["verbosity"], - 1, - ) - - # Check if we have exceeded the number of errors - if ( - self.grid_options["_failed_count"] - > self.grid_options["failed_systems_threshold"] - ): - if not self.grid_options["_errors_exceeded"]: - verbose_print( - self._boxed( - "Process {} exceeded the maximum ({}) number of failing systems. Stopped logging them to files now".format( - self.process_ID, - self.grid_options["failed_systems_threshold"], - ) - ), - self.grid_options["verbosity"], - 1, - ) - self.grid_options["_errors_exceeded"] = True - - # If not, write the failing systems to files unique to each process - else: - # Write arg lines to file - argstring = self._return_argline(system_dict) - with open( - os.path.join( - self.grid_options["tmp_dir"], - "failed_systems", - "process_{}.txt".format(self.process_ID), - ), - "a+", - ) as f: - f.write(argstring + "\n") - f.close() - else: - verbose_print( - "binary_c output nothing - this is strange. 
If there is ensemble output being generated then this is fine.", - self.grid_options["verbosity"], - 3, - ) - - def set_moe_di_stefano_settings(self, options=None): - """ - Function to set user input configurations for the Moe & di Stefano methods - - If nothing is passed then we just use the default options - """ - - if not options: - options = {} - - # Take the option dictionary that was given and override. - options = update_dicts(self.grid_options["Moe2017_options"], options) - self.grid_options["Moe2017_options"] = copy.deepcopy(options) - - # Write options to a file - os.makedirs( - os.path.join(self.grid_options["tmp_dir"], "moe_distefano"), - exist_ok=True, - ) - with open( - os.path.join( - os.path.join(self.grid_options["tmp_dir"], "moe_distefano"), - "moeopts.dat", - ), - "w", - ) as f: - f.write(json.dumps(self.grid_options["Moe2017_options"], indent=4)) - f.close() - - def _load_moe_di_stefano_data(self): - """ - Function to load the moe & di stefano data - """ - - # Only if the grid is loaded and Moecache contains information - if not self.grid_options["_loaded_Moe2017_data"]: # and not Moecache: - - if self.grid_options["_Moe2017_JSON_data"]: - # Use the existing (perhaps modified) JSON data - json_data = self.grid_options["_Moe2017_JSON_data"] - - else: - # Load the JSON data from a file - json_data = get_moe_di_stefano_dataset( - self.grid_options["Moe2017_options"], - verbosity=self.grid_options["verbosity"], - ) - - # entry of log10M1 is a list containing 1 dict. - # We can take the dict out of the list - if isinstance(json_data["log10M1"], list): - json_data["log10M1"] = json_data["log10M1"][0] - - # save this data in case we want to modify it later - self.grid_options["_Moe2017_JSON_data"] = json_data - - # Get all the masses - logmasses = sorted(json_data["log10M1"].keys()) - if not logmasses: - msg = "The table does not contain masses." - verbose_print( - "\tMoe_di_Stefano_2017: {}".format(msg), - self.grid_options["verbosity"], - 0, - ) - raise ValueError(msg) - - # Write to file - os.makedirs( - os.path.join(self.grid_options["tmp_dir"], "moe_distefano"), - exist_ok=True, - ) - with open( - os.path.join( - os.path.join(self.grid_options["tmp_dir"], "moe_distefano"), - "moe.log", - ), - "w", - ) as logfile: - logfile.write("logâ‚â‚€Masses(M☉) {}\n".format(logmasses)) - - # Get all the periods and see if they are all consistently present - logperiods = [] - for logmass in logmasses: - if not logperiods: - logperiods = sorted(json_data["log10M1"][logmass]["logP"].keys()) - dlog10P = float(logperiods[1]) - float(logperiods[0]) - - current_logperiods = sorted(json_data["log10M1"][logmass]["logP"]) - if not (logperiods == current_logperiods): - msg = ( - "Period values are not consistent throughout the dataset\logperiods = " - + " ".join(str(x) for x in logperiods) - + "\nCurrent periods = " - + " ".join(str(x) for x in current_logperiods) - ) - verbose_print( - "\tMoe_di_Stefano_2017: {}".format(msg), - self.grid_options["verbosity"], - 0, - ) - raise ValueError(msg) - - ############################################################ - # log10period binwidth : of course this assumes a fixed - # binwidth, so we check for this too. 
- for i in range(len(current_logperiods) - 1): - if not dlog10P == ( - float(current_logperiods[i + 1]) - float(current_logperiods[i]) - ): - msg = "Period spacing is not consistent throughout the dataset" - verbose_print( - "\tMoe_di_Stefano_2017: {}".format(msg), - self.grid_options["verbosity"], - 0, - ) - raise ValueError(msg) - - # save the logperiods list in the cache: - # this is used in the renormalization integration - Moecache["logperiods"] = logperiods - - # Write to file - os.makedirs( - os.path.join(self.grid_options["tmp_dir"], "moe_distefano"), - exist_ok=True, - ) - with open( - os.path.join(self.grid_options["tmp_dir"], "moe_distefano", "moe.log"), - "a", - ) as logfile: - logfile.write("logâ‚â‚€Periods(days) {}\n".format(logperiods)) - - # Fill the global dict - for logmass in logmasses: - # Create the multiplicity table - if not Moecache.get("multiplicity_table", None): - Moecache["multiplicity_table"] = [] - - # multiplicity as a function of primary mass - Moecache["multiplicity_table"].append( - [ - float(logmass), - json_data["log10M1"][logmass]["f_multi"], - json_data["log10M1"][logmass]["single star fraction"], - json_data["log10M1"][logmass]["binary star fraction"], - json_data["log10M1"][logmass]["triple/quad star fraction"], - ] - ) - - ############################################################ - # a small log10period which we can shift just outside the - # table to force integration out there to zero - epslog10P = 1e-8 * dlog10P - - ############################################################ - # loop over either binary or triple-outer periods - first = 1 - - # Go over the periods - for logperiod in logperiods: - ############################################################ - # distributions of binary and triple star fractions - # as a function of mass, period. 
- # - # Note: these should be per unit log10P, hence we - # divide by dlog10P - - if first: - first = 0 - - # Create the multiplicity table - if not Moecache.get("period_distributions", None): - Moecache["period_distributions"] = [] - - ############################################################ - # lower bound the period distributions to zero probability - Moecache["period_distributions"].append( - [ - float(logmass), - float(logperiod) - 0.5 * dlog10P - epslog10P, - 0.0, - 0.0, - ] - ) - Moecache["period_distributions"].append( - [ - float(logmass), - float(logperiod) - 0.5 * dlog10P, - json_data["log10M1"][logmass]["logP"][logperiod][ - "normed_bin_frac_p_dist" - ] - / dlog10P, - json_data["log10M1"][logmass]["logP"][logperiod][ - "normed_tripquad_frac_p_dist" - ] - / dlog10P, - ] - ) - - Moecache["period_distributions"].append( - [ - float(logmass), - float(logperiod), - json_data["log10M1"][logmass]["logP"][logperiod][ - "normed_bin_frac_p_dist" - ] - / dlog10P, - json_data["log10M1"][logmass]["logP"][logperiod][ - "normed_tripquad_frac_p_dist" - ] - / dlog10P, - ] - ) - - ############################################################ - # distributions as a function of mass, period, q - # - # First, get a list of the qs given by Moe - # - qs = sorted(json_data["log10M1"][logmass]["logP"][logperiod]["q"]) - - # Fill the data and 'normalise' - qdata = fill_data( - qs, json_data["log10M1"][logmass]["logP"][logperiod]["q"] - ) - - # Create the multiplicity table - if not Moecache.get("q_distributions", None): - Moecache["q_distributions"] = [] - - for q in qs: - Moecache["q_distributions"].append( - [float(logmass), float(logperiod), float(q), qdata[q]] - ) - - ############################################################ - # eccentricity distributions as a function of mass, period, ecc - eccs = sorted(json_data["log10M1"][logmass]["logP"][logperiod]["e"]) - - # Fill the data and 'normalise' - ecc_data = fill_data( - eccs, json_data["log10M1"][logmass]["logP"][logperiod]["e"] - ) - - # Create the multiplicity table - if not Moecache.get("ecc_distributions", None): - Moecache["ecc_distributions"] = [] - - for ecc in eccs: - Moecache["ecc_distributions"].append( - [ - float(logmass), - float(logperiod), - float(ecc), - ecc_data[ecc], - ] - ) + from a dictionary, removes the entries that have the following values: + - "NULL" + - "" + - "Function" - ############################################################ - # upper bound the period distributions to zero probability - Moecache["period_distributions"].append( - [ - float(logmass), - float(logperiods[-1]) + 0.5 * dlog10P, # TODO: why this shift? 
- json_data["log10M1"][logmass]["logP"][logperiods[-1]][ - "normed_bin_frac_p_dist" - ] - / dlog10P, - json_data["log10M1"][logmass]["logP"][logperiods[-1]][ - "normed_tripquad_frac_p_dist" - ] - / dlog10P, - ] - ) - Moecache["period_distributions"].append( - [ - float(logmass), - float(logperiods[-1]) + 0.5 * dlog10P + epslog10P, - 0.0, - 0.0, - ] - ) + Uses the function from utils.functions - verbose_print( - "\tMoe_di_Stefano_2017: Length period_distributions table: {}".format( - len(Moecache["period_distributions"]) - ), - self.grid_options["verbosity"], - _MOE2017_VERBOSITY_LEVEL, - ) - verbose_print( - "\tMoe_di_Stefano_2017: Length multiplicity table: {}".format( - len(Moecache["multiplicity_table"]) - ), - self.grid_options["verbosity"], - _MOE2017_VERBOSITY_LEVEL, - ) - verbose_print( - "\tMoe_di_Stefano_2017: Length q table: {}".format( - len(Moecache["q_distributions"]) - ), - self.grid_options["verbosity"], - _MOE2017_VERBOSITY_LEVEL, - ) - verbose_print( - "\tMoe_di_Stefano_2017: Length ecc table: {}".format( - len(Moecache["ecc_distributions"]) - ), - self.grid_options["verbosity"], - _MOE2017_VERBOSITY_LEVEL, - ) + TODO: Rethink this functionality. seems a bit double, could also be just outside of the class + """ - # Write to log file - os.makedirs( - os.path.join(self.grid_options["tmp_dir"], "moe_distefano"), - exist_ok=True, - ) - with open( - os.path.join( - os.path.join(self.grid_options["tmp_dir"], "moe_distefano"), - "moecache.json", - ), - "w", - ) as cache_filehandle: - cache_filehandle.write(json.dumps(Moecache, indent=4)) + binary_c_defaults = self.return_binary_c_defaults().copy() + cleaned_dict = filter_arg_dict(binary_c_defaults) - # Signal that the data has been loaded - self.grid_options["_loaded_Moe2017_data"] = True + return cleaned_dict - def _set_moe_di_stefano_distributions(self): + def _increment_probtot(self, prob): """ - Function to set the Moe & di Stefano distribution + Function to add to the total probability. For now not used """ - ############################################################ - # first, the multiplicity, this is 1,2,3,4, ... - # for singles, binaries, triples, quadruples, ... 
- - max_multiplicity = get_max_multiplicity( - self.grid_options["Moe2017_options"]["multiplicity_modulator"] - ) - verbose_print( - "\tMoe_di_Stefano_2017: Max multiplicity = {}".format(max_multiplicity), - self.grid_options["verbosity"], - _MOE2017_VERBOSITY_LEVEL, - ) - ###### - # Setting up the grid variables - - # Multiplicity - self.add_grid_variable( - name="multiplicity", - parameter_name="multiplicity", - longname="multiplicity", - valuerange=[1, max_multiplicity], - samplerfunc="const_int(1, {n}, {n})".format(n=max_multiplicity), - precode='self.grid_options["multiplicity"] = multiplicity; self.bse_options["multiplicity"] = multiplicity; options={}'.format( - self.grid_options["Moe2017_options"] - ), - condition="({}[int(multiplicity)-1] > 0)".format( - str(self.grid_options["Moe2017_options"]["multiplicity_modulator"]) - ), - gridtype="discrete", - probdist=1, - ) - - ############################################################ - # always require M1, for all systems - # - # log-spaced m1 with given resolution - self.add_grid_variable( - name="lnm1", - parameter_name="M_1", - longname="Primary mass", - samplerfunc=self.grid_options["Moe2017_options"]["samplerfuncs"]["M"][0] - or "const(np.log({}), np.log({}), {})".format( - self.grid_options["Moe2017_options"]["ranges"]["M"][0], - self.grid_options["Moe2017_options"]["ranges"]["M"][1], - self.grid_options["Moe2017_options"]["resolutions"]["M"][0], - ), - valuerange=[ - "np.log({})".format( - self.grid_options["Moe2017_options"]["ranges"]["M"][0] - ), - "np.log({})".format( - self.grid_options["Moe2017_options"]["ranges"]["M"][1] - ), - ], - gridtype="centred", - dphasevol="dlnm1", - precode='M_1 = np.exp(lnm1); options["M_1"]=M_1', - probdist="Moe_di_Stefano_2017_pdf({{{}, {}, {}}}, verbosity=self.grid_options['verbosity'])['total_probdens'] if multiplicity == 1 else 1".format( - str(dict(self.grid_options["Moe2017_options"]))[1:-1], - "'multiplicity': multiplicity", - "'M_1': M_1", - ), - ) - - # Go to higher multiplicities - if max_multiplicity >= 2: - # binaries: period - self.add_grid_variable( - name="log10per", - parameter_name="orbital_period", - longname="log10(Orbital_Period)", - probdist=1.0, - condition='(self.grid_options["multiplicity"] >= 2)', - branchpoint=1 - if max_multiplicity > 1 - else 0, # Signal here to put a branchpoint if we have a max multiplicity higher than 1. 
- gridtype="centred", - dphasevol="({} * dlog10per)".format(LOG_LN_CONVERTER), - valuerange=[ - self.grid_options["Moe2017_options"]["ranges"]["logP"][0], - self.grid_options["Moe2017_options"]["ranges"]["logP"][1], - ], - samplerfunc=self.grid_options["Moe2017_options"]["samplerfuncs"][ - "logP" - ][0] - or "const({}, {}, {})".format( - self.grid_options["Moe2017_options"]["ranges"]["logP"][0], - self.grid_options["Moe2017_options"]["ranges"]["logP"][1], - self.grid_options["Moe2017_options"]["resolutions"]["logP"][0], - ), - precode="""orbital_period = 10.0**log10per -qmin={}/M_1 -qmax=maximum_mass_ratio_for_RLOF(M_1, orbital_period) -""".format( - self.grid_options["Moe2017_options"]["Mmin"] - ), - ) # TODO: change the maximum_mass_ratio_for_RLOF - - # binaries: mass ratio - self.add_grid_variable( - name="q", - parameter_name="M_2", - longname="Mass ratio", - valuerange=[ - self.grid_options["Moe2017_options"]["ranges"]["q"][0] - if self.grid_options["Moe2017_options"] - .get("ranges", {}) - .get("q", None) - else "options['Mmin']/M_1", - self.grid_options["Moe2017_options"]["ranges"]["q"][1] - if self.grid_options["Moe2017_options"] - .get("ranges", {}) - .get("q", None) - else "qmax", - ], - probdist=1, - gridtype="centred", - dphasevol="dq", - precode=""" -M_2 = q * M_1 -sep = calc_sep_from_period(M_1, M_2, orbital_period) - """, - samplerfunc=self.grid_options["Moe2017_options"]["samplerfuncs"]["M"][1] - or "const({}, {}, {})".format( - self.grid_options["Moe2017_options"]["ranges"]["q"][0] - if self.grid_options["Moe2017_options"] - .get("ranges", {}) - .get("q", [None, None])[0] - else "{}/M_1".format(self.grid_options["Moe2017_options"]["Mmin"]), - self.grid_options["Moe2017_options"]["ranges"]["q"][1] - if self.grid_options["Moe2017_options"] - .get("ranges", {}) - .get("q", [None, None])[1] - else "qmax", - self.grid_options["Moe2017_options"]["resolutions"]["M"][1], - ), - ) - - # (optional) binaries: eccentricity - if self.grid_options["Moe2017_options"]["resolutions"]["ecc"][0] > 0: - self.add_grid_variable( - name="ecc", - parameter_name="eccentricity", - longname="Eccentricity", - probdist=1, - gridtype="centred", - dphasevol="decc", - precode="eccentricity=ecc", - valuerange=[ - self.grid_options["Moe2017_options"]["ranges"]["ecc"][ - 0 - ], # Just fail if not defined. - self.grid_options["Moe2017_options"]["ranges"]["ecc"][1], - ], - samplerfunc=self.grid_options["Moe2017_options"]["samplerfuncs"][ - "ecc" - ][0] - or "const({}, {}, {})".format( - self.grid_options["Moe2017_options"]["ranges"]["ecc"][ - 0 - ], # Just fail if not defined. - self.grid_options["Moe2017_options"]["ranges"]["ecc"][1], - self.grid_options["Moe2017_options"]["resolutions"]["ecc"][0], - ), - ) - - # Now for triples and quadruples - if max_multiplicity >= 3: - # Triple: period - self.add_grid_variable( - name="log10per2", - parameter_name="orbital_period_triple", - longname="log10(Orbital_Period2)", - probdist=1.0, - condition='(self.grid_options["multiplicity"] >= 3)', - branchpoint=2 - if max_multiplicity > 2 - else 0, # Signal here to put a branchpoint if we have a max multiplicity higher than 1. 
- gridtype="centred", - dphasevol="({} * dlog10per2)".format(LOG_LN_CONVERTER), - valuerange=[ - self.grid_options["Moe2017_options"]["ranges"]["logP"][0], - self.grid_options["Moe2017_options"]["ranges"]["logP"][1], - ], - samplerfunc=self.grid_options["Moe2017_options"]["samplerfuncs"][ - "logP" - ][1] - or "const({}, {}, {})".format( - self.grid_options["Moe2017_options"]["ranges"]["logP"][0], - self.grid_options["Moe2017_options"]["ranges"]["logP"][1], - self.grid_options["Moe2017_options"]["resolutions"]["logP"][1], - ), - precode="""orbital_period_triple = 10.0**log10per2 -q2min={}/(M_1+M_2) -q2max=maximum_mass_ratio_for_RLOF(M_1+M_2, orbital_period_triple) - """.format( - self.grid_options["Moe2017_options"]["Mmin"] - ), - ) - - # Triples: mass ratio - # Note, the mass ratio is M_outer/M_inner - self.add_grid_variable( - name="q2", - parameter_name="M_3", - longname="Mass ratio outer/inner", - valuerange=[ - self.grid_options["Moe2017_options"]["ranges"]["q"][0] - if self.grid_options["Moe2017_options"] - .get("ranges", {}) - .get("q", None) - else "options['Mmin']/(M_1+M_2)", - self.grid_options["Moe2017_options"]["ranges"]["q"][1] - if self.grid_options["Moe2017_options"] - .get("ranges", {}) - .get("q", None) - else "q2max", - ], - probdist=1, - gridtype="centred", - dphasevol="dq2", - precode=""" -M_3 = q2 * (M_1 + M_2) -sep2 = calc_sep_from_period((M_1+M_2), M_3, orbital_period_triple) -eccentricity2=0 -""", - samplerfunc=self.grid_options["Moe2017_options"]["samplerfuncs"][ - "M" - ][2] - or "const({}, {}, {})".format( - self.grid_options["Moe2017_options"]["ranges"]["q"][0] - if self.grid_options["Moe2017_options"] - .get("ranges", {}) - .get("q", None) - else "options['Mmin']/(M_1+M_2)", - self.grid_options["Moe2017_options"]["ranges"]["q"][1] - if self.grid_options["Moe2017_options"] - .get("ranges", {}) - .get("q", None) - else "q2max", - self.grid_options["Moe2017_options"]["resolutions"]["M"][2], - ), - ) - - # (optional) triples: eccentricity - if self.grid_options["Moe2017_options"]["resolutions"]["ecc"][1] > 0: - self.add_grid_variable( - name="ecc2", - parameter_name="eccentricity2", - longname="Eccentricity of the triple", - probdist=1, - gridtype="centred", - dphasevol="decc2", - precode="eccentricity2=ecc2", - valuerange=[ - self.grid_options["Moe2017_options"]["ranges"]["ecc"][ - 0 - ], # Just fail if not defined. - self.grid_options["Moe2017_options"]["ranges"]["ecc"][1], - ], - samplerfunc=self.grid_options["Moe2017_options"][ - "samplerfuncs" - ]["ecc"][1] - or "const({}, {}, {})".format( - self.grid_options["Moe2017_options"]["ranges"]["ecc"][ - 0 - ], # Just fail if not defined. - self.grid_options["Moe2017_options"]["ranges"]["ecc"][1], - self.grid_options["Moe2017_options"]["resolutions"]["ecc"][ - 1 - ], - ), - ) - - if max_multiplicity == 4: - # Quadruple: period - self.add_grid_variable( - name="log10per3", - parameter_name="orbital_period_quadruple", - longname="log10(Orbital_Period3)", - probdist=1.0, - condition='(self.grid_options["multiplicity"] >= 4)', - branchpoint=3 - if max_multiplicity > 3 - else 0, # Signal here to put a branchpoint if we have a max multiplicity higher than 1. 
- gridtype="centred", - dphasevol="({} * dlog10per3)".format(LOG_LN_CONVERTER), - valuerange=[ - self.grid_options["Moe2017_options"]["ranges"]["logP"][0], - self.grid_options["Moe2017_options"]["ranges"]["logP"][1], - ], - samplerfunc=self.grid_options["Moe2017_options"][ - "samplerfuncs" - ]["logP"][2] - or "const({}, {}, {})".format( - self.grid_options["Moe2017_options"]["ranges"]["logP"][0], - self.grid_options["Moe2017_options"]["ranges"]["logP"][1], - self.grid_options["Moe2017_options"]["resolutions"]["logP"][ - 2 - ], - ), - precode="""orbital_period_quadruple = 10.0**log10per3 -q3min={}/(M_3) -q3max=maximum_mass_ratio_for_RLOF(M_3, orbital_period_quadruple) - """.format( - self.grid_options["Moe2017_options"]["Mmin"] - ), - ) - - # Quadruple: mass ratio : M_outer / M_inner - self.add_grid_variable( - name="q3", - parameter_name="M_4", - longname="Mass ratio outer low/outer high", - valuerange=[ - self.grid_options["Moe2017_options"]["ranges"]["q"][0] - if self.grid_options["Moe2017_options"] - .get("ranges", {}) - .get("q", None) - else "options['Mmin']/(M_3)", - self.grid_options["Moe2017_options"]["ranges"]["q"][1] - if self.grid_options["Moe2017_options"] - .get("ranges", {}) - .get("q", None) - else "q3max", - ], - probdist=1, - gridtype="centred", - dphasevol="dq3", - precode=""" -M_4 = q3 * M_3 -sep3 = calc_sep_from_period((M_3), M_4, orbital_period_quadruple) -eccentricity3=0 -""", - samplerfunc=self.grid_options["Moe2017_options"][ - "samplerfuncs" - ]["M"][3] - or "const({}, {}, {})".format( - self.grid_options["Moe2017_options"]["ranges"]["q"][0] - if self.grid_options["Moe2017_options"] - .get("ranges", {}) - .get("q", None) - else "options['Mmin']/(M_3)", - self.grid_options["Moe2017_options"]["ranges"]["q"][1] - if self.grid_options["Moe2017_options"] - .get("ranges", {}) - .get("q", None) - else "q3max", - self.grid_options["Moe2017_options"]["resolutions"]["M"][2], - ), - ) - - # (optional) triples: eccentricity - if ( - self.grid_options["Moe2017_options"]["resolutions"]["ecc"][2] - > 0 - ): - self.add_grid_variable( - name="ecc3", - parameter_name="eccentricity3", - longname="Eccentricity of the triple+quadruple/outer binary", - probdist=1, - gridtype="centred", - dphasevol="decc3", - precode="eccentricity3=ecc3", - valuerange=[ - self.grid_options["Moe2017_options"]["ranges"]["ecc"][ - 0 - ], # Just fail if not defined. - self.grid_options["Moe2017_options"]["ranges"]["ecc"][ - 1 - ], - ], - samplerfunc=self.grid_options["Moe2017_options"][ - "samplerfuncs" - ]["ecc"][2] - or "const({}, {}, {})".format( - self.grid_options["Moe2017_options"]["ranges"]["ecc"][ - 0 - ], # Just fail if not defined. - self.grid_options["Moe2017_options"]["ranges"]["ecc"][ - 1 - ], - self.grid_options["Moe2017_options"]["resolutions"][ - "ecc" - ][2], - ), - ) - - # Now we are at the last part. - # Here we should combine all the information that we calculate and update the options - # dictionary. This will then be passed to the Moe_di_Stefano_2017_pdf to calculate - # the real probability. 
The trick we use is to strip the options_dict as a string - # and add some keys to it: - - updated_options = "{{{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}}}".format( - str(dict(self.grid_options["Moe2017_options"]))[1:-1], - '"multiplicity": multiplicity', - '"M_1": M_1', - '"M_2": M_2', - '"M_3": M_3', - '"M_4": M_4', - '"P": orbital_period', - '"P2": orbital_period_triple', - '"P3": orbital_period_quadruple', - '"ecc": eccentricity', - '"ecc2": eccentricity2', - '"ecc3": eccentricity3', - ) - - probdist_addition = "Moe_di_Stefano_2017_pdf({}, verbosity=self.grid_options['verbosity'])['total_probdens']".format( - updated_options - ) - - # and finally the probability calculator - self.grid_options["_grid_variables"][self._last_grid_variable()][ - "probdist" - ] = probdist_addition - - verbose_print( - "\tMoe_di_Stefano_2017: Added final call to the pdf function", - self.grid_options["verbosity"], - _MOE2017_VERBOSITY_LEVEL, - ) - - # Signal that the MOE2017 grid has been set - self.grid_options["_set_Moe2017_grid"] = True + self.grid_options["_probtot"] += prob - ################################################################################################ - def Moe_di_Stefano_2017(self, options=None): + def _increment_count(self): """ - Function to handle setting the user input settings, - set up the data and load that into interpolators and - then set the distribution functions - - Takes a dictionary as its only argument + Function to add to the total number of stars. For now not used """ + self.grid_options["_count"] += 1 - default_options = { - "apply settings": True, - "setup grid": True, - "load data": True, - "clean cache": False, - "clean load flag": False, - "clean all": False, - } - if not options: - options = {} - options = update_dicts(default_options, options) - - # clean cache? - if options["clean all"] or options["clean cache"]: - Moecache.clear() - - if options["clean all"] or options["clean load flag"]: - self.grid_options["_loaded_Moe2017_data"] = False - - # Set the user input - if options["apply settings"]: - self.set_moe_di_stefano_settings(options=options) - - # Load the data - if options["load data"]: - self._load_moe_di_stefano_data() - - # construct the grid here - if options["setup grid"]: - self._set_moe_di_stefano_distributions() - - def _clean_interpolators(self): + def was_killed(self): """ - Function to clean up the interpolators after a run - - We look in the Moecache global variable for items that are interpolators. - Should be called by the general cleanup function AND the thread cleanup function + Function to determine if the process was killed. Returns True if so, false otherwise. """ + killed = self.grid_options["_killed"] - interpolator_keys = [] - for key in Moecache.keys(): - if isinstance(Moecache[key], py_rinterpolate.Rinterpolate): - interpolator_keys.append(key) + if "_killed" in self.grid_ensemble_results.get("metadata", {}): + killed = killed or self.grid_ensemble_results["metadata"]["_killed"] - for key in interpolator_keys: - Moecache[key].destroy() - del Moecache[key] - gc.collect() + return killed - ##### Unsorted functions - def _calculate_multiplicity_fraction(self, system_dict): + def _check_binary_c_error(self, binary_c_output, system_dict): """ - Function to calculate multiplicity fraction - - Makes use of the self.bse_options['multiplicity'] value. 
If its not set, it will raise an error - - grid_options['multiplicity_fraction_function'] will be checked for the choice - - TODO: add option to put a manual binary fraction in here (solve via negative numbers being the functions) + Function to check whether binary_c throws an error and handle accordingly. """ - # Just return 1 if no option has been chosen - if self.grid_options["multiplicity_fraction_function"] in [0, "None"]: - verbose_print( - "_calculate_multiplicity_fraction: Chosen not to use any multiplicity fraction.", - self.grid_options["verbosity"], - 3, - ) - - return 1 - - # Raise an error if the multiplicity is not set - if not system_dict.get("multiplicity", None): - msg = "Multiplicity value has not been set. When using a specific multiplicity fraction function please set the multiplicity" - raise ValueError(msg) - - # Go over the chosen options - if self.grid_options["multiplicity_fraction_function"] in [1, "Arenou2010"]: - # Arenou 2010 will be used - verbose_print( - "_calculate_multiplicity_fraction: Using Arenou 2010 to calculate multiplicity fractions", - self.grid_options["verbosity"], - 3, - ) - - binary_fraction = Arenou2010_binary_fraction(system_dict["M_1"]) - multiplicity_fraction_dict = { - 1: 1 - binary_fraction, - 2: binary_fraction, - 3: 0, - 4: 0, - } - - elif self.grid_options["multiplicity_fraction_function"] in [2, "Raghavan2010"]: - # Raghavan 2010 will be used - verbose_print( - "_calculate_multiplicity_fraction: Using Raghavan (2010) to calculate multiplicity fractions", - self.grid_options["verbosity"], - 3, - ) - - binary_fraction = raghavan2010_binary_fraction(system_dict["M_1"]) - multiplicity_fraction_dict = { - 1: 1 - binary_fraction, - 2: binary_fraction, - 3: 0, - 4: 0, - } - - elif self.grid_options["multiplicity_fraction_function"] in [3, "Moe2017"]: - # We need to check several things now here: - - # First, are the options for the MOE2017 grid set? On start it is filled with the default settings - if not self.grid_options["Moe2017_options"]: - msg = "The MOE2017 options do not seem to be set properly. The value is {}".format( - self.grid_options["Moe2017_options"] - ) - raise ValueError(msg) - - # Second: is the Moecache filled. - if not Moecache: - verbose_print( - "_calculate_multiplicity_fraction: Moecache is empty. It needs to be filled with the data for the interpolators. Loading the data now", + if binary_c_output: + if (binary_c_output.splitlines()[0].startswith("SYSTEM_ERROR")) or ( + binary_c_output.splitlines()[-1].startswith("SYSTEM_ERROR") + ): + self.verbose_print( + "FAILING SYSTEM FOUND", self.grid_options["verbosity"], - 3, + 0, ) - # Load the data - self._load_moe_di_stefano_data() - - # record the prev value - prev_M1_value_ms = self.grid_options["Moe2017_options"].get("M_1", None) - - # Set value of M1 of the current system - self.grid_options["Moe2017_options"]["M_1"] = system_dict["M_1"] - - # Calculate the multiplicity fraction - multiplicity_fraction_list = Moe_di_Stefano_2017_multiplicity_fractions( - self.grid_options["Moe2017_options"], self.grid_options["verbosity"] - ) - - # Turn into dict - multiplicity_fraction_dict = { - el + 1: multiplicity_fraction_list[el] - for el in range(len(multiplicity_fraction_list)) - } - - # Set the prev value back - self.grid_options["Moe2017_options"]["M_1"] = prev_M1_value_ms - - # we don't know what to do next - else: - msg = "Chosen value for the multiplicity fraction function is not known." 
- raise ValueError(msg) - - # To make sure we normalize the dictionary - multiplicity_fraction_dict = normalize_dict( - multiplicity_fraction_dict, verbosity=self.grid_options["verbosity"] - ) - - verbose_print( - "Multiplicity: {} multiplicity_fraction: {}".format( - system_dict["multiplicity"], - multiplicity_fraction_dict[system_dict["multiplicity"]], - ), - self.grid_options["verbosity"], - 3, - ) - - return multiplicity_fraction_dict[system_dict["multiplicity"]] - - ###################### - # Status logging - - def vb1print(self, ID, now, system_number, system_dict): - """ - Verbosity-level 1 printing, to keep an eye on a grid. - Arguments: - ID: thread ID for debugging (int) - now: the time now as a UNIX-style epoch in seconds (float) - system_number: the system number - - TODO: add information about the number of cores. the TPR shows the dt/dn but i want to see the number per core too - """ - - # calculate estimated time of arrive (eta and eta_secs), time per run (tpr) - localtime = time.localtime(now) - - # calculate stats - n = self.shared_memory["n_saved_log_stats"].value - if n < 2: - # simple 1-system calculation: inaccurate - # but best for small n - dt = now - self.shared_memory["prev_log_time"][0] - dn = system_number - self.shared_memory["prev_log_system_number"][0] - else: - # average over n_saved_log_stats - dt = ( - self.shared_memory["prev_log_time"][0] - - self.shared_memory["prev_log_time"][n - 1] - ) - dn = ( - self.shared_memory["prev_log_system_number"][0] - - self.shared_memory["prev_log_system_number"][n - 1] - ) - - eta, units, tpr, eta_secs = trem( - dt, system_number, dn, self.grid_options["_total_starcount"] - ) - - # compensate for multithreading and modulo - tpr *= self.grid_options["num_cores"] * self.grid_options["modulo"] - - if eta_secs < secs_per_day: - fintime = time.localtime(now + eta_secs) - etf = "{hours:02d}:{minutes:02d}:{seconds:02d}".format( - hours=fintime.tm_hour, minutes=fintime.tm_min, seconds=fintime.tm_sec - ) - else: - d = int(eta_secs / secs_per_day) - if d == 1: - etf = "Tomorrow" - else: - etf = "In {} days".format(d) + # Keep track of the amount of failed systems and their error codes + self.grid_options["_failed_prob"] += system_dict.get("probability", 1) + self.grid_options["_failed_count"] += 1 + self.grid_options["_errors_found"] = True - # modulo information - if self.grid_options["modulo"] == 1: - modulo = "" # usual case - else: - modulo = "%" + str(self.grid_options["modulo"]) + # Try catching the error code and keep track of the unique ones. + try: + error_code = int( + binary_c_output.splitlines()[0] + .split("with error code")[-1] + .split(":")[0] + .strip() + ) - # add up memory use from each thread - total_mem_use = sum(self.shared_memory["memory_use_per_thread"]) + if ( + not error_code + in self.grid_options["_failed_systems_error_codes"] + ): + self.grid_options["_failed_systems_error_codes"].append( + error_code + ) + except ValueError: + self.verbose_print( + "Failed to extract the error-code", + self.grid_options["verbosity"], + 1, + ) - # make a string to describe the system e.g. M1, M2, etc. - system_string = "" + # Check if we have exceeded the number of errors + if ( + self.grid_options["_failed_count"] + > self.grid_options["failed_systems_threshold"] + ): + if not self.grid_options["_errors_exceeded"]: + self.verbose_print( + self._boxed( + "Process {} exceeded the maximum ({}) number of failing systems. 
Stopped logging them to files now".format( + self.process_ID, + self.grid_options["failed_systems_threshold"], + ) + ), + self.grid_options["verbosity"], + 1, + ) + self.grid_options["_errors_exceeded"] = True - # use the multiplicity if given - if "multiplicity" in system_dict: - nmult = int(system_dict["multiplicity"]) + # If not, write the failing systems to files unique to each process + else: + # Write arg lines to file + argstring = self._return_argline(system_dict) + with self.open( + os.path.join( + self.grid_options["tmp_dir"], + "failed_systems", + "process_{}.txt".format(self.process_ID), + ), + "a+", + encoding="utf-8", + ) as f: + f.write(argstring + "\n") + f.close() else: - nmult = 4 - - # masses - for i in range(nmult): - i1 = str(i + 1) - if "M_" + i1 in system_dict: - system_string += ( - "M{}=".format(i1) + format_number(system_dict["M_" + i1]) + " " - ) - - # separation and orbital period - if "separation" in system_dict: - system_string += "a=" + format_number(system_dict["separation"]) - if "orbital_period" in system_dict: - system_string += "P=" + format_number(system_dict["orbital_period"]) - - # do the print - verbose_print( - "{opening_colour}{system_number}/{total_starcount}{modulo} {pc_colour}{pc_complete:5.1f}% complete {time_colour}{hours:02d}:{minutes:02d}:{seconds:02d} {ETA_colour}ETA={ETA:7.1f}{units} tpr={tpr:2.2e} {ETF_colour}ETF={ETF} {mem_use_colour}mem:{mem_use:.1f}MB {system_string_colour}{system_string}{closing_colour}".format( - opening_colour=self.ANSI_colours["reset"] - + self.ANSI_colours["yellow on black"], - system_number=system_number, - total_starcount=self.grid_options["_total_starcount"], - modulo=modulo, - pc_colour=self.ANSI_colours["blue on black"], - pc_complete=(100.0 * system_number) - / (1.0 * self.grid_options["_total_starcount"]) - if self.grid_options["_total_starcount"] - else -1, - time_colour=self.ANSI_colours["green on black"], - hours=localtime.tm_hour, - minutes=localtime.tm_min, - seconds=localtime.tm_sec, - ETA_colour=self.ANSI_colours["red on black"], - ETA=eta, - units=units, - tpr=tpr, - ETF_colour=self.ANSI_colours["blue"], - ETF=etf, - mem_use_colour=self.ANSI_colours["magenta"], - mem_use=total_mem_use, - system_string_colour=self.ANSI_colours["yellow"], - system_string=system_string, - closing_colour=self.ANSI_colours["reset"], - ), - self.grid_options["verbosity"], - 1, - ) - - def vb2print(self, system_dict, cmdline_string): - print( - "Running this system now on thread {ID}\n{blue}{cmdline}{reset}\n".format( - ID=self.process_ID, - blue=self.ANSI_colours["blue"], - cmdline=cmdline_string, - reset=self.ANSI_colours["reset"], + self.verbose_print( + "binary_c output nothing - this is strange. 
If there is ensemble output being generated then this is fine.", + self.grid_options["verbosity"], + 3, ) - ) diff --git a/binarycpython/utils/grid_logging.py b/binarycpython/utils/grid_logging.py new file mode 100644 index 0000000000000000000000000000000000000000..2ee7a5b174b142141f92ca89e97f720d996f23d1 --- /dev/null +++ b/binarycpython/utils/grid_logging.py @@ -0,0 +1,461 @@ +""" +The class extension for the population object that contains logging functionality +""" + +# pylint: disable=E1101 + +import os +import sys +import time +import logging +import strip_ansi + +import binarycpython.utils.functions + +from binarycpython.utils.functions import ( + format_number, + trem, + remove_file, + verbose_print +) +from binarycpython.utils.grid_options_defaults import secs_per_day + +from binarycpython.utils.custom_logging_functions import ( + binary_c_log_code, + create_and_load_logging_function, + autogen_C_logging_code + +) + + +class grid_logging: + """ + The class extension for the population object that contains logging functionality + """ + def __init__(self, **kwargs): + # don't do anything: we just inherit from this class + return + + def _set_custom_logging(self): + """ + Function/routine to set all the custom logging so that the function memory pointer + is known to the grid. + + When the memory adress is loaded and the library file is set we'll skip rebuilding the library + """ + + # Only if the values are the 'default' unset values + if ( + self.grid_options["custom_logging_func_memaddr"] == -1 + and self.grid_options["_custom_logging_shared_library_file"] is None + ): + self.verbose_print( + "Creating and loading custom logging functionality", + self.grid_options["verbosity"], + 1, + ) + # C_logging_code gets priority of C_autogen_code + if self.grid_options["C_logging_code"]: + # Generate entire shared lib code around logging lines + custom_logging_code = binary_c_log_code( + self.grid_options["C_logging_code"], + verbosity=self.grid_options["verbosity"] + - (self._CUSTOM_LOGGING_VERBOSITY_LEVEL - 1), + ) + + # Load memory address + ( + self.grid_options["custom_logging_func_memaddr"], + self.grid_options["_custom_logging_shared_library_file"], + ) = create_and_load_logging_function( + custom_logging_code, + verbosity=self.grid_options["verbosity"] + - (self._CUSTOM_LOGGING_VERBOSITY_LEVEL - 1), + custom_tmp_dir=self.grid_options["tmp_dir"], + ) + + elif self.grid_options["C_auto_logging"]: + # Generate real logging code + logging_line = autogen_C_logging_code( + self.grid_options["C_auto_logging"], + verbosity=self.grid_options["verbosity"] + - (self._CUSTOM_LOGGING_VERBOSITY_LEVEL - 1), + ) + + # Generate entire shared lib code around logging lines + custom_logging_code = binary_c_log_code( + logging_line, + verbosity=self.grid_options["verbosity"] + - (self._CUSTOM_LOGGING_VERBOSITY_LEVEL - 1), + ) + + # Load memory address + ( + self.grid_options["custom_logging_func_memaddr"], + self.grid_options["_custom_logging_shared_library_file"], + ) = create_and_load_logging_function( + custom_logging_code, + verbosity=self.grid_options["verbosity"] + - (self._CUSTOM_LOGGING_VERBOSITY_LEVEL - 1), + custom_tmp_dir=self.grid_options["tmp_dir"], + ) + else: + self.verbose_print( + "Custom logging library already loaded. Not setting them again.", + self.grid_options["verbosity"], + 1, + ) + + def _print_info(self, run_number, total_systems, full_system_dict): + """ + Function to print info about the current system and the progress of the grid. 
+ + # color info tricks from https://ozzmaker.com/add-colour-to-text-in-python/ + https://stackoverflow.com/questions/287871/how-to-print-colored-text-in-terminal-in-python + """ + + # Define frequency + if self.grid_options["verbosity"] == 1: + print_freq = 1 + else: + print_freq = 10 + + if run_number % print_freq == 0: + binary_cmdline_string = self._return_argline(full_system_dict) + info_string = "{color_part_1} \ + {text_part_1}{end_part_1}{color_part_2} \ + {text_part_2}{end_part_2}".format( + color_part_1="\033[1;32;41m", + text_part_1="{}/{}".format(run_number, total_systems), + end_part_1="\033[0m", + color_part_2="\033[1;32;42m", + text_part_2="{}".format(binary_cmdline_string), + end_part_2="\033[0m", + ) + print(info_string) + + def _set_loggers(self): + """ + Function to set the loggers for the execution of the grid + """ + + # Set log file + binary_c_logfile = self.grid_options["log_file"] + + # Create directory + os.makedirs(os.path.dirname(binary_c_logfile), exist_ok=True) + + # Set up logger + self.logger = logging.getLogger("binary_c_python_logger") + self.logger.setLevel(self.grid_options["verbosity"]) + + # Reset handlers + self.logger.handlers = [] + + # Set formatting of output + log_formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + ) + + # Make and add file handlers + # make handler for output to file + handler_file = logging.FileHandler(filename=os.path.join(binary_c_logfile)) + handler_file.setFormatter(log_formatter) + handler_file.setLevel(logging.INFO) + + # Make handler for output to stdout + handler_stdout = logging.StreamHandler(sys.stdout) + handler_stdout.setFormatter(log_formatter) + handler_stdout.setLevel(logging.INFO) + + # Add the loggers + self.logger.addHandler(handler_file) + self.logger.addHandler(handler_stdout) + + ###################### + # Status logging + + def vb1print(self, ID, now, system_number, system_dict): + """ + Verbosity-level 1 printing, to keep an eye on a grid. 
+ + Input: + ID: thread ID for debugging (int): TODO fix this + now: the time now as a UNIX-style epoch in seconds (float) + system_number: the system number + """ + + # calculate estimated time of arrive (eta and eta_secs), time per run (tpr) + localtime = time.localtime(now) + + # calculate stats + n = self.shared_memory["n_saved_log_stats"].value + if n < 2: + # simple 1-system calculation: inaccurate + # but best for small n + dt = now - self.shared_memory["prev_log_time"][0] + dn = system_number - self.shared_memory["prev_log_system_number"][0] + else: + # average over n_saved_log_stats + dt = ( + self.shared_memory["prev_log_time"][0] + - self.shared_memory["prev_log_time"][n - 1] + ) + dn = ( + self.shared_memory["prev_log_system_number"][0] + - self.shared_memory["prev_log_system_number"][n - 1] + ) + + eta, units, tpr, eta_secs = trem( + dt, system_number, dn, self.grid_options["_total_starcount"] + ) + + # compensate for multithreading and modulo + tpr *= self.grid_options["num_processes"] * self.grid_options["modulo"] + + if eta_secs < secs_per_day: + fintime = time.localtime(now + eta_secs) + etf = "{hours:02d}:{minutes:02d}:{seconds:02d}".format( + hours=fintime.tm_hour, minutes=fintime.tm_min, seconds=fintime.tm_sec + ) + else: + d = int(eta_secs / secs_per_day) + if d == 1: + etf = "Tomorrow" + else: + etf = "In {} days".format(d) + + # modulo information + if self.grid_options["modulo"] == 1: + modulo = "" # usual case + else: + modulo = "%" + str(self.grid_options["modulo"]) + + # add up memory use from each thread + total_mem_use = sum(self.shared_memory["memory_use_per_thread"]) + + # make a string to describe the system e.g. M1, M2, etc. + system_string = "" + + # use the multiplicity if given + if "multiplicity" in system_dict: + nmult = int(system_dict["multiplicity"]) + else: + nmult = 4 + + # masses + for i in range(nmult): + i1 = str(i + 1) + if "M_" + i1 in system_dict: + system_string += ( + "M{}=".format(i1) + format_number(system_dict["M_" + i1]) + " " + ) + + # separation and orbital period + if "separation" in system_dict: + system_string += "a=" + format_number(system_dict["separation"]) + if "orbital_period" in system_dict: + system_string += "P=" + format_number(system_dict["orbital_period"]) + + # do the print + if self.grid_options["_total_starcount"] > 0: + self.verbose_print( + "{opening_colour}{system_number}/{total_starcount}{modulo} {pc_colour}{pc_complete:5.1f}% complete {time_colour}{hours:02d}:{minutes:02d}:{seconds:02d} {ETA_colour}ETA={ETA:7.1f}{units} tpr={tpr:2.2e} {ETF_colour}ETF={ETF} {mem_use_colour}mem:{mem_use:.1f}MB {system_string_colour}{system_string}{closing_colour}".format( + opening_colour=self.ANSI_colours["reset"] + + self.ANSI_colours["yellow on black"], + system_number=system_number, + total_starcount=self.grid_options["_total_starcount"], + modulo=modulo, + pc_colour=self.ANSI_colours["blue on black"], + pc_complete=(100.0 * system_number) + / (1.0 * self.grid_options["_total_starcount"]) + if self.grid_options["_total_starcount"] + else -1, + time_colour=self.ANSI_colours["green on black"], + hours=localtime.tm_hour, + minutes=localtime.tm_min, + seconds=localtime.tm_sec, + ETA_colour=self.ANSI_colours["red on black"], + ETA=eta, + units=units, + tpr=tpr, + ETF_colour=self.ANSI_colours["blue"], + ETF=etf, + mem_use_colour=self.ANSI_colours["magenta"], + mem_use=total_mem_use, + system_string_colour=self.ANSI_colours["yellow"], + system_string=system_string, + closing_colour=self.ANSI_colours["reset"], + ), + 
self.grid_options["verbosity"], + 1, + ) + else: + self.verbose_print( + "{opening_colour}{system_number}{modulo} {time_colour}{hours:02d}:{minutes:02d}:{seconds:02d} tpr={tpr:2.2e} {mem_use_colour}mem:{mem_use:.1f}MB {system_string_colour}{system_string}{closing_colour}".format( + opening_colour=self.ANSI_colours["reset"] + + self.ANSI_colours["yellow on black"], + system_number=system_number, + modulo=modulo, + time_colour=self.ANSI_colours["green on black"], + hours=localtime.tm_hour, + minutes=localtime.tm_min, + seconds=localtime.tm_sec, + tpr=tpr, + mem_use_colour=self.ANSI_colours["magenta"], + mem_use=total_mem_use, + system_string_colour=self.ANSI_colours["yellow"], + system_string=system_string, + closing_colour=self.ANSI_colours["reset"], + ), + self.grid_options["verbosity"], + 1, + ) + + def vb2print(self, system_dict, cmdline_string): + """ + Extra function for verbose printing + """ + + print( + "Running this system now on thread {ID}\n{blue}{cmdline}{reset}:\n\t{system_dict}\n".format( + ID=self.process_ID, + blue=self.ANSI_colours["blue"], + cmdline=cmdline_string, + reset=self.ANSI_colours["reset"], + system_dict=system_dict + ) + ) + + def verbose_print(self, *args, **kwargs): + """ + Wrapper method for the verbose print that calls the verbose print with the correct newline + + TODO: consider merging the two + """ + + # wrapper for functions.verbose_print to use the correct newline + newline = kwargs.get("newline", self.grid_options["log_newline"]) + if newline is None: + newline = "\n" + kwargs["newline"] = newline + + # Pass the rest to the original verbose print + verbose_print(*args, **kwargs) + + def _boxed(self, *stringlist, colour="yellow on black", boxchar="*", separator="\n"): + """ + Function to output a list of strings in a single box. + + Args: + list = a list of strings to be output. If these contain the separator + (see below) these strings are split by it. + separator = strings are split on this, default "\n" + colour = the colour to be used, usually this is 'yellow on black' + as set in the ANSI_colours dict + boxchar = the character used to make the box, '*' by default + + Note: handles tabs (\t) badly, do not use them! 
+ """ + strlen = 0 + strings = [] + lengths = [] + + # make a list of strings + if separator: + for l in stringlist: + strings += l.split(sep=separator) + else: + strings = stringlist + + # get lengths without ANSI codes + for string in strings: + lengths.append(len(strip_ansi.strip_ansi(string))) + + # hence the max length + strlen = max(lengths) + strlen += strlen % 2 + header = boxchar * (4 + strlen) + + # start output + out = self.ANSI_colours[colour] + header + "\n" + + # loop over strings to output, padding as required + for n, string in enumerate(strings): + if lengths[n] % 2 == 1: + string = " " + string + pad = " " * int((strlen - lengths[n]) / 2) + out = out + boxchar + " " + pad + string + pad + " " + boxchar + "\n" + + # close output and return + out = out + header + "\n" + self.ANSI_colours["reset"] + return out + + def _get_stream_logger(self, level=logging.DEBUG): + """ + Function to set up the streamlogger + """ + + # Format + fmt = "[%(asctime)s %(levelname)-8s %(processName)s] --- %(message)s" + formatter = logging.Formatter(fmt) + + # Streamhandle + sh = logging.StreamHandler(stream=sys.stdout) + sh.setLevel(level) + sh.setFormatter(formatter) + + # Logger itself + stream_logger = logging.getLogger("stream_logger") + stream_logger.handlers = [] + stream_logger.setLevel(level) + stream_logger.addHandler(sh) + + return stream_logger + + def _clean_up_custom_logging(self, evol_type): + """ + Function to clean up the custom logging. + Has two types: + 'single': + - removes the compiled shared library + (which name is stored in grid_options['_custom_logging_shared_library_file']) + - TODO: unloads/frees the memory allocated to that shared library + (which is stored in grid_options['custom_logging_func_memaddr']) + - sets both to None + 'multiple': + - TODO: make this and design this + """ + + if evol_type == "single": + self.verbose_print( + "Cleaning up the custom logging stuff. type: single", + self.grid_options["verbosity"], + 1, + ) + + # TODO: Explicitly unload the library + + # Reset the memory adress location + self.grid_options["custom_logging_func_memaddr"] = -1 + + # remove shared library files + if self.grid_options["_custom_logging_shared_library_file"]: + remove_file( + self.grid_options["_custom_logging_shared_library_file"], + self.grid_options["verbosity"], + ) + self.grid_options["_custom_logging_shared_library_file"] = None + + if evol_type == "population": + self.verbose_print( + "Cleaning up the custom logging stuffs. type: population", + self.grid_options["verbosity"], + 1, + ) + + # TODO: make sure that these also work. not fully sure if necessary tho. + # whether its a single file, or a dict of files/mem addresses + + if evol_type == "MC": + pass diff --git a/binarycpython/utils/grid_options_defaults.py b/binarycpython/utils/grid_options_defaults.py index 68db0f4b9c677a7758f1f4cf2d2ef55a54568868..dfc1fe43c9ae95601ef30645c5bd5c92ab0d4bef 100644 --- a/binarycpython/utils/grid_options_defaults.py +++ b/binarycpython/utils/grid_options_defaults.py @@ -11,840 +11,607 @@ There are several other functions in this module, mostly to generate help texts With this its also possible to automatically generate a document containing all the setting names + descriptions. All the options starting with _ should not be changed by the user except when you really know what you're doing (which is probably hacking the code :P) + +TODO: reconsider having this all as class methods. It seems unnecessary to have all these functions as class methods. 
""" +# pylint: disable=E1101 + import os +import shutil +import sys from binarycpython.utils.custom_logging_functions import temp_dir -from binarycpython.utils.functions import return_binary_c_version_info -_LOGGER_VERBOSITY_LEVEL = 1 -_CUSTOM_LOGGING_VERBOSITY_LEVEL = 2 _MOE2017_VERBOSITY_LEVEL = 5 _MOE2017_VERBOSITY_INTERPOLATOR_LEVEL = 6 _MOE2017_VERBOSITY_INTERPOLATOR_EXTRA_LEVEL = 7 -# Options dict -grid_options_defaults_dict = { - ########################## - # general (or unordered..) - ########################## - "num_cores": 1, # total number of cores used to evolve the population - "parse_function": None, # Function to parse the output with. - "multiplicity_fraction_function": 0, # Which multiplicity fraction function to use. 0: None, 1: Arenou 2010, 2: Rhagavan 2010, 3: Moe and di Stefano 2017 - "tmp_dir": temp_dir(), # Setting the temp dir of the program - "_main_pid": -1, # Placeholder for the main process id of the run. - "save_ensemble_chunks": True, # Force the ensemble chunk to be saved even if we are joining a thread (just in case the joining fails) - "combine_ensemble_with_thread_joining": True, # Flag on whether to combine everything and return it to the user or if false: write it to data_dir/ensemble_output_{population_id}_{thread_id}.json - "_commandline_input": "", - "log_runtime_systems": 0, # whether to log the runtime of the systems (1 file per thread. stored in the tmp_dir) - "_actually_evolve_system": True, # Whether to actually evolve the systems of just act as if. for testing. used in _process_run_population_grid - "max_queue_size": 1000, # Maximum size of the system call queue. - "run_zero_probability_system": True, # Whether to run the zero probability systems - "_zero_prob_stars_skipped": 0, - "ensemble_factor_in_probability_weighted_mass": False, # Whether to multiply the ensemble results by 1/probability_weighted_mass - "do_dry_run": True, # Whether to do a dry run to calculate the total probability for this run - "custom_generator": None, # Place for the custom system generator - "exit_after_dry_run": False, # Exit after dry run? - ########################## - # Execution log: - ########################## - "verbosity": 0, # Level of verbosity of the simulation - "log_file": os.path.join( - temp_dir(), "binary_c_python.log" - ), # Set to None to not log to file. The directory will be created - "log_dt": 5, # time between vb=1 logging outputs - "n_logging_stats": 50, # number of logging stats used to calculate time remaining (etc.) default = 50 - ########################## - # binary_c files - ########################## - "_binary_c_executable": os.path.join( - os.environ["BINARY_C"], "binary_c" - ), # TODO: make this more robust - "_binary_c_shared_library": os.path.join( - os.environ["BINARY_C"], "src", "libbinary_c.so" - ), # TODO: make this more robust - "_binary_c_config_executable": os.path.join( - os.environ["BINARY_C"], "binary_c-config" - ), # TODO: make this more robust - "_binary_c_dir": os.environ["BINARY_C"], - ########################## - # Moe and di Stefano (2017) internal settings - ########################## - "_loaded_Moe2017_data": False, # Holds flag whether the Moe and di Stefano (2017) data is loaded into memory - "_set_Moe2017_grid": False, # Whether the Moe and di Stefano (2017) grid has been loaded - "Moe2017_options": None, # Holds the Moe and di Stefano (2017) options. 
- "_Moe2017_JSON_data": None, # Stores the data - ########################## - # Custom logging - ########################## - "C_auto_logging": None, # Should contain a dictionary where the keys are they headers - # and the values are lists of parameters that should be logged. - # This will get parsed by autogen_C_logging_code in custom_logging_functions.py - "C_logging_code": None, # Should contain a string which holds the logging code. - "custom_logging_func_memaddr": -1, # Contains the custom_logging functions memory address - "_custom_logging_shared_library_file": None, # file containing the .so file - ########################## - # Store pre-loading: - ########################## - "_store_memaddr": -1, # Contains the store object memory address, useful for pre loading. - # defaults to -1 and isn't used if that's the default then. - ########################## - # Log args: logging of arguments - ########################## - "log_args": 0, # unused - "log_args_dir": "/tmp/", # unused - ########################## - # Population evolution - ########################## - ## General - "evolution_type": "grid", # Flag for type of population evolution - "_evolution_type_options": [ - "grid", - "custom_generator", - ], # available choices for type of population evolution. # TODO: fill later with Monte Carlo, source file - "_system_generator": None, # value that holds the function that generates the system - # (result of building the grid script) - "source_file_filename": None, # filename for the source - "_count": 0, # count of systems - "_total_starcount": 0, # Total count of systems in this generator - "_probtot": 0, # total probability - "weight": 1.0, # weighting for the probability - "repeat": 1, # number of times to repeat each system (probability is adjusted to be 1/repeat) - "_start_time_evolution": 0, # Start time of the grid - "_end_time_evolution": 0, # end time of the grid - "_errors_found": False, # Flag whether there are any errors from binary_c - "_errors_exceeded": False, # Flag whether the number of errors have exceeded the limit - "_failed_count": 0, # number of failed systems - "_failed_prob": 0, # Summed probability of failed systems - "failed_systems_threshold": 20, # Maximum failed systems per process allowed to fail before the process stops logging the failing systems. - "_failed_systems_error_codes": [], # List to store the unique error codes - "_population_id": 0, # Random id of this grid/population run, Unique code for the population. Should be set only once by the controller process. - "_total_mass_run": 0, # To count the total mass that thread/process has ran - "_total_probability_weighted_mass_run": 0, # To count the total mass * probability for each system that thread/process has ran - "modulo": 1, # run modulo n of the grid. - "start_at": 0, # start at the first model - ## Grid type evolution - "_grid_variables": {}, # grid variables - "gridcode_filename": None, # filename of gridcode - "symlink latest gridcode": True, # symlink to latest gridcode - ## Monte carlo type evolution - # TODO: make MC options - ## Evolution from source file - # TODO: make run from sourcefile options. 
- ## Other no yet implemented parts for the population evolution part - # # start at this model number: handy during debugging - # # to skip large parts of the grid - # start_at => 0 - # global_error_string => undef, - # monitor_files => [], - # nextlogtime => 0, - # nthreads => 1, # number of threads - # # start at model offset (0-based, so first model is zero) - # offset => 0, - # resolution=>{ - # shift =>0, - # previous=>0, - # n =>{} # per-variable resolution - # }, - # thread_q => undef, - # threads => undef, # array of threads objects - # tstart => [gettimeofday], # flexigrid start time - # __nvar => 0, # number of grid variables - # _varstub => undef, - # _lock => undef, - # _evcode_pids => [], - # }; - ######################################## - # Slurm stuff - ######################################## - "slurm": 0, # dont use the slurm by default. 1 = use slurm - # "slurm_ntasks": 1, # CPUs required per array job: usually only need this - # "slurm_command": "", # Command that slurm runs (e.g. evolve or join_datafiles) - # "slurm_dir": "", # working directory containing scripts output logs etc. - # "slurm_njobs": 0, # number of scripts; set to 0 as default - # "slurm_jobid": "", # slurm job id (%A) - # "slurm_memory": 512, # in MB, the memory use of the job - # "slurm_warn_max_memory": 1024, # in MB : warn if mem req. > this - # "slurm_use_all_node_CPUs": 0, # 1 = use all of a node's CPUs. 0 = use a given number of CPUs - # "slurm_postpone_join": 0, # if 1 do not join on slurm, join elsewhere. want to do it off the slurm grid (e.g. with more RAM) - # "slurm_jobarrayindex": "", # slurm job array index (%a) - # "slurm_jobname": "binary_grid", # default - # "slurm_partition": None, - # "slurm_time": 0, # total time. 0 = infinite time - # "slurm_postpone_sbatch": 0, # if 1: don't submit, just make the script - # "slurm_array": None, # override for --array, useful for rerunning jobs - # "slurm_use_all_node_CPUs": 0, # if given nodes, set to 1 - # # if given CPUs, set to 0 - # # you will want to use this if your Slurm SelectType is e.g. linear - # # which means it allocates all the CPUs in a node to the job - # "slurm_control_CPUs": 0, # if so, leave this many for Pythons control (0) - # "slurm_array": None, # override for --array, useful for rerunning jobs - # "slurm_partition": None, # MUST be defined - # "slurm_extra_settings": {}, # Place to put extra configuration for the SLURM batch file. The key and value of the dict will become the key and value of the line in te slurm batch file. Will be put in after all the other settings (and before the command). Take care not to overwrite something without really meaning to do so. - ######################################## - # Condor stuff - ######################################## - "condor": 0, # 1 to use condor, 0 otherwise - # "condor_command": "", # condor command e.g. "evolve", "join" - # "condor_dir": "", # working directory containing e.g. scripts, output, logs (e.g. should be NFS available to all) - # "condor_njobs": "", # number of scripts/jobs that CONDOR will run in total - # "condor_jobid": "", # condor job id - # "condor_postpone_join": 0, # if 1, data is not joined, e.g. if you want to do it off the condor grid (e.g. 
with more RAM) - # # "condor_join_machine": None, # if defined then this is the machine on which the join command should be launched (must be sshable and not postponed) - # "condor_join_pwd": "", # directory the join should be in (defaults to $ENV{PWD} if undef) - # "condor_memory": 1024, # in MB, the memory use (ImageSize) of the job - # "condor_universe": "vanilla", # usually vanilla universe - # "condor_extra_settings": {}, # Place to put extra configuration for the CONDOR submit file. The key and value of the dict will become the key and value of the line in te slurm batch file. Will be put in after all the other settings (and before the command). Take care not to overwrite something without really meaning to do so. - # snapshots and checkpoints - # condor_snapshot_on_kill=>0, # if 1 snapshot on SIGKILL before exit - # condor_load_from_snapshot=>0, # if 1 check for snapshot .sv file and load it if found - # condor_checkpoint_interval=>0, # checkpoint interval (seconds) - # condor_checkpoint_stamp_times=>0, # if 1 then files are given timestamped names - # (warning: lots of files!), otherwise just store the lates - # condor_streams=>0, # stream stderr/stdout by default (warning: might cause heavy network load) - # condor_save_joined_file=>0, # if 1 then results/joined contains the results - # (useful for debugging, otherwise a lot of work) - # condor_requirements=>'', # used? - # # resubmit options : if the status of a condor script is - # # either 'finished','submitted','running' or 'crashed', - # # decide whether to resubmit it. - # # NB Normally the status is empty, e.g. on the first run. - # # These are for restarting runs. - # condor_resubmit_finished=>0, - # condor_resubmit_submitted=>0, - # condor_resubmit_running=>0, - # condor_resubmit_crashed=>0, - ########################## - # Unordered. Need to go through this. Copied from the perl implementation. - ########################## - ## - # return_array_refs=>1, # quicker data parsing mode - # sort_args=>1, - # save_args=>1, - # nice=>'nice -n +20', # nice command e.g. 'nice -n +10' or '' - # timeout=>15, # seconds until timeout - # log_filename=>"/scratch/davidh/results_simulations/tmp/log.txt", - # # current_log_filename=>"/scratch/davidh/results_simulations/tmp/grid_errors.log", - ############################################################ - # Set default grid properties (in %self->{_grid_options}} - # and %{$self->{_bse_options}}) - # This is the first thing that should be called by the user! 
- ############################################################ - # # set signal handlers for timeout - # $self->set_class_signal_handlers(); - # # set operating system - # my $os = rob_misc::operating_system(); - # %{$self->{_grid_options}}=( - # # save operating system - # operating_system=>$os, - # # process name - # process_name => 'binary_grid'.$VERSION, - # grid_defaults_set=>1, # so we know the grid_defaults function has been called - # # grid suspend files: assume binary_c by default - # suspend_files=>[$tmp.'/force_binary_c_suspend', - # './force_binary_c_suspend'], - # snapshot_file=>$tmp.'/binary_c-snapshot', - # ######################################## - # # infomration about the running grid script - # ######################################## - # working_directory=>cwd(), # the starting directory - # perlscript=>$0, # the name of the perlscript - # perlscript_arguments=>join(' ',@ARGV), # arguments as a string - # perl_executable=>$^X, # the perl executable - # command_line=>join(' ',$0,@ARGV), # full command line - # process_ID=>$$, # process ID of the main perl script - # ######################################## - # # GRID - # ######################################## - # # if undef, generate gridcode, otherwise load the gridcode - # # from this file. useful for debugging - # gridcode_from_file => undef, - # # assume binary_grid perl backend by default - # backend => - # $self->{_grid_options}->{backend} // - # $binary_grid2::backend // - # 'binary_grid::Perl', - # # custom C function for output : this automatically - # # binds if a function is available. - # C_logging_code => undef, - # C_auto_logging => undef, - # custom_output_C_function_pointer => binary_c_function_bind(), - # # control flow - # rungrid=>1, # usually run the grid, but can be 0 - # # to skip it (e.g. for condor/slurm runs) - # merge_datafiles=>'', - # merge_datafiles_filelist=>'', - # # parameter space options - # binary=>0, # set to 0 for single stars, 1 for binaries - # # if use_full_resolution is 1, then run a dummy grid to - # # calculate the resolution. this could be slow... - # use_full_resolution => 1, - # # the probability in any distribution must be within - # # this tolerance of 1.0, ignored if undef (if you want - # # to run *part* of the parameter space then this *must* be undef) - # probability_tolerance=>undef, - # # how to deal with a failure of the probability tolerance: - # # 0 = nothing - # # 1 = warning - # # 2 = stop - # probability_tolerance_failmode=>1, - # # add up and log system error count and probability - # add_up_system_errors=>1, - # log_system_errors=>1, - # # codes, paths, executables etc. - # # assume binary_c by default, and set its defaults - # code=>'binary_c', - # arg_prefix=>'--', - # prog=>'binary_c', # executable - # nice=>'nice -n +0', # nice command - # ionice=>'', - # # compress output? - # binary_c_compression=>0, - # # get output as array of pre-split array refs - # return_array_refs=>1, - # # environment - # shell_environment=>undef, - # libpath=>undef, # for backwards compatibility - # # where is binary_c? need this to get the values of some counters - # rootpath=>$self->okdir($ENV{BINARY_C_ROOTPATH}) // - # $self->okdir($ENV{HOME}.'/progs/stars/binary_c') // - # '.' , # last option is a fallback ... will fail if it doesn't exist - # srcpath=>$self->okdir($ENV{BINARY_C_SRCPATH}) // - # $self->okdir($ENV{BINARY_C_ROOTPATH}.'/src') // - # $self->okdir($ENV{HOME}.'/progs/stars/binary_c/src') // - # './src' , # last option is fallback... 
will fail if it doesn't exist - # # stack size per thread in megabytes - # threads_stack_size=>50, - # # thread sleep time between starting the evolution code and starting - # # the grid - # thread_presleep=>0, - # # threads - # # Max time a thread can sit looping (with calls to tbse_line) - # # before a warning is issued : NB this does not catch real freezes, - # # just infinite loops (which still output) - # thread_max_freeze_time_before_warning=>10, - # # run all models by default: modulo=1, offset=0 - # modulo=>1, - # offset=>0, - # # max number of stars on the queue - # maxq_per_thread => 100, - # # data dump file : undef by default (do nothing) - # results_hash_dumpfile => '', - # # compress files with bzip2 by default - # compress_results_hash => 1, - # ######################################## - # # CPU - # ######################################## - # cpu_cap=>0, # if 1, limits to one CPU - # cpu_affinity => 0, # do not bind to a CPU by default - # ######################################## - # # Code, Timeouts, Signals - # ######################################## - # binary_grid_code_filtering=>1, # you want this, it's (MUCH!) faster - # pre_filter_file=>undef, # dump pre filtered code to this file - # post_filter_file=>undef, # dump post filtered code to this file - # timeout=>30, # timeout in seconds - # timeout_vb=>0, # no timeout logging - # tvb=>0, # no thread logging - # nfs_sleep=>1, # time to wait for NFS to catch up with file accesses - # # flexigrid checks the timeouts every - # # flexigrid_timeout_check_interval seconds - # flexigrid_timeout_check_interval=>0.01, - # # this is set to 1 when the grid is finished - # flexigrid_finished=>0, - # # allow signals by default - # 'no signals'=>0, - # # but perhaps disable specific signals? - # 'disable signal'=>{INT=>0,ALRM=>0,CONT=>0,USR1=>0,STOP=>0}, - # # dummy variables - # single_star_period=>1e50, # orbital period of a single star - # #### timers : set timers to 0 (or empty list) to ignore, - # #### NB these must be given context (e.g. main::xyz) - # #### for functions not in binary_grid - # timers=>0, - # timer_subroutines=>[ - # # this is a suggested default list - # 'flexigrid', - # 'set_next_alarm', - # 'vbout', - # 'vbout_fast', - # 'run_flexigrid_thread', - # 'thread_vb' - # ], - # ######################################## - # # INPUT/OUTPUT - # ######################################## - # blocking=>undef, # not yet set - # # prepend command with stdbuf to stop buffering (if available) - # stdbuf_command=>`stdbuf --version`=~/stdbuf \(GNU/ ? ' stdbuf -i0 -o0 -e0 ' : undef, - # vb=>("@ARGV"=~/\Wvb=(\d+)\W/)[0] // 0, # set to 1 (or more) for verbose output to the screen - # log_dt_secs=>1, # log output to stdout~every log_dt_secs seconds - # nmod=>10, # every nmod models there is output to the screen, - # # if log_dt_secs has been exceeded also (ignored if 0) - # colour=>1, # set to 1 to use the ANSIColor module for colour output - # log_args=>0, # do not log args in files - # log_fins=>0, # log end of runs too - # sort_args=>0, # do not sort args - # save_args=>0, # do not save args in a string - # log_args_dir=>$tmp, # where to output the args files - # always_reopen_arg_files=>0, # if 1 then arg files are always closed and reopened - # (may cause a lot of disk I/O) - # lazy_arg_sending=>1, # if 1, the previous args are remembered and - # # only args that changed are sent (except M1, M2 etc. 
which always - # # need sending) - # # force output files to open on a local disk (not an NFS partion) - # # not sure how to do this on another OS - # force_local_hdd_use=>($os eq 'unix'), - # # for verbose output, define the newline - # # For terminals use "\x0d", for files use "\n", in the - # # case of multiple threads this will be set to \n - # newline=> "\x0d", - # # use reset_stars_defaults - # reset_stars_defaults=>1, - # # set signal captures: argument determines behaviour when the code locks up - # # 0: exit - # # 1: reset and try the next star (does this work?!) - # alarm_procedure=>1, - # # exit on eval failure? - # exit_on_eval_failure=>1, - # ## functions: these should be set by perl lexical name - # ## (they are automatically converted to function pointers - # ## at runtime) - # # function to be called just before a thread is created - # thread_precreate_function=>undef, - # thread_precreate_function_pointer=>undef, - # # function to be called just after a thread is created - # # (from inside the thread just before *grid () call) - # threads_entry_function=>undef, - # threads_entry_function_pointer=>undef, - # # function to be called just after a thread is finished - # # (from inside the thread just after *grid () call) - # threads_flush_function=>undef, - # threads_flush_function_pointer=>undef, - # # function to be called just after a thread is created - # # (but external to the thread) - # thread_postrun_function=>undef, - # thread_postrun_function_pointer=>undef, - # # function to be called just before a thread join - # # (external to the thread) - # thread_prejoin_function=>undef, - # thread_prejoin_function_pointer=>undef, - # # default to using the internal join_flexigrid_thread function - # threads_join_function=>'binary_grid2::join_flexigrid_thread', - # threads_join_function_pointer=>sub{return $self->join_flexigrid_thread(@_)}, - # # function to be called just after a thread join - # # (external to the thread) - # thread_postjoin_function=>undef, - # thread_postjoin_function_pointer=>undef, - # # usually, parse_bse in the main script is called - # parse_bse_function=>'main::parse_bse', - # parse_bse_function_pointer=>undef, - # # if starting_snapshot_file is defined, load initial - # # values for the grid from the snapshot file rather - # # than a normal initiation: this enables you to - # # stop and start a grid - # starting_snapshot_file=>undef, -} - -# Grid containing the descriptions of the options # TODO: add input types for all of them -grid_options_descriptions = { - "tmp_dir": "Directory where certain types of output are stored. The grid code is stored in that directory, as well as the custom logging libraries. Log files and other diagnostics will usually be written to this location, unless specified otherwise", # TODO: improve this - "_binary_c_dir": "Director where binary_c is stored. This options are not really used", - "_binary_c_config_executable": "Full path of the binary_c-config executable. This options is not used in the population object.", - "_binary_c_executable": "Full path to the binary_c executable. This options is not used in the population object.", - "_binary_c_shared_library": "Full path to the libbinary_c file. This options is not used in the population object", - "verbosity": "Verbosity of the population code. Default is 0, by which only errors will be printed. Higher values will show more output, which is good for debugging.", - "binary": "Set this to 1 if the population contains binaries. 
Input: int", # TODO: write what effect this has. - "num_cores": "The number of cores that the population grid will use. The multiprocessing is useful but make sure to figure out how many logical cores the machine has (use e.g. psutil.cpu_count(logical=False) to find the true number of cores, psutil.cpu_count(logical=True) to find the number of logical cores). The core is multi processed, not multi threaded, and will gain no extra speed when num_cores exceeds the number of logical cores. Input: int", - "_start_time_evolution": "Variable storing the start timestamp of the population evolution. Set by the object itself.", # TODO: make sure this is logged to a file - "_end_time_evolution": "Variable storing the end timestamp of the population evolution. Set by the object itself", # TODO: make sure this is logged to a file - "_total_starcount": "Variable storing the total number of systems in the generator. Used and set by the population object.", - "_custom_logging_shared_library_file": "filename for the custom_logging shared library. Used and set by the population object", - "_errors_found": "Variable storing a Boolean flag whether errors by binary_c are encountered.", - "_errors_exceeded": "Variable storing a Boolean flag whether the number of errors was higher than the set threshold (failed_systems_threshold). If True, then the command line arguments of the failing systems will not be stored in the failed_system_log files.", - "source_file_filename": "Variable containing the source file containing lines of binary_c command line calls. These all have to start with binary_c.", # TODO: Expand - "C_auto_logging": "Dictionary containing parameters to be logged by binary_c. The structure of this dictionary is as follows: the key is used as the headline which the user can then catch. The value at that key is a list of binary_c system parameters (like star[0].mass)", - "C_logging_code": "Variable to store the exact code that is used for the custom_logging. In this way the user can do more complex logging, as well as putting these logging strings in files.", - "_failed_count": "Variable storing the number of failed systems.", - "_evolution_type_options": "List containing the evolution type options.", - "_failed_prob": "Variable storing the total probability of all the failed systems", - "_failed_systems_error_codes": "List storing the unique error codes raised by binary_c of the failed systems", - "_grid_variables": "Dictionary storing the grid_variables. These contain properties which are accessed by the _generate_grid_code function", - "_population_id": "Variable storing a unique 32-char hex string.", - "_commandline_input": "String containing the arguments passed to the population object via the command line. Set and used by the population object.", - "_system_generator": "Function object that contains the system generator function. This can be from a grid, or a source file, or a Monte Carlo grid.", - "gridcode_filename": "Filename for the grid code. Set and used by the population object. TODO: allow the user to provide their own function, rather than only a generated function.", - "log_args": "Boolean to log the arguments of the current running system. ", - "log_args_dir": "Directory to log the arguments of the current running system to.", - "log_file": "Log file for the population object. Unused", # TODO: fix the functionality for this and describe it properly - "custom_logging_func_memaddr": "Memory address where the custom_logging_function is stored. 
Input: int", - "_count": "Counter tracking which system the generator is on.", - "_probtot": "Total probability of the population.", # TODO: check whether this is used properly throughout - "_main_pid": "Main process ID of the master process. Used and set by the population object.", - "_store_memaddr": "Memory address of the store object for binary_c.", - "failed_systems_threshold": "Variable storing the maximum number of systems that are allowed to fail before logging their command line arguments to failed_systems log files", - "parse_function": "Function that the user can provide to handle the output the binary_c. This function has to take the arguments (self, output). Its best not to return anything in this function, and just store stuff in the self.grid_results dictionary, or just output results to a file", - "condor": "Int flag whether to use a condor type population evolution. Not implemented yet.", # TODO: describe this in more detail - "slurm": "Int flag whether to use a Slurm type population evolution.", # TODO: describe this in more detail - "weight": "Weight factor for each system. The calculated probability is multiplied by this. If the user wants each system to be repeated several times, then this variable should not be changed, rather change the _repeat variable instead, as that handles the reduction in probability per system. This is useful for systems that have a process with some random element in it.", # TODO: add more info here, regarding the evolution splitting. - "repeat": "Factor of how many times a system should be repeated. Consider the evolution splitting binary_c argument for supernovae kick repeating.", # TODO: make sure this is used. - "evolution_type": "Variable containing the type of evolution used of the grid. Multiprocessing or linear processing", - "combine_ensemble_with_thread_joining": "Boolean flag on whether to combine everything and return it to the user or if false: write it to data_dir/ensemble_output_{population_id}_{thread_id}.json", - "log_runtime_systems": "Whether to log the runtime of the systems . Each systems run by the thread is logged to a file and is stored in the tmp_dir. (1 file per thread). Don't use this if you are planning to run a lot of systems. This is mostly for debugging and finding systems that take long to run. Integer, default = 0. if value is 1 then the systems are logged", - "_total_mass_run": "To count the total mass that thread/process has ran", - "_total_probability_weighted_mass_run": "To count the total mass * probability for each system that thread/process has ran", - "_actually_evolve_system": "Whether to actually evolve the systems of just act as if. for testing. used in _process_run_population_grid", - "max_queue_size": "Maximum size of the queue that is used to feed the processes. Don't make this too big! Default: 1000. Input: int", - "_set_Moe2017_grid": "Internal flag whether the Moe and di Stefano (2017) grid has been loaded", - "run_zero_probability_system": "Whether to run the zero probability systems. Default: True. Input: Boolean", - "_zero_prob_stars_skipped": "Internal counter to track how many systems are skipped because they have 0 probability", - "ensemble_factor_in_probability_weighted_mass": "Flag to multiply all the ensemble results with 1/probability_weighted_mass", - "multiplicity_fraction_function": "Which multiplicity fraction function to use. 
0: None, 1: Arenou 2010, 2: Rhagavan 2010, 3: Moe and di Stefano (2017) 2017", - "m&s_options": "Internal variable that holds the Moe and di Stefano (2017) options. Don't write to this your self", - "_loaded_Moe2017_data": "Internal variable storing whether the Moe and di Stefano (2017) data has been loaded into memory", - "do_dry_run": "Whether to do a dry run to calculate the total probability for this run", - "_Moe2017_JSON_data": "Location to store the loaded Moe&diStefano2017 dataset", # Stores the data -} - -### -# - -MIN_MASS_BINARY_C = float( - return_binary_c_version_info(parsed=True)["macros"]["BINARY_C_MINIMUM_STELLAR_MASS"] -) - - -# Default options for the Moe & di Stefano grid -moe_di_stefano_default_options = { - # place holder for the JSON data to be used if a file - # isn't specified - "JSON": None, - # resolution data - "resolutions": { - "M": [ - 20, # M1 - 20, # M2 (i.e. q) - 0, # M3 currently unused - 0, # M4 currently unused - ], - "logP": [ - 20, # P2 (binary period) - 0, # P3 (triple period) currently unused - 0, # P4 (quadruple period) currently unused - ], - "ecc": [ - 10, # e (binary eccentricity) - 0, # e2 (triple eccentricity) currently unused - 0, # e3 (quadruple eccentricity) currently unused - ], - }, - "samplerfuncs": { - "M": [None, None, None, None], - "logP": [None, None, None], - "ecc": [None, None, None], - }, - "ranges": { - # stellar masses (Msun) - "M": [ - MIN_MASS_BINARY_C - * 1.05, # 0.08 is a tad bit above the minimum mass. Don't sample at 0.07, otherwise the first row of q values will have a phasevol of 0. Anything higher is fine. - 80.0, # (rather arbitrary) upper mass cutoff - ], - "q": [ - None, # artificial qmin : set to None to use default - None, # artificial qmax : set to None to use default - ], - "logP": [0.0, 8.0], # 0 = log10(1 day) # 8 = log10(10^8 days) - "ecc": [0.0, 0.99], - }, - # minimum stellar mass - "Mmin": MIN_MASS_BINARY_C, # We take the value that binary_c has set as the default - # multiplicity model (as a function of log10M1) - # - # You can use 'Poisson' which uses the system multiplicity - # given by Moe and maps this to single/binary/triple/quad - # fractions. - # - # Alternatively, 'data' takes the fractions directly - # from the data, but then triples and quadruples are - # combined (and there are NO quadruples). - "multiplicity_model": "Poisson", - # multiplicity modulator: - # [single, binary, triple, quadruple] - # - # e.g. [1,0,0,0] for single stars only - # [0,1,0,0] for binary stars only - # - # defaults to [1,1,0,0] i.e. all types - # - "multiplicity_modulator": [ - 1, # single - 1, # binary - 0, # triple - 0, # quadruple - ], - # given a mix of multiplicities, you can either (noting that - # here (S,B,T,Q) = appropriate modulator * model(S,B,T,Q) ) - # - # 'norm' : normalise so the whole population is 1.0 - # after implementing the appropriate fractions - # S/(S+B+T+Q), B/(S+B+T+Q), T/(S+B+T+Q), Q/(S+B+T+Q) - # - # 'raw' : stick to what is predicted, i.e. - # S/(S+B+T+Q), B/(S+B+T+Q), T/(S+B+T+Q), Q/(S+B+T+Q) - # without normalisation - # (in which case the total probability < 1.0 unless - # all you use single, binary, triple and quadruple) - # - # 'merge' : e.g. if you only have single and binary, - # add the triples and quadruples to the binaries, so - # binaries represent all multiple systems - # ... - # *** this is canonical binary population synthesis *** - # - # Note: if multiplicity_modulator == [1,1,1,1] this - # option does nothing (equivalent to 'raw'). 
- # - # - # note: if you only set one multiplicity_modulator - # to 1, and all the others to 0, then normalising - # will mean that you effectively have the same number - # of stars as single, binary, triple or quad (whichever - # is non-zero) i.e. the multiplicity fraction is ignored. - # This is probably not useful except for - # testing purposes or comparing to old grids. - "normalize_multiplicities": "merge", - # q extrapolation (below 0.15 and above 0.9) method. We can choose from ['flat', 'linear', 'plaw2', 'nolowq'] - "q_low_extrapolation_method": "linear", - "q_high_extrapolation_method": "linear", -} - -moe_di_stefano_default_options_description = { - "resolutions": "", - "ranges": "", - "Mmin": "Minimum stellar mass", - "multiplicity_model": """ -multiplicity model (as a function of log10M1) - -You can use 'Poisson' which uses the system multiplicity -given by Moe and maps this to single/binary/triple/quad -fractions. - -Alternatively, 'data' takes the fractions directly -from the data, but then triples and quadruples are -combined (and there are NO quadruples). -""", - "multiplicity_modulator": """ -[single, binary, triple, quadruple] - -e.g. [1,0,0,0] for single stars only - [0,1,0,0] for binary stars only - -defaults to [1,1,0,0] i.e. singles and binaries -""", - "normalize_multiplicities": """ -'norm': normalise so the whole population is 1.0 - after implementing the appropriate fractions - S/(S+B+T+Q), B/(S+B+T+Q), T/(S+B+T+Q), Q/(S+B+T+Q) - given a mix of multiplicities, you can either (noting that - here (S,B,T,Q) = appropriate modulator * model(S,B,T,Q) ) - note: if you only set one multiplicity_modulator - to 1, and all the others to 0, then normalising - will mean that you effectively have the same number - of stars as single, binary, triple or quad (whichever - is non-zero) i.e. the multiplicity fraction is ignored. - This is probably not useful except for - testing purposes or comparing to old grids. - -'raw' : stick to what is predicted, i.e. - S/(S+B+T+Q), B/(S+B+T+Q), T/(S+B+T+Q), Q/(S+B+T+Q) - without normalisation - (in which case the total probability < 1.0 unless - all you use single, binary, triple and quadruple) - -'merge' : e.g. if you only have single and binary, - add the triples and quadruples to the binaries, so - binaries represent all multiple systems - ... - *** this is canonical binary population synthesis *** - - It only takes the maximum multiplicity into account, - i.e. it doesn't multiply the resulting array by the multiplicity modulator again. - This prevents the resulting array to always be 1 if only 1 multiplicity modulator element is nonzero - - Note: if multiplicity_modulator == [1,1,1,1]. this option does nothing (equivalent to 'raw'). -""", - "q_low_extrapolation_method": """ -q extrapolation (below 0.15) method - none - flat - linear2 - plaw2 - nolowq -""", - "q_high_extrapolation_method": "Same as q_low_extrapolation_method", -} - - -################################# -# Grid options functions - -# Utility functions -def grid_options_help(option: str) -> dict: - """ - Function that prints out the description of a grid option. Useful function for the user. +secs_per_day = 86400 # probably needs to go somewhere more sensible - Args: - option: which option you want to have the description of - returns: - dict containing the option, the description if its there, otherwise empty string. 
And if the key doesnt exist, the dict is empty +class grid_options_defaults: """ - - option_keys = grid_options_defaults_dict.keys() - description_keys = grid_options_descriptions.keys() - - if not option in option_keys: - print( - "Error: This is an invalid entry. Option does not exist, please choose from the following options:\n\t{}".format( - ", ".join(option_keys) + Class extension to Population grid containing all the functionality for the options and defaults + """ + def __init__(self, **kwargs): + + return + + def get_grid_options_defaults_dict(self): + """ + Function to return the default values for the grid options + """ + + # Options dict + return { + ########################## + # general (or unordered..) + ########################## + "num_cores": 1, # total number of cores used to evolve the population + "num_cores_available": None, # set automatically, not by the user + "parse_function": None, # Function to parse the output with. + "multiplicity_fraction_function": 0, # Which multiplicity fraction function to use. 0: None, 1: Arenou 2010, 2: Rhagavan 2010, 3: Moe and di Stefano 2017 + "tmp_dir": temp_dir(), # Setting the temp dir of the program + "cache_dir": self.default_cache_dir(), # Cache location, usually $HOME/.cache + "status_dir": None, # + "_main_pid": -1, # Placeholder for the main process id of the run. + "save_ensemble_chunks": True, # Force the ensemble chunk to be saved even if we are joining a thread (just in case the joining fails) + "combine_ensemble_with_thread_joining": True, # Flag on whether to combine everything and return it to the user or if false: write it to data_dir/ensemble_output_{population_id}_{thread_id}.json + "_commandline_input": "", + "log_runtime_systems": 0, # whether to log the runtime of the systems (1 file per thread. stored in the tmp_dir) + "_actually_evolve_system": True, # Whether to actually evolve the systems of just act as if. for testing. used in _process_run_population_grid + "max_queue_size": 0, # Maximum size of the system call queue. Set to 0 for this to be calculated automatically + "run_zero_probability_system": True, # Whether to run the zero probability systems + "_zero_prob_stars_skipped": 0, + "ensemble_factor_in_probability_weighted_mass": False, # Whether to multiply the ensemble results by 1/probability_weighted_mass + "do_dry_run": True, # Whether to do a dry run to calculate the total probability for this run + "dry_run_num_cores": 1, # number of parallel processes for the dry run (outer loop) + "dry_run_hook": None, # Function hook for the dry run: this function is called, if not None, for every star in the dry run. Useful for checking initial distributions. + "custom_generator": None, # Place for the custom system generator + "return_after_dry_run": False, # Return immediately after a dry run? + "exit_after_dry_run": False, # Exit after dry run? + "print_stack_on_exit": False, # print the stack trace on exit calls? + ##################### + # System information + ##################### + "command_line": " ".join(sys.argv), + "original_command_line": os.getenv("BINARY_C_PYTHON_ORIGINAL_CMD_LINE"), + "working_diretory": os.getcwd(), + "original_working_diretory": os.getenv("BINARY_C_PYTHON_ORIGINAL_WD"), + "start_time": self.now(), + "original_submission_time": os.getenv( + "BINARY_C_PYTHON_ORIGINAL_SUBMISSION_TIME" + ), + ########################## + # Execution log: + ########################## + "verbosity": 0, # Level of verbosity of the simulation + "log_file": os.path.join( # not used (yet?) 
+ temp_dir(), "binary_c_python.log" + ), # Set to None to not log to file. The directory will be created + "log_dt": 5, # time between vb=1 logging outputs + "n_logging_stats": 50, # number of logging stats used to calculate time remaining (etc.) default = 50 + "log_newline": "\n", # newline character in logs ("\n" for newlines, "\x0d" for carriage return) + ########################## + # binary_c files + ########################## + "_binary_c_executable": os.path.join( + os.environ["BINARY_C"], "binary_c" + ), + "_binary_c_shared_library": os.path.join( + os.environ["BINARY_C"], "src", "libbinary_c.so" + ), + "_binary_c_config_executable": os.path.join( + os.environ["BINARY_C"], "binary_c-config" + ), + "_binary_c_dir": os.environ["BINARY_C"], + ########################## + # Moe and di Stefano (2017) internal settings + ########################## + "_loaded_Moe2017_data": False, # Holds flag whether the Moe and di Stefano (2017) data is loaded into memory + "_set_Moe2017_grid": False, # Whether the Moe and di Stefano (2017) grid has been loaded + "Moe2017_options": None, # Holds the Moe and di Stefano (2017) options. + "_Moe2017_JSON_data": None, # Stores the data + ########################## + # Custom logging + ########################## + "C_auto_logging": None, # Should contain a dictionary where the keys are they headers + # and the values are lists of parameters that should be logged. + # This will get parsed by autogen_C_logging_code in custom_logging_functions.py + "C_logging_code": None, # Should contain a string which holds the logging code. + "custom_logging_func_memaddr": -1, # Contains the custom_logging functions memory address + "_custom_logging_shared_library_file": None, # file containing the .so file + ########################## + # Store pre-loading: + ########################## + "_store_memaddr": -1, # Contains the store object memory address, useful for pre loading. + # defaults to -1 and isn't used if that's the default then. + ########################## + # Log args: logging of arguments + ########################## + "log_args": 0, # unused + "log_args_dir": "/tmp/", # unused + ########################## + # Population evolution + ########################## + ## General + "evolution_type": "grid", # Flag for type of population evolution + "_evolution_type_options": [ + "grid", + "custom_generator", + ], # available choices for type of population evolution. # TODO: fill later with Monte Carlo, source file + "_system_generator": None, # value that holds the function that generates the system + # (result of building the grid script) + "source_file_filename": None, # filename for the source + "_count": 0, # count of systems + "_total_starcount": 0, # Total count of systems in this generator + "_probtot": 0, # total probability + "weight": 1.0, # weighting for the probability + "repeat": 1, # number of times to repeat each system (probability is adjusted to be 1/repeat) + "_start_time_evolution": 0, # Start time of the grid + "_end_time_evolution": 0, # end time of the grid + "_errors_found": False, # Flag whether there are any errors from binary_c + "_errors_exceeded": False, # Flag whether the number of errors have exceeded the limit + "_failed_count": 0, # number of failed systems + "_failed_prob": 0, # Summed probability of failed systems + "failed_systems_threshold": 20, # Maximum failed systems per process allowed to fail before the process stops logging the failing systems. 
+ "_failed_systems_error_codes": [], # List to store the unique error codes + "_population_id": 0, # Random id of this grid/population run, Unique code for the population. Should be set only once by the controller process. + "_total_mass_run": 0, # To count the total mass that thread/process has ran + "_total_probability_weighted_mass_run": 0, # To count the total mass * probability for each system that thread/process has ran + "modulo": 1, # run modulo n of the grid. + "start_at": 0, # start at the first model + ## Grid type evolution + "_grid_variables": {}, # grid variables + "gridcode_filename": None, # filename of gridcode + "symlink_latest_gridcode": True, # symlink to latest gridcode + "save_population_object": None, # filename to which we should save a pickled grid object as the final thing we do + "joinlist": None, + "do_analytics": True, # if True, calculate analytics prior to return + "save_snapshots": True, # if True, save snapshots on SIGINT + "restore_from_snapshot_file": None, # file to restore from + "restore_from_snapshot_dir": None, # dir to restore from + "exit_code": 0, # return code + "stop_queue": False, + "_killed": False, + "_queue_done": False, + ## Monte carlo type evolution + # TODO: make MC options + ## Evolution from source file + # TODO: make run from sourcefile options. + ######################################## + # function caching options + ######################################## + "function_cache": True, + "function_cache_default_maxsize": 256, + "function_cache_default_type": "NullCache", # one of LRUCache, LFUCache, FIFOCache, MRUCache, RRCache, TTLCache, NullCache, NoCache + "function_cache_TTL": 30, + "function_cache_functions": { + # key=function_name : value=(cache_size, cache_type, test_args (string)) + # + # if cache_size is 0, use function_cache_default_maxsize + # set above + # + # if cache_type is None, use function_cache_default_type + # set above + # + # if n is None, no cache is set up + "distribution_functions.powerlaw_constant": (0, "NoCache", "1,100,-2"), + "distribution_functions.calculate_constants_three_part_powerlaw": ( + 16, + "FIFOCache", + "0.1,0.5,1,100,-1.3,-2.3,-2.3", + ), + "distribution_functions.gaussian_normalizing_const": ( + 16, + "FIFOCache", + "1.0,1.0,-10.0,+10.0", + ), + "spacing_functions.const_linear": (16, "FIFOCache", "1,10,9"), + "spacing_functions.const_int": (0, None, "1,10,9"), + "spacing_functions.const_ranges": ( + 16, + "FIFOCache", + "((0.1,0.65,10),(0.65,0.85,20),(0.85,10.0,10))", + ), + "spacing_functions.gaussian_zoom": ( + 16, + "FIFOCache", + "1.0,10.0,5.0,2.0,0.9,100", + ), + }, + ######################################## + # HPC variables + ######################################## + "HPC_force_join": 0, # if True, and the HPC variable ("slurm" or "condor") is 3, skip checking our own job and force the join + "HPC_rebuild_joinlist": 0, # if True, ignore the joinlist we would usually use and rebuild it automatically + ######################################## + # Slurm stuff + ######################################## + "slurm": 0, # dont use the slurm by default, 0 = no slurm, 1 = launch slurm jobs, 2 = run slurm jobs + "slurm_ntasks": 1, # CPUs required per array job: usually only need this to be 1 + "slurm_dir": "", # working directory containing scripts output logs etc. 
+ "slurm_njobs": 0, # number of scripts; set to 0 as default + "slurm_jobid": "", # slurm job id (%A) + "slurm_memory": "512MB", # memory required for the job + "slurm_warn_max_memory": "1024MB", # warn if we set it to more than this (usually by accident) + "slurm_postpone_join": 0, # if 1 do not join on slurm, join elsewhere. want to do it off the slurm grid (e.g. with more RAM) + "slurm_jobarrayindex": None, # slurm job array index (%a) + "slurm_jobname": "binary_c-python", # default + "slurm_partition": None, + "slurm_time": 0, # total time. 0 = infinite time + "slurm_postpone_sbatch": 0, # if 1: don't submit, just make the script + "slurm_array": None, # override for --array, useful for rerunning jobs + "slurm_array_max_jobs": None, # override for the max number of concurrent array jobs + "slurm_extra_settings": {}, # Dictionary of extra settings for Slurm to put in its launch script. + "slurm_sbatch": shutil.which("sbatch"), # sbatch command + "slurm_env": shutil.which("env"), # env location for Slurm + "slurm_bash": shutil.which("bash"), # bash location for Slurm + "slurm_pwd": shutil.which("pwd"), # pwd command location for Slurm + "slurm_date": shutil.which("date"), # bash location for Slurm + ######################################## + # Condor stuff + ######################################## + "condor": 0, # 1 to use condor, 0 otherwise + "condor_dir": "", # working directory containing e.g. scripts, output, logs (e.g. should be NFS available to all) + "condor_njobs": 0, # number of scripts/jobs that CONDOR will run in total + "condor_ClusterID": None, # condor cluster id, equivalent to Slurm's jobid + "condor_Process": None, # condor process, equivalent to Slurm's jobarrayindex + "condor_postpone_submit": 0, # if 1, the condor script is not submitted (useful for debugging). Default 0. + "condor_postpone_join": 0, # if 1, data is not joined, e.g. if you want to do it off the condor grid (e.g. with more RAM). Default 0. + "condor_memory": 512, # in MB, the memory use (ImageSize) of the job + "condor_warn_max_memory": 1024, # in MB, the memory use (ImageSize) of the job + "condor_universe": "vanilla", # usually vanilla universe + "condor_extra_settings": {}, # Place to put extra configuration for the CONDOR submit file. The key and value of the dict will become the key and value of the line in te slurm batch file. Will be put in after all the other settings (and before the command). Take care not to overwrite something without really meaning to do so. + # snapshots and checkpoints + "condor_snapshot_on_kill": 0, # if 1 snapshot on SIGKILL before exit + "condor_stream_output": True, # stream stdout + "condor_stream_error": True, # stream stderr + "condor_should_transfer_files": "YES", + "condor_when_to_transfer_output": "ON_EXIT_OR_EVICT", + # (useful for debugging, otherwise a lot of work) + "condor_requirements": "", # job requirements + "condor_env": shutil.which("env"), # /usr/bin/env location + "condor_bash": shutil.which("bash"), # bash executable location + "condor_pwd": shutil.which("pwd"), # pwd command location for Condor + "condor_date": shutil.which("date"), # bash location for Condor + "condor_initial_dir": None, # directory from which condor is run, if None is the directory in which your script is run + "condor_submit": shutil.which("condor_submit"), # the condor_submit command + "condor_q": shutil.which("condor_q"), # the condor_submit command + "condor_getenv": True, # if True condor takes the environment at submission and copies it to the jobs. 
You almost certainly want this. + "condor_batchname": "binary_c-condor", # Condor batchname option + "condor_kill_sig": "SIGINT", # signal Condor should use to stop a process : note that grid.py expects this to be "SIGINT" + # ######################################## + # # GRID + # ######################################## + # control flow + "rungrid": 1, # usually run the grid, but can be 0 to skip it (e.g. for condor/slurm admin) + } + + def get_grid_options_descriptions(self): + """ + Function that returns the descriptions for all the grid options + + TODO: consider putting input types for all of them + """ + + # Grid containing the descriptions of the options + return { + "tmp_dir": "Directory where certain types of output are stored. The grid code is stored in that directory, as well as the custom logging libraries. Log files and other diagnostics will usually be written to this location, unless specified otherwise", # TODO: improve this + "status_dir": "Directory where grid status is stored", + "_binary_c_dir": "Director where binary_c is stored. This options are not really used", + "_binary_c_config_executable": "Full path of the binary_c-config executable. This options is not used in the population object.", + "_binary_c_executable": "Full path to the binary_c executable. This options is not used in the population object.", + "_binary_c_shared_library": "Full path to the libbinary_c file. This options is not used in the population object", + "verbosity": "Verbosity of the population code. Default is 0, by which only errors will be printed. Higher values will show more output, which is good for debugging.", + "log_dt": "Time between verbose logging output.", + "log_newline": "Newline character used at the end of verbose logging statements. This is \\n (newline) by default, but \\x0d (carriage return) might also be what you want.", + "n_logging_stats": "Number of logging statistics used to calculate time remaining (etc.). E.g., if you set this to 10 the previous 10 calls to the verbose log will be used to construct an estimate of the time remaining.", + "num_cores": "The number of cores that the population grid will use. You can set this manually by entering an integer great than 0. When 0 uses all logical cores. When -1 uses all physical cores. Input: int", + "num_processes": "Number of processes launched by multiprocessing. This should be set automatically by binary_c-python, not by the user.", + "_start_time_evolution": "Variable storing the start timestamp of the population evolution. Set by the object itself.", + "_end_time_evolution": "Variable storing the end timestamp of the population evolution. Set by the object itself", + "_total_starcount": "Variable storing the total number of systems in the generator. Used and set by the population object.", + "_custom_logging_shared_library_file": "filename for the custom_logging shared library. Used and set by the population object", + "_errors_found": "Variable storing a Boolean flag whether errors by binary_c are encountered.", + "_errors_exceeded": "Variable storing a Boolean flag whether the number of errors was higher than the set threshold (failed_systems_threshold). If True, then the command line arguments of the failing systems will not be stored in the failed_system_log files.", + "source_file_filename": "Variable containing the source file containing lines of binary_c command line calls. These all have to start with binary_c.", # TODO: Expand + "C_auto_logging": "Dictionary containing parameters to be logged by binary_c. 
The structure of this dictionary is as follows: the key is used as the headline which the user can then catch. The value at that key is a list of binary_c system parameters (like star[0].mass)", + "C_logging_code": "Variable to store the exact code that is used for the custom_logging. In this way the user can do more complex logging, as well as putting these logging strings in files.", + "_failed_count": "Variable storing the number of failed systems.", + "_evolution_type_options": "List containing the evolution type options.", + "_failed_prob": "Variable storing the total probability of all the failed systems", + "_failed_systems_error_codes": "List storing the unique error codes raised by binary_c of the failed systems", + "_grid_variables": "Dictionary storing the grid_variables. These contain properties which are accessed by the _generate_grid_code function", + "_population_id": "Variable storing a unique 32-char hex string.", + "_commandline_input": "String containing the arguments passed to the population object via the command line. Set and used by the population object.", + "_system_generator": "Function object that contains the system generator function. This can be from a grid, or a source file, or a Monte Carlo grid.", + "gridcode_filename": "Filename for the grid code. Set and used by the population object. TODO: allow the user to provide their own function, rather than only a generated function.", + "log_args": "Boolean to log the arguments.", + "log_args_dir": "Directory to log the arguments to.", + "log_file": "Log file for the population object. Unused", + "custom_logging_func_memaddr": "Memory address where the custom_logging_function is stored. Input: int", + "_count": "Counter tracking which system the generator is on.", + "_probtot": "Total probability of the population.", + "_main_pid": "Main process ID of the master process. Used and set by the population object.", + "_store_memaddr": "Memory address of the store object for binary_c.", + "failed_systems_threshold": "Variable storing the maximum number of systems that are allowed to fail before logging their command line arguments to failed_systems log files", + "parse_function": "Function that the user can provide to handle the output the binary_c. This function has to take the arguments (self, output). Its best not to return anything in this function, and just store stuff in the self.grid_results dictionary, or just output results to a file", + ############################################################ + # Condor + "condor": "Integer flag used to control HTCondor (referred to as Condor here) jobs. Default is 0 which means no Condor. 1 means launch Condor jobs. Do not manually set this to 2 (run Condor jobs) or 3 (join Condor job data) unless you know what you are doing, this is usually done for you.", + "condor_dir": "String. Working directory containing e.g. scripts, output, logs (e.g. should be NFS available to all jobs). This directory should not exist when you launch the Condor jobs.", + "condor_njobs": "Integer. Number of jobs that Condor will run", + "condor_ClusterID": "Integer. Condor ClusterID variable, equivalent to Slurm's jobid. Jobs are numbered <ClusterID>.<Process>", + "condor_Process": "Integer. Condor Process variable, equivalent to Slurm's jobarrayindex. Jobs are numbered <ClusterID>.<Process>", + "condor_postpone_submit": "Integer. Debugging tool. If 1, the condor script is not submitted (useful for debugging). Default 0.", + "condor_postpone_join": "Integer. 
Use to delay the joining of Condor grid data. If 1, data is not joined, e.g. if you want to do it off the condor grid (e.g. with more RAM). Default 0.", + "condor_memory": "Integer. In MB, the memory use (ImageSize) of the job.", + "condor_warn_max_memory": "Integer. In MB, the memory use (ImageSize) of the job.", + "condor_universe": 'String. The HTCondor "universe": this is "vanilla" by default.', + "condor_extra_settings": "Dictionary. Place to put extra configuration for the CONDOR submit file. The key and value of the dict will become the key and value of the line in te slurm batch file. Will be put in after all the other settings (and before the command). Take care not to overwrite something without really meaning to do so.", + "condor_snapshot_on_kill": "Integer. If 1 we save a snapshot on SIGKILL before exit.", + "condor_stream_output": "Boolean. If True, we activate Condor's stdout stream. If False, this data is copied at the end of the job.", + "condor_stream_error": "Boolean. If True, we activate Condor's stderr stream. If False, this data is copied at the end of the job.", + "condor_should_transfer_files": 'Integer. Condor\'s option to transfer files at the end of the job. You should set this to "YES"', + "condor_when_to_transfer_output": 'Integer. Condor\'s option to decide when output files are transferred. You should usually set this to "ON_EXIT_OR_EVICT"', + "condor_requirements": "String. Condor job requirements. These are passed to Condor directly, you should read the HTCondor manual to learn about this. If no requirements exist, leave as an string.", + "condor_env": 'String. Points the location of the "env" command, e.g. /usr/bin/env or /bin/env, that is used in Condor launch scripts. This is set automatically on the submit machine, so if it is different on the nodes, you should set it manually.', + "condor_bash": 'String. Points the location of the "bash" command, e.g. /bin/bash, that is used in Condor launch scripts. This is set automatically on the submit machine, so if it is different on the nodes, you should set it manually.', + "condor_pwd": 'String. Points the location of the "pwd" command, e.g. /bin/pwd, that is used in Condor launch scripts. This is set automatically on the submit machine, so if it is different on the nodes, you should set it manually.', + "condor_date": 'String. Points the location of the "date" command, e.g. /usr/bin/date, that is used in Condor launch scripts. This is set automatically on the submit machine, so if it is different on the nodes, you should set it manually.', + "condor_initial_dir": "String. Directory from which condor scripts are run. If set to the default, None, this is the directory from which your script is run.", + "condor_submit": 'String. The Condor_submit command, usually "/usr/bin/condor_submit" but will depend on your HTCondor installation.', + "condor_q": 'String. The Condor_q command, usually "/usr/bin/condor_q" but will depend on your HTCondor installation.', + "condor_getenv": "Boolean. If True, the default, condor takes the environment at submission and copies it to the jobs. You almost certainly want this to be True.", + "condor_batchname": 'String. Condor batchname option: this is what appears in condor_q. Defaults to "binary_c-condor"', + "condor_kill_sig": 'String. Signal Condor should use to stop a process. 
Note that grid.py expects this to be "SIGINT" which is the default.', + ############################################################ + # Slurm options + ############################################################ + "slurm": "Integer flag used to control Slurm jobs. Default is 0 which means no Slurm. 1 means launch Slurm jobs. Do not manually set this to 2 (run Slurm jobs) or 3 (join Slurm job data) unless you know what you are doing, this is usually done for you.", + "slurm_dir": "String. Working directory containing e.g. scripts, output, logs (e.g. should be NFS available to all jobs). This directory should not exist when you launch the Slurm jobs.", + "slurm_ntasks": "Integer. Number of CPUs required per array job: usually only need this to be 1 (the default).", + "slurm_njobs": "Integer. Number of Slurm jobs to be launched.", + "slurm_jobid": "Integer. Slurm job id. Each job is numbered <slurm_jobid>.<slurm_jobarrayindex>.", + "slurm_jobarrayindex": "Integer. Slurm job array index. Each job is numbered <slurm_jobid>.<slurm_jobarrayindex>.", + "slurm_memory": 'String. Memory required for the job. Should be in megabytes in a format that Slurm understands, e.g. "512MB" (the default).', + "slurm_warn_max_memory": 'String. If we set slurm_memory in excess of this, warn the user because this is usually a mistake. Default "1024MB".', + "slurm_postpone_join": "Integer, default 0. If 1 do not join job results with Slurm, instead you have to do it later manually.", + "slurm_jobname": 'String which names the Slurm jobs, default "binary_c-python".', + "slurm_partition": "String containing the Slurm partition name. You should check your local Slurm installation to find out partition information, e.g. using the sview command.", + "slurm_time": "String. The time a Slurm job is allowed to take. Default is 0 which means no limit. Please check the Slurm documentation for required format of this option.", + "slurm_postpone_sbatch": "Integer, default 0. If set to 1, do not launch Slurm jobs with sbatch, just make the scripts that would have.", + "slurm_array": "String. Override for Slurm's --array option, useful for rerunning jobs manually. Default None.", + "slurm_array_max_jobs": "Integer. Override for the max number of concurrent Slurm array jobs. Default None.", + "slurm_extra_settings": "Dictionary of extra settings for Slurm to put in its launch script. Please see the Slurm documentation for the many options that are available to you.", + "slurm_sbatch": 'String. The Slurm "sbatch" submission command, usually "/usr/bin/sbatch" but will depend on your Slurm installation. By default is set automatically.', + "slurm_env": 'String. Points the location of the "env" command, e.g. /usr/bin/env or /bin/env, that is used in Slurm scripts. This is set automatically on the submit machine, so if it is different on the nodes, you should set it manually.', + "slurm_bash": 'String. Points the location of the "bash" command, e.g. /bin/bash, that is used in Slurm scripts. This is set automatically on the submit machine, so if it is different on the nodes, you should set it manually.', + "slurm_pwd": 'String. Points the location of the "pwd" command, e.g. /bin/pwd, that is used in Slurm scripts. This is set automatically on the submit machine, so if it is different on the nodes, you should set it manually.', + "slurm_date": 'String. Points the location of the "date" command, e.g. /usr/bin/date, that is used in Slurm scripts. 
This is set automatically on the submit machine, so if it is different on the nodes, you should set it manually.',
+ ############################################################
+ # High performance computing (HPC) variables
+ ############################################################
+ "HPC_force_join": 'Integer, default 0. If 1, and the HPC variable ("slurm" or "condor") is 3, skip checking our own job and force the join.',
+ "HPC_rebuild_joinlist": "Integer, default 0. If 1, ignore the joinlist we would usually use and rebuild it automatically",
+ ############################################################
+ # Caching
+ ############################################################
+ "function_cache": "Boolean, default True. If True, we use a cache for certain function calls.",
+ "function_cache_default_maxsize": "Integer, default 256. The default maxsize of the cache. Should be a power of 2.",
+ "function_cache_default_type": "String. One of the following types: LRUCache, LFUCache, FIFOCache, MRUCache, RRCache, TTLCache, NullCache, NoCache. You can find details of what these mean in the Python cachetools manual, except for NoCache which means no cache is used at all, and NullCache which is a dummy cache that never matches, used for testing overheads.",
+ "function_cache_functions": "Dict. Keys are the function names that should be in the cache. The value is a tuple of (cache_size, cache_type, test_args) where cache_size is used as the size of the cache, or if 0 the function_cache_default_maxsize is used. The cache_type is the function_cache_default_type if None, otherwise it is the cache type (see the list defined at function_cache_default_type). The test_args are constant arguments used to call the function when testing the cache, see cache.cache_test() for details.",
+ ############################################################
+ "weight": "Weight factor for each system. The calculated probability is multiplied by this. If the user wants each system to be repeated several times, then this variable should not be changed, rather change the _repeat variable instead, as that handles the reduction in probability per system. This is useful for systems that have a process with some random element in it.", # TODO: add more info here, regarding the evolution splitting.
+ "repeat": "Factor of how many times a system should be repeated. Consider the evolution splitting binary_c argument for supernovae kick repeating.",
+ "evolution_type": "Variable containing the type of evolution used for the grid. Multiprocessing, linear processing or possibly something else (e.g. for Slurm or Condor).",
+ "combine_ensemble_with_thread_joining": "Boolean flag on whether to combine everything and return it to the user or if false: write it to data_dir/ensemble_output_{population_id}_{thread_id}.json",
+ "log_runtime_systems": "Whether to log the runtime of the systems. Each system run by the thread is logged to a file and is stored in the tmp_dir (1 file per thread). Don't use this if you are planning to run a lot of systems. This is mostly for debugging and finding systems that take long to run. Integer, default = 0. If the value is 1 then the systems are logged",
+ "_total_mass_run": "To count the total mass that this thread/process has run",
+ "_total_probability_weighted_mass_run": "To count the total mass * probability for each system that this thread/process has run",
+ "_actually_evolve_system": "Whether to actually evolve the systems or just act as if. For testing:
used in _process_run_population_grid", + "max_queue_size": "Maximum size of the queue that is used to feed the processes. Don't make this too big! Default: 1000. Input: int", + "_set_Moe2017_grid": "Internal flag whether the Moe and di Stefano (2017) grid has been loaded", + "run_zero_probability_system": "Whether to run the zero probability systems. Default: True. Input: Boolean", + "_zero_prob_stars_skipped": "Internal counter to track how many systems are skipped because they have 0 probability", + "ensemble_factor_in_probability_weighted_mass": "Flag to multiply all the ensemble results with 1/probability_weighted_mass", + "multiplicity_fraction_function": "Which multiplicity fraction function to use. 0: None, 1: Arenou 2010, 2: Rhagavan 2010, 3: Moe and di Stefano (2017) 2017", + "m&s_options": "Internal variable that holds the Moe and di Stefano (2017) options. Don't write to this your self", + "_loaded_Moe2017_data": "Internal variable storing whether the Moe and di Stefano (2017) data has been loaded into memory", + "do_dry_run": "Whether to do a dry run to calculate the total probability for this run", + "dry_run_hook": "Function hook to be called for every system in a dry run. The function is passed a dict of the system parameters. Does nothing if None (the default).", + "return_after_dry_run": "If True, return immediately after a dry run (and don't run actual stars). Default is False.", + "exit_after_dry_run": "If True, exits after a dry run. Default is False.", + "print_stack_on_exit": "If True, prints a stack trace when the population's exit method is called.", + "_Moe2017_JSON_data": "Location to store the loaded Moe&diStefano2017 dataset", # Stores the data + } + + ################################# + # Grid options functions + + # Utility functions + def grid_options_help(self, option: str) -> dict: + """ + Function that prints out the description of a grid option. Useful function for the user. + + Args: + option: which option you want to have the description of + + returns: + dict containing the option, the description if its there, otherwise empty string. And if the key doesnt exist, the dict is empty + """ + + # + grid_options_defaults_dict = self.get_grid_options_defaults_dict() + grid_options_descriptions = self.get_grid_options_descriptions() + + # + option_keys = grid_options_defaults_dict.keys() + description_keys = grid_options_descriptions.keys() + + # If the option is unknown + if option not in option_keys: + print( + "Error: This is an invalid entry. Option does not exist, please choose from the following options:\n\t{}".format( + ", ".join(option_keys) + ) ) - ) - return {} + return {} - else: - if not option in description_keys: + # If its not described + if option not in description_keys: print( "This option has not been described properly yet. 
Please contact on of the authors" ) return {option: ""} - else: - print(grid_options_descriptions[option]) - return {option: grid_options_descriptions[option]} + # If its known and described: + print(grid_options_descriptions[option]) + return {option: grid_options_descriptions[option]} -def grid_options_description_checker(print_info: bool = True) -> int: - """ - Function that checks which descriptions are missing + def grid_options_description_checker(self, print_info: bool = True) -> int: + """ + Function that checks which descriptions are missing - Args: - print_info: whether to print out information about which options contain proper descriptions and which do not + Args: + print_info: whether to print out information about which options contain proper descriptions and which do not - Returns: - the number of undescribed keys - """ + Returns: + the number of undescribed keys + """ - # Get the keys - option_keys = grid_options_defaults_dict.keys() - description_keys = grid_options_descriptions.keys() + # + grid_options_defaults_dict = self.get_grid_options_defaults_dict() + grid_options_descriptions = self.get_grid_options_descriptions() - # - undescribed_keys = list(set(option_keys) - set(description_keys)) + # + option_keys = grid_options_defaults_dict.keys() + description_keys = grid_options_descriptions.keys() - if undescribed_keys: - if print_info: - print( - "Warning: the following keys have no description yet:\n\t{}".format( - ", ".join(sorted(undescribed_keys)) + # + undescribed_keys = list(set(option_keys) - set(description_keys)) + + if undescribed_keys: + if print_info: + print( + "Warning: the following keys have no description yet:\n\t{}".format( + ", ".join(sorted(undescribed_keys)) + ) ) - ) - print( - "Total description progress: {:.2f}%%".format( - 100 * len(description_keys) / len(option_keys) + print( + "Total description progress: {:.2f}%%".format( + 100 * len(description_keys) / len(option_keys) + ) ) - ) - return len(undescribed_keys) - + return len(undescribed_keys) -def write_grid_options_to_rst_file(output_file: str) -> None: - """ - Function that writes the descriptions of the grid options to a rst file + def write_grid_options_to_rst_file(self, output_file: str) -> None: + """ + Function that writes the descriptions of the grid options to an rst file - Tasks: - TODO: separate things into private and public options + Args: + output_file: target file where the grid options descriptions are written to + """ - Args: - output_file: target file where the grid options descriptions are written to - """ + # Get the options and the description + options = self.get_grid_options_defaults_dict() + descriptions = self.get_grid_options_descriptions() - # Get the options and the description - options = grid_options_defaults_dict - descriptions = grid_options_descriptions + # Get those that do not have a description + not_described_yet = list(set(options) - set(descriptions)) - # Get those that do not have a description - not_described_yet = list(set(options) - set(descriptions)) + # separate public and private options + public_options = [key for key in options if not key.startswith("_")] + private_options = [key for key in options if key.startswith("_")] - # separate public and private options - public_options = [key for key in options if not key.startswith("_")] - private_options = [key for key in options if key.startswith("_")] + # Check input + if not output_file.endswith(".rst"): + msg="Filename doesn't end with .rst, please provide a proper filename" + raise 
ValueError(msg) - # Check input - if not output_file.endswith(".rst"): - print("Filename doesn't end with .rst, please provide a proper filename") - return None + # M&S options + moe_di_stefano_default_options = self.get_Moe_di_Stefano_2017_default_options() + moe_di_stefano_default_options_description = self.get_Moe_di_Stefano_2017_default_options_description() - with open(output_file, "w") as f: - print("Population grid code options", file=f) - print("{}".format("=" * len("Population grid code options")), file=f) - print( - "The following chapter contains all grid code options, along with their descriptions", - file=f, - ) - print( - "There are {} options that are not described yet.".format( - len(not_described_yet) - ), - file=f, - ) - print("\n", file=f) - - # Start public options part - print_option_descriptions( - f, - public_options, - descriptions, - "Public options", - "The following options are meant to be changed by the user.", - ) - - # Moe & di Stefano options: - print_option_descriptions( - f, - moe_di_stefano_default_options, - moe_di_stefano_default_options_description, - "Moe & di Stefano sampler options", - "The following options are meant to be changed by the user.", - ) - - # Start private options part - print_option_descriptions( - f, - private_options, - descriptions, - "Private options", - "The following options are not meant to be changed by the user, as these options are used and set internally by the object itself. The description still is provided, but just for documentation purposes.", - ) - - -def print_option_descriptions(filehandle, options, descriptions, title, extra_text): - # Start public options part - print("{}".format(title), file=filehandle) - print("{}".format("-" * len("{}".format(title))), file=filehandle) - print("{}".format(extra_text), file=filehandle) - print("\n", file=filehandle) - - for option in sorted(options): - if option in descriptions: + with self.open(output_file, "w") as f: + print("Population grid code options", file=f) + print("{}".format("=" * len("Population grid code options")), file=f) print( - "| **{}**: {}".format( - option, descriptions[option].replace("\n", "\n\t") - ), - file=filehandle, + "The following chapter contains all grid code options, along with their descriptions", + file=f, ) - else: print( - "| **{}**: No description available yet".format(option), - file=filehandle, + "There are {} options that are not described yet.".format( + len(not_described_yet) + ), + file=f, + ) + print("\n", file=f) + + # Start public options part + self.print_option_descriptions( + f, + public_options, + descriptions, + "Public options", + "The following options are meant to be changed by the user.", ) - print("", file=filehandle) + + # Moe & di Stefano options: + self.print_option_descriptions( + f, + moe_di_stefano_default_options, + moe_di_stefano_default_options_description, + "Moe & di Stefano sampler options", + "The following options are meant to be changed by the user.", + ) + + # Start private options part + self.print_option_descriptions( + f, + private_options, + descriptions, + "Private options", + "The following options are not meant to be changed by the user, as these options are used and set internally by the object itself. 
The description still is provided, but just for documentation purposes.", + ) + + def print_option_descriptions(self, filehandle, options, descriptions, title, extra_text): + """ + Function to print the descriptions of a set of options, under a section title and with some introductory text, to the given filehandle + """ + + # Start public options part + print("{}".format(title), file=filehandle) + print("{}".format("-" * len("{}".format(title))), file=filehandle) + print("{}".format(extra_text), file=filehandle) + print("\n", file=filehandle) + + for option in sorted(options): + if option in descriptions: + print( + "| **{}**: {}".format( + option, descriptions[option].replace("\n", "\n\t") + ), + file=filehandle, + ) + else: + print( + "| **{}**: No description available yet".format(option), + file=filehandle, + ) + print("", file=filehandle) + + def default_cache_dir(self): + """ + Return a default cache directory path, or None if we cannot find one. + """ + error_string = "__*ERR*__" # string that cannot be a path + for path in [ + os.path.join(os.environ.get("HOME", error_string), ".cache", "binary_c"), + os.path.join(os.environ.get("TMP", error_string), "cache"), + ]: + if not error_string in path and os.path.isdir(path): + return path + return None diff --git a/binarycpython/utils/gridcode.py b/binarycpython/utils/gridcode.py new file mode 100644 index 0000000000000000000000000000000000000000..800c26ed0a714a4f75c15c6eea154d80121cabe9 --- /dev/null +++ b/binarycpython/utils/gridcode.py @@ -0,0 +1,1125 @@ +""" +Module containing the gridcode generation functions for the binarycpython package. + +This class object is an extension to the population grid object +""" + +# pylint: disable=E1101 + +import datetime +import importlib +import json +import os +from typing import Union, Any + + +_count = 0 # used for file symlinking (for testing only) +_numba = False # activate experimental numba code? + + +class gridcode: + """ + Extension to the population grid object that contains the functionality to generate the grid code + """ + + def __init__(self, **kwargs): + # don't do anything: we just inherit from this class + return + + ################################################### + # Grid code functions + # + # Functions below are used to run populations with + # a variable grid + ################################################### + def _gridcode_filename(self): + """ + Returns a filename for the gridcode. + """ + if self.HPC_job(): + filename = os.path.join( + self.grid_options["tmp_dir"], + "binary_c_grid_{population_id}.{jobid}.py".format( + population_id=self.grid_options["_population_id"], + jobid=self.jobID(), + ), + ) + else: + filename = os.path.join( + self.grid_options["tmp_dir"], + "binary_c_grid_{population_id}.py".format( + population_id=self.grid_options["_population_id"] + ), + ) + return filename + + def _add_code(self, *args, indent=0): + """ + Function to add code to the grid code string (self.code_string) + + indent (=0) is the number of extra indent blocks prepended to each string that is added 
+ """ + + indent_block = self._indent_block(indent) + for thing in args: + self.code_string += indent_block + thing + + def _indent_block(self, n=0): + """ + return an indent block, with n extra blocks in it + """ + return (self.indent_depth + n) * self.indent_string + + def _increment_indent_depth(self, delta): + """ + increment the indent indent_depth by delta + """ + self.indent_depth += delta + + def _generate_grid_code(self, dry_run=False): + """ + Function that generates the code from which the population will be made. + + dry_run: when True, it will return the starcount at the end so that we know + what the total number of systems is. + + The phasevol values are handled by generating a second array + + Results in a generated file that contains a system_generator function. + + # TODO: make sure running systems with multiplicity 3+ is also possible. + """ + self.verbose_print("Generating grid code", self.grid_options["verbosity"], 1) + + total_grid_variables = len(self.grid_options["_grid_variables"]) + + self._add_code( + # Import packages + "import math\n", + "import numpy as np\n", + "from collections import OrderedDict\n", + "from binarycpython.utils.distribution_functions import *\n", + "from binarycpython.utils.spacing_functions import *\n", + "from binarycpython.utils.useful_funcs import *\n", + "import numba" if _numba else "", + "\n\n", + # Make the function + "def grid_code(self, print_results=True):\n", + ) + + # Increase indent_depth + self._increment_indent_depth(+1) + + self._add_code( + # Write some info in the function + "# Grid code generated on {}\n".format(datetime.datetime.now().isoformat()), + "# This function generates the systems that will be evolved with binary_c\n\n" + # Set some values in the generated code: + "# Set initial values\n", + "_total_starcount = 0\n", + "starcounts = [0 for i in range({})]\n".format(total_grid_variables + 1), + "probabilities = {}\n", + "probabilities_list = [0 for i in range({})]\n".format( + total_grid_variables + 1 + ), + "probabilities_sum = [0 for i in range({})]\n".format( + total_grid_variables + 1 + ), + "parameter_dict = {}\n", + "phasevol = 1\n", + ) + + # Set up the system parameters + self._add_code( + "M_1 = None\n", + "M_2 = None\n", + "M_3 = None\n", + "M_4 = None\n", + "orbital_period = None\n", + "orbital_period_triple = None\n", + "orbital_period_quadruple = None\n", + "eccentricity = None\n", + "eccentricity2 = None\n", + "eccentricity3 = None\n", + "\n", + # Prepare the probability + "# set probability lists\n", + ) + + for grid_variable_el in sorted( + self.grid_options["_grid_variables"].items(), + key=lambda x: x[1]["grid_variable_number"], + ): + # Make probabilities dict + grid_variable = grid_variable_el[1] + self._add_code('probabilities["{}"] = 0\n'.format(grid_variable["name"])) + + ################################################################################# + # Start of code generation + ################################################################################# + self._add_code("\n") + + # turn vb to True to have debugging output + vb = False + + # Generate code + for loopnr, grid_variable_el in enumerate( + sorted( + self.grid_options["_grid_variables"].items(), + key=lambda x: x[1]["grid_variable_number"], + ) + ): + self.verbose_print( + "Constructing/adding: {}".format(grid_variable_el[0]), + self.grid_options["verbosity"], + 2, + ) + grid_variable = grid_variable_el[1] + + #################### + # top code + if grid_variable["topcode"]: + self._add_code(grid_variable["topcode"]) + + 
######################### + # Set up the for loop + # Add comment for for loop + self._add_code( + "# for loop for variable {name} gridtype {gridtype}".format( + name=grid_variable["name"], + gridtype=grid_variable["gridtype"], + ) + + "\n", + "sampled_values_{} = {}".format( + grid_variable["name"], grid_variable["samplerfunc"] + ) + + "\n", + ) + + if vb: + self._add_code( + "print('samples','{name}',':',sampled_values_{name})\n".format( + name=grid_variable["name"], + ) + ) + + if vb: + self._add_code( + "print('sample {name} from',sampled_values_{name})".format( + name=grid_variable["name"] + ) + + "\n" + ) + + # calculate number of values and starting location + # + # if we're sampling a continuous variable, we + # have one fewer grid point than the length of the + # sampled_values list + if grid_variable["gridtype"] in [ + "centred", + "centre", + "center", + "edge", + "left edge", + "left", + "right", + "right edge", + ]: + offset = -1 + elif grid_variable["gridtype"] == "discrete": + # discrete variables sample all the points + offset = 0 + + start = 0 + + # for loop over the variable + if vb: + self._add_code( + 'print("var {name} values ",sampled_values_{name}," len ",len(sampled_values_{name})+{offset}," gridtype {gridtype} offset {offset}\\n")\n'.format( + name=grid_variable["name"], + offset=offset, + gridtype=grid_variable["gridtype"], + ) + ) + + stop = "len(sampled_values_{name})+{offset}".format( + name=grid_variable["name"], offset=offset + ) + + if _numba and grid_variable["dry_parallel"]: + # Parallel outer loop + self._add_code("@numba.jit(parallel=True)\n") + self._add_code("def __parallel_func(phasevol,_total_starcount):\n") + self._increment_indent_depth(+1) + self._add_code( + "for {name}_sample_number in numba.prange({stop}):\n".format( + name=grid_variable["name"], + stop=stop, + ) + ) + self._increment_indent_depth(+1) + if start > 0: + self._add_code( + "if {name}_sample_number < {start}:\n".format( + name=grid_variable["name"], + start=start, + ) + ) + self._add_code("continue\n", indent=1) + else: + self._add_code( + "for {name}_sample_number in range({start},{stop}):\n".format( + name=grid_variable["name"], + start=start, + stop=stop, + ) + ) + self._increment_indent_depth(+1) + + # {}_this_index is this grid point's index + # {}_prev_index and {}_next_index are the previous and next grid points, + # (which can be None if there is no previous or next, or if + # previous and next should not be used: this is deliberate) + # + + if grid_variable["gridtype"] == "discrete": + # discrete grids only care about this, + # both prev and next should be None to + # force errors where they are used + self._add_code( + "{name}_this_index = {name}_sample_number ".format( + name=grid_variable["name"], + ), + ) + self._add_code( + "\n", + "{name}_prev_index = None if {name}_this_index == 0 else ({name}_this_index - 1) ".format( + name=grid_variable["name"], + ), + "\n", + ) + self._add_code( + "\n", + "{name}_next_index = None if {name}_this_index >= (len(sampled_values_{name})+{offset} - 1) else ({name}_this_index + 1)".format( + name=grid_variable["name"], offset=offset + ), + "\n", + ) + + elif grid_variable["gridtype"] in [ + "centred", + "centre", + "center", + "edge", + "left", + "left edge", + ]: + + # left and centred grids + self._add_code( + "if {}_sample_number == 0:\n".format(grid_variable["name"]) + ) + self._add_code( + "{}_this_index = 0;\n".format(grid_variable["name"]), indent=1 + ) + self._add_code("else:\n") + self._add_code( + "{name}_this_index = 
{name}_sample_number ".format( + name=grid_variable["name"] + ), + indent=1, + ) + self._add_code("\n") + self._add_code( + "{name}_prev_index = ({name}_this_index - 1) if {name}_this_index > 0 else None ".format( + name=grid_variable["name"] + ) + ) + self._add_code("\n") + self._add_code( + "{name}_next_index = {name}_this_index + 1".format( + name=grid_variable["name"] + ) + ) + self._add_code("\n") + + elif grid_variable["gridtype"] in ["right", "right edge"]: + + # right edged grid + self._add_code( + "if {name}_sample_number == 0:\n".format(name=grid_variable["name"]) + ) + self._add_code( + "{name}_this_index = 1;\n".format(name=grid_variable["name"]), + indent=1, + ) + self._add_code("else:\n") + self._add_code( + "{name}_this_index = {name}_sample_number + 1 ".format( + name=grid_variable["name"], + ), + indent=1, + ) + self._add_code("\n") + self._add_code( + "{name}_prev_index = {name}_this_index - 1".format( + name=grid_variable["name"] + ) + ) + self._add_code("\n") + self._add_code( + "{name}_next_index = ({name}_this_index + 1) if {name}_this_index < len(sampled_values_{name}) else None".format( + name=grid_variable["name"] + ) + ) + self._add_code("\n") + + # calculate phase volume + if grid_variable["dphasevol"] == -1: + # no phase volume required so set it to 1.0 + self._add_code( + "dphasevol_{name} = 1.0 # 666\n".format(name=grid_variable["name"]) + ) + + elif grid_variable["gridtype"] in ["right", "right edge"]: + # right edges always have this and prev defined + self._add_code( + "dphasevol_{name} = (sampled_values_{name}[{name}_this_index] - sampled_values_{name}[{name}_prev_index])".format( + name=grid_variable["name"] + ) + + "\n" + ) + elif grid_variable["gridtype"] == "discrete": + # discrete might have next defined, use it if we can, + # otherwise use prev + self._add_code( + "dphasevol_{name} = (sampled_values_{name}[{name}_next_index] - sampled_values_{name}[{name}_this_index]) if {name}_next_index else (sampled_values_{name}[{name}_this_index] - sampled_values_{name}[{name}_prev_index])".format( + name=grid_variable["name"] + ) + + "\n" + ) + else: + # left and centred always have this and next defined + self._add_code( + "dphasevol_{name} = (sampled_values_{name}[{name}_next_index] - sampled_values_{name}[{name}_this_index])".format( + name=grid_variable["name"] + ) + + "\n" + ) + + ############## + # Add phasevol check: + self._add_code( + "if dphasevol_{name} <= 0:\n".format(name=grid_variable["name"]) + ) + + # n that case we need another local variable which will prevent it from being run but will track those parameters + # Add phasevol check action: + self._add_code( + 'print("Grid generator: dphasevol_{name} <= 0! 
(this=",{name}_this_index,"=",sampled_values_{name}[{name}_this_index],", next=",{name}_next_index,"=",sampled_values_{name}[{name}_next_index],") Skipping current sample.")'.format( + name=grid_variable["name"] + ) + + "\n", + "continue\n", + indent=1, + ) + + if vb: + self._add_code( + "print('sample {name} from ',sampled_values_{name},' at this=',{name}_this_index,', next=',{name}_next_index)".format( + name=grid_variable["name"] + ) + + "\n" + ) + + # select sampled point location based on gridtype (left, centre or right) + if grid_variable["gridtype"] in [ + "edge", + "left", + "left edge", + "right", + "right edge", + "discrete", + ]: + self._add_code( + "{name} = sampled_values_{name}[{name}_this_index]".format( + name=grid_variable["name"] + ) + + "\n" + ) + elif grid_variable["gridtype"] in ["centred", "centre", "center"]: + self._add_code( + "{name} = 0.5 * (sampled_values_{name}[{name}_next_index] + sampled_values_{name}[{name}_this_index])".format( + name=grid_variable["name"] + ) + + "\n" + ) + else: + msg = "Unknown gridtype value {type}.".format( + type=grid_variable["gridtype"] + ) + raise ValueError(msg) + + if vb: + self._add_code( + "print('hence {name} = ',{name})\n".format( + name=grid_variable["name"] + ) + ) + + ################################################################################# + # Check condition and generate for loop + + # If the grid variable has a condition, write the check and the action + if grid_variable["condition"]: + self._add_code( + # Add comment + "# Condition for {name}\n".format(name=grid_variable["name"]), + # Add condition check + "if not {condition}:\n".format( + condition=grid_variable["condition"] + ), + indent=0, + ) + + # Add condition failed action: + if self.grid_options["verbosity"] >= 4: + self._add_code( + 'print("Grid generator: Condition for {name} not met!")'.format( + name=grid_variable["name"] + ) + + "\n", + "continue" + "\n", + indent=1, + ) + else: + self._add_code( + "continue" + "\n", + indent=1, + ) + # Add some whitespace + self._add_code("\n") + + # Add some whitespace + self._add_code("\n") + + ######################### + # Set up pre-code and value in some cases + # Add pre-code + if grid_variable["precode"]: + self._add_code( + "{precode}".format( + precode=grid_variable["precode"].replace( + "\n", "\n" + self._indent_block(0) + ) + ) + + "\n" + ) + + # Set phasevol + self._add_code( + "phasevol *= dphasevol_{name}\n".format( + name=grid_variable["name"], + ) + ) + + ####################### + # Probabilities + # Calculate probability + self._add_code( + "\n", + "# Set probabilities\n", + "dprob_{name} = dphasevol_{name} * ({probdist})".format( + name=grid_variable["name"], + probdist=grid_variable["probdist"], + ) + + "\n", + # Save probability sum + "probabilities_sum[{n}] += dprob_{name}".format( + n=grid_variable["grid_variable_number"], name=grid_variable["name"] + ) + + "\n", + ) + + if grid_variable["grid_variable_number"] == 0: + self._add_code( + "probabilities_list[0] = dprob_{name}".format( + name=grid_variable["name"] + ) + + "\n" + ) + else: + self._add_code( + "probabilities_list[{this}] = probabilities_list[{prev}] * dprob_{name}".format( + this=grid_variable["grid_variable_number"], + prev=grid_variable["grid_variable_number"] - 1, + name=grid_variable["name"], + ) + + "\n" + ) + + ############## + # postcode + if grid_variable["postcode"]: + self._add_code( + "{postcode}".format( + postcode=grid_variable["postcode"].replace( + "\n", "\n" + self._indent_block(0) + ) + ) + + "\n" + ) + + 
####################### + # Increment starcount for this parameter + self._add_code( + "\n", + "# Increment starcount for {name}\n".format(name=grid_variable["name"]), + "starcounts[{n}] += 1".format( + n=grid_variable["grid_variable_number"], + ) + + "\n", + # Add value to dict + 'parameter_dict["{name}"] = {name}'.format( + name=grid_variable["parameter_name"] + ) + + "\n", + "\n", + ) + + self._increment_indent_depth(-1) + + # The final parts of the code, where things are returned, are within the deepest loop, + # but in some cases code from a higher loop needs to go under it again + # SO I think its better to put an if statement here that checks + # whether this is the last loop. + if loopnr == len(self.grid_options["_grid_variables"]) - 1: + self._write_gridcode_system_call( + grid_variable, + dry_run, + grid_variable["branchpoint"], + grid_variable["branchcode"], + ) + + # increment indent_depth + self._increment_indent_depth(+1) + + #################### + # bottom code + if grid_variable["bottomcode"]: + self._add_code(grid_variable["bottomcode"]) + + self._increment_indent_depth(-1) + self._add_code("\n") + + # Write parts to write below the part that yield the results. + # this has to go in a reverse order: + # Here comes the stuff that is put after the deepest nested part that calls returns stuff. + # Here we will have a + reverse_sorted_grid_variables = sorted( + self.grid_options["_grid_variables"].items(), + key=lambda x: x[1]["grid_variable_number"], + reverse=True, + ) + for loopnr, grid_variable_el in enumerate(reverse_sorted_grid_variables): + grid_variable = grid_variable_el[1] + + self._increment_indent_depth(+1) + self._add_code( + "#" * 40 + "\n", + "# Code below is for finalising the handling of this iteration of the parameter {name}\n".format( + name=grid_variable["name"] + ), + ) + + # Set phasevol + # TODO: fix. this isn't supposed to be the value that we give it here. discuss + self._add_code( + "phasevol /= dphasevol_{name}\n\n".format(name=grid_variable["name"]) + ) + + self._increment_indent_depth(-2) + + if _numba and grid_variable["dry_parallel"]: + self._add_code("__parallel_func(phasevol,_total_starcount)\n") + self._increment_indent_depth(-1) + + # Check the branchpoint part here. The branchpoint makes sure that we can construct + # a grid with several multiplicities and still can make the system calls for each + # multiplicity without reconstructing the grid each time + if grid_variable["branchpoint"] > 0: + + self._increment_indent_depth(+1) + + self._add_code( + # Add comment + "# Condition for branchpoint at {}".format( + reverse_sorted_grid_variables[loopnr + 1][1]["name"] + ) + + "\n", + # # Add condition check + # "if not {}:".format(grid_variable["condition"]) + # + "\n" + # Add branchpoint + "if multiplicity=={}:".format(grid_variable["branchpoint"]) + "\n", + ) + + self._write_gridcode_system_call( + reverse_sorted_grid_variables[loopnr + 1][1], + dry_run, + grid_variable["branchpoint"], + grid_variable["branchcode"], + ) + self._increment_indent_depth(-1) + self._add_code("\n") + + ############################### + # Finalise print statements + # + self._increment_indent_depth(+1) + self._add_code("\n", "#" * 40 + "\n", "if print_results:\n") + self._add_code( + "print('Grid has handled {starcount} stars with a total probability of {probtot:g}'.format(starcount=_total_starcount,probtot=self.grid_options['_probtot']))\n", + indent=1, + ) + + ################ + # Finalise return statement for dry run. 
+ # + if dry_run: + self._add_code("return _total_starcount\n") + + self._increment_indent_depth(-1) + ################################################################################# + # Stop of code generation. Here the code is saved and written + + # Save the grid code to the grid_options + self.verbose_print( + "Save grid code to grid_options", self.grid_options["verbosity"], 1 + ) + + self.grid_options["code_string"] = self.code_string + + # Write to file + gridcode_filename = self._gridcode_filename() + + self.grid_options["gridcode_filename"] = gridcode_filename + + self.verbose_print( + "{blue}Write grid code to {file} [dry_run = {dry}]{reset}".format( + blue=self.ANSI_colours["blue"], + file=gridcode_filename, + dry=dry_run, + reset=self.ANSI_colours["reset"], + ), + self.grid_options["verbosity"], + 1, + ) + + with self.open(gridcode_filename, "w", encoding="utf-8") as file: + file.write(self.code_string) + + # perhaps create symlink + if not self.HPC_job() and self.grid_options["symlink_latest_gridcode"]: + global _count + symlink = os.path.join( + self.grid_options["tmp_dir"], "binary_c_grid-latest" + str(_count) + ) + _count += 1 + try: + os.unlink(symlink) + except: + pass + + try: + os.symlink(gridcode_filename, symlink) + self.verbose_print( + "{blue}Symlinked grid code to {symlink} {reset}".format( + blue=self.ANSI_colours["blue"], + symlink=symlink, + reset=self.ANSI_colours["reset"], + ), + self.grid_options["verbosity"], + 1, + ) + except OSError: + print("symlink failed") + + def _write_gridcode_system_call( + self, grid_variable, dry_run, branchpoint, branchcode + ): + ################################################################################# + # Here are the calls to the queue or other solution. + # this part is for every system + # + self._increment_indent_depth(+1) + self._add_code("#" * 40 + "\n") + + if branchcode: + self._add_code( + "# Branch code\nif {branchcode}:\n".format(branchcode=branchcode) + ) + + if branchpoint: + self._add_code( + "# Code below will get evaluated for every system at this level of multiplicity (last one of that being {name})\n".format( + name=grid_variable["name"] + ) + ) + else: + self._add_code( + "# Code below will get evaluated for every generated system\n" + ) + + # Factor in the custom weight input + self._add_code( + "\n", + "# Weigh the probability by a custom weighting factor\n", + 'probability = self.grid_options["weight"] * probabilities_list[{n}]'.format( + n=grid_variable["grid_variable_number"] + ) + + "\n", + # Take into account the multiplicity fraction: + "\n", + "# Factor the multiplicity fraction into the probability\n", + "probability *= self._calculate_multiplicity_fraction(parameter_dict)" + + "\n", + # Divide by number of repeats + "\n", + "# Divide the probability by the number of repeats\n", + 'probability /= self.grid_options["repeat"]' + "\n", + # Now we yield the system self.grid_options["repeat"] times. + "\n", + "# Loop over the repeats\n", + 'for _ in range(self.grid_options["repeat"]):' + "\n", + ) + self._add_code( + "_total_starcount += 1\n", + # set probability and phasevol values into the system dict + 'parameter_dict["{p}"] = {p}'.format(p="probability") + "\n", + 'parameter_dict["{v}"] = {v}'.format(v="phasevol") + "\n", + # Increment total probability + "self._increment_probtot(probability)\n", + indent=1, + ) + + if not dry_run: + # Handle what is returned, or what is not. 
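+ # In a normal run the generated code yields each parameter_dict so that the caller can queue the system for evolution; in a dry run (the else branch below) nothing is yielded and only the counters and the optional dry_run_hook are used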
+ self._add_code("yield(parameter_dict)\n", indent=1) + + # If its a dry run, dont do anything with it + else: + # run the hook function, only if given + if self.grid_options["dry_run_hook"]: + self._add_code( + "self.grid_options['dry_run_hook'](self, parameter_dict)\n", indent=1 + ) + else: + # or pass + self._add_code("pass\n", indent=1) + + self._add_code("#" * 40 + "\n") + + self._increment_indent_depth(-1) + + return self.code_string + + def _load_grid_function(self): + """ + Function that loads the grid code from file + """ + + # Code to load the + self.verbose_print( + message="Load grid code function from {file}".format( + file=self.grid_options["gridcode_filename"] + ), + verbosity=self.grid_options["verbosity"], + minimal_verbosity=1, + ) + + spec = importlib.util.spec_from_file_location( + "binary_c_python_grid", + os.path.join(self.grid_options["gridcode_filename"]), + ) + grid_file = importlib.util.module_from_spec(spec) + spec.loader.exec_module(grid_file) + generator = grid_file.grid_code + + self.grid_options["_system_generator"] = generator + + self.verbose_print("Grid code loaded", self.grid_options["verbosity"], 1) + + def _last_grid_variable(self): + """ + Function that returns the last grid variable (i.e. the one with the highest grid_variable_number) + + TODO: this function does not require this structure and can be simplified + """ + + number = len(self.grid_options["_grid_variables"]) + for grid_variable in self.grid_options["_grid_variables"]: + if ( + self.grid_options["_grid_variables"][grid_variable][ + "grid_variable_number" + ] + == number - 1 + ): + return grid_variable + + def update_grid_variable(self, name: str, **kwargs) -> None: + """ + Function to update the values of a grid variable. + + Args: + name: + name of the grid variable to be changed. + **kwargs: + key-value pairs to override the existing grid variable data. See add_grid_variable for these names. + """ + + if name in self.grid_options["_grid_variables"]: + grid_variable = self.grid_options["_grid_variables"][name] + + # Set the value and print + for key, value in kwargs.items(): + grid_variable[key] = value + self.verbose_print( + "Updated grid variable: {}".format( + json.dumps(grid_variable, indent=4, ensure_ascii=False) + ), + self.grid_options["verbosity"], + 1, + ) + else: + msg = "Unknown grid variable {} - please create it with the add_grid_variable() method.".format( + name + ) + raise KeyError(msg) + + def delete_grid_variable( + self, + name: str, + ) -> None: + """ + Function to delete a grid variable with the given name. + + Args: + name: + name of the grid variable to be deleted. + """ + + if name in self.grid_options["_grid_variables"]: + del self.grid_options["_grid_variables"][name] + self.verbose_print( + "Deleted grid variable: {}".format(name), + self.grid_options["verbosity"], + 1, + ) + else: + msg = "Failed to remove grid variable {}: please check it exists.".format( + name + ) + raise ValueError(msg) + + def rename_grid_variable(self, oldname: str, newname: str) -> None: + """ + Function to rename a grid variable. + + note: this does NOT alter the order + of the self.grid_options["_grid_variables"] dictionary. 
+ + The order in which the grid variables are loaded into the grid is based on their + `grid_variable_number` property + + Args: + oldname: + old name of the grid variable + newname: + new name of the grid variable + """ + + if oldname in self.grid_options["_grid_variables"]: + self.grid_options["_grid_variables"][newname] = self.grid_options[ + "_grid_variables" + ].pop(oldname) + self.grid_options["_grid_variables"][newname]["name"] = newname + self.verbose_print( + "Rename grid variable: {} to {}".format(oldname, newname), + self.grid_options["verbosity"], + 1, + ) + else: + msg = "Failed to rename grid variable {} to {}.".format(oldname, newname) + raise ValueError(msg) + + def add_grid_variable( + self, + name: str, + parameter_name: str, + longname: str, + valuerange: Union[list, str], + samplerfunc: str, + probdist: str, + dphasevol: Union[str, int] = -1, + gridtype: str = "centred", + branchpoint: int = 0, + branchcode: Union[str, None] = None, + precode: Union[str, None] = None, + postcode: Union[str, None] = None, + topcode: Union[str, None] = None, + bottomcode: Union[str, None] = None, + condition: Union[str, None] = None, + index: Union[int, None] = None, + dry_parallel: Union[bool, None] = False, + ) -> None: + """ + Function to add grid variables to the grid_options. + + The execution of the grid generation will be through a nested for loop. + Each of the grid variables will create a deeper for loop. + + The real function that generates the numbers will get written to a new file in the TMP_DIR, + and then imported and evaluated. + Beware that if you insert some destructive piece of code, it will be executed anyway. + Use at your own risk. + + Args: + name: + name of parameter used in the grid Python code. + This is evaluated as a parameter and you can use it throughout + the rest of the function + + Examples: + name = 'lnM_1' + + parameter_name: + name of the parameter in binary_c + + This name must correspond to a Python variable of the same name, + which is automatic if parameter_name == name. + + Note: if parameter_name != name, you must set a + variable in "precode" or "postcode" to define a Python variable + called parameter_name + + longname: + Long name of parameter + + Examples: + longname = 'Primary mass' + valuerange: + Range of values to sample. This is not used directly: the samplerfunc below generates + the actual values + + Examples: + valuerange = [math.log(m_min), math.log(m_max)] + samplerfunc: + Function returning a list or numpy array of samples spaced appropriately. + You can either use a real function, or a string representation of a function call. + + Examples: + samplerfunc = "self.const_linear(math.log(m_min), math.log(m_max), {})".format(resolution['M_1']) + + precode: + Extra room for some code. This code will be evaluated within the loop of the + sampling function (i.e. a value for lnM_1 is chosen already) + + Examples: + precode = 'M_1=math.exp(lnM_1);' + postcode: + Code executed after the probability is calculated. + probdist: + Function determining the probability that gets assigned to the sampled parameter + + Examples: + probdist = 'Kroupa2001(M_1)*M_1' + dphasevol: + part of the parameter space that the total probability is calculated with. 
Put to -1 + if you want to ignore any dphasevol calculations and set the value to 1 + Examples: + dphasevol = 'dlnM_1' + condition: + condition that has to be met in order for the grid generation to continue + Examples: + condition = 'self.grid_options['binary']==1' + gridtype: + Method on how the value range is sampled. Can be either 'edge' (steps starting at + the lower edge of the value range) or 'centred' + (steps starting at lower edge + 0.5 * stepsize). + + dry_parallel: + If True, try to parallelize this variable in dry runs. + + topcode: + Code added at the very top of the block. + + bottomcode: + Code added at the very bottom of the block. + """ + + # check parameters + # if False and dphasevol != -1.0 and gridtype == "discrete": + if dphasevol != -1.0 and gridtype == "discrete": + print( + "Error making grid: you have set the phasevol to be not -1 and gridtype to discrete, but a discrete grid has no phasevol calculation. You should only set the gridtype to discrete and not set the phasevol in this case." + ) + + self.exit(code=1) + + # Add grid_variable + grid_variable = { + "name": name, + "parameter_name": parameter_name, + "longname": longname, + "valuerange": valuerange, + "samplerfunc": samplerfunc, + "precode": precode, + "postcode": postcode, + "probdist": probdist, + "dphasevol": dphasevol, + "condition": condition, + "gridtype": gridtype, + "branchpoint": branchpoint, + "branchcode": branchcode, + "topcode": topcode, + "bottomcode": bottomcode, + "grid_variable_number": len(self.grid_options["_grid_variables"]), + "dry_parallel": dry_parallel, + } + + # Check for gridtype input + allowed_gridtypes = [ + "edge", + "right", + "right edge", + "left", + "left edge", + "centred", + "centre", + "center", + "discrete", + ] + + if gridtype not in allowed_gridtypes: + msg = "Unknown gridtype {gridtype}. Please choose one of: ".format( + gridtype=gridtype + ) + ",".join(allowed_gridtypes) + raise ValueError(msg) + + # Load it into the grid_options + self.grid_options["_grid_variables"][grid_variable["name"]] = grid_variable + + self.verbose_print( + "Added grid variable: {}".format( + json.dumps(grid_variable, indent=4, ensure_ascii=False) + ), + self.grid_options["verbosity"], + 2, + ) diff --git a/binarycpython/utils/hpc_functions.py b/binarycpython/utils/hpc_functions.py deleted file mode 100644 index 87d3fe8b909b4c3496f430a9add8454016f8ac2a..0000000000000000000000000000000000000000 --- a/binarycpython/utils/hpc_functions.py +++ /dev/null @@ -1,151 +0,0 @@ -""" -File containing functions for HPC computing, distributed tasks on clusters etc. - -Functions that the Slurm and Condor subroutines of the population object use. - -Mainly divided in 2 sections: Slurm and Condor -""" - -# import os -# import sys -# import time -# import subprocess -# from typing import Union -# import __main__ as main - - -# def get_slurm_version() -> Union[str, None]: -# """ -# Function that checks whether slurm is installed and returns the version if its installed. 
- -# Only tested this with slurm v17+ - -# Returns: -# slurm version, or None -# """ - -# slurm_version = None - -# try: -# slurm_version = ( -# subprocess.run(["sinfo", "-V"], stdout=subprocess.PIPE, check=True) -# .stdout.decode("utf-8") -# .split() -# )[1] -# except FileNotFoundError as err: -# print(err) -# print(err.args) -# print("Slurm is not installed or not loaded") -# except Exception as err: -# print(err) -# print(err.args) -# print("Unknown error, contact me about this") - -# return slurm_version - - -# def get_condor_version() -> Union[str, None]: -# """ -# Function that checks whether slurm is installed and returns the version if its installed. - -# otherwise returns None - -# Result has to be condor v8 or higher - -# Returns: -# condor version, or None -# """ - -# condor_version = None - -# try: -# condor_version = ( -# subprocess.run( -# ["condor_q", "--version"], stdout=subprocess.PIPE, check=True -# ) -# .stdout.decode("utf-8") -# .split() -# )[1] -# except FileNotFoundError as err: -# print("Slurm is not installed or not loaded: ") -# print(err) -# print(err.args) -# except Exception as err: -# print("Unknown error, contact me about this: ") -# print(err) -# print(err.args) - -# return condor_version - - -# def create_directories_hpc(working_dir: str) -> None: -# """ -# Function to create a set of directories, given a root directory - -# These directories will contain stuff for the HPC runs - -# Args: -# working_dir: main working directory of the run. Under this directory all the dirs will be created -# """ - -# # Check if working_dir exists -# if not os.path.isdir(working_dir): -# print("Error. Working directory {} does not exist! Aborting") -# raise ValueError - -# directories_list = [ -# "scripts", -# "stdout", -# "stderr", -# "results", -# "logs", -# "status", -# "joining", -# ] - -# # Make directories. -# for subdir in directories_list: -# full_path = os.path.join(working_dir, subdir) -# os.makedirs(full_path, exist_ok=True) - -# # Since the directories are probably made on some mount which has to go over NFS -# # we should explicitly check if they are created -# print("Checking if creating the directories has finished...") -# directories_exist = False -# while directories_exist: -# directories_exist = True - -# for subdir in directories_list: -# full_path = os.path.join(working_dir, subdir) - -# if not os.path.isdir(full_path): -# time.sleep(1) -# directories_exist = False -# print("..Finished! Directories exist.") - - -# def path_of_calling_script() -> str: -# """ -# Function to get the name of the script the user executes. -# TODO: fix this function. seems not to work properly. -# """ - -# return main.__file__ - - -# def get_python_details() -> dict: -# """ -# Function to get some info about the used python version and virtualenv etc - -# Returns: -# dictionary with python executable, virtual environment and version information. -# """ - -# python_info_dict = {} - -# # -# python_info_dict["virtualenv"] = os.getenv("VIRTUAL_ENV") -# python_info_dict["executable"] = sys.executable -# python_info_dict["version"] = sys.version - -# return python_info_dict diff --git a/binarycpython/utils/metadata.py b/binarycpython/utils/metadata.py new file mode 100644 index 0000000000000000000000000000000000000000..d577bf3917529bc8da5611e2a60da3f384c82dd2 --- /dev/null +++ b/binarycpython/utils/metadata.py @@ -0,0 +1,139 @@ +""" +Module containing the metadata functions for the binarycpython package. 
+ +This class object is an extension to the population grid object +""" + +# pylint: disable=E1101 + +import json +import platform + +from binarycpython.utils.dicts import ( + multiply_values_dict, +) +from binarycpython.utils.ensemble import ( + binaryc_json_serializer, +) + + +class metadata: + """ + Extension to the population grid object that contains functionality to handle the metadata that will be put in the ensemble + """ + + def __init__(self, **kwargs): + return + + def add_system_metadata(self): + """ + Add system's metadata to the grid_ensemble_results, and add some system information to metadata. + """ + + # add metadata if it doesn't exist + if not "metadata" in self.grid_ensemble_results: + self.grid_ensemble_results["metadata"] = {} + + # add date + self.grid_ensemble_results["metadata"]["date"] = self.now() + + # add platform and build information + self.grid_ensemble_results["metadata"]["platform"] = platform.platform() + self.grid_ensemble_results["metadata"]["platform_uname"] = list( + platform.uname() + ) + self.grid_ensemble_results["metadata"][ + "platform_machine" + ] = platform.machine() + self.grid_ensemble_results["metadata"]["platform_node"] = platform.node() + self.grid_ensemble_results["metadata"][ + "platform_release" + ] = platform.release() + self.grid_ensemble_results["metadata"][ + "platform_version" + ] = platform.version() + self.grid_ensemble_results["metadata"][ + "platform_processor" + ] = platform.processor() + self.grid_ensemble_results["metadata"]["platform_python_build"] = " ".join( + platform.python_build() + ) + self.grid_ensemble_results["metadata"][ + "platform_python_version" + ] = platform.python_version() + + # Get hostname + self.grid_ensemble_results["metadata"]["hostname"] = platform.uname()[1] + + # Calculate time elapsed + self.grid_ensemble_results["metadata"]["duration"] = self.time_elapsed() + + # Calculate cpu time + self.grid_ensemble_results["metadata"]["CPU_time"] = self.CPU_time() + + def add_ensemble_metadata(self, combined_output_dict): + """ + Function to add metadata to the grid_ensemble_results and grid_options + """ + + self.grid_ensemble_results["metadata"] = {} + + self.grid_ensemble_results["metadata"]["population_id"] = self.grid_options[ + "_population_id" + ] + self.grid_ensemble_results["metadata"][ + "total_probability_weighted_mass" + ] = combined_output_dict["_total_probability_weighted_mass_run"] + self.grid_ensemble_results["metadata"][ + "factored_in_probability_weighted_mass" + ] = False + if self.grid_options["ensemble_factor_in_probability_weighted_mass"]: + multiply_values_dict( + self.grid_ensemble_results["ensemble"], + 1.0 + / self.grid_ensemble_results["metadata"][ + "total_probability_weighted_mass" + ], + ) + self.grid_ensemble_results["metadata"][ + "factored_in_probability_weighted_mass" + ] = True + self.grid_ensemble_results["metadata"]["_killed"] = self.grid_options["_killed"] + + # Add settings of the populations + all_info = self.return_all_info( + include_population_settings=True, + include_binary_c_defaults=True, + include_binary_c_version_info=True, + include_binary_c_help_all=True, + ) + self.grid_ensemble_results["metadata"]["settings"] = json.loads( + json.dumps(all_info, default=binaryc_json_serializer, ensure_ascii=False) + ) + + ############################## + # Update grid options + for x in self._metadata_keylist(): + self.grid_options[x] = combined_output_dict[x] + self.grid_options["_failed_systems_error_codes"] = list( + set(combined_output_dict["_failed_systems_error_codes"]) 
+ ) + + def _metadata_keylist(self): + """ + Function that returns the list of metadata keys + + TODO: Consider just setting this list as a property of the object in the init + """ + return [ + "_failed_count", + "_failed_prob", + "_errors_exceeded", + "_errors_found", + "_probtot", + "_count", + "_total_mass_run", + "_total_probability_weighted_mass_run", + "_zero_prob_stars_skipped", + "_killed", + ] diff --git a/binarycpython/utils/plot_functions.py b/binarycpython/utils/plot_functions.py index 19bfbd45d3f090d69fb06c0ba4d5ab35e3cf5c1d..0bc8e792ef7ca20d10bc9a786a3d1906fe065262 100644 --- a/binarycpython/utils/plot_functions.py +++ b/binarycpython/utils/plot_functions.py @@ -1,7 +1,7 @@ """ -Module that contains functionality to plot some properties of (binary) systems. +Module that contains functionality to plot some properties of (binary) systems. -Different routines are defined here to plot orbits, masses, angular momenta etc. +Different routines are defined here to plot orbits, masses, angular momenta etc. Structure of file: custom logging strings @@ -13,8 +13,8 @@ All the loose components here can of course be used in other routines if you wan There is no pre loaded matplotlib rc, you should do that yourself -These plotting routines are designed for binary systems, and admittedly they are here mostly for -inspirational purposes, since one would probably want to customise the plots. +These plotting routines are designed for binary systems, and admittedly they are here mostly for +inspirational purposes, since one would probably want to customise the plots. Regardless, having some plotting routines in here seemed like a nice idea Tasks @@ -164,10 +164,10 @@ def plot_HR_diagram( prefactor = (1 / (4 * math.pi * omega_sb)) ** (1.0 / 4) if show_stellar_types: - fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(20, 20)) - fig, colors = add_stellar_types_bar(df, fig, ax_index=-1, only_colorbar=True) + fig, _ = plt.subplots(ncols=1, nrows=1, figsize=(20, 20)) + # fig, colors = add_stellar_types_bar(df, fig, ax_index=-1, only_colorbar=True) else: - fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(20, 20)) + fig, _ = plt.subplots(ncols=1, nrows=1, figsize=(20, 20)) df = df.assign( teff_1=prefactor @@ -229,8 +229,7 @@ def plot_HR_diagram( # Show or return if show_plot: plt.show() - else: - return fig + return fig def plot_orbit(df, show_stellar_types: bool = False, show_plot: bool = True): @@ -254,10 +253,10 @@ def plot_orbit(df, show_stellar_types: bool = False, show_plot: bool = True): """ if show_stellar_types: - fig, ax = plt.subplots(ncols=1, nrows=4, figsize=(20, 10)) + fig, _ = plt.subplots(ncols=1, nrows=4, figsize=(20, 10)) fig.subplots_adjust(hspace=0) else: - fig, ax = plt.subplots(ncols=1, nrows=3, figsize=(20, 10), sharex=True) + fig, _ = plt.subplots(ncols=1, nrows=3, figsize=(20, 10), sharex=True) fig.subplots_adjust(hspace=0) # @@ -286,8 +285,7 @@ def plot_orbit(df, show_stellar_types: bool = False, show_plot: bool = True): # Show or return if show_plot: plt.show() - else: - return fig + return fig def plot_masses(df, show_stellar_types: bool = False, show_plot: bool = True): @@ -322,10 +320,10 @@ def plot_masses(df, show_stellar_types: bool = False, show_plot: bool = True): """ if show_stellar_types: - fig, ax = plt.subplots(ncols=1, nrows=2, figsize=(20, 10)) + fig, _ = plt.subplots(ncols=1, nrows=2, figsize=(20, 10)) fig.subplots_adjust(hspace=0) else: - fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(20, 10)) + fig, _ = plt.subplots(ncols=1, nrows=1, figsize=(20, 10)) 
max_total_mass = ( df["pms_mass_1"].values.tolist()[0] + df["pms_mass_2"].values.tolist()[0] @@ -393,15 +391,16 @@ def plot_masses(df, show_stellar_types: bool = False, show_plot: bool = True): # Show or return if show_plot: plt.show() - else: - return fig + return fig # Define the parse functions for the plotting routines def dummy(): - """Placeholder""" - pass + """ + Placeholder function + """ + return None def parse_function_hr_diagram(output: str): """ @@ -543,11 +542,6 @@ def plot_system(plot_type, **kwargs): - orbit_evolution - hr_diagram - Tasks: - - TODO: Complex Function! - - TODO: make sure this way of passing args works correctly. - - TODO: make the plotting specific keywords available via the inspect stuff - All keywords are considered kwargs, except for plot_type Args: @@ -602,15 +596,15 @@ def plot_system(plot_type, **kwargs): # run_system will handle the rest run_system_arg_dict = {} - for key in kwargs.keys(): + for key, value in kwargs.items(): if key == "show_plot": - show_plot = kwargs[key] + show_plot = value elif key == "show_stellar_types": - show_stellar_types = kwargs[key] + show_stellar_types = value # The rest will be passed to run_system else: - run_system_arg_dict[key] = kwargs[key] + run_system_arg_dict[key] = value # TODO: When a list of plot_types is passed, make it so that the strings are chained, # and that the output of the binary_c call is handled by multiple parsers @@ -628,22 +622,4 @@ def plot_system(plot_type, **kwargs): binary_c_output_df, show_plot=show_plot, show_stellar_types=show_stellar_types ) - if not show_plot: - return fig - - -# from david_phd_functions.plotting.custom_mpl_settings import load_mpl_rc -# load_mpl_rc() - -# fig = plot_system( -# plot_type="mass_evolution", -# M_1=10, -# M_2=5, -# separation=1000000, -# orbital_period=100000000, -# max_evolution_time=15000, -# show_plot=True, -# ) - -# fig.axes[0].set_xlim(0, 150) -# plt.show() + return fig diff --git a/binarycpython/utils/run_system_wrapper.py b/binarycpython/utils/run_system_wrapper.py index 51559f62c95101fdf2d83577cf8d1fccf303ba23..b222b85ae34f55b4478216071b7b2df4253b5585 100644 --- a/binarycpython/utils/run_system_wrapper.py +++ b/binarycpython/utils/run_system_wrapper.py @@ -28,8 +28,8 @@ def run_system(**kwargs): - run_system(M_1=10, parse_function=fancy_parsing_function) Tasks: - - TODO: Expand functionality. - TODO: Notify user when an unknown keyword is passed. + - TODO: Centralise the input checking routines here, abstract them and have the population object and this one use the same All the arguments known to binary_c can be passed to this function as kwargs. 
Several extra arguments can be passed through the kwargs: @@ -65,19 +65,18 @@ def run_system(**kwargs): binary_c_args = {} # Check which binary_c arguments have been passed and put them into a dict - for key in kwargs: + for key, value in kwargs.items(): if key in available_binary_c_arg_keywords: - binary_c_args[key] = kwargs[key] + binary_c_args[key] = value elif any( - [ - True - if (key.startswith(param[:-2]) and len(param[:-2]) < len(key)) - else False - for param in special_keywords - ] + bool((key.startswith(param[:-2]) and len(param[:-2]) < len(key))) + # True + # if (key.startswith(param[:-2]) and len(param[:-2]) < len(key)) + # else False + for param in special_keywords ): - binary_c_args[key] = kwargs[key] + binary_c_args[key] = value # Notify user when this key wont be used else: diff --git a/binarycpython/utils/slurm.py b/binarycpython/utils/slurm.py new file mode 100644 index 0000000000000000000000000000000000000000..2117def6eb680b6e890b0af56d07d4baa2b6bbb9 --- /dev/null +++ b/binarycpython/utils/slurm.py @@ -0,0 +1,444 @@ +""" +Module containing the Slurm functions for the binarycpython package. + +This class object is an extension to the population grid object +""" + +# pylint: disable=E1101 + +import os +import sys +import time +import stat +import pathlib +import subprocess + +import datasize +import lib_programname + + +class slurm: + """ + Extension for the Population class containing the code for Slurm grid simulations + """ + + def __init__(self, **kwargs): + # don't do anything: we just inherit from this class + return + + def slurmID(self, jobid=None, jobarrayindex=None): + """ + Function to return a Slurm job ID. The jobid and jobarrayindex passed in are used if given, otherwise we default to the jobid and jobarrayindex in grid_options. + """ + if jobid is None: + jobid = self.grid_options["slurm_jobid"] + if jobarrayindex is None: + jobarrayindex = self.grid_options["slurm_jobarrayindex"] + return "{jobid}.{jobarrayindex}".format( + jobid=jobid, jobarrayindex=jobarrayindex + ) + + def slurmpath(self, path, slurm_dir=None): + """ + Function to return the full slurm directory path. + """ + if slurm_dir is None: + slurm_dir = self.grid_options["slurm_dir"] + + return os.path.abspath(os.path.join(slurm_dir, path)) + + def slurm_status_file(self, jobid=None, jobarrayindex=None, slurm_dir=None): + """ + Return the slurm status file corresponding to the jobid and jobarrayindex, which default to grid_options slurm_jobid and slurm_jobarrayindex, respectively. + """ + return os.path.join( + self.slurmpath("status", slurm_dir=slurm_dir), + self.slurmID(jobid=jobid, jobarrayindex=jobarrayindex), + ) + + def slurm_check_requirements(self): + """ + Function to check whether the slurm parameters in grid_options have been set appropriately. + """ + if self.grid_options["slurm"] > 0 and self.grid_options["slurm_dir"] is None: + return ( + False, + "You have set slurm={slurm} but not set slurm_dir (which is {slurm_dir}). Please set it and try again.".format( + slurm=self.grid_options["slurm"], + slurm_dir=self.grid_options["slurm_dir"], + ), + ) + return (True, "") + + def slurm_dirs(self): + """ + Directories associated specifically with this slurm job. + """ + return ["slurm_dir"] + + def set_slurm_status(self, string, slurm_dir=None): + """ + Set the slurm status corresponing to the self object, which should have slurm_jobid and slurm_jobarrayindex set. + + Args: + string : the status string to be set + slurm_dir : the directory in which the status directory is held. 
If not set, this defaults to the HPC directory (e.g. slurm_dir or condor_dir). + """ + # save slurm jobid to file + if slurm_dir is None: + slurm_dir = self.grid_options["slurm_dir"] + + idfile = os.path.join(slurm_dir, "jobid") + if not os.path.exists(idfile): + with self.open(idfile, "w", encoding="utf-8") as fjobid: + fjobid.write("{jobid}\n".format(jobid=self.grid_options["slurm_jobid"])) + fjobid.close() + self.NFS_flush_hack(idfile) + + # save slurm status + file = self.slurm_status_file(slurm_dir=slurm_dir) + if file: + with self.open(file, "w", encoding="utf-8") as f: + f.write(string) + f.close() + self.NFS_flush_hack(file) + + def get_slurm_status(self, jobid=None, jobarrayindex=None, slurm_dir=None): + """ + Get and return the slurm status string corresponding to the self object, or jobid.jobarrayindex if they are passed in. If no status is found, returns an empty string. + """ + if jobid is None: + jobid = self.grid_options["slurm_jobid"] + if jobarrayindex is None: + jobarrayindex = self.grid_options["slurm_jobarrayindex"] + if jobid is None or jobarrayindex is None: + return None + try: + + path = pathlib.Path( + self.slurm_status_file( + slurm_dir=slurm_dir, jobid=jobid, jobarrayindex=jobarrayindex + ) + ) + if path: + self.NFS_flush_hack(path) + return path.read_text().strip() + return "" + # TODO: Fix bare exception + except: + return "" + def slurm_outfile(self, slurm_dir=None): + """ + Return a standard filename for the slurm chunk files + """ + file = "{id}.gz".format( + id=self.slurmID(), + ) + if slurm_dir is None: + slurm_dir = self.grid_options["slurm_dir"] + return os.path.abspath(os.path.join(slurm_dir, "results", file)) + + def make_slurm_dirs(self, slurm_dir=None): + """ + Function to create the necessary slurm directories + """ + + # TODO: replace the code block below with a function call to slurmpath + # make the slurm directories + if slurm_dir is None: + slurm_dir = self.grid_options["slurm_dir"] + + if not slurm_dir: + print( + "You must set self.grid_options['slurm_dir'] (or pass slurm_dir=whatever to make_slurm_dirs()) to a directory which we can use to set up binary_c-python's Slurm files. This should be unique to your set of grids." + ) + self.exit(code=1) + + # make a list of directories, these contain the various slurm + # output, status files, etc. + dirs = [] + for slurm_subdir in ["stdout", "stderr", "results", "status", "snapshots"]: + dirs.append(self.slurmpath(slurm_subdir)) + + # make the directories: we do not allow these to already exist + # as the slurm directory should be a fresh location for each set of jobs + for d in dirs: + try: + pathlib.Path(self.slurmpath(d, slurm_dir=slurm_dir)).mkdir( + exist_ok=False, parents=True + ) + # TODO: fix this bare exception + except: + print( + "Tried to make the directory {d} but it already exists. When you launch a set of binary_c jobs on Slurm, you need to set your slurm_dir to be a fresh directory with no contents.".format( + d=d + ) + ) + self.exit(code=1) + + # check that they have been made and exist: we need this + # because on network mounts (NFS) there's often a delay between the mkdir + # above and the actual directory being made. This shouldn't be too long... 
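+ # poll until every directory is visible, sleeping one second per missing directory and warning once we have waited more than count_warn seconds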
+ fail = True + count = 0 + count_warn = 10 + while fail is True: + fail = False + count += 1 + if count > count_warn: + print( + "Warning: Have been waiting about {count} seconds for Slurm directories to be made, there seems to be significant delay...".format( + count=count + ) + ) + for d in dirs: + if os.path.isdir(d) is False: + fail = True + time.sleep(1) + break + + def slurm_grid(self): + """ + function to be called when running grids when grid_options['slurm']>=1 + + if grid_options['slurm']==1, we set up the slurm script and launch the jobs, then return True to exit. + if grid_options['slurm']==2, we run the stars, which means we return False to continue. + if grid_options['slurm']==3, we are being called from the jobs to run the grids, return False to continue. + + """ + + if self.grid_options["slurm"] == 2: + # run a grid of stars only, leaving the results + # in the appropriate outfile + return False + + if self.grid_options["slurm"] == 3: + # joining : set the evolution type to "join" and return + # False to continue + self.grid_options["evolution_type"] = "join" + return False + + if self.grid_options["slurm"] == 1: + # if slurm=1, we should have no evolution type, we + # set up the Slurm scripts and get them evolving + # in a Slurm array + self.grid_options["evolution_type"] = None + + # make dirs + self.make_slurm_dirs() + + # check we're not using too much RAM + if datasize.DataSize(self.grid_options["slurm_memory"]) > datasize.DataSize( + self.grid_options["slurm_warn_max_memory"] + ): + print( + "WARNING: you want to use {slurm_memory} MB of RAM : this is unlikely to be correct. If you believe it is, set slurm_warn_max_memory to something very large (it is currently {slurm_warn_max_memory} MB)\n".format( + slurm_memory=self.grid_options["slurm_memory"], + slurm_warn_max_memory=self.grid_options[ + "slurm_warn_max_memory" + ], + ) + ) + self.exit(code=1) + + # set up slurm_array + if not self.grid_options["slurm_array_max_jobs"]: + self.grid_options["slurm_array_max_jobs"] = self.grid_options[ + "slurm_njobs" + ] + slurm_array = self.grid_options[ + "slurm_array" + ] or "1-{njobs}%{max_jobs}".format( + njobs=self.grid_options["slurm_njobs"], + max_jobs=self.grid_options["slurm_array_max_jobs"], + ) + + # get job id (might be passed in) + # TODO: is this variable used? + jobid = ( + self.grid_options["slurm_jobid"] + if self.grid_options["slurm_jobid"] != "" + else "$SLURM_ARRAY_JOB_ID" + ) + + # get job array index + jobarrayindex = self.grid_options["slurm_jobarrayindex"] + if jobarrayindex is None: + jobarrayindex = "$SLURM_ARRAY_TASK_ID" + + if self.grid_options["slurm_njobs"] == 0: + print( + "binary_c-python Slurm : You must set grid_option slurm_njobs to be non-zero" + ) + self.exit(code=1) + + # build the grid command + grid_command = ( + [ + str(self.grid_options["slurm_env"]), + sys.executable, + str(lib_programname.get_path_executed_script()), + ] + + sys.argv[1:] + + [ + "start_at=" + str(jobarrayindex) + "-1", # do we need the -1? 
+                    "modulo=" + str(self.grid_options["slurm_njobs"]),
+                    "slurm_njobs=" + str(self.grid_options["slurm_njobs"]),
+                    "slurm_dir=" + self.grid_options["slurm_dir"],
+                    "verbosity=" + str(self.grid_options["verbosity"]),
+                    "num_cores=" + str(self.grid_options["num_processes"]),
+                ]
+            )
+
+            grid_command = " ".join(grid_command)
+
+            # make slurm script
+            scriptpath = self.slurmpath("slurm_script")
+            try:
+                script = self.open(scriptpath, "w", encoding="utf-8")
+            except IOError:
+                print(
+                    "Could not open Slurm script at {path} for writing: please check that you have set slurm_dir correctly (it is currently {slurm_dir}) and that you can write to this directory.".format(
+                        path=scriptpath, slurm_dir=self.grid_options["slurm_dir"]
+                    )
+                )
+                self.exit(code=1)
+
+            slurmscript = """#!{bash}
+# Slurm launch script created by binary_c-python
+
+# Slurm options
+#SBATCH --error={slurm_dir}/stderr/%A.%a
+#SBATCH --output={slurm_dir}/stdout/%A.%a
+#SBATCH --job-name={slurm_jobname}
+#SBATCH --partition={slurm_partition}
+#SBATCH --time={slurm_time}
+#SBATCH --mem={slurm_memory}
+#SBATCH --ntasks={slurm_ntasks}
+#SBATCH --array={slurm_array}
+#SBATCH --cpus-per-task={ncpus}
+""".format(
+                bash=self.grid_options["slurm_bash"],
+                slurm_dir=self.grid_options["slurm_dir"],
+                slurm_jobname=self.grid_options["slurm_jobname"],
+                slurm_partition=self.grid_options["slurm_partition"],
+                slurm_time=self.grid_options["slurm_time"],
+                slurm_ntasks=self.grid_options["slurm_ntasks"],
+                slurm_memory=self.grid_options["slurm_memory"],
+                slurm_array=slurm_array,
+                ncpus=self.grid_options["num_processes"],
+            )
+
+            for key in self.grid_options["slurm_extra_settings"]:
+                slurmscript += "#SBATCH --{key}={value}\n".format(
+                    key=key, value=self.grid_options["slurm_extra_settings"][key]
+                )
+
+            slurmscript += """
+
+export BINARY_C_PYTHON_ORIGINAL_CMD_LINE={cmdline}
+export BINARY_C_PYTHON_ORIGINAL_WD=`{pwd}`
+export BINARY_C_PYTHON_ORIGINAL_SUBMISSION_TIME=`{date}`
+
+# set status to \"running\"
+echo \"running\" > {slurm_dir}/status/$SLURM_ARRAY_JOB_ID.$SLURM_ARRAY_TASK_ID
+
+# make list of files which is checked for joining
+# echo {slurm_dir}/results/$SLURM_ARRAY_JOB_ID.$SLURM_ARRAY_TASK_ID.gz >> {slurm_dir}/results/$SLURM_ARRAY_JOB_ID.all
+
+# run grid of stars and, if this returns 0, set status to finished
+{grid_command} slurm=2 evolution_type=grid slurm_jobid=$SLURM_ARRAY_JOB_ID slurm_jobarrayindex=$SLURM_ARRAY_TASK_ID save_population_object={slurm_dir}/results/$SLURM_ARRAY_JOB_ID.$SLURM_ARRAY_TASK_ID.gz && echo -n \"finished\" > {slurm_dir}/status/$SLURM_ARRAY_JOB_ID.$SLURM_ARRAY_TASK_ID && echo """.format(
+                slurm_dir=self.grid_options["slurm_dir"],
+                grid_command=grid_command,
+                cmdline=repr(self.grid_options["command_line"]),
+                date=self.grid_options["slurm_date"],
+                pwd=self.grid_options["slurm_pwd"],
+            )
+
+            if not self.grid_options["slurm_postpone_join"]:
+                slurmscript += """&& echo \"Checking if we can join...\" && echo && {grid_command} slurm=3 evolution_type=join joinlist={slurm_dir}/results/$SLURM_ARRAY_JOB_ID.all slurm_jobid=$SLURM_ARRAY_JOB_ID slurm_jobarrayindex=$SLURM_ARRAY_TASK_ID
+                """.format(
+                    slurm_dir=self.grid_options["slurm_dir"],
+                    grid_command=grid_command,
+                )
+            else:
+                slurmscript += "\n"
+
+            # write to script, close it and make it executable by
+            # all (so the slurm user can pick it up)
+            script.write(slurmscript)
+            script.close()
+            os.chmod(
+                scriptpath,
+                stat.S_IREAD
+                | stat.S_IWRITE
+                | stat.S_IEXEC
+                | stat.S_IRGRP
+                | stat.S_IXGRP
+                | stat.S_IROTH
+                | stat.S_IXOTH,
+            )
+
+            if not
self.grid_options["slurm_postpone_sbatch"]: + # call sbatch to launch the jobs + cmd = [self.grid_options["slurm_sbatch"], scriptpath] + + with subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) as pipes: + + std_out, std_err = pipes.communicate() + if pipes.returncode != 0: + # an error happened! + err_msg = "{red}{err}\nReturn Code: {code}{reset}".format( + err=std_err.strip(), + code=pipes.returncode, + red=self.ANSI_colours["red"], + reset=self.ANSI_colours["reset"], + ) + raise Exception(err_msg) + + if len(std_err): + print( + "{red}{err}{reset}".format( + red=self.ANSI_colours["red"], + reset=self.ANSI_colours["reset"], + err=std_err.strip().decode("utf-8"), + ) + ) + + print( + "{yellow}{out}{reset}".format( + yellow=self.ANSI_colours["yellow"], + reset=self.ANSI_colours["reset"], + out=std_out.strip().decode("utf-8"), + ) + ) + else: + # just say we would have (use this for testing) + print( + "Slurm script is at {path} but has not been launched".format( + path=scriptpath + ) + ) + + # some messages to the user, then return + if self.grid_options["slurm_postpone_sbatch"] == 1: + print( + "Slurm script written, but launching the jobs with sbatch was postponed." + ) + else: + print("Slurm jobs launched") + print("All done in slurm_grid().") + + # return True so we exit immediately + return True + + def slurm_queue_stats(self): + """ + Function to XXX + + TODO: is this function finished? + """ + + return None diff --git a/binarycpython/utils/spacing_functions.py b/binarycpython/utils/spacing_functions.py index c3e0be388abf1320089604e61e8831a722480141..f4c70ec200ddfceff2c050799c72607ac34d8eb3 100644 --- a/binarycpython/utils/spacing_functions.py +++ b/binarycpython/utils/spacing_functions.py @@ -1,428 +1,649 @@ """ Module containing the spacing functions for the binarycpython package. Very under-populated at the moment, but more are likely to come soon +This class object is an extension to the population grid object + Tasks: TODO: add more spacing functions to this module. """ -from typing import Union -import functools -import math -import numpy as np -import py_rinterpolate -import sys -from binarycpython.utils.grid import Population - - -@functools.lru_cache(maxsize=16) -def const( - min_bound: Union[int, float], max_bound: Union[int, float], steps: int -) -> list: - """ - Samples a range linearly. Uses numpy linspace, and returns an array of floats. Do NOT use this for integers. - - Args: - min_bound: lower bound of range - max_bound: upper bound of range - steps: number of segments between min_bound and max_bound - - Returns: - np.linspace(min_bound, max_bound, steps) - """ - return np.linspace(min_bound, max_bound, steps) - -@functools.lru_cache(maxsize=16) -def const_int( - min_bound: Union[int, float], max_bound: Union[int, float], steps: int -) -> list: - """ - Samples an integer range linearly. Returns a list of ints. 
- - Args: - min_bound: lower bound of range, must be an integer (is converted to int) - max_bound: upper bound of range, must be an integer (is converted to int) - steps: number of segments between min_bound and max_bound - - Returns: - range(min_bound,max_bound,step) - - where step is int((int(max_bound)-int(min_bound))/steps) - """ - - step = int((int(max_bound)-int(min_bound))/(steps-1)) - if steps <= 1: - return int(min_bound) - else: - return range(int(min_bound),int(max_bound+step),step) - - -############################################################ -@functools.lru_cache(maxsize=16) -def const_ranges(ranges) -> list: - """ - Samples a series of ranges linearly. - - Args: - ranges: a tuple of tuples passed to the const() spacing function. - - Returns: - numpy array of masses - - Example: - The following allocates 10 stars between 0.1 and 0.65, 20 stars between 0.65 - and 0.85, and 10 stars between 0.85 and 10.0 Msun. - - samplerfunc="const_ranges((({},{},{}),({},{},{}),({},{},{})))".format( - 0.1,0.65,10, - 0.65,0.85,20, - 0.85,10.0,10 - ), - - """ +# pylint: disable=E1101 - masses = np.empty(0) - for range in ranges: - masses = np.append(masses, const(*range)) - return np.unique(masses) +import sys +import math +import json +import functools +from typing import Union +import traceback +import numpy as np +import cachetools +import diskcache -############################################################ -def peak_normalized_gaussian_func( - x: Union[int, float], mean: Union[int, float], sigma: Union[int, float] -) -> Union[int, float]: - """ - Function to evaluate a Gaussian at a given point, note - that the normalization is such that the peak is always 1.0, - not that the integral is 1.0 +from binarycpython.utils.grid import Population - Args: - x: location at which to evaluate the distribution - mean: mean of the Gaussian - sigma: standard deviation of the Gaussian +import py_rinterpolate - Returns: - value of the Gaussian at x - """ - gaussian_prefactor = 1.0 # / math.sqrt(2.0 * math.pi) - - r = 1.0 / sigma - y = (x - mean) * r - return math.exp(-0.5 * y ** 2) - - -############################################################ -@functools.lru_cache(maxsize=16) -def gaussian_zoom( - min_bound: Union[int, float], - max_bound: Union[int, float], - zoom_mean: Union[int, float], - zoom_dispersion: Union[int, float], - zoom_magnitude: Union[int, float], - steps: int, -) -> list: - """ - Samples such that a region is zoomed in according to a 1-Gaussian function - - Args: - min_bound: lower bound of range - max_bound: upper bound of range - zoom_mean: mean of the Gaussian zoom location - zoom_dispersion: dispersion of the Gaussian - zoom_magnitude: depth of the Gaussian (should be 0<= zoom_magntiude <1) - steps: number of segments between min_bound and max_bound assuming a linear step - this is what you'd normally call "resolution" - - Returns: - Numpy array of sample values - """ - # linear spacing: this is what we'd have - # in the absence of a Gaussian zoom - linear_spacing = (max_bound - min_bound) / (steps - 1) - - # make the list of values - x = min_bound - array = np.array([]) - while x <= max_bound: - array = np.append(array, x) - g = peak_normalized_gaussian_func(x, zoom_mean, zoom_dispersion) - f = 1.0 - zoom_magnitude * g - dx = linear_spacing * f - x = x + dx - - # force the last array member to be max_bound if it's not - if array[-1] != max_bound: - array[-1] = max_bound - - return np.unique(array) - - -@functools.lru_cache(maxsize=16) -def const_dt( - self, - dt=1000.0, - 
dlogt=0.1, - mmin=0.07, - mmax=100.0, - nres=1000, - logspacing=False, - tmin=3.0, # start at 3Myr - tmax=None, # use max_evolution_time by default - mindm=None, # tuple of tuples - maxdm=((0.07, 1.0, 0.1), (1.0, 300.0, 1.0)), # tuple of tuples - fsample=1.0, - factor=1.0, - logmasses=False, - log10masses=False, - showlist=False, - showtable=False, -): +class spacing_functions: """ - const_dt returns a list of masses spaced at a constant age difference - - Args: - dt: the time difference between the masses (1000.0 Myr, used when logspacing==False) - dlogt : the delta log10(time) difference between masses (0.1 dex, used when logspacing==True) - mmin: the minimum mass to be considered in the stellar lifetime interpolation table (0.07 Msun) - mmax: the maximum mass to be considered in the stellar lifetime interpolation table (100.0 Msun) - nres: the resolution of the stellar lifetime interpolation table (100) - logspacing: whether to use log-spaced time, in which case dt is actually d(log10(t)) - tmin: the minimum time to consider (Myr, default 3.0 Myr) - tmax: the maximum time to consider (Myr, default None which means we use the grid option 'max_evolution_time') - mindm: a tuple of tuples containing a mass range and minimum mass spacing in that range. The default is ((0.07,1.0,0.1),(1.0,300.0,1.0)) allocated a minimum dm of 0.1Msun in the mass range 0.07 to 1.0 Msun and 1.0Msun in the range 1.0 to 300.0 Msun. Anything you set overrides this. Note, if you use only one tuple, you must set it with a trailing comma, thus, e.g. ((0.07,1.0,0.1),). (default None) - maxdm: a list of tuples similar to mindm but specifying a maximum mass spacing. In the case of maxdm, if the third option in each tuple is negative it is treated as a log step (its absolute value is used as the step). (default None) - fsample: a global sampling (Shannon-like) factor (<1) to improve resolution (default 1.0, set to smaller to improve resolution) - factor: all masses generated are multiplied by this after generation - showtable: if True, the mass list and times are shown to stdout after generation - showlist: if True, show the mass list once generated - logmasses: if True, the masses are logged with math.log() - log10masses: if True, the masses are logged with math.log10() - - Returns: - Array of masses. - - Example: - # these are lines set as options to Population.add_grid_value(...) - - # linear time bins of 1Gyr - samplerfunc="const_dt(self,dt=1000,nres=100,mmin=0.07,mmax=2.0,showtable=True)" - - # logarithmic spacing in time, generally suitable for Galactic - # chemical evolution yield grids. 
- samplerfunc="const_dt(self,dlogt=0.1,nres=100,mmin=0.07,mmax=80.0,maxdm=((0.07,1.0,0.1),(1.0,10.0,1.0),(10.0,80.0,2.0)),showtable=True,logspacing=True,fsample=1.0/4.0)" - + Extension for the Population class containing the code for spacing functions """ - # first, make a stellar lifetime table - # - # we should use the bse_options from self - # so our lifetime_population uses the same physics - lifetime_population = Population() - lifetime_population.bse_options = dict(self.bse_options) - - # we only want to evolve the star during nuclear burning, - # we don't want a dry run of the grid - # we want to use the right number of CPU cores - lifetime_population.set( - do_dry_run=False, - num_cores=self.grid_options["num_cores"], - max_stellar_type_1=10, - save_ensemble_chunks=False, + def __init__(self, **kwargs): + # don't do anything: we just inherit from this class + return + + @cachetools.cachedmethod(lambda self: self.caches["spacing_functions.const_linear"]) + def const_linear( + self, min_bound: Union[int, float], max_bound: Union[int, float], steps: int + ) -> list: + """ + Samples a range linearly. Uses numpy linspace, and returns an array of floats. Do NOT use this for integers. + + Args: + min_bound: lower bound of range + max_bound: upper bound of range + steps: number of segments between min_bound and max_bound + + Returns: + np.linspace(min_bound, max_bound, steps) + """ + return np.linspace(min_bound, max_bound, steps) + + @cachetools.cachedmethod(lambda self: self.caches["spacing_functions.const_int"]) + def const_int( + self, min_bound: Union[int, float], max_bound: Union[int, float], steps: int + ) -> list: + """ + Samples an integer range linearly. Returns a list of ints. + + Args: + min_bound: lower bound of range, must be an integer (is converted to int) + max_bound: upper bound of range, must be an integer (is converted to int) + steps: number of segments between min_bound and max_bound + + Returns: + range(min_bound,max_bound,step) + + where step is int((int(max_bound)-int(min_bound))/steps) + """ + + step = int((int(max_bound) - int(min_bound)) / max(1, steps - 1)) + if steps <= 1: + return [int(min_bound)] + return range(int(min_bound), int(max_bound + step), step) + + ############################################################ + @cachetools.cachedmethod(lambda self: self.caches["spacing_functions.const_ranges"]) + def const_ranges(self, ranges) -> list: + """ + Samples a series of ranges linearly. + + Args: + ranges: a tuple of tuples passed to the self.const_linear() spacing function. + + Returns: + numpy array of masses + + Example: + The following allocates 10 stars between 0.1 and 0.65, 20 stars between 0.65 + and 0.85, and 10 stars between 0.85 and 10.0 Msun. 
+ + samplerfunc="const_ranges((({},{},{}),({},{},{}),({},{},{})))".format( + 0.1,0.65,10, + 0.65,0.85,20, + 0.85,10.0,10 + ), + + """ + + masses = np.empty(0) + for valuerange in ranges: + masses = np.append(masses, self.const_linear(*valuerange)) + return np.unique(masses) + + ############################################################ + def peak_normalized_gaussian_func( + self, x: Union[int, float], mean: Union[int, float], sigma: Union[int, float] + ) -> Union[int, float]: + """ + Function to evaluate a Gaussian at a given point, note + that the normalization is such that the peak is always 1.0, + not that the integral is 1.0 + + Args: + x: location at which to evaluate the distribution + mean: mean of the Gaussian + sigma: standard deviation of the Gaussian + + Returns: + value of the Gaussian at x + """ + + gaussian_prefactor = 1.0 # / math.sqrt(2.0 * math.pi) + + r = 1.0 / sigma + y = (x - mean) * r + return gaussian_prefactor * math.exp(-0.5 * y ** 2) + + ############################################################ + @cachetools.cachedmethod( + lambda self: self.caches["spacing_functions.gaussian_zoom"] ) - - # make a grid in M1 - lifetime_population.add_grid_variable( - name="lnM_1", - parameter_name="M_1", - longname="log Primary mass", # == single-star mass - valuerange=[math.log(mmin), math.log(mmax)], - samplerfunc="const(math.log({mmin}),math.log({mmax}),{nres})".format( - mmin=mmin, mmax=mmax, nres=nres - ), - probdist="1", # dprob/dm1 : we don't care, so just set it to 1 - dphasevol="dlnM_1", - precode="M_1=math.exp(lnM_1)", - condition="", # Impose a condition on this grid variable. Mostly for a check for yourself - gridtype="edge", - ) - - # set up the parse function - def _parse_function(self, output): - if output: - for line in output.splitlines(): - data = line.split() - if data[0] == "SINGLE_STAR_LIFETIME": - # append (log10(mass), log10(lifetime)) tuples - logm = math.log10(float(data[1])) - logt = math.log10(float(data[2])) - # print(line) - # print("logM=",logm,"M=",10.0**logm," -> logt=",logt) - self.grid_results["interpolation table m->t"][logm] = logt - self.grid_results["interpolation table t->m"][logt] = logm - - lifetime_population.set( - parse_function=_parse_function, - ) - - # run to build the interpolation table - print("Running population to make lifetime interpolation table, please wait") - lifetime_population.evolve() - # print("Data table",lifetime_population.grid_results['interpolation table t->m']) - - if not "interpolation table t->m" in lifetime_population.grid_results or len(lifetime_population.grid_results["interpolation table t->m"].keys()) == 0: - print("\n\n\nError: The t->m lifetime table is empty. One usual cause for this is that the tmax or max_evolution_time option (currently passed in to const_dt as {tmax}) is too short for there to be any entries in the table before the first timestep. 
Try increasing tmax and max_evolution_time, shorten the timestep or, if using log times, set tstart to be closer to 0.\n".format(tmax=tmax)) - exit() - - - # convert to nested lists for the interpolator - # - # make time -> mass table - data_table_time_mass = [] - times = sorted(lifetime_population.grid_results["interpolation table t->m"].keys()) - for time in times: - mass = lifetime_population.grid_results["interpolation table t->m"][time] - # we have to make sure the time is monotonic (not guaranteed at high mass) - if len(data_table_time_mass) == 0: - data_table_time_mass.append([time, mass]) - elif mass < data_table_time_mass[-1][1]: - data_table_time_mass.append([time, mass]) - - # make mass -> time table - data_table_mass_time = [] - masses = sorted(lifetime_population.grid_results["interpolation table m->t"].keys()) - for mass in masses: - time = lifetime_population.grid_results["interpolation table m->t"][mass] - data_table_mass_time.append([mass, time]) - - # set up interpolators - interpolator_time_mass = py_rinterpolate.Rinterpolate( - table=data_table_time_mass, nparams=1, ndata=1, verbosity=0 # mass # lifetime - ) - interpolator_mass_time = py_rinterpolate.Rinterpolate( - table=data_table_mass_time, nparams=1, ndata=1, verbosity=0 # lifetime # mass - ) - - # function to get a mass given a time (Myr) - def _mass_from_time(linear_time): - return 10.0 ** interpolator_time_mass.interpolate([math.log10(linear_time)])[0] - - # function to get a time given a mass (Msun) - def _time_from_mass(mass): - return 10.0 ** interpolator_mass_time.interpolate([math.log10(mass)])[0] - - # return a unique list - def _uniq(_list): - return sorted(list(set(_list))) - - # format a whole list like %g - def _format(_list): - return [float("{x:g}".format(x=x)) for x in _list] - - # construct mass list, always include the min and max - mass_list = [mmin, mmax] - - # first, make sure the stars are separated by only - # maxdm - if maxdm: - for x in maxdm: - range_min = x[0] - range_max = x[1] - dm = x[2] - if dm < 0.0: - # use log scale - dlogm = -dm - logm = math.log(mmin) - logmmax = math.log(mmax) - logrange_min = math.log(range_min) - logrange_max = math.log(range_max) - while logm <= logmmax: - if logm >= logrange_min and logm <= logrange_max: - mass_list.append(math.exp(logm)) - logm += dlogm - else: - # use linear scale - m = mmin - while m <= mmax: - if m >= range_min and m <= range_max: - mass_list.append(m) - m += dm - - # start time loop at tmax or max_evolution_time - t = tmax if tmax else self.bse_options["max_evolution_time"] - - # set default mass list - if logspacing: - logt = math.log10(t) - logtmin = math.log10(tmin) - while logt > logtmin: - m = _mass_from_time(10.0 ** logt) - mass_list.append(m) - logt = max(logtmin, logt - dlogt * fsample) - else: - while t > tmin: - m = _mass_from_time(t) - mass_list.append(m) - t = max(tmin, t - dt * fsample) - - # make mass list unique - mass_list = _uniq(mass_list) - - if mindm: - for x in mindm: - range_min = x[0] - range_max = x[1] - mindm = x[2] - # impose a minimum dm: if two masses in the list - # are separated by < this, remove the second - for index, mass in enumerate(mass_list): - if index > 0 and mass >= range_min and mass <= range_max: - dm = mass_list[index] - mass_list[index - 1] - if dm < mindm: - mass_list[index - 1] = 0.0 - mass_list = _uniq(mass_list) - if mass_list[0] == 0.0: - mass_list.remove(0.0) - - # apply multiplication factor if given - if factor and factor != 1.0: - mass_list = [m * factor for m in mass_list] - - # 
reformat numbers - mass_list = _format(mass_list) - - # show the mass<>time table? - if showtable: - twas = 0.0 - logtwas = 0.0 - for i, m in enumerate(mass_list): - t = _time_from_mass(m) - logt = math.log10(t) - if twas > 0.0: + def gaussian_zoom( + self, + min_bound: Union[int, float], + max_bound: Union[int, float], + zoom_mean: Union[int, float], + zoom_dispersion: Union[int, float], + zoom_magnitude: Union[int, float], + steps: int, + ) -> list: + """ + Samples such that a region is zoomed in according to a 1-Gaussian function + + Args: + min_bound: lower bound of range + max_bound: upper bound of range + zoom_mean: mean of the Gaussian zoom location + zoom_dispersion: dispersion of the Gaussian + zoom_magnitude: depth of the Gaussian (should be 0<= zoom_magntiude <1) + steps: number of segments between min_bound and max_bound assuming a linear step + this is what you'd normally call "resolution" + + Returns: + Numpy array of sample values + """ + + # linear spacing: this is what we'd have + # in the absence of a Gaussian zoom + linear_spacing = (max_bound - min_bound) / (steps - 1) + + # make the list of values + x = min_bound + array = np.array([]) + while x <= max_bound: + array = np.append(array, x) + g = self.peak_normalized_gaussian_func(x, zoom_mean, zoom_dispersion) + f = 1.0 - zoom_magnitude * g + dx = linear_spacing * f + x = x + dx + + # force the last array member to be max_bound if it's not + if array[-1] != max_bound: + array[-1] = max_bound + + return np.unique(array) + + def const_dt(self, cachedir=None, usecache=True, **kwargs): + """ + const_dt returns a list of masses spaced at a constant age difference + + Args: + dt: the time difference between the masses (1000.0 Myr, used when logspacing==False) + dlogt : the delta log10(time) difference between masses (0.1 dex, used when logspacing==True) + mmin: the minimum mass to be considered in the stellar lifetime interpolation table (0.07 Msun) + mmax: the maximum mass to be considered in the stellar lifetime interpolation table (100.0 Msun) + nres: the resolution of the stellar lifetime interpolation table (100) + logspacing: whether to use log-spaced time, in which case dt is actually d(log10(t)) + tmin: the minimum time to consider (Myr, default 3.0 Myr) + tmax: the maximum time to consider (Myr, default None which means we use the grid option 'max_evolution_time') + max_evolution_time: overrides bse_options['max_evolution_time'] if set + mindm: a tuple of tuples containing a mass range and minimum mass spacing in that range. The default is ((0.07,1.0,0.1),(1.0,300.0,1.0)) allocated a minimum dm of 0.1Msun in the mass range 0.07 to 1.0 Msun and 1.0Msun in the range 1.0 to 300.0 Msun. Anything you set overrides this. Note, if you use only one tuple, you must set it with a trailing comma, thus, e.g. ((0.07,1.0,0.1),). (default None) + maxdm: a list of tuples similar to mindm but specifying a maximum mass spacing. In the case of maxdm, if the third option in each tuple is negative it is treated as a log step (its absolute value is used as the step). 
(default None) + fsample: a global sampling (Shannon-like) factor (<1) to improve resolution (default 1.0, set to smaller to improve resolution) + factor: all masses generated are multiplied by this after generation + showtable: if True, the mass list and times are shown to stdout after generation + showlist: if True, show the mass list once generated + logmasses: if True, the masses are logged with math.log() + log10masses: if True, the masses are logged with math.log10() + usecache: if True (the default) uses cached results if they are saved (in cachedir) and cachedir is not None + cachedir: where the cache is stored. if None, defaults to grid_options['cache_dir']+'/const_dt_cache' + vb : verbose logging flag (default False) + + Returns: + Array of masses. + + Example: + # these are lines set as options to Population.add_grid_value(...) + + # linear time bins of 1Gyr + samplerfunc="self.const_dt(self,dt=1000,nres=100,mmin=0.07,mmax=2.0,showtable=True)" + + # logarithmic spacing in time, generally suitable for Galactic + # chemical evolution yield grids. + samplerfunc="self.const_dt(self,dlogt=0.1,nres=100,mmin=0.07,mmax=80.0,maxdm=((0.07,1.0,0.1),(1.0,10.0,1.0),(10.0,80.0,2.0)),showtable=True,logspacing=True,fsample=1.0/4.0)" + + """ + + if usecache: + if cachedir is None: + cachedir = self.grid_options["cache_dir"] + + if cachedir is not None: + cachedir += "/const_dt_cache" + cache = diskcache.Cache(cachedir) print( - "{i:4d} m={m:13g} t={t:13g} log10(t)={logt:13g} dt={dt:13g} dlog10(t)={dlogt:13g}".format( - i=i, m=m, t=t, logt=logt, dt=twas - t, dlogt=logtwas - logt - ) + "Use const_dt cache in {} [cache object {}]".format(cachedir, cache) ) else: + print("const_dt uses no cache") + cache = None + + def _const_dt_wrapper( + cachedir=None, + num_cores=None, + bse_options=None, + dt=1000.0, + dlogt=0.1, + mmin=0.07, + mmax=100.0, + nres=1000, + logspacing=False, + tmin=3.0, # start at 3Myr + tmax=None, # use max_evolution_time by default + max_evolution_time=None, + mindm=None, # tuple of tuples + maxdm=((0.07, 1.0, 0.1), (1.0, 300.0, 1.0)), # tuple of tuples + fsample=1.0, + factor=1.0, + logmasses=False, + log10masses=False, + showlist=False, + showtable=False, + usecache=True, + vb=False, + ): + print( + "call _const_dt num_cores={} dt={} dlogt={} mmin={} mmax={} nres={} logspacing={} tmin={} mindm={} maxdm={} fsample={} factor={} logmasses={} log10masses={} showlist={} usecache={} [cache={} vb={}]".format( + num_cores, + dt, + dlogt, + mmin, + mmax, + nres, + logspacing, + tmin, + mindm, + maxdm, + fsample, + factor, + logmasses, + log10masses, + showlist, + usecache, + cache, + vb, + ) + ) + + if vb: + traceback.print_stack() + + # strip bse_options of options that will not affect + # _const_dt + bse_stripped = bse_options.copy() + + del_keys = ["multiplicity"] + for del_key in del_keys: + if del_key in bse_stripped: + del bse_stripped[del_key] + + # make a JSON string of the options (this can be + # used to check the cache) + bse_options_json = json.dumps( + bse_stripped, sort_keys=True, ensure_ascii=False + ) + if vb: + print("BSE options JSON:", bse_options_json) + + return _const_dt( + cachedir=cachedir, + num_cores=num_cores, + bse_options_json=bse_options_json, + dt=dt, + dlogt=dlogt, + mmin=mmin, + mmax=mmax, + nres=nres, + logspacing=logspacing, + tmin=tmin, + tmax=tmax, + max_evolution_time=max_evolution_time, + mindm=mindm, + maxdm=maxdm, + fsample=fsample, + logmasses=logmasses, + log10masses=log10masses, + showlist=showlist, + showtable=showtable, + usecache=usecache, 
+                factor=factor,
+ ) + + # if we want to use the cache, set the __decorator + # to just be the cache.memoize function, otherwise + # make it a wrapped function that just returns the + # _const_dt function acting on its arguments + def __dummy_decorator(func): + @functools.wraps(func) + def wrapped(*args, **kwargs): + return func(*args, **kwargs) + + return wrapped + + if cache: + __decorator = cache.memoize + else: + __decorator = __dummy_decorator + + @__decorator + def _const_dt( + cachedir=None, + num_cores=None, + bse_options_json=None, # JSON string + dt=1000.0, + dlogt=0.1, + mmin=0.07, + mmax=100.0, + nres=1000, + logspacing=False, + tmin=3.0, # start at 3Myr + tmax=None, # use max_evolution_time by default + max_evolution_time=None, + mindm=None, # tuple of tuples + maxdm=((0.07, 1.0, 0.1), (1.0, 300.0, 1.0)), # tuple of tuples + fsample=1.0, + factor=1.0, + logmasses=False, + log10masses=False, + showlist=False, + showtable=False, + usecache=True, + ): + # first thing to do is make a stellar lifetime table + # + # we should use the bse_options_json passed in + # so our lifetime_population uses the same physics + # as the main grid + + # convert bse_options to dict + bse_options = json.loads(bse_options_json) + + # perhaps override max_evolution_time + if max_evolution_time: + bse_options["max_evolution_time"] = max_evolution_time + + lifetime_population = Population() + lifetime_population.bse_options = bse_options + + # we only want to evolve the star during nuclear burning, + # we don't want a dry run of the grid + # we want to use the right number of CPU cores + lifetime_population.set( + do_dry_run=False, + num_cores=num_cores, + max_stellar_type_1=10, + save_ensemble_chunks=False, + symlink_latest_gridcode=False, + modulo=1, + start_at=0, + slurm=0, + condor=0, + multiplicity=1, + ensemble=0, + ensemble_dt=1e3, + ensemble_logdt=0.1, + # for debugging + verbosity=1, + log_dt=1, + ) + + # make a grid in M1 + lifetime_population.add_grid_variable( + name="lnM_1", + parameter_name="M_1", + longname="log Primary mass", # == single-star mass + valuerange=[math.log(mmin), math.log(mmax)], + samplerfunc="self.const_linear(math.log({mmin}),math.log({mmax}),{nres})".format( + mmin=mmin, mmax=mmax, nres=nres + ), + probdist="1", # dprob/dm1 : we don't care, so just set it to 1 + dphasevol="dlnM_1", + precode="M_1=math.exp(lnM_1)", + condition="", # Impose a condition on this grid variable. 
Mostly for a check for yourself + gridtype="edge", + ) + + # set up the parse function + def _parse_function(self, output): + if output: + for line in output.splitlines(): + data = line.split() + if data[0] == "SINGLE_STAR_LIFETIME": + # append (log10(mass), log10(lifetime)) tuples + logm = math.log10(float(data[1])) + logt = math.log10(float(data[2])) + # print(line) + # print("logM=",logm,"M=",10.0**logm," -> logt=",logt) + self.grid_results["interpolation table m->t"][logm] = logt + self.grid_results["interpolation table t->m"][logt] = logm + + lifetime_population.set( + parse_function=_parse_function, + ) + + # run to build the interpolation table + print( + "Running population to make lifetime interpolation table, please wait" + ) + lifetime_population.evolve() + # print("Data table",lifetime_population.grid_results['interpolation table t->m']) + + if ( + "interpolation table t->m" not in lifetime_population.grid_results + or len( + lifetime_population.grid_results["interpolation table t->m"].keys() + ) + == 0 + ): print( - "{i:4d} m={m:13g} t={t:13g} log10(t)={logt:13g}".format( - i=i, m=m, t=t, logt=logt + "\n\n\nError: The t->m lifetime table is empty. One usual cause for this is that the tmax or max_evolution_time option (currently passed in to const_dt as {tmax}) is too short for there to be any entries in the table before the first timestep. Try increasing tmax and max_evolution_time, shorten the timestep or, if using log times, set tstart to be closer to 0.\n".format( + tmax=tmax ) ) - twas = t - logtwas = logt - exit() - - # return the mass list as a numpy array - mass_list = np.unique(np.array(mass_list)) + sys.exit() + + # convert to nested lists for the interpolator + # + # make time -> mass table + data_table_time_mass = [] + times = sorted( + lifetime_population.grid_results["interpolation table t->m"].keys() + ) + for time in times: + mass = lifetime_population.grid_results["interpolation table t->m"][ + time + ] + # we have to make sure the time is monotonic (not guaranteed at high mass) + if len(data_table_time_mass) == 0: + data_table_time_mass.append([time, mass]) + elif mass < data_table_time_mass[-1][1]: + data_table_time_mass.append([time, mass]) + + # make mass -> time table + data_table_mass_time = [] + masses = sorted( + lifetime_population.grid_results["interpolation table m->t"].keys() + ) + for mass in masses: + time = lifetime_population.grid_results["interpolation table m->t"][ + mass + ] + data_table_mass_time.append([mass, time]) + + # set up interpolators + interpolator_time_mass = py_rinterpolate.Rinterpolate( + table=data_table_time_mass, + nparams=1, + ndata=1, + verbosity=0, # mass # lifetime + ) + interpolator_mass_time = py_rinterpolate.Rinterpolate( + table=data_table_mass_time, + nparams=1, + ndata=1, + verbosity=0, # lifetime # mass + ) + + # function to get a mass given a time (Myr) + def _mass_from_time(linear_time): + return ( + 10.0 + ** interpolator_time_mass.interpolate([math.log10(linear_time)])[0] + ) - # perhaps log the masses - if logmasses: - mass_list = np.log(mass_list) - if log10masses: - mass_list = np.log10(mass_list) + # function to get a time given a mass (Msun) + def _time_from_mass(mass): + return 10.0 ** interpolator_mass_time.interpolate([math.log10(mass)])[0] + + # return a unique list + def _uniq(_list): + return sorted(list(set(_list))) + + # format a whole list like %g + def _format(_list): + return [float("{x:g}".format(x=x)) for x in _list] + + # construct mass list, always include the min and max + mass_list = 
[mmin, mmax] + + # first, make sure the stars are separated by only + # maxdm + if maxdm: + for x in maxdm: + range_min = x[0] + range_max = x[1] + dm = x[2] + if dm < 0.0: + # use log scale + dlogm = -dm + logm = math.log(mmin) + logmmax = math.log(mmax) + logrange_min = math.log(range_min) + logrange_max = math.log(range_max) + while logm <= logmmax: + if logrange_min <= logm <= logrange_max: + mass_list.append(math.exp(logm)) + logm += dlogm + else: + # use linear scale + m = mmin + while m <= mmax: + if range_min <= m <= range_max: + mass_list.append(m) + m += dm + + # start time loop at tmax or max_evolution_time + t = tmax if tmax else bse_options["max_evolution_time"] + + # set default mass list + if logspacing: + logt = math.log10(t) + logtmin = math.log10(tmin) + while logt > logtmin: + m = _mass_from_time(10.0 ** logt) + mass_list.append(m) + logt = max(logtmin, logt - dlogt * fsample) + else: + while t > tmin: + m = _mass_from_time(t) + mass_list.append(m) + t = max(tmin, t - dt * fsample) - if showlist: - print("const_dt mass list ({} masses)\n".format(len(mass_list)), mass_list) + # make mass list unique + mass_list = _uniq(mass_list) - return mass_list + if mindm: + for x in mindm: + range_min = x[0] + range_max = x[1] + mindm = x[2] + # impose a minimum dm: if two masses in the list + # are separated by < this, remove the second + for index, mass in enumerate(mass_list): + if index > 0 and range_min <= mass <= range_max: + dm = mass_list[index] - mass_list[index - 1] + if dm < mindm: + mass_list[index - 1] = 0.0 + mass_list = _uniq(mass_list) + if mass_list[0] == 0.0: + mass_list.remove(0.0) + + # apply multiplication factor if given + if factor and factor != 1.0: + mass_list = [m * factor for m in mass_list] + + # reformat numbers + mass_list = _format(mass_list) + + # show the mass<>time table? + if showtable: + twas = 0.0 + logtwas = 0.0 + for i, m in enumerate(mass_list): + t = _time_from_mass(m) + logt = math.log10(t) + if twas > 0.0: + print( + "{i:4d} m={m:13g} t={t:13g} log10(t)={logt:13g} dt={dt:13g} dlog10(t)={dlogt:13g}".format( + i=i, + m=m, + t=t, + logt=logt, + dt=twas - t, + dlogt=logtwas - logt, + ) + ) + else: + print( + "{i:4d} m={m:13g} t={t:13g} log10(t)={logt:13g}".format( + i=i, m=m, t=t, logt=logt + ) + ) + twas = t + logtwas = logt + sys.exit() + + # return the mass list as a numpy array + mass_array = np.unique(np.array(mass_list)) + + # perhaps log the masses + if logmasses: + mass_array = np.log(mass_array) + if log10masses: + mass_array = np.log10(mass_array) + + return mass_array + + # call _const_dt and return the mass_list + # + # Note: because _const_dt is cached to disk, calling it may + # use the cached result. 
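+        # Note: diskcache's memoize builds its key from the memoized
+        # _const_dt's argument values, including bse_options_json, so changing
+        # any physics option invalidates the cached lifetime table.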
+ # + # Note: we send a sorted JSON string instead of the + # bse_options dict to make sure the order is preserved + + mass_list = _const_dt_wrapper( + cachedir=cachedir, + num_cores=self.grid_options["num_cores"], + bse_options=self.bse_options, + **kwargs, + ) + if cache: + cache.close() + + if kwargs.get("showlist", True): + print("const_dt mass list ({} masses)\n".format(len(mass_list)), mass_list) + + return mass_list diff --git a/binarycpython/utils/useful_funcs.py b/binarycpython/utils/useful_funcs.py index 546d8b9c75d41463a61504706f9baa141b7f3a06..d5abce50abf62cbedccebe3e814a91c40290f048 100644 --- a/binarycpython/utils/useful_funcs.py +++ b/binarycpython/utils/useful_funcs.py @@ -16,7 +16,6 @@ Functions: Tasks: - TODO: check whether these functions are correct - - TODO: add unit test for maximum_mass_ratio_for_RLOF """ import math @@ -204,11 +203,11 @@ def roche_lobe(q: Union[int, float]) -> Union[int, float]: return 0.49 * p * p / (0.6 * p * p + math.log(1.0 + p)) -def ragb(m: Union[int, float], z: Union[int, float]) -> Union[int, float]: +def ragb(m: Union[int, float]) -> Union[int, float]: """ Function to calculate radius of a star in units of solar radii at first thermal pulse as a function of mass (Z=0.02 only, but also good for Z=0.0001) - TODO: ask rob about this function + TODO: ask rob about this function. Do we still need this? Can we make something better? (i.e. upon installation of the code run a grid of systems and get the data from there?) Args: m: mass of star in units of solar mass diff --git a/binarycpython/utils/version_info.py b/binarycpython/utils/version_info.py new file mode 100644 index 0000000000000000000000000000000000000000..8ea9fd89937302e6c118d3e003d619b7a8cb0373 --- /dev/null +++ b/binarycpython/utils/version_info.py @@ -0,0 +1,430 @@ +""" +File containing the class object containing the functions to handle binary_c version info. + +This class will be used to extend the population object + +NOTE: could these functions not just be normal functions rather than class methods? I see hardly any use of the self +""" + +# pylint: disable=E0203 + +import copy +import os + +from typing import Union + +from binarycpython import _binary_c_bindings +from binarycpython.utils.functions import isfloat + + +class version_info: + """ + Class object containing the functions to handle binary_c version info. + + This class will be used to extend the population object + """ + + def __init__(self, **kwargs): + # don't do anything: we just inherit from this class + return + + ######################################################## + # version_info functions + ######################################################## + def return_binary_c_version_info(self, parsed: bool = True) -> Union[str, dict]: + """ + Function that returns the version information of binary_c. This function calls the function + _binary_c_bindings.return_version_info() + + Args: + parsed: Boolean flag whether to parse the version_info output of binary_c. default = False + + Returns: + Either the raw string of binary_c or a parsed version of this in the form of a nested + dictionary + """ + + found_prev = False + if "BINARY_C_MACRO_HEADER" in os.environ: + # the env var is already present. 
lets save that and put that back later + found_prev = True + prev_value = os.environ["BINARY_C_MACRO_HEADER"] + + # + os.environ["BINARY_C_MACRO_HEADER"] = "macroxyz" + + # Get version_info + raw_version_info = _binary_c_bindings.return_version_info().strip() + + # delete value + del os.environ["BINARY_C_MACRO_HEADER"] + + # put stuff back if we found a previous one + if found_prev: + os.environ["BINARY_C_MACRO_HEADER"] = prev_value + + # parse if wanted + if parsed: + parsed_version_info = self.parse_binary_c_version_info(raw_version_info) + return parsed_version_info + + return raw_version_info + + def parse_binary_c_version_info(self, version_info_string: str) -> dict: + """ + Function that parses the binary_c version info. Long function with a lot of branches + + Args: + version_info_string: raw output of version_info call to binary_c + + Returns: + Parsed version of the version info, which is a dictionary containing the keys: 'isotopes' for isotope info, 'argpairs' for argument pair info (TODO: explain), 'ensembles' for ensemble settings/info, 'macros' for macros, 'elements' for atomic element info, 'DTlimit' for (TODO: explain), 'nucleosynthesis_sources' for nucleosynthesis sources, and 'miscellaneous' for all those that were not caught by the previous groups. 'git_branch', 'git_build', 'revision' and 'email' are also keys, but its clear what those contain. + """ + + version_info_dict = {} + + # Clean data and put in correct shape + splitted = version_info_string.strip().splitlines() + cleaned = {el.strip() for el in splitted if not el == ""} + + ########################## + # Network: + # Split off all the networks and parse the info. + + networks = {el for el in cleaned if el.startswith("Network ")} + cleaned = cleaned - networks + + networks_dict = {} + for el in networks: + network_dict = {} + split_info = el.split("Network ")[-1].strip().split("==") + + network_number = int(split_info[0]) + network_dict["network_number"] = network_number + + network_info_split = split_info[1].split(" is ") + + shortname = network_info_split[0].strip() + network_dict["shortname"] = shortname + + if not network_info_split[1].strip().startswith(":"): + network_split_info_extra = network_info_split[1].strip().split(":") + + longname = network_split_info_extra[0].strip() + network_dict["longname"] = longname + + implementation = ( + network_split_info_extra[1].strip().replace("implemented in", "") + ) + if implementation: + network_dict["implemented_in"] = [ + i.strip("()") for i in implementation.strip().split() + ] + + networks_dict[network_number] = copy.deepcopy(network_dict) + version_info_dict["networks"] = networks_dict if networks_dict else None + + ########################## + # Isotopes: + # Split off + isotopes = {el for el in cleaned if el.startswith("Isotope ")} + cleaned -= isotopes + + isotope_dict = {} + for el in isotopes: + split_info = el.split("Isotope ")[-1].strip().split(" is ") + + isotope_info = split_info[-1] + name = isotope_info.split(" ")[0].strip() + + # Get details + mass_g = float( + isotope_info.split(",")[0].split("(")[1].split("=")[-1][:-2].strip() + ) + mass_amu = float( + isotope_info.split(",")[0].split("(")[-1].split("=")[-1].strip() + ) + mass_mev = float( + isotope_info.split(",")[-3].split("=")[-1].replace(")", "").strip() + ) + A = int(isotope_info.split(",")[-1].strip().split("=")[-1].replace(")", "")) + Z = int(isotope_info.split(",")[-2].strip().split("=")[-1]) + + # + isotope_dict[int(split_info[0])] = { + "name": name, + "Z": Z, + "A": A, + "mass_mev": 
mass_mev, + "mass_g": mass_g, + "mass_amu": mass_amu, + } + version_info_dict["isotopes"] = isotope_dict if isotope_dict else None + + ########################## + # Arg pairs: + # Split off + argpairs = {el for el in cleaned if el.startswith("ArgPair")} + cleaned -= argpairs + + argpair_dict = {} + for el in sorted(argpairs): + split_info = el.split("ArgPair ")[-1].split(" ") + + if not argpair_dict.get(split_info[0], None): + argpair_dict[split_info[0]] = {split_info[1]: split_info[2]} + else: + argpair_dict[split_info[0]][split_info[1]] = split_info[2] + + version_info_dict["argpairs"] = argpair_dict if argpair_dict else None + + ########################## + # ensembles: + # Split off + ensembles = {el for el in cleaned if el.startswith("Ensemble")} + cleaned -= ensembles + + ensemble_dict = {} + ensemble_filter_dict = {} + for el in ensembles: + split_info = el.split("Ensemble ")[-1].split(" is ") + + if len(split_info) > 1: + if not split_info[0].startswith("filter"): + ensemble_dict[int(split_info[0])] = split_info[-1] + else: + filter_no = int(split_info[0].replace("filter ", "")) + ensemble_filter_dict[filter_no] = split_info[-1] + + version_info_dict["ensembles"] = ensemble_dict if ensemble_dict else None + version_info_dict["ensemble_filters"] = ( + ensemble_filter_dict if ensemble_filter_dict else None + ) + + ########################## + # macros: + # Split off + macros = {el for el in cleaned if el.startswith("macroxyz")} + cleaned -= macros + + param_type_dict = { + "STRING": str, + "FLOAT": float, + "MACRO": str, + "INT": int, + "LONG_INT": int, + "UINT": int, + } + + macros_dict = {} + for el in macros: + split_info = el.split("macroxyz ")[-1].split(" : ") + param_type = split_info[0] + + new_split = "".join(split_info[1:]).split(" is ") + param_name = new_split[0].strip() + param_value = " is ".join(new_split[1:]) + param_value = param_value.strip() + + # print("macro ",param_name,"=",param_value," float?",isfloat(param_value)," int?",isint(param_value)) + + # If we're trying to set the value to "on", check that + # it doesn't already exist. If it does, do nothing, as the + # extra information is better than just "on" + if param_name in macros_dict: + # print("already exists (is ",macros_dict[param_name]," float? ",isfloat(macros_dict[param_name]),", int? ",isint(macros_dict[param_name]),") : check that we can improve it") + if macros_dict[param_name] == "on": + # update with better value + store = True + elif ( + isfloat(macros_dict[param_name]) is False + and isfloat(param_value) is True + ): + # store the number we now have to replace the non-number we had + store = True + else: + # don't override existing number + store = False + + # if store: + # print("Found improved macro value of param",param_name,", was ",macros_dict[param_name],", is",param_value) + # else: + # print("Cannot improve: use old value") + else: + store = True + + if store: + # Sometimes the macros have extra information behind it. + # Needs an update in outputting by binary_c (RGI: what does this mean David???) 
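+                # cast the value to the type named in the macro header; if the
+                # value has trailing text the cast raises ValueError and we
+                # fall back to storing it as a plain string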
+ try: + macros_dict[param_name] = param_type_dict[param_type](param_value) + except ValueError: + macros_dict[param_name] = str(param_value) + + version_info_dict["macros"] = macros_dict if macros_dict else None + + ########################## + # Elements: + # Split off: + elements = {el for el in cleaned if el.startswith("Element")} + cleaned -= elements + + # Fill dict: + elements_dict = {} + for el in elements: + split_info = el.split("Element ")[-1].split(" : ") + name_info = split_info[0].split(" is ") + + # get isotope info + isotopes = {} + if not split_info[-1][0] == "0": + isotope_string = split_info[-1].split(" = ")[-1] + isotopes = { + int(split_isotope.split("=")[0]): split_isotope.split("=")[1] + for split_isotope in isotope_string.split(" ") + } + + elements_dict[int(name_info[0])] = { + "name": name_info[-1], + "atomic_number": int(name_info[0]), + "amt_isotopes": len(isotopes), + "isotopes": isotopes, + } + version_info_dict["elements"] = elements_dict if elements_dict else None + + ########################## + # dt_limits: + # split off + dt_limits = {el for el in cleaned if el.startswith("DTlimit")} + cleaned -= dt_limits + + # Fill dict + dt_limits_dict = {} + for el in dt_limits: + split_info = el.split("DTlimit ")[-1].split(" : ") + dt_limits_dict[split_info[1].strip()] = { + "index": int(split_info[0]), + "value": float(split_info[-1]), + } + + version_info_dict["dt_limits"] = dt_limits_dict if dt_limits_dict else None + + ############################## + # Units + + units = {el for el in cleaned if el.startswith("Unit ")} + cleaned -= units + units_dict = {} + for el in units: + split_info = el.split("Unit ")[-1].split(",") + s = split_info[0].split(" is ") + + if len(s) == 2: + long, short = [i.strip().strip('"') for i in s] + elif len(s) == 1: + long, short = None, s[0] + else: + print("Warning: Failed to split unit string {}".format(el)) + + to_cgs = (split_info[1].split())[3].strip().strip('"') + code_units = split_info[2].split() + code_unit_type_num = int(code_units[3].strip().strip('"')) + code_unit_type = code_units[4].strip().strip('"') + code_unit_cgs_value = code_units[9].strip().strip('"').strip(")") + units_dict[long] = { + "long": long, + "short": short, + "to_cgs": to_cgs, + "code_unit_type_num": code_unit_type_num, + "code_unit_type": code_unit_type, + "code_unit_cgs_value": code_unit_cgs_value, + } + + # Add the list of units + units = {el for el in cleaned if el.startswith("Units: ")} + cleaned -= units + for el in units: + el = el[7:] # removes "Units: " + units_dict["units list"] = el.strip("Units:") + + version_info_dict["units"] = units_dict + + ########################## + # Nucleosynthesis sources: + # Split off + nucsyn_sources = {el for el in cleaned if el.startswith("Nucleosynthesis")} + cleaned -= nucsyn_sources + + # Fill dict + nucsyn_sources_dict = {} + for el in nucsyn_sources: + split_info = el.split("Nucleosynthesis source")[-1].strip().split(" is ") + nucsyn_sources_dict[int(split_info[0])] = split_info[-1] + + version_info_dict["nucleosynthesis_sources"] = ( + nucsyn_sources_dict if nucsyn_sources_dict else None + ) + + ########################## + # miscellaneous: + # All those that I didn't catch with the above filters. Could try to get some more out though. 
+ + misc_dict = {} + + # Filter out git revision + git_revision = [el for el in cleaned if el.startswith("git revision")] + misc_dict["git_revision"] = ( + git_revision[0].split("git revision ")[-1].replace('"', "") + ) + cleaned -= set(git_revision) + + # filter out git url + git_url = [el for el in cleaned if el.startswith("git URL")] + misc_dict["git_url"] = git_url[0].split("git URL ")[-1].replace('"', "") + cleaned -= set(git_url) + + # filter out version + version = [el for el in cleaned if el.startswith("Version")] + misc_dict["version"] = str(version[0].split("Version ")[-1]) + cleaned -= set(version) + + git_branch = [el for el in cleaned if el.startswith("git branch")] + misc_dict["git_branch"] = ( + git_branch[0].split("git branch ")[-1].replace('"', "") + ) + cleaned -= set(git_branch) + + build = [el for el in cleaned if el.startswith("Build")] + misc_dict["build"] = build[0].split("Build: ")[-1].replace('"', "") + cleaned -= set(build) + + email = [el for el in cleaned if el.startswith("Email")] + misc_dict["email"] = email[0].split("Email ")[-1].split(",") + cleaned -= set(email) + + other_items = {el for el in cleaned if " is " in el} + cleaned -= other_items + + for el in other_items: + split = el.split(" is ") + key = split[0].strip() + val = " is ".join(split[1:]).strip() + if key in misc_dict: + misc_dict[key + " (alt)"] = val + else: + misc_dict[key] = val + + misc_dict["uncaught"] = list(cleaned) + + version_info_dict["miscellaneous"] = misc_dict if misc_dict else None + return version_info_dict + + def minimum_stellar_mass(self): + """ + Function to return the minimum stellar mass (in Msun) from binary_c. + """ + if not self._minimum_stellar_mass: + self._minimum_stellar_mass = self.return_binary_c_version_info(parsed=True)[ + "macros" + ]["BINARY_C_MINIMUM_STELLAR_MASS"] + return self._minimum_stellar_mass diff --git a/setup.py b/setup.py index 624e08e87911755a945068ddd26fda7620148017..9ed029c681bd5cacabf8ed0114217530b83c02ae 100644 --- a/setup.py +++ b/setup.py @@ -262,10 +262,16 @@ setup( ], install_requires=[ "astropy", + "cachetools", "colorama", + "compress_pickle", + "datasize", + "diskcache", + "flufl.lock", "h5py", "halo", "humanize", + "lib_programname", "matplotlib", "msgpack", "numpy", @@ -276,6 +282,7 @@ setup( "py_rinterpolate", "seaborn", "setproctitle", + "str2bool", "psutil", "simplejson", "strip-ansi",