""" Module containing the Population grid class object. Here all the functionality of a Population object is defined. Useful for the user to understand the functionality, but copying functionality isn't recommended except if you know what you are doing """ import os import sys import copy import json import datetime import time import logging import argparse import importlib.util from pathos.helpers import mp as pathos_multiprocess # from pathos.multiprocessing import ProcessingPool as Pool from pathos.pools import _ProcessPool as Pool from binarycpython.utils.grid_options_defaults import grid_options_defaults_dict from binarycpython.utils.custom_logging_functions import ( autogen_C_logging_code, binary_c_log_code, create_and_load_logging_function, ) from binarycpython.utils.functions import ( get_defaults, parse_binary_c_version_info, remove_file, filter_arg_dict, get_help_all, return_binary_c_version_info, binaryc_json_serializer, verbose_print, binarycDecoder, merge_dicts, ) from binarycpython.utils.hpc_functions import ( get_condor_version, get_slurm_version, create_directories_hpc, path_of_calling_script, get_python_details, ) import binary_c_python_api # Tasks # TODO: add functionality to 'on-init' set arguments # TODO: add functionality to return the initial_abundance_hash # TODO: add functionality to return the isotope_hash # TODO: add functionality to return the isotope_list # TODO: add functionality to return the nuclear_mass_hash # TODO: add functionality to return the nuclear_mass_list # TODO: add functionality to return the source_list # TODO: add functionality to return the ensemble_list # TODO: change the grid_options dict structure so that there is room for descriptions # TODO: consider spreading the functions over more files. # TODO: indicate private or pbulic subroutines # TODO: make explicit comparison to perl # Make this function also an API call. Doest seem to get written to a buffer # that is stored into a python object. 
# rather its just written to stdout


class Population:
    """
    Population Object. Contains all the necessary functions to set up, run and process a
    population of systems.

    Three option dictionaries are kept on the object:

    - ``bse_options``: parameters that are passed to binary_c on the argument line
    - ``grid_options``: settings that steer the population/grid machinery itself
    - ``custom_options``: everything passed to ``set`` that matches neither of the above
    """

    def __init__(self):
        """
        Initialisation function of the population class.

        Loads the binary_c defaults, creates the (empty) option dictionaries and
        records the process id of the process that created the object.
        """

        self.defaults = get_defaults()
        self.cleaned_up_defaults = self.cleanup_defaults()

        # Different sections of options
        # bse_options starts out empty: setting values checks the keys against the
        # defaults to see whether the input is a valid binary_c parameter.
        self.bse_options = {}

        # Deep copy: the defaults contain nested mutable values (e.g. the
        # "grid_variables" dict that add_grid_variable writes into). A shallow copy
        # would share those between every Population and the module-level default.
        self.grid_options = copy.deepcopy(grid_options_defaults_dict)
        self.custom_options = {}

        # Argline dict
        self.argline_dict = {}

        # Set main process id
        self.grid_options["main_pid"] = os.getpid()

        # Set some memory dicts
        self.persistent_data_memory_dict = {}

    ###################################################
    # Argument functions
    ###################################################

    # General flow of generating the arguments for the binary_c call:
    # - user provides parameter and value via set (or manually but that is risky)
    # - The parameter names of this input get compared to the parameter names in
    #   self.defaults; with this, we know that it is a valid parameter to give to binary_c.
    # - For a single system, the bse_options will be written as an arg line
    # - For a population the bse_options will get copied to a temporary dict and updated
    #   with all the parameters generated by the grid

    # I will NOT create the argument line by fully writing ALL the defaults and overriding
    # user input, that seems not necessary because by using the get_defaults() function we
    # already know for sure which parameter names are valid for the binary_c version.
    # And because binary_c uses internal defaults, it is not necessary to explicitly pass
    # them. I do however suggest everyone to export the binary_c defaults to a file, so
    # that you know exactly which values were the defaults.

    def set(self, **kwargs):
        """
        Function to set the values of the population. This is the preferred method to set
        values, as it routes every key to the right option dictionary.

        Routing, in order of priority:

        1. keys present in ``self.defaults`` go to ``bse_options``
        2. keys that start with the stem of a default ending in ``%d`` (and are longer
           than that stem) also go to ``bse_options``
        3. keys present in ``self.grid_options`` update ``grid_options``
        4. anything else is reported on stdout and stored in ``custom_options``

        Args:
            **kwargs: parameter names and the values to store for them.
        """

        # Stems of the default parameter names that are templates ending in "%d"
        special_prefixes = [
            name[:-2] for name in self.defaults if name.endswith("%d")
        ]

        # Go over all the input
        for key, value in kwargs.items():
            # Filter out keys for the bse_options
            if key in self.defaults:
                verbose_print(
                    "adding: {}={} to BSE_options".format(key, value),
                    self.grid_options["verbosity"],
                    1,
                )
                self.bse_options[key] = value

            # Extra check: does the key fit one of the parameter names that end with %d
            elif any(
                key.startswith(prefix) and len(prefix) < len(key)
                for prefix in special_prefixes
            ):
                verbose_print(
                    "adding: {}={} to BSE_options by catching the %d".format(
                        key, value
                    ),
                    self.grid_options["verbosity"],
                    1,
                )
                self.bse_options[key] = value

            # Filter out keys for the grid_options
            elif key in self.grid_options:
                verbose_print(
                    "adding: {}={} to grid_options".format(key, value),
                    self.grid_options["verbosity"],
                    1,
                )
                self.grid_options[key] = value

            # The rest of the keys go into the custom_options dict
            else:
                print(
                    "!! Key doesnt match previously known parameter: "
                    "adding: {}={} to custom_options".format(key, value)
                )
                self.custom_options[key] = value

    def _set_bse_option(self, key, arg):
        """
        Setter for the BSE options. Stores ``arg`` under ``key`` without any validation.

        # TODO: Put a check here that compares it to the defaults and says something
        """

        self.bse_options[key] = arg

    @staticmethod
    def _split_cmdline_args(cmdline):
        """
        Turn a string like ``"metallicity=0.02 M_1=10"`` into a dict.

        Entries are separated by whitespace; each entry is split on its FIRST ``=`` so
        that values may themselves contain ``=``. Values are kept as strings.

        Args:
            cmdline: the raw string given to ``--cmdline``.

        Returns:
            dict mapping parameter name to (string) value.

        Raises:
            ValueError: if an entry has no ``=`` or an empty parameter name.
        """

        parsed = {}
        for entry in cmdline.split():
            parameter, separator, value = entry.partition("=")
            if not separator or not parameter:
                raise ValueError(
                    "Cannot parse cmdline entry '{}': expected parameter=value".format(
                        entry
                    )
                )
            parsed[parameter] = value
        return parsed

    def parse_cmdline(self):
        """
        Function to handle setting values via the command line.

        Expects input like ``--cmdline "metallicity=0.02"``; multiple ``name=value``
        entries can be given, separated by whitespace. Every entry is forwarded to
        ``set``, which decides in which option dictionary it ends up.

        Raises:
            ValueError: if an entry is not of the form ``parameter=value``.

        TODO: remove the need for --cmdline
        """

        parser = argparse.ArgumentParser()
        parser.add_argument(
            "--cmdline",
            help='Setting values via the commandline. Input like --cmdline "metallicity=0.02"',
        )
        args = parser.parse_args()

        # Only act when --cmdline was actually given (and is non-empty)
        if args.cmdline:
            verbose_print(
                "Found cmdline args. Parsing them now",
                self.grid_options["verbosity"],
                1,
            )

            cmdline_dict = self._split_cmdline_args(args.cmdline)

            # unpack the dictionary into the setting function that handles where the
            # values are set
            self.set(**cmdline_dict)

    def return_argline(self, parameter_dict=None):
        """
        Function to create the string for the arg line from a parameter dict.

        Args:
            parameter_dict: dict of parameter names and values. When it is None or empty,
                ``self.bse_options`` is used instead.

        Returns:
            A string starting with ``binary_c`` followed by ``name value`` pairs, ordered
            by parameter name.
        """

        if not parameter_dict:
            parameter_dict = self.bse_options

        pairs = "".join(
            "{} {} ".format(param_name, parameter_dict[param_name])
            for param_name in sorted(parameter_dict)
        )
        return ("binary_c " + pairs).strip()

    def add_grid_variable(
        self,
        name,
        longname,
        valuerange,
        resolution,
        spacingfunc,
        probdist,
        dphasevol,
        parameter_name,
        precode=None,
        condition=None,
    ):
        """
        Function to add grid variables to the grid_options.

        The execution of the grid generation is done through nested for loops that are
        written out as python code by ``generate_grid_code``. Several of the arguments
        are therefore pieces of python code (as strings) that are pasted into that
        generated code. In terms of safety that is very bad, but in terms of flexibility
        it is very good: only pass strings you trust.

        Args:
            name: name of the loop variable in the generated code, e.g. ``'lnm1'``.
            longname: descriptive name, e.g. ``'Primary mass'``. Stored only.
            valuerange: range of values to take. Stored only; the sampling itself is
                determined by ``spacingfunc``.
            resolution: amount of samples in the range. Stored only.
            spacingfunc: expression that evaluates to the sequence of sampled values.
                The generated code uses the difference between its first two elements as
                the phase volume of this variable.
            probdist: expression for the probability assigned to the sampled value; it is
                multiplied with the phase volume in the generated code.
            dphasevol: part of the parameter space the total probability is calculated
                with. Stored only; not used by the generated code.
            parameter_name: key under which the value is stored in the system's parameter
                dict. The generated code also evaluates a variable with this name, so it
                has to exist there (e.g. via ``name`` or ``precode``).
            precode: optional code that is placed at the start of the loop body, e.g. to
                derive ``parameter_name`` from ``name``.
            condition: optional expression; when it evaluates to false the generated code
                raises a ValueError.

        TODO: Fix this complex function.
        """

        # Add grid_variable
        grid_variable = {
            "name": name,
            "longname": longname,
            "valuerange": valuerange,
            "resolution": resolution,
            "spacingfunc": spacingfunc,
            "precode": precode,
            "probdist": probdist,
            "dphasevol": dphasevol,
            "parameter_name": parameter_name,
            "condition": condition,
            # Position in the nested loops: variables are nested in order of addition
            "grid_variable_number": len(self.grid_options["grid_variables"]),
        }

        # Load it into the grid_options
        self.grid_options["grid_variables"][grid_variable["name"]] = grid_variable
        verbose_print(
            "Added grid variable: {}".format(json.dumps(grid_variable, indent=4)),
            self.grid_options["verbosity"],
            1,
        )

    ###################################################
    # Return functions
    ###################################################

    def return_population_settings(self):
        """
        Function that returns all the options that have been set.

        Can be combined with json to make a nice file.

        Returns:
            dict with the keys ``bse_options``, ``grid_options`` and ``custom_options``.
            The values are the live dictionaries of this object, not copies.
        """

        return {
            "bse_options": self.bse_options,
            "grid_options": self.grid_options,
            "custom_options": self.custom_options,
        }

    def return_binary_c_version_info(self, parsed=False):
        """
        Function that returns the version information of binary_c.

        Args:
            parsed: when True the raw version string is passed through
                ``parse_binary_c_version_info`` before it is returned.
        """

        version_info = binary_c_python_api.return_version_info().strip()

        if parsed:
            version_info = parse_binary_c_version_info(version_info)

        return version_info

    def return_binary_c_defaults(self):
        """
        Function that returns the defaults of the binary_c version that is used, as they
        were loaded when this object was created.
        """

        return self.defaults

    def return_all_info(
        self,
        include_population_settings=True,
        include_binary_c_defaults=True,
        include_binary_c_version_info=True,
        include_binary_c_help_all=True,
    ):
        """
        Function that returns all the information about the population and binary_c.

        Each ``include_*`` flag switches one section of the result on or off.

        Returns:
            dict with (depending on the flags) the keys ``population_settings``,
            ``binary_c_defaults``, ``binary_c_version_info`` and ``binary_c_help_all``.
        """

        all_info = {}

        if include_population_settings:
            all_info["population_settings"] = self.return_population_settings()

        if include_binary_c_defaults:
            all_info["binary_c_defaults"] = self.return_binary_c_defaults()

        if include_binary_c_version_info:
            # This is the module-level helper imported from
            # binarycpython.utils.functions, not the method with the same name.
            all_info["binary_c_version_info"] = return_binary_c_version_info(
                parsed=True
            )

        if include_binary_c_help_all:
            all_info["binary_c_help_all"] = get_help_all(print_help=False)

        return all_info

    def export_all_info(
        self,
        use_datadir=True,
        outfile=None,
        include_population_settings=True,
        include_binary_c_defaults=True,
        include_binary_c_version_info=True,
        include_binary_c_help_all=True,
    ):
        """
        Function that exports the all_info to a json file.

        Args:
            use_datadir: when True the file is written to
                ``custom_options["data_dir"]`` (created if necessary) and named
                ``<base>_settings.json``, where ``<base>`` is
                ``custom_options["base_filename"]`` without extension, or
                ``simulation_<timestamp>`` when no base filename is set.
            outfile: file to write to when ``use_datadir`` is False.
            include_*: forwarded to ``return_all_info``.

        Raises:
            ValueError: if ``use_datadir`` is False and no ``outfile`` is given.

        TODO: if any of the values in the dicts here is of a not-serializable form, then
            we need to change that to a string or something, so use a recursive function
            that goes over the all_info dict and finds those that fit
        TODO: Fix to write things to the directory. which options do which etc
        TODO: there is flawed logic here. rewrite this part
        TODO: consider actually just removing the whole 'output to file' part and let the
            user do this.
        """

        # Fail before gathering anything when there is nowhere to write to
        if not use_datadir and outfile is None:
            raise ValueError(
                "export_all_info needs an outfile when use_datadir is False"
            )

        all_info = self.return_all_info(
            include_population_settings=include_population_settings,
            include_binary_c_defaults=include_binary_c_defaults,
            include_binary_c_version_info=include_binary_c_version_info,
            include_binary_c_help_all=include_binary_c_help_all,
        )

        # Copy dict, so that the export cannot touch the live option dictionaries
        all_info_cleaned = copy.deepcopy(all_info)

        if use_datadir:
            if not self.custom_options.get("base_filename", None):
                base_name = "simulation_{}".format(
                    datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
                )
            else:
                base_name = os.path.splitext(self.custom_options["base_filename"])[0]

            settings_name = base_name + "_settings.json"

            # Check directory, make if necessary
            os.makedirs(self.custom_options["data_dir"], exist_ok=True)

            target_filename = os.path.join(
                self.custom_options["data_dir"], settings_name
            )
        else:
            target_filename = outfile

        verbose_print(
            "Writing settings to {}".format(target_filename),
            self.grid_options["verbosity"],
            1,
        )
        with open(target_filename, "w") as file:
            file.write(
                json.dumps(
                    all_info_cleaned, indent=4, default=binaryc_json_serializer
                )
            )

    def set_custom_logging(self):
        """
        Function/routine to set all the custom logging so that the function memory
        pointer is known to the grid.

        ``grid_options["C_logging_code"]`` gets priority over
        ``grid_options["C_auto_logging"]``. When neither is set nothing is compiled or
        loaded. Otherwise the memory address of the logging function and the path of the
        shared library are stored in ``grid_options``.
        """

        verbose_print(
            "Creating and loading custom logging functionality",
            self.grid_options["verbosity"],
            1,
        )

        if self.grid_options["C_logging_code"]:
            # Generate entire shared lib code around the user-provided logging lines
            custom_logging_code = binary_c_log_code(
                self.grid_options["C_logging_code"],
                verbose=self.grid_options["verbosity"],
            )
        elif self.grid_options["C_auto_logging"]:
            # Generate real logging code
            logging_line = autogen_C_logging_code(
                self.grid_options["C_auto_logging"],
                verbose=self.grid_options["verbosity"],
            )

            # Generate entire shared lib code around logging lines
            custom_logging_code = binary_c_log_code(
                logging_line, verbose=self.grid_options["verbosity"]
            )
        else:
            # No custom logging requested
            return

        # Load memory address
        (
            self.grid_options["custom_logging_func_memaddr"],
            self.grid_options["custom_logging_shared_library_file"],
        ) = create_and_load_logging_function(
            custom_logging_code, verbose=self.grid_options["verbosity"]
        )

    ###################################################
    # Ensemble functions
    ###################################################

    def load_persistent_data_memory_dict(self):
        """
        Function that loads a set amount (amt_cores) of persistent data memory addresses
        to pass to binary_c.

        One address is requested per thread number ``0 .. amt_cores - 1`` and stored in
        ``self.persistent_data_memory_dict`` under that thread number.
        """

        # amt_cores is a count, so iterate over the thread numbers it implies
        for thread_nr in range(self.grid_options["amt_cores"]):
            persistent_data_memaddr = (
                binary_c_python_api.binary_c_return_persistent_data_memaddr()
            )
            self.persistent_data_memory_dict[thread_nr] = persistent_data_memaddr

        verbose_print(
            "Created the following dict with persistent memaddresses: {}".format(
                self.persistent_data_memory_dict
            ),
            self.grid_options["verbosity"],
            1,
        )

    def free_persistent_data_memory_and_combine_results_and_output(self):
        """
        Function that frees every persistent data memory address, merges the ensemble
        output that binary_c returns for each of them, and writes the combined result to
        a json file.

        The output file is ``custom_options["ensemble_output_name"]`` inside
        ``custom_options["data_dir"]``.

        TODO: fix the function
        """

        combined_ensemble_json = {}

        for thread_nr, persistent_data_memaddr in self.persistent_data_memory_dict.items():
            verbose_print(
                "Freeing {} (thread {})and merging output to combined dict".format(
                    persistent_data_memaddr, thread_nr
                ),
                self.grid_options["verbosity"],
                1,
            )

            # Get the output and decode it correctly to get the numbers correct
            ensemble_json_output = binary_c_python_api.binary_c_free_persistent_data_memaddr_and_return_json_output(
                persistent_data_memaddr
            )

            # Only the first line is used; it is prefixed with "ENSEMBLE_JSON "
            parsed_json = json.loads(
                ensemble_json_output.splitlines()[0][len("ENSEMBLE_JSON ") :],
                cls=binarycDecoder,
            )

            # Combine the output with the main output
            combined_ensemble_json = merge_dicts(combined_ensemble_json, parsed_json)

        # Write results to file.
        # TODO: Make sure everything is checked beforehand
        full_output_filename = os.path.join(
            self.custom_options["data_dir"], self.custom_options["ensemble_output_name"]
        )
        verbose_print(
            "Writing ensemble output to {}".format(full_output_filename),
            self.grid_options["verbosity"],
            1,
        )

        # Output to dir:
        with open(full_output_filename, "w") as output_file:
            output_file.write(json.dumps(combined_ensemble_json, indent=4))

    ###################################################
    # Evolution functions
    ###################################################

    def evolve(self):
        """
        Entrypoint function of the whole object. From here, based on the settings, we set
        up a SLURM or CONDOR grid, or if no setting is given we go straight to evolving
        the population.
        """

        # Check which type:
        # NOTE(review): slurm_grid / condor_grid are not defined in the part of the class
        # visible here - confirm their names.
        if self.grid_options["slurm"] == 1:
            # Execute slurm subroutines
            self.slurm_grid()

        elif self.grid_options["condor"] == 1:
            # Execute condor subroutines
            self.condor_grid()

        else:
            # Execute population evolution subroutines
            self._evolve_population()

    def _evolve_population(self):
        """
        Function to evolve populations. This handles the setting up, evolving and
        cleaning up of a population of stars.

        Choices here are:
        - to evolve a population via multiprocessing or linearly on 1 core.
        - to evolve a population via a variable grid, a source file or MC

        An unknown ``grid_options["evolution_type"]`` only results in a warning on
        stdout; set-up and clean-up are still executed.

        TODO: include options for different ways of generating a population here.
        """

        ##
        # Prepare code/initialise grid.
        # set custom logging, set up store_memaddr, build grid code. dry run grid code.
        self._setup()

        ##
        # Evolve systems: via grid_options one can choose to do this linearly, or
        # multiprocessing method.
        evolution_type = self.grid_options["evolution_type"]
        if evolution_type in self.grid_options["evolution_type_options"]:
            if evolution_type == "mp":
                self._evolve_population_mp()
            elif evolution_type == "linear":
                self._evolve_population_lin()
        else:
            print(
                "Warning. you chose a wrong option for the grid evolution types. "
                "Please choose from the following: {}.".format(
                    self.grid_options["evolution_type_options"]
                )
            )

        ##
        # Clean up code: remove files, unset values.
        self._cleanup()

    def _evolve_population_mp(self):
        """
        Function to evolve the population with multiprocessing approach.
        Using pathos to be able to include class-owned functions.

        The systems are generated lazily by ``_yield_system_mp`` and handed to the worker
        processes in chunks of 20.
        """

        # TODO: make further use of a queue to handle jobs or at least
        #   get information on the process ids etc
        # https://stackoverflow.com/questions/10190981/get-a-unique-id-for-worker-in-python-multiprocessing-pool
        # https://stackoverflow.com/questions/8640367/python-manager-dict-in-multiprocessing/9536888
        # for muting values through dicts
        # https://python-forum.io/Thread-Dynamic-updating-of-a-nested-dictionary-in-multiprocessing-pool
        # https://stackoverflow.com/questions/28740955/working-with-pathos-multiprocessing-tool-in-python-and

        # TODO: make good example of how to deal with a result_dict
        manager = pathos_multiprocess.Manager()
        self.grid_options["result_dict"] = manager.dict()

        # Create pool
        pool = Pool(processes=self.grid_options["amt_cores"])

        try:
            # Execute; the list() call drains the iterator so that every system is run
            # TODO: calculate the chunksize value based on: total starcount and cores used.
            list(
                pool.imap_unordered(
                    self._evolve_system_mp, self._yield_system_mp(), chunksize=20
                )
            )
        finally:
            # Handle clean termination of the whole multiprocessing (making sure there
            # are no zombie processes (https://en.wikipedia.org/wiki/Zombie_process)),
            # also when evolving one of the systems raised.
            pool.close()
            pool.join()

    def _evolve_population_lin(self):
        """
        Function to evolve the population linearly (i.e. 1 core, no multiprocessing
        methods).

        For every generated system the grid parameters are merged on top of the
        bse_options, the system is run, progress is printed and the output is passed to
        the parse function (if one is set).
        """

        for i, system in enumerate(self.grid_options["system_generator"](self)):
            full_system_dict = self.bse_options.copy()
            full_system_dict.update(system)

            binary_cmdline_string = self.return_argline(full_system_dict)
            out = binary_c_python_api.run_system(
                argstring=binary_cmdline_string,
                custom_logging_func_memaddr=self.grid_options[
                    "custom_logging_func_memaddr"
                ],
                store_memaddr=self.grid_options["store_memaddr"],
                population=1,
            )

            self.print_info(
                i + 1, self.grid_options["total_starcount"], full_system_dict
            )

            if self.grid_options["parse_function"]:
                self.grid_options["parse_function"](self, out)

    def _evolve_system_mp(self, binary_cmdline_string):
        """
        Function that the multiprocessing evolution method calls to evolve a system.

        Args:
            binary_cmdline_string: complete argument line for binary_c, as produced by
                ``return_argline``.
        """

        out = binary_c_python_api.run_system(
            argstring=binary_cmdline_string,
            custom_logging_func_memaddr=self.grid_options[
                "custom_logging_func_memaddr"
            ],
            store_memaddr=self.grid_options["store_memaddr"],
            population=1,
        )

        if self.grid_options["parse_function"]:
            self.grid_options["parse_function"](self, out)

    def _yield_system_mp(self):
        """
        Function that the multiprocessing evolution method calls to yield systems.

        Yields:
            The binary_c argument line of each generated system (grid parameters merged
            on top of the bse_options).
        """

        for i, system in enumerate(self.grid_options["system_generator"](self)):
            full_system_dict = self.bse_options.copy()
            full_system_dict.update(system)

            binary_cmdline_string = self.return_argline(full_system_dict)

            self.print_info(
                i + 1, self.grid_options["total_starcount"], full_system_dict
            )
            yield binary_cmdline_string

        print("generator done")

    # Single system
    def evolve_single(self, clean_up_custom_logging_files=True):
        """
        Function to run a single system, using the current bse_options.

        Args:
            clean_up_custom_logging_files: when True the custom logging files are cleaned
                up after the run.

        Returns:
            The return value of the parse function when
            ``grid_options["parse_function"]`` is set, otherwise the raw output of the
            run.
        """

        ### Custom logging code:
        self.set_custom_logging()

        # Get argument line
        argline = self.return_argline(self.bse_options)
        verbose_print("Running {}".format(argline), self.grid_options["verbosity"], 1)

        # Run system
        out = binary_c_python_api.run_system(
            argstring=argline,
            custom_logging_func_memaddr=self.grid_options[
                "custom_logging_func_memaddr"
            ],
            store_memaddr=self.grid_options["store_memaddr"],
            population=0,
        )

        # TODO: add call to function that cleans up the temp customlogging dir,
        #   and unloads the loaded libraries.
        if clean_up_custom_logging_files:
            self.clean_up_custom_logging(evol_type="single")

        # Parse
        if self.grid_options["parse_function"]:
            return self.grid_options["parse_function"](self, out)
        return out

    def _setup(self):
        """
        Function to set up the necessary stuff for the population evolution.

        The idea is to do all the stuff that is necessary for a population to run.
        Since we have different methods of running a population, this setup function
        will do different things depending on different settings.

        Steps: validate the settings, set up custom logging, load the binary_c store,
        load the persistent memory (ensemble runs only), and prepare the system
        generator including a dry run that determines the total starcount.

        Raises:
            ValueError: if no parse function is set, if an ensemble run is requested
                without deferred output, or if a grid population has no grid variables.

        # TODO: Make other kinds of populations possible. i.e, read out type of grid,
            and set up accordingly
        # TODO: make this function more general. Have it explicitly set the
            system_generator function
        """

        if not self.grid_options["parse_function"]:
            print("Error: No parse function set. Aborting run")
            raise ValueError("No parse function set")

        ## check the ensemble settings before anything is allocated.
        # bse_options only contains what the user set, so "ensemble" can be absent.
        run_ensemble = self.bse_options.get("ensemble", None) == 1
        if run_ensemble and not self.bse_options.get("ensemble_defer", None) == 1:
            verbose_print(
                "Error, if you want to run an ensemble in a population, the output needs to be deferred",
                self.grid_options["verbosity"],
                0,
            )
            raise ValueError("Ensemble runs in a population need ensemble_defer=1")

        #######################
        ### Custom logging code:
        self.set_custom_logging()

        ### Load store
        self.grid_options["store_memaddr"] = binary_c_python_api.return_store("")

        ### ensemble:
        ## Load persistent_data_memaddr if necessary:
        if run_ensemble:
            self.load_persistent_data_memory_dict()

        # Check which type of population generation
        # NOTE(review): by analogy with evolution_type / evolution_type_options this key
        # looks like it holds the list of allowed types rather than the chosen one -
        # confirm against grid_options_defaults_dict.
        population_type = self.grid_options["population_type_options"]

        if population_type == "grid":
            #######################
            # Dry run and getting starcount
            self.grid_options["probtot"] = 0

            # Put in check
            if len(self.grid_options["grid_variables"]) == 0:
                print("Error: you havent defined any grid variables! Aborting")
                raise ValueError("No grid variables defined")

            # Set up the grid code with a dry run option to see total probability
            self.generate_grid_code(dry_run=True)

            # Load the grid code
            self.load_grid_function()

            # Do a dry run
            self.dry_run()

            print(
                "Total starcount for this run will be: {}".format(
                    self.grid_options["total_starcount"]
                )
            )

            #######################
            # Reset values and prepare the grid function
            # To make sure that the values are reset. TODO: fix this in a cleaner way
            self.grid_options["probtot"] = 0

            # Setting start time of grid
            self.grid_options["start_time_evolution"] = time.time()

            # Generate and load the real (non dry-run) grid code
            self.generate_grid_code(dry_run=False)
            self.load_grid_function()

        # Source file
        elif population_type == "source_file":
            #######################
            # Dry run and getting starcount
            self.grid_options["probtot"] = 0

            # Load the grid code
            self.load_source_file_function()

            # Do a dry run
            self.dry_run_source_file()

            print(
                "Total starcount for this run will be: {}".format(
                    self.grid_options["total_starcount"]
                )
            )

            #######################
            # Reset values and prepare the grid function
            # To make sure that the values are reset. TODO: fix this in a cleaner way
            self.grid_options["probtot"] = 0

            # Setting start time of grid
            self.grid_options["start_time_evolution"] = time.time()

            self.load_source_file_function(dry_run=False)
            self.load_grid_function()

    #######

    def _cleanup(self):
        """
        Function that handles all the cleaning up after the grid has been generated
        and/or run.

        Currently this writes the ensemble output (ensemble runs only), resets the
        counters and the system generator, and frees the binary_c store.

        Still to be done:
        - remove grid file
        - unload grid function/module
        - remove dry grid file
        - unload dry grid function/module
        """

        # Output the ensemble if necessary ("ensemble" is absent unless the user set it)
        if self.bse_options.get("ensemble", None) == 1:
            self.free_persistent_data_memory_and_combine_results_and_output()

        # Reset values
        self.grid_options["count"] = 0
        self.grid_options["probtot"] = 0
        self.grid_options["system_generator"] = None

        # Remove files

        # Unload functions

        # Unload store
        binary_c_python_api.binary_c_free_store_memaddr(
            self.grid_options["store_memaddr"]
        )

    ###################################################
    # Gridcode functions
    #
    # Function below are used to run populations with
    # a variable grid
    ###################################################

    def generate_grid_code(self, dry_run=False):
        """
        Function that generates the code from which the population will be made.

        The result is python source containing a ``grid_code(self)`` function with one
        nested for loop per grid variable (in order of ``grid_variable_number``). The
        source is stored in ``grid_options["code_string"]`` and written to
        ``example_grid.py`` in ``grid_options["tmp_dir"]``; that path is stored in
        ``grid_options["gridcode_filename"]``.

        Args:
            dry_run: when True the generated function does not yield systems but returns
                the starcount at the end, so that we know what the total amount of
                systems is. When False it yields the parameter dict of every system.

        # TODO: Add correct logging everywhere
        # TODO: add part to handle separation if orbital_period is added. Idea. use
            default values for orbital parameters and possibly overwrite those.
        # TODO: add centering center left right for the spacing.
        # TODO: Check whether all the probability and phasevol values are correct.
        """

        verbose_print("Generating grid code", self.grid_options["verbosity"], 1)

        # Some local values
        code_parts = []
        depth = 0
        indent = " "
        total_grid_variables = len(self.grid_options["grid_variables"])

        def emit(text, level=0):
            """Append text to the generated code, indented by `level` levels."""
            code_parts.append(indent * level + text)

        def variables_in_order(reverse=False):
            """Return the (name, grid_variable) pairs sorted by their loop position."""
            return sorted(
                self.grid_options["grid_variables"].items(),
                key=lambda x: x[1]["grid_variable_number"],
                reverse=reverse,
            )

        # Import packages
        emit("import math\n")
        emit("import numpy as np\n")
        emit("from binarycpython.utils.distribution_functions import *\n")
        emit("from binarycpython.utils.spacing_functions import *\n")
        emit("from binarycpython.utils.useful_funcs import *\n")
        emit("\n\n")

        # Make the function
        emit("def grid_code(self):\n")

        # Increase depth
        depth += 1

        # Write some info in the function
        emit(
            "# Grid code generated on {}\n".format(datetime.datetime.now().isoformat()),
            depth,
        )
        emit(
            "# This function generates the systems that will be evolved with binary_c\n\n",
            depth,
        )

        # Set some values in the generated code:
        emit("# Setting initial values\n", depth)
        emit("total_starcount = 0\n", depth)
        emit("starcounts = [0 for i in range({})]\n".format(total_grid_variables), depth)
        emit("probabilities = {}\n", depth)
        emit(
            "probabilities_list = [0 for i in range({})]\n".format(
                total_grid_variables
            ),
            depth,
        )
        emit(
            "probabilities_sum = [0 for i in range({})]\n".format(total_grid_variables),
            depth,
        )
        emit("parameter_dict = {}\n", depth)
        emit("phasevol = 1\n", depth)
        emit("\n", depth)

        emit("# setting probability lists\n", depth)
        # Prepare the probability
        for _, grid_variable in variables_in_order():
            # Make probabilities dict
            emit(
                'probabilities["{}"] = 0\n'.format(grid_variable["parameter_name"]),
                depth,
            )

        #################################################################################
        # Start of code generation
        #################################################################################
        emit("\n", depth)

        # Generate code
        print("Generating grid code")
        for loopnr, (variable_key, grid_variable) in enumerate(variables_in_order()):
            print("Constructing/adding: {}".format(variable_key))

            name = grid_variable["name"]
            parameter_name = grid_variable["parameter_name"]
            number = grid_variable["grid_variable_number"]

            #################################################################################
            # Check condition and generate forloop

            # If the grid variable has a condition, write the check and the action
            if grid_variable["condition"]:
                # Add comment
                emit("# Condition for {}".format(parameter_name) + "\n", depth)

                # Add condition check
                emit("if not {}:".format(grid_variable["condition"]) + "\n", depth)

                # Add condition failed action:
                emit(
                    'print("Condition for {} not met!")'.format(parameter_name) + "\n",
                    depth + 1,
                )
                emit("raise ValueError" + "\n", depth + 1)

                # Add some whiteline
                emit("\n", depth + 1)

            #########################
            # Setting up the forloop
            # Add comment for forloop
            emit("# for loop for {}".format(parameter_name) + "\n", depth)

            emit(
                "sampled_values_{} = {}".format(name, grid_variable["spacingfunc"])
                + "\n",
                depth,
            )

            # TODO: Make clear that the phasevol only works good
            # if you sample linearly in that thing.
            emit(
                "phasevol_{} = sampled_values_{}[1]-sampled_values_{}[0]".format(
                    name, name, name
                )
                + "\n",
                depth,
            )

            # Adding for loop structure
            emit("for {} in sampled_values_{}:".format(name, name) + "\n", depth)

            #########################
            # Setting up pre-code and value in some cases
            # Add pre-code
            if grid_variable["precode"]:
                # Every line of the precode has to sit at the level of the loop body,
                # so continuation lines get the same indentation as the first line.
                emit(
                    grid_variable["precode"].replace(
                        "\n", "\n" + indent * (depth + 1)
                    )
                    + "\n",
                    depth + 1,
                )

            # Set phasevol
            emit("phasevol *= phasevol_{}\n".format(name), depth + 1)

            #######################
            # Probabilities
            # Calculate probability
            emit("\n", depth + 1)
            emit("# Setting probabilities\n", depth + 1)
            emit(
                "d{} = phasevol_{} * {}".format(name, name, grid_variable["probdist"])
                + "\n",
                depth + 1,
            )

            # Saving probability sum
            emit("probabilities_sum[{}] += d{}".format(number, name) + "\n", depth + 1)

            # The probability at this level is the product of all the levels above it
            if number == 0:
                emit("probabilities_list[0] = d{}".format(name) + "\n", depth + 1)
            else:
                emit(
                    "probabilities_list[{}] = probabilities_list[{}] * d{}".format(
                        number, number - 1, name
                    )
                    + "\n",
                    depth + 1,
                )

            #######################
            # Increment starcount for this parameter
            emit("\n")
            emit("# Increment starcount for {}\n".format(parameter_name), depth + 1)

            emit("starcounts[{}] += 1".format(number) + "\n", depth + 1)

            # Add value to dict
            emit(
                'parameter_dict["{}"] = {}'.format(parameter_name, parameter_name)
                + "\n",
                depth + 1,
            )

            # Add some space
            emit("\n")

            # The final parts of the code, where things are returned, are within the
            # deepest loop, but in some cases code from a higher loop needs to go under
            # it again. So there is an if statement here that checks whether this is the
            # last loop.
            if loopnr == total_grid_variables - 1:

                #################################################################################
                # Here are the calls to the queuing or other solution.
                # this part is for every system
                # Add comment
                emit("#" * 40 + "\n", depth + 1)
                emit(
                    "# Code below will get evaluated for every generated system\n",
                    depth + 1,
                )

                # Calculate value
                emit(
                    'probability = self.grid_options["weight"] * probabilities_list[{}]'.format(
                        number
                    )
                    + "\n",
                    depth + 1,
                )
                emit(
                    'repeat_probability = probability / self.grid_options["repeat"]'
                    + "\n",
                    depth + 1,
                )
                emit("total_starcount += 1\n", depth + 1)

                # set probability and phasevol values
                emit(
                    'parameter_dict["{}"] = {}'.format("probability", "probability")
                    + "\n",
                    depth + 1,
                )
                emit(
                    'parameter_dict["{}"] = {}'.format("phasevol", "phasevol") + "\n",
                    depth + 1,
                )

                # Increment total probability
                emit("self.increment_probtot(probability)\n", depth + 1)

                if not dry_run:
                    # Handling of what is returned, or what is not.
                    # TODO: think of whether this is a good method
                    emit("yield(parameter_dict)\n", depth + 1)

                # If its a dry run, dont do anything with it
                else:
                    emit("pass\n", depth + 1)

                emit("#" * 40 + "\n", depth + 1)

            # increment depth: the next variable is nested inside this loop
            depth += 1

        depth -= 1
        emit("\n")

        # Write parts to write below the part that yield the results.
        # this has to go in a reverse order:
        # Here comes the stuff that is put after the deepest nested part that returns
        # stuff.
        for _, grid_variable in variables_in_order(reverse=True):
            emit("#" * 40 + "\n", depth + 1)
            emit(
                "# Code below is for finalising the handling of this iteration of the parameter\n",
                depth + 1,
            )

            # Set phasevol
            # TODO: fix. this isnt supposed to be the value that we give it here. discuss
            emit("phasevol /= phasevol_{}\n".format(grid_variable["name"]), depth + 1)
            emit("\n", depth + 1)

            depth -= 1

        ################
        # Finalising print statements
        emit("#" * 40 + "\n", depth + 1)
        emit("print('Grid has handled {} stars'.format(total_starcount))\n", depth + 1)
        emit(
            "print('with a total probability of {}'.format(self.grid_options['probtot']))\n",
            depth + 1,
        )

        if dry_run:
            emit("return total_starcount\n", depth + 1)

        #################################################################################
        # Stop of code generation. Here the code is saved and written
        code_string = "".join(code_parts)

        # Save the gridcode to the grid_options
        verbose_print(
            "Saving grid code to grid_options", self.grid_options["verbosity"], 1
        )

        self.grid_options["code_string"] = code_string

        # Write to file
        gridcode_filename = os.path.join(
            self.grid_options["tmp_dir"], "example_grid.py"
        )
        self.grid_options["gridcode_filename"] = gridcode_filename

        verbose_print(
            "Writing grid code to {}".format(gridcode_filename),
            self.grid_options["verbosity"],
            1,
        )

        with open(gridcode_filename, "w") as file:
            file.write(code_string)

    def load_grid_function(self):
        """
        TODO: Update this description

        Test function to run grid stuff.
        """
mostly to test the import """ # Code to load the verbose_print( message="Loading grid code function from {}".format( self.grid_options["gridcode_filename"] ), verbosity=self.grid_options["verbosity"], minimal_verbosity=1, ) spec = importlib.util.spec_from_file_location( "binary_c_python_grid", os.path.join(self.grid_options["gridcode_filename"]), ) grid_file = importlib.util.module_from_spec(spec) spec.loader.exec_module(grid_file) generator = grid_file.grid_code self.grid_options["system_generator"] = generator verbose_print("Grid code loaded", self.grid_options["verbosity"], 1) def dry_run(self): """ Function to dry run the grid and know how many stars it will run Requires the grid to be built as a dry run grid """ system_generator = self.grid_options["system_generator"] total_starcount = system_generator(self) self.grid_options["total_starcount"] = total_starcount def print_info(self, run_number, total_systems, full_system_dict): """ Function to print info about the current system and the progress of the grid. 
# color info tricks from https://ozzmaker.com/add-colour-to-text-in-python/ https://stackoverflow.com/questions/287871/how-to-print-colored-text-in-terminal-in-python """ # Define frequency if self.grid_options["verbosity"] == 1: print_freq = 1 else: print_freq = 10 # Calculate amount of time left # calculate amount of time passed # time_passed = time.time() - self.grid_options["start_time_evolution"] if run_number % print_freq == 0: binary_cmdline_string = self.return_argline(full_system_dict) info_string = "{color_part_1} \ {text_part_1}{end_part_1}{color_part_2} \ {text_part_2}{end_part_2}".format( color_part_1="\033[1;32;41m", text_part_1="{}/{}".format(run_number, total_systems), end_part_1="\033[0m", color_part_2="\033[1;32;42m", text_part_2="{}".format(binary_cmdline_string), end_part_2="\033[0m", ) print(info_string) ################################################### # Montecarlo functions # # Functions below are used to run populations with # Monte carlo ################################################### ################################################### # Population from file functions # # Functions below are used to run populations from # a file containing binary_c calls ################################################### def dry_run_source_file(self): """ Function to go through the source_file and count the amount of lines and the total probability """ system_generator = self.grid_options["system_generator"] total_starcount = 0 total_probability = 0 contains_probability = False for line in system_generator: total_starcount += 1 total_starcount = system_generator(self) self.grid_options["total_starcount"] = total_starcount def load_source_file(self, check=False): """ Function that loads the source_file that contains a binary_c calls """ if not os.path.isfile(self.grid_options["source_file_filename"]): verbose_print("Source file doesnt exist", self.grid_options["verbosity"], 0) verbose_print( message="Loading source file from {}".format( 
self.grid_options["gridcode_filename"] ), verbosity=self.grid_options["verbosity"], minimal_verbosity=1, ) # We can choose to perform a check on the sourcefile, which checks if the lines start with 'binary_c' if check: source_file_check_filehandle = open(self.grid_options["source_file_filename"], 'r') for line in source_file_check_filehandle: if not line.startswith('binary_c'): failed = True break if failed: verbose_print("Error, sourcefile contains lines that do not start with binary_c", self.grid_options["verbosity"], 0) raise ValueError source_file_filehandle = open(self.grid_options["source_file_filename"], 'r') self.grid_options["system_generator"] = source_file_filehandle verbose_print("Source file loaded", self.grid_options["verbosity"], 1) def dict_from_line_source_file(self): """ Function that creates a dict from a binary_c argline """ if line.startswith("binary_c "): line = line.replace("binary_c ", "") split_line = line.split() arg_dict = {} for i in range(0, len(split_line), 2): if "." in split_line[i+1]: arg_dict[split_line[i]] = float(split_line[i + 1]) else: arg_dict[split_line[i]] = int(split_line[i + 1]) return arg_dict ################################################### # SLURM functions # # subroutines to run SLURM grids ################################################### def slurm_grid(self): """ Main function that manages the SLURM setup. 
Has three stages: - setup - evolve - join Which stage is used is determined by the value of grid_options['slurm_command']: <empty>: the function will know its the user that executed the script and it will set up the necessary condor stuff 'evolve': evolve_population is called to evolve the population of stars 'join': We will attempt to join the output """ # TODO: Put in function slurm_version = get_slurm_version() if not slurm_version: verbose_print("SLURM: Error: No installation of slurm found", self.grid_options['verbosity'], 0) else: major_version = int(slurm_version.split(".")[0]) minor_version = int(slurm_version.split(".")[1]) # if (major_version == 8) and (minor_version > 4): # verbose_print("SLURM: Found version {} which is new enough".format(slurm_version), self.grid_options['verbosity'], 0) if (major_version > 17): verbose_print("SLURM: Found version {} which is new enough".format(slurm_version), self.grid_options['verbosity'], 0) else: verbose_print("SLURM: Found version {} which is too old (we require 17+)".format(slurm_version), self.grid_options['verbosity'], 0) verbose_print("SLURM: Running slurm grid. command={}".format(self.grid_options['slurm_command']), self.grid_options['verbosity'], 1) if not self.grid_options['slurm_command']: # Setting up verbose_print("SLURM: Main controller script. 
Setting up", self.grid_options['verbosity'], 1) # Check settings: # TODO: check settings # Set up working directories: verbose_print("SLURM: creating working directories", self.grid_options['verbosity'], 1) create_directories_hpc(self.grid_options['slurm_dir']) # Create command python_details = get_python_details() scriptname = path_of_calling_script() command = "".join([ "{}".format(python_details['executable']), "{}".format(scriptname), "offset=$jobarrayindex", "modulo={}".format(self.grid_options['slurm_njobs']), "vb={}".format(self.grid_options['verbosity']), "slurm_jobid=$jobid", "slurm_jobarrayindex=$jobarrayindex", "slurm_jobname='binary_grid_'$jobid'.'$jobarrayindex", "slurm_njobs={}".format(self.grid_options['slurm_njobs']), "slurm_dir={}".format(self.grid_options['slurm_dir']) ]) # Create SLURM_DIR script: # TODO: create the condor script. slurm_script_options = {} slurm_script_options['n'] = self.grid_options['slurm_njobs'] slurm_script_options['njobs'] = self.grid_options['slurm_njobs'] slurm_script_options['dir'] = self.grid_options['slurm_dir'] slurm_script_options['memory'] = self.grid_options['slurm_memory'] slurm_script_options['working_dir'] = self.grid_options['slurm_working_dir'] slurm_script_options['command'] = self.grid_options['command'] slurm_script_options['streams'] = self.grid_options['streams'] print(slurm_script_options) elif self.grid_options['slurm_command'] == 'evolve': # Part to evolve the population. # TODO: decide how many CPUs verbose_print("SLURM: Evolving population", self.grid_options['verbosity'], 1) # self._evolve_population() elif self.grid_options['slurm_command'] == 'join': # Joining the output. verbose_print("SLURM: Joining results", self.grid_options['verbosity'], 1) ################################################### # CONDOR functions # # subroutines to run CONDOR grids ################################################### def condor_grid(self): """ Main function that manages the CONDOR setup. 
Has three stages: - setup - evolve - join Which stage is used is determined by the value of grid_options['condor_command']: <empty>: the function will know its the user that executed the script and it will set up the necessary condor stuff 'evolve': evolve_population is called to evolve the population of stars 'join': We will attempt to join the output """ # TODO: Put in function condor_version = get_condor_version() if not condor_version: verbose_print("CONDOR: Error: No installation of condor found", self.grid_options['verbosity'], 0) else: major_version = int(condor_version.split(".")[0]) minor_version = int(condor_version.split(".")[1]) if (major_version == 8) and (minor_version > 4): verbose_print("CONDOR: Found version {} which is new enough".format(condor_version), self.grid_options['verbosity'], 0) elif (major_version > 9): verbose_print("CONDOR: Found version {} which is new enough".format(condor_version), self.grid_options['verbosity'], 0) else: verbose_print("CONDOR: Found version {} which is too old (we require 8.3/8.4+)".format(condor_version), self.grid_options['verbosity'], 0) verbose_print("Running Condor grid. command={}".format(self.grid_options['condor_command']), self.grid_options['verbosity'], 1) if not self.grid_options['condor_command']: # Setting up verbose_print("CONDOR: Main controller script. 
Setting up", self.grid_options['verbosity'], 1) # Check settings: # TODO: check settings # Set up working directories: verbose_print("CONDOR: creating working directories", verbosity, minimal_verbosity) create_directories_hpc(self.grid_options['condor_dir']) # Create command python_details = get_python_details() scriptname = path_of_calling_script() # command = "".join([ # "{}".python_details['executable'], # "{}".scriptname, # "offset=$jobarrayindex", # "modulo={}".format(self.grid_options['condor_njobs']), # "vb={}".format(self.grid_options['verbosity']) # "results_hash_dumpfile=$self->{_grid_options}{slurm_dir}/results/$jobid.$jobarrayindex", # 'slurm_jobid='.$jobid, # 'slurm_jobarrayindex='.$jobarrayindex, # 'slurm_jobname=binary_grid_'.$jobid.'.'.$jobarrayindex, # "slurm_njobs=$njobs", # "slurm_dir=$self->{_grid_options}{slurm_dir}", # ); # Create CONDOR script: # TODO: create the condor script. condor_script_options = {} # condor_script_options['n'] = condor_script_options['njobs'] = self.grid_options['condor_njobs'] condor_script_options['dir'] = self.grid_options['condor_dir'] condor_script_options['memory'] = self.grid_options['condor_memory'] condor_script_options['working_dir'] = self.grid_options['condor_working_dir'] condor_script_options['command'] = self.grid_options['command'] condor_script_options['streams'] = self.grid_options['streams'] elif self.grid_options['condor_command'] == 'evolve': # Part to evolve the population. # TODO: decide how many CPUs verbose_print("CONDOR: Evolving population", self.grid_options['verbosity'], 1) # self._evolve_population() elif self.grid_options['condor_command'] == 'join': # Joining the output. 
verbose_print("CONDOR: Joining results", self.grid_options['verbosity'], 1) pass ################################################### # Unordered functions # # Functions that arent ordered yet ################################################### def generate_population_arglines_file(self, output_file): """ Function to generate a file that contains all the argument lines that would be given to binary_c if the population had been run TODO: Fix this function """ pass def write_binary_c_calls_to_file( self, output_dir=None, output_filename=None, include_defaults=False ): """ Function that loops over the gridcode and writes the generated parameters to a file. In the form of a commandline call Only useful when you have a variable grid as system_generator. MC wouldnt be that useful Also, make sure that in this export there are the basic parameters like m1,m2,sep, orb-per, ecc, probability etc. On default this will write to the datadir, if it exists # warning; dont use yet. not fully tested. """ # Check if there is no compiled grid yet. If not, lets try to build it first. if not self.grid_options["system_generator"]: ## check the settings: if self.bse_options.get("ensemble", None): if self.bse_options['ensemble'] == 1: if not self.bse_options["ensemble_defer"] == 1: verbose_print( "Error, if you want to run an ensemble in a population, the output needs to be deferred", self.grid_options["verbosity"], 0, ) raise ValueError # Put in check if len(self.grid_options["grid_variables"]) == 0: print("Error: you havent defined any grid variables! Aborting") raise ValueError # self.generate_grid_code(dry_run=False) # self.load_grid_function() if self.grid_options["system_generator"]: # Check if there is an output dir configured if self.custom_options.get("data_dir", None): binary_c_calls_output_dir = self.custom_options["data_dir"] # otherwise check if theres one passed to the function else: if not output_dir: print( "Error. No data_dir configured and you gave no output_dir. 
Aborting" ) raise ValueError binary_c_calls_output_dir = output_dir # check if theres a filename passed to the function if output_filename: binary_c_calls_filename = output_filename # otherwise use default value else: binary_c_calls_filename = "binary_c_calls.txt" binary_c_calls_full_filename = os.path.join( binary_c_calls_output_dir, binary_c_calls_filename ) print("Writing binary_c calls to {}".format(binary_c_calls_full_filename)) # Write to file with open(binary_c_calls_full_filename, "w") as file: # Get defaults and clean them, then overwrite them with the set values. if include_defaults: # TODO: make sure that the defaults here are cleaned up properly cleaned_up_defaults = self.cleaned_up_defaults full_system_dict = cleaned_up_defaults.copy() full_system_dict.update(self.bse_options.copy()) else: full_system_dict = self.bse_options.copy() for system in self.grid_options["system_generator"](self): # update values with current system values full_system_dict.update(system) binary_cmdline_string = self.return_argline(full_system_dict) file.write(binary_cmdline_string + "\n") else: print("Error. No grid function found!") raise ValueError def cleanup_defaults(self): """ Function to clean up the default values: from a dictionary, removes the entries that have the following values: - "NULL" - "" - "Function" Uses the function from utils.functions TODO: Rethink this functionality. seems a bit double, could also be just outside of the class """ binary_c_defaults = self.return_binary_c_defaults().copy() cleaned_dict = filter_arg_dict(binary_c_defaults) return cleaned_dict def clean_up_custom_logging(self, evol_type): """ Function to clean up the custom logging. 
Has two types: 'single': - removes the compiled shared library (which name is stored in grid_options['custom_logging_shared_library_file']) - TODO: unloads/frees the memory allocated to that shared library (which is stored in grid_options['custom_logging_func_memaddr']) - sets both to None 'multiple': - TODO: make this and design this """ if evol_type == "single": verbose_print( "Cleaning up the custom logging stuff. type: single", self.grid_options["verbosity"], 1, ) # TODO: Unset custom logging code # TODO: Unset function memory adress # print(self.grid_options["custom_logging_func_memaddr"]) # remove shared library files if self.grid_options["custom_logging_shared_library_file"]: remove_file( self.grid_options["custom_logging_shared_library_file"], self.grid_options["verbosity"], ) if evol_type == "population": verbose_print( "Cleaning up the custom logging stuffs. type: population", self.grid_options["verbosity"], 1, ) # TODO: make sure that these also work. not fully sure if necessary tho. 
# whether its a single file, or a dict of files/memaddresses if evol_type == "MC": pass def increment_probtot(self, prob): """ Function to add to the total probability """ self.grid_options["probtot"] += prob def increment_count(self): """ Function to add to the total amount of stars """ self.grid_options["count"] += 1 def set_loggers(self): """ Function to set the loggers for the execution of the grid """ # Set logfile binary_c_logfile = self.grid_options["log_file"] # Create directory os.makedirs(os.path.dirname(binary_c_logfile), exist_ok=True) # Set up logger self.logger = logging.getLogger("binary_c_python_logger") self.logger.setLevel(self.grid_options["verbosity"]) # Reset handlers self.logger.handlers = [] # Set formatting of output log_formatter = logging.Formatter( "%(asctime)s - %(name)s - %(levelname)s - %(message)s" ) # Make and add filehandlers # make handler for output to file handler_file = logging.FileHandler(filename=os.path.join(binary_c_logfile)) handler_file.setFormatter(log_formatter) handler_file.setLevel(logging.INFO) # Make handler for output to stdout handler_stdout = logging.StreamHandler(sys.stdout) handler_stdout.setFormatter(log_formatter) handler_stdout.setLevel(logging.INFO) # Add the loggers self.logger.addHandler(handler_file) self.logger.addHandler(handler_stdout) # def join_result_dicts(self): # """ # Function to join the result dictionaries # """ ################################################################################################