diff --git a/binarycpython/utils/grid.py b/binarycpython/utils/grid.py index 3f491f03d68c3bee1b300f45e769ebd667b1b4f7..719c51ba08d1b330a8e53873bf4a954cb2e7944f 100644 --- a/binarycpython/utils/grid.py +++ b/binarycpython/utils/grid.py @@ -40,6 +40,14 @@ from binarycpython.utils.functions import ( binarycDecoder, merge_dicts, ) +from binarycpython.utils.hpc_functions import ( + get_condor_version, + get_slurm_version, + create_directories_hpc, + path_of_calling_script, + get_python_details, +) + import binary_c_python_api @@ -54,11 +62,12 @@ import binary_c_python_api # TODO: add functionality to return the ensemble_list # TODO: change the grid_options dict structure so that there is room for descriptions # TODO: consider spreading the functions over more files. +# TODO: indicate private or pbulic subroutines +# TODO: make explicit comparison to perl # Make this function also an API call. Doest seem to get written to a buffer # that is stored into a python object. rather its just written to stdout - class Population: """ Population Object. Contains all the necessary functions to set up, run and process a @@ -107,15 +116,6 @@ class Population: # I do however suggest everyone to export the binary_c defaults to a file, so that you know # exactly which values were the defaults. - def set_bse_option(self, key, arg): - """ - Setter for the BSE options. - - # TODO: Put a check here that compares it to the defaults and says something - """ - - self.bse_options[key] = arg - def set(self, **kwargs): """ Function to set the values of the population. This is the preferred method to set values @@ -181,6 +181,15 @@ class Population: ) self.custom_options[key] = kwargs[key] + def _set_bse_option(self, key, arg): + """ + Setter for the BSE options. 
+ + # TODO: Put a check here that compares it to the defaults and says something + """ + + self.bse_options[key] = arg + def parse_cmdline(self): """ Function to handle settings values via the command line: @@ -240,16 +249,6 @@ class Population: argline = argline.strip() return argline - def generate_population_arglines_file(self, output_file): - """ - Function to generate a file that contains all the argument lines that would be given to - binary_c if the population had been run - - TODO: Fix this function - """ - - pass - def add_grid_variable( self, name, @@ -263,11 +262,11 @@ class Population: precode=None, condition=None, ): - """spec + """ Function to add grid variables to the grid_options. TODO: Fix this complex function. - TODO: update the descriptiontext + TODO: update the description The execution of the grid generation will be through a nested forloop, and will rely heavily on the eval() functionality of python. Which, in terms of safety is @@ -521,85 +520,6 @@ class Population: custom_logging_code, verbose=self.grid_options["verbosity"] ) - ################################################### - # Sourcefile functions - ################################################### - - def load_source_file(self, check=False): - """ - Function that loads the source_file that contains a binary_c calls - """ - - if not os.path.isfile(self.grid_options["source_file_filename"]): - verbose_print("Source file doesnt exist", self.grid_options["verbosity"], 0) - - verbose_print( - message="Loading source file from {}".format( - self.grid_options["gridcode_filename"] - ), - verbosity=self.grid_options["verbosity"], - minimal_verbosity=1, - ) - - # We can choose to perform a check on the sourcefile, which checks if the lines start with 'binary_c' - if check: - source_file_check_filehandle = open(self.grid_options["source_file_filename"], 'r') - for line in source_file_check_filehandle: - if not line.startswith('binary_c') - failed = True - break - if failed: - 
verbose_print("Error, sourcefile contains lines that do not start with binary_c", self.grid_options["verbosity"], 0) - raise ValueError - - source_file_filehandle = open(self.grid_options["source_file_filename"], 'r') - - self.grid_options["system_generator"] = source_file_filehandle - - verbose_print("Source file loaded", self.grid_options["verbosity"], 1) - - def dict_from_line_source_file(self): - """ - Function that creates a dict from a binary_c argline - """ - - if line.startswith("binary_c "): - line = line.replace("binary_c ", "") - - split_line = line.split() - arg_dict = {} - - for i in range(0, len(split_line), 2): - if "." in split_line[i+1]: - arg_dict[split_line[i]] = float(split_line[i + 1]) - else: - arg_dict[split_line[i]] = int(split_line[i + 1]) - - return arg_dict - - def dry_run_source_file(self): - """ - Function to go through the source_file and count the amount of lines and the total probability - """ - - system_generator = self.grid_options["system_generator"] - - total_starcount = 0 - total_probability = 0 - - contains_probability = False - - for line in system_generator: - - - - total_starcount += 1 - - - total_starcount = system_generator(self) - self.grid_options["total_starcount"] = total_starcount - - ################################################### # Ensemble functions ################################################### @@ -675,157 +595,126 @@ class Population: # Evolution functions ################################################### - def setup(self): + def evolve(self): """ - Function to set up the necessary stuff for the population evolution. - - The idea is to do all the stuff that is necessary for a population to run. - Since we have different methods of running a population, this setup function - will do different things depending on different settings - - # TODO: Make other kinds of populations possible. i.e, read out type of grid, - and set up accordingly - - # TODO: make this function more general. 
Have it explicitly set the system_generator - function + Entrypoint function of the whole object. From here, based on the settings, + we set up a SLURM or CONDOR grid, or if no setting is given we go straight + to evolving the population """ - if not self.grid_options["parse_function"]: - print("Error: No parse function set. Aborting run") - raise ValueError - - ####################### - ### Custom logging code: - self.set_custom_logging() - - ### Load store - self.grid_options["store_memaddr"] = binary_c_python_api.return_store("") - - ### ensemble: - ## Load persistent_data_memaddr if necessary: - if self.bse_options["ensemble"] == 1: - self.load_persistent_data_memory_dict() - - ## check the settings: - if self.bse_options["ensemble"] == 1: - if not self.bse_options["ensemble_defer"] == 1: - verbose_print( - "Error, if you want to run an ensemble in a population, the output needs to be deferred", - self.grid_options["verbosity"], - 0, - ) - raise ValueError + # Check which type: + if self.grid_options['slurm'] == 1: + # Execute slurm subroutines + self.slurm_grid() - # Check which type of population generation - if grid_options["population_type_options"] == "grid": - ####################### - # Dry run and getting starcount - self.grid_options["probtot"] = 0 + elif self.grid_options['condor'] == 1: + # Execute condor subroutines + self.condor_grid() - # Put in check - if len(self.grid_options["grid_variables"]) == 0: - print("Error: you havent defined any grid variables! Aborting") - raise ValueError + else: + # Execute population evolution subroutines + self.evolve_population() - # Set up the grid code with a dry run option to see total probability - self.generate_grid_code(dry_run=True) + def _evolve_population(self): + """ + Function to evolve populations. This handles the setting up, evolving + and cleaning up of a population of stars. 
- # Load the grid code - self.load_grid_function() + Choices here are: + - to evolve a population via multiprocessing or linearly on 1 core. + - to evolve a population via a variable grid, a source file or MC + + TODO: include options for different ways of generating a population here. + """ - # Do a dry run - self.dry_run() + ## + # Prepare code/initialise grid. + # set custom logging, set up store_memaddr, build grid code. dry run grid code. + self.setup() + ## + # Evolve systems: via grid_options one can choose to do this linearly, or + # multiprocessing method. + if ( + self.grid_options["evolution_type"] + in self.grid_options["evolution_type_options"] + ): + if self.grid_options["evolution_type"] == "mp": + self._evolve_population_mp() + elif self.grid_options["evolution_type"] == "linear": + self._evolve_population_lin() + else: print( - "Total starcount for this run will be: {}".format( - self.grid_options["total_starcount"] + "Warning. you chose a wrong option for the grid evolution types.\ + Please choose from the following: {}.".format( + self.grid_options["evolution_type_options"] ) ) - ####################### - # Reset values and prepare the grid function - self.grid_options[ - "probtot" - ] = 0 # To make sure that the values are reset. TODO: fix this in a cleaner way - self.grid_options[ - "start_time_evolution" - ] = time.time() # Setting start time of grid - - # - self.generate_grid_code(dry_run=False) + ## + # Clean up code: remove files, unset values. + self.cleanup() - # - self.load_grid_function() + def _evolve_population_mp(self): + """ + Function to evolve the population with multiprocessing approach. + Using pathos to be able to include class-owned functions. 
+ """ - # Source file - elif grid_options["population_type_options"] == "source_file": - ####################### - # Dry run and getting starcount - self.grid_options["probtot"] = 0 + # TODO: make further use of a queue to handle jobs or at least + # get information on the process ids etc + # https://stackoverflow.com/questions/10190981/get-a-unique-id-for-worker-in-python-multiprocessing-pool + # https://stackoverflow.com/questions/8640367/python-manager-dict-in-multiprocessing/9536888 + # for muting values through dicts + # https://python-forum.io/Thread-Dynamic-updating-of-a-nested-dictionary-in-multiprocessing-pool + # https://stackoverflow.com/questions/28740955/working-with-pathos-multiprocessing-tool-in-python-and - # Load the grid code - self.load_source_file_function() + # TODO: make good example of how to deal with a result_dict + manager = pathos_multiprocess.Manager() + self.grid_options["result_dict"] = manager.dict() - # Do a dry run - self.dry_run_source_file() + # Create pool + pool = Pool(processes=self.grid_options["amt_cores"]) - print( - "Total starcount for this run will be: {}".format( - self.grid_options["total_starcount"] - ) + # Execute + # TODO: calculate the chunksize value based on: total starcount and cores used. + _ = list( + pool.imap_unordered( + self.evolve_system_mp, self.yield_system_mp(), chunksize=20 ) + ) - ####################### - # Reset values and prepare the grid function - self.grid_options[ - "probtot" - ] = 0 # To make sure that the values are reset. 
TODO: fix this in a cleaner way - self.grid_options[ - "start_time_evolution" - ] = time.time() # Setting start time of grid - - # - self.load_source_file_function(dry_run=False) - - # - self.load_grid_function() - - + # Handle clean termination of the whole multiprocessing (making sure there are no zombie + # processes (https://en.wikipedia.org/wiki/Zombie_process)) + pool.close() + pool.join() + def _evolve_population_lin(self): + """ + Function to evolve the population linearly (i.e. 1 core, no multiprocessing methods) + """ + for i, system in enumerate(self.grid_options["system_generator"](self)): + full_system_dict = self.bse_options.copy() + full_system_dict.update(system) - ####### - - def cleanup(self): - """ - Function that handles all the cleaning up after the grid has been generated and/or run - - - reset values to 0 - - remove grid file - - unload grid function/module - - remove dry grid file - - unload dry grid function/module - """ - - # Output the ensemble if necessary: - if self.bse_options["ensemble"] == 1: - self.free_persistent_data_memory_and_combine_results_and_output() - - # Reset values - self.grid_options["count"] = 0 - self.grid_options["probtot"] = 0 - self.grid_options["system_generator"] = None - - # Remove files - - # Unload functions + binary_cmdline_string = self.return_argline(full_system_dict) + out = binary_c_python_api.run_system( + argstring=binary_cmdline_string, + custom_logging_func_memaddr=self.grid_options[ + "custom_logging_func_memaddr" + ], + store_memaddr=self.grid_options["store_memaddr"], + population=1, + ) + self.print_info( + i + 1, self.grid_options["total_starcount"], full_system_dict + ) - # Unload store - binary_c_python_api.binary_c_free_store_memaddr( - self.grid_options["store_memaddr"] - ) + if self.grid_options["parse_function"]: + self.grid_options["parse_function"](self, out) - def evolve_system_mp(self, binary_cmdline_string): + def _evolve_system_mp(self, binary_cmdline_string): """ Function that the 
multiprocessing evolution method calls to evolve a system """ @@ -842,7 +731,7 @@ class Population: if self.grid_options["parse_function"]: self.grid_options["parse_function"](self, out) - def yield_system_mp(self): + def _yield_system_mp(self): """ Function that the multiprocessing evolution method calls to yield systems """ @@ -860,6 +749,7 @@ class Population: print("generator done") + # Single system def evolve_single(self, clean_up_custom_logging_files=True): """ Function to run a single system @@ -896,98 +786,150 @@ class Population: return self.grid_options["parse_function"](self, out) return out - def evolve_population_mp(self): + def _setup(self): """ - Function to evolve the population with multiprocessing approach. - Using pathos to be able to include class-owned functions. + Function to set up the necessary stuff for the population evolution. + + The idea is to do all the stuff that is necessary for a population to run. + Since we have different methods of running a population, this setup function + will do different things depending on different settings + + # TODO: Make other kinds of populations possible. i.e, read out type of grid, + and set up accordingly + + # TODO: make this function more general. Have it explicitly set the system_generator + function """ - # TODO: make further use of a queue to handle jobs or at least - # get information on the process ids etc - # https://stackoverflow.com/questions/10190981/get-a-unique-id-for-worker-in-python-multiprocessing-pool - # https://stackoverflow.com/questions/8640367/python-manager-dict-in-multiprocessing/9536888 - # for muting values through dicts - # https://python-forum.io/Thread-Dynamic-updating-of-a-nested-dictionary-in-multiprocessing-pool - # https://stackoverflow.com/questions/28740955/working-with-pathos-multiprocessing-tool-in-python-and + if not self.grid_options["parse_function"]: + print("Error: No parse function set. 
Aborting run") + raise ValueError - # TODO: make good example of how to deal with a result_dict - manager = pathos_multiprocess.Manager() - self.grid_options["result_dict"] = manager.dict() + ####################### + ### Custom logging code: + self.set_custom_logging() - # Create pool - pool = Pool(processes=self.grid_options["amt_cores"]) + ### Load store + self.grid_options["store_memaddr"] = binary_c_python_api.return_store("") - # Execute - # TODO: calculate the chunksize value based on: total starcount and cores used. - _ = list( - pool.imap_unordered( - self.evolve_system_mp, self.yield_system_mp(), chunksize=20 - ) - ) + ### ensemble: + ## Load persistent_data_memaddr if necessary: + if self.bse_options["ensemble"] == 1: + self.load_persistent_data_memory_dict() - # Handle clean termination of the whole multiprocessing (making sure there are no zombie - # processes (https://en.wikipedia.org/wiki/Zombie_process)) - pool.close() - pool.join() + ## check the settings: + if self.bse_options["ensemble"] == 1: + if not self.bse_options["ensemble_defer"] == 1: + verbose_print( + "Error, if you want to run an ensemble in a population, the output needs to be deferred", + self.grid_options["verbosity"], + 0, + ) + raise ValueError - def evolve_population_lin(self): - """ - Function to evolve the population linearly (i.e. 1 core, no multiprocessing) - """ + # Check which type of population generation + if grid_options["population_type_options"] == "grid": + ####################### + # Dry run and getting starcount + self.grid_options["probtot"] = 0 - for i, system in enumerate(self.grid_options["system_generator"](self)): - full_system_dict = self.bse_options.copy() - full_system_dict.update(system) + # Put in check + if len(self.grid_options["grid_variables"]) == 0: + print("Error: you havent defined any grid variables! 
Aborting") + raise ValueError - binary_cmdline_string = self.return_argline(full_system_dict) - out = binary_c_python_api.run_system( - argstring=binary_cmdline_string, - custom_logging_func_memaddr=self.grid_options[ - "custom_logging_func_memaddr" - ], - store_memaddr=self.grid_options["store_memaddr"], - population=1, - ) - self.print_info( - i + 1, self.grid_options["total_starcount"], full_system_dict + # Set up the grid code with a dry run option to see total probability + self.generate_grid_code(dry_run=True) + + # Load the grid code + self.load_grid_function() + + # Do a dry run + self.dry_run() + + print( + "Total starcount for this run will be: {}".format( + self.grid_options["total_starcount"] + ) ) - if self.grid_options["parse_function"]: - self.grid_options["parse_function"](self, out) + ####################### + # Reset values and prepare the grid function + self.grid_options[ + "probtot" + ] = 0 # To make sure that the values are reset. TODO: fix this in a cleaner way + self.grid_options[ + "start_time_evolution" + ] = time.time() # Setting start time of grid - def evolve_population(self): - """ - Function to evolve populations. This is the main function. Handles the setting up, evolving - and cleaning up of a population of stars. - """ + # + self.generate_grid_code(dry_run=False) - ## - # Prepare code/initialise grid. - # set custom logging, set up store_memaddr, build grid code. dry run grid code. - self.setup() + # + self.load_grid_function() + + # Source file + elif grid_options["population_type_options"] == "source_file": + ####################### + # Dry run and getting starcount + self.grid_options["probtot"] = 0 + + # Load the grid code + self.load_source_file_function() + + # Do a dry run + self.dry_run_source_file() - ## - # Evolve systems: via grid_options one can choose to do this linearly, or - # multiprocessing method. 
- if ( - self.grid_options["evolution_type"] - in self.grid_options["evolution_type_options"] - ): - if self.grid_options["evolution_type"] == "mp": - self.evolve_population_mp() - elif self.grid_options["evolution_type"] == "linear": - self.evolve_population_lin() - else: print( - "Warning. you chose a wrong option for the grid evolution types.\ - Please choose from the following: {}.".format( - self.grid_options["evolution_type_options"] + "Total starcount for this run will be: {}".format( + self.grid_options["total_starcount"] ) ) - ## - # Clean up code: remove files, unset values. - self.cleanup() + ####################### + # Reset values and prepare the grid function + self.grid_options[ + "probtot" + ] = 0 # To make sure that the values are reset. TODO: fix this in a cleaner way + self.grid_options[ + "start_time_evolution" + ] = time.time() # Setting start time of grid + + # + self.load_source_file_function(dry_run=False) + + # + self.load_grid_function() + ####### + + def _cleanup(self): + """ + Function that handles all the cleaning up after the grid has been generated and/or run + + - reset values to 0 + - remove grid file + - unload grid function/module + - remove dry grid file + - unload dry grid function/module + """ + + # Output the ensemble if necessary: + if self.bse_options["ensemble"] == 1: + self.free_persistent_data_memory_and_combine_results_and_output() + + # Reset values + self.grid_options["count"] = 0 + self.grid_options["probtot"] = 0 + self.grid_options["system_generator"] = None + + # Remove files + + # Unload functions + + # Unload store + binary_c_python_api.binary_c_free_store_memaddr( + self.grid_options["store_memaddr"] + ) ################################################### # Gridcode functions @@ -1005,8 +947,6 @@ class Population: The phasevol values are handled by generating a second array - - # DONE: make a generator for this. 
# TODO: Add correct logging everywhere # TODO: add part to handle separation if orbital_period is added. Idea. use default values # for orbital parameters and possibly overwrite those or something. @@ -1486,12 +1426,310 @@ class Population: # a file containing binary_c calls ################################################### + def dry_run_source_file(self): + """ + Function to go through the source_file and count the amount of lines and the total probability + """ + + system_generator = self.grid_options["system_generator"] + + total_starcount = 0 + total_probability = 0 + + contains_probability = False + + for line in system_generator: + total_starcount += 1 + + total_starcount = system_generator(self) + self.grid_options["total_starcount"] = total_starcount + + def load_source_file(self, check=False): + """ + Function that loads the source_file that contains a binary_c calls + """ + + if not os.path.isfile(self.grid_options["source_file_filename"]): + verbose_print("Source file doesnt exist", self.grid_options["verbosity"], 0) + + verbose_print( + message="Loading source file from {}".format( + self.grid_options["gridcode_filename"] + ), + verbosity=self.grid_options["verbosity"], + minimal_verbosity=1, + ) + + # We can choose to perform a check on the sourcefile, which checks if the lines start with 'binary_c' + if check: + source_file_check_filehandle = open(self.grid_options["source_file_filename"], 'r') + for line in source_file_check_filehandle: + if not line.startswith('binary_c'): + failed = True + break + if failed: + verbose_print("Error, sourcefile contains lines that do not start with binary_c", self.grid_options["verbosity"], 0) + raise ValueError + + source_file_filehandle = open(self.grid_options["source_file_filename"], 'r') + + self.grid_options["system_generator"] = source_file_filehandle + + verbose_print("Source file loaded", self.grid_options["verbosity"], 1) + + def dict_from_line_source_file(self): + """ + Function that creates a dict from 
a binary_c argline + """ + + if line.startswith("binary_c "): + line = line.replace("binary_c ", "") + + split_line = line.split() + arg_dict = {} + + for i in range(0, len(split_line), 2): + if "." in split_line[i+1]: + arg_dict[split_line[i]] = float(split_line[i + 1]) + else: + arg_dict[split_line[i]] = int(split_line[i + 1]) + + return arg_dict + + + + ################################################### + # SLURM functions + # + # subroutines to run SLURM grids + ################################################### + + def slurm_grid(self): + """ + Main function that manages the SLURM setup. + + Has three stages: + + - setup + - evolve + - join + + Which stage is used is determined by the value of grid_options['slurm_command']: + + <empty>: the function will know its the user that executed the script and + it will set up the necessary condor stuff + + 'evolve': evolve_population is called to evolve the population of stars + + 'join': We will attempt to join the output + """ + + # TODO: Put in function + slurm_version = get_slurm_version() + if not slurm_version: + verbose_print("SLURM: Error: No installation of slurm found", self.grid_options['verbosity'], 0) + else: + major_version = int(slurm_version.split(".")[0]) + minor_version = int(slurm_version.split(".")[1]) + + # if (major_version == 8) and (minor_version > 4): + # verbose_print("SLURM: Found version {} which is new enough".format(slurm_version), self.grid_options['verbosity'], 0) + if (major_version > 17): + verbose_print("SLURM: Found version {} which is new enough".format(slurm_version), self.grid_options['verbosity'], 0) + else: + verbose_print("SLURM: Found version {} which is too old (we require 17+)".format(slurm_version), self.grid_options['verbosity'], 0) + + verbose_print("SLURM: Running slurm grid. 
command={}".format(self.grid_options['slurm_command']), self.grid_options['verbosity'], 1) + if not self.grid_options['slurm_command']: + # Setting up + verbose_print("SLURM: Main controller script. Setting up", self.grid_options['verbosity'], 1) + + # Check settings: + # TODO: check settings + + # Set up working directories: + verbose_print("SLURM: creating working directories", self.grid_options['verbosity'], 1) + create_directories_hpc(self.grid_options['slurm_dir']) + + # Create command + python_details = get_python_details() + scriptname = path_of_calling_script() + + command = "".join([ + "{}".format(python_details['executable']), + "{}".format(scriptname), + "offset=$jobarrayindex", + "modulo={}".format(self.grid_options['slurm_njobs']), + "vb={}".format(self.grid_options['verbosity']), + "slurm_jobid=$jobid", + "slurm_jobarrayindex=$jobarrayindex", + "slurm_jobname='binary_grid_'$jobid'.'$jobarrayindex", + "slurm_njobs={}".format(self.grid_options['slurm_njobs']), + "slurm_dir={}".format(self.grid_options['slurm_dir']) + ]) + + # Create SLURM_DIR script: + # TODO: create the condor script. + slurm_script_options = {} + slurm_script_options['n'] = self.grid_options['slurm_njobs'] + slurm_script_options['njobs'] = self.grid_options['slurm_njobs'] + slurm_script_options['dir'] = self.grid_options['slurm_dir'] + slurm_script_options['memory'] = self.grid_options['slurm_memory'] + slurm_script_options['working_dir'] = self.grid_options['slurm_working_dir'] + slurm_script_options['command'] = self.grid_options['command'] + slurm_script_options['streams'] = self.grid_options['streams'] + + print(slurm_script_options) + + + elif self.grid_options['slurm_command'] == 'evolve': + # Part to evolve the population. + # TODO: decide how many CPUs + verbose_print("SLURM: Evolving population", self.grid_options['verbosity'], 1) + + # + self._evolve_population() + + elif self.grid_options['slurm_command'] == 'join': + # Joining the output. 
+ verbose_print("SLURM: Joining results", self.grid_options['verbosity'], 1) + + ################################################### + # CONDOR functions + # + # subroutines to run CONDOR grids + ################################################### + + def condor_grid(self): + """ + Main function that manages the CONDOR setup. + + Has three stages: + + - setup + - evolve + - join + + Which stage is used is determined by the value of grid_options['condor_command']: + + <empty>: the function will know its the user that executed the script and + it will set up the necessary condor stuff + + 'evolve': evolve_population is called to evolve the population of stars + + 'join': We will attempt to join the output + """ + + # TODO: Put in function + condor_version = get_condor_version() + if not condor_version: + verbose_print("CONDOR: Error: No installation of condor found", self.grid_options['verbosity'], 0) + else: + major_version = int(condor_version.split(".")[0]) + minor_version = int(condor_version.split(".")[1]) + + if (major_version == 8) and (minor_version > 4): + verbose_print("CONDOR: Found version {} which is new enough".format(condor_version), self.grid_options['verbosity'], 0) + elif (major_version > 9): + verbose_print("CONDOR: Found version {} which is new enough".format(condor_version), self.grid_options['verbosity'], 0) + else: + verbose_print("CONDOR: Found version {} which is too old (we require 8.3/8.4+)".format(condor_version), self.grid_options['verbosity'], 0) + + + verbose_print("Running Condor grid. command={}".format(self.grid_options['condor_command']), self.grid_options['verbosity'], 1) + if not self.grid_options['condor_command']: + # Setting up + verbose_print("CONDOR: Main controller script. 
Setting up", self.grid_options['verbosity'], 1) + + # Check settings: + # TODO: check settings + + # Set up working directories: + verbose_print("CONDOR: creating working directories", verbosity, minimal_verbosity) + create_directories_hpc(self.grid_options['condor_dir']) + + # Create command + python_details = get_python_details() + scriptname = path_of_calling_script() + + # command = "".join([ + # "{}".python_details['executable'], + # "{}".scriptname, + # "offset=$jobarrayindex", + # "modulo={}".format(self.grid_options['condor_njobs']), + # "vb={}".format(self.grid_options['verbosity']) + + + + + + # "results_hash_dumpfile=$self->{_grid_options}{slurm_dir}/results/$jobid.$jobarrayindex", + # 'slurm_jobid='.$jobid, + # 'slurm_jobarrayindex='.$jobarrayindex, + # 'slurm_jobname=binary_grid_'.$jobid.'.'.$jobarrayindex, + # "slurm_njobs=$njobs", + # "slurm_dir=$self->{_grid_options}{slurm_dir}", + # ); + + + # Create CONDOR script: + # TODO: create the condor script. + condor_script_options = {} + # condor_script_options['n'] = + condor_script_options['njobs'] = self.grid_options['condor_njobs'] + condor_script_options['dir'] = self.grid_options['condor_dir'] + condor_script_options['memory'] = self.grid_options['condor_memory'] + condor_script_options['working_dir'] = self.grid_options['condor_working_dir'] + condor_script_options['command'] = self.grid_options['command'] + condor_script_options['streams'] = self.grid_options['streams'] + + + + + + elif self.grid_options['condor_command'] == 'evolve': + # Part to evolve the population. + # TODO: decide how many CPUs + verbose_print("CONDOR: Evolving population", self.grid_options['verbosity'], 1) + + + # + self._evolve_population() + + elif self.grid_options['condor_command'] == 'join': + # Joining the output. 
+ verbose_print("CONDOR: Joining results", self.grid_options['verbosity'], 1) + + pass + + ################################################### # Unordered functions # # Functions that arent ordered yet ################################################### + def generate_population_arglines_file(self, output_file): + """ + Function to generate a file that contains all the argument lines that would be given to + binary_c if the population had been run + + TODO: Fix this function + """ + + pass + + + + + + + + + + + + def write_binary_c_calls_to_file( self, output_dir=None, output_filename=None, include_defaults=False ): diff --git a/binarycpython/utils/grid_options_defaults.py b/binarycpython/utils/grid_options_defaults.py index 19ecc02207540b56aaba3b0d6d21d6524fd03f4f..0d694fcc60425592b47061f5f9052d5a0ad35958 100644 --- a/binarycpython/utils/grid_options_defaults.py +++ b/binarycpython/utils/grid_options_defaults.py @@ -71,7 +71,7 @@ grid_options_defaults_dict = { "grid", "source_file", ], # Available choices for type of population generation # TODO: fill later with monte carlo etc - "source_file_filename": None # filename for the source + "source_file_filename": None, # filename for the source "count": 0, # total count of systems "probtot": 0, # total probability "weight": 1.0, # weighting for the probability @@ -118,11 +118,11 @@ grid_options_defaults_dict = { ######################################## # Slurm stuff ######################################## - slurm_ntasks=>1, # 1 CPU required per job - slurm_partition=>'all', # MUST be defined - slurm_jobname=>'binary_grid', # not required but useful - slurm_use_all_node_CPUs=>0, # if given nodes, set to 1 - # if given CPUs, set to 0 + # slurm_ntasks=>1, # 1 CPU required per job + # slurm_partition=>'all', # MUST be defined + # slurm_jobname=>'binary_grid', # not required but useful + # slurm_use_all_node_CPUs=>0, # if given nodes, set to 1 + # # if given CPUs, set to 0 "slurm": 0, # dont use the slurm by default. 
1 = use slurm "slurm_command": "", # Command that slurm runs (e.g. run_flexigrid or join_datafiles) diff --git a/binarycpython/utils/hpc.py b/binarycpython/utils/hpc.py deleted file mode 100644 index e33bf362c66faf0fb7cbebf05838d05cc16d2e8f..0000000000000000000000000000000000000000 --- a/binarycpython/utils/hpc.py +++ /dev/null @@ -1,52 +0,0 @@ -""" -File containing functions for HPC computing, distributed tasks on clusters etc. - -Mainly divided in 2 sections: Slurm and Condor -""" - -import os -import time - -class slurm_grid(): - - - - -def create_directories_hpc(working_dir): - """ - Function to create a set of directories, given a root directory - - These directories will contain stuff for the HPC runs - """ - - # - if not os.path.exist(working_dir): - print("Error. Working directory {} does not exist! Aborting") - raise ValueError - - directories_list = ['scripts','stdout','stderr','results','logs','status','joining'] - - # - for subdir in directories_list: - full_path = os.path.join(working_dir, subdir) - os.makedirs(full_path, exist_ok=False) - - # Since the directories are probably made on some mount which has to go over NFS - # we should explicitly check if they are created - - print("Waiting for directories") - directories_exist = False - while directories_exist: - directories_exist = True - - for subdir in directories_list: - full_path = os.path.join(working_dir, subdir) - - if not os.path.exist(full_path): - time.sleep(1) - directories_exist = False - - print("Directories exist") - - - diff --git a/binarycpython/utils/hpc_functions.py b/binarycpython/utils/hpc_functions.py new file mode 100644 index 0000000000000000000000000000000000000000..dd8a0b7db662f46790a306219a784ab6d63f5576 --- /dev/null +++ b/binarycpython/utils/hpc_functions.py @@ -0,0 +1,122 @@ +""" +File containing functions for HPC computing, distributed tasks on clusters etc. + +Functions that the slurm and condor subroutines of the population object use. 
# Mainly divided in 2 sections: Slurm and Condor

import os
import sys
import time
import subprocess
import __main__ as main


def _query_version(command, program_name):
    """
    Run a version command and return the second whitespace-separated token
    of its stdout.

    Args:
        command: argument list handed to subprocess.run (no shell involved).
        program_name: human readable name used in the diagnostic messages.

    Returns:
        The version token as a string, or None when the program cannot be
        executed, exits with a non-zero status, or prints fewer than two tokens.
    """

    try:
        completed = subprocess.run(command, stdout=subprocess.PIPE, check=True)
    except FileNotFoundError as err:
        # The executable is not on the PATH.
        print("{} is not installed or not loaded: ".format(program_name))
        print(err)
        print(err.args)
        return None
    except (OSError, subprocess.SubprocessError) as err:
        # E.g. permission problems or a non-zero exit status (check=True).
        print("Unknown error, contact me about this: ")
        print(err)
        print(err.args)
        return None

    # errors="replace" so that unexpected bytes can not raise while decoding.
    tokens = completed.stdout.decode("utf-8", errors="replace").split()

    # The version is taken from the second token; guard against shorter output
    # instead of letting an IndexError be reported as an 'unknown error'.
    if len(tokens) < 2:
        print(
            "Could not parse the {} version from the output of {}".format(
                program_name, command
            )
        )
        return None

    return tokens[1]


def get_slurm_version():
    """
    Function that checks whether slurm is installed and returns the version if its installed.

    Runs `sinfo -V` and returns the second token of its output.
    Otherwise returns None.

    Only tested this with slurm v17+
    """

    return _query_version(["sinfo", "-V"], "Slurm")


def get_condor_version():
    """
    Function that checks whether condor is installed and returns the version if its installed.

    Runs `condor_q --version` and returns the second token of its output.
    Otherwise returns None.

    Result has to be condor v8 or higher
    """

    return _query_version(["condor_q", "--version"], "Condor")


def create_directories_hpc(working_dir):
    """
    Function to create a set of directories, given a root directory

    These directories will contain stuff for the HPC runs

    Args:
        working_dir: existing root directory in which the subdirectories are made.

    Raises:
        ValueError: if working_dir is not an existing directory.
    """

    # Check if working_dir exists
    if not os.path.isdir(working_dir):
        print(
            "Error. Working directory {} does not exist! Aborting".format(working_dir)
        )
        raise ValueError(
            "Working directory {} does not exist".format(working_dir)
        )

    directories_list = [
        "scripts",
        "stdout",
        "stderr",
        "results",
        "logs",
        "status",
        "joining",
    ]
    full_paths = [os.path.join(working_dir, subdir) for subdir in directories_list]

    # Make directories. exist_ok so that re-running in the same root is fine.
    for full_path in full_paths:
        os.makedirs(full_path, exist_ok=True)

    # Since the directories are probably made on some mount which has to go over NFS
    # we should explicitly check if they are created
    print("Checking if creating the directories has finished...")

    # Poll until every directory is visible. The condition has to be negated:
    # looping on the flag itself (initialised to False) would never run the check.
    while not all(os.path.isdir(full_path) for full_path in full_paths):
        time.sleep(1)

    print("..Finished! Directories exist.")


def path_of_calling_script():
    """
    Function to get the name of the script the user executes.

    Returns:
        The __file__ attribute of the __main__ module, or None when __main__ has
        no __file__ (e.g. in an interactive session or when run via `python -c`).
    """

    return getattr(main, "__file__", None)


def get_python_details():
    """
    Function to get some info about the used python version and virtualenv etc

    Returns:
        dict with the keys:
            'virtualenv': value of the VIRTUAL_ENV environment variable (None if unset)
            'executable': sys.executable
            'version': sys.version
    """

    python_info_dict = {
        "virtualenv": os.getenv("VIRTUAL_ENV"),
        "executable": sys.executable,
        "version": sys.version,
    }

    return python_info_dict