From 9121d5dff17a3e8121c0a0d2fddccb20e8b3b0f2 Mon Sep 17 00:00:00 2001 From: David Hendriks <davidhendriks93@gmail.com> Date: Fri, 14 Feb 2020 12:51:40 +0000 Subject: [PATCH] fixing errors: hdf5 file creation when there are no settings. and the setting export if there is a function in it --- binarycpython/utils/functions.py | 98 +++++++++-------- binarycpython/utils/grid.py | 104 ++++++++++++++++-- ...pulation_comparing_with_multiprocessing.py | 1 + 3 files changed, 148 insertions(+), 55 deletions(-) diff --git a/binarycpython/utils/functions.py b/binarycpython/utils/functions.py index 71590f120..48de2b757 100644 --- a/binarycpython/utils/functions.py +++ b/binarycpython/utils/functions.py @@ -10,6 +10,11 @@ from binarycpython.utils.custom_logging_functions import ( create_and_load_logging_function, ) +def output_lines(output): + """ + Function that outputs the lines that were received from the binary_c run. + """ + return output.splitlines() def parse_binary_c_version_info(version_info_string): version_info_dict = {} @@ -101,66 +106,69 @@ def parse_binary_c_version_info(version_info_string): return version_info_dict -def create_hdf5(data_dir): +def create_hdf5(data_dir, name): """ Function to create an hdf5 file from the contents of a directory: - settings file is selected by checking on files ending on settings - data files are selected by checking on files ending with .dat - """ - - # Get content of data_dir - content_data_dir = os.listdir(data_dir) - # Settings - settings_file = os.path.join( - data_dir, - [file for file in content_data_dir if file.endswith("_settings.json")][0], - ) - with open(settings_file, "r") as f: - settings_json = json.load(f) - - # create basename for hdf5 - base_name = settings_file.replace("_settings.json", "") - - # Get data files - data_files = [el for el in content_data_dir if el.endswith(".dat")] + TODO: fix missing settings files + """ + # Make HDF5: # Create the file hdf5_filename = os.path.join( data_dir, 
"{base_name}.hdf5".format(base_name=base_name) ) - f = h5py.File(hdf5_filename, "w") + print("Creating {}".format(hdf5_filename)) - # Create settings group - settings_grp = f.create_group("settings") + # Get content of data_dir + content_data_dir = os.listdir(data_dir) - # Write version_string to settings_group - settings_grp.create_dataset("used_settings", data=json.dumps(settings_json)) + # Settings + if any([file.endswith("_settings.json") for file in content_data_dir]): + print("Adding settings to HDF5 file") + settings_file = os.path.join( + data_dir, + [file for file in content_data_dir if file.endswith("_settings.json")][0], + ) - # Create the data group - data_grp = f.create_group("data") + with open(settings_file, "r") as f: + settings_json = json.load(f) - print("Creating {}".format(hdf5_filename)) + # Create settings group + settings_grp = f.create_group("settings") + + # Write version_string to settings_group + settings_grp.create_dataset("used_settings", data=json.dumps(settings_json)) - # Write the data to the file: - # Make sure: - for data_file in data_files: - # filename stuff - filename = data_file - full_path = os.path.join(data_dir, filename) - base_name = os.path.splitext(os.path.basename(filename))[0] - - # Get header info - header_name = "{base_name}_header".format(base_name=base_name) - data_headers = np.genfromtxt(full_path, dtype="str", max_rows=1) - data_headers = np.char.encode(data_headers) - data_grp.create_dataset(header_name, data=data_headers) - - # Add data - data = np.loadtxt(full_path, skiprows=1) - data_grp.create_dataset(base_name, data=data) - - f.close() + # Get data files + data_files = [el for el in content_data_dir if el.endswith(".dat")] + if data_files: + print("Adding data to HDF5 file") + + # Create the data group + data_grp = f.create_group("data") + + # Write the data to the file: + # Make sure: + for data_file in data_files: + # filename stuff + filename = data_file + full_path = os.path.join(data_dir, filename) + 
base_name = os.path.splitext(os.path.basename(filename))[0] + + # Get header info + header_name = "{base_name}_header".format(base_name=base_name) + data_headers = np.genfromtxt(full_path, dtype="str", max_rows=1) + data_headers = np.char.encode(data_headers) + data_grp.create_dataset(header_name, data=data_headers) + + # Add data + data = np.loadtxt(full_path, skiprows=1) + data_grp.create_dataset(base_name, data=data) + + f.close() def get_help_super(print_help=False, return_dict=True, fail_silently=True): diff --git a/binarycpython/utils/grid.py b/binarycpython/utils/grid.py index d9509893c..7ce68445f 100644 --- a/binarycpython/utils/grid.py +++ b/binarycpython/utils/grid.py @@ -22,7 +22,8 @@ from binarycpython.utils.custom_logging_functions import ( create_and_load_logging_function, temp_dir, ) -from binarycpython.utils.functions import get_defaults, parse_binary_c_version_info +from binarycpython.utils.functions import get_defaults, parse_binary_c_version_info, output_lines + # Todo-list @@ -698,17 +699,20 @@ class Population(object): self.load_grid_function() def evolve_system(binary_cmdline_string): + + # print(binary_cmdline_string) # pass # print('next') # self.set_bse_option("M_1", mass) - # out = binary_c_python_api.run_population( - # binary_cmdline_string, - # self.grid_options["custom_logging_func_memaddr"], - # self.grid_options["store_memaddr"], - # ) - pass - # # parse_function(self, out) + out = binary_c_python_api.run_population( + binary_cmdline_string, + self.grid_options["custom_logging_func_memaddr"], + self.grid_options["store_memaddr"], + ) + + parse_function(self, out) + # pass def yield_system(): for i, system in enumerate(self.grid_options["system_generator"](self)): @@ -724,8 +728,8 @@ class Population(object): # Create pool p = Pool(processes=self.grid_options["amt_cores"]) - # Execute + # TODO: calculate the chunksize value based on: total starcount and cores used. 
r = list(p.imap_unordered(evolve_system, yield_system(), chunksize=1000)) stop_mp = time.time() @@ -741,7 +745,86 @@ class Population(object): return stop_mp - start_mp + def evolve_population_mp_chunks(self): + """ + Test function to evolve a population in a parallel way. + + returns total time spent on the actual interfacing with binary_c + """ + + import time + import multiprocessing as mp + # from pathos.multiprocessing import ProcessingPool as Pool + from pathos.pools import _ProcessPool as Pool + ####################### + ### Custom logging code: + self.set_custom_logging() + + ### Load store + self.grid_options["store_memaddr"] = binary_c_python_api.return_store("") + + ####################### + # Dry run and getting starcount + self.grid_options['probtot'] = 0 + self.generate_grid_code(dry_run=True) + + self.load_grid_function() + + self.dry_run() + + total_starcount_run = self.grid_options['total_starcount'] + print("Total starcount for this run will be: {}".format(total_starcount_run)) + + ####################### + # MP run + self.grid_options['probtot'] = 0 # To make sure that the values are reset. 
TODO: fix this in a cleaner way + + start_mp = time.time() + + self.generate_grid_code(dry_run=False) + + self.load_grid_function() + + def evolve_system(binary_cmdline_string): + out = binary_c_python_api.run_population( + binary_cmdline_string, + self.grid_options["custom_logging_func_memaddr"], + self.grid_options["store_memaddr"], + ) + if self.custom_options['parse_function']: + self.custom_options['parse_function'](self, out) + + def yield_system(): + for i, system in enumerate(self.grid_options["system_generator"](self)): + full_system_dict = self.bse_options.copy() + full_system_dict.update(system) + + binary_cmdline_string = self.return_argline(full_system_dict) + # print("{}/{}".format(i+1, total_starcount_run), binary_cmdline_string) + yield binary_cmdline_string + # yield i + print("generator done") + + # Create pool + p = Pool(processes=self.grid_options["amt_cores"]) + + # Execute + # TODO: calculate the chunksize value based on: total starcount and cores used. + r = list(p.imap_unordered(evolve_system, yield_system(), chunksize=20)) + + stop_mp = time.time() + + # Give feedback + print( + "with mp: {} systems took {}s using {} cores".format( + self.grid_options['total_starcount'], + stop_mp - start_mp, + self.grid_options["amt_cores"], + ) + ) + + return stop_mp - start_mp @@ -787,8 +870,9 @@ class Population(object): if os.path.exists(file): try: if verbose > 0: - print("Removed {}".format()) + print("Removed {}".format(file)) os.remove(file) + # TODO: Put correct exception here. 
except: print("Error while deleting file {}".format(file)) raise FileNotFoundError diff --git a/tests/population/scaling/evolve_population_comparing_with_multiprocessing.py b/tests/population/scaling/evolve_population_comparing_with_multiprocessing.py index 4e95fb0eb..78840fbea 100644 --- a/tests/population/scaling/evolve_population_comparing_with_multiprocessing.py +++ b/tests/population/scaling/evolve_population_comparing_with_multiprocessing.py @@ -127,6 +127,7 @@ total_mp_start = time.time() #evolve_mp_time = test_pop.test_evolve_population_mp() evolve_mp_time = test_pop.test_evolve_population_mp_chunks() +# evolve_mp_time = test_pop.test_evolve_population_mp_chunks_better() total_mp_stop = time.time() -- GitLab