From 9121d5dff17a3e8121c0a0d2fddccb20e8b3b0f2 Mon Sep 17 00:00:00 2001 From: David Hendriks <davidhendriks93@gmail.com> Date: Fri, 14 Feb 2020 12:51:40 +0000 Subject: [PATCH] fixing errors: hdf5 file creation when there are no settings. and the setting export if there is a function in it --- binarycpython/utils/functions.py | 98 +++++++++-------- binarycpython/utils/grid.py | 104 ++++++++++++++++-- ...pulation_comparing_with_multiprocessing.py | 1 + 3 files changed, 148 insertions(+), 55 deletions(-) diff --git a/binarycpython/utils/functions.py b/binarycpython/utils/functions.py index 71590f120..48de2b757 100644 --- a/binarycpython/utils/functions.py +++ b/binarycpython/utils/functions.py @@ -10,6 +10,11 @@ from binarycpython.utils.custom_logging_functions import ( create_and_load_logging_function, ) +def output_lines(output): + """ + Function that outputs the lines that were received from the binary_c run. + """ + return output.splitlines() def parse_binary_c_version_info(version_info_string): version_info_dict = {} @@ -101,66 +106,69 @@ def parse_binary_c_version_info(version_info_string): return version_info_dict -def create_hdf5(data_dir): +def create_hdf5(data_dir, name): """ Function to create an hdf5 file from the contents of a directory: - settings file is selected by checking on files ending on settings - data files are selected by checking on files ending with .dat - """ - - # Get content of data_dir - content_data_dir = os.listdir(data_dir) - # Settings - settings_file = os.path.join( - data_dir, - [file for file in content_data_dir if file.endswith("_settings.json")][0], - ) - with open(settings_file, "r") as f: - settings_json = json.load(f) - - # create basename for hdf5 - base_name = settings_file.replace("_settings.json", "") - - # Get data files - data_files = [el for el in content_data_dir if el.endswith(".dat")] + TODO: fix missing settings files + """ + # Make HDF5: # Create the file hdf5_filename = os.path.join( data_dir, 
"{base_name}.hdf5".format(base_name=base_name) ) - f = h5py.File(hdf5_filename, "w") + print("Creating {}".format(hdf5_filename)) - # Create settings group - settings_grp = f.create_group("settings") + # Get content of data_dir + content_data_dir = os.listdir(data_dir) - # Write version_string to settings_group - settings_grp.create_dataset("used_settings", data=json.dumps(settings_json)) + # Settings + if any([file.endswith("_settings.json") for file in content_data_dir]): + print("Adding settings to HDF5 file") + settings_file = os.path.join( + data_dir, + [file for file in content_data_dir if file.endswith("_settings.json")][0], + ) - # Create the data group - data_grp = f.create_group("data") + with open(settings_file, "r") as f: + settings_json = json.load(f) - print("Creating {}".format(hdf5_filename)) + # Create settings group + settings_grp = f.create_group("settings") + + # Write version_string to settings_group + settings_grp.create_dataset("used_settings", data=json.dumps(settings_json)) - # Write the data to the file: - # Make sure: - for data_file in data_files: - # filename stuff - filename = data_file - full_path = os.path.join(data_dir, filename) - base_name = os.path.splitext(os.path.basename(filename))[0] - - # Get header info - header_name = "{base_name}_header".format(base_name=base_name) - data_headers = np.genfromtxt(full_path, dtype="str", max_rows=1) - data_headers = np.char.encode(data_headers) - data_grp.create_dataset(header_name, data=data_headers) - - # Add data - data = np.loadtxt(full_path, skiprows=1) - data_grp.create_dataset(base_name, data=data) - - f.close() + # Get data files + data_files = [el for el in content_data_dir if el.endswith(".dat")] + if data_files: + print("Adding data to HDF5 file") + + # Create the data group + data_grp = f.create_group("data") + + # Write the data to the file: + # Make sure: + for data_file in data_files: + # filename stuff + filename = data_file + full_path = os.path.join(data_dir, filename) + 
base_name = os.path.splitext(os.path.basename(filename))[0] + + # Get header info + header_name = "{base_name}_header".format(base_name=base_name) + data_headers = np.genfromtxt(full_path, dtype="str", max_rows=1) + data_headers = np.char.encode(data_headers) + data_grp.create_dataset(header_name, data=data_headers) + + # Add data + data = np.loadtxt(full_path, skiprows=1) + data_grp.create_dataset(base_name, data=data) + + f.close() def get_help_super(print_help=False, return_dict=True, fail_silently=True): diff --git a/binarycpython/utils/grid.py b/binarycpython/utils/grid.py index d9509893c..7ce68445f 100644 --- a/binarycpython/utils/grid.py +++ b/binarycpython/utils/grid.py @@ -22,7 +22,8 @@ from binarycpython.utils.custom_logging_functions import ( create_and_load_logging_function, temp_dir, ) -from binarycpython.utils.functions import get_defaults, parse_binary_c_version_info +from binarycpython.utils.functions import get_defaults, parse_binary_c_version_info, output_lines + # Todo-list @@ -698,17 +699,20 @@ class Population(object): self.load_grid_function() def evolve_system(binary_cmdline_string): + + # print(binary_cmdline_string) # pass # print('next') # self.set_bse_option("M_1", mass) - # out = binary_c_python_api.run_population( - # binary_cmdline_string, - # self.grid_options["custom_logging_func_memaddr"], - # self.grid_options["store_memaddr"], - # ) - pass - # # parse_function(self, out) + out = binary_c_python_api.run_population( + binary_cmdline_string, + self.grid_options["custom_logging_func_memaddr"], + self.grid_options["store_memaddr"], + ) + + parse_function(self, out) + # pass def yield_system(): for i, system in enumerate(self.grid_options["system_generator"](self)): @@ -724,8 +728,8 @@ class Population(object): # Create pool p = Pool(processes=self.grid_options["amt_cores"]) - # Execute + # TODO: calculate the chunksize value based on: total starcount and cores used. 
r = list(p.imap_unordered(evolve_system, yield_system(), chunksize=1000)) stop_mp = time.time() @@ -741,7 +745,86 @@ class Population(object): return stop_mp - start_mp + def evolve_population_mp_chunks(self): + """ + Test function to evolve a population in a parallel way. + + returns total time spent on the actual interfacing with binary_c + """ + + import time + import multiprocessing as mp + # from pathos.multiprocessing import ProcessingPool as Pool + from pathos.pools import _ProcessPool as Pool + ####################### + ### Custom logging code: + self.set_custom_logging() + + ### Load store + self.grid_options["store_memaddr"] = binary_c_python_api.return_store("") + + ####################### + # Dry run and getting starcount + self.grid_options['probtot'] = 0 + self.generate_grid_code(dry_run=True) + + self.load_grid_function() + + self.dry_run() + + total_starcount_run = self.grid_options['total_starcount'] + print("Total starcount for this run will be: {}".format(total_starcount_run)) + + ####################### + # MP run + self.grid_options['probtot'] = 0 # To make sure that the values are reset. 
TODO: fix this in a cleaner way + + start_mp = time.time() + + self.generate_grid_code(dry_run=False) + + self.load_grid_function() + + def evolve_system(binary_cmdline_string): + out = binary_c_python_api.run_population( + binary_cmdline_string, + self.grid_options["custom_logging_func_memaddr"], + self.grid_options["store_memaddr"], + ) + if self.custom_options['parse_function']: + self.custom_options['parse_function'](self, out) + + def yield_system(): + for i, system in enumerate(self.grid_options["system_generator"](self)): + full_system_dict = self.bse_options.copy() + full_system_dict.update(system) + + binary_cmdline_string = self.return_argline(full_system_dict) + # print("{}/{}".format(i+1, total_starcount_run), binary_cmdline_string) + yield binary_cmdline_string + # yield i + print("generator done") + + # Create pool + p = Pool(processes=self.grid_options["amt_cores"]) + + # Execute + # TODO: calculate the chunksize value based on: total starcount and cores used. + r = list(p.imap_unordered(evolve_system, yield_system(), chunksize=20)) + + stop_mp = time.time() + + # Give feedback + print( + "with mp: {} systems took {}s using {} cores".format( + self.grid_options['total_starcount'], + stop_mp - start_mp, + self.grid_options["amt_cores"], + ) + ) + + return stop_mp - start_mp @@ -787,8 +870,9 @@ class Population(object): if os.path.exists(file): try: if verbose > 0: - print("Removed {}".format()) + print("Removed {}".format(file)) os.remove(file) + # TODO: Put correct exception here. 
except: print("Error while deleting file {}".format(file)) raise FileNotFoundError diff --git a/tests/population/scaling/evolve_population_comparing_with_multiprocessing.py b/tests/population/scaling/evolve_population_comparing_with_multiprocessing.py index 4e95fb0eb..78840fbea 100644 --- a/tests/population/scaling/evolve_population_comparing_with_multiprocessing.py +++ b/tests/population/scaling/evolve_population_comparing_with_multiprocessing.py @@ -127,6 +127,7 @@ total_mp_start = time.time() #evolve_mp_time = test_pop.test_evolve_population_mp() evolve_mp_time = test_pop.test_evolve_population_mp_chunks() +# evolve_mp_time = test_pop.test_evolve_population_mp_chunks_better() total_mp_stop = time.time() -- GitLab