From 59a682cd28ca7dd38e0a86b94d08c35964bb9926 Mon Sep 17 00:00:00 2001
From: David Hendriks <davidhendriks93@gmail.com>
Date: Mon, 27 Jan 2020 10:46:11 +0000
Subject: [PATCH] running tests with multiprocessing, also using different
 multiprocessing techniques. It seems that multiprocessing doesn't speed it
 up that much, at least on my laptop. Will run it on astro1 or something now

---
 binarycpython/utils/grid.py                   | 142 +++++++++++++++---
 tests/population/comparison_result.dat        |  29 ++++
 ...ltiprocessing_via_population_comparison.py | 115 ++++++++++++++
 tests/population/run_tests.sh                 |  17 +++
 4 files changed, 281 insertions(+), 22 deletions(-)
 create mode 100644 tests/population/comparison_result.dat
 create mode 100644 tests/population/multiprocessing_via_population_comparison.py
 create mode 100644 tests/population/run_tests.sh

diff --git a/binarycpython/utils/grid.py b/binarycpython/utils/grid.py
index f63319dd7..162b33e85 100644
--- a/binarycpython/utils/grid.py
+++ b/binarycpython/utils/grid.py
@@ -3,8 +3,13 @@ import copy
 import json
 import sys
 import datetime
+import time
+import random
 
+import numpy as np
+import multiprocessing as mp
 
+from pathos.multiprocessing import ProcessingPool as Pool
 
 import binary_c_python_api
 
@@ -76,6 +81,9 @@ class Population(object):
 
     # TODO: maybe make a set_bse option.
 
+    def set_bse_option(self, key, arg) -> None:  # store arg under key in self.bse_options
+        self.bse_options[key] = arg
+
     def set(self, **kwargs):
         """
         Function to set the values of the population. This is the preferred method to set values of functions, as it 
@@ -387,33 +395,13 @@ class Population(object):
         else:
             return out
 
-    def evolve_population(self, custom_arg_file=None):
+    def evolve_population(self, parse_function, custom_arg_file=None):
         """
         The function that will evolve the population. This function contains many steps
         """
 
         ### Custom logging code:
-        # C_logging_code gets priority of C_autogen_code
-        if self.grid_options["C_auto_logging"]:
-            # Generate real logging code
-            logging_line = autogen_C_logging_code(self.grid_options["C_auto_logging"])
-
-            # Generate entire shared lib code around logging lines
-            custom_logging_code = binary_c_log_code(logging_line)
-
-            # Load memory adress
-            self.grid_options[
-                "custom_logging_func_memaddr"
-            ] = create_and_load_logging_function(custom_logging_code)
-        #
-        if self.grid_options["C_logging_code"]:
-            # Generate entire shared lib code around logging lines
-            custom_logging_code = binary_c_log_code(self.grid_options["C_logging_code"])
-
-            # Load memory adress
-            self.grid_options[
-                "custom_logging_func_memaddr"
-            ] = create_and_load_logging_function(custom_logging_code)
+        self.set_custom_logging()
 
         ### Load store
         self.grid_options["store_memaddr"] = binary_c_python_api.return_store("")
@@ -459,6 +447,116 @@ class Population(object):
 
         # TODO: add call to function that cleans up the temp customlogging dir, and unloads the loaded libraries.
 
+    def evolve_population_comparison(self, parse_function, amt, nodes, custom_arg_file=None):
+        """
+        Benchmark: time amt random-mass runs in a plain loop, then amt more through a pathos ProcessingPool(nodes=nodes); returns (nodes, amt, t_loop, t_pool).
+        """
+
+        ### Custom logging code:
+        self.set_custom_logging()
+        
+        ### Load store
+        self.grid_options["store_memaddr"] = binary_c_python_api.return_store("")
+
+        # NOTE: parse_function and custom_arg_file are accepted but not used anywhere below.
+
+        ### Run the workload first without and then with multiprocessing, timing both.
+        import time
+        import multiprocessing as mp
+        from pathos.multiprocessing import ProcessingPool as Pool
+        import random
+
+        start_no_mp = time.time()
+        self.set(base_filename='no_mp_{}.dat'.format(amt))
+        # Serial pass: amt runs, each with a random integer M_1 drawn from [1, 500].
+        masses = range(1, amt+1)
+        for i in masses:
+            mass = random.randint(1, 500)
+            # Update M_1 before building the argument line for this run.
+            self.set_bse_option('M_1', mass)
+            out = binary_c_python_api.run_population(
+                        self.return_argline(),
+                        self.grid_options["custom_logging_func_memaddr"],
+                        self.grid_options["store_memaddr"],
+                    )
+            # The output is not parsed: only the run time is measured.
+        stop_no_mp = time.time()
+
+        print("without mp: {} systems took {}s".format(amt, stop_no_mp-start_no_mp))
+
+        #########################################################
+
+        start_mp = time.time()
+
+        self.set(base_filename='mp_{}.dat'.format(amt))
+        def evolve_mp(mass):
+            # Worker body handed to the pool's imap: set M_1 and run one system.
+            self.set_bse_option('M_1', mass)
+            # NOTE(review): closes over self; presumably every pool worker mutates its own copy - confirm.
+            out = binary_c_python_api.run_population(
+                        self.return_argline(),
+                        self.grid_options["custom_logging_func_memaddr"],
+                        self.grid_options["store_memaddr"],
+                    )
+            # The output is not parsed here either.
+
+        p = Pool(nodes=nodes)
+        def g(amt):
+            # Yields amt freshly drawn masses, so this pass does not reuse the serial pass's values.
+            masses = range(1, amt+1)
+            for i in masses:
+                mass = random.randint(1, 500)
+                yield mass
+            print("generator done")
+
+        r = list(p.imap(evolve_mp, g(amt)))
+
+        stop_mp = time.time()
+        print("with mp: {} systems took {}s".format(amt, stop_mp-start_mp))
+
+        # Both differences below are negative, so the printed ratio equals t_loop / t_pool.
+        print("Running mp versus no mp is {} times faster!".format((start_no_mp-stop_no_mp)/(start_mp-stop_mp)))
+        return (nodes, amt, stop_no_mp-start_no_mp, stop_mp-start_mp)
+
+
+    def evolve_population_mp(self, parse_function, mass_distribution):
+        """
+        Evolve one system per M_1 value in mass_distribution through a pathos
+        ProcessingPool created with nodes=grid_options['amt_cores'], calling
+        parse_function(self, output) on the output of every run.
+        mass_distribution must be iterable and support len().
+        """
+
+        ### Custom logging code:
+        self.set_custom_logging()
+        
+        ### Load store
+        self.grid_options["store_memaddr"] = binary_c_python_api.return_store("")
+
+        # evolve with mp
+        start_mp = time.time()
+        def evolve_mp(mass):
+            self.set_bse_option('M_1', mass)
+            out = binary_c_python_api.run_population(
+                        self.return_argline(),
+                        self.grid_options["custom_logging_func_memaddr"],
+                        self.grid_options["store_memaddr"],
+                    )
+            parse_function(self, out)
+
+        p = Pool(nodes=self.grid_options['amt_cores'])
+        def g(mass_distribution):
+            masses = mass_distribution
+            for mass in masses:
+                yield mass
+            print("generator done")
+
+        # list() drains the imap iterator, so stop_mp is taken only after every system has finished.
+        list(p.imap(evolve_mp, g(mass_distribution)))
+        stop_mp = time.time()
+
+        print("with mp: {} systems took {}s using {} cores".format(len(mass_distribution), stop_mp-start_mp, self.grid_options['amt_cores']))
+
     ###################################################
     # Testing functions
     ###################################################
diff --git a/tests/population/comparison_result.dat b/tests/population/comparison_result.dat
new file mode 100644
index 000000000..64e82a8f4
--- /dev/null
+++ b/tests/population/comparison_result.dat
@@ -0,0 +1,29 @@
+(4, 100, 4.975594997406006, 2.3232288360595703)
+(4, 100, 4.787039518356323, 2.2747838497161865)
+(4, 100, 4.9317708015441895, 2.4268319606781006)
+(1, 100, 5.315765619277954, 5.674138784408569)
+(2, 100, 4.9390997886657715, 2.769941806793213)
+(2, 100, 5.208018779754639, 2.836611747741699)
+(2, 100, 5.186922311782837, 3.152923107147217)
+(4, 100, 5.072067499160767, 2.3781256675720215)
+(4, 100, 5.082597970962524, 2.3073325157165527)
+(4, 1000, 49.75706219673157, 30.233830213546753)
+(4, 1000, 53.46943283081055, 32.60539269447327)
+(4, 1000, 54.02741098403931, 30.801720142364502)
+(4, 1000, 55.66723918914795, 32.122862100601196)
+(4, 2000, 106.51247549057007, 61.482452392578125)
+(4, 2000, 109.3186149597168, 61.64964556694031)
+(4, 2000, 111.05836129188538, 91.57576203346252)
+(4, 2000, 121.61987137794495, 68.84247088432312)
+(4, 1000, 49.87850880622864, 30.613584995269775)
+(4, 1000, 52.58728384971619, 31.519263982772827)
+(4, 1000, 53.73133993148804, 31.784117937088013)
+(4, 1000, 54.716689348220825, 31.911094665527344)
+(4, 2000, 124.03876852989197, 62.09644532203674)
+(4, 2000, 103.91023874282837, 60.63083362579346)
+(4, 2000, 104.27078628540039, 61.83385968208313)
+(4, 2000, 104.66957712173462, 60.45007133483887)
+(4, 5000, 261.0527467727661, 154.01037573814392)
+(4, 5000, 259.3938636779785, 152.388818025589)
+(4, 5000, 260.7243824005127, 152.58654737472534)
+(4, 5000, 261.61197566986084, 152.4033432006836)
diff --git a/tests/population/multiprocessing_via_population_comparison.py b/tests/population/multiprocessing_via_population_comparison.py
new file mode 100644
index 000000000..8b49b69c7
--- /dev/null
+++ b/tests/population/multiprocessing_via_population_comparison.py
@@ -0,0 +1,115 @@
+import argparse
+import json
+import os
+import pickle
+import sys
+import time
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+from binarycpython.utils.functions import get_help_all, get_help, create_hdf5
+from binarycpython.utils.grid import Population
+
+
+
+
+
+# Command line: <amt_systems> <amt_nodes> <name_testcase>; the two counts are validated as integers.
+parser = argparse.ArgumentParser()
+parser.add_argument(
+    "amt_systems",
+    type=int,
+    help='the amount of systems',
+)
+parser.add_argument(
+    "amt_nodes",
+    type=int,
+    help='the amount of nodes that are used for the multiprocessing',
+)
+parser.add_argument(
+    "name_testcase",
+    help='The name of the testcase (e.g. laptop, cluster etc)',
+)
+
+args = parser.parse_args()
+
+amt_systems = args.amt_systems
+amt_nodes = args.amt_nodes
+name_testcase = args.name_testcase
+
+
+## Quick script to get some output about which stars go supernova when.
+def output_lines(output):
+    """
+    Return the output received from the binary_c run as a list of lines (str.splitlines()).
+    """
+    return output.splitlines()
+
+def parse_function(self, output):
+    """
+    Append every DAVID_SN line of output as a tab-separated row to
+    <data_dir>/<base_filename> (both read from self.custom_options).
+    """
+    data_dir = self.custom_options['data_dir']
+    base_filename = self.custom_options['base_filename']
+
+    # Check directory, make if necessary
+    os.makedirs(data_dir, exist_ok=True)
+    outfilename = os.path.join(data_dir, base_filename)
+
+    parameters = ['time', 'mass_1', 'prev_mass_1', 'zams_mass_1', 'SN_type']
+    seperator = '\t'
+
+    # Go over the output.
+    for el in output_lines(output):
+        fields = el.split()
+
+        # Skip blank lines (indexing fields[0] would raise IndexError on
+        # them) as well as lines that carry any other header.
+        if not fields or fields[0] != 'DAVID_SN':
+            continue
+
+        # The column header is written only when this row creates the file.
+        write_header = not os.path.exists(outfilename)
+        with open(outfilename, 'a') as f:
+            if write_header:
+                f.write(seperator.join(parameters)+'\n')
+            f.write(seperator.join(fields[1:])+'\n')
+## Set values: custom C logging code that prints the DAVID_SN lines parse_function looks for
+test_pop = Population()
+test_pop.set(
+    C_logging_code="""
+if(stardata->star[0].SN_type != SN_NONE)    
+{
+    if (stardata->model.time < stardata->model.max_evolution_time)
+    {
+        Printf("DAVID_SN %30.12e %g %g %g %d\\n",
+            // 
+            stardata->model.time, // 1
+            stardata->star[0].mass, //2
+            stardata->previous_stardata->star[0].mass, //3
+            stardata->star[0].pms_mass, //4
+            stardata->star[0].SN_type //5
+      );
+    };
+    /* Kill the simulation to save time */
+    stardata->model.max_evolution_time = stardata->model.time - stardata->model.dtm;
+};
+""")
+
+test_pop.set(separation=1000000000, 
+    orbital_period=400000000, 
+    metallicity=0.002, 
+    data_dir=os.path.join(os.environ['BINARYC_DATA_ROOT'], 'testing_python', 'multiprocessing2', name_testcase))
+# Loop-vs-pool benchmark (disabled); when enabled, each result tuple is appended to comparison_result.dat
+# res = test_pop.evolve_population_comparison(parse_function, amt=int(amt_systems), nodes=int(amt_nodes))
+# with open('comparison_result.dat', 'a') as f:
+#     f.write(str(res)+'\n')
+
+
+# M_1 values 1..199 for the (disabled) evolve_population_mp run below
+mass_distribution = np.arange(1, 200)
+
+
+# test_pop.evolve_population_mp(parse_function, mass_distribution)
\ No newline at end of file
diff --git a/tests/population/run_tests.sh b/tests/population/run_tests.sh
new file mode 100644
index 000000000..1f156a869
--- /dev/null
+++ b/tests/population/run_tests.sh
@@ -0,0 +1,17 @@
+# Benchmark driver for multiprocessing_via_population_comparison.py.
+# Arguments per run: <amt_systems> <amt_nodes> <name_testcase>.
+# Every population size below is run 4 times in a row.
+#
+# Quick runs with 100 systems (disabled):
+#python multiprocessing_via_population_comparison.py 100 4 laptop
+
+nodes=4
+testcase=laptop
+
+for amt_systems in 1000 2000 5000
+do
+    for repetition in 1 2 3 4
+    do
+        python multiprocessing_via_population_comparison.py "$amt_systems" "$nodes" "$testcase"
+    done
+done
-- 
GitLab