diff --git a/tests/population/scaling/plot_scaling.py b/tests/population/scaling/plot_scaling.py index 0afa06c399086b29dc332b9ed487c0a6d89cb3e7..be3c1d5e79bfe3668621d75eb97b137c45b1826d 100644 --- a/tests/population/scaling/plot_scaling.py +++ b/tests/population/scaling/plot_scaling.py @@ -88,7 +88,7 @@ def plot_runtime(calculated_results, unique_amt_cores, unique_amt_systems): def amdahl(f, n): return 1.0 / ((1 - f) + (f / n)) -def plot_speedup_and_efficiency(result_json_filenames, plot_output_dir, name_testcase, machine): +def plot_speedup_and_efficiency(result_json_filenames, plot_output_dir, name_testcase): """ Plotting routine to plot the speedup and efficiency of scaling @@ -106,10 +106,6 @@ def plot_speedup_and_efficiency(result_json_filenames, plot_output_dir, name_tes with open(jsonfile, "r") as f: result_data = json.loads(f.read()) - # if i==0: - # name_testcase = result_data['name_testcase'] - # hostname = result_data['hostname'] - # Get linear data linear_data = result_data["linear"] linear_mean = np.mean(linear_data) @@ -146,14 +142,14 @@ def plot_speedup_and_efficiency(result_json_filenames, plot_output_dir, name_tes stddev_speedups, linestyle="None", marker="^", - label="Speed up & efficiency of {} systems".format( - result_data["amt_systems"] - ), + label="{}".format(os.path.basename(jsonfile)), + # label="Speed up & efficiency of {} systems".format( + # result_data["amt_systems"] + # ), ) # Plot the efficiencies - ax2.plot(cpus, efficiencies, alpha=0.5) - + ax2.plot(cpus, efficiencies, alpha=0.5, linestyle='dotted') # x_position_shift += 0.1 ##################### @@ -161,11 +157,11 @@ def plot_speedup_and_efficiency(result_json_filenames, plot_output_dir, name_tes # 100 % scaling line ax1.plot([1, max(cpus)], [1, max(cpus)], '--', alpha=0.25, label='100% scaling') - - # Amdahls law fitting - + ax2.axhline(y=1, linestyle='--', alpha=0.25, label='100% efficient') + # ax1.plot([1, max(cpus)], [1, max(cpus)], '--', alpha=0.25, label='100% scaling') + # Amdahls law fitting # Old stuff # Do Amdahls law fitting # cores = np.arange(1, 48, 0.1) @@ -183,9 +179,7 @@ def plot_speedup_and_efficiency(result_json_filenames, plot_output_dir, name_tes ################################# # Adding plot make up ax1.set_title( - "Speed up ratio vs amount of cores for different amounts of systems on {}".format( - machine - ) + "Speed up ratio (left y, symbols) and efficiency (right y, dotted line) vs amount of cores" ) ax1.set_xlabel("Amount of cores used") @@ -196,20 +190,9 @@ def plot_speedup_and_efficiency(result_json_filenames, plot_output_dir, name_tes ax1.set_xscale("log") ax2.set_xscale("log") + ax2.set_ylim(ymin=0, ymax=None) + + fig.savefig(os.path.join(plot_output_dir, "speedup_scaling_{}.{}".format(name_testcase, "png"))) fig.savefig(os.path.join(plot_output_dir, "speedup_scaling_{}.{}".format(name_testcase, "pdf"))) plt.show() - -################################# -# Files -SCALING_RESULT_DIR = "scaling_results" -FILENAMES = [ - "david-Lenovo-IdeaPad-S340-14IWL_100_systems.json" - # "astro2_2500_systems.json", - # "astro2_3000_systems.json", -] -RESULT_JSONS = [] -for filename in FILENAMES: - RESULT_JSONS.append(os.path.join(os.path.abspath(SCALING_RESULT_DIR), filename)) - -plot_speedup_and_efficiency(RESULT_JSONS, SCALING_RESULT_DIR, "Example", "laptop_david") \ No newline at end of file diff --git a/tests/population/scaling/scaling_functions.py b/tests/population/scaling/scaling_functions.py index 6a5a691c820e484f8b8d36af85c2f2b799bd918b..0763dc3b07b7f6d7a60480708dca72a13aee0e41 100644 --- a/tests/population/scaling/scaling_functions.py +++ b/tests/population/scaling/scaling_functions.py @@ -3,6 +3,20 @@ Module containing the scaling functions. """ import time +import socket +import os + +import json + +import numpy as np + +from binarycpython.utils.grid import Population + +def dummy_parsefunction(self, output): + """ + Dummy parsing function + """ + pass def get_mp_results(population, cpu_list, amt_repeats, total_systems): """ @@ -31,7 +45,7 @@ def get_mp_results(population, cpu_list, amt_repeats, total_systems): ) mp_times.append(total_mp) - mp_dict[cpu_amt] = mp_times + mp_dict[str(cpu_amt)] = mp_times return mp_dict @@ -55,3 +69,105 @@ def get_linear_results(population, amt_repeats, total_systems): linear_times.append(total_lin) return linear_times + +def run_systems_for_scaling_comparison(settings_dict): + """ + Function that runs the systems for the scaling comparison + """ + + amount_of_cpus = settings_dict['amount_of_cpus'] + amount_of_cores = settings_dict['amount_of_cores'] + amt_repeats = settings_dict['amt_repeats'] + stepsize_cpus = settings_dict['stepsize_cpus'] + testcase = settings_dict['testcase'] + plot_dir = settings_dict['plot_dir'] + result_dir = settings_dict['result_dir'] + + resolutions = settings_dict['resolutions'] + + # For each set of resolutions + for resolution in resolutions: + # Some calculated values + total_systems = int(np.prod([el for el in resolution.values()])) + hostname = socket.gethostname() + + # Generate the range of cpu numbers + cpu_list = np.arange(1, amount_of_cpus+1, stepsize_cpus) + if not cpu_list[-1] == amount_of_cpus: + cpu_list = np.append(cpu_list, np.array([amount_of_cpus])) + + ################################################################## + # Create dictionary in which to store all the results: + result_dict = {} + + # + result_dict["amt_systems"] = total_systems + result_dict["hostname"] = hostname + result_dict["amt_logical_cores"] = amount_of_cpus + result_dict["amt_of_physical_cores"] = amount_of_cores + result_dict["testcase"] = testcase + + ################# + # Configuring population + test_pop = Population() + + test_pop.set( + verbose=1, binary=1, parse_function=dummy_parsefunction, + ) + + test_pop.add_grid_variable( + name="lnm1", + longname="Primary mass", + valuerange=[1, 100], + resolution="{}".format(resolution["M_1"]), + spacingfunc="const(math.log(1), math.log(100), {})".format(resolution["M_1"]), + precode="M_1=math.exp(lnm1)", + probdist="three_part_powerlaw(M_1, 0.1, 0.5, 1.0, 100, -1.3, -2.3, -2.3)*M_1", + dphasevol="dlnm1", + parameter_name="M_1", + condition="", # Impose a condition on this grid variable. Mostly for a check for yourself + ) + + test_pop.add_grid_variable( + name="q", + longname="Mass ratio", + valuerange=["0.1/M_1", 1], + resolution="{}".format(resolution['q']), + spacingfunc="const(0.1/M_1, 1, {})".format(resolution['q']), + probdist="flatsections(q, [{'min': 0.1/M_1, 'max': 0.8, 'height': 1}, {'min': 0.8, 'max': 1.0, 'height': 1.0}])", + dphasevol="dq", + precode="M_2 = q * M_1", + parameter_name="M_2", + condition="", # Impose a condition on this grid variable. Mostly for a check for yourself + ) + + test_pop.add_grid_variable( + name="logper", + longname="log(Orbital_Period)", + valuerange=[-2, 12], + resolution="{}".format(resolution["per"]), + spacingfunc="np.linspace(-2, 12, {})".format(resolution["per"]), + precode="orbital_period = 10** logper\n", # TODO: + probdist="gaussian(logper,4.8, 2.3, -2.0, 12.0)", + parameter_name="orbital_period", + dphasevol="dln10per", + ) + + ####################################################################################### + # Execute grids + + # Linear runs + linear_times = get_linear_results(test_pop, amt_repeats, total_systems) + result_dict["linear"] = linear_times + + ####################################################################################### + # MP runs + mp_dict = get_mp_results(test_pop, cpu_list, amt_repeats, total_systems) + result_dict["mp"] = mp_dict + + print(result_dict) + + # Write to file and make sure the directory exists. + os.makedirs(result_dir, exist_ok=True) + with open(os.path.join(result_dir, "{}_{}_systems.json".format(hostname, total_systems)), "w") as f: + f.write(json.dumps(result_dict, indent=4)) diff --git a/tests/population/scaling/scaling_script.py b/tests/population/scaling/scaling_script.py index 6b8ef378730587db35f517db62c32eb86413ac52..0e3fb46097039b5911a496f2d063bf2f9394dc4f 100644 --- a/tests/population/scaling/scaling_script.py +++ b/tests/population/scaling/scaling_script.py @@ -6,9 +6,9 @@ It requires some user input, which you can define at the top of the script after The following values should be configured according to your system: - -It will then run the population you specified, first linearly <AMT_REPEATS> times, -and then using multiprocessing it will run the population <AMT_REPEATS> times each time -with more cores. (Up until <AMOUNT_OF_CPUS>) +It will then run the population you specified, first linearly <amt_repeats> times, +and then using multiprocessing it will run the population <amt_repeats> times each time +with more cores. (Up until <amount_of_cpus>) TODO: get the real evolution time instead of the total as well TODO: put the methods in functions and put them in a different file @@ -22,96 +22,45 @@ import psutil import numpy as np from binarycpython.utils.grid import Population -from scaling_functions import get_mp_results, get_linear_results - -AMT_REPEATS = 5 # Number of times the population will be repeated per cpu - # number. Useful to get some reliable statistics -RESOLUTION = {"M_1": 50, "per": 60} # Resolution of sampling of the population -RESULT_DIR = "scaling_results" # Directory where the results are written to. -PLOT_DIR = "scaling_plots" # Directory where the plots will be stored -TESTCASE = "linear vs MP batched" # `name` of the calculation -STEPSIZE_CPUS = 1 # Stepsize for the cpu number generator. Try to keep this - # low, to get the most reliable results - -AMOUNT_OF_CPUS = 4 # Amount of logical cpus the machine has. -# AMOUNT_OF_CPUS = psutil.cpu_count() - -AMOUNT_OF_CORES = 2 # The amount of physical cores. This value +from scaling_functions import get_mp_results, get_linear_results, run_systems_for_scaling_comparison +from plot_scaling import plot_speedup_and_efficiency + +settings_dict = {} +settings_dict['amt_repeats'] = 1 # Number of times the population will be repeated per cpu + # number. Better do it several times than only run it once +settings_dict['resolutions'] = [ # List of resolution of sampling of the population. Useful for checking whether population size has an effect on the results + {"M_1": 10, "per": 10, "q": 2} +] +settings_dict['result_dir'] = "scaling_results" # Relative of absolute directory where results are writting to +settings_dict['plot_dir'] = "scaling_plots" # Directory where the plots will be stored +settings_dict['testcase'] = "linear vs MP batched" # 'name' of the calculation. will be used in the plot +settings_dict['stepsize_cpus'] = 1 # Stepsize for the cpu number generator. Try to keep this + # low, to get the most reliable results +settings_dict['amount_of_cpus'] = 4 # Amount of logical cpus the machine has (this is not the same as physical cpus!) +# settings_dict['amount_of_cpus'] = psutil.cpu_count() +settings_dict['amount_of_cores'] = 2 # The amount of physical cores. This value # is not vital bit will be used in the plot -# AMOUNT_OF_CORES = psutil.cpu_count(logical=False) # You can also use the psutil function to get +# settings_dict['amount_of_cores'] = psutil.cpu_count(logical=False) # You can also use the psutil function to get # the amt of physical cores, but this isnt fully # reliable (in mar 2020 it didnt get this value # right when there were multiple sockets) - -# Some calculated values -TOTAL_SYSTEMS = int(np.prod([el for el in RESOLUTION.values()])) -HOSTNAME = socket.gethostname() - - -# Generate the range of cpu numbers -CPU_LIST = np.arange(1, AMOUNT_OF_CPUS+1, STEPSIZE_CPUS) -if not CPU_LIST[-1] == AMOUNT_OF_CPUS: - CPU_LIST = np.append(CPU_LIST, np.array([AMOUNT_OF_CPUS])) - -################################################################## -# Create dictionairy in which to store all the results: -result_dict = {} - -# -result_dict["amt_systems"] = TOTAL_SYSTEMS -result_dict["hostname"] = HOSTNAME -result_dict["amt_logical_cores"] = AMOUNT_OF_CPUS -result_dict["amt_of_physical_cores"] = AMOUNT_OF_CORES -result_dict["testcase"] = TESTCASE - -################# -# Configuring population -test_pop = Population() - -test_pop.set( - verbose=1, binary=1, -) - -test_pop.add_grid_variable( - name="M_1", - longname="log primary mass", - valuerange=[1, 100], - resolution="{}".format(RESOLUTION["M_1"]), - spacingfunc="const(1, 100, {})".format(RESOLUTION["M_1"]), - probdist="Kroupa2001(M_1)", - # probdist='self.custom_options["extra_prob_function"](M_1)', - dphasevol="dlnm1", - parameter_name="M_1", - condition="", -) - -test_pop.add_grid_variable( - name="period", - longname="period", - valuerange=["M_1", 20], - resolution="{}".format(RESOLUTION["per"]), - spacingfunc="np.linspace(1, 10, {})".format(RESOLUTION["per"]), - precode="orbital_period = period**2", - probdist="flat(orbital_period)", - parameter_name="orbital_period", - dphasevol="dper", - condition='self.grid_options["binary"]==1', -) - -####################################################################################### -# Execute grids - -# Linear runs -LINEAR_TIMES = get_linear_results(test_pop, AMT_REPEATS, TOTAL_SYSTEMS) -result_dict["linear"] = LINEAR_TIMES - -####################################################################################### -# MP runs -MP_DICT = get_mp_results(test_pop, CPU_LIST, AMT_REPEATS, TOTAL_SYSTEMS) -result_dict["mp"] = MP_DICT - -# Write to file and make sure the directory exists. -os.makedirs(RESULT_DIR, exist_ok=True) -with open(os.path.join(RESULT_DIR, "{}_{}_systems.json".format(HOSTNAME, TOTAL_SYSTEMS)), "w") as f: - f.write(json.dumps(result_dict)) +run_systems_for_scaling_comparison(settings_dict) +################################# +# Files +SCALING_RESULT_DIR = settings_dict['result_dir'] +RESULT_JSONS = [os.path.join(SCALING_RESULT_DIR, file) for file in os.listdir(SCALING_RESULT_DIR) if file.endswith('.json')] # Automatically grab all of the stuff, override it + +# FILENAMES = [ +# "david-Lenovo-IdeaPad-S340-14IWL_100_systems.json", +# "david-Lenovo-IdeaPad-S340-14IWL_2500_systems.json" +# ] +# RESULT_JSONS = [] +# for filename in FILENAMES: +# RESULT_JSONS.append(os.path.join(os.path.abspath(SCALING_RESULT_DIR), filename)) + +plot_speedup_and_efficiency( + RESULT_JSONS, + SCALING_RESULT_DIR, + "Example" +) \ No newline at end of file