Skip to content
Snippets Groups Projects
grid.py 161 KiB
Newer Older
"""
Module containing the Population grid class object.

Here all the functionality of a Population object is defined.

Useful for the user to understand the functionality,
but copying functionality isn't recommended except if you know what you are doing

Tasks:
    - TODO: add functionality to 'on-init' set arguments
    - TODO: add functionality to return the initial_abundance_hash
    - TODO: add functionality to return the isotope_hash
    - TODO: add functionality to return the isotope_list
    - TODO: add functionality to return the nuclear_mass_hash
    - TODO: add functionality to return the nuclear_mass_list
    - TODO: add functionality to return the source_list
    - TODO: add functionality to return the ensemble_list
    - TODO: consider spreading the functions over more files.
    - TODO: type the private functions
David Hendriks's avatar
David Hendriks committed
    - TODO: fix the correct object types for the default values of the bse_options
    - TODO: uncomment and implement the HPC functionality
    - TODO: think of a clean and nice way to unload and remove the custom_logging_info library from memory (and from disk)
    - TODO: think of a nice way to remove the loaded grid_code/ generator from memory. 
    - TODO: Create a designated dict for results
import os
import gc
import sys
import copy
import json
import logging
David Hendriks's avatar
David Hendriks committed
import datetime
import multiprocessing
from collections import (
    OrderedDict,
)
import setproctitle
David Hendriks's avatar
David Hendriks committed
from binarycpython.utils.grid_options_defaults import (
    grid_options_defaults_dict,
    moe_distefano_default_options,
David Hendriks's avatar
David Hendriks committed
)

from binarycpython.utils.custom_logging_functions import (
    autogen_C_logging_code,
    binary_c_log_code,
    create_and_load_logging_function,
)
from binarycpython.utils.functions import (
    get_defaults,
    return_binary_c_version_info,
    binaryc_json_serializer,
    verbose_print,
David Hendriks's avatar
David Hendriks committed
    merge_dicts,
    extract_ensemble_json_from_string,
    get_moe_distefano_dataset,
    recursive_change_key_to_float,
    custom_sort_dict,
    recursive_change_key_to_string,
# from binarycpython.utils.hpc_functions import (
#     get_condor_version,
#     get_slurm_version,
#     create_directories_hpc,
#     path_of_calling_script,
#     get_python_details,
# )
David Hendriks's avatar
David Hendriks committed
from binarycpython.utils.distribution_functions import (
David Hendriks's avatar
David Hendriks committed
    Moecache,
    LOG_LN_CONVERTER,
    get_max_multiplicity,
    Arenou2010_binary_fraction,
    raghavan2010_binary_fraction,
    Moe_de_Stefano_2017_multiplicity_fractions,
from binarycpython import _binary_c_bindings
David Hendriks's avatar
David Hendriks committed

David Hendriks's avatar
David Hendriks committed
class Population:
    """
    Population Object. Contains all the necessary functions to set up, run and process a
    population of systems
    """
        Initialisation function of the population class
David Hendriks's avatar
David Hendriks committed
        """
        # Different sections of options

        # get binary_c defaults and create a cleaned up dict
        # Setting stuff will check against the defaults to see if the input is correct.
        self.defaults = get_defaults()
        self.cleaned_up_defaults = self._cleanup_defaults()
        self.available_keys = list(self.defaults.keys())
David Hendriks's avatar
David Hendriks committed
        self.special_params = [
            el for el in list(self.defaults.keys()) if el.endswith("%d")
        ]
David Hendriks's avatar
David Hendriks committed
        self.bse_options = {}  # bse_options is just empty.

        # Grid options
        self.grid_options = copy.deepcopy(grid_options_defaults_dict)

        # Custom options
        self.custom_options = {}

        # Load M&S options
        self.grid_options['m&s_options'] = copy.deepcopy(moe_distefano_default_options)

        # Write M&S options to a file
        os.makedirs(os.path.join(self.grid_options["tmp_dir"], "moe_distefano"), exist_ok=True)
        with open(os.path.join(os.path.join(self.grid_options["tmp_dir"], "moe_distefano"), "moeopts.dat"), "w") as f:
            f.write(json.dumps(self.grid_options['m&s_options'], indent=4))

        # Argline dict
        self.argline_dict = {}

        self.grid_options["_main_pid"] = os.getpid()
David Hendriks's avatar
David Hendriks committed
        # Create location to store results. Users should write to this dictionary.
        self.grid_results = {}

        # Create location where ensemble results are written to
        self.grid_ensemble_results = {}

    ###################################################
    # Argument functions
    ###################################################

    # General flow of generating the arguments for the binary_c call:
    # - user provides parameter and value via set (or manually but that is risky)
    # - The parameter names of this input get compared to the parameter names in self.defaults;
    #    with this, we know that it's a valid parameter to give to binary_c.
    # - For a single system, the bse_options will be written as an arg line
    # - For a population the bse_options will get copied to a temp_bse_options dict and updated with
    #   all the parameters generated by the grid
    # I will NOT create the argument line by fully writing ALL the defaults and overriding user
    # input; that seems unnecessary because by using the get_defaults() function we already
    # know for sure which parameter names are valid for the binary_c version,
    # and because binary_c uses internal defaults, it's not necessary to explicitly pass them.
    # I do however suggest everyone export the binary_c defaults to a file, so that you know
    # exactly which values were the defaults.
    def set(self, **kwargs) -> None:
        Function to set the values of the population. This is the preferred method to set values
        of functions, as it provides checks on the input.

        the bse_options will get populated with all the those that have a key that is present
        in the self.defaults

        the grid_options will get updated with all the those that have a key that is present
        in the self.grid_options

        If neither of above is met; the key and the value get stored in a custom_options dict.

        Args:
            via kwargs all the arguments are either set to binary_c parameters, grid_options or custom_options (see above)
        # Select the params that end with %d

        # Go over all the input
            # Filter out keys for the bse_options
            if key in self.defaults.keys():
David Hendriks's avatar
David Hendriks committed
                verbose_print(
                    "adding: {}={} to BSE_options".format(key, kwargs[key]),
                    self.grid_options["verbosity"],
                    1,
                )
                self.bse_options[key] = kwargs[key]

            # Extra check to check if the key fits one of parameter names that end with %d
David Hendriks's avatar
David Hendriks committed
            elif any(
                [
                    True
                    if (key.startswith(param[:-2]) and len(param[:-2]) < len(key))
                    else False
David Hendriks's avatar
David Hendriks committed
                ]
            ):
                verbose_print(
                    "adding: {}={} to BSE_options by catching the %d".format(
                        key, kwargs[key]
                    ),
                    self.grid_options["verbosity"],
                    1,
                )
                self.bse_options[key] = kwargs[key]

            # Filter out keys for the grid_options
            elif key in self.grid_options.keys():
David Hendriks's avatar
David Hendriks committed
                verbose_print(
                    "adding: {}={} to grid_options".format(key, kwargs[key]),
                    self.grid_options["verbosity"],
                    1,
                )
            # The rest of the keys go into a custom_options dict
            else:
                    "!! Key doesnt match previously known parameter: \
David Hendriks's avatar
David Hendriks committed
                    adding: {}={} to custom_options".format(
                        key, kwargs[key]
                    )
    def parse_cmdline(self) -> None:
        Function to handle settings values via the command line.
        Best to be called after all the .set(..) lines, and just before the .evolve() is called
        If you input any known parameter (i.e. contained in grid_options, defaults/bse_options
        or custom_options), this function will attempt to convert the input from string
        (because everything is string) to the type of the value that option had before.
David Hendriks's avatar
David Hendriks committed

        The values of the bse_options are initially all strings, but after user input they
        can change to ints.
David Hendriks's avatar
David Hendriks committed

        The value of any new parameter (which will go to custom_options) will be a string.

        Tasks:
            - TODO: remove the need for --cmdline
        parser.add_argument(
            "--cmdline",
            help='Setting values via the commandline. Input like --cmdline "metallicity=0.02"',
        )
        args = parser.parse_args()

        # How its set up now is that as input you need to give --cmdline "metallicity=0.002"
        # Its checked if this exists and handled accordingly.
        if args.cmdline:
David Hendriks's avatar
David Hendriks committed
            verbose_print(
                "Found cmdline args. Parsing them now",
                self.grid_options["verbosity"],
                1,
            )
            # Grab the input and split them up, while accepting only non-empty entries
            cmdline_args = args.cmdline
            self.grid_options["_commandline_input"] = cmdline_args
            split_args = [
                cmdline_arg
                for cmdline_arg in cmdline_args.split(" ")
                if not cmdline_arg == ""
            ]

            # Make dict and fill it
            cmdline_dict = {}
            for cmdline_arg in split_args:
                old_value_found = False

                # Find an old value
                if parameter in self.grid_options:
                    old_value = self.grid_options[parameter]
                    old_value_found = True

                elif parameter in self.defaults:
                    old_value = self.defaults[parameter]
                    old_value_found = True

                elif parameter in self.custom_options:
                    old_value = self.custom_options[parameter]
                    old_value_found = True

                # (attempt to) convert
                if old_value_found:
                    try:
David Hendriks's avatar
David Hendriks committed
                        verbose_print(
                            "Converting type of {} from {} to {}".format(
                                parameter, type(value), type(old_value)
                            ),
                            self.grid_options["verbosity"],
                            1,
                        )
                        value = type(old_value)(value)
David Hendriks's avatar
David Hendriks committed
                        verbose_print("Success!", self.grid_options["verbosity"], 1)
David Hendriks's avatar
David Hendriks committed
                        verbose_print(
                            "Tried to convert the given parameter {}/value {} to its correct type {} (from old value {}). But that wasn't possible.".format(
                                parameter, value, type(old_value), old_value
                            ),
                            self.grid_options["verbosity"],
                            0,
                        )
                # Add to dict
                cmdline_dict[parameter] = value

            # unpack the dictionary into the setting function that handles where the values are set
            self.set(**cmdline_dict)

    def _return_argline(self, parameter_dict=None):
        """
        Function to create the string for the arg line from a parameter dict
        """

        if not parameter_dict:
            parameter_dict = self.bse_options

        for param_name in sorted(parameter_dict):
            argline += "{} {} ".format(param_name, parameter_dict[param_name])
        argline = argline.strip()
        return argline

    def last_grid_variable(self):
        """
        Functon that returns the last grid variable
        (i.e. the one with the highest grid_variable_number)
        """

        number = len(self.grid_options["_grid_variables"])
        for grid_variable in self.grid_options["_grid_variables"]:
David Hendriks's avatar
David Hendriks committed
            if (
                self.grid_options["_grid_variables"][grid_variable][
                    "grid_variable_number"
                ]
                == number - 1
            ):
        name: str,
        longname: str,
        valuerange: Union[list, str],
        resolution: str,
        spacingfunc: str,
        probdist: str,
        parameter_name: str,
David Hendriks's avatar
David Hendriks committed
        gridtype: str = "edge",
        branchpoint: int = 0,
        precode: Union[str, None] = None,
        condition: Union[str, None] = None,
        Function to add grid variables to the grid_options.

        TODO: Fix this complex function.

        The execution of the grid generation will be through a nested forloop.
        Each of the grid variables will get create a deeper for loop.
David Hendriks's avatar
David Hendriks committed
        The real function that generates the numbers will get written to a new file in the TMP_DIR,
David Hendriks's avatar
David Hendriks committed
        beware that if you insert some destructive piece of code, it will be executed anyway.
David Hendriks's avatar
David Hendriks committed

                name of parameter. This is evaluated as a parameter and you can use it throughout
                the rest of the function
                example: name = 'lnm1'
            longname:
                Long name of parameter
                example: longname = 'Primary mass'
            range:
                Range of values to take. Does not get used really, the spacingfunction is used to
                get the values from
                example: range = [math.log(m_min), math.log(m_max)]
            resolution:
David Hendriks's avatar
David Hendriks committed
                Resolution of the sampled range (amount of samples).
                example: resolution = resolution["M_1"]
            spacingfunction:
David Hendriks's avatar
David Hendriks committed
                Function determining how the range is sampled. You can either use a real function,
                or a string representation of a function call. Will get written to a file and
                then evaluated.
                example:
                    spacingfunction = "const(math.log(m_min), math.log(m_max), {})".format(
                        resolution['M_1']
                    )
David Hendriks's avatar
David Hendriks committed
                Extra room for some code. This code will be evaluated within the loop of the
                sampling function (i.e. a value for lnm1 is chosen already)
                example: precode = 'M_1=math.exp(lnm1);'
            probdist:
                Function determining the probability that gets assigned to the sampled parameter
                example: probdist = 'Kroupa2001(M_1)*M_1'
            dphasevol:
                part of the parameter space that the total probability is calculated with. Put to -1
                if you want to ignore any dphasevol calculations and set the value to 1
                example: dphasevol = 'dlnm1'
            condition:
                condition that has to be met in order for the grid generation to continue
                example: condition = 'self.grid_options['binary']==1'
                Method on how the value range is sampled. Can be either 'edge' (steps starting at
                the lower edge of the value range) or 'center'
                (steps starting at lower edge + 0.5 * stepsize).
        # TODO: Add check for the gridtype input value
        # TODO: add functionality for branchpoint
        # Add grid_variable
        grid_variable = {
            "name": name,
            "longname": longname,
            "precode": precode,
            "probdist": probdist,
            "dphasevol": dphasevol,
            "parameter_name": parameter_name,
            "branchpoint": branchpoint,
            "grid_variable_number": len(self.grid_options["_grid_variables"]),
        self.grid_options["_grid_variables"][grid_variable["name"]] = grid_variable
David Hendriks's avatar
David Hendriks committed
        verbose_print(
            "Added grid variable: {}".format(json.dumps(grid_variable, indent=4)),
            self.grid_options["verbosity"],
            1,
        )

    ###################################################
    # Return functions
    ###################################################

    def return_population_settings(self) -> dict:
        """
        Function that returns all the options that have been set.

        Can be combined with json to make a nice file.
        Returns:
            dictionary containing "bse_options", "grid_options", "custom_options"
            "bse_options": self.bse_options,
            "grid_options": self.grid_options,
            "custom_options": self.custom_options,
    def _return_binary_c_version_info(self, parsed=False):
        """
        Function that returns the version information of binary_c

        Args:
            parsed: passed through unchanged to return_binary_c_version_info

        Returns:
            the result of return_binary_c_version_info(parsed=parsed)
        """

        version_info = return_binary_c_version_info(parsed=parsed)

        return version_info
    def _return_binary_c_defaults(self):
        """
        Function that returns the defaults of the binary_c version that is used.
        """
    def return_all_info(
        include_population_settings: bool = True,
        include_binary_c_defaults: bool = True,
        include_binary_c_version_info: bool = True,
        include_binary_c_help_all: bool = True,
        """
        Function that returns all the information about the population and binary_c
            include_population_settings:
                whether to include the population_settings (see function return_population_settings)
            include_binary_c_defaults:
                whether to include a dict containing the binary_c parameters and their default
                values
            include_binary_c_version_info:
                whether to include a dict containing all the binary_c version info
                (see return_binary_c_version_info)
            include_binary_c_help_all:
                whether to include a dict containing all the information about
                the binary_c parameters (see get_help_all)

        Return:
            dictionary containing all, or part of, the above dictionaries
        if include_population_settings:
            population_settings = self.return_population_settings()
            all_info["population_settings"] = population_settings

        #
        if include_binary_c_defaults:
            binary_c_defaults = self._return_binary_c_defaults()
            all_info["binary_c_defaults"] = binary_c_defaults

        if include_binary_c_version_info:
            binary_c_version_info = return_binary_c_version_info(parsed=True)
            all_info["binary_c_version_info"] = binary_c_version_info

        if include_binary_c_help_all:
            binary_c_help_all_info = get_help_all(print_help=False)
            all_info["binary_c_help_all"] = binary_c_help_all_info
    def export_all_info(
        use_datadir: bool = True,
        outfile: Union[str, None] = None,
        include_population_settings: bool = True,
        include_binary_c_defaults: bool = True,
        include_binary_c_version_info: bool = True,
        include_binary_c_help_all: bool = True,
David Hendriks's avatar
David Hendriks committed
    ) -> Union[str, None]:
        Function that exports the all_info to a json file
        Tasks:
            - TODO: if any of the values in the dicts here is of a not-serializable form, then we need
                to change that to a string or something so, use a recursive function that goes over the
                all_info dict and finds those that fit
            - TODO: Fix to write things to the directory. which options do which etc
            - TODO: theres flawed logic here. rewrite this part pls
            - TODO: consider actually just removing the whole 'output to file' part and let the user do this.
        Args:
            include_population_settings: whether to include the population_settings (see function return_population_settings)
            include_binary_c_defaults: whether to include a dict containing the binary_c parameters and their default values
            include_binary_c_version_info: whether to include a dict containing all the binary_c version info (see return_binary_c_version_info)
            include_binary_c_help_all: whether to include a dict containing all the information about the binary_c parameters (see get_help_all)
            use_datadir: boolean whether to use the custom_options['data_dir'] to write the file to. If the  custom_options["base_filename"] is set, the output file will be called <custom_options["base_filename"]>_settings.json. Otherwise a file called simulation_<date+time>_settings.json will be created
            outfile: if use_datadir is false, a custom filename will be used
        all_info = self.return_all_info(
            include_population_settings=include_population_settings,
            include_binary_c_defaults=include_binary_c_defaults,
            include_binary_c_version_info=include_binary_c_version_info,
            include_binary_c_help_all=include_binary_c_help_all,
        )
David Hendriks's avatar
David Hendriks committed
        # Copy dict
        all_info_cleaned = copy.deepcopy(all_info)

            if not self.custom_options.get("base_filename", None):
                base_name = "simulation_{}".format(
                    datetime.datetime.strftime(datetime.datetime.now(), "%Y%m%d_%H%M%S")
                )
David Hendriks's avatar
David Hendriks committed
            else:
                base_name = os.path.splitext(self.custom_options["base_filename"])[0]

            settings_name = base_name + "_settings.json"
            os.makedirs(self.custom_options["data_dir"], exist_ok=True)
            settings_fullname = os.path.join(
                self.custom_options["data_dir"], settings_name
            )
David Hendriks's avatar
David Hendriks committed
            verbose_print(
                "Writing settings to {}".format(settings_fullname),
                self.grid_options["verbosity"],
                1,
            )
            with open(settings_fullname, "w") as file:
David Hendriks's avatar
David Hendriks committed
                    json.dumps(
David Hendriks's avatar
David Hendriks committed
                        all_info_cleaned,
                        indent=4,
                        default=binaryc_json_serializer,
David Hendriks's avatar
David Hendriks committed
            return settings_fullname
David Hendriks's avatar
David Hendriks committed
            verbose_print(
                "Writing settings to {}".format(outfile),
                self.grid_options["verbosity"],
                1,
            )
David Hendriks's avatar
David Hendriks committed
            if not outfile.endswith("json"):
                verbose_print(
                    "Error: outfile ({}) must end with .json".format(outfile),
                    self.grid_options["verbosity"],
                    0,
                )
                raise ValueError
David Hendriks's avatar
David Hendriks committed

David Hendriks's avatar
David Hendriks committed
                file.write(
                    json.dumps(
                        all_info_cleaned, indent=4, default=binaryc_json_serializer
                    )
                )
David Hendriks's avatar
David Hendriks committed
            return outfile
        Function/routine to set all the custom logging so that the function memory pointer
        is known to the grid.
        """

        # C_logging_code gets priority of C_autogen_code
David Hendriks's avatar
David Hendriks committed
        verbose_print(
            "Creating and loading custom logging functionality",
            self.grid_options["verbosity"],
            1,
        )
            # Generate entire shared lib code around logging lines
            custom_logging_code = binary_c_log_code(
            (
                self.grid_options["custom_logging_func_memaddr"],
                self.grid_options["_custom_logging_shared_library_file"],
            ) = create_and_load_logging_function(
David Hendriks's avatar
David Hendriks committed
                custom_logging_code,
                verbose=self.grid_options["verbosity"],
                custom_tmp_dir=self.grid_options["tmp_dir"],
        elif self.grid_options["C_auto_logging"]:
            # Generate real logging code
            logging_line = autogen_C_logging_code(
                self.grid_options["C_auto_logging"],
            # Generate entire shared lib code around logging lines
            custom_logging_code = binary_c_log_code(
                logging_line, verbose=self.grid_options["verbosity"]
            (
                self.grid_options["custom_logging_func_memaddr"],
                self.grid_options["_custom_logging_shared_library_file"],
            ) = create_and_load_logging_function(
David Hendriks's avatar
David Hendriks committed
                custom_logging_code,
                verbose=self.grid_options["verbosity"],
                custom_tmp_dir=self.grid_options["tmp_dir"],
David Hendriks's avatar
David Hendriks committed

    ###################################################
    # Ensemble functions
    ###################################################

    # Now they are stored in the _process_run_population thing.
    # Needed less code since they all
    ###################################################
    # Evolution functions
    ###################################################
    def _pre_run_cleanup(self):
        """
        Function to clean up some stuff in the grid before a run (like results, ensemble results etc)
        """

        # empty results
        self.grid_options["results"] = {}
    def evolve(self) -> None:
David Hendriks's avatar
David Hendriks committed
        Entrypoint function of the whole object. From here, based on the settings,
        we set up a SLURM or CONDOR grid, or if no setting is given we go straight
        to evolving the population
        There are no direct arguments to this function, rather it is based on the grid_options settings:
            grid_options['slurm']: integer boolean whether to use a slurm_grid evolution
            grid_options['condor']: integer boolean whether to use a condor_grid evolution

        If neither of the above is set, we continue without using HPC routines
        (that doesn't mean this cannot be run on a server with many cores)
David Hendriks's avatar
David Hendriks committed

        Returns a dictionary containing the analytics of the run
        TODO: change the way this is done. Slurm & CONDOR should probably do this different
        # Just to make sure we don't have stuff from a previous run hanging around
        self._pre_run_cleanup()

        # Check which type:
        if self.grid_options["slurm"] == 1:
            # Execute slurm subroutines
        elif self.grid_options["condor"] == 1:
            # Execute condor subroutines
            # Execute population evolution subroutines
            self.evolve_population()
        # Put all interesting stuff in a variable and output that afterwards, as analytics of the run.
        analytics_dict = {
            "population_name": self.grid_options["_population_id"],
            "evolution_type": self.grid_options["evolution_type"],
            "failed_count": self.grid_options["_failed_count"],
            "failed_prob": self.grid_options["_failed_prob"],
            "failed_systems_error_codes": self.grid_options[
                "_failed_systems_error_codes"
            ].copy(),
            "errors_exceeded": self.grid_options["_errors_exceeded"],
            "errors_found": self.grid_options["_errors_found"],
            "total_probability": self.grid_options["_probtot"],
            "total_count": self.grid_options["_count"],
            "start_timestamp": self.grid_options["_start_time_evolution"],
            "end_timestamp": self.grid_options["_end_time_evolution"],
David Hendriks's avatar
David Hendriks committed
            "total_mass_run": self.grid_options["_total_mass_run"],
            "total_probability_weighted_mass_run": self.grid_options[
                "_total_probability_weighted_mass_run"
            ],
David Hendriks's avatar
David Hendriks committed
            "zero_prob_stars_skipped": self.grid_options['_zero_prob_stars_skipped']
        # Clean up code: remove files, unset values, unload interpolators etc. This is placed in the general evolve function,
    def evolve_population(self):
        """
        Function to evolve populations. This handles the setting up, evolving
        and cleaning up of a population of stars.
        Choices here are:
            to evolve a population via multiprocessing or linearly on 1 core.
            NOT IMPLEMENTED YET to evolve a population via a variable grid, a source file or MC
        Tasks:
            - TODO: include options for different ways of generating a population here. (i.e. MC or source file)
        # Reset some settings: population_id, results, ensemble_results etc
        self.grid_options["_population_id"] = uuid.uuid4().hex
        ##
        # Prepare code/initialise grid.
        # set custom logging, set up store_memaddr, build grid code. dry run grid code.
        ##
        # Evolve systems: via grid_options one can choose to do this linearly, or
        # multiprocessing method.
        if (
            self.grid_options["evolution_type"]
            in self.grid_options["_evolution_type_options"]
            if self.grid_options["evolution_type"] == "grid":
                self._evolve_population_grid()
            # elif self.grid_options["evolution_type"] == "mc":
            #     # TODO: add MC option
                "Warning. you chose a wrong option for the grid evolution types.\
                Please choose from the following: {}.".format(
                    self.grid_options["_evolution_type_options"]
        self.grid_options["_end_time_evolution"] = time.time()

        # Log and print some information
            "Population-{} finished! The total probability was: {}. It took a total of {}s to run {} systems on {} cores".format(
David Hendriks's avatar
David Hendriks committed
                self.grid_options["_population_id"],
                self.grid_options["_probtot"],
David Hendriks's avatar
David Hendriks committed
                self.grid_options["_end_time_evolution"]
                - self.grid_options["_start_time_evolution"],
                self.grid_options["_total_starcount"],
                self.grid_options["amt_cores"],
            ),
            self.grid_options["verbosity"],
            0,
        if self.grid_options["_errors_found"]:
            # Some information afterwards
            verbose_print(
                "During the run {} failed systems were found, with a total probability of {} and with the following unique error codes: {} ".format(
                    self.grid_options["_failed_count"],
                    self.grid_options["_failed_prob"],
                    self.grid_options["_failed_systems_error_codes"],
                ),
                self.grid_options["verbosity"],
                0,
            )
            # Some information afterwards
            verbose_print(
                "The full argline commands for {} these systems have been written to {}".format(
                    "ALL"
                    if not self.grid_options["_errors_exceeded"]
                    else "SOME (only the first ones, as there were too many to log all of them)",
                    os.path.join(
                        self.grid_options["tmp_dir"],
                        "failed_systems_{}_X.txt".format(
                            self.grid_options["_population_id"]
                        ),
                    ),
                ),
                self.grid_options["verbosity"],
                0,
David Hendriks's avatar
David Hendriks committed
        else:
David Hendriks's avatar
David Hendriks committed
            verbose_print(
                "There were no errors found in this run.",
                self.grid_options["verbosity"],
                0,
            )
    def get_stream_logger(self, level=logging.DEBUG):
        """
        Return the logger named "stream_logger" with a single, freshly
        configured StreamHandler attached.

        Any handlers that were attached to this logger before are dropped, so
        calling this function repeatedly does not stack up handlers.

        Input:
            level: logging level that is set on both the logger and its handler
        """

        # Build the handler first, including the message layout
        handler = logging.StreamHandler()
        handler.setLevel(level)
        handler.setFormatter(
            logging.Formatter(
                "[%(asctime)s %(levelname)-8s %(processName)s] --- %(message)s"
            )
        )

        # Fetch the shared logger, discard its old handlers and attach the new one
        logger = logging.getLogger("stream_logger")
        logger.handlers = []
        logger.setLevel(level)
        logger.addHandler(handler)

        return logger

    def system_queue_filler(self, job_queue, amt_cores):
        """
        Function that is responsible for keeping the queue filled.

        Generates the grid code, loads the grid function, instantiates the
        system generator and puts every generated system on the job queue.
        Once the generator is exhausted, one "STOP" sentinel per worker is put
        on the queue; the workers terminate when they receive it.

        Input:
            job_queue: queue object that the workers consume from. Only its
                `put` method is used here.
            amt_cores: amount of workers consuming the queue; this many "STOP"
                sentinels are queued at the end.
        """

        stream_logger = self.get_stream_logger()
        stream_logger.debug("setting up the system_queue_filler now")

        # Setup of the generator
        self._generate_grid_code(dry_run=False)
        self._load_grid_function()
        generator = self.grid_options["_system_generator"](self, print_results=False)

        # TODO: build in method to handle with the HPC.
        # Continuously fill the queue
        for system_number, system_dict in enumerate(generator):
            # Put job in queue.
            # NOTE(review): the (system_number, system_dict) tuple layout is
            # assumed here; confirm it matches what the worker unpacks from
            # job_queue.get().
            job_queue.put((system_number, system_dict))

            # Print some info
            verbose_print(
                "Queue produced system {}".format(system_number),
                self.grid_options["verbosity"],
                2,
            )

        # Send closing signal to workers. When they receive this they will terminate
        stream_logger.debug("Signaling stop to processes")
        for _ in range(amt_cores):
            job_queue.put("STOP")

    def format_ensemble_results(self, ensemble_dictionary):
        """
        Function to handle all the steps of formatting the ensemble output again.

        The dictionary is passed through three stages: the keys are changed to
        floats, the dictionary is sorted, and the keys are changed back to
        strings. After each stage the previous intermediate result is released
        and a garbage collection is triggered.

        Input:
            ensemble_dictionary: dictionary containing all the ensemble results

        Returns:
            the result of the final (key to string) stage
        """

        # Stage 1: change the keys to floats
        results = recursive_change_key_to_float(ensemble_dictionary)
        gc.collect()

        # Stage 2: sort the dictionary. Rebinding the name drops the previous
        # intermediate before the collection is triggered.
        results = custom_sort_dict(results)
        gc.collect()

        # Stage 3: change the keys back to a string but with a %g format.
        results = recursive_change_key_to_string(results)
        gc.collect()

        return results

    def _evolve_population_grid(self):
        """
        Function to evolve the population with multiprocessing approach.
        Using pathos to be able to include class-owned functions.

        This function will create a pool with <self.grid_options["amt_cores"]> processes, and
        perform an imap_unordered to run the different `threads`.
        Before this was done by giving a generator as the iterable, and have the processes get a
        certain chunksize each round.
        Later on this seemed to be a bad decision, because it is difficult to pass information
        back to the main controller, and because with each new batch of systems a new object instance was created.

        What I do now is I spawn these X amount of processes, and pass a range(self.grid_options["amt_cores"]) as iterable.
        In that way, only once do they fetch a `job`, but that job is just a ID number.
        With this ID number each thread/process loops over the whole generator,
        but only runs the one <ID>'th system (if (localcounter+ID) % self.grid_options["amt_cores"]==0)'

        When they are finished, these jobs are instructed to return a set of information
        (the result dict, TODO: describe what more)

        These resultation dictionaries are then merged and stored as object properties again.
        """

        # TODO: make further use of a queue to handle jobs or at least
        #   get information on the process ids etc
        # https://stackoverflow.com/questions/10190981/get-a-unique-id-for-worker-in-python-multiprocessing-pool
        # https://stackoverflow.com/questions/8640367/python-manager-dict-in-multiprocessing/9536888
        #   for muting values through dicts
        # https://python-forum.io/Thread-Dynamic-updating-of-a-nested-dictionary-in-multiprocessing-pool
        # https://stackoverflow.com/questions/28740955/working-with-pathos-multiprocessing-tool-in-python-and

        # TODO: make good example of how to deal with a result_dict
        # https://www.programcreek.com/python/example/58176/multiprocessing.Value
        # https://stackoverflow.com/questions/17377426/shared-variable-in-pythons-multiprocessing

        # Set process name
        setproctitle.setproctitle('binarycpython parent process')
        setproctitle.setthreadtitle("binarycpyhon parent thread")
        # Set up the manager object that can share info between processes
        manager = multiprocessing.Manager()
        job_queue = manager.Queue(maxsize=self.grid_options["max_queue_size"])
        result_queue = manager.Queue(maxsize=self.grid_options["amt_cores"])