diff --git a/binarycpython/utils/dicts.py b/binarycpython/utils/dicts.py new file mode 100644 index 0000000000000000000000000000000000000000..77b47f8c83905ed7cb9f6a4cc4b511e15c1cb96e --- /dev/null +++ b/binarycpython/utils/dicts.py @@ -0,0 +1,695 @@ +""" +Module containing functions that binary_c-python uses to modify +dictionaries. +""" +import astropy.units as u +import collections +import copy +import numpy as np +from collections import ( + OrderedDict, +) + +# we need to convert keys to floats: +# this is ~ a factor 10 faster than David's +# recursive_change_key_to_float routine, +# probably because this version only does +# the float conversion, nothing else. +def keys_to_floats(json_data): + # assumes nested dicts ... + #new_data = {} + + # but this copies the variable type, but has some + # pointless copying + #new_data = copy.copy(json_data) + #new_data.clear() + + # this adopts the type correctly *and* is fast + new_data = type(json_data)() + + for k,v in json_data.items(): + if isinstance(v, list): + v = [ keys_to_floats(item) if isinstance(item, collections.abc.Mapping) \ + else item for item in v ] + elif isinstance(v, collections.abc.Mapping): + # dict, ordereddict, etc. + v = keys_to_floats(v) + try: + f = float(k) + new_data[f] = json_data[k] + except: + new_data[k] = v + return new_data + +def recursive_change_key_to_float(input_dict): + """ + Function to recursively change the key to float + + This only works if the dict contains just sub-dicts or numbers/strings. 
+ Does not work with lists as values + """ + + new_dict = collections.OrderedDict() # TODO: check if this still works + + for key in input_dict: + if isinstance(input_dict[key], (dict, collections.OrderedDict)): + try: + num_key = float(key) + new_dict[num_key] = recursive_change_key_to_float( + copy.deepcopy(input_dict[key]) + ) + except ValueError: + new_dict[key] = recursive_change_key_to_float( + copy.deepcopy(input_dict[key]) + ) + else: + try: + num_key = float(key) + new_dict[num_key] = input_dict[key] + except ValueError: + new_dict[key] = input_dict[key] + + return new_dict + + +def recursive_change_key_to_string(input_dict): + """ + Function to recursively change the key back to a string but this time in a format that we decide + """ + + new_dict = collections.OrderedDict() # TODO: check if this still works + + for key in input_dict: + if isinstance(input_dict[key], (dict, collections.OrderedDict)): + if isinstance(key, (int, float)): + string_key = "{:g}".format(key) + new_dict[string_key] = recursive_change_key_to_string( + copy.deepcopy(input_dict[key]) + ) + else: + new_dict[key] = recursive_change_key_to_string( + copy.deepcopy(input_dict[key]) + ) + else: + if isinstance(key, (int, float)): + string_key = "{:g}".format(key) + new_dict[string_key] = input_dict[key] + else: + new_dict[key] = input_dict[key] + + return new_dict + +############################################################ +# code to walk a dictionary recursively based on +# https://stackoverflow.com/questions/13687924/setting-a-value-in-a-nested-python-dictionary-given-a-list-of-indices-and-value +def _nested_set(dic, keys, value): + for key in keys[:-1]: + dic = dic.setdefault(key, {}) + dic[keys[-1]] = value +def _nested_get(dic, keys): + for key in keys[:-1]: + dic = dic.setdefault(key, {}) + return dic[keys[-1]] + +# function to walk through the dictionary, multiplying +# only float values by a const +def _recursive_normalize_floats(path,d,const,parent=None,ignore=None): + if not 
# function to walk through a dictionary, multiplying
# only float values by a constant
def _recursive_normalize_floats(path, d, const, parent=None, ignore=None):
    """
    Recursive helper for multiply_float_values().

    Walks the (nested) dictionary ``d`` in place, multiplying every
    float value by ``const``. ``path`` tracks the chain of keys from
    the root (used for error reporting), ``parent`` is the root
    dictionary, and ``ignore`` is an optional collection of keys that
    are skipped at every depth.
    """
    if not parent:
        parent = d

    for k, v in d.items():
        if ignore and k in ignore:
            continue

        if isinstance(v, float):
            # floats are multiplied by the constant, in place
            # (assigning to an existing key is safe while iterating)
            d[k] = v * const
        elif isinstance(v, (str, int)):
            # do nothing to strings or ints
            pass
        elif v is None:
            pass
        # note: isinstance(v, dict) isn't enough, check the Mapping ABC
        elif isinstance(v, collections.abc.Mapping):
            path.append(k)
            # bugfix: propagate ignore into nested dicts; it was
            # previously dropped, so ignored keys were only skipped at
            # the top level
            _recursive_normalize_floats(path, v, const, parent=parent, ignore=ignore)
            path.pop()
        else:
            print(
                "###Type {} not recognized: {}.{}={}".format(
                    type(v), ".".join(path), k, v
                )
            )


def multiply_float_values(d, const, ignore=None):
    """
    multiply_float_values : A function to recursively multiply values of
    a (nested) dictionary that are floats by a constant. Nested
    dictionaries call this function recursively.

    Args:
        d: the dictionary (modified in place)
        const: the constant that multiplies float values
        ignore: optional collection of keys to skip, at any depth
    """
    path = []
    _recursive_normalize_floats(path, d, const, parent=d, ignore=ignore)


def subtract_dicts(dict_1: dict, dict_2: dict) -> dict:
    """
    Function to subtract two dictionaries.

    Only allows values to be either a dict or a numerical type.

    For the overlapping keys (key name present in both dicts):
        When the keys are of the same type:
            - If the types are numerical: subtract the value at dict 2 from dict 1.
            - If the types are both dictionaries: call this function with the subdicts

        When the keys are not of the same type:
            - if the values are all of numerical types, subtract them anyway

    For the unique keys:
        - if the key is from dict 1: adds the value to the new dict (numerical value or dict)
        - if the key is from dict 2: adds the negative of its value in case of numerical type.
          If the type is a dict, the result of subtract_dicts({}, dict_2[key]) will be set

    If the result is 0, the key will be removed from the resulting dict.
    If that results in an empty dict, the dict will be removed too.

    Args:
        dict_1: first dictionary
        dict_2: second dictionary

    Returns:
        Subtracted dictionary
    """

    # Set up new dict
    new_dict = {}

    # Define allowed numerical types
    ALLOWED_NUMERICAL_TYPES = (float, int, np.float64)

    #
    keys_1 = dict_1.keys()
    keys_2 = dict_2.keys()

    # Find overlapping keys of both dicts
    overlapping_keys = set(keys_1).intersection(set(keys_2))

    # Find the keys that are unique
    unique_to_dict_1 = set(keys_1).difference(set(keys_2))
    unique_to_dict_2 = set(keys_2).difference(set(keys_1))

    # Add the keys unique to dict 1 to the new dict
    for key in unique_to_dict_1:
        # If these items are numerical types
        if isinstance(dict_1[key], ALLOWED_NUMERICAL_TYPES):
            new_dict[key] = dict_1[key]
            if new_dict[key] == 0:
                del new_dict[key]

        # Else, to be safe we should deepcopy them
        elif isinstance(dict_1[key], dict):
            copy_dict = copy.deepcopy(dict_1[key])
            new_dict[key] = copy_dict
        else:
            msg = "Error: using unsupported type for key {}: {}".format(
                key, type(dict_1[key])
            )
            print(msg)
            raise ValueError(msg)

    # Add the keys unique to dict 2 to the new dict
    for key in unique_to_dict_2:
        # If these items are numerical type, we should add the negative of the value
        if isinstance(dict_2[key], ALLOWED_NUMERICAL_TYPES):
            new_dict[key] = -dict_2[key]
            if new_dict[key] == 0:
                del new_dict[key]

        # Else we should place the negative of that dictionary in the new place
        elif isinstance(dict_2[key], dict):
            new_dict[key] = subtract_dicts({}, dict_2[key])

            # bugfix: drop empty results here too, as the docstring
            # promises (this was previously only done for overlapping keys)
            if not new_dict[key]:
                del new_dict[key]
        else:
            msg = "Error: using unsupported type for key {}: {}".format(
                key, type(dict_2[key])
            )
            print(msg)
            raise ValueError(msg)

    # Go over the common keys:
    for key in overlapping_keys:

        # See whether the types are actually the same
        if not type(dict_1[key]) is type(dict_2[key]):
            # Exception: mixed numerical types can still be subtracted
            if (type(dict_1[key]) in ALLOWED_NUMERICAL_TYPES) and (
                type(dict_2[key]) in ALLOWED_NUMERICAL_TYPES
            ):
                # We can safely subtract the values since they are all numeric
                new_dict[key] = dict_1[key] - dict_2[key]
                if new_dict[key] == 0:
                    del new_dict[key]

            else:
                msg = "Error key: {} value: {} type: {} and key: {} value: {} type: {} are not of the same type and cannot be merged".format(
                    key,
                    dict_1[key],
                    type(dict_1[key]),
                    key,
                    dict_2[key],
                    type(dict_2[key]),
                )

                print(msg)
                raise ValueError(msg)

        # This is where the types are the same
        else:
            # If these items are numeric types
            if isinstance(dict_1[key], ALLOWED_NUMERICAL_TYPES):
                new_dict[key] = dict_1[key] - dict_2[key]

                # Remove entry if the value is 0
                if new_dict[key] == 0:
                    del new_dict[key]

            # Else, to be safe we should deepcopy them
            elif isinstance(dict_1[key], dict):
                new_dict[key] = subtract_dicts(dict_1[key], dict_2[key])

                # Remove entry if it results in an empty dict
                # TODO: write test to prevent empty dicts from showing up
                if not new_dict[key]:
                    del new_dict[key]
            else:
                msg = "Error: using unsupported type for key {}: {}".format(
                    key, type(dict_2[key])
                )
                print(msg)
                raise ValueError(msg)

    #
    return new_dict


class AutoVivificationDict(dict):
    """
    Implementation of perl's autovivification feature, by overriding the
    get item and the __iadd__ operator (https://docs.python.org/3/reference/datamodel.html?highlight=iadd#object.__iadd__)

    This allows to set values within a subdict that might not exist yet:

    Example:
        newdict = {}
        newdict['example']['mass'] += 10
        print(newdict)
        >>> {'example': {'mass': 10}}
    """

    def __getitem__(self, item):
        try:
            return dict.__getitem__(self, item)
        except KeyError:
            # autovivify: create and store an empty node of the same type
            value = self[item] = type(self)()
            return value

    def __iadd__(self, other):
        # A freshly autovivified node is an empty dict standing in for
        # "no value yet", so ``+=`` simply adopts the right-hand side.
        # bugfix: the previous implementation did ``self += other``
        # inside __iadd__, which recursed to the interpreter's recursion
        # limit on every autovivified += before a bare except returned
        # ``other`` anyway. The net result (returning ``other``) is
        # unchanged.
        return other
def inspect_dict(
    input_dict: dict, indent: int = 0, print_structure: bool = True
) -> dict:
    """
    Function to (recursively) inspect a (nested) dictionary.

    The object that is returned is a dictionary containing the keys of the
    input_dict, but as value it will have the type of the corresponding
    value in the input_dict (or, for dict values, the recursively
    inspected sub-structure).

    In this way we inspect the structure of these dictionaries, rather
    than the exact contents.

    Args:
        input_dict: dictionary you want to inspect
        print_structure: (optional, default = True) print while walking
        indent: (optional, default = 0) indent of the first output

    Returns:
        Dictionary that has the same structure as the input_dict, but as
        values it has the type(input_dict[key]) (except if the value is a dict)
    """

    structure_dict = collections.OrderedDict()  # TODO: check if this still works

    for key, value in input_dict.items():
        structure_dict[key] = type(value)

        if print_structure:
            print("\t" * indent, key, type(value))

        if isinstance(value, dict):
            # replace the bare type by the inspected sub-structure
            structure_dict[key] = inspect_dict(
                value, indent=indent + 1, print_structure=print_structure
            )

    return structure_dict


def count_keys_recursive(input_dict):
    """
    Function to count the total number of keys in a (nested) dictionary.
    """

    local_count = 0
    for key, value in input_dict.items():
        local_count += 1
        if isinstance(value, (dict, collections.OrderedDict)):
            local_count += count_keys_recursive(value)
    return local_count


def merge_dicts(dict_1: dict, dict_2: dict) -> dict:
    """
    Function to merge two dictionaries in a custom way.

    Behaviour:

    When dict keys are only present in one of either:
        - we just add the content to the new dict

    When dict keys are present in both, we decide based on the value types how to combine them:
        - dictionaries will be merged by recursively calling this function again
        - numbers will be added
        - (opt) lists will be appended
        - booleans are OR'd together
        - in the case that the instances do not match: for now we raise an error

    Args:
        dict_1: first dictionary
        dict_2: second dictionary

    Returns:
        Merged dictionary
    """

    # Set up new dict
    new_dict = collections.OrderedDict()  # TODO: check if this still necessary

    #
    keys_1 = dict_1.keys()
    keys_2 = dict_2.keys()

    # Find overlapping keys of both dicts
    overlapping_keys = set(keys_1).intersection(set(keys_2))

    # Find the keys that are unique
    unique_to_dict_1 = set(keys_1).difference(set(keys_2))
    unique_to_dict_2 = set(keys_2).difference(set(keys_1))

    # Add the keys unique to dict 1 to the new dict
    for key in unique_to_dict_1:
        # If these items are ints or floats, then just put them in
        if isinstance(dict_1[key], (float, int)):
            new_dict[key] = dict_1[key]
        # Else, to be safe we should deepcopy them
        else:
            copy_dict = copy.deepcopy(dict_1[key])
            new_dict[key] = copy_dict

    for key in unique_to_dict_2:
        # If these items are ints or floats, then just put them in
        if isinstance(dict_2[key], (float, int)):
            new_dict[key] = dict_2[key]
        # Else, to be safe we should deepcopy them
        else:
            copy_dict = copy.deepcopy(dict_2[key])
            new_dict[key] = copy_dict

    # Go over the common keys:
    for key in overlapping_keys:

        # If the types are not the same, it depends on their type whether
        # we still deal with them at all, or just raise an error
        if not type(dict_1[key]) is type(dict_2[key]):
            # Exceptions: numbers can be added
            if isinstance(dict_1[key], (int, float, np.float64)) and isinstance(
                dict_2[key], (int, float, np.float64)
            ):
                new_dict[key] = dict_1[key] + dict_2[key]

            # Exceptions: versions of dicts can be merged
            # bugfix: this previously used type(AutoVivificationDict)
            # (i.e. the metaclass ``type``) instead of the class itself,
            # so AutoVivificationDict instances were never matched here
            elif isinstance(
                dict_1[key], (dict, collections.OrderedDict, AutoVivificationDict)
            ) and isinstance(
                dict_2[key], (dict, collections.OrderedDict, AutoVivificationDict)
            ):
                new_dict[key] = merge_dicts(dict_1[key], dict_2[key])

            # If the above cases have not dealt with it, then we should raise an error
            else:
                print(
                    "Error key: {} value: {} type: {} and key: {} value: {} type: {} are not of the same type and cannot be merged".format(
                        key,
                        dict_1[key],
                        type(dict_1[key]),
                        key,
                        dict_2[key],
                        type(dict_2[key]),
                    )
                )
                raise ValueError

        # Here we check for the cases that we want to explicitly catch. Ints will be added,
        # floats will be added, lists will be appended (though that might change) and dicts will be
        # dealt with by calling this function again.
        else:
            # Booleans (have to be the type bool, not just a 0 or 1)
            if isinstance(dict_1[key], bool) and isinstance(dict_2[key], bool):
                new_dict[key] = dict_1[key] or dict_2[key]

            # ints
            elif isinstance(dict_1[key], int) and isinstance(dict_2[key], int):
                new_dict[key] = dict_1[key] + dict_2[key]

            # floats
            elif isinstance(dict_1[key], float) and isinstance(dict_2[key], float):
                new_dict[key] = dict_1[key] + dict_2[key]

            # lists
            elif isinstance(dict_1[key], list) and isinstance(dict_2[key], list):
                new_dict[key] = dict_1[key] + dict_2[key]

            # dicts
            # (checked before the astropy branch below so that merging
            # plain dicts never needs to construct a dummy Quantity)
            elif isinstance(dict_1[key], dict) and isinstance(dict_2[key], dict):
                new_dict[key] = merge_dicts(dict_1[key], dict_2[key])

            # Astropy quantities (using a dummy type representing the numpy array)
            elif isinstance(dict_1[key], type(np.array([1]) * u.m)) and isinstance(
                dict_2[key], type(np.array([1]) * u.m)
            ):
                new_dict[key] = dict_1[key] + dict_2[key]

            else:
                print(
                    "Object types {}: {} ({}), {} ({}) not supported.".format(
                        key,
                        dict_1[key],
                        type(dict_1[key]),
                        dict_2[key],
                        type(dict_2[key]),
                    )
                )
                raise ValueError

    #
    return new_dict


def update_dicts(dict_1: dict, dict_2: dict) -> dict:
    """
    Function to update dict_1 with values of dict_2 in a recursive way.

    Behaviour:

    When dict keys are only present in one of either:
        - we just add the content to the new dict

    When dict keys are present in both, we decide based on the value types how to combine them:
        - dicts are recursed into; for everything else the value of dict_2 is taken

    Args:
        dict_1: first dictionary
        dict_2: second dictionary

    Returns:
        New dictionary with updated values
    """

    # Set up new dict of the same type as dict_1
    #
    # Note: setting directly to OrderedDict fails in some cases
    # so instead we take a (shallow) copy of dict_1 which will
    # have the same type as dict_1, then clear it. (There must
    # be a better way to do this...)
    new_dict = dict_1.copy()  # OrderedDict() # TODO: check if this still works
    new_dict.clear()

    keys_1 = dict_1.keys()
    keys_2 = dict_2.keys()

    # Find overlapping keys of both dicts
    overlapping_keys = set(keys_1).intersection(set(keys_2))

    # Find the keys that are unique
    unique_to_dict_1 = set(keys_1).difference(set(keys_2))
    unique_to_dict_2 = set(keys_2).difference(set(keys_1))

    # Add the keys unique to dict 1 to the new dict
    for key in unique_to_dict_1:
        # If these items are ints or floats, then just put them in
        if isinstance(dict_1[key], (float, int)):
            new_dict[key] = dict_1[key]
        # Else, to be safe we should deepcopy them
        else:
            copy_dict = copy.deepcopy(dict_1[key])
            new_dict[key] = copy_dict

    for key in unique_to_dict_2:
        # If these items are ints or floats, then just put them in
        if isinstance(dict_2[key], (float, int)):
            new_dict[key] = dict_2[key]
        # Else, to be safe we should deepcopy them
        else:
            copy_dict = copy.deepcopy(dict_2[key])
            new_dict[key] = copy_dict

    # Go over the common keys:
    for key in overlapping_keys:

        # See whether the types are actually the same
        if not type(dict_1[key]) is type(dict_2[key]):
            # Exception: mixed numeric types still take dict_2's value
            if (type(dict_1[key]) in [int, float]) and (
                type(dict_2[key]) in [int, float]
            ):
                new_dict[key] = dict_2[key]

            else:
                print(
                    "Error key: {} value: {} type: {} and key: {} value: {} type: {} are not of the same type and cannot be merged".format(
                        key,
                        dict_1[key],
                        type(dict_1[key]),
                        key,
                        dict_2[key],
                        type(dict_2[key]),
                    )
                )
                raise ValueError

        # Same types: dicts are updated recursively, everything else is
        # overwritten by dict_2's value
        else:
            # dicts
            if isinstance(dict_1[key], dict) and isinstance(dict_2[key], dict):
                new_dict[key] = update_dicts(dict_1[key], dict_2[key])
            else:
                new_dict[key] = dict_2[key]

    #
    return new_dict
def multiply_values_dict(input_dict, factor):
    """
    Function that goes over a dictionary recursively and multiplies the
    value by the given factor if possible (ints and floats).

    If the key equals "general_info", the multiplication gets skipped.
    """

    for key in input_dict:
        if not key == "general_info":
            if isinstance(input_dict[key], (dict, collections.OrderedDict)):
                input_dict[key] = multiply_values_dict(input_dict[key], factor)
            else:
                if isinstance(input_dict[key], (int, float)):
                    input_dict[key] = input_dict[key] * factor

    return input_dict


def custom_sort_dict(input_dict):
    """
    Returns a dictionary that is ordered, but can handle numbers better
    than a normal OrderedDict.

    When the keys of the current dictionary are of mixed type, we first
    find all the unique types, sort that list of type names, then sort
    the keys belonging to each type and concatenate the per-type sorted
    key lists. This is done until all the keys are sorted.

    All objects other than dictionary types are directly returned as they are.
    """

    # Anything that is not a dict is returned untouched
    if not isinstance(input_dict, (dict, collections.OrderedDict)):
        return input_dict

    new_dict = collections.OrderedDict()
    keys = input_dict.keys()

    # Collect the distinct key types. We use type() here (not
    # isinstance) so that e.g. bool and int keys count as different types.
    all_types_keys = []
    for key in keys:
        if type(key) not in all_types_keys:
            all_types_keys.append(type(key))

    # If there are multiple types, then we loop over them and do a piece-wise sort
    if len(all_types_keys) > 1:
        # Create a string repr of each type name to sort the types themselves
        str_types = {repr(el): el for el in all_types_keys}

        # Set up sorted keys list
        sorted_keys = []

        for key_str_type in sorted(str_types.keys()):
            cur_type = str_types[key_str_type]

            # bugfix: bucket keys with type() to match how
            # all_types_keys was built above; using isinstance() here
            # previously put bool keys into both the bool and the int
            # buckets
            cur_list = [key for key in keys if type(key) is cur_type]
            sorted_keys = sorted_keys + sorted(cur_list)
    else:
        sorted_keys = sorted(keys)

    for key in sorted_keys:
        new_dict[key] = custom_sort_dict(copy.deepcopy(input_dict[key]))

    return new_dict


def filter_dict(arg_dict: dict,
                filter_list: list) -> dict:
    """
    Function to filter out keys that are included in filter_list.

    Args:
        arg_dict: dictionary containing the argument + default key pairs
            of binary_c
        filter_list: list of keys to be filtered out

    Returns:
        filtered dictionary (pairs whose key is in filter_list are removed)
    """

    new_dict = arg_dict.copy()

    for key in filter_list:
        if key in new_dict:
            del new_dict[key]

    return new_dict
"""
Module containing functions to interact with binary_c's
population ensemble using the binarycpython package
"""

import astropy.units as u
import binarycpython.utils.moe_di_stefano_2017_data as moe_di_stefano_2017_data
from binarycpython import _binary_c_bindings
from binarycpython.utils.dicts import (
    keys_to_floats,
    recursive_change_key_to_float,
    custom_sort_dict,
    recursive_change_key_to_string,
)
import bz2
import collections
from colorama import Fore, Back, Style
import copy
import datetime as dt
import gc
import gzip
from halo import Halo
import h5py
import humanize
import inspect
from io import StringIO
import json
import msgpack
import numpy as np
import os
import psutil
import py_rinterpolate
import re
import resource
import sys
import subprocess
import tempfile
import time
import types
from typing import Union, Any

import simplejson
# import orjson


def ensemble_setting(ensemble, parameter_name):
    """
    Function to get the setting of parameter_name in the given ensemble,
    or return the default value.

    The lookup cascades through the population settings (bse_options,
    grid_options, custom_options) and finally the binary_c defaults;
    None is returned if the parameter is nowhere to be found.
    """
    value = None

    try:
        value = ensemble['metadata']['settings']['population_settings']['bse_options'][parameter_name]
    except KeyError:
        value = None

    if value is None:
        try:
            value = ensemble['metadata']['settings']['population_settings']['grid_options'][parameter_name]
        except KeyError:
            value = None

    if value is None:
        try:
            value = ensemble['metadata']['settings']['population_settings']['custom_options'][parameter_name]
        except KeyError:
            value = None

    # not found, try the default
    if value is None:
        try:
            value = ensemble['metadata']['settings']['binary_c_defaults'][parameter_name]
        except KeyError:
            value = None

    return value


def open_ensemble(filename):
    """
    Function to open an ensemble at filename for reading, with
    decompression if required (based on the filename extension).
    """
    compression = ensemble_compression(filename)

    # bugfix throughout this function: compare strings with "==", not
    # the identity operator "is" (which only happened to work through
    # CPython's string interning and warns on modern Pythons)
    if ensemble_file_type(filename) == 'msgpack':
        # msgpack is a binary format
        flags = 'rb'
    else:
        flags = 'rt'

    if compression == 'bzip2':
        file_object = bz2.open(filename, flags)
    elif compression == 'gzip':
        file_object = gzip.open(filename, flags)
    else:
        file_object = open(filename, flags)
    return file_object


def ensemble_compression(filename):
    """
    Return the compression type of the ensemble file, based on its
    filename extension ('bzip2', 'gzip' or None).
    """
    if filename.endswith('.bz2'):
        return 'bzip2'
    elif filename.endswith('.gz'):
        return 'gzip'
    else:
        return None


def ensemble_file_type(filename):
    """
    Returns the file type of an ensemble file ('JSON', 'msgpack' or
    None if unrecognised).
    """
    if '.json' in filename:
        filetype = 'JSON'
    elif '.msgpack' in filename:
        filetype = 'msgpack'
    else:
        filetype = None
    return filetype


def load_ensemble(filename,
                  convert_float_keys=True,
                  select_keys=None,
                  timing=False):
    """
    Function to load an ensemble file, even if it is compressed,
    and return its contents as a Python dictionary.

    Args:
        convert_float_keys : if True, converts string keys to floats.
        select_keys : a list of keys to be selected from the ensemble.
        timing : if True, print how long loading and conversion took.
    """

    # load with some info to the terminal
    print("Loading JSON...")

    # open the ensemble and get the file type
    file_object = open_ensemble(filename)
    filetype = ensemble_file_type(filename)

    if not filetype or not file_object:
        print("Unknown filetype : your ensemble should be saved either as JSON or msgpack data.")
        sys.exit()

    with Halo(text='Loading', interval=250, spinner='moon', color='yellow'):
        tstart = time.time()
        _loaded = False

        def _hook(obj):
            # one-shot hook: report when the parser first returns data
            nonlocal _loaded
            if not _loaded:
                _loaded = True
                print("\nLoaded {} data, now putting in a dictionary".format(filetype),
                      flush=True)
            return obj

        # bugfix: string comparison with "==" rather than "is"
        if filetype == 'JSON':
            # orjson promises to be fast, but it doesn't seem to be
            # and fails on "Infinity"... oops
            # data = orjson.loads(file_object.read())

            # simplejson is faster than standard json and "just works"
            # on the big Moe set in 37s
            data = simplejson.load(file_object,
                                   object_hook=_hook)

            # standard json module
            # on the big Moe set takes 42s
            # data = json.load(file_object,
            #                  object_hook=_hook)
        elif filetype == 'msgpack':
            data = msgpack.load(file_object,
                                object_hook=_hook)

        if timing:
            print("\n\nTook {} s to load the data\n\n".format(time.time() - tstart),
                  flush=True)

    # strip non-selected keys, if a list is given in select_keys
    if select_keys:
        keys = list(data['ensemble'].keys())
        for key in keys:
            if key not in select_keys:
                del data['ensemble'][key]

    # perhaps convert floats?
    tstart = time.time()
    if convert_float_keys:
        # timings are for 100 iterations on the big Moe data set
        # data = format_ensemble_results(data)        # 213s
        # data = recursive_change_key_to_float(data)  # 61s
        data = keys_to_floats(data)                   # 6.94s

    if timing:
        print("\n\nTook {} s to convert floats\n\n".format(time.time() - tstart),
              flush=True)

    # return data
    return data
def extract_ensemble_json_from_string(binary_c_output: str) -> dict:
    """
    Function to extract the ensemble_json information from a raw binary_c output string

    Args:
        binary_c_output: raw binary_c output string

    Returns:
        JSON dictionary with the parsed ENSEMBLE_JSON data
    """

    # Nothing to parse: return an empty dictionary straight away
    if not binary_c_output:
        return {}

    json_dict = None

    ensemble_jsons_strings = [
        line
        for line in binary_c_output.splitlines()
        if line.startswith("ENSEMBLE_JSON")
    ]

    try:
        json_dict = handle_ensemble_string_to_json(
            ensemble_jsons_strings[0][len("ENSEMBLE_JSON ") :]
        )

        if len(ensemble_jsons_strings) > 1:
            verbose_print(
                "Warning: There is more than one line starting with ENSEMBLE_JSON. Taking the first, but you should check this out.",
                1,
                0,
            )
    except IndexError:
        verbose_print(
            "Error: Couldn't extract the ensemble information from the output string",
            0,
            0,
        )

    return json_dict


def handle_ensemble_string_to_json(raw_output):
    """
    Function that deals with the raw output of the ensemble and
    creates a working JSON dictionary out of it.

    Args:
        raw_output: raw output of the ensemble dump by binary_c

    Returns:
        json.loads(raw_output, cls=binarycDecoder)
    """

    # return json.loads(json.dumps(ast.literal_eval(raw_output)), cls=binarycDecoder)
    return json.loads(raw_output, cls=binarycDecoder)


def binaryc_json_serializer(obj: Any) -> Any:
    """
    Custom serialiser for binary_c to use when functions are present in the dictionary
    that we want to export.

    Function objects will be turned into str representations of themselves

    Args:
        obj: The object that might not be serialisable

    Returns:
        Either string representation of object if the object is a function, or the object itself
    """

    # note: the function check must come first, so that py_rinterpolate
    # is only touched for non-function objects
    if inspect.isfunction(obj) or isinstance(obj, py_rinterpolate.Rinterpolate):
        return str(obj)
    return obj


class binarycDecoder(json.JSONDecoder):
    """
    Custom decoder to transform the numbers that are strings to actual floats
    """

    def decode(self, s):
        """
        Entry point function for decoding
        """

        return self._decode(super().decode(s))

    def _decode(self, o):
        """
        Walk the decoded object and convert numeric strings to floats.

        Dictionaries and lists are traversed recursively; strings that
        parse as a float are replaced by the float value, everything
        else is returned unchanged.
        """

        if isinstance(o, str):
            try:
                return float(o)
            except ValueError:
                return o
        if isinstance(o, dict):
            return {key: self._decode(val) for key, val in o.items()}
        if isinstance(o, list):
            return [self._decode(item) for item in o]
        return o


class BinaryCEncoder(json.JSONEncoder):
    """
    Encoding class function to attempt to convert things to strings.
    """

    def default(self, o):
        """
        Fallback serialiser: try str() first; if that fails, defer to
        the base class default (which raises the TypeError).
        """
        try:
            return str(o)
        except TypeError:
            # Let the base class default method raise the TypeError
            return json.JSONEncoder.default(self, o)


def format_ensemble_results(ensemble_dictionary):
    """
    Function to handle all the steps of formatting the ensemble output again.

    Input:
        ensemble_dictionary: dictionary containing all the ensemble results
    """

    # First change the string keys to floats
    results = recursive_change_key_to_float(ensemble_dictionary)
    gc.collect()

    # Then sort the dictionary on those float keys
    results = custom_sort_dict(results)
    gc.collect()

    # Then change the keys back to strings, but with a %g format
    results = recursive_change_key_to_string(results)
    gc.collect()

    # Put back in the dictionary
    return results
- - Input: - ensemble_dictionary: dictionary containing all the ensemble results - """ - - original_ensemble_results = ensemble_dictionary - - float_format_ensemble_results = recursive_change_key_to_float( - original_ensemble_results - ) - del original_ensemble_results - gc.collect() - - # Then sort the dictionary - sorted_ensemble_results = custom_sort_dict(float_format_ensemble_results) - del float_format_ensemble_results - gc.collect() - - # Then Change the keys back to a string but with a %g format. - reformatted_ensemble_results = recursive_change_key_to_string( - sorted_ensemble_results - ) - del sorted_ensemble_results - gc.collect() - - # Put back in the dictionary - return reformatted_ensemble_results - - -def subtract_dicts(dict_1: dict, dict_2: dict) -> dict: - """ - Function to subtract two dictionaries. - - Only allows values to be either a dict or a numerical type - - For the overlapping keys (key name present in both dicts): - When the keys are of the same type: - - If the types are of numerical type: subtract the value at dict 2 from dict 1. - - If the types are both dictionaries: call this function with the subdicts - - When the keys are not of the same type: - - if the keys are all of numerical types - - For the unique keys: - - if the key is from dict 1: adds the value to the new dict (be it numerical value or dict) - - If the key is from dict 2: Adds the negative of its value in case of numerical type. - if the type is a dict, the result of subtract_dicts({}, dict_2[key]) will be set - - If the result is 0, the key will be removed from the resulting dict. - If that results in an empty dict, the dict will be removed too. 
- - Args: - dict_1: first dictionary - dict_2: second dictionary - - Returns: - Subtracted dictionary - """ - - # Set up new dict - new_dict = {} - - # Define allowed numerical types - ALLOWED_NUMERICAL_TYPES = (float, int, np.float64) - - # - keys_1 = dict_1.keys() - keys_2 = dict_2.keys() - - # Find overlapping keys of both dicts - overlapping_keys = set(keys_1).intersection(set(keys_2)) - - # Find the keys that are unique - unique_to_dict_1 = set(keys_1).difference(set(keys_2)) - unique_to_dict_2 = set(keys_2).difference(set(keys_1)) - - # Add the unique keys to the new dict - for key in unique_to_dict_1: - # If these items are numerical types - if isinstance(dict_1[key], ALLOWED_NUMERICAL_TYPES): - new_dict[key] = dict_1[key] - if new_dict[key] == 0: - del new_dict[key] - - # Else, to be safe we should deepcopy them - elif isinstance(dict_1[key], dict): - copy_dict = copy.deepcopy(dict_1[key]) - new_dict[key] = copy_dict - else: - msg = "Error: using unsupported type for key {}: {}".format( - key, type(dict_1[key]) - ) - print(msg) - raise ValueError(msg) - - # Add the unique keys to the new dict - for key in unique_to_dict_2: - # If these items are numerical type, we should add the negative of the value - if isinstance(dict_2[key], ALLOWED_NUMERICAL_TYPES): - new_dict[key] = -dict_2[key] - if new_dict[key] == 0: - del new_dict[key] - - # Else we should place the negative of that dictionary in the new place - elif isinstance(dict_2[key], dict): - new_dict[key] = subtract_dicts({}, dict_2[key]) - else: - msg = "Error: using unsupported type for key {}: {}".format( - key, type(dict_2[key]) - ) - print(msg) - raise ValueError(msg) - - # Go over the common keys: - for key in overlapping_keys: - - # See whether the types are actually the same - if not type(dict_1[key]) is type(dict_2[key]): - # Exceptions: - if (type(dict_1[key]) in ALLOWED_NUMERICAL_TYPES) and ( - type(dict_2[key]) in ALLOWED_NUMERICAL_TYPES - ): - # We can safely subtract the values since they are 
all numeric - new_dict[key] = dict_1[key] - dict_2[key] - if new_dict[key] == 0: - del new_dict[key] - - else: - msg = "Error key: {} value: {} type: {} and key: {} value: {} type: {} are not of the same type and cannot be merged".format( - key, - dict_1[key], - type(dict_1[key]), - key, - dict_2[key], - type(dict_2[key]), - ) - - print(msg) - raise ValueError(msg) - - # This is where the keys are the same - else: - # If these items are numeric types - if isinstance(dict_1[key], ALLOWED_NUMERICAL_TYPES): - new_dict[key] = dict_1[key] - dict_2[key] - - # Remove entry if the value is 0 - if new_dict[key] == 0: - del new_dict[key] - - # Else, to be safe we should deepcopy them - elif isinstance(dict_1[key], dict): - new_dict[key] = subtract_dicts(dict_1[key], dict_2[key]) - - # Remove entry if it results in an empty dict - # TODO: write test to prevent empty dicts from showing up - if not new_dict[key]: - del new_dict[key] - else: - msg = "Error: using unsupported type for key {}: {}".format( - key, type(dict_2[key]) - ) - print(msg) - raise ValueError(msg) - - # - return new_dict - def get_moe_di_stefano_dataset(options, verbosity=0): """ @@ -1217,15 +1017,7 @@ def filter_arg_dict(arg_dict: dict) -> dict: filtered dictionary (pairs with NULL and Function values are removed) """ - old_dict = arg_dict.copy() - new_dict = {} - - for key in old_dict.keys(): - if not old_dict[key] in ["NULL", "Function"]: - if not old_dict[key] == "": - new_dict[key] = old_dict[key] - - return new_dict + return filter_dict(arg_dict.copy(),["NULL", "Function"]) def create_arg_string( @@ -1706,792 +1498,4 @@ def load_logfile(logfile: str) -> None: ######################################################## -def inspect_dict( - input_dict: dict, indent: int = 0, print_structure: bool = True -) -> dict: - """ - Function to (recursively) inspect a (nested) dictionary. 
- The object that is returned is a dictionary containing the key of the input_dict, but as value - it will return the type of what the value would be in the input_dict - - In this way we inspect the structure of these dictionaries, rather than the exact contents. - - Args: - input_dict: dictionary you want to inspect - print_structure: (optional, default = True) - indent: (optional, default = 0) indent of the first output - - Returns: - Dictionary that has the same structure as the input_dict, but as values it has the - type(input_dict[key]) (except if the value is a dict) - """ - - structure_dict = collections.OrderedDict() # TODO: check if this still works - - # - for key, value in input_dict.items(): - structure_dict[key] = type(value) - - if print_structure: - print("\t" * indent, key, type(value)) - - if isinstance(value, dict): - structure_dict[key] = inspect_dict( - value, indent=indent + 1, print_structure=print_structure - ) - - return structure_dict - - -def count_keys_recursive(input_dict): - """ - Function to count the total number of keys in a dictionary - """ - - local_count = 0 - for key in input_dict.keys(): - local_count += 1 - if isinstance(input_dict[key], (dict, collections.OrderedDict)): - local_count += count_keys_recursive(input_dict[key]) - return local_count - - -def merge_dicts(dict_1: dict, dict_2: dict) -> dict: - """ - Function to merge two dictionaries in a custom way. 
- - Behaviour: - - When dict keys are only present in one of either: - - we just add the content to the new dict - - When dict keys are present in both, we decide based on the value types how to combine them: - - dictionaries will be merged by calling recursively calling this function again - - numbers will be added - - (opt) lists will be appended - - In the case that the instances do not match: for now I will raise an error - - Args: - dict_1: first dictionary - dict_2: second dictionary - - Returns: - Merged dictionary - - """ - - # Set up new dict - new_dict = collections.OrderedDict() # TODO: check if this still necessary - - # - keys_1 = dict_1.keys() - keys_2 = dict_2.keys() - - # Find overlapping keys of both dicts - overlapping_keys = set(keys_1).intersection(set(keys_2)) - - # Find the keys that are unique - unique_to_dict_1 = set(keys_1).difference(set(keys_2)) - unique_to_dict_2 = set(keys_2).difference(set(keys_1)) - - # Add the unique keys to the new dict - for key in unique_to_dict_1: - # If these items are ints or floats, then just put them in - if isinstance(dict_1[key], (float, int)): - new_dict[key] = dict_1[key] - # Else, to be safe we should deepcopy them - else: - copy_dict = copy.deepcopy(dict_1[key]) - new_dict[key] = copy_dict - - for key in unique_to_dict_2: - # If these items are ints or floats, then just put them in - if isinstance(dict_2[key], (float, int)): - new_dict[key] = dict_2[key] - # Else, to be safe we should deepcopy them - else: - copy_dict = copy.deepcopy(dict_2[key]) - new_dict[key] = copy_dict - - # Go over the common keys: - for key in overlapping_keys: - - # If they keys are not the same, it depends on their type whether we still deal with them at all, or just raise an error - if not type(dict_1[key]) is type(dict_2[key]): - # Exceptions: numbers can be added - if isinstance(dict_1[key], (int, float, np.float64)) and isinstance( - dict_2[key], (int, float, np.float64) - ): - new_dict[key] = dict_1[key] + dict_2[key] - - 
# Exceptions: versions of dicts can be merged - elif isinstance( - dict_1[key], (dict, collections.OrderedDict, type(AutoVivificationDict)) - ) and isinstance( - dict_2[key], (dict, collections.OrderedDict, type(AutoVivificationDict)) - ): - new_dict[key] = merge_dicts(dict_1[key], dict_2[key]) - - # If the above cases have not dealt with it, then we should raise an error - else: - print( - "Error key: {} value: {} type: {} and key: {} value: {} type: {} are not of the same type and cannot be merged".format( - key, - dict_1[key], - type(dict_1[key]), - key, - dict_2[key], - type(dict_2[key]), - ) - ) - raise ValueError - - # Here we check for the cases that we want to explicitly catch. Ints will be added, - # floats will be added, lists will be appended (though that might change) and dicts will be - # dealt with by calling this function again. - else: - # ints - # Booleans (has to be the type Bool, not just a 0 or 1) - if isinstance(dict_1[key], bool) and isinstance(dict_2[key], bool): - new_dict[key] = dict_1[key] or dict_2[key] - - elif isinstance(dict_1[key], int) and isinstance(dict_2[key], int): - new_dict[key] = dict_1[key] + dict_2[key] - - # floats - elif isinstance(dict_1[key], float) and isinstance(dict_2[key], float): - new_dict[key] = dict_1[key] + dict_2[key] - - # lists - elif isinstance(dict_1[key], list) and isinstance(dict_2[key], list): - new_dict[key] = dict_1[key] + dict_2[key] - - # Astropy quantities (using a dummy type representing the numpy array) - elif isinstance(dict_1[key], type(np.array([1]) * u.m)) and isinstance( - dict_2[key], type(np.array([1]) * u.m) - ): - new_dict[key] = dict_1[key] + dict_2[key] - - # dicts - elif isinstance(dict_1[key], dict) and isinstance(dict_2[key], dict): - new_dict[key] = merge_dicts(dict_1[key], dict_2[key]) - - else: - print( - "Object types {}: {} ({}), {} ({}) not supported.".format( - key, - dict_1[key], - type(dict_1[key]), - dict_2[key], - type(dict_2[key]), - ) - ) - raise ValueError - - # - 
return new_dict - - -def update_dicts(dict_1: dict, dict_2: dict) -> dict: - """ - Function to update dict_1 with values of dict_2 in a recursive way. - - Behaviour: - - When dict keys are only present in one of either: - - we just add the content to the new dict - - When dict keys are present in both, we decide based on the value types how to combine them: - - value of dict2 will be taken - - Args: - dict_1: first dictionary - dict_2: second dictionary - - Returns: - New dictionary with Updated values - - """ - - # Set up new dict of the same type as dict_1 - # - # Note: setting directly to OrderedDict fails in some cases - # so instead we take a (shallow) copy of dict_1 which will - # have the same type as dict_1, then clear it. (There must - # be a better way to do this...) - new_dict = dict_1.copy() # OrderedDict() # TODO: check if this still works - new_dict.clear() - - keys_1 = dict_1.keys() - keys_2 = dict_2.keys() - - # Find overlapping keys of both dicts - overlapping_keys = set(keys_1).intersection(set(keys_2)) - - # Find the keys that are unique - unique_to_dict_1 = set(keys_1).difference(set(keys_2)) - unique_to_dict_2 = set(keys_2).difference(set(keys_1)) - - # Add the unique keys to the new dict - for key in unique_to_dict_1: - # If these items are ints or floats, then just put them in - if isinstance(dict_1[key], (float, int)): - new_dict[key] = dict_1[key] - # Else, to be safe we should deepcopy them - else: - copy_dict = copy.deepcopy(dict_1[key]) - new_dict[key] = copy_dict - - for key in unique_to_dict_2: - # If these items are ints or floats, then just put them in - if isinstance(dict_2[key], (float, int)): - new_dict[key] = dict_2[key] - # Else, to be safe we should deepcopy them - else: - copy_dict = copy.deepcopy(dict_2[key]) - new_dict[key] = copy_dict - - # Go over the common keys: - for key in overlapping_keys: - - # See whether the types are actually the same - if not type(dict_1[key]) is type(dict_2[key]): - # Exceptions: - if 
(type(dict_1[key]) in [int, float]) and ( - type(dict_2[key]) in [int, float] - ): - new_dict[key] = dict_2[key] - - else: - print( - "Error key: {} value: {} type: {} and key: {} value: {} type: {} are not of the same type and cannot be merged".format( - key, - dict_1[key], - type(dict_1[key]), - key, - dict_2[key], - type(dict_2[key]), - ) - ) - raise ValueError - - # Here we check for the cases that we want to explicitly catch. Ints will be added, - # floats will be added, lists will be appended (though that might change) and dicts will be - # dealt with by calling this function again. - else: - # dicts - if isinstance(dict_1[key], dict) and isinstance(dict_2[key], dict): - new_dict[key] = update_dicts(dict_1[key], dict_2[key]) - else: - new_dict[key] = dict_2[key] - - # - return new_dict - - -def multiply_values_dict(input_dict, factor): - """ - Function that goes over dictionary recursively and multiplies the value if possible by a factor - - If the key equals "general_info", the multiplication gets skipped - """ - - for key in input_dict: - if not key == "general_info": - if isinstance(input_dict[key], (dict, collections.OrderedDict)): - input_dict[key] = multiply_values_dict(input_dict[key], factor) - else: - if isinstance(input_dict[key], (int, float)): - input_dict[key] = input_dict[key] * factor - - return input_dict - - -def custom_sort_dict(input_dict): - """ - Returns a dictionary that is ordered, but can handle numbers better than normal OrderedDict - - When the keys of the current dictionary are of mixed type, we first find all the unique types. - Sort that list of type names. Then find the values that fit that type. - Sort those and append them to the sorted keys list. - This is done until all the keys are sorted. 
- - All objects other than dictionary types are directly return as they are - """ - - # If the new input is a dictionary, then try to sort it - if isinstance(input_dict, (dict, collections.OrderedDict)): - new_dict = collections.OrderedDict() - - keys = input_dict.keys() - - # Check if types are the same - all_types_keys = [] - for key in keys: - if not type(key) in all_types_keys: - all_types_keys.append(type(key)) - - # If there are multiple types, then we loop over them and do a piece wise sort - if len(all_types_keys) > 1: - msg = "Different types in the same dictionary key set" - - # Create a string repr of the type name to sort them afterwards - str_types = {repr(el): el for el in all_types_keys} - - # Set up sorted keys list - sorted_keys = [] - - for key_str_type in sorted(str_types.keys()): - cur_type = str_types[key_str_type] - - cur_list = [key for key in keys if isinstance(key, cur_type)] - cur_sorted_list = sorted(cur_list) - - sorted_keys = sorted_keys + cur_sorted_list - else: - sorted_keys = sorted(keys) - - for key in sorted_keys: - new_dict[key] = custom_sort_dict(copy.deepcopy(input_dict[key])) - - return new_dict - return input_dict - - -def recursive_change_key_to_float(input_dict): - """ - Function to recursively change the key to float - - This only works if the dict contains just sub-dicts or numbers/strings. 
- Does not work with lists as values - """ - - new_dict = collections.OrderedDict() # TODO: check if this still works - - for key in input_dict: - if isinstance(input_dict[key], (dict, collections.OrderedDict)): - try: - num_key = float(key) - new_dict[num_key] = recursive_change_key_to_float( - copy.deepcopy(input_dict[key]) - ) - except ValueError: - new_dict[key] = recursive_change_key_to_float( - copy.deepcopy(input_dict[key]) - ) - else: - try: - num_key = float(key) - new_dict[num_key] = input_dict[key] - except ValueError: - new_dict[key] = input_dict[key] - - return new_dict - - -def recursive_change_key_to_string(input_dict): - """ - Function to recursively change the key back to a string but this time in a format that we decide - """ - - new_dict = collections.OrderedDict() # TODO: check if this still works - - for key in input_dict: - if isinstance(input_dict[key], (dict, collections.OrderedDict)): - if isinstance(key, (int, float)): - string_key = "{:g}".format(key) - new_dict[string_key] = recursive_change_key_to_string( - copy.deepcopy(input_dict[key]) - ) - else: - new_dict[key] = recursive_change_key_to_string( - copy.deepcopy(input_dict[key]) - ) - else: - if isinstance(key, (int, float)): - string_key = "{:g}".format(key) - new_dict[string_key] = input_dict[key] - else: - new_dict[key] = input_dict[key] - - return new_dict - - -##### -def extract_ensemble_json_from_string(binary_c_output: str) -> dict: - """ - Function to extract the ensemble_json information from a raw binary_c output string - - Args: - binary_c_output: raw binary_c output string - - Returns: - JSON dictionary with the parsed ENSEMBLE_JSON data - """ - - json_dict = None - - try: - # If there is no output just return an empty dict: - if not binary_c_output: - json_dict = {} - return json_dict - - ensemble_jsons_strings = [ - line - for line in binary_c_output.splitlines() - if line.startswith("ENSEMBLE_JSON") - ] - - json_dict = handle_ensemble_string_to_json( - 
ensemble_jsons_strings[0][len("ENSEMBLE_JSON ") :] - ) - - if len(ensemble_jsons_strings) > 1: - verbose_print( - "Warning: There is more than one line starting with ENSEMBLE_JSON. Taking the first, but you should check this out.", - 1, - 0, - ) - except IndexError: - verbose_print( - "Error: Couldn't extract the ensemble information from the output string", - 0, - 0, - ) - - return json_dict - - -def handle_ensemble_string_to_json(raw_output): - """ - Function that deals with the raw output of the ensemble and - creates a working JSON dictionary out of it. - - Having this wrapper makes it easy to - - Args: - raw_output: raw output of the ensemble dump by binary_c - - Returns: - json.loads(raw_output, cls=binarycDecoder) - - """ - - # return json.loads(json.dumps(ast.literal_eval(raw_output)), cls=binarycDecoder) - return json.loads(raw_output, cls=binarycDecoder) - - -def binaryc_json_serializer(obj: Any) -> Any: - """ - Custom serialiser for binary_c to use when functions are present in the dictionary - that we want to export. - - Function objects will be turned into str representations of themselves - - Args: - obj: The object that might not be serialisable - - Returns: - Either string representation of object if the object is a function, or the object itself - """ - - if inspect.isfunction(obj) or isinstance(obj, py_rinterpolate.Rinterpolate): - return str(obj) - return obj - - -class binarycDecoder(json.JSONDecoder): - """ - Custom decoder to transform the numbers that are strings to actual floats - """ - - def decode(self, s): - """ - Entry point function for decoding - """ - - result = super().decode( - s - ) # result = super(Decoder, self).decode(s) for Python 2.x - return self._decode(result) - - def _decode(self, o): - """ - Depending on the type of object is will determine whether to loop over the elements, - or try to change the type of the object from string to float - - The try except might be a somewhat rough solution but it catches all cases. 
- """ - - # Check if we can turn it into a float - # if isinstance(o, str) or isinstance(o, unicode): - if isinstance(o, str): - try: - return float(o) - except ValueError: - return o - elif isinstance(o, dict): - return {k: self._decode(v) for k, v in o.items()} - elif isinstance(o, list): - return [self._decode(v) for v in o] - else: - return o - - -class BinaryCEncoder(json.JSONEncoder): - """ - Encoding class function to attempt to convert things to strings. - """ - - def default(self, o): - """ - Converting function. Well, could be more precise. look at the JSON module - """ - try: - str_repr = str(o) - except TypeError: - pass - else: - return str_repr - - # Let the base class default method raise the TypeError - return json.JSONEncoder.default(self, o) - -def open_ensemble(filename): - """ - Function to open an ensemble at filename for reading and decompression if required. - """ - compression = ensemble_compression(filename) - if ensemble_file_type(filename) is 'msgpack': - flags = 'rb' - else: - flags = 'rt' - if compression is 'bzip2': - file_object = bz2.open(filename,flags) - elif compression is 'gzip': - file_object = gzip.open(filename,flags) - else: - file_object = open(filename,flags) - return file_object - -def ensemble_compression(filename): - """ - Return the compression type of the ensemble file, based on its filename extension. - """ - if filename.endswith('.bz2'): - return 'bzip2' - elif filename.endswith('.gz'): - return 'gzip' - else: - return None - -def ensemble_file_type(filename): - """ - Returns the file type of an ensemble file. - """ - if '.json' in filename: - filetype = 'JSON' - elif '.msgpack' in filename: - filetype = 'msgpack' - else: - filetype = None - return filetype - -def load_ensemble(filename, - convert_float_keys=True, - select_keys=None, - timing=False): - """ - Function to load an ensemeble file, even if it is compressed, - and return its contents to as a Python dictionary. 
- - Args: - convert_float_keys : if True, converts strings to floats. - select_keys : a list of keys to be selected from the ensemble. - """ - - # open the file - - # load with some info to the terminal - print("Loading JSON...") - - # open the ensemble and get the file type - file_object = open_ensemble(filename) - filetype = ensemble_file_type(filename) - - if not filetype or not file_object: - print("Unknown filetype : your ensemble should be saved either as JSON or msgpack data.") - exit() - - with Halo(text='Loading', interval=250, spinner='moon',color='yellow'): - tstart = time.time() - _loaded = False - def _hook(obj): - nonlocal _loaded - if _loaded == False: - _loaded = True - print("\nLoaded {} data, now putting in a dictionary".format(filetype), - flush=True) - return obj - - if filetype is 'JSON': - # orjson promises to be fast, but it doesn't seem to be - # and fails on "Infinity"... oops - #data = orjson.loads(file_object.read()) - - # simplejson is faster than standard json and "just works" - # on the big Moe set in 37s - data = simplejson.load(file_object, - object_hook=_hook) - - # standard json module - # on the big Moe set takes 42s - #data = json.load(file_object, - # object_hook=_hook) - elif filetype is 'msgpack': - data = msgpack.load(file_object, - object_hook=_hook) - - if timing: - print("\n\nTook {} s to load the data\n\n".format(time.time() - tstart), - flush=True) - - # strip non-selected keys, if a list is given in select_keys - if select_keys: - keys = list(data['ensemble'].keys()) - for key in keys: - if not key in select_keys: - del data['ensemble'][key] - - # perhaps convert floats? - tstart = time.time() - if convert_float_keys: - # we need to convert keys to floats: - # this is ~ a factor 10 faster than David's - # recursive_change_key_to_float routine, - # probably because this version only does - # the float conversion, nothing else. - def _to_float(json_data): - # assumes nested dicts ... 
- #new_data = {} - - # but this copies the variable type, but has some - # pointless copying - #new_data = copy.copy(json_data) - #new_data.clear() - - # this adopts the type correctly *and* is fast - new_data = type(json_data)() - - for k,v in json_data.items(): - if isinstance(v, list): - v = [ _to_float(item) if isinstance(item, collections.abc.Mapping) \ - else item for item in v ] - elif isinstance(v, collections.abc.Mapping): - # dict, ordereddict, etc. - v = _to_float(v) - try: - f = float(k) - new_data[f] = json_data[k] - except: - new_data[k] = v - return new_data - - - # timings are for 100 iterations on the big Moe data set - #data = format_ensemble_results(data) # 213s - #data = recursive_change_key_to_float(data) # 61s - data = _to_float(data) # 6.94s - - if timing: - print("\n\nTook {} s to convert floats\n\n".format(time.time() - tstart), - flush=True) - - # return data - return data - -def ensemble_setting(ensemble,parameter_name): - """ - Function to get the setting of parameter_name in the given ensemble, or return the default value. 
- """ - value = None - - try: - value = ensemble['metadata']['settings']['population_settings']['bse_options'][parameter_name] - except KeyError: - value = None - - if value is None: - try: - value = ensemble['metadata']['settings']['population_settings']['grid_options'][parameter_name] - except KeyError: - value = None - - if value is None: - try: - value = ensemble['metadata']['settings']['population_settings']['custom_options'][parameter_name] - except KeyError: - value = None - - # not found, try the default - if value is None: - try: - value = ensemble['metadata']['settings']['binary_c_defaults'][parameter_name] - except KeyError: - value = None - - return value - -############################################################ -# code to walk a dictionary recursively based on -# https://stackoverflow.com/questions/13687924/setting-a-value-in-a-nested-python-dictionary-given-a-list-of-indices-and-value -def _nested_set(dic, keys, value): - for key in keys[:-1]: - dic = dic.setdefault(key, {}) - dic[keys[-1]] = value -def _nested_get(dic, keys): - for key in keys[:-1]: - dic = dic.setdefault(key, {}) - return dic[keys[-1]] - -# function to walk through the dictionary, multiplying -# float values by const -def _recursive_normalize_floats(path,d,const,parent=None): - if not parent: - parent = d - for k,v in d.items(): - if isinstance(v,float): - path.append(k) - # must be a float, multiply by the constant - _nested_set(parent,path,v * const) - path.pop() - elif isinstance(v, str) or isinstance(v, int): - path.append(k) - # do nothing to strings or ints - path.pop() - elif v is None: - path.append(k) - path.pop() - # dicts - # note: isinstance isn't enough, we need to check the Mapping - elif isinstance(v,collections.abc.Mapping): - path.append(k) - # nested dict - _recursive_normalize_floats(path,v,const,parent=parent) - path.pop() - else: - print ("###Type {} not recognized: {}.{}={}".format(type(v), ".".join(path),k, v)) - -def multiply_float_values(d,const): - 
""" - multiply_float_values : A function to recursively multiply values of a (nested) dictionary that are floats by a constant. Nested dictionaries call this function recursively. - - Args: - d = the dictionary - const = the constant that multiplies float values - """ - path=[] - _recursive_normalize_floats(path,d,const,parent=d) - ############################################################ diff --git a/binarycpython/utils/grid.py b/binarycpython/utils/grid.py index 94d06289d2b4efb99ee5da5de67c3057644d95c1..bca7e1e69871a40f7507804af980c7acb87a80cc 100644 --- a/binarycpython/utils/grid.py +++ b/binarycpython/utils/grid.py @@ -71,27 +71,31 @@ from binarycpython.utils.functions import ( filter_arg_dict, get_help_all, return_binary_c_version_info, - binaryc_json_serializer, verbose_print, - merge_dicts, - update_dicts, - extract_ensemble_json_from_string, get_moe_di_stefano_dataset, - recursive_change_key_to_float, - custom_sort_dict, - recursive_change_key_to_string, - multiply_values_dict, - format_ensemble_results, - AutoVivificationDict, trem, conv_time_units, mem_use, ANSI_colours, check_if_in_shell, format_number, - timedelta, + timedelta +) +from binarycpython.utils.ensemble import ( + binaryc_json_serializer, + ensemble_compression, ensemble_file_type, - ensemble_compression + extract_ensemble_json_from_string, + format_ensemble_results, +) +from binarycpython.utils.dicts import ( + AutoVivificationDict, + custom_sort_dict, + merge_dicts, + multiply_values_dict, + recursive_change_key_to_float, + recursive_change_key_to_string, + update_dicts, ) # from binarycpython.utils.hpc_functions import (