diff --git a/binarycpython/utils/distribution_functions.py b/binarycpython/utils/distribution_functions.py
index cdb941bb4858f25febccdf3720eb401ff18b5a26..146d45f1ec9e6f4ae4b69d987bd142381a47c807 100644
--- a/binarycpython/utils/distribution_functions.py
+++ b/binarycpython/utils/distribution_functions.py
@@ -1867,7 +1867,7 @@ def Moe_di_Stefano_2017_pdf(options, verbosity=0):
     """

     verbose_print(
-        "\tMoe_di_Stefano_2017_pdf with options:\n\t\t{}".format(json.dumps(options)),
+        "\tMoe_di_Stefano_2017_pdf with options:\n\t\t{}".format(json.dumps(options,ensure_ascii=False)),
         verbosity,
         _MOE2017_VERBOSITY_LEVEL,
     )
diff --git a/binarycpython/utils/ensemble.py b/binarycpython/utils/ensemble.py
index 72a0ced7d9879a9398135c70adb028e729b447d5..b61ff253db7e54053f6c2d66a18b5d80059af380 100644
--- a/binarycpython/utils/ensemble.py
+++ b/binarycpython/utils/ensemble.py
@@ -72,7 +72,7 @@ def ensemble_setting(ensemble, parameter_name):

     return value

-def open_ensemble(filename):
+def open_ensemble(filename,encoding='utf-8'):
     """
     Function to open an ensemble at filename for reading and decompression if required.
     """
@@ -82,11 +82,11 @@ def open_ensemble(filename):
     else:
         flags = "rt"
     if compression is "bzip2":
-        file_object = bz2.open(filename, flags)
+        file_object = bz2.open(filename, flags, encoding=encoding)
     elif compression is "gzip":
-        file_object = gzip.open(filename, flags)
+        file_object = gzip.open(filename, flags, encoding=encoding)
     else:
-        file_object = open(filename, flags)
+        file_object = open(filename, flags, encoding=encoding)
     return file_object
@@ -115,7 +115,7 @@ def ensemble_file_type(filename):
     return filetype

-def load_ensemble(filename, convert_float_keys=True, select_keys=None, timing=False):
+def load_ensemble(filename, convert_float_keys=True, select_keys=None, timing=False, flush=False):
     """
     Function to load an ensemeble file, even if it is compressed,
     and return its contents to as a Python dictionary.
@@ -128,7 +128,7 @@ def load_ensemble(filename, convert_float_keys=True, select_keys=None, timing=Fa

     # open the file
     # load with some info to the terminal
-    print("Loading JSON...")
+    print("Loading JSON...",flush=flush)

     # open the ensemble and get the file type
     file_object = open_ensemble(filename)
@@ -136,7 +136,7 @@ def load_ensemble(filename, convert_float_keys=True, select_keys=None, timing=Fa

     if not filetype or not file_object:
         print(
-            "Unknown filetype : your ensemble should be saved either as JSON or msgpack data."
+ "Unknown filetype : your ensemble should be saved either as JSON or msgpack data.",flush=flush ) sys.exit() @@ -261,8 +261,6 @@ def handle_ensemble_string_to_json(raw_output): json.loads(raw_output, cls=binarycDecoder) """ - - # return json.loads(json.dumps(ast.literal_eval(raw_output)), cls=binarycDecoder) return json.loads(raw_output, cls=binarycDecoder) diff --git a/binarycpython/utils/functions.py b/binarycpython/utils/functions.py index 9aea0de8185f3d35fd461c8b50a176887aba50c4..ad6dce198733ae2b132d936fae6c2852241ce007 100644 --- a/binarycpython/utils/functions.py +++ b/binarycpython/utils/functions.py @@ -255,7 +255,7 @@ def get_moe_di_stefano_dataset(options, verbosity=0): else: # Read input data and Clean up the data if there are white spaces around the keys - with open(options["file"], "r") as data_filehandle: + with open(options["file"], "r",encoding='utf-8') as data_filehandle: datafile_data = data_filehandle.read() datafile_data = datafile_data.replace('" ', '"') datafile_data = datafile_data.replace(' "', '"') @@ -537,14 +537,14 @@ def create_hdf5(data_dir: str, name: str) -> None: [file for file in content_data_dir if file.endswith("_settings.json")][0], ) - with open(settings_file, "r") as settings_file: + with open(settings_file, "r",encoding='utf-8') as settings_file: settings_json = json.load(settings_file) # Create settings group settings_grp = hdf5_file.create_group("settings") # Write version_string to settings_group - settings_grp.create_dataset("used_settings", data=json.dumps(settings_json)) + settings_grp.create_dataset("used_settings", data=json.dumps(settings_json, ensure_ascii=False)) # Get data files data_files = [el for el in content_data_dir if el.endswith(".dat")] @@ -855,11 +855,49 @@ def parse_binary_c_version_info(version_info_string: str) -> dict: version_info_dict["dt_limits"] = dt_limits_dict if dt_limits_dict else None + ############################## + # Units + units = {el for el in cleaned if el.startswith("Unit ")} + cleaned -= units + units_dict={} + for el in units: + split_info = el.split("Unit ")[-1].split(",") + s = split_info[0].split(" is ") + + if len(s)==2: + long,short = [i.strip().strip("\"") for i in s] + elif len(s)==1: + long,short = None,s[0] + else: + print("Warning: Failed to split unit string {}".format(el)) + + to_cgs = (split_info[1].split())[3].strip().strip("\"") + code_units = split_info[2].split() + code_unit_type_num = int(code_units[3].strip().strip("\"")) + code_unit_type = code_units[4].strip().strip("\"") + code_unit_cgs_value = code_units[9].strip().strip("\"") + units_dict[long] = { + "long" : long, + "short" : short, + "to_cgs" : to_cgs, + "code_unit_type_num" : code_unit_type_num, + "code_unit_type" : code_unit_type, + "code_unit_cgs_value" : code_unit_cgs_value + } + + units = {el for el in cleaned if el.startswith("Units: ")} + cleaned -= units + for el in units: + el = el[7:] # removes "Units: " + units_dict["units list"] = el.strip('Units:') + + version_info_dict["units"] = units_dict + ########################## # Nucleosynthesis sources: # Split off nucsyn_sources = {el for el in cleaned if el.startswith("Nucleosynthesis")} - cleaned = cleaned - nucsyn_sources + cleaned -= nucsyn_sources # Fill dict nucsyn_sources_dict = {} @@ -914,7 +952,10 @@ def parse_binary_c_version_info(version_info_string: str) -> dict: split = el.split(" is ") key = split[0].strip() val = " is ".join(split[1:]).strip() - misc_dict[key] = val + if key in misc_dict: + misc_dict[key + ' (alt)'] = val + else: + misc_dict[key] = val 
misc_dict["uncaught"] = list(cleaned) @@ -1391,7 +1432,7 @@ def get_help_super(print_help: bool = False, fail_silently: bool = True) -> dict # check whether the descriptions of help_all and detailed help are the same if not fail_silently: if not parameter["description"] == detailed_help["description"]: - print(json.dumps(parameter, indent=4)) + print(json.dumps(parameter, indent=4, ensure_ascii=False)) ## put values into help all super dict # input type @@ -1409,7 +1450,7 @@ def get_help_super(print_help: bool = False, fail_silently: bool = True) -> dict section["parameters"][parameter_name] = parameter if print_help: - print(json.dumps(help_all_super_dict, indent=4)) + print(json.dumps(help_all_super_dict, indent=4, ensure_ascii=False)) return help_all_super_dict @@ -1462,7 +1503,7 @@ def write_binary_c_parameter_descriptions_to_rst_file(output_file: str) -> None: print("Filename doesn't end with .rst, please provide a proper filename") return None - with open(output_file, "w") as f: + with open(output_file, "w", encoding='utf-8') as f: print("Binary\\_c parameters", file=f) print("{}".format("=" * len("Binary\\_c parameters")), file=f) @@ -1521,7 +1562,7 @@ def load_logfile(logfile: str) -> None: """ - with open(logfile, "r") as file: + with open(logfile, "r", encoding='utf-8') as file: logfile_data = file.readlines() time_list = [] diff --git a/binarycpython/utils/grid.py b/binarycpython/utils/grid.py index e91ad4cb5ba2087e209fe5a72a7f4573289946ae..5f5db48a79de60126acc5915239858f0c9003d5f 100644 --- a/binarycpython/utils/grid.py +++ b/binarycpython/utils/grid.py @@ -194,8 +194,7 @@ class Population: ), "w", ) as f: - f.write(json.dumps(self.grid_options["Moe2017_options"], indent=4)) - f.close() + json.dump(self.grid_options["Moe2017_options"], f, indent=4, ensure_ascii=False) # Argline dict self.argline_dict = {} @@ -465,6 +464,7 @@ class Population: format_statment.format(ID), ), "w", + encoding='utf-8' ) as f: f.write(string) f.close() @@ -529,7 +529,7 @@ class Population: for key, value in kwargs.items(): grid_variable[key] = value verbose_print( - "Updated grid variable: {}".format(json.dumps(grid_variable, indent=4)), + "Updated grid variable: {}".format(json.dumps(grid_variable, indent=4, ensure_ascii=False)), self.grid_options["verbosity"], 1, ) @@ -738,7 +738,7 @@ class Population: self.grid_options["_grid_variables"][grid_variable["name"]] = grid_variable verbose_print( - "Added grid variable: {}".format(json.dumps(grid_variable, indent=4)), + "Added grid variable: {}".format(json.dumps(grid_variable, indent=4, ensure_ascii=False)), self.grid_options["verbosity"], 2, ) @@ -850,13 +850,15 @@ class Population: return dt * ncpus def export_all_info( - self, - use_datadir: bool = True, - outfile: Union[str, None] = None, - include_population_settings: bool = True, - include_binary_c_defaults: bool = True, - include_binary_c_version_info: bool = True, - include_binary_c_help_all: bool = True, + self, + use_datadir: bool = True, + outfile: Union[str, None] = None, + include_population_settings: bool = True, + include_binary_c_defaults: bool = True, + include_binary_c_version_info: bool = True, + include_binary_c_help_all: bool = True, + ensure_ascii: str = False, + indent: int = 4 ) -> Union[str, None]: """ Function that exports the all_info to a JSON file @@ -884,6 +886,9 @@ class Population: <custom_options["base_filename"]>_settings.json. 
                 Otherwise a file called simulation_<date+time>_settings.json will be created
             outfile: if use_datadir is false, a custom filename will be used
+            ensure_ascii: the ensure_ascii flag passed to json.dump and/or json.dumps
+                (Default: False)
+            indent: indentation passed to json.dump and/or json.dumps (default 4)
         """

         all_info = self.return_all_info(
@@ -923,14 +928,15 @@ class Population:
                 self.grid_options["verbosity"],
                 1,
             )
             with open(settings_fullname, "w") as file:
-                file.write(
-                    json.dumps(
-                        all_info_cleaned,
-                        indent=4,
-                        default=binaryc_json_serializer,
-                    )
+                json.dump(
+                    all_info_cleaned,
+                    file,
+                    indent=indent,
+                    default=binaryc_json_serializer,
+                    ensure_ascii=ensure_ascii
                 )
             return settings_fullname
         else:
@@ -952,10 +958,12 @@ class Population:
                 raise ValueError

             with open(outfile, "w") as file:
-                file.write(
-                    json.dumps(
-                        all_info_cleaned, indent=4, default=binaryc_json_serializer
-                    )
+                json.dump(
+                    all_info_cleaned,
+                    file,
+                    indent=indent,
+                    default=binaryc_json_serializer,
+                    ensure_ascii=ensure_ascii
                 )
             return outfile
@@ -1228,7 +1236,7 @@ class Population:
                               'status',
                               "{}.{}".format(oldjobid,
                                              self.grid_options['slurm_jobarrayindex']))
-        status = open(file).read()
+        status = open(file,encoding='utf-8').read()

         if status == 'finished':
             self.exit(code=0)
@@ -1284,11 +1292,9 @@ class Population:
             raise ValueError("Condor evolution not available at this moment")
         elif self.grid_options["slurm"] == 1:
-            # Slurm setup grid
+            # Slurm setup grid then exit
             self.slurm_grid()
-            # and then exit
-            print("Slurm jobs launched : exiting")
             self.exit(code=0)
         else:
             # Execute population evolution subroutines
@@ -1757,7 +1763,7 @@ class Population:
             include_binary_c_help_all=True,
         )
         self.grid_ensemble_results["metadata"]["settings"] = json.loads(
-            json.dumps(all_info, default=binaryc_json_serializer)
+            json.dumps(all_info, default=binaryc_json_serializer, ensure_ascii=False)
         )

         ##############################
@@ -2089,12 +2095,13 @@ class Population:
             # Each new system overrides the previous
             if self.grid_options["log_args"]:
                 with open(
-                    os.path.join(
-                        self.grid_options["log_args_dir"],
-                        "current_system",
-                        "process_{}.txt".format(self.process_ID),
-                    ),
-                    "w",
+                        os.path.join(
+                            self.grid_options["log_args_dir"],
+                            "current_system",
+                            "process_{}.txt".format(self.process_ID),
+                        ),
+                        "w",
+                        encoding='utf-8'
                 ) as f:
                     binary_cmdline_string = self._return_argline(full_system_dict)
                     f.write(binary_cmdline_string)
@@ -2130,12 +2137,13 @@ class Population:
             # Debug line: logging all the lines
             if self.grid_options["log_runtime_systems"] == 1:
                 with open(
-                    os.path.join(
-                        self.grid_options["tmp_dir"],
-                        "runtime_systems",
-                        "process_{}.txt".format(self.process_ID),
-                    ),
-                    "a+",
+                        os.path.join(
+                            self.grid_options["tmp_dir"],
+                            "runtime_systems",
+                            "process_{}.txt".format(self.process_ID),
+                        ),
+                        "a+",
+                        encoding='utf-8'
                 ) as f:
                     binary_cmdline_string = self._return_argline(full_system_dict)
                     f.write(
@@ -2337,15 +2345,18 @@ class Population:
                 "zero_prob_stars_skipped": zero_prob_stars_skipped,
             }
             with open(
-                os.path.join(
-                    self.grid_options["tmp_dir"],
-                    "process_summary",
-                    "process_{}.json".format(self.process_ID),
-                ),
-                "w",
+                    os.path.join(
+                        self.grid_options["tmp_dir"],
+                        "process_summary",
+                        "process_{}.json".format(self.process_ID),
+                    ),
+                    "w",
+                    encoding='utf-8'
             ) as f:
-                f.write(json.dumps(summary_dict, indent=4))
-                f.close()
+                json.dump(summary_dict,
+                          f,
+                          indent=4,
+                          ensure_ascii=False)

         # Set status to finished
         if self.was_killed():
@@ -3221,7 +3232,7 @@ class Population:
             1,
         )

-        with open(gridcode_filename, "w") as file:
+        with open(gridcode_filename, "w",encoding='utf-8') as file:
             file.write(self.code_string)

         # perhaps create symlink
@@ -3435,7 +3446,9 @@ class Population:
         # We can choose to perform a check on the source file, which checks if the lines start with 'binary_c'
         if check:
             source_file_check_filehandle = open(
-                self.grid_options["source_file_filename"], "r"
+                self.grid_options["source_file_filename"],
+                "r",
+                encoding='utf-8'
             )
             for line in source_file_check_filehandle:
                 if not line.startswith("binary_c"):
@@ -3449,7 +3462,9 @@ class Population:
                     )
                     raise ValueError

-        source_file_filehandle = open(self.grid_options["source_file_filename"], "r")
+        source_file_filehandle = open(self.grid_options["source_file_filename"],
+                                      "r",
+                                      encoding='utf-8')

         self.grid_options["_system_generator"] = source_file_filehandle
@@ -3728,7 +3743,7 @@ class Population:
     # Functions that aren't ordered yet
    ###################################################

-    def write_ensemble(self, output_file, data=None, sort_keys=True, indent=4):
+    def write_ensemble(self, output_file, data=None, sort_keys=True, indent=4, encoding='utf-8', ensure_ascii=False):
         """
         write_ensemble : Write ensemble results to a file.
@@ -3752,6 +3767,11 @@ class Population:

             indent : number of space characters used in the JSON indent. (Default: 4,
                 passed to json.dumps)
+
+            encoding : file encoding used when writing text (JSON) output.
+                (Default: 'utf-8'; msgpack output is binary, so no encoding applies)
+
+            ensure_ascii : the ensure_ascii flag passed to json.dump and/or json.dumps
+                (Default: False)
         """

         # get the file type
@@ -3773,23 +3793,26 @@ class Population:
             # JSON output
             if compression == "gzip":
                 # gzip
-                f = gzip.open(output_file, "wt")
+                f = gzip.open(output_file, "wt", encoding=encoding)
             elif compression == "bzip2":
                 # bzip2
-                f = bz2.open(output_file, "wt")
+                f = bz2.open(output_file, "wt", encoding=encoding)
             else:
                 # raw output (not compressed)
-                f = open(output_file, "wt")
-            f.write(json.dumps(data, sort_keys=sort_keys, indent=indent))
+                f = open(output_file, "wt", encoding=encoding)
+            f.write(json.dumps(data,
+                               sort_keys=sort_keys,
+                               indent=indent,
+                               ensure_ascii=ensure_ascii))
         elif file_type == "msgpack":
             # msgpack output (binary mode, so no encoding argument applies)
             if compression == "gzip":
                 f = gzip.open(output_file, "wb")
             elif compression == "bzip2":
                 f = bz2.open(output_file, "wb")
             else:
                 f = open(output_file, "wb")
             msgpack.dump(data, f)
         f.close()
@@ -3806,10 +3829,11 @@ class Population:

    ############################################################

     def write_binary_c_calls_to_file(
-        self,
-        output_dir: Union[str, None] = None,
-        output_filename: Union[str, None] = None,
-        include_defaults: bool = False,
+            self,
+            output_dir: Union[str, None] = None,
+            output_filename: Union[str, None] = None,
+            include_defaults: bool = False,
+            encoding='utf-8'
     ) -> None:
         """
         Function that loops over the grid code and writes the generated parameters to a file.
@@ -3887,7 +3911,7 @@ class Population:

         print("Writing binary_c calls to {}".format(binary_c_calls_full_filename))

         # Write to file
-        with open(binary_c_calls_full_filename, "w") as file:
+        with open(binary_c_calls_full_filename, "w", encoding=encoding) as file:

             # Get defaults and clean them, then overwrite them with the set values.
             if include_defaults:
                 # TODO: make sure that the defaults here are cleaned up properly
@@ -4092,12 +4116,13 @@ class Population:
             # Write arg lines to file
             argstring = self._return_argline(system_dict)
             with open(
-                os.path.join(
-                    self.grid_options["tmp_dir"],
-                    "failed_systems",
-                    "process_{}.txt".format(self.process_ID),
-                ),
-                "a+",
+                    os.path.join(
+                        self.grid_options["tmp_dir"],
+                        "failed_systems",
+                        "process_{}.txt".format(self.process_ID),
+                    ),
+                    "a+",
+                    encoding='utf-8'
             ) as f:
                 f.write(argstring + "\n")
                 f.close()
@@ -4128,13 +4153,14 @@ class Population:
             exist_ok=True,
         )
         with open(
-            os.path.join(
-                os.path.join(self.grid_options["tmp_dir"], "moe_distefano"),
-                "moeopts.dat",
-            ),
-            "w",
+                os.path.join(
+                    os.path.join(self.grid_options["tmp_dir"], "moe_distefano"),
+                    "moeopts.dat",
+                ),
+                "w",
+                encoding='utf-8'
         ) as f:
-            f.write(json.dumps(self.grid_options["Moe2017_options"], indent=4))
+            f.write(json.dumps(self.grid_options["Moe2017_options"], indent=4, ensure_ascii=False))
             f.close()

     def _load_moe_di_stefano_data(self):
@@ -4186,6 +4212,7 @@ class Population:
                     "moe.log",
                 ),
                 "w",
+                encoding='utf-8',
             ) as logfile:
                 logfile.write("log₁₀Masses(M☉) {}\n".format(logmasses))

@@ -4238,6 +4265,7 @@ class Population:
             with open(
                 os.path.join(self.grid_options["tmp_dir"], "moe_distefano", "moe.log"),
                 "a",
+                encoding='utf-8'
             ) as logfile:
                 logfile.write("log₁₀Periods(days) {}\n".format(logperiods))
@@ -4427,13 +4455,14 @@ class Population:
             exist_ok=True,
         )
         with open(
-            os.path.join(
-                os.path.join(self.grid_options["tmp_dir"], "moe_distefano"),
-                "moecache.json",
-            ),
-            "w",
+                os.path.join(
+                    os.path.join(self.grid_options["tmp_dir"], "moe_distefano"),
+                    "moecache.json",
+                ),
+                "w",
+                encoding='utf-8'
         ) as cache_filehandle:
-            cache_filehandle.write(json.dumps(Moecache, indent=4))
+            cache_filehandle.write(json.dumps(Moecache, indent=4, ensure_ascii=False))

         # Signal that the data has been loaded
         self.grid_options["_loaded_Moe2017_data"] = True
@@ -5213,14 +5242,14 @@ eccentricity3=0

         idfile = os.path.join(self.grid_options["slurm_dir"], "jobid")
         if not os.path.exists(idfile):
-            with open(idfile,"w") as fjobid:
+            with open(idfile,"w",encoding='utf-8') as fjobid:
                 fjobid.write("{jobid}\n".format(jobid=self.grid_options['slurm_jobid']))
                 fjobid.close()

         # save slurm status
         file = self.slurm_status_file()
         if file:
-            with open(file,'w') as f:
+            with open(file,'w',encoding='utf-8') as f:
                 f.write(string)
                 f.close()
         return
@@ -5334,8 +5363,12 @@ eccentricity3=0
                                       self.grid_options['slurm_warn_max_memory']))
             self.exit(code=1)

-        # set slurm_array
-        slurm_array = self.grid_options['slurm_array'] or "1-{njobs}\%{njobs}".format(njobs=self.grid_options['slurm_njobs'])
+        # set up slurm_array
+        if not self.grid_options['slurm_array_max_jobs']:
+            self.grid_options['slurm_array_max_jobs'] = self.grid_options['slurm_njobs']
+        slurm_array = self.grid_options['slurm_array'] or "1-{njobs}\%{max_jobs}".format(
+            njobs=self.grid_options['slurm_njobs'],
+            max_jobs=self.grid_options['slurm_array_max_jobs'])

         # get job id (might be passed in)
         jobid = self.grid_options['slurm_jobid'] if self.grid_options['slurm_jobid'] != "" else '$SLURM_ARRAY_JOB_ID'
@@ -5369,7 +5402,7 @@ eccentricity3=0

         # make slurm script
         scriptpath = self.slurmpath('slurm_script')
         try:
-            script = open(scriptpath,'w')
+            script = open(scriptpath,'w',encoding='utf-8')
         except IOError:
             print("Could not open Slurm script at {path} for writing: please check you have set {slurm_dir} correctly (it is currently {slurm_dir} and can write to this directory.".format(path=scriptpath,
                   slurm_dir = self.grid_options['slurm_dir']))
@@ -5476,7 +5509,14 @@ eccentricity3=0
             # just say we would have (use this for testing)
             print("Slurm script is at {path} but has not been launched".format(path=scriptpath))
+
+        # some messages to the user, then return
+        if self.grid_options['slurm_postpone_sbatch'] == 1:
+            print("Slurm script written, but launching the jobs with sbatch was postponed.")
+        else:
+            print("Slurm jobs launched")
         print("All done in slurm_grid().")
+        return

     def save_population_object(self,object=None,filename=None,confirmation=True,compression='gzip'):
         """
@@ -5674,7 +5714,7 @@ eccentricity3=0
         """
         Function to load in the joinlist to an array and return it.
         """
-        f = open(self.grid_options['joinlist'],'r')
+        f = open(self.grid_options['joinlist'],'r',encoding='utf-8')
         list = f.read().splitlines()
         f.close()
         return list
@@ -5840,7 +5880,7 @@ eccentricity3=0
         """
         Return the Slurm jobid from a slurm directory, passed in
         """
         file = os.path.join(dir,'jobid')
-        f = open(file,"r")
+        f = open(file,"r",encoding='utf-8')
         if not f:
             print("Error: could not open {} to read the Slurm jobid of the directory {}".format(file,dir))
             sys.exit(code=1)
diff --git a/binarycpython/utils/grid_options_defaults.py b/binarycpython/utils/grid_options_defaults.py
index 9d28ad9c0cabe01ff32cb4cdd018d0c0fb02373c..00ad9a3768147370e6008eddd485540a3fde0228 100644
--- a/binarycpython/utils/grid_options_defaults.py
+++ b/binarycpython/utils/grid_options_defaults.py
@@ -198,6 +198,7 @@ grid_options_defaults_dict = {
     "slurm_time": 0,  # total time. 0 = infinite time
     "slurm_postpone_sbatch": 0,  # if 1: don't submit, just make the script
     "slurm_array": None,  # override for --array, useful for rerunning jobs
+    "slurm_array_max_jobs" : None,  # override for the max number of concurrent array jobs
     "slurm_extra_settings": {},  # Dictionary of extra settings for Slurm to put in its launch script.
     "slurm_sbatch": "sbatch",  # sbatch command
     "slurm_restart_dir" : None,  # restart Slurm jobs from this directory
diff --git a/binarycpython/utils/spacing_functions.py b/binarycpython/utils/spacing_functions.py
index daec4c41fdcf43c4a70110887dd3c87de692c626..ef2fa06a10dc3dfc481183208a5c44f44cbf50bd 100644
--- a/binarycpython/utils/spacing_functions.py
+++ b/binarycpython/utils/spacing_functions.py
@@ -458,7 +458,7 @@ def const_dt(self,cachedir=None,usecache=True,**kwargs):
     # bse_options dict to make sure the order is preserved
     mass_list = _const_dt(cachedir,
                           self.grid_options['num_cores'],
-                          json.dumps(self.bse_options,sort_keys=True),
+                          json.dumps(self.bse_options,sort_keys=True,ensure_ascii=False),
                           **kwargs
                           )
     cache.close()
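
Example usage of the new keyword arguments (a minimal sketch, not part of the patch itself: it assumes an installed binary_c/binarycpython setup with a configured Population instance; the file names and option values are illustrative):

    from binarycpython.utils.grid import Population
    from binarycpython.utils.ensemble import load_ensemble

    pop = Population()

    # New grid option: cap the number of concurrently running Slurm array tasks.
    pop.set(slurm_array_max_jobs=10)

    # export_all_info() now accepts ensure_ascii/indent; ensure_ascii=False
    # keeps non-ASCII characters (e.g. M☉) intact in the written JSON.
    pop.export_all_info(use_datadir=True, ensure_ascii=False, indent=4)

    # write_ensemble(): encoding applies to the text (JSON) path only,
    # since msgpack output is binary and takes no encoding.
    pop.write_ensemble("ensemble.json.gz", data={"example": 1},
                       encoding="utf-8", ensure_ascii=False)

    # Read it back; flush=True forces flushed progress messages, which
    # helps when stdout is buffered (e.g. under Slurm).
    data = load_ensemble("ensemble.json.gz", flush=True)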