From c2543c67ef883a36f0b35bb13df431c0b93002b9 Mon Sep 17 00:00:00 2001
From: Robert Izzard <r.izzard@surrey.ac.uk>
Date: Tue, 12 Oct 2021 10:45:56 +0100
Subject: [PATCH] time duration formatting has been improved (using the
 humanize module)

I've fixed the _boxed formatting to strip ANSI codes when calculating string lengths (so you can use ANSI colours properly)

changed the colours for the "failed systems" warning so it's absolutely obvious

wrapped the JSON ensemble output in a write_ensemble() function which automatically gzips or bzips depending on the filename extension (or this can be explictly set in grid_options['compress_ensemble'] ).
---
 binarycpython/utils/functions.py             |  15 ++
 binarycpython/utils/grid.py                  | 187 ++++++++++++++-----
 binarycpython/utils/grid_options_defaults.py |   1 +
 requirements.txt                             |   2 +
 4 files changed, 162 insertions(+), 43 deletions(-)

diff --git a/binarycpython/utils/functions.py b/binarycpython/utils/functions.py
index 9ae916850..e03c46e93 100644
--- a/binarycpython/utils/functions.py
+++ b/binarycpython/utils/functions.py
@@ -8,12 +8,15 @@ Tasks:
     - TODO: change all prints to verbose_prints
 """
 
+import datetime as dt
 import json
 import os
 import gc
 import tempfile
 import copy
+import humanize
 import inspect
+import re
 import sys
 import subprocess
 import time
@@ -66,6 +69,18 @@ def check_if_in_shell():
 
     return in_shell
 
+def timedelta(delta):
+    """
+    Function to convert a length of time (float, seconds) to a string for
+    human-readable output.
+    """
+    # currently use the humanize module to do this
+    t = humanize.time.precisedelta(dt.timedelta(seconds=delta),format='%0.2f',minimum_unit='milliseconds',suppress=['milliseconds'])
+    # and use more compact units
+    t = t.replace(" hours","h")
+    t = t.replace(" seconds","s")
+    return t
+
 
 def ANSI_colours():
     # ANSI colours dictionary
diff --git a/binarycpython/utils/grid.py b/binarycpython/utils/grid.py
index 49324572c..8b10d6c9f 100644
--- a/binarycpython/utils/grid.py
+++ b/binarycpython/utils/grid.py
@@ -20,26 +20,30 @@ Tasks:
     - TODO: think of a nice way to remove the loaded grid_code/ generator from memory.
 """
 
-import os
-import gc
-import sys
-
+import argparse
+import bz2
 import copy
-import json
-import time
-import uuid
-import logging
 import datetime
-import argparse
+import json
+import gc
+import gzip
 import importlib.util
+import logging
 import multiprocessing
+import os
+import py_rinterpolate
+import re
 import resource
+import setproctitle
+import strip_ansi
+import sys
+import time
+import uuid
+
 from typing import Union, Any
 from collections import (
     OrderedDict,
 )
-import setproctitle
-import py_rinterpolate
 
 from colorama import init as colorama_init
 
@@ -83,6 +87,7 @@ from binarycpython.utils.functions import (
     ANSI_colours,
     check_if_in_shell,
     format_number,
+    timedelta
 )
 
 # from binarycpython.utils.hpc_functions import (
@@ -161,6 +166,7 @@ class Population:
             "w",
         ) as f:
             f.write(json.dumps(self.grid_options["m&s_options"], indent=4))
+            f.close()
 
         # Argline dict
         self.argline_dict = {}
@@ -799,21 +805,34 @@ class Population:
         """
         strlen = 0
         strings = []
+        lengths = []
+
+        # make a list of strings
         if separator:
             for l in list:
                 strings += l.split(sep=separator)
         else:
             strings = list
+
+        # get lengths without ANSI codes
         for string in strings:
-            strlen = max(len(string),strlen)
+            lengths.append(len(strip_ansi.strip_ansi(string)))
+
+        # hence the max length
+        strlen = max(lengths)
         strlen += strlen % 2
         header = boxchar * (4 + strlen)
+
+        # start output
         out = self.ANSI_colours[colour] + header + "\n"
-        for string in strings:
-            if len(string)%2 == 1:
+
+        # loop over strings to output, padding as required
+        for n,string in enumerate(strings):
+            if lengths[n]%2 == 1:
                 string = ' ' + string
-            pad = ' ' * int((strlen - len(string))/2)
+            pad = ' ' * int((strlen - lengths[n])/2)
             out = out + boxchar + ' ' + pad + string + pad + ' ' + boxchar +"\n"
+        # close output and return
         out = out + header + "\n" + self.ANSI_colours["reset"]
         return out
 
@@ -1060,9 +1079,12 @@ class Population:
         dtsecs = self.grid_options["_end_time_evolution"] - self.grid_options["_start_time_evolution"]
         string1 = "Population-{} finished!\nThe total probability is {:g}.".format(self.grid_options["_population_id"],
                                                                                   self.grid_options["_probtot"],)
-        string2 = "It took a total of {}s to run {} systems on {} cores".format(dtsecs,
-                                                                                self.grid_options["_total_starcount"],
-                                                                                self.grid_options["num_cores"])
+        string2 = "It took a total of {} to run {} systems on {} cores\n = {} of CPU time".format(
+            timedelta(dtsecs),
+            self.grid_options["_total_starcount"],
+            self.grid_options["num_cores"],
+            timedelta(dtsecs * self.grid_options["num_cores"])
+        )
         verbose_print(self._boxed(string1,string2),
                       self.grid_options["verbosity"],
                       0)
@@ -1382,6 +1404,7 @@ class Population:
             "w",
         ) as f:
             f.write("STARTING")
+            f.close()
 
         # lets try out making stores for all the grids:
         self.grid_options["_store_memaddr"] = _binary_c_bindings.return_store_memaddr()
@@ -1453,6 +1476,7 @@ class Population:
                     "w",
                 ) as f:
                     f.write("RUNNING")
+                    f.close()
 
             # Combine that with the other settings
             full_system_dict = self.bse_options.copy()
@@ -1566,6 +1590,7 @@ class Population:
                 ) as f:
                     binary_cmdline_string = self._return_argline(full_system_dict)
                     f.write(binary_cmdline_string)
+                    f.close()
 
             ##############
             # Running the system
@@ -1612,6 +1637,7 @@ class Population:
                             binary_cmdline_string,
                         )
                     )
+                    f.close()
 
             ####################
             # Tallying system information
@@ -1643,6 +1669,7 @@ class Population:
             "w",
         ) as f:
             f.write("FINISHING")
+            f.close()
 
         if self.grid_options["verbosity"] >= _LOGGER_VERBOSITY_LEVEL:
             stream_logger.debug(f"Process-{self.process_ID} is finishing.")
@@ -1693,18 +1720,8 @@ class Population:
                         self.grid_options["_population_id"], self.process_ID
                     ),
                 )
-
-                # TODO: consider writing this in a formatted structure
-                # Write to file
-                with open(output_file, "w") as f:
-                    f.write(ensemble_raw_output)
-                    # f.write(json.dumps(self.format_ensemble_results(ensemble_output)))
-
-                print(
-                    "Thread {}: Wrote ensemble results directly to file: {}".format(
-                        self.process_ID, output_file
-                    )
-                )
+                self.write_ensemble(output_file,
+                                    ensemble_raw_output)
 
         ########
         # Clean up and return
@@ -1733,20 +1750,26 @@ class Population:
         end_process_time = datetime.datetime.now()
 
         # thread end message
+        colour = 'cyan on black'
         verbose_print(
             self._boxed(
-            "Process {} finished:\ngenerator started at {}\ngenerator finished at {}\ntotal: {}s\nof which {}s with binary_c\nRan {} systems\nwith a total probability of {:g}\nThis thread had {} failing systems\nwith a total failed probability of {}\nSkipped a total of {} zero-probability systems\n".format(
-                ID,
-                start_process_time.isoformat(),
-                end_process_time.isoformat(),
-                (end_process_time - start_process_time).total_seconds(),
-                total_time_calling_binary_c,
-                number_of_systems_run,
-                probability_of_systems_run,
-                self.grid_options["_failed_count"],
-                self.grid_options["_failed_prob"],
-                zero_prob_stars_skipped,
-        ),colour="cyan on black"),
+            "{colour}Process {ID} finished:\ngenerator started at {start}\ngenerator finished at {end}\ntotal: {timesecs}\nof which {binary_c_secs} with binary_c\nRan {nsystems} systems\nwith a total probability of {psystems:g}\n{failcolour}This thread had {nfail} failing systems{colour}\n{failcolour}with a total failed probability of {pfail}{colour}\n{zerocolour}Skipped a total of {nzero} zero-probability systems{zeroreset}\n".format(
+                colour=self.ANSI_colours[colour],
+                ID=ID,
+                start=start_process_time.isoformat(),
+                end=end_process_time.isoformat(),
+                timesecs=timedelta((end_process_time - start_process_time).total_seconds()),
+                binary_c_secs=timedelta(total_time_calling_binary_c),
+                nsystems=number_of_systems_run,
+                psystems=probability_of_systems_run,
+                failcolour=self.ANSI_colours['red'] if self.grid_options["_failed_count"]>0 else '',
+                failreset=self.ANSI_colours[colour] if self.grid_options["_failed_count"]>0 else '',
+                nfail=self.grid_options["_failed_count"],
+                pfail=self.grid_options["_failed_prob"],
+                nzero=zero_prob_stars_skipped,
+                zerocolour=self.ANSI_colours["yellow"] if zero_prob_stars_skipped>0 else '',
+                zeroreset=self.ANSI_colours[colour] if zero_prob_stars_skipped>0 else ''
+            ),colour=colour),
             self.grid_options["verbosity"],
             1,
         )
@@ -1776,6 +1799,7 @@ class Population:
             "w",
         ) as f:
             f.write(json.dumps(summary_dict, indent=4))
+            f.close()
 
         # Set status to running
         with open(
@@ -1787,6 +1811,7 @@ class Population:
             "w",
         ) as f:
             f.write("FINISHED")
+            f.close()
 
         result_queue.put(output_dict)
 
@@ -1852,6 +1877,8 @@ class Population:
             msg = "No actual evolution options passed to the evolve call. Aborting"
             raise ValueError(msg)
 
+
+    ############################################################
     def _setup(self):
         """
         Function to set up the necessary stuff for the population evolution.
@@ -3339,9 +3366,81 @@ class Population:
     ###################################################
     # Unordered functions
     #
-    # Functions that arent ordered yet
+    # Functions that aren't ordered yet
     ###################################################
 
+    def write_ensemble(self,output_file,json_data):
+        """
+    write_ensemble : Write ensemble results to a file.
+
+    Args:
+        output_file : the output filename.
+
+                      If the filename has an extension that we recognise,
+                      e.g. .gz or .bz2, we compress the output appropriately.
+
+                      Note that if grid_options['compress_ensemble'] is set, the
+                      appropriate file extension is added if required and compression
+                      is performed.
+
+        json_data :   the JSON data to be written out. This can be raw
+                      unformatted JSON, or the output of JSON_dumps().
+        """
+        # TODO: consider writing this in a formatted structure
+
+        # default to the compression algorithm specified
+        compression = self.grid_options['compress_ensemble']
+
+        # choose algorithm based on file extension if we're not given
+        # an algorithm
+        if not compression:
+            if output_file.endswith('.gz'):
+                compression = 'gzip'
+            elif output_file.endswith('.bz2'):
+                compression = 'bzip2'
+
+        # Write JSON ensemble data to file
+        if compression:
+            # write to a compressed file, adding the appropriate extension if
+            # required
+            if compression == 'gzip':
+                if output_file.endswith('.gz'):
+                    zipfile = output_file
+                else:
+                    zipfile = output_file + '.gz'
+                with gzip.open(zipfile, "wt") as f:
+                    f.write(json_data)
+                    f.close()
+            elif compression == 'bzip2':
+                if output_file.endswith('.bz2'):
+                    zipfile = output_file
+                else:
+                    zipfile = output_file + '.bz2'
+                with bz2.open(zipfile, "wt") as f:
+                    f.write(json_data)
+                    f.close()
+            else:
+                print("You have asked me to compress the ensemble output using algorithm {algorithm}, but not given me a valid compression algorithm (use gzip or bzip2 in grid_options['compress_ensemble']) : I will write the data uncompressed.").format(algorithm=compression)
+                # raw output (not compressed)
+                with open(output_file, "wt") as f:
+                    f.write(json_data)
+                    f.close()
+        else:
+            # raw output (not compressed)
+            with open(output_file, "wt") as f:
+                f.write(json_data)
+                f.close()
+
+        print(
+            "Thread {thread}: Wrote ensemble results to file: {colour}{file}{reset}".format(
+                thread=self.process_ID,
+                file=output_file,
+                colour=self.ANSI_colours['green'],
+                reset=self.ANSI_colours['reset']
+            )
+        )
+
+    ############################################################
     def write_binary_c_calls_to_file(
         self,
         output_dir: Union[str, None] = None,
@@ -3636,6 +3735,7 @@ class Population:
                         "a+",
                     ) as f:
                         f.write(argstring + "\n")
+                        f.close()
         else:
             verbose_print(
                 "binary_c output nothing - this is strange. If there is ensemble output being generated then this is fine.",
@@ -3668,6 +3768,7 @@ class Population:
                 "w",
             ) as f:
                 f.write(json.dumps(self.grid_options["m&s_options"], indent=4))
+                f.close()
 
     def _load_moe_di_stefano_data(self):
         """
diff --git a/binarycpython/utils/grid_options_defaults.py b/binarycpython/utils/grid_options_defaults.py
index 8e4435201..3a1ff066d 100644
--- a/binarycpython/utils/grid_options_defaults.py
+++ b/binarycpython/utils/grid_options_defaults.py
@@ -35,6 +35,7 @@ grid_options_defaults_dict = {
     "tmp_dir": temp_dir(),  # Setting the temp dir of the program
     "_main_pid": -1,  # Placeholder for the main process id of the run.
     "combine_ensemble_with_thread_joining": True,  # Flag on whether to combine everything and return it to the user or if false: write it to data_dir/ensemble_output_{population_id}_{thread_id}.json
+    "compress_ensemble":False, # compress the ensemble output?
     "_commandline_input": "",
     "log_runtime_systems": 0,  # whether to log the runtime of the systems (1 file per thread. stored in the tmp_dir)
     "_actually_evolve_system": True,  # Whether to actually evolve the systems of just act as if. for testing. used in _process_run_population_grid
diff --git a/requirements.txt b/requirements.txt
index 849cea167..d2cfebe3b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,6 +12,7 @@ dill==0.3.1.1
 docutils==0.15.2
 h5py==2.10.0
 hawkmoth==0.4
+humanize==3.12.0
 idna==2.8
 imagesize==1.1.0
 ipython==7.9.0
@@ -57,6 +58,7 @@ sphinxcontrib-htmlhelp==1.0.2
 sphinxcontrib-jsmath==1.0.1
 sphinxcontrib-qthelp==1.0.2
 sphinxcontrib-serializinghtml==1.1.3
+strip-ansi==0.1.1
 traitlets==4.3.3
 typed-ast==1.4.1
 typing-extensions==3.7.4.1
-- 
GitLab