From 6c2e18a3bccde5241e0ad7e3252a9253aac01672 Mon Sep 17 00:00:00 2001
From: Robert Izzard <r.izzard@surrey.ac.uk>
Date: Sat, 6 Nov 2021 23:18:15 +0000
Subject: [PATCH] still buggy - this is just to back up to the server

---
 binarycpython/utils/grid.py                  | 78 ++++++++------------
 binarycpython/utils/grid_options_defaults.py |  2 +-
 2 files changed, 33 insertions(+), 47 deletions(-)

diff --git a/binarycpython/utils/grid.py b/binarycpython/utils/grid.py
index 213a89aa7..3692971e4 100644
--- a/binarycpython/utils/grid.py
+++ b/binarycpython/utils/grid.py
@@ -36,6 +36,7 @@ import msgpack
 import multiprocessing
 import os
 import pathlib
+import pprint # for debugging only
 import psutil
 import py_rinterpolate
 import re
@@ -1069,21 +1070,24 @@ class Population:
         self._pre_run_cleanup()
 
         if self.grid_options["slurm"]>=1:
-            self.grid_options["symlink latest gridcode"] = False
+            self.grid_options["symlink_latest_gridcode"] = False
 
         if self.grid_options["condor"] >= 1:
             # Execute condor subroutines
             # self._condor_grid()
             raise ValueError("Condor evolution not available at this moment")
+
         elif self.grid_options["slurm"] == 1:
             # Slurm setup grid
             self.slurm_grid()
             # and then exit
+            print("Slurm jobs launched : exiting")
             sys.exit()
         else:
             # Execute population evolution subroutines
             self._evolve_population()
 
+        print("do analytics")
         # Put all interesting stuff in a variable and output that afterwards, as analytics of the run.
         analytics_dict = {
             "population_name": self.grid_options["_population_id"],
@@ -1115,7 +1119,8 @@ class Population:
             self.save_population_object()
 
         # if we're running a slurm grid, exit here
-        if self.grid_options["slurm"] >= 1:
+        # unless we're joining
+        if self.grid_options["slurm"] >= 1 and self.grid_options['evolution_type'] != 'join':
             sys.exit()
 
         ##
@@ -1123,7 +1128,6 @@ class Population:
         # because that makes for easier control
         self._cleanup()
 
-
         return analytics_dict
 
     def _evolve_population(self):
@@ -1147,14 +1151,19 @@ class Population:
         # special cases
         if self.grid_options['evolution_type'] == 'join':
             joinfiles = self.joinfiles()
-
-            if self.can_join(joinfiles):
+            joiningfile = self.slurmpath('joining')
+            if self.can_join(joinfiles,joiningfile):
                 # join object files
-                print("can join : all tasks are finished")
-                self.join_from_files(joinfiles)
+                try:
+                    pathlib.Path(joiningfile).touch(exist_ok=False)  # exclusive create acts as the join lock
+                    print("can join : all tasks are finished")
+                    self.join_from_files(joinfiles)
+                except FileExistsError:  # only the lost-lock race is expected; real join errors must propagate
+                    print("cannot join : another process is already joining")
             else:
                 print("cannot join : other tasks are not yet finished")
-                return
+                print("Finished this job : exiting")
+                sys.exit()

         ############################################################
         # Evolve systems
@@ -2820,7 +2829,7 @@ class Population:
             file.write(self.code_string)
 
         # perhaps create symlink
-        if self.grid_options["symlink latest gridcode"]:
+        if self.grid_options["symlink_latest_gridcode"] and self.grid_options["slurm"] == 0:
             global _count
             symlink = os.path.join(
                 self.grid_options["tmp_dir"], "binary_c_grid-latest" + str(_count)
@@ -5079,7 +5088,7 @@ eccentricity3=0
         # make a list of directories, these contain the various slurm
         # output, status files, etc.
         dirs = []
-        for dir in ['scripts','stdout','stderr','results','logs','status','joining']:
+        for dir in ['stdout','stderr','results','status']:
             dirs.append(self.slurmpath(dir))
 
         # make the directories: we do not allow these to already exist
@@ -5124,8 +5133,8 @@ eccentricity3=0
         if self.grid_options['evolution_type'] == 'grid':
             # run a grid of stars only, leaving the results
             # in a file
-
             print("Run grid")
+
             # get number of cpu cores available to us
             ncpus = max(1,psutil.cpu_count(logical=True))
 
@@ -5138,39 +5147,7 @@ eccentricity3=0
             return self.evolve()
 
         elif self.grid_options['evolution_type'] == 'join':
-            # join the data from multiple grid runs
-
-            # TODO : fix this
-            joinfile = os.path.join(self.slurmpath(),'joining', self.grid_options['slurm_jobid'])
-
-            if os.path.exists(joinfile):
-                print("Another process is already joining")
-                return
-
-            for n in range(1,self.grid_options['slurm_njobs']+1):
-                results_dumpfile = os.path.join(slurmpath('status'),self.grid_options['slurm_jobid'] + '.' + n)
-                print("Check file {file} (jobid {jobid}, n {n}\n".format(
-                    file=results_dumpfle,
-                    jobid=self.grid_options['slurm_jobid'],
-                    n=n))
-                status = self.get_slurm_status(jobid=self.grid_options['slurm_jobid'],
-                                               jobarrayindex=n)
-                if status != "finished":
-                    print("... is not finished")
-                    return
-                else:
-                    print("... is finished")
-
-            # attempt to ~atomically create the joinfile
-            # https://stackoverflow.com/questions/33223564/atomically-creating-a-file-if-it-doesnt-exist-in-python
-            try:
-                pathlib.Path(joinfile).touch(exist_ok=False)
-            except:
-                # already joining
-                return
-
-            self.grid_options['rungrid'] = 0
-
+            # should not happen!
             return
         else:
             # setup and launch slurm jobs
@@ -5322,6 +5299,8 @@ eccentricity3=0
             print("Save pickle to ",filename)
             print("pop is ",self.grid_options["_population_id"])
             print("probtot ",object.grid_options['_probtot'])
+            #print("grid_ensemble_results",pprint.pprint(object.grid_ensemble_results, sort_dicts=False))
+
             # remove shared memory
             shared_memory = object.shared_memory
             object.shared_memory = None
@@ -5350,7 +5329,6 @@ eccentricity3=0
             return None
         else:
             obj = compress_pickle.load(filename)
-            print("loaded obj",obj)
             return obj
 
     def merge_grid_object_results(self,refpop,newpop):
@@ -5370,8 +5348,13 @@ eccentricity3=0
         print("merge dicts")
         print("left: ",refpop.grid_results)
         print("right:",newpop.grid_results)
+
+        # combine data
         refpop.grid_results = merge_dicts(refpop.grid_results,
                                           newpop.grid_results)
+        refpop.grid_ensemble_results = merge_dicts(refpop.grid_ensemble_results,
+                                                   newpop.grid_ensemble_results)
+
         print("probs left ",refpop.grid_options["_probtot"],"right",newpop.grid_options["_probtot"])
         for key in ["_probtot"]:
             refpop.grid_options[key] += newpop.grid_options[key]
@@ -5409,9 +5392,12 @@ eccentricity3=0
                                                      file)
         print("done join from files")
 
-    def can_join(self,joinfiles):
+    def can_join(self,joinfiles,joiningfile):
         # check the joinfiles to make sure they all exist
         # and their .saved equivalents also exist
+
+        if os.path.exists(joiningfile):
+            return False
         for file in joinfiles:
             print("check for ",file)
             if os.path.exists(file) == False:
diff --git a/binarycpython/utils/grid_options_defaults.py b/binarycpython/utils/grid_options_defaults.py
index ddc07e224..ed37dc8df 100644
--- a/binarycpython/utils/grid_options_defaults.py
+++ b/binarycpython/utils/grid_options_defaults.py
@@ -128,7 +128,7 @@ grid_options_defaults_dict = {
     ## Grid type evolution
     "_grid_variables": {},  # grid variables
     "gridcode_filename": None,  # filename of gridcode
-    "symlink latest gridcode": True,  # symlink to latest gridcode
+    "symlink_latest_gridcode": True,  # symlink to latest gridcode
     "save_population_object" : None, # filename to which we should save a pickled grid object as the final thing we do
     'joinlist' : None,
     ## Monte carlo type evolution
-- 
GitLab