From dbf77cdf0ac9be48cb1348c8e4beedfc7e250e87 Mon Sep 17 00:00:00 2001
From: David Hendriks <davidhendriks93@gmail.com>
Date: Wed, 16 Sep 2020 16:25:14 +0100
Subject: [PATCH] busy with hpc slurm and condor stuff

---
 binarycpython/utils/grid_options_defaults.py | 22 ++++++---
 binarycpython/utils/hpc.py                   | 52 ++++++++++++++++++++
 2 files changed, 68 insertions(+), 6 deletions(-)
 create mode 100644 binarycpython/utils/hpc.py

diff --git a/binarycpython/utils/grid_options_defaults.py b/binarycpython/utils/grid_options_defaults.py
index bbcda861e..19ecc0220 100644
--- a/binarycpython/utils/grid_options_defaults.py
+++ b/binarycpython/utils/grid_options_defaults.py
@@ -118,27 +118,37 @@ grid_options_defaults_dict = {
     ########################################
     # Slurm stuff
     ########################################
-    "slurm": 0,  # dont use the slurm by default
+    # slurm_ntasks=>1, # 1 CPU required per job
+    # slurm_partition=>'all', # MUST be defined
+    # slurm_jobname=>'binary_grid', # not required but useful
+    # slurm_use_all_node_CPUs=>0, # if given nodes, set to 1
+    # # if given CPUs, set to 0
+
+    "slurm": 0,  # dont use the slurm by default. 1 = use slurm
     "slurm_command": "",  # Command that slurm runs (e.g. run_flexigrid or join_datafiles)
     "slurm_dir": "",  # working directory containin scripts output logs etc.
-    # slurm_njobs=>'', # number of scripts
-    # slurm_jobid=>'', # slurm job id (%A)
+    "slurm_njobs": 0,  # number of scripts; set to 0 as default
+    "slurm_jobid": '',  # slurm job id (%A)
+    "slurm_memory": 512,  # in MB, the memory use of the job
+    "slurm_warn_max_memory": 1024,  # in MB : warn if mem req. > this
+    "slurm_use_all_node_CPUs": 0,  # 1 = use all of a node's CPUs. 0 = use a given amount of CPUs
+    "slurm_postpone_join": 0,  # if 1 do not join on slurm, join elsewhere
+
     # slurm_jobarrayindex=>'', # slurm job array index (%a)
     # slurm_jobname=>'binary_grid', # set to binary_grid
     # slurm_postpone_join=>0, # if 1, data is not joined, e.g. if you
     # # want to do it off the slurm grid (e.g. with more RAM)
     # slurm_postpone_sbatch=>0, # if 1, don't submit, just make the script
     # # (defaults to $ENV{PWD} if undef)
-    # slurm_memory=>512, # in MB, the memory use of the job
-    # slurm_warn_max_memory=>1024, # in MB : warn if mem req. > this
     # slurm_partition=>undef,
     # slurm_ntasks=>1, # 1 CPU required per array job: usually only need this
     # slurm_time=>0, # 0 = infinite time
-    # slurm_use_all_node_CPUs=>0, # 1 = use all of a node's CPUs (0)
     # # you will want to use this if your Slurm SelectType is e.g. linear
     # # which means it allocates all the CPUs in a node to the job
     # slurm_control_CPUs=>0, # if so, leave this many for Perl control (0)
     # slurm_array=>undef,# override for --array, useful for rerunning jobs
+
+
     ########################################
     # Condor stuff
     ########################################
diff --git a/binarycpython/utils/hpc.py b/binarycpython/utils/hpc.py
new file mode 100644
index 000000000..e33bf362c
--- /dev/null
+++ b/binarycpython/utils/hpc.py
@@ -0,0 +1,52 @@
+"""
+File containing functions for HPC computing, distributed tasks on clusters etc.
+
+Mainly divided in 2 sections: Slurm and Condor
+"""
+
+import os
+import time
+
+
+class slurm_grid:
+    """Placeholder for the Slurm grid handling class (to be filled in)."""
+
+    pass
+
+
+def create_directories_hpc(working_dir):
+    """
+    Function to create a set of directories, given a root directory
+
+    These directories will contain stuff for the HPC runs
+    """
+
+    # The root directory must already exist; we only create subdirectories in it
+    if not os.path.exists(working_dir):
+        print("Error. Working directory {} does not exist! Aborting".format(working_dir))
+        raise ValueError
+
+    directories_list = ['scripts', 'stdout', 'stderr', 'results', 'logs', 'status', 'joining']
+
+    # Create each subdirectory; fail loudly if one already exists
+    for subdir in directories_list:
+        full_path = os.path.join(working_dir, subdir)
+        os.makedirs(full_path, exist_ok=False)
+
+    # Since the directories are probably made on some mount which has to go over NFS
+    # we should explicitly check if they are created
+    print("Waiting for directories")
+    directories_exist = False
+    while not directories_exist:
+        directories_exist = True
+
+        for subdir in directories_list:
+            full_path = os.path.join(working_dir, subdir)
+
+            if not os.path.exists(full_path):
+                time.sleep(1)
+                directories_exist = False
+
+    print("Directories exist")
+
+
-- 
GitLab