Example Scheduler Submission Scripts
Below are example submission scripts used to configure and launch libEnsemble on a variety of high-powered systems. See Running on HPC Systems for more information about the respective systems and configuration.
Note
It is highly recommended that the directive lines (e.g., #SBATCH) in batch submission scripts do NOT specify processor, task, or GPU configuration information—these lines should only specify the number of nodes required.
For example, do not specify #SBATCH --gpus-per-node=4
in order to use four
GPUs on the node when each worker may use fewer than this, as doing so may assign
all of the GPUs to a single MPI invocation. Instead, the configuration should
be supplied either
in the simulation function
or, if using dynamic resources,
in the generator.
General examples
Slurm - Basic
#!/bin/bash
#SBATCH -J libE_simple
#SBATCH -A <myproject>
#SBATCH -p <partition_name>
#SBATCH -C <constraint_name>
#SBATCH --time 10
#SBATCH --nodes 2

# Basic Slurm example: the two-node allocation requested above hosts a
# libEnsemble run started with "-n 8".
# Typically only one of -p / -C above is needed.

# Some Slurm configurations require both of these before runs can share nodes.
export SLURM_EXACT=1 SLURM_MEM_PER_NODE=0

worker_count=8
python libe_calling_script.py -n "${worker_count}"
PBS - Basic
#!/bin/bash -l
#PBS -l select=2
#PBS -l walltime=00:15:00
#PBS -q <queue_name>
#PBS -A <myproject>

# Basic PBS example: 2 nodes selected above, running with 8 workers.
export MPICH_GPU_SUPPORT_ENABLED=1

# Run from the submission directory. Quote the path and stop if it cannot be
# entered, so the calling script is never launched from the wrong directory.
cd "$PBS_O_WORKDIR" || exit 1

python libE_calling_script.py -n 8
LSF - Basic
#!/bin/bash -l
#BSUB -P <project code>
#BSUB -J libe_mproc
#BSUB -W 15
#BSUB -nnodes 2

# Basic LSF example: launch the calling script with "-n 8" inside the
# two-node allocation requested above.
worker_count=8
python run_libe_forces.py -n "${worker_count}"
System Examples
Aurora
#!/bin/bash -l
#PBS -l select=2
#PBS -l walltime=00:30:00
#PBS -q <myqueue>
#PBS -A <myproject>

module load frameworks
export MPICH_GPU_SUPPORT_ENABLED=1

# Run from the submission directory. Quote the path and stop if it cannot be
# entered, so the calling script is never launched from the wrong directory.
cd "$PBS_O_WORKDIR" || exit 1

# 2 nodes - 12 sim workers (6 GPUs per node).
# NOTE(review): -n is one more than the sim-worker count; presumably the
# extra worker runs the generator - confirm.
python libE_calling_script.py -n 13

# If using libE_specs["use_tiles_as_gpus"] = True:
# 2 nodes - 24 sim workers (12 GPU tiles per node).
# python libE_calling_script.py -n 25
Frontier (Large WarpX Ensemble)
#!/bin/bash
#SBATCH -J libE_warpX_full_sim_32x40
#SBATCH -A <myproject>
#SBATCH -p batch
#SBATCH --time 6:00:00
#SBATCH --nodes 240

module load cray-python

# 41 workers in total: one gen worker and 40 sim workers
# (6 nodes = 48 GPUs each).
worker_count=41
python run_gpcam_warpx.py -n "${worker_count}"
Perlmutter
#!/bin/bash
#SBATCH -J libE_small_test
#SBATCH -A <myproject>
#SBATCH -C gpu
#SBATCH --time 10
#SBATCH --nodes 1

# This job uses the GPU partition (see "-C gpu" above).
MPICH_GPU_SUPPORT_ENABLED=1
export MPICH_GPU_SUPPORT_ENABLED

# Five workers: one for the generator and four for sims (one GPU each).
python libe_calling_script.py -n 5

# Alternative when the libE_specs option gen_on_manager=True is used:
# python libe_calling_script.py -n 4
Polaris
#!/bin/bash -l
#PBS -l select=1:system=polaris
#PBS -l walltime=00:15:00
#PBS -l filesystems=home:grand
#PBS -q debug
#PBS -A <myproject>

export MPICH_GPU_SUPPORT_ENABLED=1

# Run from the submission directory. Quote the path and stop if it cannot be
# entered, so the calling script is never launched from the wrong directory.
cd "$PBS_O_WORKDIR" || exit 1

# Single node selected above; the calling script is started with "-n 4".
python libE_calling_script.py -n 4
Bebop
#!/bin/bash -l
#PBS -l select=4
#PBS -l walltime=00:15:00
#PBS -q bdwall
#PBS -A [project]
#PBS -N libE_example

# Run from the submission directory. Quote the path and stop if it cannot be
# entered, so the calling script is never launched from the wrong directory.
cd "$PBS_O_WORKDIR" || exit 1

# Choose MPI backend. Note that the built mpi4py in your environment should match.
module load oneapi/mpi
# module load openmpi

# Four nodes selected above; the calling script is started with "-n 16".
python run_libe_example.py -n 16
Bridges - MPI / Central Mode
#!/bin/bash
#SBATCH -J libE_test_central
#SBATCH -N 5
#SBATCH -p RM
#SBATCH -A <my_project>
#SBATCH -o tlib.%j.%N.out
#SBATCH -e tlib.%j.%N.error
#SBATCH -t 00:30:00

# Central-mode launch using mpi4py.
# libEnsemble itself occupies a dedicated node (or nodes); every other node in
# the allocation is left for the applications that workers launch.
# Initialize Executor with auto-resources=True and central_mode=True.

# Settings for the user to edit
export EXE=libE_calling_script.py
export NUM_WORKERS=4

# One MPI rank per worker plus one for the manager; -ppn is set to the same
# value as -np, so every rank is placed on a single node.
rank_count=$((NUM_WORKERS + 1))
mpirun -np "${rank_count}" -ppn "${rank_count}" python "${EXE}"

# To use local mode instead of mpi4py (with parse_args())
# python $EXE -n $NUM_WORKERS
SLURM - MPI / Distributed Mode (co-locate workers & MPI applications)
#!/bin/bash
#SBATCH -J libE_test
#SBATCH -N 4
#SBATCH -p [queue]
#SBATCH -A <my_project>
#SBATCH -o tlib.%j.%N.out
#SBATCH -e tlib.%j.%N.error
#SBATCH -t 01:00:00

# Launch script that runs in distributed mode with mpi4py.
# Workers are evenly spread over nodes and manager added to the first node.
# Requires even distribution - either multiple workers per node or nodes per worker
# (the script stops with an error if the split is uneven).
# Option for manager to have a dedicated node.
# Use of MPI Executor will ensure workers co-locate tasks with workers
# If node_list file is kept, this informs libe of resources. Else, libe auto-detects.

# User to edit these variables
export EXE=libE_calling_script.py
export NUM_WORKERS=4
export MANAGER_NODE=false # true = Manager has a dedicated node (assign one extra)
export USE_NODE_LIST=true # If false, allow libE to determine node_list from environment.

# Sometimes may be necessary
# As libE shares nodes with user applications allow fallback if contexts overrun.
# unset I_MPI_FABRICS
# export I_MPI_FABRICS_LIST=tmi,tcp
# export I_MPI_FALLBACK=1

# If using in calling script (After N mins manager kills workers and exits cleanly)
export LIBE_WALLCLOCK=55

#-----------------------------------------------------------------------------
# Work out distribution

# Both variables are provided by Slurm inside a batch allocation.
: "${SLURM_NNODES:?SLURM_NNODES is not set - submit this script with sbatch}"
: "${SLURM_JOBID:?SLURM_JOBID is not set - submit this script with sbatch}"

if [[ "$MANAGER_NODE" == "true" ]]; then
  WORKER_NODES=$((SLURM_NNODES - 1))
else
  WORKER_NODES=$SLURM_NNODES
fi

# Guard the divisions below against zero or negative operands.
if ((NUM_WORKERS < 1 || WORKER_NODES < 1)); then
  echo "Error: need at least one worker and one worker node" \
    "(NUM_WORKERS=$NUM_WORKERS, WORKER_NODES=$WORKER_NODES)" >&2
  exit 1
fi

if ((NUM_WORKERS >= WORKER_NODES)); then
  SUB_NODE_WORKERS=true
  WORKERS_PER_NODE=$((NUM_WORKERS / WORKER_NODES))
  leftover=$((NUM_WORKERS % WORKER_NODES))
else
  SUB_NODE_WORKERS=false
  NODES_PER_WORKER=$((WORKER_NODES / NUM_WORKERS))
  leftover=$((WORKER_NODES % NUM_WORKERS))
fi

# An uneven split would give a machinefile whose line count does not match
# the number of tasks requested from srun below.
if ((leftover != 0)); then
  echo "Error: $NUM_WORKERS workers cannot be spread evenly over" \
    "$WORKER_NODES worker nodes" >&2
  exit 1
fi
#-----------------------------------------------------------------------------

# A little useful information
echo "Manager process running on: $HOSTNAME"
echo "Directory is: $PWD"

machinefile="machinefile.$SLURM_JOBID"

# Generate a node list with 1 node per line:
srun hostname | sort -u > node_list

# Add manager node to machinefile
head -n 1 node_list > "$machinefile"

# A dedicated manager node (the first one) must not also receive workers.
if [[ "$MANAGER_NODE" == "true" ]]; then
  first_worker_line=2
else
  first_worker_line=1
fi

# Add worker nodes to machinefile
if [[ "$SUB_NODE_WORKERS" == "true" ]]; then
  # Several workers per node: repeat each node name WORKERS_PER_NODE times.
  tail -n "+$first_worker_line" node_list \
    | awk -v repeat="$WORKERS_PER_NODE" '{for (i = 0; i < repeat; i++) print}' \
      >> "$machinefile"
else
  # Several nodes per worker: keep the first node of each group.
  tail -n "+$first_worker_line" node_list \
    | awk -v patt="$NODES_PER_WORKER" 'NR % patt == 1' \
      >> "$machinefile"
fi

if [[ "$USE_NODE_LIST" == "false" ]]; then
  rm -f -- node_list
fi

# Put in a timestamp
echo "Starting execution at: $(date)"

# To use srun
export SLURM_HOSTFILE="$machinefile"

# The "arbitrary" flag should ensure SLURM_HOSTFILE is picked up
# cmd=(srun --ntasks "$((NUM_WORKERS + 1))" -m arbitrary python "$EXE")
# The command is held in an array so each argument stays a single word.
cmd=(srun --ntasks "$((NUM_WORKERS + 1))" -m arbitrary python "$EXE" "$LIBE_WALLCLOCK")

echo "The command is: ${cmd[*]}"
echo "End SLURM script information."
printf 'All further output is from the process being run and not the script.\n\n%s\n' "${cmd[*]}"

"${cmd[@]}"

# Print the date again -- when finished
echo "Finished at: $(date)"
Summit (Decommissioned) - On Launch Nodes with Multiprocessing
#!/bin/bash -x
#BSUB -P <project code>
#BSUB -J libe_mproc
#BSUB -W 30
#BSUB -nnodes 4
#BSUB -alloc_flags "smt1"

# Script to run libEnsemble using multiprocessing on launch nodes.
# Assumes Conda environment is set up.

# To be run with central job management
# - Manager and workers run on launch node.
# - Workers submit tasks to the compute nodes in the allocation.

# Name of calling script
export EXE=libE_calling_script.py

# Communication Method
export COMMS="--comms local"

# Number of workers.
export NWORKERS="--nworkers 4"

# Wallclock for libE. (allow clean shutdown)
export LIBE_WALLCLOCK=25 # Optional; only needed if passed to the calling script

# Name of Conda environment. Replace the placeholder with the real name.
# It is quoted because a bare <...> is shell redirection syntax and would
# stop the unedited template from parsing at all.
export CONDA_ENV_NAME="<conda_env_name>"

# Need these if not already loaded
# module load python
# module load gcc/4.8.5

# Activate conda environment
export PYTHONNOUSERSITE=1
. activate "$CONDA_ENV_NAME"

# hash -d python # Check pick up python in conda env
hash -r # Check no commands hashed (pip/python...)

# Launch libE
# python $EXE > out.txt 2>&1 # No args. All defined in calling script
# python $EXE $COMMS $NWORKERS > out.txt 2>&1 # If calling script is using parse_args()
# COMMS and NWORKERS each hold a "flag value" pair and are left unquoted on
# purpose so that they split into separate arguments.
# shellcheck disable=SC2086
python "$EXE" "$LIBE_WALLCLOCK" $COMMS $NWORKERS > out.txt 2>&1 # If calling script takes wall-clock as positional arg.