Example Scheduler Submission Scripts
Below are example submission scripts used to configure and launch libEnsemble on a variety of high-performance computing systems. See the platform-specific documentation for more information about each system and its configuration.
General examples
Slurm - Basic
/examples/libE_submission_scripts/submit_slurm_simple.sh
#!/bin/bash
#SBATCH --job-name=libE_simple
#SBATCH --account=<myproject>
#SBATCH --partition=<partition_name>
#SBATCH --constraint=<constraint_name>
#SBATCH --time=10
#SBATCH --nodes=2

# Minimal Slurm submission: a two-node allocation in which the calling
# script is started with "-n 8".
# Normally only one of --partition / --constraint above is needed.

# Some Slurm configurations need these set so that runs can share nodes.
export SLURM_MEM_PER_NODE=0
export SLURM_EXACT=1

python libe_calling_script.py -n 8
PBS - Basic
/examples/libE_submission_scripts/submit_pbs_simple.sh
#!/bin/bash -l
#PBS -l select=2
#PBS -l walltime=00:15:00
#PBS -q <queue_name>
#PBS -A <myproject>

# Basic PBS submission.
# We selected 2 nodes - now running with 8 workers.

export MPICH_GPU_SUPPORT_ENABLED=1

# Change to the directory held in PBS_O_WORKDIR. The expansion is quoted so
# that paths containing spaces work, and the job aborts if the directory
# cannot be entered instead of launching from the wrong location.
cd "$PBS_O_WORKDIR" || exit 1

python libE_calling_script.py -n 8
LSF - Basic
/examples/libE_submission_scripts/submit_lsf_simple.sh
#!/bin/bash -l
#BSUB -P <project code>
#BSUB -J libe_mproc
#BSUB -W 15
#BSUB -nnodes 2

# Basic LSF submission: requests 2 nodes (-nnodes) with a wall-clock
# limit of 15 (-W).

# Value handed to the calling script through its -n option.
num_workers=8

python run_libe_forces.py -n "${num_workers}"
System Examples
Aurora
/examples/libE_submission_scripts/submit_pbs_aurora.sh
#!/bin/bash -l
#PBS -l select=2
#PBS -l walltime=00:30:00
#PBS -q <myqueue>
#PBS -A <myproject>

# PBS submission for Aurora on 2 nodes.

module load frameworks
export MPICH_GPU_SUPPORT_ENABLED=1

# Change to the directory held in PBS_O_WORKDIR. Quoted so that paths with
# spaces work; abort instead of launching from the wrong location.
cd "$PBS_O_WORKDIR" || exit 1

# 2 nodes - 12 sim workers (6 GPUs per node)
# NOTE(review): -n is one more than the sim worker count, presumably to
# provide a worker for the generator - confirm against the calling script.
python libE_calling_script.py -n 13

# Alternative, if using libE_specs["use_tiles_as_gpus"] = True:
# 2 nodes - 24 sim workers (12 GPU tiles per node). Use this line instead
# of the one above.
# python libE_calling_script.py -n 25
Frontier (Large WarpX Ensemble)
/examples/libE_submission_scripts/submit_frontier_large.sh
#!/bin/bash
#SBATCH --job-name=libE_warpX_full_sim_32x40
#SBATCH --account=<myproject>
#SBATCH --partition=batch
#SBATCH --time=6:00:00
#SBATCH --nodes=240

# Large ensemble submission on 240 nodes.
module load cray-python

# One generator worker plus 40 simulation workers; each sim worker
# uses 6 nodes (48 GPUs).
num_workers=41

python run_gpcam_warpx.py -n "${num_workers}"
Perlmutter
/examples/libE_submission_scripts/submit_perlmutter.sh
#!/bin/bash
#SBATCH -J libE_small_test
#SBATCH -A <myproject>
#SBATCH -C gpu
#SBATCH --time 10
#SBATCH --nodes 1

# Single-node submission.
# This script is using GPU partition
export MPICH_GPU_SUPPORT_ENABLED=1
export SLURM_EXACT=1

# One worker for generator and 4 for sims (one GPU each)
python libe_calling_script.py -n 5

# Alternative: if the libE_specs option gen_on_manager=True is set, use the
# line below INSTEAD of the one above. It is left commented out so that the
# calling script is not run a second time within the same job.
# python libe_calling_script.py -n 4
Polaris
/examples/libE_submission_scripts/submit_pbs_polaris.sh
#!/bin/bash -l
#PBS -l select=1:system=polaris
#PBS -l walltime=00:15:00
#PBS -l filesystems=home:grand
#PBS -q debug
#PBS -A <myproject>

# PBS submission for Polaris: one node in the debug queue, with the calling
# script started using "-n 4".

export MPICH_GPU_SUPPORT_ENABLED=1

# Change to the directory held in PBS_O_WORKDIR. Quoted so that paths with
# spaces work; abort instead of launching from the wrong location.
cd "$PBS_O_WORKDIR" || exit 1

python libE_calling_script.py -n 4
Bridges - Central Mode
/examples/libE_submission_scripts/bridges_submit_slurm_central.sh
#!/bin/bash
#SBATCH -J libE_test_central
#SBATCH -N 5
#SBATCH -p RM
#SBATCH -A <my_project>
#SBATCH -o tlib.%j.%N.out
#SBATCH -e tlib.%j.%N.error
#SBATCH -t 00:30:00

# Launch script for running in central mode with mpi4py.
# libEnsemble will run on a dedicated node (or nodes).
# The remaining nodes in the allocation will be dedicated to worker launched apps.
# Initialize Executor with auto-resources=True and central_mode=True.

# User to edit these variables
export EXE=libE_calling_script.py
export NUM_WORKERS=4

# One MPI process per worker plus one extra (presumably the manager - confirm).
# -ppn is set to the same value as -np so every process is placed on one node.
num_procs=$((NUM_WORKERS + 1))

# Expansions are quoted so an edited EXE path containing spaces still works.
mpirun -np "$num_procs" -ppn "$num_procs" python "$EXE"

# To use local mode instead of mpi4py (with parse_args())
# python "$EXE" -n "$NUM_WORKERS"
Bebop - Central Mode
/examples/libE_submission_scripts/bebop_submit_slurm_central.sh
#!/bin/bash
#SBATCH -J libE_test_central
#SBATCH -N 5
#SBATCH -p knlall
#SBATCH -A <my_project>
#SBATCH -o tlib.%j.%N.out
#SBATCH -e tlib.%j.%N.error
#SBATCH -t 01:00:00

# Launch script for running in central mode with mpi4py.
# libEnsemble will run on a dedicated node (or nodes).
# The remaining nodes in the allocation will be dedicated to worker launched apps.
# Use executor with auto-resources=True and central_mode=True.

# User to edit these variables
export EXE=libE_calling_script.py
export NUM_WORKERS=4

export I_MPI_FABRICS=shm:tmi

# One task per worker plus one extra (presumably the manager - confirm),
# all placed on a single node (--nodes=1).
num_tasks=$((NUM_WORKERS + 1))

# Overcommit will allow ntasks up to the no. of contexts on one node (eg. 320 on Bebop)
# Expansions are quoted so an edited EXE path containing spaces still works.
srun --overcommit --ntasks="$num_tasks" --nodes=1 python "$EXE"

# To use local mode instead of mpi4py (with parse_args())
# python "$EXE" -n "$NUM_WORKERS"
Bebop - Distributed Mode
/examples/libE_submission_scripts/bebop_submit_pbs_distrib.sh
#!/bin/bash -l
#PBS -l select=2:mpiprocs=16
#PBS -l walltime=00:15:00
#PBS -q bdwall
#PBS -A [project]
#PBS -N libE_example

# PBS submission for distributed mode: the calling script is launched
# through mpirun across the 2 selected nodes.

# Change to the directory held in PBS_O_WORKDIR. Quoted so that paths with
# spaces work; abort instead of launching from the wrong location.
cd "$PBS_O_WORKDIR" || exit 1

module load openmpi

# 16 MPI processes in total, 8 per node.
# NOTE(review): --ppn is the MPICH/Intel MPI (Hydra) spelling; confirm that
# the mpirun provided by the loaded openmpi module accepts it (Open MPI's
# native per-node option is --npernode).
mpirun -n 16 --ppn 8 python run_libe_example.py
Summit (Decommissioned) - On Launch Nodes with Multiprocessing
/examples/libE_submission_scripts/summit_submit_mproc.sh
#!/bin/bash -x
#BSUB -P <project code>
#BSUB -J libe_mproc
#BSUB -W 30
#BSUB -nnodes 4
#BSUB -alloc_flags "smt1"

# Script to run libEnsemble using multiprocessing on launch nodes.
# Assumes Conda environment is set up.

# To be run with central job management
# - Manager and workers run on launch node.
# - Workers submit tasks to the compute nodes in the allocation.

# Name of calling script
export EXE=libE_calling_script.py

# Communication Method (option and value; word-split on purpose at launch)
export COMMS="--comms local"

# Number of workers (option and value; word-split on purpose at launch)
export NWORKERS="--nworkers 4"

# Wallclock for libE. (allow clean shutdown)
export LIBE_WALLCLOCK=25 # Optional if pass to script

# Name of Conda environment.
# Replace the placeholder with the real environment name. It is quoted
# because a bare <...> would be parsed by bash as redirections.
export CONDA_ENV_NAME="<conda_env_name>"

# Need these if not already loaded
# module load python
# module load gcc/4.8.5

# Activate conda environment
export PYTHONNOUSERSITE=1
. activate "$CONDA_ENV_NAME"

# hash -d python # Check pick up python in conda env
hash -r # Check no commands hashed (pip/python...)

# Launch libE - pick ONE of the forms below.
# COMMS and NWORKERS are deliberately left unquoted: each holds an option
# and its value, which must be split into two separate arguments.

# No args. All defined in calling script:
# python "$EXE" > out.txt 2>&1

# If calling script is using parse_args():
# python "$EXE" $COMMS $NWORKERS > out.txt 2>&1

# If calling script takes wall-clock as positional arg:
python "$EXE" "$LIBE_WALLCLOCK" $COMMS $NWORKERS > out.txt 2>&1