Example libEnsemble Submission Scripts

Below are example submission scripts used to configure and launch libEnsemble on a variety of high-powered systems. See the platform-specific documentation for more information about the respective systems and their configuration.

Bridges - Central Mode

/examples/libE_submission_scripts/bridges_submit_slurm_central.sh
#!/bin/bash
#SBATCH -J libE_test_central
#SBATCH -N 5
#SBATCH -p RM
#SBATCH -A <my_project>
#SBATCH -o tlib.%j.%N.out
#SBATCH -e tlib.%j.%N.error
#SBATCH -t 00:30:00

# Central-mode launcher for libEnsemble using mpi4py communications.
#   - libEnsemble (manager and workers) runs on a dedicated node (or nodes).
#   - Every other node in the allocation is left for applications launched
#     by the workers.
#   - In the calling script, initialize the Executor with auto-resources=True
#     and central_mode=True.

# --- Settings to edit ---------------------------------------------------------
export NUM_WORKERS=4
export EXE=libE_calling_script.py

# One MPI rank for the manager plus one per worker; -ppn equals -np so that
# all of the ranks are placed on the same node.
libe_ranks=$((NUM_WORKERS + 1))
mpirun -np ${libe_ranks} -ppn ${libe_ranks} python ${EXE}

# Alternative: local comms instead of mpi4py.
# The calling script must use parse_args() to pick up these options.
# python $EXE --comms local --nworkers $NUM_WORKERS

Bebop - Central Mode

/examples/libE_submission_scripts/bebop_submit_slurm_central.sh
#!/bin/bash
#SBATCH -J libE_test_central
#SBATCH -N 5
#SBATCH -p knlall
#SBATCH -A <my_project>
#SBATCH -o tlib.%j.%N.out
#SBATCH -e tlib.%j.%N.error
#SBATCH -t 01:00:00

# Central-mode launcher for libEnsemble using mpi4py communications.
#   - libEnsemble (manager and workers) runs on a dedicated node (or nodes).
#   - Every other node in the allocation is left for applications launched
#     by the workers.
#   - In the calling script, use an executor with auto-resources=True
#     and central_mode=True.

# --- Settings to edit ---------------------------------------------------------
export NUM_WORKERS=4
export EXE=libE_calling_script.py
export I_MPI_FABRICS=shm:tmi

# One task for the manager plus one per worker, all on a single node.
# --overcommit allows ntasks up to the number of contexts on one node
# (e.g. 320 on Bebop).
libe_ranks=$((NUM_WORKERS + 1))
srun --overcommit --ntasks=${libe_ranks} --nodes=1 python ${EXE}

# Alternative: local comms instead of mpi4py.
# The calling script must use parse_args() to pick up these options.
# python $EXE --comms local --nworkers $NUM_WORKERS

Bebop - Distributed Mode

/examples/libE_submission_scripts/bebop_submit_slurm_distrib.sh
#!/bin/bash
#SBATCH -J libE_test
#SBATCH -N 4
#SBATCH -p knlall
#SBATCH -A <my_project>
#SBATCH -o tlib.%j.%N.out
#SBATCH -e tlib.%j.%N.error
#SBATCH -t 01:00:00

# Launch script that runs in distributed mode with mpi4py.
#   Workers are evenly spread over nodes and the manager is added to the first node.
#   Requires even distribution - either multiple workers per node or nodes per worker.
#   Option for the manager to have a dedicated node.
#   Use of the MPI Executor will ensure tasks are co-located with their workers.
#   If the node_list file is kept, this informs libE of resources. Else, libE auto-detects.

# User to edit these variables
export EXE=libE_calling_script.py
export NUM_WORKERS=4
export MANAGER_NODE=false # true = Manager has a dedicated node (assign one extra)
export USE_NODE_LIST=true # If false, allow libE to determine node_list from environment.

# As libE shares nodes with user applications allow fallback if contexts overrun.
unset I_MPI_FABRICS
export I_MPI_FABRICS_LIST=tmi,tcp
export I_MPI_FALLBACK=1

# If using in calling script (After N mins manager kills workers and exits cleanly)
export LIBE_WALLCLOCK=55

#-----------------------------------------------------------------------------
# Work out distribution

# The arithmetic below is meaningless outside a Slurm allocation.
: "${SLURM_NNODES:?SLURM_NNODES is not set - submit this script through Slurm}"

if [[ "$MANAGER_NODE" == "true" ]]; then
  WORKER_NODES=$((SLURM_NNODES - 1))
  # Workers are placed starting from the second node so that the first node
  # really is dedicated to the manager.
  FIRST_WORKER_LINE=2
else
  WORKER_NODES=$SLURM_NNODES
  FIRST_WORKER_LINE=1
fi

# Guard the divisions below against a zero divisor.
if ((NUM_WORKERS < 1 || WORKER_NODES < 1)); then
  echo "Error: need at least one worker and one worker node" \
    "(NUM_WORKERS=$NUM_WORKERS, WORKER_NODES=$WORKER_NODES)" >&2
  exit 1
fi

if ((NUM_WORKERS >= WORKER_NODES)); then
  # One or more workers per node
  SUB_NODE_WORKERS=true
  WORKERS_PER_NODE=$((NUM_WORKERS / WORKER_NODES))
  LEFTOVER=$((NUM_WORKERS % WORKER_NODES))
else
  # Several nodes per worker
  SUB_NODE_WORKERS=false
  NODES_PER_WORKER=$((WORKER_NODES / NUM_WORKERS))
  LEFTOVER=$((WORKER_NODES % NUM_WORKERS))
fi

if ((LEFTOVER != 0)); then
  echo "Warning: $NUM_WORKERS workers do not distribute evenly over" \
    "$WORKER_NODES worker nodes" >&2
fi
#-----------------------------------------------------------------------------

# A little useful information
echo "Manager process running on: $HOSTNAME"
echo "Directory is:  $PWD"

MACHINEFILE="machinefile.$SLURM_JOBID"

# Generate a node list with 1 node per line:
srun hostname | sort -u > node_list

# Add manager node to machinefile
head -n 1 node_list > "$MACHINEFILE"

# Add worker nodes to machinefile (skipping the manager's node if it is dedicated)
if [[ "$SUB_NODE_WORKERS" == "true" ]]; then
  # Repeat each worker node once per worker hosted on it.
  tail -n +"$FIRST_WORKER_LINE" node_list \
    | awk -v repeat="$WORKERS_PER_NODE" '{for (i = 0; i < repeat; i++) print}' \
      >> "$MACHINEFILE"
else
  # Take the first node of each group of NODES_PER_WORKER nodes.
  # (NR - 1) % stride == 0 also selects every node when stride is 1.
  tail -n +"$FIRST_WORKER_LINE" node_list \
    | awk -v stride="$NODES_PER_WORKER" '(NR - 1) % stride == 0' \
      >> "$MACHINEFILE"
fi

if [[ "$USE_NODE_LIST" == "false" ]]; then
  rm -f -- node_list
fi

# Put in a timestamp
echo "Starting execution at: $(date)"

# To use srun
export SLURM_HOSTFILE="$MACHINEFILE"

# The "arbitrary" flag should ensure SLURM_HOSTFILE is picked up.
# The command is held in an array so each argument stays a single word.
# cmd=(srun --ntasks "$((NUM_WORKERS + 1))" -m arbitrary python "$EXE")
cmd=(srun --ntasks "$((NUM_WORKERS + 1))" -m arbitrary python "$EXE" "$LIBE_WALLCLOCK")

echo "The command is: ${cmd[*]}"
echo "End Slurm script information."
# printf is used because an unquoted \n\n in echo prints a literal "nn".
printf '%s\n\n' "All further output is from the process being run and not the script."

"${cmd[@]}"

# Print the date again -- when finished
echo "Finished at: $(date)"

Cori - Central Mode

/examples/libE_submission_scripts/cori_submit.sh
#!/bin/bash
#SBATCH -J libE_test_central
#SBATCH -N 5
#SBATCH -q debug
#SBATCH -A <my_project>
#SBATCH -o tlib.%j.%N.out
#SBATCH -e tlib.%j.%N.error
#SBATCH -t 01:00:00
#SBATCH -C knl

# Central-mode launcher for libEnsemble using mpi4py communications.
#   - libEnsemble (manager and workers) runs on a dedicated node (or nodes).
#   - Every other node in the allocation is left for applications launched
#     by the workers.
#   - In the calling script, use an executor with auto-resources=True
#     and central_mode=True.

# --- Settings to edit ---------------------------------------------------------
export NUM_WORKERS=4
export EXE=libE_calling_script.py
export I_MPI_FABRICS=shm:ofi  # OFI is the recommended fabric

# Make sure the anaconda Python module is loaded
module load python/3.7-anaconda-2019.07

# Only relevant if libensemble is installed under common: replace the
# <my_project> and <user_name> placeholders (and the python version) so the
# path matches your install location.
export PYTHONPATH=/global/common/software/<my_project>/<user_name>/packages/lib/python3.7/site-packages:$PYTHONPATH

# One task for the manager plus one per worker, all on a single node.
# --overcommit allows ntasks up to the number of contexts on one node
# (e.g. 320 on Bebop).
libe_ranks=$((NUM_WORKERS + 1))
srun --overcommit --ntasks=${libe_ranks} --nodes=1 python ${EXE}

# Alternative: local comms instead of mpi4py.
# The calling script must use parse_args() to pick up these options.
# python $EXE --comms local --nworkers $NUM_WORKERS

Blues (Blue Gene Q) - Distributed Mode

/examples/libE_submission_scripts/blues_script.pbs
#!/bin/sh

# Job name to be reported by qstat
#PBS -N libE_run

# Declare Job, non-rerunnable
#PBS -r n

# Specify name for output log file
#PBS -o log_libE_run

# Join standard output and error so we only get one logfile
#PBS -j oe

# Mail to user on a=abort, b=begin, e=end
#PBS -m aeb

# Set the email address where job-related notifications will be sent
#PBS -M name@email.com

# Number of nodes (request 4)
#PBS -l nodes=4:ppn=16

# Specify CPU time needed
#PBS -l walltime=0:01:00

# Select queue
##PBS -q haswell
##PBS -q biggpu
##PBS -q ivy
#PBS -q shared

# Run from the directory the job was submitted from; nothing below makes
# sense anywhere else, so stop if the directory is unavailable.
cd "$PBS_O_WORKDIR" || exit 1

# A little useful information for the log file...
# ($HOSTNAME is a bash variable and may be unset under /bin/sh, so ask hostname.)
echo "Manager process running on: $(hostname)"
echo "Directory is: $PWD"
echo "PBS has allocated the following nodes:"
cat "$PBS_NODEFILE"
NPROCS=$(($(wc -l < "$PBS_NODEFILE")))
echo "This job has allocated $NPROCS cores"

# Parse the PBS_NODEFILE to a machinefile (to be used below) in order to put
# one worker rank on each of the allocated nodes, and put rank 0 (the manager)
# on the first node as well.
# The '>' redirection truncates any machinefile left over from a previous run,
# so no separate rm is needed.
head -n 1 "$PBS_NODEFILE" > libE_machinefile
sort -u "$PBS_NODEFILE" >> libE_machinefile

# One MPI rank per machinefile line: the manager plus one worker per node
# (5 ranks for the 4 nodes requested above).
NUM_RANKS=$(($(wc -l < libE_machinefile)))

# Put in a timestamp
echo "Starting execution at: $(date)"

pwd
cmd="mpiexec -np $NUM_RANKS -machinefile libE_machinefile python libE_calling_script.py libE_machinefile"
# Note that this command passes the libE_machinefile to both MPI and the
# libE_calling_script; in the latter script, it can be parsed and given to the
# alloc_func.

echo "The command is: $cmd"
echo "End PBS script information."
# printf is used because an unquoted \n\n in echo prints a literal "nn".
printf '%s\n\n\n' "All further output is from the process being run and not the pbs script."

# POSIX sh has no arrays, so the command string is deliberately left unquoted
# to be split into words here.
# shellcheck disable=SC2086
$cmd

# Print the date again -- when finished
echo "Finished at: $(date)"

Theta - On MOM Node with Multiprocessing

/examples/libE_submission_scripts/theta_submit_mproc.sh
#!/bin/bash -x
#COBALT -t 00:30:00
#COBALT -O libE_mproc_MOM
#COBALT -n 4
#COBALT -q debug-flat-quad # Up to 8 nodes only
##COBALT -q default # For large jobs >=128 nodes
##COBALT -A <project code>

# Runs libEnsemble with multiprocessing on the launch nodes.
# A Conda environment is assumed to be set up already.
#
# Intended for central job management:
# - The manager and the workers run on the launch node.
# - Workers submit their tasks to the compute nodes in the allocation.

# Calling script
export EXE=libE_calling_script.py

# Calling-script options: communication method and number of workers
export COMMS="--comms local"
export NWORKERS="--nworkers 4"

# Wall-clock for libE (leaves time for a clean shutdown).
# Only needed if it is passed to the calling script.
export LIBE_WALLCLOCK=25

# Conda environment name - replace the <conda_env_name> placeholder
# (angle brackets included) before submitting.
export CONDA_ENV_NAME=<conda_env_name>

# Theta-specific Conda location
export PATH=/opt/intel/python/2017.0.035/intelpython35/bin:${PATH}
export LD_LIBRARY_PATH=~/.conda/envs/${CONDA_ENV_NAME}/lib:${LD_LIBRARY_PATH}

# Required for python kills on Theta
export PMI_NO_FORK=1

# Unload the Theta modules that may interfere with job monitoring/kills
for theta_module in trackdeps darshan xalt; do
  module unload "${theta_module}"
done

# Activate the Conda environment
export PYTHONNOUSERSITE=1
source activate ${CONDA_ENV_NAME}

# Launch libE. Pick the form that matches the calling script:
#   No args - everything is defined in the calling script:
#     python $EXE > out.txt 2>&1
#   Calling script uses parse_args():
#     python $EXE $COMMS $NWORKERS > out.txt 2>&1
#   Calling script takes the wall-clock as a positional arg (used below).
# COMMS and NWORKERS each hold an option and its value, so they are left
# unquoted to be split into separate words.
python ${EXE} ${LIBE_WALLCLOCK} ${COMMS} ${NWORKERS} > out.txt 2>&1

Theta - Central Mode with Balsam

/examples/libE_submission_scripts/theta_submit_balsam.sh
#!/bin/bash -x
#COBALT -t 30
#COBALT -O libE_MPI_balsam
#COBALT -n 5
#COBALT -q debug-flat-quad # Up to 8 nodes only # Use default for >=128 nodes
#COBALT -A <project code>

# Script to launch libEnsemble using Balsam.
#   Assumes Conda environment is set up.
#   Requires Balsam is installed and a database initialized.

# To be run with central job management
# - Manager and workers run on one node (or a dedicated set of nodes).
# - Workers submit tasks to the rest of the nodes in the pool.

# Name of calling script
export EXE=libE_calling_script.py

# Number of workers.
export NUM_WORKERS=4

# Number of nodes to run libE
export LIBE_NODES=1

# Balsam wall-clock in minutes - make a few mins smaller than batch wallclock
export BALSAM_WALLCLOCK=25

# Name of working directory within database where Balsam places running jobs/output
export WORKFLOW_NAME=libe_workflow

# Wall-clock in mins for libE (allow clean shutdown).
# Script must be set up to receive as argument.
export LIBE_WALLCLOCK=$((BALSAM_WALLCLOCK - 3))

# libEnsemble calling script arguments (some alternatives shown).
# The whole value is handed to Balsam as a single --args string.

# No args. All defined in calling script
export SCRIPT_ARGS=''

# If calling script takes wall-clock as positional argument.
# export SCRIPT_ARGS="$LIBE_WALLCLOCK"

# Name of Conda environment - replace the <conda_env_name> placeholder
# (angle brackets included) before submitting.
export CONDA_ENV_NAME=<conda_env_name>

# Name of database - replace the <dbase_name> placeholder (angle brackets included).
export BALSAM_DB_NAME=<dbase_name>  # default - to use default database.

# Required for killing tasks from workers on Theta
export PMI_NO_FORK=1

# Unload Theta modules that may interfere with job monitoring/kills
module unload trackdeps
module unload darshan
module unload xalt

# Obtain Conda PATH from miniconda-3/latest module
CONDA_DIR=/soft/datascience/conda/miniconda3/latest/bin

# Ensure environment isolated
export PYTHONNOUSERSITE=1

# Activate conda environment
source $CONDA_DIR/activate $CONDA_ENV_NAME

# Activate Balsam database
source balsamactivate $BALSAM_DB_NAME

# Currently need at least one DB connection per worker (for postgres).
if [[ $NUM_WORKERS -gt 100 ]]; then
  # Add a margin
  printf '%s\n' "max_connections=$((NUM_WORKERS + 20)) #Appended by submission script" \
    >> "$BALSAM_DB_PATH/balsamdb/postgresql.conf"
fi
wait

# Make sure no existing apps/tasks registered to database
balsam rm apps --all --force
balsam rm jobs --all --force
wait
sleep 3

# Add calling script to Balsam database as app and job.
export THIS_DIR=$PWD
export SCRIPT_BASENAME=${EXE%.*}  # Calling script name without its extension

# Multiple nodes
export LIBE_PROCS=$((NUM_WORKERS + 1))  # Manager and workers
export PROCS_PER_NODE=$((LIBE_PROCS / LIBE_NODES))  # Must divide evenly

balsam app --name "$SCRIPT_BASENAME.app" --exec "$EXE" --desc "Run $SCRIPT_BASENAME"

# "$SCRIPT_ARGS" must stay quoted: an empty value would otherwise vanish and
# --args would take the next option as its value; a multi-word value would be
# split into several arguments.
balsam job --name "job_$SCRIPT_BASENAME" --workflow "$WORKFLOW_NAME" \
           --application "$SCRIPT_BASENAME.app" --args "$SCRIPT_ARGS" \
           --wall-time-minutes "$LIBE_WALLCLOCK" \
           --num-nodes "$LIBE_NODES" --ranks-per-node "$PROCS_PER_NODE" \
           --url-out="local:/$THIS_DIR" --stage-out-files="*.out *.txt *.log" \
           --url-in="local:/$THIS_DIR/*" --yes

# Hyper-thread libE (note this will not affect HT status of user calcs - only libE itself)
# E.g. Running 255 workers and one manager on one libE node.
# balsam job --name "job_$SCRIPT_BASENAME" --workflow "$WORKFLOW_NAME" \
#            --application "$SCRIPT_BASENAME.app" --args "$SCRIPT_ARGS" \
#            --wall-time-minutes "$LIBE_WALLCLOCK" \
#            --num-nodes 1 --ranks-per-node 256 --threads-per-core 4 \
#            --url-out="local:/$THIS_DIR" --stage-out-files="*.out *.txt *.log" \
#            --url-in="local:/$THIS_DIR/*" --yes

# Run job
balsam launcher --consume-all --job-mode=mpi --num-transition-threads=1

wait
source balsamdeactivate

Summit - On Launch Nodes with Multiprocessing

/examples/libE_submission_scripts/summit_submit_mproc.sh
#!/bin/bash -x
#BSUB -P <project code>
#BSUB -J libe_mproc
#BSUB -W 30
#BSUB -nnodes 4
#BSUB -alloc_flags "smt1"

# Runs libEnsemble with multiprocessing on the launch nodes.
# A Conda environment is assumed to be set up already.
#
# Intended for central job management:
# - The manager and the workers run on the launch node.
# - Workers submit their tasks to the compute nodes in the allocation.

# Calling script
export EXE=libE_calling_script.py

# Calling-script options: communication method and number of workers
export COMMS="--comms local"
export NWORKERS="--nworkers 4"

# Wall-clock for libE (leaves time for a clean shutdown).
# Only needed if it is passed to the calling script.
export LIBE_WALLCLOCK=25

# Conda environment name - replace the <conda_env_name> placeholder
# (angle brackets included) before submitting.
export CONDA_ENV_NAME=<conda_env_name>

# Load these if they are not already loaded
# module load python
# module load gcc/4.8.5

# Activate the Conda environment
export PYTHONNOUSERSITE=1
source activate ${CONDA_ENV_NAME}

# hash -d python # Check pick up python in conda env
# Forget any previously hashed command locations (pip/python...)
hash -r

# Launch libE. Pick the form that matches the calling script:
#   No args - everything is defined in the calling script:
#     python $EXE > out.txt 2>&1
#   Calling script uses parse_args():
#     python $EXE $COMMS $NWORKERS > out.txt 2>&1
#   Calling script takes the wall-clock as a positional arg (used below).
# COMMS and NWORKERS each hold an option and its value, so they are left
# unquoted to be split into separate words.
python ${EXE} ${LIBE_WALLCLOCK} ${COMMS} ${NWORKERS} > out.txt 2>&1