Template for GPU executables with dynamic resources

forces_gpu_var_resources.forces_simf.run_forces(H, persis_info, sim_specs, libE_info)

Launches the forces MPI app and auto-assigns ranks and GPU resources (based on generator output).

forces_simf.py
 1import numpy as np
 2
 3# Optional status codes to display in libE_stats.txt for each gen or sim
 4from libensemble.message_numbers import TASK_FAILED, WORKER_DONE
 5
 6# Optional - to print GPU settings
 7from libensemble.tools.test_support import check_gpu_setting
 8
 9
def run_forces(H, persis_info, sim_specs, libE_info):
    """Launch the forces MPI app and auto-assign ranks and GPU resources
    (based on generator output).
    """

    calc_status = 0

    # The particle count (first input value) also serves as the app's seed
    num_particles = str(int(H["x"][0][0]))
    timesteps = str(10)
    app_args = " ".join([num_particles, timesteps, num_particles])

    # Submit the registered "forces" app via the MPI Executor and wait for it
    executor = libE_info["executor"]
    task = executor.submit(app_name="forces", app_args=app_args)
    task.wait()

    # Optional - report how GPUs were assigned (method and numbers)
    check_gpu_setting(task, assert_setting=False, print_setting=True)

    # Read the final energy from the stat file; flag the sim failed if unreadable
    try:
        final_energy = np.loadtxt("forces.stat")[-1]
        calc_status = WORKER_DONE
    except Exception:
        final_energy = np.nan
        calc_status = TASK_FAILED

    # Populate the output array expected by the manager
    output = np.zeros(1, dtype=sim_specs["out"])
    output["energy"] = final_energy

    # Hand results, persistent state, and status back to the worker
    return output, persis_info, calc_status
Example usage

Note the use of the generator function uniform_sample_with_var_gpus, which sets num_gpus as a gen_specs output field for each generated simulation input.

The special generator output field “num_gpus” is automatically picked up by each worker and will be used when the simulation is run, unless overridden.

  1#!/usr/bin/env python
  2
  3"""
  4This example is similar to the forces_gpu test.
  5
  6The forces.c application should be built by setting the GPU preprocessor condition
  7(usually -DGPU) in addition to openMP GPU flags for the given system. See examples
  8in ../forces_app/build_forces.sh. We recommend running forces.x standalone first
  9and confirming it is running on the GPU (this is given clearly in the output).
 10
 11A number of GPUs is requested based on the number of particles (randomly chosen
 12from the range for each simulation). For simplicity, the number of GPUs requested
 13is based on a linear split of the range (lb to ub), rather than absolute particle
 14count.
 15
 16To mock on a non-GPU system, uncomment the resource_info line in libE_specs. You
 17will compile forces without the -DGPU option. It is recommended that the ub and/or lb for
 18particle counts are reduced for CPU performance.
 19"""
 20
 21import os
 22import sys
 23
 24import numpy as np
 25from forces_simf import run_forces  # Sim func from current dir
 26
 27from libensemble import Ensemble
 28from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f
 29from libensemble.executors import MPIExecutor
 30from libensemble.gen_funcs.persistent_sampling_var_resources import uniform_sample_with_var_gpus as gen_f
 31from libensemble.specs import AllocSpecs, ExitCriteria, GenSpecs, LibeSpecs, SimSpecs
 32
 33if __name__ == "__main__":
 34    # Initialize MPI Executor
 35    exctr = MPIExecutor()
 36    sim_app = os.path.join(os.getcwd(), "../forces_app/forces.x")
 37
 38    if not os.path.isfile(sim_app):
 39        sys.exit("forces.x not found - please build first in ../forces_app dir")
 40
 41    exctr.register_app(full_path=sim_app, app_name="forces")
 42
 43    # Parse number of workers, comms type, etc. from arguments
 44    ensemble = Ensemble(parse_args=True, executor=exctr)
 45    nsim_workers = ensemble.nworkers - 1  # One worker is for persistent generator
 46
 47    # Persistent gen does not need resources
 48    ensemble.libE_specs = LibeSpecs(
 49        num_resource_sets=nsim_workers,
 50        sim_dirs_make=True,
 51        stats_fmt={"show_resource_sets": True},  # see resource sets in libE_stats.txt
 52        # resource_info = {"gpus_on_node": 4},  # for mocking GPUs
 53    )
 54
 55    ensemble.sim_specs = SimSpecs(
 56        sim_f=run_forces,
 57        inputs=["x"],
 58        outputs=[("energy", float)],
 59    )
 60
 61    ensemble.gen_specs = GenSpecs(
 62        gen_f=gen_f,
 63        inputs=[],  # No input when start persistent generator
 64        persis_in=["sim_id"],  # Return sim_ids of evaluated points to generator
 65        outputs=[
 66            ("x", float, (1,)),
 67            ("num_gpus", int),  # num_gpus auto given to sim when use MPIExecutor.
 68        ],
 69        user={
 70            "initial_batch_size": nsim_workers,
 71            "lb": np.array([50000]),  # min particles
 72            "ub": np.array([100000]),  # max particles
 73            "max_gpus": nsim_workers,
 74        },
 75    )
 76
 77    # Starts one persistent generator. Simulated values are returned in batch.
 78    ensemble.alloc_specs = AllocSpecs(
 79        alloc_f=alloc_f,
 80        user={
 81            "async_return": False,  # False causes batch returns
 82        },
 83    )
 84
 85    # Instruct libEnsemble to exit after this many simulations.
 86    ensemble.exit_criteria = ExitCriteria(sim_max=8)
 87
 88    # Seed random streams for each worker, particularly for gen_f
 89    ensemble.add_random_streams()
 90
 91    # Run ensemble
 92    ensemble.run()
 93
 94    if ensemble.is_manager:
 95        # Note, this will change if changing sim_max, nworkers, lb, ub, etc.
 96        if ensemble.exit_criteria.sim_max == 8:
 97            chksum = np.sum(ensemble.H["energy"])
 98            assert np.isclose(chksum, 96288744.35136001), f"energy check sum is {chksum}"
 99            print("Checksum passed")
100        else:
101            print("Run complete; a checksum has not been provided for the given sim_max")

Also see the Forces GPU tutorial and the video demonstration on Frontier.