Template for GPU executables

forces_gpu.forces_simf.run_forces(H, persis_info, sim_specs, libE_info)

Launches the forces MPI app and auto-assigns ranks and GPU resources.

Assigns one MPI rank to each GPU assigned to the worker.
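
The method used to assign GPUs depends on the detected platform and MPI runner: libEnsemble may set an environment variable such as CUDA_VISIBLE_DEVICES or pass a runner option such as srun's --gpus-per-task. The check_gpu_setting call in the listing below prints which method was used.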

forces_simf.py
import numpy as np

# Optional status codes to display in libE_stats.txt for each gen or sim
from libensemble.message_numbers import TASK_FAILED, WORKER_DONE

# Optional - to print GPU settings
from libensemble.tools.test_support import check_gpu_setting


def run_forces(H, persis_info, sim_specs, libE_info):
    """Launches the forces MPI app and auto-assigns ranks and GPU resources.

    Assigns one MPI rank to each GPU assigned to the worker.
    """

    calc_status = 0

    # Parse out num particles from the generator function
    particles = str(int(H["x"][0][0]))

    # App arguments: num particles, num timesteps; num particles is also used as the seed
    args = particles + " " + str(10) + " " + particles

    # Retrieve our MPI Executor
    exctr = libE_info["executor"]

    # Submit our forces app for execution
    task = exctr.submit(
        app_name="forces",
        app_args=args,
        auto_assign_gpus=True,
        match_procs_to_gpus=True,
    )

    # Block until the task finishes
    task.wait()

    # Optional - prints GPU assignment (method and numbers)
    check_gpu_setting(task, assert_setting=False, print_setting=True)

    # Try loading final energy reading and set the sim's status
    statfile = "forces.stat"
    try:
        data = np.loadtxt(statfile)
        final_energy = data[-1]
        calc_status = WORKER_DONE
    except Exception:
        final_energy = np.nan
        calc_status = TASK_FAILED

    # Define our output array and populate with energy reading
    output = np.zeros(1, dtype=sim_specs["out"])
    output["energy"] = final_energy

    # Return final information to worker, for reporting to manager
    return output, persis_info, calc_status
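
If explicit control is preferred over auto-assignment, the same submit call inside run_forces can instead request resources directly. A minimal sketch, assuming a libEnsemble version whose executor supports the num_procs and num_gpus options (the counts shown are illustrative):

# Sketch only: request resources explicitly rather than auto-assigning.
# Assumes this executor version supports the num_procs/num_gpus options.
task = exctr.submit(
    app_name="forces",
    app_args=args,
    num_procs=4,  # four MPI ranks
    num_gpus=4,  # one GPU per rank, mirroring the auto-assigned layout
)
task.wait()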
Example usage
#!/usr/bin/env python

"""
This example is based on the simple forces test. The default number of
particles is increased considerably to give perceptible time on the GPUs when
live-checking GPU usage.

The forces.c application should be built by setting the GPU preprocessor condition
(usually -DGPU) in addition to the OpenMP GPU flags for the given system. See examples
in ../forces_app/build_forces.sh. We recommend running forces.x standalone first
and confirming that it runs on the GPU (this is shown clearly in the output).

To mock GPUs on a non-GPU system, uncomment the resource_info line in libE_specs
and compile forces without the -DGPU option. Reducing the ub and/or lb for
particle counts is recommended for CPU performance.
"""

import os
import sys

import numpy as np
from forces_simf import run_forces  # Sim func from current dir

from libensemble import Ensemble
from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f
from libensemble.executors import MPIExecutor
from libensemble.gen_funcs.persistent_sampling import persistent_uniform as gen_f
from libensemble.specs import AllocSpecs, ExitCriteria, GenSpecs, LibeSpecs, SimSpecs

if __name__ == "__main__":
    # Initialize MPI Executor
    exctr = MPIExecutor()
    sim_app = os.path.join(os.getcwd(), "../forces_app/forces.x")

    if not os.path.isfile(sim_app):
        sys.exit("forces.x not found - please build first in ../forces_app dir")

    exctr.register_app(full_path=sim_app, app_name="forces")

    # Parse number of workers, comms type, etc. from arguments
    ensemble = Ensemble(parse_args=True, executor=exctr)
    nsim_workers = ensemble.nworkers - 1  # One worker is for the persistent generator

    # Persistent gen does not need resources
    ensemble.libE_specs = LibeSpecs(
        num_resource_sets=nsim_workers,
        sim_dirs_make=True,
        # resource_info = {"gpus_on_node": 4}  # for mocking GPUs
    )

    ensemble.sim_specs = SimSpecs(
        sim_f=run_forces,
        inputs=["x"],
        outputs=[("energy", float)],
    )

    ensemble.gen_specs = GenSpecs(
        gen_f=gen_f,
        inputs=[],  # No input when starting persistent generator
        persis_in=["sim_id"],  # Return sim_ids of evaluated points to generator
        outputs=[("x", float, (1,))],
        user={
            "initial_batch_size": nsim_workers,
            "lb": np.array([50000]),  # min particles
            "ub": np.array([100000]),  # max particles
        },
    )

    # Starts one persistent generator. Simulated values are returned in batch.
    ensemble.alloc_specs = AllocSpecs(
        alloc_f=alloc_f,
        user={
            "async_return": False,  # False causes batch returns
        },
    )

    # Instruct libEnsemble to exit after this many simulations
    ensemble.exit_criteria = ExitCriteria(sim_max=8)

    # Seed random streams for each worker, particularly for gen_f
    ensemble.add_random_streams()

    # Run ensemble
    ensemble.run()

    if ensemble.is_manager:
        # Note: this will change if changing sim_max, nworkers, lb, ub, etc.
        if ensemble.exit_criteria.sim_max == 8:
            chksum = np.sum(ensemble.H["energy"])
            assert np.isclose(chksum, 96288744.35136001), f"energy checksum is {chksum}"
            print("Checksum passed")
        else:
            print("Run complete. A checksum has not been provided for the given sim_max")
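
To launch the example, run the calling script with libEnsemble's standard command-line options (parsed via parse_args). For instance, assuming the script above is saved as run_libe_forces.py (an illustrative filename): python run_libe_forces.py --comms local --nworkers 5. This uses one worker for the persistent generator and four for simulations.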

Also see the Forces GPU tutorial and the video demonstration on Frontier.