Template for GPU executables
- forces_gpu.forces_simf.run_forces(H, persis_info, sim_specs, libE_info)
Launches the forces MPI app and auto-assigns ranks and GPU resources.
Assigns one MPI rank to each GPU assigned to the worker.
forces_simf.py
import numpy as np

# Optional status codes to display in libE_stats.txt for each gen or sim
from libensemble.message_numbers import TASK_FAILED, WORKER_DONE

# Optional - to print GPU settings
from libensemble.tools.test_support import check_gpu_setting


def run_forces(H, persis_info, sim_specs, libE_info):
    """Launches the forces MPI app and auto-assigns ranks and GPU resources.

    Assigns one MPI rank to each GPU assigned to the worker.
    """

    calc_status = 0

    # Parse out num particles from the generator function
    particles = str(int(H["x"][0][0]))

    # App arguments: num particles, timesteps, also using num particles as seed
    args = particles + " " + str(10) + " " + particles

    # Retrieve our MPI Executor
    exctr = libE_info["executor"]

    # Submit our forces app for execution
    task = exctr.submit(
        app_name="forces",
        app_args=args,
        auto_assign_gpus=True,
        match_procs_to_gpus=True,
    )

    # Block until the task finishes
    task.wait()

    # Optional - prints GPU assignment (method and numbers)
    check_gpu_setting(task, assert_setting=False, print_setting=True)

    # Try loading final energy reading; set the sim's status
    statfile = "forces.stat"
    try:
        data = np.loadtxt(statfile)
        final_energy = data[-1]
        calc_status = WORKER_DONE
    except Exception:
        final_energy = np.nan
        calc_status = TASK_FAILED

    # Define our output array, populate with energy reading
    output = np.zeros(1, dtype=sim_specs["out"])
    output["energy"] = final_energy

    # Return final information to worker, for reporting to manager
    return output, persis_info, calc_status
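The submit call above lets libEnsemble derive process and GPU counts from the
resources assigned to the worker. If explicit counts are preferred, exctr.submit
also accepts num_procs and num_gpus directly. The sketch below is a variant of
the same call; the counts of four are illustrative assumptions, not values from
this example:

    # Variant (sketch): request resources explicitly instead of auto-assigning.
    # The counts of four are illustrative; libEnsemble still partitions the
    # assigned GPUs and sets the relevant GPU environment for the task.
    task = exctr.submit(
        app_name="forces",
        app_args=args,
        num_procs=4,  # one MPI rank per GPU
        num_gpus=4,
    )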
Example usage
#!/usr/bin/env python

"""
This example is based on the simple forces test. The default number of
particles is increased considerably to give perceptible time on the GPUs when
live-checking GPU usage.

The forces.c application should be built by setting the GPU preprocessor condition
(usually -DGPU) in addition to OpenMP GPU flags for the given system. See examples
in ../forces_app/build_forces.sh. We recommend running forces.x standalone first
and confirming it is running on the GPU (this is shown clearly in the output).

To mock on a non-GPU system, uncomment the resource_info line in libE_specs and
compile forces without the -DGPU option. It is recommended that the ub and/or lb
for particle counts be reduced for CPU performance.
"""

import os
import sys

import numpy as np
from forces_simf import run_forces  # Sim func from current dir

from libensemble import Ensemble
from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f
from libensemble.executors import MPIExecutor
from libensemble.gen_funcs.persistent_sampling import persistent_uniform as gen_f
from libensemble.specs import AllocSpecs, ExitCriteria, GenSpecs, LibeSpecs, SimSpecs

if __name__ == "__main__":
    # Initialize MPI Executor
    exctr = MPIExecutor()
    sim_app = os.path.join(os.getcwd(), "../forces_app/forces.x")

    if not os.path.isfile(sim_app):
        sys.exit("forces.x not found - please build first in ../forces_app dir")

    exctr.register_app(full_path=sim_app, app_name="forces")

    # Parse number of workers, comms type, etc. from arguments
    ensemble = Ensemble(parse_args=True, executor=exctr)
    nsim_workers = ensemble.nworkers - 1  # One worker is for the persistent generator

    # Persistent gen does not need resources
    ensemble.libE_specs = LibeSpecs(
        num_resource_sets=nsim_workers,
        sim_dirs_make=True,
        # resource_info = {"gpus_on_node": 4}  # for mocking GPUs
    )

    ensemble.sim_specs = SimSpecs(
        sim_f=run_forces,
        inputs=["x"],
        outputs=[("energy", float)],
    )

    ensemble.gen_specs = GenSpecs(
        gen_f=gen_f,
        inputs=[],  # No input when starting the persistent generator
        persis_in=["sim_id"],  # Return sim_ids of evaluated points to generator
        outputs=[("x", float, (1,))],
        user={
            "initial_batch_size": nsim_workers,
            "lb": np.array([50000]),  # min particles
            "ub": np.array([100000]),  # max particles
        },
    )

    # Starts one persistent generator. Simulated values are returned in batch.
    ensemble.alloc_specs = AllocSpecs(
        alloc_f=alloc_f,
        user={
            "async_return": False,  # False causes batch returns
        },
    )

    # Instruct libEnsemble to exit after this many simulations
    ensemble.exit_criteria = ExitCriteria(sim_max=8)

    # Seed random streams for each worker, particularly for gen_f
    ensemble.add_random_streams()

    # Run ensemble
    ensemble.run()

    if ensemble.is_manager:
        # Note: this will change if changing sim_max, nworkers, lb, ub, etc.
        if ensemble.exit_criteria.sim_max == 8:
            chksum = np.sum(ensemble.H["energy"])
            assert np.isclose(chksum, 96288744.35136001), f"energy check sum is {chksum}"
            print("Checksum passed")
        else:
            print("Run complete. A checksum has not been provided for the given sim_max")
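To run locally, assuming the script above is saved as run_libe_forces.py (the
filename is an assumption; any name works) and forces.x has been built, a typical
invocation with five workers (one hosting the persistent generator, four running
simulations) is:

    python run_libe_forces.py --comms local --nworkers 5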
Also see the Forces GPU tutorial and the video demonstration on Frontier.