Template for GPU executables with dynamic resources
- forces_gpu_var_resources.forces_simf.run_forces(H, persis_info, sim_specs, libE_info)
Launches the forces MPI app and auto-assigns ranks and GPU resources (based on generator output).
forces_simf.py
import numpy as np

# Optional status codes to display in libE_stats.txt for each gen or sim
from libensemble.message_numbers import TASK_FAILED, WORKER_DONE

# Optional - to print GPU settings
from libensemble.tools.test_support import check_gpu_setting


def run_forces(H, persis_info, sim_specs, libE_info):
    """Launches the forces MPI app and auto-assigns ranks and GPU resources
    (based on generator output).
    """

    calc_status = 0

    # Parse out num particles from generator function
    particles = str(int(H["x"][0][0]))

    # App arguments: num particles, timesteps, also using num particles as seed
    args = particles + " " + str(10) + " " + particles

    # Retrieve our MPI Executor
    exctr = libE_info["executor"]

    # Submit our forces app for execution
    task = exctr.submit(app_name="forces", app_args=args)

    # Block until the task finishes
    task.wait()

    # Optional - prints GPU assignment (method and numbers)
    check_gpu_setting(task, assert_setting=False, print_setting=True)

    # Try loading final energy reading and set the sim's status
    statfile = "forces.stat"
    try:
        data = np.loadtxt(statfile)
        final_energy = data[-1]
        calc_status = WORKER_DONE
    except Exception:
        final_energy = np.nan
        calc_status = TASK_FAILED

    # Define our output array, populate with energy reading
    output = np.zeros(1, dtype=sim_specs["out"])
    output["energy"] = final_energy

    # Return final information to worker, for reporting to manager
    return output, persis_info, calc_status
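The resources assigned by the generator can also be overridden at submission time. Below is a hedged variation of the submit call above, assuming MPIExecutor.submit accepts the num_procs and num_gpus keywords (as in recent libEnsemble releases); the values shown are illustrative only.

# Illustrative override (assumed submit keywords): explicitly request
# ranks/GPUs instead of using the generator-supplied "num_gpus" value.
task = exctr.submit(
    app_name="forces",
    app_args=args,
    num_procs=4,  # example: four MPI ranks for this task
    num_gpus=2,   # example: overrides the generator's "num_gpus"
)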
Example usage
Note the use of the generator function uniform_sample_with_var_gpus, which sets num_gpus as a gen_specs output field corresponding to each generated simulation input. The special generator output field "num_gpus" is automatically picked up by each worker and used when the simulation is run, unless overridden. A minimal sketch of such a generator is given below, followed by the full calling script.
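For illustration only, here is a minimal sketch (not the shipped implementation) of a persistent generator that sets num_gpus per point via a linear split of the particle range, following libEnsemble's standard persistent-generator pattern. The function name uniform_sample_with_var_gpus_sketch is hypothetical.

import numpy as np

from libensemble.message_numbers import EVAL_GEN_TAG, FINISHED_PERSISTENT_GEN_TAG, PERSIS_STOP, STOP_TAG
from libensemble.tools.persistent_support import PersistentSupport


def uniform_sample_with_var_gpus_sketch(H, persis_info, gen_specs, libE_info):
    """Hypothetical sketch: sample particle counts and request GPUs per point."""
    u = gen_specs["user"]
    lb, ub, max_gpus = u["lb"], u["ub"], u["max_gpus"]
    batch = u["initial_batch_size"]
    ps = PersistentSupport(libE_info, EVAL_GEN_TAG)

    tag = None
    while tag not in [STOP_TAG, PERSIS_STOP]:
        H_o = np.zeros(batch, dtype=gen_specs["out"])
        x = persis_info["rand_stream"].uniform(lb, ub, (batch, 1))
        H_o["x"] = x
        # Linear split of [lb, ub]: larger particle counts request more GPUs
        frac = (x[:, 0] - lb[0]) / (ub[0] - lb[0])
        H_o["num_gpus"] = np.maximum(1, np.ceil(frac * max_gpus)).astype(int)
        tag, Work, calc_in = ps.send_recv(H_o)  # send points; block for results

    return H_o, persis_info, FINISHED_PERSISTENT_GEN_TAG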
#!/usr/bin/env python

"""
This example is similar to the forces_gpu test.

The forces.c application should be built by setting the GPU preprocessor condition
(usually -DGPU) in addition to OpenMP GPU flags for the given system. See examples
in ../forces_app/build_forces.sh. We recommend running forces.x standalone first
and confirming it is running on the GPU (this is given clearly in the output).

A number of GPUs is requested based on the number of particles (randomly chosen
from the range for each simulation). For simplicity, the number of GPUs requested
is based on a linear split of the range (lb to ub), rather than the absolute
particle count.

To mock on a non-GPU system, uncomment the resource_info line in libE_specs and
compile forces without the -DGPU option. It is recommended that ub and/or lb for
particle counts be reduced for CPU performance.
"""

import os
import sys

import numpy as np
from forces_simf import run_forces  # Sim func from current dir

from libensemble import Ensemble
from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f
from libensemble.executors import MPIExecutor
from libensemble.gen_funcs.persistent_sampling_var_resources import uniform_sample_with_var_gpus as gen_f
from libensemble.specs import AllocSpecs, ExitCriteria, GenSpecs, LibeSpecs, SimSpecs

if __name__ == "__main__":
    # Initialize MPI Executor
    exctr = MPIExecutor()
    sim_app = os.path.join(os.getcwd(), "../forces_app/forces.x")

    if not os.path.isfile(sim_app):
        sys.exit("forces.x not found - please build first in ../forces_app dir")

    exctr.register_app(full_path=sim_app, app_name="forces")

    # Parse number of workers, comms type, etc. from arguments
    ensemble = Ensemble(parse_args=True, executor=exctr)
    nsim_workers = ensemble.nworkers - 1  # One worker is for the persistent generator

    # Persistent gen does not need resources
    ensemble.libE_specs = LibeSpecs(
        num_resource_sets=nsim_workers,
        sim_dirs_make=True,
        stats_fmt={"show_resource_sets": True},  # see resource sets in libE_stats.txt
        # resource_info = {"gpus_on_node": 4},  # for mocking GPUs
    )

    ensemble.sim_specs = SimSpecs(
        sim_f=run_forces,
        inputs=["x"],
        outputs=[("energy", float)],
    )

    ensemble.gen_specs = GenSpecs(
        gen_f=gen_f,
        inputs=[],  # No input when starting persistent generator
        persis_in=["sim_id"],  # Return sim_ids of evaluated points to generator
        outputs=[
            ("x", float, (1,)),
            ("num_gpus", int),  # num_gpus is given to the sim automatically when using the MPIExecutor
        ],
        user={
            "initial_batch_size": nsim_workers,
            "lb": np.array([50000]),  # min particles
            "ub": np.array([100000]),  # max particles
            "max_gpus": nsim_workers,
        },
    )

    # Starts one persistent generator. Simulated values are returned in batch.
    ensemble.alloc_specs = AllocSpecs(
        alloc_f=alloc_f,
        user={
            "async_return": False,  # False causes batch returns
        },
    )

    # Instruct libEnsemble to exit after this many simulations
    ensemble.exit_criteria = ExitCriteria(sim_max=8)

    # Seed random streams for each worker, particularly for gen_f
    ensemble.add_random_streams()

    # Run ensemble
    ensemble.run()

    if ensemble.is_manager:
        # Note: this will change if changing sim_max, nworkers, lb, ub, etc.
        if ensemble.exit_criteria.sim_max == 8:
            chksum = np.sum(ensemble.H["energy"])
            assert np.isclose(chksum, 96288744.35136001), f"energy check sum is {chksum}"
            print("Checksum passed")
        else:
            print("Run complete; a checksum has not been provided for the given sim_max")
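Since the script uses parse_args, it can be launched with standard libEnsemble command-line options. For example, assuming the script is saved as run_libe_forces.py (the filename is an assumption here), the following runs with local multiprocessing comms and five workers (one persistent generator plus four simulation workers):

python run_libe_forces.py --comms local --nworkers 5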
Also see the Forces GPU tutorial and the video demonstration on Frontier.