chombo-discharge
Loading...
Searching...
No Matches
CD_LoadBalancingImplem.H
Go to the documentation of this file.
1/*
2 * SPDX-FileCopyrightText: 2021-2026 SINTEF Energy Research
3 *
4 * SPDX-License-Identifier: GPL-3.0-or-later
5 */
6
13#ifndef CD_LOADBALANCEIMPLEM_H
14#define CD_LOADBALANCEIMPLEM_H
15
16// Std includes
17#include <algorithm>
18#include <random>
19#include <chrono>
20#include <limits>
21#include <array>
22#include <tuple>
23
24// Chombo includes
25#include <LoadBalance.H>
26#include <BoxLayout.H>
27#include <ParmParse.H>
28#include <CH_Timer.H>
29
30// Our includes
31#include <CD_LoadBalancing.H>
32#include <CD_NamespaceHeader.H>
33
34template <class T>
35void
37{
38 CH_TIME("LoadBalancing::makeBalance");
39
40 // LoadBalance(a_ranks, a_loads, a_boxes);
42 LoadBalancing::makeBalance<T>(a_ranks, rankLoads, a_loads, a_boxes);
43}
44
45template <class T>
46void
49 const Vector<T>& a_boxLoads,
50 const Vector<Box>& a_boxes)
51{
52 CH_TIME("LoadBalancing::makeBalance");
53
54 // Convert everything to floating points
56 for (int i = 0; i < a_boxLoads.size(); i++) {
57 boxLoads.push_back(1.0 * a_boxLoads[i]);
58 }
59
60 // Minimum number of grid subsets is the number of boxes, and the maximum number of grid subsets
61 // is the number of ranks.
62 const int numBoxes = static_cast<int>(a_boxes.size());
63 const int numRanks = static_cast<int>(numProc());
64 const int numSubsets = std::min(numBoxes, numRanks);
65
66 a_ranks.resize(numBoxes);
67
68 if (numSubsets > 0) {
69
70 // Figure out the total and target load (load per subset) on this level.
71 Real totalLoad = 0.0;
72 for (int ibox = 0; ibox < numBoxes; ibox++) {
74 }
75
77
78 // Build the grid subsets. When we do this we iterate through the boxes and try to ensure that we partition
79 // the subsets such that the subsetLoad is as close to the dynamic targetLoad as possible.
80 //
81 // The pair contains the starting index for the subset and the computational load for the subset.
84
86
87 int firstSubsetBox = 0;
88
90
91 for (int curSubset = 0; curSubset < numSubsets; curSubset++) {
92
93 // The firstSubsetBox is the index for the first box in this subset (always assigned).
95
97
98 const int subsetsLeft = numSubsets - (curSubset + 1);
99 const int boxesLeft = numBoxes - (firstSubsetBox + 1);
100
101 if (boxesLeft > subsetsLeft) {
102 for (int ibox = firstSubsetBox + 1; ibox < numBoxes; ibox++) {
103
104 // Hook for catching case when we add too many boxes to this subset. Each remaining subset must have at least one box.
105 if (numBoxes - lastSubsetBox - 1 <= subsetsLeft) {
106 break;
107 }
108
109 // Check if we should add this box - we do this by making sure that the dynamically moving target load stays as close
110 // to the static load as possible.
111 //
112 // In the below, '1' is the load without ibox, and '2' is the load with ibox
113 const Real load1 = subsetLoad;
114 const Real load2 = subsetLoad + boxLoads[ibox];
115
116 // Check if we should add this box.
117 bool addBoxToSubset = false;
118
119 if (boxLoads[ibox] <= std::numeric_limits<Real>::epsilon()) {
120 addBoxToSubset = true;
121 }
122 else if (load1 > staticTargetLoad) {
123 addBoxToSubset = false;
124 }
125 else if (load2 <= staticTargetLoad) {
126 addBoxToSubset = true;
127 }
129 // Compute the new average load if we add or don't add this box to the current subset. Accept the answer
130 // that leads to a smallest deviation from the static target load.
133
135 addBoxToSubset = true;
136 }
137 }
138
139 // Add box or break out of box iteration.
140 if (addBoxToSubset) {
143
144 continue;
145 }
146 else {
147 lastSubsetBox = ibox - 1;
148
149 break;
150 }
151 }
152 }
153
154 // Create the subset
156
157 // Update the remaining load.
160
161 // Update start box for next iteration.
163 }
164
165 // Sort the subsets from largest to smallest computational load.
166 std::sort(subsets.begin(), subsets.end(), [](const Subset& A, const Subset& B) -> bool {
167 return A.second > B.second;
168 });
169
170 // Get the accumulated loads per rank, sorted by lowest-to-highest load.
172
173 // Assign the most expensive grid subset to the rank with the lowest accumulated load.
174 for (int i = 0; i < subsets.size(); i++) {
175 const int startIndex = subsets[i].first.first;
176 const int endIndex = subsets[i].first.second;
177 const Real subsetLoad = subsets[i].second;
178 const int rank = sortedRankLoads[i].first;
179
180 // Assign to rank
181 for (int ibox = startIndex; ibox <= endIndex; ibox++) {
182 a_ranks[ibox] = rank;
183 }
184
185 // Update load on this rank.
186 a_rankLoads.incrementLoad(rank, subsetLoad);
187 }
188 }
189 else {
190 a_ranks.resize(0);
191 }
192}
193
194template <class T>
197{
198 CH_TIME("LoadBalancing::packPairs");
199
201 for (int i = 0; i < a_boxes.size(); i++) {
202 vec.emplace_back(a_boxes[i], a_loads[i]);
203 }
204
205 return vec;
206}
207
208template <class T>
209void
211{
212 CH_TIME("LoadBalancing::unpackPairs");
213
214 // Reconstruct boxes and loads
215 a_boxes.resize(0);
216 a_loads.resize(0);
217
218 for (const auto& v : a_pairs) {
219 a_boxes.push_back(v.first);
220 a_loads.push_back(v.second);
221 }
222}
223
224template <typename T>
225void
227{
228 CH_TIME("LoadBalancing::sort");
229
230 for (int lvl = 0; lvl < a_boxes.size(); lvl++) {
232 }
233}
234
235template <typename T>
236void
238{
239 CH_TIME("LoadBalancing::sort");
240
241 switch (a_which) {
242 case BoxSorting::None: {
243 break;
244 }
245 case BoxSorting::Std: {
247
248 break;
249 }
250 case BoxSorting::Shuffle: {
252
253 break;
254 }
255 case BoxSorting::Morton: {
256 LoadBalancing::sortSFC<T, SpaceDim>(a_boxes, a_loads, LoadBalancing::mortonIndex<SpaceDim>);
257
258 break;
259 }
260 case BoxSorting::Hilbert: {
261 LoadBalancing::sortSFC<T, SpaceDim>(a_boxes, a_loads, LoadBalancing::hilbertIndex<SpaceDim>);
262
263 break;
264 }
265 default: {
266 MayDay::Abort("LoadBalancing::sort_boxes - unknown algorithm requested");
267
268 break;
269 }
270 }
271}
272
273template <class T>
274void
276{
277 CH_TIME("LoadBalancing::standardSort");
278
280
281 // Call std::sort, using box1 < box2 lambda as sorting criterion.
282 std::sort(std::begin(vec), std::end(vec), [](const std::pair<Box, T>& v1, const std::pair<Box, T>& v2) {
283 return v1.first < v2.first;
284 });
285
287}
288
289template <class T>
290void
292{
293 CH_TIME("LoadBalancing::shuffleSort");
294
295 auto vec = packPairs(a_boxes, a_loads);
296
297 // Set up RNG
298 auto seed = static_cast<int>(std::chrono::system_clock::now().time_since_epoch().count());
299#ifdef CH_MPI // Broadcast
300 MPI_Bcast(&seed, 1, MPI_INT, 0, Chombo_MPI::comm);
301#endif
302
303 // Shuffle vector
305 std::shuffle(vec.begin(), vec.end(), e);
306
307 // Split boxes and loads
309}
310
311template <class T, int DIM>
312void
316
317{
318 CH_TIME("LoadBalancing::sortSFC");
319
321
322 const int n = static_cast<int>(a_boxes.size());
323 if (n == 0 || n != a_loads.size()) {
324 return;
325 }
326
327 // Pack data into a vector of tuples. Tuple contains (box, load, key).
329 for (std::size_t i = 0; i < n; ++i) {
330 buf.emplace_back(std::move(a_boxes[i]), std::move(a_loads[i]), uint64_t{0});
331 }
332
333 // Compute Hilbert keys
334 for (std::size_t i = 0; i < n; ++i) {
335 const auto& b = std::get<0>(buf[i]);
336 const auto iv = b.smallEnd();
337
339
340 for (int d = 0; d < SpaceDim; ++d) {
341 c[d] = static_cast<uint32_t>(iv[d]);
342 }
343
345 }
346
347 // 3) Sort by hilbert key.
348 std::sort(buf.begin(), buf.end(), [](const Bundle& a, const Bundle& b) noexcept {
349 return std::get<2>(a) < std::get<2>(b);
350 });
351
352 // Unpack into original containers.
353 for (std::size_t i = 0; i < n; ++i) {
356 }
357}
358
359template <int DIM>
362{
363 uint64_t code = 0;
364
365 // If this fails then we can't compute a Morton code using a 64-bit integer. We must either scale back the boxes
366 // by their blocking factors, or switch to 128/256 bit integers.
367 const int maxDim = 1 << 21;
368
369 for (int dir = 0; dir < SpaceDim; dir++) {
370 if (a_coords[dir] > maxDim) {
371 MayDay::Abort("LoadBalancing::mortonIndex - logic bust");
372 }
373 }
374
375 for (int bit = 20; bit >= 0; --bit) {
376 for (int dir = CH_SPACEDIM - 1; dir >= 0; --dir) {
377 const uint64_t b = (static_cast<uint64_t>(static_cast<uint32_t>(a_coords[dir])) >> bit) & 1ULL;
378
379 code = (code << 1) | b;
380 }
381 }
382
383 return code;
384}
385
386template <int DIM>
389{
390 CH_TIME("LoadBalancing::hilbertIndex");
391
392 constexpr int nbits = 21;
393
394 uint32_t x[DIM];
395
396 for (int i = 0; i < DIM; ++i) {
397 x[i] = a_coords[i];
398 }
399
400 const uint32_t M = 1U << (nbits - 1);
401
402 for (uint32_t Q = M; Q > 1; Q >>= 1) {
403 const uint32_t P = Q - 1;
404
405 for (int i = 0; i < DIM; ++i) {
406 if (x[i] & Q) {
407 x[0] ^= P;
408 }
409 else {
410 const uint32_t t = (x[0] ^ x[i]) & P;
411
412 x[0] ^= t;
413 x[i] ^= t;
414 }
415 }
416 }
417
418 for (int i = 1; i < DIM; ++i) {
419 x[i] ^= x[i - 1];
420 }
421
422 uint32_t t = 0;
423 for (uint32_t Q = M; Q > 1; Q >>= 1) {
424 if (x[DIM - 1] & Q) {
425 t ^= (Q - 1);
426 }
427 }
428
429 for (int i = 0; i < DIM; ++i) {
430 x[i] ^= t;
431 }
432
433 uint64_t idx = 0;
434
435 for (int b = nbits - 1; b >= 0; --b) {
436 for (int i = 0; i < DIM; ++i) {
437 idx = (idx << 1) | ((x[i] >> b) & 1U);
438 }
439 }
440
441 return idx;
442}
443
444#include <CD_NamespaceFooter.H>
445
446#endif
BoxSorting
Enum for sorting boxes.
Definition CD_BoxSorting.H:22
Declaration of a static class for various load balancing operations.
static uint64_t mortonIndex(const std::array< uint32_t, DIM > a_coords) noexcept
Helper function for computing a Morton code using 21 bits per direction.
Definition CD_LoadBalancingImplem.H:361
static void makeBalance(Vector< int > &a_ranks, const Vector< T > &a_loads, const Vector< Box > &a_boxes)
Load balancing, assigning ranks to boxes.
Definition CD_LoadBalancingImplem.H:36
static uint64_t hilbertIndex(const std::array< uint32_t, DIM > &a_coords)
Helper function for computing a Hilbert index.
Definition CD_LoadBalancingImplem.H:388
static void standardSort(Vector< Box > &a_boxes, Vector< T > &a_loads)
Standard box sorting, calls C++ std::sort.
Definition CD_LoadBalancingImplem.H:275
static void sort(Vector< Vector< Box > > &a_boxes, Vector< Vector< T > > &a_loads, const BoxSorting a_whichSorting)
Sorts boxes and loads over a hierarchy according to some sorting criterion.
Definition CD_LoadBalancingImplem.H:226
static void unpackPairs(Vector< Box > &a_boxes, Vector< T > &a_loads, const std::vector< std::pair< Box, T > > &a_pairs)
Splits vector pair into separate boxes and loads.
Definition CD_LoadBalancingImplem.H:210
static void sortSFC(Vector< Box > &a_boxes, Vector< T > &a_loads, const std::function< uint64_t(const std::array< uint32_t, DIM >)> &a_sfcEncoder) noexcept
Generic SFC sorting function.
Definition CD_LoadBalancingImplem.H:313
static std::vector< std::pair< Box, T > > packPairs(const Vector< Box > &a_boxes, const Vector< T > &a_loads)
Utility function which packs boxes and loads into a vector of pairs.
Definition CD_LoadBalancingImplem.H:196
static void shuffleSort(Vector< Box > &a_boxes, Vector< T > &a_loads)
Randomly shuffles boxes and loads.
Definition CD_LoadBalancingImplem.H:291
Class for holding computational loads.
Definition CD_Loads.H:31
Base class for a tracer particle solver. This solver can advance particles in a pre-defined velocity ...
Definition CD_TracerParticleSolver.H:38
TracerParticleSolver()
Default constructor.
Definition CD_TracerParticleSolverImplem.H:26