chombo-discharge
Loading...
Searching...
No Matches
CD_LoadBalancingImplem.H
Go to the documentation of this file.
1/* chombo-discharge
2 * Copyright © 2021 SINTEF Energy Research.
3 * Please refer to Copyright.txt and LICENSE in the chombo-discharge root directory.
4 */
5
12#ifndef CD_LoadBalanceImplem_H
13#define CD_LoadBalanceImplem_H
14
15// Std includes
16#include <algorithm>
17#include <random>
18#include <chrono>
19#include <limits>
20#include <array>
21#include <tuple>
22
23// Chombo includes
24#include <LoadBalance.H>
25#include <BoxLayout.H>
26#include <ParmParse.H>
27#include <CH_Timer.H>
28
29// Our includes
30#include <CD_LoadBalancing.H>
31#include <CD_NamespaceHeader.H>
32
33template <class T>
34void
36{
37 CH_TIME("LoadBalancing::makeBalance");
38
39 // LoadBalance(a_ranks, a_loads, a_boxes);
41 LoadBalancing::makeBalance<T>(a_ranks, rankLoads, a_loads, a_boxes);
42}
43
44template <class T>
45void
48 const Vector<T>& a_boxLoads,
49 const Vector<Box>& a_boxes)
50{
51 CH_TIME("LoadBalancing::makeBalance");
52
53 // Convert everything to floating points
55 for (int i = 0; i < a_boxLoads.size(); i++) {
56 boxLoads.push_back(1.0 * a_boxLoads[i]);
57 }
58
59 // Minimum number of grid subsets is the number of boxe, and the maximum number of grid subsets
60 // is the number of ranks.
61 const int numBoxes = a_boxes.size();
62 const int numRanks = numProc();
63 const int numSubsets = std::min(numBoxes, numRanks);
64
65 a_ranks.resize(numBoxes);
66
67 if (numSubsets > 0) {
68
69 // Figure out the total and target load (load per subset) on this level.
70 Real totalLoad = 0.0;
71 for (int ibox = 0; ibox < numBoxes; ibox++) {
73 }
74
76
77 // Build the grid subsets. When we do this we iterate through the boxes and try to ensure that we partition
78 // the subsets such that the subsetLoad is as close to the dynamic targetLoad as possible.
79 //
80 // The pair contains the starting index for the subset and the computational load for the subset.
83
85
86 int firstSubsetBox = 0;
87
89
90 for (int curSubset = 0; curSubset < numSubsets; curSubset++) {
91
92 // The firstSubsetBox is the index for the first box in this subset (always assigned).
94
96
97 const int subsetsLeft = numSubsets - (curSubset + 1);
98 const int boxesLeft = numBoxes - (firstSubsetBox + 1);
99
100 if (boxesLeft > subsetsLeft) {
101 for (int ibox = firstSubsetBox + 1; ibox < numBoxes; ibox++) {
102
103 // Hook for catching case when we add too many boxes to this subset. Each remaining subset must have at least one box.
104 if (numBoxes - lastSubsetBox - 1 <= subsetsLeft) {
105 break;
106 }
107
108 // Check if we should add this box - we do this by making sure that the dynamically moving target load stays as close
109 // to the static load as possible.
110 //
111 // In the below, '1' is the load without ibox, and '2' is the load with ibox
112 const Real load1 = subsetLoad;
113 const Real load2 = subsetLoad + boxLoads[ibox];
114
115 // Check if we should add this box.
116 bool addBoxToSubset = false;
117
118 if (boxLoads[ibox] <= std::numeric_limits<Real>::epsilon()) {
119 addBoxToSubset = true;
120 }
121 else if (load1 > staticTargetLoad) {
122 addBoxToSubset = false;
123 }
124 else if (load2 <= staticTargetLoad) {
125 addBoxToSubset = true;
126 }
128 // Compute the new average load if we add or don't add this box to the current subset. Accept the answer
129 // that leads to a smallest deviation from the static target load.
132
134 addBoxToSubset = true;
135 }
136 }
137
138 // Add box or break out of box iteration.
139 if (addBoxToSubset) {
142
143 continue;
144 }
145 else {
146 lastSubsetBox = ibox - 1;
147
148 break;
149 }
150 }
151 }
152
153 // Create the subset
155
156 // Update the remaining load.
159
160 // Update start box for next iteration.
162 }
163
164 // Sort the subsets from largest to smallest computational load.
165 std::sort(subsets.begin(), subsets.end(), [](const Subset& A, const Subset& B) -> bool {
166 return A.second > B.second;
167 });
168
169 // Get the accumulated loads per rank, sorted by lowest-to-highest load.
171
172 // Assign the most expensive grid subset to the rank with the lowest accumulated load.
173 for (int i = 0; i < subsets.size(); i++) {
174 const int startIndex = subsets[i].first.first;
175 const int endIndex = subsets[i].first.second;
176 const Real subsetLoad = subsets[i].second;
177 const int rank = sortedRankLoads[i].first;
178
179 // Assign to rank
180 for (int ibox = startIndex; ibox <= endIndex; ibox++) {
181 a_ranks[ibox] = rank;
182 }
183
184 // Update load on this rank.
185 a_rankLoads.incrementLoad(rank, subsetLoad);
186 }
187 }
188 else {
189 a_ranks.resize(0);
190 }
191}
192
193template <class T>
196{
197 CH_TIME("LoadBalancing::packPairs");
198
200 for (int i = 0; i < a_boxes.size(); i++) {
201 vec.emplace_back(a_boxes[i], a_loads[i]);
202 }
203
204 return vec;
205}
206
207template <class T>
208void
210{
211 CH_TIME("LoadBalancing::unpackPairs");
212
213 // Reconstruct boxes and loads
214 a_boxes.resize(0);
215 a_loads.resize(0);
216
217 for (const auto& v : a_pairs) {
218 a_boxes.push_back(v.first);
219 a_loads.push_back(v.second);
220 }
221}
222
223template <typename T>
224void
226{
227 CH_TIME("LoadBalancing::sort");
228
229 for (int lvl = 0; lvl < a_boxes.size(); lvl++) {
231 }
232}
233
234template <typename T>
235void
237{
238 CH_TIME("LoadBalancing::sort");
239
240 switch (a_which) {
241 case BoxSorting::None: {
242 break;
243 }
244 case BoxSorting::Std: {
246
247 break;
248 }
249 case BoxSorting::Shuffle: {
251
252 break;
253 }
254 case BoxSorting::Morton: {
255 LoadBalancing::sortSFC<T, SpaceDim>(a_boxes, a_loads, LoadBalancing::mortonIndex<SpaceDim>);
256
257 break;
258 }
259 case BoxSorting::Hilbert: {
260 LoadBalancing::sortSFC<T, SpaceDim>(a_boxes, a_loads, LoadBalancing::hilbertIndex<SpaceDim>);
261
262 break;
263 }
264 default: {
265 MayDay::Abort("LoadBalancing::sort_boxes - unknown algorithm requested");
266
267 break;
268 }
269 }
270}
271
272template <class T>
273void
275{
276 CH_TIME("LoadBalancing::standardSort");
277
279
280 // Call std::sort, using box1 < box2 lambda as sorting criterion.
281 std::sort(std::begin(vec), std::end(vec), [](const std::pair<Box, T>& v1, const std::pair<Box, T>& v2) {
282 return v1.first < v2.first;
283 });
284
286}
287
288template <class T>
289void
291{
292 CH_TIME("LoadBalancing::shuffleSort");
293
294 auto vec = packPairs(a_boxes, a_loads);
295
296 // Set up RNG
297 int seed = std::chrono::system_clock::now().time_since_epoch().count();
298#ifdef CH_MPI // Broadcast
299 MPI_Bcast(&seed, 1, MPI_INT, 0, Chombo_MPI::comm);
300#endif
301
302 // Shuffle vector
304 std::shuffle(vec.begin(), vec.end(), e);
305
306 // Split boxes and loads
308}
309
310template <class T, int DIM>
311void
315
316{
317 CH_TIME("LoadBalancing::sortSFC");
318
320
321 const int n = a_boxes.size();
322 if (n == 0 || n != a_loads.size()) {
323 return;
324 }
325
326 // Pack data into a vector of tuples. Tuple contains (box, load, key).
328 for (std::size_t i = 0; i < n; ++i) {
329 buf.emplace_back(std::move(a_boxes[i]), std::move(a_loads[i]), uint64_t{0});
330 }
331
332 // Compute Hilbert keys
333 for (std::size_t i = 0; i < n; ++i) {
334 const auto& b = std::get<0>(buf[i]);
335 const auto iv = b.smallEnd();
336
338
339 for (int d = 0; d < SpaceDim; ++d) {
340 c[d] = static_cast<uint32_t>(iv[d]);
341 }
342
344 }
345
346 // 3) Sort by hilbert key.
347 std::sort(buf.begin(), buf.end(), [](const Bundle& a, const Bundle& b) noexcept {
348 return std::get<2>(a) < std::get<2>(b);
349 });
350
351 // Unpack into original containers.
352 for (std::size_t i = 0; i < n; ++i) {
355 }
356}
357
358template <int DIM>
361{
362 uint64_t code = 0;
363
364 // If this fails then we can't compute a Morton code using a 64-bit integer. We must either scale back the boxes
365 // by their blocking factors, or switch to 128/256 bit integers.
366 const int maxDim = std::pow(2, 21);
367
368 for (int dir = 0; dir < SpaceDim; dir++) {
369 if (a_coords[dir] > maxDim) {
370 MayDay::Abort("LoadBalancing::mortonIndex - logic bust");
371 }
372 }
373
374 for (int bit = 20; bit >= 0; --bit) {
375 for (int dir = CH_SPACEDIM - 1; dir >= 0; --dir) {
376 const uint64_t b = (static_cast<uint64_t>(static_cast<uint32_t>(a_coords[dir])) >> bit) & 1ull;
377
378 code = (code << 1) | b;
379 }
380 }
381
382 return code;
383}
384
385template <int DIM>
388{
389 CH_TIME("LoadBalancing::hilbertIndex");
390
391 constexpr int nbits = 21;
392
393 uint32_t x[DIM];
394
395 for (int i = 0; i < DIM; ++i) {
396 x[i] = a_coords[i];
397 }
398
399 const uint32_t M = 1u << (nbits - 1);
400
401 for (uint32_t Q = M; Q > 1; Q >>= 1) {
402 const uint32_t P = Q - 1;
403
404 for (int i = 0; i < DIM; ++i) {
405 if (x[i] & Q) {
406 x[0] ^= P;
407 }
408 else {
409 const uint32_t t = (x[0] ^ x[i]) & P;
410
411 x[0] ^= t;
412 x[i] ^= t;
413 }
414 }
415 }
416
417 for (int i = 1; i < DIM; ++i) {
418 x[i] ^= x[i - 1];
419 }
420
421 uint32_t t = 0;
422 for (uint32_t Q = M; Q > 1; Q >>= 1) {
423 if (x[DIM - 1] & Q) {
424 t ^= (Q - 1);
425 }
426 }
427
428 for (int i = 0; i < DIM; ++i) {
429 x[i] ^= t;
430 }
431
432 uint64_t idx = 0;
433
434 for (int b = nbits - 1; b >= 0; --b) {
435 for (int i = 0; i < DIM; ++i) {
436 idx = (idx << 1) | ((x[i] >> b) & 1u);
437 }
438 }
439
440 return idx;
441}
442
443#include <CD_NamespaceFooter.H>
444
445#endif
BoxSorting
Enum for sorting boxes.
Definition CD_BoxSorting.H:21
Declaration of a static class for various load balancing operations.
static uint64_t mortonIndex(const std::array< uint32_t, DIM > a_coords) noexcept
Helper function for computing a Morton code using 21 bits per direction.
Definition CD_LoadBalancingImplem.H:360
static void makeBalance(Vector< int > &a_ranks, const Vector< T > &a_loads, const Vector< Box > &a_boxes)
Load balancing, assigning ranks to boxes.
Definition CD_LoadBalancingImplem.H:35
static uint64_t hilbertIndex(const std::array< uint32_t, DIM > &a_coords)
Helper function for computing a Hilbert index.
Definition CD_LoadBalancingImplem.H:387
static void standardSort(Vector< Box > &a_boxes, Vector< T > &a_loads)
Standard box sorting, calls C++ std::sort.
Definition CD_LoadBalancingImplem.H:274
static void sort(Vector< Vector< Box > > &a_boxes, Vector< Vector< T > > &a_loads, const BoxSorting a_whichSorting)
Sorts boxes and loads over a hierarchy according to some sorting criterion.
Definition CD_LoadBalancingImplem.H:225
static void unpackPairs(Vector< Box > &a_boxes, Vector< T > &a_loads, const std::vector< std::pair< Box, T > > &a_pairs)
Splits vector pair into separate boxes and loads.
Definition CD_LoadBalancingImplem.H:209
static void sortSFC(Vector< Box > &a_boxes, Vector< T > &a_loads, const std::function< uint64_t(const std::array< uint32_t, DIM >)> &a_sfcEncoder) noexcept
Generic SFC sorting function.
Definition CD_LoadBalancingImplem.H:312
static std::vector< std::pair< Box, T > > packPairs(const Vector< Box > &a_boxes, const Vector< T > &a_loads)
Utility function which packs boxes and loads into a vector of pairs.
Definition CD_LoadBalancingImplem.H:195
static void shuffleSort(Vector< Box > &a_boxes, Vector< T > &a_loads)
Randomly shuffles boxes and loads.
Definition CD_LoadBalancingImplem.H:290
Class for holding computational loads.
Definition CD_Loads.H:30
Base class for a tracer particle solver. This solver can advance particles in a pre-defined velocity ...
Definition CD_TracerParticleSolver.H:37
TracerParticleSolver()
Default constructor.
Definition CD_TracerParticleSolverImplem.H:25