LLVM OpenMP* Runtime Library
kmp_dispatch_hier.h
1 /*
2  * kmp_dispatch_hier.h -- hierarchical scheduling methods and data structures
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef KMP_DISPATCH_HIER_H
14 #define KMP_DISPATCH_HIER_H
15 #include "kmp.h"
16 #include "kmp_dispatch.h"
17 
// Layer type for scheduling hierarchy.
// LAYER_THREAD is -1 so that the remaining layers count up from zero, and
// LAYER_LAST is the terminating value that follows the last real layer.
enum kmp_hier_layer_e {
  LAYER_THREAD = -1,
  LAYER_L1 = 0,
  LAYER_L2 = 1,
  LAYER_L3 = 2,
  LAYER_NUMA = 3,
  LAYER_LOOP = 4,
  LAYER_LAST = 5
};
28 
29 // Convert hierarchy type (LAYER_L1, LAYER_L2, etc.) to C-style string
30 static inline const char *__kmp_get_hier_str(kmp_hier_layer_e type) {
31  switch (type) {
32  case kmp_hier_layer_e::LAYER_THREAD:
33  return "THREAD";
34  case kmp_hier_layer_e::LAYER_L1:
35  return "L1";
36  case kmp_hier_layer_e::LAYER_L2:
37  return "L2";
38  case kmp_hier_layer_e::LAYER_L3:
39  return "L3";
40  case kmp_hier_layer_e::LAYER_NUMA:
41  return "NUMA";
42  case kmp_hier_layer_e::LAYER_LOOP:
43  return "WHOLE_LOOP";
44  case kmp_hier_layer_e::LAYER_LAST:
45  return "LAST";
46  }
47  KMP_ASSERT(0);
48  // Appease compilers, should never get here
49  return "ERROR";
50 }
51 
52 // Structure to store values parsed from OMP_SCHEDULE for scheduling hierarchy
53 typedef struct kmp_hier_sched_env_t {
54  int size;
55  int capacity;
56  enum sched_type *scheds;
57  kmp_int32 *small_chunks;
58  kmp_int64 *large_chunks;
59  kmp_hier_layer_e *layers;
60  // Append a level of the hierarchy
61  void append(enum sched_type sched, kmp_int32 chunk, kmp_hier_layer_e layer) {
62  if (capacity == 0) {
63  scheds = (enum sched_type *)__kmp_allocate(sizeof(enum sched_type) *
64  kmp_hier_layer_e::LAYER_LAST);
65  small_chunks = (kmp_int32 *)__kmp_allocate(sizeof(kmp_int32) *
66  kmp_hier_layer_e::LAYER_LAST);
67  large_chunks = (kmp_int64 *)__kmp_allocate(sizeof(kmp_int64) *
68  kmp_hier_layer_e::LAYER_LAST);
69  layers = (kmp_hier_layer_e *)__kmp_allocate(sizeof(kmp_hier_layer_e) *
70  kmp_hier_layer_e::LAYER_LAST);
71  capacity = kmp_hier_layer_e::LAYER_LAST;
72  }
73  int current_size = size;
74  KMP_DEBUG_ASSERT(current_size < kmp_hier_layer_e::LAYER_LAST);
75  scheds[current_size] = sched;
76  layers[current_size] = layer;
77  small_chunks[current_size] = chunk;
78  large_chunks[current_size] = (kmp_int64)chunk;
79  size++;
80  }
81  // Sort the hierarchy using selection sort, size will always be small
82  // (less than LAYER_LAST) so it is not necessary to use an nlog(n) algorithm
83  void sort() {
84  if (size <= 1)
85  return;
86  for (int i = 0; i < size; ++i) {
87  int switch_index = i;
88  for (int j = i + 1; j < size; ++j) {
89  if (layers[j] < layers[switch_index])
90  switch_index = j;
91  }
92  if (switch_index != i) {
93  kmp_hier_layer_e temp1 = layers[i];
94  enum sched_type temp2 = scheds[i];
95  kmp_int32 temp3 = small_chunks[i];
96  kmp_int64 temp4 = large_chunks[i];
97  layers[i] = layers[switch_index];
98  scheds[i] = scheds[switch_index];
99  small_chunks[i] = small_chunks[switch_index];
100  large_chunks[i] = large_chunks[switch_index];
101  layers[switch_index] = temp1;
102  scheds[switch_index] = temp2;
103  small_chunks[switch_index] = temp3;
104  large_chunks[switch_index] = temp4;
105  }
106  }
107  }
108  // Free all memory
109  void deallocate() {
110  if (capacity > 0) {
111  __kmp_free(scheds);
112  __kmp_free(layers);
113  __kmp_free(small_chunks);
114  __kmp_free(large_chunks);
115  scheds = NULL;
116  layers = NULL;
117  small_chunks = NULL;
118  large_chunks = NULL;
119  }
120  size = 0;
121  capacity = 0;
122  }
123 } kmp_hier_sched_env_t;
124 
125 extern int __kmp_dispatch_hand_threading;
126 extern kmp_hier_sched_env_t __kmp_hier_scheds;
127 
128 // Sizes of layer arrays bounded by max number of detected L1s, L2s, etc.
129 extern int __kmp_hier_max_units[kmp_hier_layer_e::LAYER_LAST + 1];
130 extern int __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_LAST + 1];
131 
132 extern int __kmp_dispatch_get_index(int tid, kmp_hier_layer_e type);
133 extern int __kmp_dispatch_get_id(int gtid, kmp_hier_layer_e type);
134 extern int __kmp_dispatch_get_t1_per_t2(kmp_hier_layer_e t1,
135  kmp_hier_layer_e t2);
136 extern void __kmp_dispatch_free_hierarchies(kmp_team_t *team);
137 
138 template <typename T> struct kmp_hier_shared_bdata_t {
139  typedef typename traits_t<T>::signed_t ST;
140  volatile kmp_uint64 val[2];
141  kmp_int32 status[2];
142  T lb[2];
143  T ub[2];
144  ST st[2];
145  dispatch_shared_info_template<T> sh[2];
146  void zero() {
147  val[0] = val[1] = 0;
148  status[0] = status[1] = 0;
149  lb[0] = lb[1] = 0;
150  ub[0] = ub[1] = 0;
151  st[0] = st[1] = 0;
152  sh[0].u.s.iteration = sh[1].u.s.iteration = 0;
153  }
154  void set_next_hand_thread(T nlb, T nub, ST nst, kmp_int32 nstatus,
155  kmp_uint64 index) {
156  lb[1 - index] = nlb;
157  ub[1 - index] = nub;
158  st[1 - index] = nst;
159  status[1 - index] = nstatus;
160  }
161  void set_next(T nlb, T nub, ST nst, kmp_int32 nstatus, kmp_uint64 index) {
162  lb[1 - index] = nlb;
163  ub[1 - index] = nub;
164  st[1 - index] = nst;
165  status[1 - index] = nstatus;
166  sh[1 - index].u.s.iteration = 0;
167  }
168 
169  kmp_int32 get_next_status(kmp_uint64 index) const {
170  return status[1 - index];
171  }
172  T get_next_lb(kmp_uint64 index) const { return lb[1 - index]; }
173  T get_next_ub(kmp_uint64 index) const { return ub[1 - index]; }
174  ST get_next_st(kmp_uint64 index) const { return st[1 - index]; }
175  dispatch_shared_info_template<T> volatile *get_next_sh(kmp_uint64 index) {
176  return &(sh[1 - index]);
177  }
178 
179  kmp_int32 get_curr_status(kmp_uint64 index) const { return status[index]; }
180  T get_curr_lb(kmp_uint64 index) const { return lb[index]; }
181  T get_curr_ub(kmp_uint64 index) const { return ub[index]; }
182  ST get_curr_st(kmp_uint64 index) const { return st[index]; }
183  dispatch_shared_info_template<T> volatile *get_curr_sh(kmp_uint64 index) {
184  return &(sh[index]);
185  }
186 };
187 
188 /*
189  * In the barrier implementations, num_active is the number of threads that are
190  * attached to the kmp_hier_top_unit_t structure in the scheduling hierarchy.
191  * bdata is the shared barrier data that resides on the kmp_hier_top_unit_t
192  * structure. tdata is the thread private data that resides on the thread
193  * data structure.
194  *
195  * The reset_shared() method is used to initialize the barrier data on the
196  * kmp_hier_top_unit_t hierarchy structure
197  *
198  * The reset_private() method is used to initialize the barrier data on the
199  * thread's private dispatch buffer structure
200  *
201  * The barrier() method takes an id, which is that thread's id for the
202  * kmp_hier_top_unit_t structure, and implements the barrier. All threads wait
203  * inside barrier() until all fellow threads who are attached to that
204  * kmp_hier_top_unit_t structure have arrived.
205  */
206 
207 // Core barrier implementation
208 // Can be used in a unit with between 2 to 8 threads
209 template <typename T> class core_barrier_impl {
210  static inline kmp_uint64 get_wait_val(int num_active) {
211  kmp_uint64 wait_val = 0LL;
212  switch (num_active) {
213  case 2:
214  wait_val = 0x0101LL;
215  break;
216  case 3:
217  wait_val = 0x010101LL;
218  break;
219  case 4:
220  wait_val = 0x01010101LL;
221  break;
222  case 5:
223  wait_val = 0x0101010101LL;
224  break;
225  case 6:
226  wait_val = 0x010101010101LL;
227  break;
228  case 7:
229  wait_val = 0x01010101010101LL;
230  break;
231  case 8:
232  wait_val = 0x0101010101010101LL;
233  break;
234  default:
235  // don't use the core_barrier_impl for more than 8 threads
236  KMP_ASSERT(0);
237  }
238  return wait_val;
239  }
240 
241 public:
242  static void reset_private(kmp_int32 num_active,
243  kmp_hier_private_bdata_t *tdata);
244  static void reset_shared(kmp_int32 num_active,
245  kmp_hier_shared_bdata_t<T> *bdata);
246  static void barrier(kmp_int32 id, kmp_hier_shared_bdata_t<T> *bdata,
247  kmp_hier_private_bdata_t *tdata);
248 };
249 
250 template <typename T>
251 void core_barrier_impl<T>::reset_private(kmp_int32 num_active,
252  kmp_hier_private_bdata_t *tdata) {
253  tdata->num_active = num_active;
254  tdata->index = 0;
255  tdata->wait_val[0] = tdata->wait_val[1] = get_wait_val(num_active);
256 }
257 template <typename T>
258 void core_barrier_impl<T>::reset_shared(kmp_int32 num_active,
259  kmp_hier_shared_bdata_t<T> *bdata) {
260  bdata->val[0] = bdata->val[1] = 0LL;
261  bdata->status[0] = bdata->status[1] = 0LL;
262 }
263 template <typename T>
264 void core_barrier_impl<T>::barrier(kmp_int32 id,
265  kmp_hier_shared_bdata_t<T> *bdata,
266  kmp_hier_private_bdata_t *tdata) {
267  kmp_uint64 current_index = tdata->index;
268  kmp_uint64 next_index = 1 - current_index;
269  kmp_uint64 current_wait_value = tdata->wait_val[current_index];
270  kmp_uint64 next_wait_value =
271  (current_wait_value ? 0 : get_wait_val(tdata->num_active));
272  KD_TRACE(10, ("core_barrier_impl::barrier(): T#%d current_index:%llu "
273  "next_index:%llu curr_wait:%llu next_wait:%llu\n",
274  __kmp_get_gtid(), current_index, next_index, current_wait_value,
275  next_wait_value));
276  char v = (current_wait_value ? '\1' : '\0');
277  (RCAST(volatile char *, &(bdata->val[current_index])))[id] = v;
278  __kmp_wait<kmp_uint64>(&(bdata->val[current_index]), current_wait_value,
279  __kmp_eq<kmp_uint64> USE_ITT_BUILD_ARG(NULL));
280  tdata->wait_val[current_index] = next_wait_value;
281  tdata->index = next_index;
282 }
283 
284 // Counter barrier implementation
285 // Can be used in a unit with arbitrary number of active threads
286 template <typename T> class counter_barrier_impl {
287 public:
288  static void reset_private(kmp_int32 num_active,
289  kmp_hier_private_bdata_t *tdata);
290  static void reset_shared(kmp_int32 num_active,
291  kmp_hier_shared_bdata_t<T> *bdata);
292  static void barrier(kmp_int32 id, kmp_hier_shared_bdata_t<T> *bdata,
293  kmp_hier_private_bdata_t *tdata);
294 };
295 
296 template <typename T>
297 void counter_barrier_impl<T>::reset_private(kmp_int32 num_active,
298  kmp_hier_private_bdata_t *tdata) {
299  tdata->num_active = num_active;
300  tdata->index = 0;
301  tdata->wait_val[0] = tdata->wait_val[1] = (kmp_uint64)num_active;
302 }
303 template <typename T>
304 void counter_barrier_impl<T>::reset_shared(kmp_int32 num_active,
305  kmp_hier_shared_bdata_t<T> *bdata) {
306  bdata->val[0] = bdata->val[1] = 0LL;
307  bdata->status[0] = bdata->status[1] = 0LL;
308 }
309 template <typename T>
310 void counter_barrier_impl<T>::barrier(kmp_int32 id,
311  kmp_hier_shared_bdata_t<T> *bdata,
312  kmp_hier_private_bdata_t *tdata) {
313  volatile kmp_int64 *val;
314  kmp_uint64 current_index = tdata->index;
315  kmp_uint64 next_index = 1 - current_index;
316  kmp_uint64 current_wait_value = tdata->wait_val[current_index];
317  kmp_uint64 next_wait_value = current_wait_value + tdata->num_active;
318 
319  KD_TRACE(10, ("counter_barrier_impl::barrier(): T#%d current_index:%llu "
320  "next_index:%llu curr_wait:%llu next_wait:%llu\n",
321  __kmp_get_gtid(), current_index, next_index, current_wait_value,
322  next_wait_value));
323  val = RCAST(volatile kmp_int64 *, &(bdata->val[current_index]));
324  KMP_TEST_THEN_INC64(val);
325  __kmp_wait<kmp_uint64>(&(bdata->val[current_index]), current_wait_value,
326  __kmp_ge<kmp_uint64> USE_ITT_BUILD_ARG(NULL));
327  tdata->wait_val[current_index] = next_wait_value;
328  tdata->index = next_index;
329 }
330 
331 // Data associated with topology unit within a layer
332 // For example, one kmp_hier_top_unit_t corresponds to one L1 cache
333 template <typename T> struct kmp_hier_top_unit_t {
334  typedef typename traits_t<T>::signed_t ST;
335  typedef typename traits_t<T>::unsigned_t UT;
336  kmp_int32 active; // number of topology units that communicate with this unit
337  // chunk information (lower/upper bound, stride, etc.)
338  dispatch_private_info_template<T> hier_pr;
339  kmp_hier_top_unit_t<T> *hier_parent; // pointer to parent unit
340  kmp_hier_shared_bdata_t<T> hier_barrier; // shared barrier data for this unit
341 
342  kmp_int32 get_hier_id() const { return hier_pr.hier_id; }
343  void reset_shared_barrier() {
344  KMP_DEBUG_ASSERT(active > 0);
345  if (active == 1)
346  return;
347  hier_barrier.zero();
348  if (active >= 2 && active <= 8) {
349  core_barrier_impl<T>::reset_shared(active, &hier_barrier);
350  } else {
351  counter_barrier_impl<T>::reset_shared(active, &hier_barrier);
352  }
353  }
354  void reset_private_barrier(kmp_hier_private_bdata_t *tdata) {
355  KMP_DEBUG_ASSERT(tdata);
356  KMP_DEBUG_ASSERT(active > 0);
357  if (active == 1)
358  return;
359  if (active >= 2 && active <= 8) {
360  core_barrier_impl<T>::reset_private(active, tdata);
361  } else {
362  counter_barrier_impl<T>::reset_private(active, tdata);
363  }
364  }
365  void barrier(kmp_int32 id, kmp_hier_private_bdata_t *tdata) {
366  KMP_DEBUG_ASSERT(tdata);
367  KMP_DEBUG_ASSERT(active > 0);
368  KMP_DEBUG_ASSERT(id >= 0 && id < active);
369  if (active == 1) {
370  tdata->index = 1 - tdata->index;
371  return;
372  }
373  if (active >= 2 && active <= 8) {
374  core_barrier_impl<T>::barrier(id, &hier_barrier, tdata);
375  } else {
376  counter_barrier_impl<T>::barrier(id, &hier_barrier, tdata);
377  }
378  }
379 
380  kmp_int32 get_next_status(kmp_uint64 index) const {
381  return hier_barrier.get_next_status(index);
382  }
383  T get_next_lb(kmp_uint64 index) const {
384  return hier_barrier.get_next_lb(index);
385  }
386  T get_next_ub(kmp_uint64 index) const {
387  return hier_barrier.get_next_ub(index);
388  }
389  ST get_next_st(kmp_uint64 index) const {
390  return hier_barrier.get_next_st(index);
391  }
392  dispatch_shared_info_template<T> volatile *get_next_sh(kmp_uint64 index) {
393  return hier_barrier.get_next_sh(index);
394  }
395 
396  kmp_int32 get_curr_status(kmp_uint64 index) const {
397  return hier_barrier.get_curr_status(index);
398  }
399  T get_curr_lb(kmp_uint64 index) const {
400  return hier_barrier.get_curr_lb(index);
401  }
402  T get_curr_ub(kmp_uint64 index) const {
403  return hier_barrier.get_curr_ub(index);
404  }
405  ST get_curr_st(kmp_uint64 index) const {
406  return hier_barrier.get_curr_st(index);
407  }
408  dispatch_shared_info_template<T> volatile *get_curr_sh(kmp_uint64 index) {
409  return hier_barrier.get_curr_sh(index);
410  }
411 
412  void set_next_hand_thread(T lb, T ub, ST st, kmp_int32 status,
413  kmp_uint64 index) {
414  hier_barrier.set_next_hand_thread(lb, ub, st, status, index);
415  }
416  void set_next(T lb, T ub, ST st, kmp_int32 status, kmp_uint64 index) {
417  hier_barrier.set_next(lb, ub, st, status, index);
418  }
419  dispatch_private_info_template<T> *get_my_pr() { return &hier_pr; }
420  kmp_hier_top_unit_t<T> *get_parent() { return hier_parent; }
421  dispatch_private_info_template<T> *get_parent_pr() {
422  return &(hier_parent->hier_pr);
423  }
424 
425  kmp_int32 is_active() const { return active; }
426  kmp_int32 get_num_active() const { return active; }
427 #ifdef KMP_DEBUG
428  void print() {
429  KD_TRACE(
430  10,
431  (" kmp_hier_top_unit_t: active:%d pr:%p lb:%d ub:%d st:%d tc:%d\n",
432  active, &hier_pr, hier_pr.u.p.lb, hier_pr.u.p.ub, hier_pr.u.p.st,
433  hier_pr.u.p.tc));
434  }
435 #endif
436 };
437 
438 // Information regarding a single layer within the scheduling hierarchy
439 template <typename T> struct kmp_hier_layer_info_t {
440  int num_active; // number of threads active in this level
441  kmp_hier_layer_e type; // LAYER_L1, LAYER_L2, etc.
442  enum sched_type sched; // static, dynamic, guided, etc.
443  typename traits_t<T>::signed_t chunk; // chunk size associated with schedule
444  int length; // length of the kmp_hier_top_unit_t array
445 
446 #ifdef KMP_DEBUG
447  // Print this layer's information
448  void print() {
449  const char *t = __kmp_get_hier_str(type);
450  KD_TRACE(
451  10,
452  (" kmp_hier_layer_info_t: num_active:%d type:%s sched:%d chunk:%d "
453  "length:%d\n",
454  num_active, t, sched, chunk, length));
455  }
456 #endif
457 };
458 
459 /*
460  * Structure to implement entire hierarchy
461  *
462  * The hierarchy is kept as an array of arrays to represent the different
463  * layers. Layer 0 is the lowest layer to layer num_layers - 1 which is the
464  * highest layer.
465  * Example:
466  * [ 2 ] -> [ L3 | L3 ]
467  * [ 1 ] -> [ L2 | L2 | L2 | L2 ]
468  * [ 0 ] -> [ L1 | L1 | L1 | L1 | L1 | L1 | L1 | L1 ]
469  * There is also an array of layer_info_t which has information regarding
470  * each layer
471  */
472 template <typename T> struct kmp_hier_t {
473 public:
474  typedef typename traits_t<T>::unsigned_t UT;
475  typedef typename traits_t<T>::signed_t ST;
476 
477 private:
478  int next_recurse(ident_t *loc, int gtid, kmp_hier_top_unit_t<T> *current,
479  kmp_int32 *p_last, T *p_lb, T *p_ub, ST *p_st,
480  kmp_int32 previous_id, int hier_level) {
481  int status;
482  kmp_info_t *th = __kmp_threads[gtid];
483  auto parent = current->get_parent();
484  bool last_layer = (hier_level == get_num_layers() - 1);
485  KMP_DEBUG_ASSERT(th);
486  kmp_hier_private_bdata_t *tdata = &(th->th.th_hier_bar_data[hier_level]);
487  KMP_DEBUG_ASSERT(current);
488  KMP_DEBUG_ASSERT(hier_level >= 0);
489  KMP_DEBUG_ASSERT(hier_level < get_num_layers());
490  KMP_DEBUG_ASSERT(tdata);
491  KMP_DEBUG_ASSERT(parent || last_layer);
492 
493  KD_TRACE(
494  1, ("kmp_hier_t.next_recurse(): T#%d (%d) called\n", gtid, hier_level));
495 
496  T hier_id = (T)current->get_hier_id();
497  // Attempt to grab next iteration range for this level
498  if (previous_id == 0) {
499  KD_TRACE(1, ("kmp_hier_t.next_recurse(): T#%d (%d) is primary of unit\n",
500  gtid, hier_level));
501  kmp_int32 contains_last;
502  T my_lb, my_ub;
503  ST my_st;
504  T nproc;
505  dispatch_shared_info_template<T> volatile *my_sh;
506  dispatch_private_info_template<T> *my_pr;
507  if (last_layer) {
508  // last layer below the very top uses the single shared buffer
509  // from the team struct.
510  KD_TRACE(10,
511  ("kmp_hier_t.next_recurse(): T#%d (%d) using top level sh\n",
512  gtid, hier_level));
513  my_sh = reinterpret_cast<dispatch_shared_info_template<T> volatile *>(
514  th->th.th_dispatch->th_dispatch_sh_current);
515  nproc = (T)get_top_level_nproc();
516  } else {
517  // middle layers use the shared buffer inside the kmp_hier_top_unit_t
518  // structure
519  KD_TRACE(10, ("kmp_hier_t.next_recurse(): T#%d (%d) using hier sh\n",
520  gtid, hier_level));
521  my_sh =
522  parent->get_curr_sh(th->th.th_hier_bar_data[hier_level + 1].index);
523  nproc = (T)parent->get_num_active();
524  }
525  my_pr = current->get_my_pr();
526  KMP_DEBUG_ASSERT(my_sh);
527  KMP_DEBUG_ASSERT(my_pr);
528  enum sched_type schedule = get_sched(hier_level);
529  ST chunk = (ST)get_chunk(hier_level);
530  status = __kmp_dispatch_next_algorithm<T>(gtid, my_pr, my_sh,
531  &contains_last, &my_lb, &my_ub,
532  &my_st, nproc, hier_id);
533  KD_TRACE(
534  10,
535  ("kmp_hier_t.next_recurse(): T#%d (%d) next_pr_sh() returned %d\n",
536  gtid, hier_level, status));
537  // When no iterations are found (status == 0) and this is not the last
538  // layer, attempt to go up the hierarchy for more iterations
539  if (status == 0 && !last_layer) {
540  kmp_int32 hid;
541  __kmp_type_convert(hier_id, &hid);
542  status = next_recurse(loc, gtid, parent, &contains_last, &my_lb, &my_ub,
543  &my_st, hid, hier_level + 1);
544  KD_TRACE(
545  10,
546  ("kmp_hier_t.next_recurse(): T#%d (%d) hier_next() returned %d\n",
547  gtid, hier_level, status));
548  if (status == 1) {
549  kmp_hier_private_bdata_t *upper_tdata =
550  &(th->th.th_hier_bar_data[hier_level + 1]);
551  my_sh = parent->get_curr_sh(upper_tdata->index);
552  KD_TRACE(10, ("kmp_hier_t.next_recurse(): T#%d (%d) about to init\n",
553  gtid, hier_level));
554  __kmp_dispatch_init_algorithm(loc, gtid, my_pr, schedule,
555  parent->get_curr_lb(upper_tdata->index),
556  parent->get_curr_ub(upper_tdata->index),
557  parent->get_curr_st(upper_tdata->index),
558 #if USE_ITT_BUILD
559  NULL,
560 #endif
561  chunk, nproc, hier_id);
562  status = __kmp_dispatch_next_algorithm<T>(
563  gtid, my_pr, my_sh, &contains_last, &my_lb, &my_ub, &my_st, nproc,
564  hier_id);
565  if (!status) {
566  KD_TRACE(10, ("kmp_hier_t.next_recurse(): T#%d (%d) status not 1 "
567  "setting to 2!\n",
568  gtid, hier_level));
569  status = 2;
570  }
571  }
572  }
573  current->set_next(my_lb, my_ub, my_st, status, tdata->index);
574  // Propagate whether a unit holds the actual global last iteration
575  // The contains_last attribute is sent downwards from the top to the
576  // bottom of the hierarchy via the contains_last flag inside the
577  // private dispatch buffers in the hierarchy's middle layers
578  if (contains_last) {
579  // If the next_algorithm() method returns 1 for p_last and it is the
580  // last layer or our parent contains the last serial chunk, then the
581  // chunk must contain the last serial iteration.
582  if (last_layer || parent->hier_pr.flags.contains_last) {
583  KD_TRACE(10, ("kmp_hier_t.next_recurse(): T#%d (%d) Setting this pr "
584  "to contain last.\n",
585  gtid, hier_level));
586  current->hier_pr.flags.contains_last = contains_last;
587  }
588  if (!current->hier_pr.flags.contains_last)
589  contains_last = FALSE;
590  }
591  if (p_last)
592  *p_last = contains_last;
593  } // if primary thread of this unit
594  if (hier_level > 0 || !__kmp_dispatch_hand_threading) {
595  KD_TRACE(10,
596  ("kmp_hier_t.next_recurse(): T#%d (%d) going into barrier.\n",
597  gtid, hier_level));
598  current->barrier(previous_id, tdata);
599  KD_TRACE(10,
600  ("kmp_hier_t.next_recurse(): T#%d (%d) released and exit %d\n",
601  gtid, hier_level, current->get_curr_status(tdata->index)));
602  } else {
603  KMP_DEBUG_ASSERT(previous_id == 0);
604  return status;
605  }
606  return current->get_curr_status(tdata->index);
607  }
608 
609 public:
610  int top_level_nproc;
611  int num_layers;
612  bool valid;
613  int type_size;
614  kmp_hier_layer_info_t<T> *info;
615  kmp_hier_top_unit_t<T> **layers;
616  // Deallocate all memory from this hierarchy
617  void deallocate() {
618  for (int i = 0; i < num_layers; ++i)
619  if (layers[i] != NULL) {
620  __kmp_free(layers[i]);
621  }
622  if (layers != NULL) {
623  __kmp_free(layers);
624  layers = NULL;
625  }
626  if (info != NULL) {
627  __kmp_free(info);
628  info = NULL;
629  }
630  num_layers = 0;
631  valid = false;
632  }
633  // Returns true if reallocation is needed else false
634  bool need_to_reallocate(int n, const kmp_hier_layer_e *new_layers,
635  const enum sched_type *new_scheds,
636  const ST *new_chunks) const {
637  if (!valid || layers == NULL || info == NULL ||
638  traits_t<T>::type_size != type_size || n != num_layers)
639  return true;
640  for (int i = 0; i < n; ++i) {
641  if (info[i].type != new_layers[i])
642  return true;
643  if (info[i].sched != new_scheds[i])
644  return true;
645  if (info[i].chunk != new_chunks[i])
646  return true;
647  }
648  return false;
649  }
650  // A single thread should call this function while the other threads wait
651  // create a new scheduling hierarchy consisting of new_layers, new_scheds
652  // and new_chunks. These should come pre-sorted according to
653  // kmp_hier_layer_e value. This function will try to avoid reallocation
654  // if it can
655  void allocate_hier(int n, const kmp_hier_layer_e *new_layers,
656  const enum sched_type *new_scheds, const ST *new_chunks) {
657  top_level_nproc = 0;
658  if (!need_to_reallocate(n, new_layers, new_scheds, new_chunks)) {
659  KD_TRACE(
660  10,
661  ("kmp_hier_t<T>::allocate_hier: T#0 do not need to reallocate\n"));
662  for (int i = 0; i < n; ++i) {
663  info[i].num_active = 0;
664  for (int j = 0; j < get_length(i); ++j)
665  layers[i][j].active = 0;
666  }
667  return;
668  }
669  KD_TRACE(10, ("kmp_hier_t<T>::allocate_hier: T#0 full alloc\n"));
670  deallocate();
671  type_size = traits_t<T>::type_size;
672  num_layers = n;
673  info = (kmp_hier_layer_info_t<T> *)__kmp_allocate(
674  sizeof(kmp_hier_layer_info_t<T>) * n);
675  layers = (kmp_hier_top_unit_t<T> **)__kmp_allocate(
676  sizeof(kmp_hier_top_unit_t<T> *) * n);
677  for (int i = 0; i < n; ++i) {
678  int max = 0;
679  kmp_hier_layer_e layer = new_layers[i];
680  info[i].num_active = 0;
681  info[i].type = layer;
682  info[i].sched = new_scheds[i];
683  info[i].chunk = new_chunks[i];
684  max = __kmp_hier_max_units[layer + 1];
685  if (max == 0) {
686  valid = false;
687  KMP_WARNING(HierSchedInvalid, __kmp_get_hier_str(layer));
688  deallocate();
689  return;
690  }
691  info[i].length = max;
692  layers[i] = (kmp_hier_top_unit_t<T> *)__kmp_allocate(
693  sizeof(kmp_hier_top_unit_t<T>) * max);
694  for (int j = 0; j < max; ++j) {
695  layers[i][j].active = 0;
696  layers[i][j].hier_pr.flags.use_hier = TRUE;
697  }
698  }
699  valid = true;
700  }
701  // loc - source file location
702  // gtid - global thread identifier
703  // pr - this thread's private dispatch buffer (corresponding with gtid)
704  // p_last (return value) - pointer to flag indicating this set of iterations
705  // contains last
706  // iteration
707  // p_lb (return value) - lower bound for this chunk of iterations
708  // p_ub (return value) - upper bound for this chunk of iterations
709  // p_st (return value) - stride for this chunk of iterations
710  //
711  // Returns 1 if there are more iterations to perform, 0 otherwise
712  int next(ident_t *loc, int gtid, dispatch_private_info_template<T> *pr,
713  kmp_int32 *p_last, T *p_lb, T *p_ub, ST *p_st) {
714  int status;
715  kmp_int32 contains_last = 0;
716  kmp_info_t *th = __kmp_threads[gtid];
717  kmp_hier_private_bdata_t *tdata = &(th->th.th_hier_bar_data[0]);
718  auto parent = pr->get_parent();
719  KMP_DEBUG_ASSERT(parent);
720  KMP_DEBUG_ASSERT(th);
721  KMP_DEBUG_ASSERT(tdata);
722  KMP_DEBUG_ASSERT(parent);
723  T nproc = (T)parent->get_num_active();
724  T unit_id = (T)pr->get_hier_id();
725  KD_TRACE(
726  10,
727  ("kmp_hier_t.next(): T#%d THREAD LEVEL nproc:%d unit_id:%d called\n",
728  gtid, nproc, unit_id));
729  // Handthreading implementation
730  // Each iteration is performed by all threads on last unit (typically
731  // cores/tiles)
732  // e.g., threads 0,1,2,3 all execute iteration 0
733  // threads 0,1,2,3 all execute iteration 1
734  // threads 4,5,6,7 all execute iteration 2
735  // threads 4,5,6,7 all execute iteration 3
736  // ... etc.
737  if (__kmp_dispatch_hand_threading) {
738  KD_TRACE(10,
739  ("kmp_hier_t.next(): T#%d THREAD LEVEL using hand threading\n",
740  gtid));
741  if (unit_id == 0) {
742  // For hand threading, the sh buffer on the lowest level is only ever
743  // modified and read by the primary thread on that level. Because of
744  // this, we can always use the first sh buffer.
745  auto sh = &(parent->hier_barrier.sh[0]);
746  KMP_DEBUG_ASSERT(sh);
747  status = __kmp_dispatch_next_algorithm<T>(
748  gtid, pr, sh, &contains_last, p_lb, p_ub, p_st, nproc, unit_id);
749  if (!status) {
750  bool done = false;
751  while (!done) {
752  done = true;
753  kmp_int32 uid;
754  __kmp_type_convert(unit_id, &uid);
755  status = next_recurse(loc, gtid, parent, &contains_last, p_lb, p_ub,
756  p_st, uid, 0);
757  if (status == 1) {
758  __kmp_dispatch_init_algorithm(loc, gtid, pr, pr->schedule,
759  parent->get_next_lb(tdata->index),
760  parent->get_next_ub(tdata->index),
761  parent->get_next_st(tdata->index),
762 #if USE_ITT_BUILD
763  NULL,
764 #endif
765  pr->u.p.parm1, nproc, unit_id);
766  sh->u.s.iteration = 0;
767  status = __kmp_dispatch_next_algorithm<T>(
768  gtid, pr, sh, &contains_last, p_lb, p_ub, p_st, nproc,
769  unit_id);
770  if (!status) {
771  KD_TRACE(10,
772  ("kmp_hier_t.next(): T#%d THREAD LEVEL status == 0 "
773  "after next_pr_sh()"
774  "trying again.\n",
775  gtid));
776  done = false;
777  }
778  } else if (status == 2) {
779  KD_TRACE(10, ("kmp_hier_t.next(): T#%d THREAD LEVEL status == 2 "
780  "trying again.\n",
781  gtid));
782  done = false;
783  }
784  }
785  }
786  parent->set_next_hand_thread(*p_lb, *p_ub, *p_st, status, tdata->index);
787  } // if primary thread of lowest unit level
788  parent->barrier(pr->get_hier_id(), tdata);
789  if (unit_id != 0) {
790  *p_lb = parent->get_curr_lb(tdata->index);
791  *p_ub = parent->get_curr_ub(tdata->index);
792  *p_st = parent->get_curr_st(tdata->index);
793  status = parent->get_curr_status(tdata->index);
794  }
795  } else {
796  // Normal implementation
797  // Each thread grabs an iteration chunk and executes it (no cooperation)
798  auto sh = parent->get_curr_sh(tdata->index);
799  KMP_DEBUG_ASSERT(sh);
800  status = __kmp_dispatch_next_algorithm<T>(
801  gtid, pr, sh, &contains_last, p_lb, p_ub, p_st, nproc, unit_id);
802  KD_TRACE(10,
803  ("kmp_hier_t.next(): T#%d THREAD LEVEL next_algorithm status:%d "
804  "contains_last:%d p_lb:%d p_ub:%d p_st:%d\n",
805  gtid, status, contains_last, *p_lb, *p_ub, *p_st));
806  if (!status) {
807  bool done = false;
808  while (!done) {
809  done = true;
810  kmp_int32 uid;
811  __kmp_type_convert(unit_id, &uid);
812  status = next_recurse(loc, gtid, parent, &contains_last, p_lb, p_ub,
813  p_st, uid, 0);
814  if (status == 1) {
815  sh = parent->get_curr_sh(tdata->index);
816  __kmp_dispatch_init_algorithm(loc, gtid, pr, pr->schedule,
817  parent->get_curr_lb(tdata->index),
818  parent->get_curr_ub(tdata->index),
819  parent->get_curr_st(tdata->index),
820 #if USE_ITT_BUILD
821  NULL,
822 #endif
823  pr->u.p.parm1, nproc, unit_id);
824  status = __kmp_dispatch_next_algorithm<T>(
825  gtid, pr, sh, &contains_last, p_lb, p_ub, p_st, nproc, unit_id);
826  if (!status) {
827  KD_TRACE(10, ("kmp_hier_t.next(): T#%d THREAD LEVEL status == 0 "
828  "after next_pr_sh()"
829  "trying again.\n",
830  gtid));
831  done = false;
832  }
833  } else if (status == 2) {
834  KD_TRACE(10, ("kmp_hier_t.next(): T#%d THREAD LEVEL status == 2 "
835  "trying again.\n",
836  gtid));
837  done = false;
838  }
839  }
840  }
841  }
842  if (contains_last && !parent->hier_pr.flags.contains_last) {
843  KD_TRACE(10, ("kmp_hier_t.next(): T#%d THREAD LEVEL resetting "
844  "contains_last to FALSE\n",
845  gtid));
846  contains_last = FALSE;
847  }
848  if (p_last)
849  *p_last = contains_last;
850  KD_TRACE(10, ("kmp_hier_t.next(): T#%d THREAD LEVEL exit status %d\n", gtid,
851  status));
852  return status;
853  }
854  // These functions probe the layer info structure
855  // Returns the type of topology unit given level
856  kmp_hier_layer_e get_type(int level) const {
857  KMP_DEBUG_ASSERT(level >= 0);
858  KMP_DEBUG_ASSERT(level < num_layers);
859  return info[level].type;
860  }
861  // Returns the schedule type at given level
862  enum sched_type get_sched(int level) const {
863  KMP_DEBUG_ASSERT(level >= 0);
864  KMP_DEBUG_ASSERT(level < num_layers);
865  return info[level].sched;
866  }
867  // Returns the chunk size at given level
868  ST get_chunk(int level) const {
869  KMP_DEBUG_ASSERT(level >= 0);
870  KMP_DEBUG_ASSERT(level < num_layers);
871  return info[level].chunk;
872  }
873  // Returns the number of active threads at given level
874  int get_num_active(int level) const {
875  KMP_DEBUG_ASSERT(level >= 0);
876  KMP_DEBUG_ASSERT(level < num_layers);
877  return info[level].num_active;
878  }
879  // Returns the length of topology unit array at given level
880  int get_length(int level) const {
881  KMP_DEBUG_ASSERT(level >= 0);
882  KMP_DEBUG_ASSERT(level < num_layers);
883  return info[level].length;
884  }
885  // Returns the topology unit given the level and index
886  kmp_hier_top_unit_t<T> *get_unit(int level, int index) {
887  KMP_DEBUG_ASSERT(level >= 0);
888  KMP_DEBUG_ASSERT(level < num_layers);
889  KMP_DEBUG_ASSERT(index >= 0);
890  KMP_DEBUG_ASSERT(index < get_length(level));
891  return &(layers[level][index]);
892  }
893  // Returns the number of layers in the hierarchy
894  int get_num_layers() const { return num_layers; }
895  // Returns the number of threads in the top layer
896  // This is necessary because we don't store a topology unit as
897  // the very top level and the scheduling algorithms need this information
898  int get_top_level_nproc() const { return top_level_nproc; }
899  // Return whether this hierarchy is valid or not
900  bool is_valid() const { return valid; }
901 #ifdef KMP_DEBUG
902  // Print the hierarchy
903  void print() {
904  KD_TRACE(10, ("kmp_hier_t:\n"));
905  for (int i = num_layers - 1; i >= 0; --i) {
906  KD_TRACE(10, ("Info[%d] = ", i));
907  info[i].print();
908  }
909  for (int i = num_layers - 1; i >= 0; --i) {
910  KD_TRACE(10, ("Layer[%d] =\n", i));
911  for (int j = 0; j < info[i].length; ++j) {
912  layers[i][j].print();
913  }
914  }
915  }
916 #endif
917 };
918 
919 template <typename T>
920 void __kmp_dispatch_init_hierarchy(ident_t *loc, int n,
921  kmp_hier_layer_e *new_layers,
922  enum sched_type *new_scheds,
923  typename traits_t<T>::signed_t *new_chunks,
924  T lb, T ub,
925  typename traits_t<T>::signed_t st) {
926  int tid, gtid, num_hw_threads, num_threads_per_layer1, active;
927  unsigned int my_buffer_index;
928  kmp_info_t *th;
929  kmp_team_t *team;
930  dispatch_private_info_template<T> *pr;
931  dispatch_shared_info_template<T> volatile *sh;
932  gtid = __kmp_entry_gtid();
933  tid = __kmp_tid_from_gtid(gtid);
934 #ifdef KMP_DEBUG
935  KD_TRACE(10, ("__kmp_dispatch_init_hierarchy: T#%d called: %d layer(s)\n",
936  gtid, n));
937  for (int i = 0; i < n; ++i) {
938  const char *layer = __kmp_get_hier_str(new_layers[i]);
939  KD_TRACE(10, ("__kmp_dispatch_init_hierarchy: T#%d: new_layers[%d] = %s, "
940  "new_scheds[%d] = %d, new_chunks[%d] = %u\n",
941  gtid, i, layer, i, (int)new_scheds[i], i, new_chunks[i]));
942  }
943 #endif // KMP_DEBUG
944  KMP_DEBUG_ASSERT(n > 0);
945  KMP_DEBUG_ASSERT(new_layers);
946  KMP_DEBUG_ASSERT(new_scheds);
947  KMP_DEBUG_ASSERT(new_chunks);
948  if (!TCR_4(__kmp_init_parallel))
949  __kmp_parallel_initialize();
950  __kmp_resume_if_soft_paused();
951 
952  th = __kmp_threads[gtid];
953  team = th->th.th_team;
954  active = !team->t.t_serialized;
955  th->th.th_ident = loc;
956  num_hw_threads = __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1];
957  KMP_DEBUG_ASSERT(th->th.th_dispatch ==
958  &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
959  my_buffer_index = th->th.th_dispatch->th_disp_index;
960  pr = reinterpret_cast<dispatch_private_info_template<T> *>(
961  &th->th.th_dispatch
962  ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
963  sh = reinterpret_cast<dispatch_shared_info_template<T> volatile *>(
964  &team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
965  if (!active) {
966  KD_TRACE(10, ("__kmp_dispatch_init_hierarchy: T#%d not active parallel. "
967  "Using normal dispatch functions.\n",
968  gtid));
969  KMP_DEBUG_ASSERT(pr);
970  pr->flags.use_hier = FALSE;
971  pr->flags.contains_last = FALSE;
972  return;
973  }
974  KMP_DEBUG_ASSERT(pr);
975  KMP_DEBUG_ASSERT(sh);
976  pr->flags.use_hier = TRUE;
977  pr->u.p.tc = 0;
978  // Have primary thread allocate the hierarchy
979  if (__kmp_tid_from_gtid(gtid) == 0) {
980  KD_TRACE(10, ("__kmp_dispatch_init_hierarchy: T#%d pr:%p sh:%p allocating "
981  "hierarchy\n",
982  gtid, pr, sh));
983  if (sh->hier == NULL) {
984  sh->hier = (kmp_hier_t<T> *)__kmp_allocate(sizeof(kmp_hier_t<T>));
985  }
986  sh->hier->allocate_hier(n, new_layers, new_scheds, new_chunks);
987  sh->u.s.iteration = 0;
988  }
989  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
990  // Check to make sure the hierarchy is valid
991  kmp_hier_t<T> *hier = sh->hier;
992  if (!sh->hier->is_valid()) {
993  pr->flags.use_hier = FALSE;
994  return;
995  }
996  // Have threads allocate their thread-private barrier data if it hasn't
997  // already been allocated
998  if (th->th.th_hier_bar_data == NULL) {
999  th->th.th_hier_bar_data = (kmp_hier_private_bdata_t *)__kmp_allocate(
1000  sizeof(kmp_hier_private_bdata_t) * kmp_hier_layer_e::LAYER_LAST);
1001  }
1002  // Have threads "register" themselves by modifying the active count for each
1003  // level they are involved in. The active count will act as nthreads for that
1004  // level regarding the scheduling algorithms
1005  for (int i = 0; i < n; ++i) {
1006  int index = __kmp_dispatch_get_index(tid, hier->get_type(i));
1007  kmp_hier_top_unit_t<T> *my_unit = hier->get_unit(i, index);
1008  // Setup the thread's private dispatch buffer's hierarchy pointers
1009  if (i == 0)
1010  pr->hier_parent = my_unit;
1011  // If this unit is already active, then increment active count and wait
1012  if (my_unit->is_active()) {
1013  KD_TRACE(10, ("__kmp_dispatch_init_hierarchy: T#%d my_unit (%p) "
1014  "is already active (%d)\n",
1015  gtid, my_unit, my_unit->active));
1016  KMP_TEST_THEN_INC32(&(my_unit->active));
1017  break;
1018  }
1019  // Flag that this unit is active
1020  if (KMP_COMPARE_AND_STORE_ACQ32(&(my_unit->active), 0, 1)) {
1021  // Do not setup parent pointer for top level unit since it has no parent
1022  if (i < n - 1) {
1023  // Setup middle layer pointers to parents
1024  my_unit->get_my_pr()->hier_id =
1025  index % __kmp_dispatch_get_t1_per_t2(hier->get_type(i),
1026  hier->get_type(i + 1));
1027  int parent_index = __kmp_dispatch_get_index(tid, hier->get_type(i + 1));
1028  my_unit->hier_parent = hier->get_unit(i + 1, parent_index);
1029  } else {
1030  // Setup top layer information (no parent pointers are set)
1031  my_unit->get_my_pr()->hier_id =
1032  index % __kmp_dispatch_get_t1_per_t2(hier->get_type(i),
1033  kmp_hier_layer_e::LAYER_LOOP);
1034  KMP_TEST_THEN_INC32(&(hier->top_level_nproc));
1035  my_unit->hier_parent = nullptr;
1036  }
1037  // Set trip count to 0 so that next() operation will initially climb up
1038  // the hierarchy to get more iterations (early exit in next() for tc == 0)
1039  my_unit->get_my_pr()->u.p.tc = 0;
1040  // Increment this layer's number of active units
1041  KMP_TEST_THEN_INC32(&(hier->info[i].num_active));
1042  KD_TRACE(10, ("__kmp_dispatch_init_hierarchy: T#%d my_unit (%p) "
1043  "incrementing num_active\n",
1044  gtid, my_unit));
1045  } else {
1046  KMP_TEST_THEN_INC32(&(my_unit->active));
1047  break;
1048  }
1049  }
1050  // Set this thread's id
1051  num_threads_per_layer1 = __kmp_dispatch_get_t1_per_t2(
1052  kmp_hier_layer_e::LAYER_THREAD, hier->get_type(0));
1053  pr->hier_id = tid % num_threads_per_layer1;
1054  // For oversubscribed threads, increment their index within the lowest unit
1055  // This is done to prevent having two or more threads with id 0, id 1, etc.
1056  if (tid >= num_hw_threads)
1057  pr->hier_id += ((tid / num_hw_threads) * num_threads_per_layer1);
1058  KD_TRACE(
1059  10, ("__kmp_dispatch_init_hierarchy: T#%d setting lowest hier_id to %d\n",
1060  gtid, pr->hier_id));
1061 
1062  pr->flags.contains_last = FALSE;
1063  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
1064 
1065  // Now that the number of active threads at each level is determined,
1066  // the barrier data for each unit can be initialized and the last layer's
1067  // loop information can be initialized.
1068  int prev_id = pr->get_hier_id();
1069  for (int i = 0; i < n; ++i) {
1070  if (prev_id != 0)
1071  break;
1072  int index = __kmp_dispatch_get_index(tid, hier->get_type(i));
1073  kmp_hier_top_unit_t<T> *my_unit = hier->get_unit(i, index);
1074  // Only primary threads of this unit within the hierarchy do initialization
1075  KD_TRACE(10, ("__kmp_dispatch_init_hierarchy: T#%d (%d) prev_id is 0\n",
1076  gtid, i));
1077  my_unit->reset_shared_barrier();
1078  my_unit->hier_pr.flags.contains_last = FALSE;
1079  // Last layer, initialize the private buffers with entire loop information
1080  // Now the next next_algorithm() call will get the first chunk of
1081  // iterations properly
1082  if (i == n - 1) {
1083  __kmp_dispatch_init_algorithm<T>(
1084  loc, gtid, my_unit->get_my_pr(), hier->get_sched(i), lb, ub, st,
1085 #if USE_ITT_BUILD
1086  NULL,
1087 #endif
1088  hier->get_chunk(i), hier->get_num_active(i), my_unit->get_hier_id());
1089  }
1090  prev_id = my_unit->get_hier_id();
1091  }
1092  // Initialize each layer of the thread's private barrier data
1093  kmp_hier_top_unit_t<T> *unit = pr->hier_parent;
1094  for (int i = 0; i < n && unit; ++i, unit = unit->get_parent()) {
1095  kmp_hier_private_bdata_t *tdata = &(th->th.th_hier_bar_data[i]);
1096  unit->reset_private_barrier(tdata);
1097  }
1098  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
1099 
1100 #ifdef KMP_DEBUG
1101  if (__kmp_tid_from_gtid(gtid) == 0) {
1102  for (int i = 0; i < n; ++i) {
1103  KD_TRACE(10,
1104  ("__kmp_dispatch_init_hierarchy: T#%d active count[%d] = %d\n",
1105  gtid, i, hier->get_num_active(i)));
1106  }
1107  hier->print();
1108  }
1109  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
1110 #endif // KMP_DEBUG
1111 }
1112 #endif
sched_type
Definition: kmp.h:355
Definition: kmp.h:233