1 /*
2  * kmp_gsupport.cpp
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "kmp.h"
14 #include "kmp_atomic.h"
15 
16 #if OMPT_SUPPORT
17 #include "ompt-specific.h"
18 #endif
19 
20 enum {
21  KMP_GOMP_TASK_UNTIED_FLAG = 1,
22  KMP_GOMP_TASK_FINAL_FLAG = 2,
23  KMP_GOMP_TASK_DEPENDS_FLAG = 8
24 };
25 
26 // This class helps convert gomp dependency info into
27 // kmp_depend_info_t structures
28 class kmp_gomp_depends_info_t {
29  void **depend;
30  kmp_int32 num_deps;
31  size_t num_out, num_mutexinout, num_in;
32  size_t offset;
33 
34 public:
35  kmp_gomp_depends_info_t(void **depend) : depend(depend) {
36  size_t ndeps = (kmp_intptr_t)depend[0];
37  size_t num_doable;
38  // GOMP taskdep structure:
39  // if depend[0] != 0:
40  // depend = [ ndeps | nout | &out | ... | &out | &in | ... | &in ]
41  //
42  // if depend[0] == 0:
43  // depend = [ 0 | ndeps | nout | nmtx | nin | &out | ... | &out | &mtx |
44  // ... | &mtx | &in | ... | &in | &depobj | ... | &depobj ]
45  if (ndeps) {
46  num_out = (kmp_intptr_t)depend[1];
47  num_in = ndeps - num_out;
48  num_mutexinout = 0;
49  num_doable = ndeps;
50  offset = 2;
51  } else {
52  ndeps = (kmp_intptr_t)depend[1];
53  num_out = (kmp_intptr_t)depend[2];
54  num_mutexinout = (kmp_intptr_t)depend[3];
55  num_in = (kmp_intptr_t)depend[4];
56  num_doable = num_out + num_mutexinout + num_in;
57  offset = 5;
58  }
59  // TODO: Support gomp depobj
60  if (ndeps != num_doable) {
61  KMP_FATAL(GompFeatureNotSupported, "depobj");
62  }
63  num_deps = static_cast<kmp_int32>(ndeps);
64  }
65  kmp_int32 get_num_deps() const { return num_deps; }
66  kmp_depend_info_t get_kmp_depend(size_t index) const {
67  kmp_depend_info_t retval;
68  memset(&retval, '\0', sizeof(retval));
69  KMP_ASSERT(index < (size_t)num_deps);
70  retval.base_addr = (kmp_intptr_t)depend[offset + index];
71  retval.len = 0;
72  // Because inout and out are logically equivalent,
73  // use inout and in dependency flags. GOMP does not provide a
74  // way to distinguish if user specified out vs. inout.
75  if (index < num_out) {
76  retval.flags.in = 1;
77  retval.flags.out = 1;
78  } else if (index >= num_out && index < (num_out + num_mutexinout)) {
79  retval.flags.mtx = 1;
80  } else {
81  retval.flags.in = 1;
82  }
83  return retval;
84  }
85 };
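// Illustrative sketch (not part of this file's contract): for a construct
// such as "#pragma omp task depend(out : x) depend(in : y, z)", GCC is
// expected to pass a short-format array roughly like
//   void *depend[] = { (void *)3, (void *)1, &x, &y, &z };
// i.e. ndeps = 3 with nout = 1, followed by the addresses. The class above
// would then report one inout dependence on &x and plain in dependences on
// &y and &z. Exact layout and codegen details vary by GCC version.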
86 
87 #ifdef __cplusplus
88 extern "C" {
89 #endif // __cplusplus
90 
91 #define MKLOC(loc, routine) \
92  static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
93 
94 #include "kmp_ftn_os.h"
95 
96 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_BARRIER)(void) {
97  int gtid = __kmp_entry_gtid();
98  MKLOC(loc, "GOMP_barrier");
99  KA_TRACE(20, ("GOMP_barrier: T#%d\n", gtid));
100 #if OMPT_SUPPORT && OMPT_OPTIONAL
101  ompt_frame_t *ompt_frame;
102  if (ompt_enabled.enabled) {
103  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
104  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
105  }
106  OMPT_STORE_RETURN_ADDRESS(gtid);
107 #endif
108  __kmpc_barrier(&loc, gtid);
109 #if OMPT_SUPPORT && OMPT_OPTIONAL
110  if (ompt_enabled.enabled) {
111  ompt_frame->enter_frame = ompt_data_none;
112  }
113 #endif
114 }
115 
116 // Mutual exclusion
117 
118 // The symbol that icc/ifort generates for unnamed critical sections
119 // - .gomp_critical_user_ - is defined using .comm in any object files that reference it.
120 // We can't reference it directly here in C code, as the symbol contains a ".".
121 //
122 // The RTL contains an assembly language definition of .gomp_critical_user_
123 // with another symbol __kmp_unnamed_critical_addr initialized with its
124 // address.
125 extern kmp_critical_name *__kmp_unnamed_critical_addr;
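// For illustration, an unnamed "#pragma omp critical" is lowered by GCC to a
// GOMP_critical_start()/GOMP_critical_end() pair (handled below via
// __kmp_unnamed_critical_addr), while a named region such as
// "#pragma omp critical(lck)" is lowered to GOMP_critical_name_start()/
// GOMP_critical_name_end() with a pointer slot generated for that name
// (sketch only; the exact symbol naming is compiler-specific).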
126 
127 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_START)(void) {
128  int gtid = __kmp_entry_gtid();
129  MKLOC(loc, "GOMP_critical_start");
130  KA_TRACE(20, ("GOMP_critical_start: T#%d\n", gtid));
131 #if OMPT_SUPPORT && OMPT_OPTIONAL
132  OMPT_STORE_RETURN_ADDRESS(gtid);
133 #endif
134  __kmpc_critical(&loc, gtid, __kmp_unnamed_critical_addr);
135 }
136 
137 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_END)(void) {
138  int gtid = __kmp_get_gtid();
139  MKLOC(loc, "GOMP_critical_end");
140  KA_TRACE(20, ("GOMP_critical_end: T#%d\n", gtid));
141 #if OMPT_SUPPORT && OMPT_OPTIONAL
142  OMPT_STORE_RETURN_ADDRESS(gtid);
143 #endif
144  __kmpc_end_critical(&loc, gtid, __kmp_unnamed_critical_addr);
145 }
146 
147 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_NAME_START)(void **pptr) {
148  int gtid = __kmp_entry_gtid();
149  MKLOC(loc, "GOMP_critical_name_start");
150  KA_TRACE(20, ("GOMP_critical_name_start: T#%d\n", gtid));
151  __kmpc_critical(&loc, gtid, (kmp_critical_name *)pptr);
152 }
153 
154 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_NAME_END)(void **pptr) {
155  int gtid = __kmp_get_gtid();
156  MKLOC(loc, "GOMP_critical_name_end");
157  KA_TRACE(20, ("GOMP_critical_name_end: T#%d\n", gtid));
158  __kmpc_end_critical(&loc, gtid, (kmp_critical_name *)pptr);
159 }
160 
161 // The Gnu codegen tries to use locked operations to perform atomic updates
162 // inline. If it can't, then it calls GOMP_atomic_start() before performing
163 // the update and GOMP_atomic_end() afterward, regardless of the data type.
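// As a rough sketch, when the inline path is not available (e.g. an atomic
// update on a long double on some targets), the generated code is expected
// to look like:
//   GOMP_atomic_start();
//   x = x + expr;
//   GOMP_atomic_end();
// which maps onto the single global atomic lock acquired/released below
// (illustrative only; whether the inline path is used depends on the target
// and data type).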
164 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ATOMIC_START)(void) {
165  int gtid = __kmp_entry_gtid();
166  KA_TRACE(20, ("GOMP_atomic_start: T#%d\n", gtid));
167 
168 #if OMPT_SUPPORT
169  __ompt_thread_assign_wait_id(0);
170 #endif
171 
172  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
173 }
174 
175 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ATOMIC_END)(void) {
176  int gtid = __kmp_get_gtid();
177  KA_TRACE(20, ("GOMP_atomic_end: T#%d\n", gtid));
178  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
179 }
180 
181 int KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SINGLE_START)(void) {
182  int gtid = __kmp_entry_gtid();
183  MKLOC(loc, "GOMP_single_start");
184  KA_TRACE(20, ("GOMP_single_start: T#%d\n", gtid));
185 
186  if (!TCR_4(__kmp_init_parallel))
187  __kmp_parallel_initialize();
188  __kmp_resume_if_soft_paused();
189 
190  // 3rd parameter == FALSE prevents kmp_enter_single from pushing a
191  // workshare when USE_CHECKS is defined. We need to avoid the push,
192  // as there is no corresponding GOMP_single_end() call.
193  kmp_int32 rc = __kmp_enter_single(gtid, &loc, FALSE);
194 
195 #if OMPT_SUPPORT && OMPT_OPTIONAL
196  kmp_info_t *this_thr = __kmp_threads[gtid];
197  kmp_team_t *team = this_thr->th.th_team;
198  int tid = __kmp_tid_from_gtid(gtid);
199 
200  if (ompt_enabled.enabled) {
201  if (rc) {
202  if (ompt_enabled.ompt_callback_work) {
203  ompt_callbacks.ompt_callback(ompt_callback_work)(
204  ompt_work_single_executor, ompt_scope_begin,
205  &(team->t.ompt_team_info.parallel_data),
206  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
207  1, OMPT_GET_RETURN_ADDRESS(0));
208  }
209  } else {
210  if (ompt_enabled.ompt_callback_work) {
211  ompt_callbacks.ompt_callback(ompt_callback_work)(
212  ompt_work_single_other, ompt_scope_begin,
213  &(team->t.ompt_team_info.parallel_data),
214  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
215  1, OMPT_GET_RETURN_ADDRESS(0));
216  ompt_callbacks.ompt_callback(ompt_callback_work)(
217  ompt_work_single_other, ompt_scope_end,
218  &(team->t.ompt_team_info.parallel_data),
219  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
220  1, OMPT_GET_RETURN_ADDRESS(0));
221  }
222  }
223  }
224 #endif
225 
226  return rc;
227 }
228 
229 void *KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SINGLE_COPY_START)(void) {
230  void *retval;
231  int gtid = __kmp_entry_gtid();
232  MKLOC(loc, "GOMP_single_copy_start");
233  KA_TRACE(20, ("GOMP_single_copy_start: T#%d\n", gtid));
234 
235  if (!TCR_4(__kmp_init_parallel))
236  __kmp_parallel_initialize();
237  __kmp_resume_if_soft_paused();
238 
239  // If this is the first thread to enter, return NULL. The generated code will
240  // then call GOMP_single_copy_end() for this thread only, with the
241  // copyprivate data pointer as an argument.
242  if (__kmp_enter_single(gtid, &loc, FALSE))
243  return NULL;
244 
245  // Wait for the first thread to set the copyprivate data pointer,
246  // and for all other threads to reach this point.
247 
248 #if OMPT_SUPPORT && OMPT_OPTIONAL
249  ompt_frame_t *ompt_frame;
250  if (ompt_enabled.enabled) {
251  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
252  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
253  }
254  OMPT_STORE_RETURN_ADDRESS(gtid);
255 #endif
256  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
257 
258  // Retrieve the value of the copyprivate data pointer, and wait for all
259  // threads to do likewise, then return.
260  retval = __kmp_team_from_gtid(gtid)->t.t_copypriv_data;
261  {
262 #if OMPT_SUPPORT && OMPT_OPTIONAL
263  OMPT_STORE_RETURN_ADDRESS(gtid);
264 #endif
265  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
266  }
267 #if OMPT_SUPPORT && OMPT_OPTIONAL
268  if (ompt_enabled.enabled) {
269  ompt_frame->enter_frame = ompt_data_none;
270  }
271 #endif
272  return retval;
273 }
274 
275 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SINGLE_COPY_END)(void *data) {
276  int gtid = __kmp_get_gtid();
277  KA_TRACE(20, ("GOMP_single_copy_end: T#%d\n", gtid));
278 
279  // Set the copyprivate data pointer for the team, then hit the barrier so that
280  // the other threads will continue on and read it. Hit another barrier before
281  // continuing, so that they know that the copyprivate data pointer has been
282  // propagated to all threads before trying to reuse the t_copypriv_data field.
283  __kmp_team_from_gtid(gtid)->t.t_copypriv_data = data;
284 #if OMPT_SUPPORT && OMPT_OPTIONAL
285  ompt_frame_t *ompt_frame;
286  if (ompt_enabled.enabled) {
287  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
288  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
289  }
290  OMPT_STORE_RETURN_ADDRESS(gtid);
291 #endif
292  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
293  {
294 #if OMPT_SUPPORT && OMPT_OPTIONAL
295  OMPT_STORE_RETURN_ADDRESS(gtid);
296 #endif
297  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
298  }
299 #if OMPT_SUPPORT && OMPT_OPTIONAL
300  if (ompt_enabled.enabled) {
301  ompt_frame->enter_frame = ompt_data_none;
302  }
303 #endif
304 }
305 
306 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ORDERED_START)(void) {
307  int gtid = __kmp_entry_gtid();
308  MKLOC(loc, "GOMP_ordered_start");
309  KA_TRACE(20, ("GOMP_ordered_start: T#%d\n", gtid));
310 #if OMPT_SUPPORT && OMPT_OPTIONAL
311  OMPT_STORE_RETURN_ADDRESS(gtid);
312 #endif
313  __kmpc_ordered(&loc, gtid);
314 }
315 
316 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ORDERED_END)(void) {
317  int gtid = __kmp_get_gtid();
318  MKLOC(loc, "GOMP_ordered_end");
319  KA_TRACE(20, ("GOMP_ordered_end: T#%d\n", gtid));
320 #if OMPT_SUPPORT && OMPT_OPTIONAL
321  OMPT_STORE_RETURN_ADDRESS(gtid);
322 #endif
323  __kmpc_end_ordered(&loc, gtid);
324 }
325 
326 // Dispatch macro defs
327 //
328 // They come in two flavors: 64-bit unsigned, and either 32-bit signed
329 // (IA-32 architecture) or 64-bit signed (Intel(R) 64).
330 
331 #if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS
332 #define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_4
333 #define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_4
334 #define KMP_DISPATCH_NEXT __kmpc_dispatch_next_4
335 #else
336 #define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_8
337 #define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_8
338 #define KMP_DISPATCH_NEXT __kmpc_dispatch_next_8
339 #endif /* KMP_ARCH_X86 */
340 
341 #define KMP_DISPATCH_INIT_ULL __kmp_aux_dispatch_init_8u
342 #define KMP_DISPATCH_FINI_CHUNK_ULL __kmp_aux_dispatch_fini_chunk_8u
343 #define KMP_DISPATCH_NEXT_ULL __kmpc_dispatch_next_8u
344 
345 // The parallel construct
346 
347 #ifndef KMP_DEBUG
348 static
349 #endif /* KMP_DEBUG */
350  void
351  __kmp_GOMP_microtask_wrapper(int *gtid, int *npr, void (*task)(void *),
352  void *data) {
353 #if OMPT_SUPPORT
354  kmp_info_t *thr;
355  ompt_frame_t *ompt_frame;
356  ompt_state_t enclosing_state;
357 
358  if (ompt_enabled.enabled) {
359  // get pointer to thread data structure
360  thr = __kmp_threads[*gtid];
361 
362  // save enclosing task state; set current state for task
363  enclosing_state = thr->th.ompt_thread_info.state;
364  thr->th.ompt_thread_info.state = ompt_state_work_parallel;
365 
366  // set task frame
367  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
368  ompt_frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
369  }
370 #endif
371 
372  task(data);
373 
374 #if OMPT_SUPPORT
375  if (ompt_enabled.enabled) {
376  // clear task frame
377  ompt_frame->exit_frame = ompt_data_none;
378 
379  // restore enclosing state
380  thr->th.ompt_thread_info.state = enclosing_state;
381  }
382 #endif
383 }
384 
385 #ifndef KMP_DEBUG
386 static
387 #endif /* KMP_DEBUG */
388  void
389  __kmp_GOMP_parallel_microtask_wrapper(int *gtid, int *npr,
390  void (*task)(void *), void *data,
391  unsigned num_threads, ident_t *loc,
392  enum sched_type schedule, long start,
393  long end, long incr,
394  long chunk_size) {
395  // Initialize the loop worksharing construct.
396 
397  KMP_DISPATCH_INIT(loc, *gtid, schedule, start, end, incr, chunk_size,
398  schedule != kmp_sch_static);
399 
400 #if OMPT_SUPPORT
401  kmp_info_t *thr;
402  ompt_frame_t *ompt_frame;
403  ompt_state_t enclosing_state;
404 
405  if (ompt_enabled.enabled) {
406  thr = __kmp_threads[*gtid];
407  // save enclosing task state; set current state for task
408  enclosing_state = thr->th.ompt_thread_info.state;
409  thr->th.ompt_thread_info.state = ompt_state_work_parallel;
410 
411  // set task frame
412  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
413  ompt_frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
414  }
415 #endif
416 
417  // Now invoke the microtask.
418  task(data);
419 
420 #if OMPT_SUPPORT
421  if (ompt_enabled.enabled) {
422  // clear task frame
423  ompt_frame->exit_frame = ompt_data_none;
424 
425  // reset enclosing state
426  thr->th.ompt_thread_info.state = enclosing_state;
427  }
428 #endif
429 }
430 
431 static void __kmp_GOMP_fork_call(ident_t *loc, int gtid, unsigned num_threads,
432  unsigned flags, void (*unwrapped_task)(void *),
433  microtask_t wrapper, int argc, ...) {
434  int rc;
435  kmp_info_t *thr = __kmp_threads[gtid];
436  kmp_team_t *team = thr->th.th_team;
437  int tid = __kmp_tid_from_gtid(gtid);
438 
439  va_list ap;
440  va_start(ap, argc);
441 
442  if (num_threads != 0)
443  __kmp_push_num_threads(loc, gtid, num_threads);
444  if (flags != 0)
445  __kmp_push_proc_bind(loc, gtid, (kmp_proc_bind_t)flags);
446  rc = __kmp_fork_call(loc, gtid, fork_context_gnu, argc, wrapper,
447  __kmp_invoke_task_func, kmp_va_addr_of(ap));
448 
449  va_end(ap);
450 
451  if (rc) {
452  __kmp_run_before_invoked_task(gtid, tid, thr, team);
453  }
454 
455 #if OMPT_SUPPORT
456  int ompt_team_size;
457  if (ompt_enabled.enabled) {
458  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
459  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
460 
461  // implicit task callback
462  if (ompt_enabled.ompt_callback_implicit_task) {
463  ompt_team_size = __kmp_team_from_gtid(gtid)->t.t_nproc;
464  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
465  ompt_scope_begin, &(team_info->parallel_data),
466  &(task_info->task_data), ompt_team_size, __kmp_tid_from_gtid(gtid),
467  ompt_task_implicit); // TODO: Can this be ompt_task_initial?
468  task_info->thread_num = __kmp_tid_from_gtid(gtid);
469  }
470  thr->th.ompt_thread_info.state = ompt_state_work_parallel;
471  }
472 #endif
473 }
474 
475 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *),
476  void *data,
477  unsigned num_threads) {
478  int gtid = __kmp_entry_gtid();
479 
480 #if OMPT_SUPPORT
481  ompt_frame_t *parent_frame, *frame;
482 
483  if (ompt_enabled.enabled) {
484  __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL);
485  parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
486  }
487  OMPT_STORE_RETURN_ADDRESS(gtid);
488 #endif
489 
490  MKLOC(loc, "GOMP_parallel_start");
491  KA_TRACE(20, ("GOMP_parallel_start: T#%d\n", gtid));
492  __kmp_GOMP_fork_call(&loc, gtid, num_threads, 0u, task,
493  (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task,
494  data);
495 #if OMPT_SUPPORT
496  if (ompt_enabled.enabled) {
497  __ompt_get_task_info_internal(0, NULL, NULL, &frame, NULL, NULL);
498  frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
499  }
500 #endif
501 #if OMPD_SUPPORT
502  if (ompd_state & OMPD_ENABLE_BP)
503  ompd_bp_parallel_begin();
504 #endif
505 }
506 
507 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(void) {
508  int gtid = __kmp_get_gtid();
509  kmp_info_t *thr;
510 
511  thr = __kmp_threads[gtid];
512 
513  MKLOC(loc, "GOMP_parallel_end");
514  KA_TRACE(20, ("GOMP_parallel_end: T#%d\n", gtid));
515 
516  if (!thr->th.th_team->t.t_serialized) {
517  __kmp_run_after_invoked_task(gtid, __kmp_tid_from_gtid(gtid), thr,
518  thr->th.th_team);
519  }
520 #if OMPT_SUPPORT
521  if (ompt_enabled.enabled) {
522  // The implicit task is finished here; in the barrier we might schedule
523  // deferred tasks,
524  // and these don't see the implicit task on the stack.
525  OMPT_CUR_TASK_INFO(thr)->frame.exit_frame = ompt_data_none;
526  }
527 #endif
528 
529  __kmp_join_call(&loc, gtid
530 #if OMPT_SUPPORT
531  ,
532  fork_context_gnu
533 #endif
534  );
535 #if OMPD_SUPPORT
536  if (ompd_state & OMPD_ENABLE_BP)
537  ompd_bp_parallel_end();
538 #endif
539 }
540 
541 // Loop worksharing constructs
542 
543 // The Gnu codegen passes in an exclusive upper bound for the overall range,
544 // but the libguide dispatch code expects an inclusive upper bound, hence the
545 // "end - incr" 5th argument to KMP_DISPATCH_INIT (and the "ub - str" 11th
546 // argument to __kmp_GOMP_fork_call).
547 //
548 // Conversely, KMP_DISPATCH_NEXT returns an inclusive upper bound in *p_ub,
549 // but the Gnu codegen expects an exclusive upper bound, so the adjustment
550 // "*p_ub += stride" compensates for the discrepancy.
551 //
552 // Correction: the gnu codegen always adjusts the upper bound by +-1, not the
553 // stride value. We adjust the dispatch parameters accordingly (by +-1), but
554 // we still adjust p_ub by the actual stride value.
555 //
556 // The "runtime" versions do not take a chunk_sz parameter.
557 //
558 // The profile lib cannot support construct checking of unordered loops that
559 // are predetermined by the compiler to be statically scheduled, as the gcc
560 // codegen will not always emit calls to GOMP_loop_static_next() to get the
561 // next iteration. Instead, it emits inline code to call omp_get_thread_num()
562 // and calculate the iteration space using the result. It doesn't do this
563 // with ordered static loops, so they can be checked.
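// As a concrete sketch of the adjustment described above: for a loop written
// as "for (i = 0; i < n; i++)", GCC passes lb = 0, ub = n (exclusive) and
// str = 1. The macros below therefore initialize the dispatcher with the
// inclusive bound "ub - 1" (or "ub + 1" for negative strides), and convert
// each chunk handed back in *p_ub to GCC's half-open convention with
// "*p_ub += 1" (or -= 1) before returning.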
564 
565 #if OMPT_SUPPORT
566 #define IF_OMPT_SUPPORT(code) code
567 #else
568 #define IF_OMPT_SUPPORT(code)
569 #endif
570 
571 #define LOOP_START(func, schedule) \
572  int func(long lb, long ub, long str, long chunk_sz, long *p_lb, \
573  long *p_ub) { \
574  int status; \
575  long stride; \
576  int gtid = __kmp_entry_gtid(); \
577  MKLOC(loc, KMP_STR(func)); \
578  KA_TRACE( \
579  20, \
580  (KMP_STR( \
581  func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \
582  gtid, lb, ub, str, chunk_sz)); \
583  \
584  if ((str > 0) ? (lb < ub) : (lb > ub)) { \
585  { \
586  IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
587  KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
588  (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \
589  (schedule) != kmp_sch_static); \
590  } \
591  { \
592  IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
593  status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \
594  (kmp_int *)p_ub, (kmp_int *)&stride); \
595  } \
596  if (status) { \
597  KMP_DEBUG_ASSERT(stride == str); \
598  *p_ub += (str > 0) ? 1 : -1; \
599  } \
600  } else { \
601  status = 0; \
602  } \
603  \
604  KA_TRACE( \
605  20, \
606  (KMP_STR( \
607  func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n", \
608  gtid, *p_lb, *p_ub, status)); \
609  return status; \
610  }
611 
612 #define LOOP_RUNTIME_START(func, schedule) \
613  int func(long lb, long ub, long str, long *p_lb, long *p_ub) { \
614  int status; \
615  long stride; \
616  long chunk_sz = 0; \
617  int gtid = __kmp_entry_gtid(); \
618  MKLOC(loc, KMP_STR(func)); \
619  KA_TRACE( \
620  20, \
621  (KMP_STR(func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz %d\n", \
622  gtid, lb, ub, str, chunk_sz)); \
623  \
624  if ((str > 0) ? (lb < ub) : (lb > ub)) { \
625  { \
626  IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
627  KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
628  (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \
629  TRUE); \
630  } \
631  { \
632  IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
633  status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \
634  (kmp_int *)p_ub, (kmp_int *)&stride); \
635  } \
636  if (status) { \
637  KMP_DEBUG_ASSERT(stride == str); \
638  *p_ub += (str > 0) ? 1 : -1; \
639  } \
640  } else { \
641  status = 0; \
642  } \
643  \
644  KA_TRACE( \
645  20, \
646  (KMP_STR( \
647  func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n", \
648  gtid, *p_lb, *p_ub, status)); \
649  return status; \
650  }
651 
652 #define KMP_DOACROSS_FINI(status, gtid) \
653  if (!status && __kmp_threads[gtid]->th.th_dispatch->th_doacross_flags) { \
654  __kmpc_doacross_fini(NULL, gtid); \
655  }
656 
657 #define LOOP_NEXT(func, fini_code) \
658  int func(long *p_lb, long *p_ub) { \
659  int status; \
660  long stride; \
661  int gtid = __kmp_get_gtid(); \
662  MKLOC(loc, KMP_STR(func)); \
663  KA_TRACE(20, (KMP_STR(func) ": T#%d\n", gtid)); \
664  \
665  IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
666  fini_code status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \
667  (kmp_int *)p_ub, (kmp_int *)&stride); \
668  if (status) { \
669  *p_ub += (stride > 0) ? 1 : -1; \
670  } \
671  KMP_DOACROSS_FINI(status, gtid) \
672  \
673  KA_TRACE( \
674  20, \
675  (KMP_STR(func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, stride 0x%lx, " \
676  "returning %d\n", \
677  gtid, *p_lb, *p_ub, stride, status)); \
678  return status; \
679  }
680 
681 LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_STATIC_START), kmp_sch_static)
682 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT), {})
683 LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START),
684  kmp_sch_dynamic_chunked)
685 LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_START),
686  kmp_sch_dynamic_chunked)
687 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT), {})
688 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_NEXT), {})
689 LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_GUIDED_START),
690  kmp_sch_guided_chunked)
691 LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_START),
692  kmp_sch_guided_chunked)
693 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT), {})
694 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_NEXT), {})
695 LOOP_RUNTIME_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_RUNTIME_START),
696  kmp_sch_runtime)
697 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT), {})
698 LOOP_RUNTIME_START(
699  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_MAYBE_NONMONOTONIC_RUNTIME_START),
700  kmp_sch_runtime)
701 LOOP_RUNTIME_START(
702  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_RUNTIME_START),
703  kmp_sch_runtime)
704 LOOP_NEXT(
705  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_MAYBE_NONMONOTONIC_RUNTIME_NEXT), {})
706 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_RUNTIME_NEXT), {})
707 
708 LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START),
709  kmp_ord_static)
710 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT),
711  { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
712 LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START),
713  kmp_ord_dynamic_chunked)
714 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT),
715  { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
716 LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START),
717  kmp_ord_guided_chunked)
718 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT),
719  { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
720 LOOP_RUNTIME_START(
721  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START),
722  kmp_ord_runtime)
723 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT),
724  { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
725 
726 #define LOOP_DOACROSS_START(func, schedule) \
727  bool func(unsigned ncounts, long *counts, long chunk_sz, long *p_lb, \
728  long *p_ub) { \
729  int status; \
730  long stride, lb, ub, str; \
731  int gtid = __kmp_entry_gtid(); \
732  struct kmp_dim *dims = \
733  (struct kmp_dim *)__kmp_allocate(sizeof(struct kmp_dim) * ncounts); \
734  MKLOC(loc, KMP_STR(func)); \
735  for (unsigned i = 0; i < ncounts; ++i) { \
736  dims[i].lo = 0; \
737  dims[i].up = counts[i] - 1; \
738  dims[i].st = 1; \
739  } \
740  __kmpc_doacross_init(&loc, gtid, (int)ncounts, dims); \
741  lb = 0; \
742  ub = counts[0]; \
743  str = 1; \
744  KA_TRACE(20, (KMP_STR(func) ": T#%d, ncounts %u, lb 0x%lx, ub 0x%lx, str " \
745  "0x%lx, chunk_sz " \
746  "0x%lx\n", \
747  gtid, ncounts, lb, ub, str, chunk_sz)); \
748  \
749  if ((str > 0) ? (lb < ub) : (lb > ub)) { \
750  KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
751  (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \
752  (schedule) != kmp_sch_static); \
753  status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \
754  (kmp_int *)p_ub, (kmp_int *)&stride); \
755  if (status) { \
756  KMP_DEBUG_ASSERT(stride == str); \
757  *p_ub += (str > 0) ? 1 : -1; \
758  } \
759  } else { \
760  status = 0; \
761  } \
762  KMP_DOACROSS_FINI(status, gtid); \
763  \
764  KA_TRACE( \
765  20, \
766  (KMP_STR( \
767  func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n", \
768  gtid, *p_lb, *p_ub, status)); \
769  __kmp_free(dims); \
770  return status; \
771  }
772 
773 #define LOOP_DOACROSS_RUNTIME_START(func, schedule) \
774  int func(unsigned ncounts, long *counts, long *p_lb, long *p_ub) { \
775  int status; \
776  long stride, lb, ub, str; \
777  long chunk_sz = 0; \
778  int gtid = __kmp_entry_gtid(); \
779  struct kmp_dim *dims = \
780  (struct kmp_dim *)__kmp_allocate(sizeof(struct kmp_dim) * ncounts); \
781  MKLOC(loc, KMP_STR(func)); \
782  for (unsigned i = 0; i < ncounts; ++i) { \
783  dims[i].lo = 0; \
784  dims[i].up = counts[i] - 1; \
785  dims[i].st = 1; \
786  } \
787  __kmpc_doacross_init(&loc, gtid, (int)ncounts, dims); \
788  lb = 0; \
789  ub = counts[0]; \
790  str = 1; \
791  KA_TRACE( \
792  20, \
793  (KMP_STR(func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz %d\n", \
794  gtid, lb, ub, str, chunk_sz)); \
795  \
796  if ((str > 0) ? (lb < ub) : (lb > ub)) { \
797  KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
798  (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, TRUE); \
799  status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \
800  (kmp_int *)p_ub, (kmp_int *)&stride); \
801  if (status) { \
802  KMP_DEBUG_ASSERT(stride == str); \
803  *p_ub += (str > 0) ? 1 : -1; \
804  } \
805  } else { \
806  status = 0; \
807  } \
808  KMP_DOACROSS_FINI(status, gtid); \
809  \
810  KA_TRACE( \
811  20, \
812  (KMP_STR( \
813  func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n", \
814  gtid, *p_lb, *p_ub, status)); \
815  __kmp_free(dims); \
816  return status; \
817  }
818 
819 LOOP_DOACROSS_START(
820  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_STATIC_START),
821  kmp_sch_static)
822 LOOP_DOACROSS_START(
823  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_DYNAMIC_START),
824  kmp_sch_dynamic_chunked)
825 LOOP_DOACROSS_START(
826  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_GUIDED_START),
827  kmp_sch_guided_chunked)
828 LOOP_DOACROSS_RUNTIME_START(
829  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_RUNTIME_START),
830  kmp_sch_runtime)
831 
832 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_END)(void) {
833  int gtid = __kmp_get_gtid();
834  KA_TRACE(20, ("GOMP_loop_end: T#%d\n", gtid))
835 
836 #if OMPT_SUPPORT && OMPT_OPTIONAL
837  ompt_frame_t *ompt_frame;
838  if (ompt_enabled.enabled) {
839  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
840  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
841  OMPT_STORE_RETURN_ADDRESS(gtid);
842  }
843 #endif
844  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
845 #if OMPT_SUPPORT && OMPT_OPTIONAL
846  if (ompt_enabled.enabled) {
847  ompt_frame->enter_frame = ompt_data_none;
848  }
849 #endif
850 
851  KA_TRACE(20, ("GOMP_loop_end exit: T#%d\n", gtid))
852 }
853 
854 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_END_NOWAIT)(void) {
855  KA_TRACE(20, ("GOMP_loop_end_nowait: T#%d\n", __kmp_get_gtid()))
856 }
857 
858 // Unsigned long long loop worksharing constructs
859 //
860 // These are new with gcc 4.4
861 
862 #define LOOP_START_ULL(func, schedule) \
863  int func(int up, unsigned long long lb, unsigned long long ub, \
864  unsigned long long str, unsigned long long chunk_sz, \
865  unsigned long long *p_lb, unsigned long long *p_ub) { \
866  int status; \
867  long long str2 = up ? ((long long)str) : -((long long)str); \
868  long long stride; \
869  int gtid = __kmp_entry_gtid(); \
870  MKLOC(loc, KMP_STR(func)); \
871  \
872  KA_TRACE(20, (KMP_STR(func) ": T#%d, up %d, lb 0x%llx, ub 0x%llx, str " \
873  "0x%llx, chunk_sz 0x%llx\n", \
874  gtid, up, lb, ub, str, chunk_sz)); \
875  \
876  if ((str > 0) ? (lb < ub) : (lb > ub)) { \
877  KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb, \
878  (str2 > 0) ? (ub - 1) : (ub + 1), str2, chunk_sz, \
879  (schedule) != kmp_sch_static); \
880  status = \
881  KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb, \
882  (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \
883  if (status) { \
884  KMP_DEBUG_ASSERT(stride == str2); \
885  *p_ub += (str > 0) ? 1 : -1; \
886  } \
887  } else { \
888  status = 0; \
889  } \
890  \
891  KA_TRACE( \
892  20, \
893  (KMP_STR( \
894  func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n", \
895  gtid, *p_lb, *p_ub, status)); \
896  return status; \
897  }
898 
899 #define LOOP_RUNTIME_START_ULL(func, schedule) \
900  int func(int up, unsigned long long lb, unsigned long long ub, \
901  unsigned long long str, unsigned long long *p_lb, \
902  unsigned long long *p_ub) { \
903  int status; \
904  long long str2 = up ? ((long long)str) : -((long long)str); \
905  unsigned long long stride; \
906  unsigned long long chunk_sz = 0; \
907  int gtid = __kmp_entry_gtid(); \
908  MKLOC(loc, KMP_STR(func)); \
909  \
910  KA_TRACE(20, (KMP_STR(func) ": T#%d, up %d, lb 0x%llx, ub 0x%llx, str " \
911  "0x%llx, chunk_sz 0x%llx\n", \
912  gtid, up, lb, ub, str, chunk_sz)); \
913  \
914  if ((str > 0) ? (lb < ub) : (lb > ub)) { \
915  KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb, \
916  (str2 > 0) ? (ub - 1) : (ub + 1), str2, chunk_sz, \
917  TRUE); \
918  status = \
919  KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb, \
920  (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \
921  if (status) { \
922  KMP_DEBUG_ASSERT((long long)stride == str2); \
923  *p_ub += (str > 0) ? 1 : -1; \
924  } \
925  } else { \
926  status = 0; \
927  } \
928  \
929  KA_TRACE( \
930  20, \
931  (KMP_STR( \
932  func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n", \
933  gtid, *p_lb, *p_ub, status)); \
934  return status; \
935  }
936 
937 #define LOOP_NEXT_ULL(func, fini_code) \
938  int func(unsigned long long *p_lb, unsigned long long *p_ub) { \
939  int status; \
940  long long stride; \
941  int gtid = __kmp_get_gtid(); \
942  MKLOC(loc, KMP_STR(func)); \
943  KA_TRACE(20, (KMP_STR(func) ": T#%d\n", gtid)); \
944  \
945  fini_code status = \
946  KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb, \
947  (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \
948  if (status) { \
949  *p_ub += (stride > 0) ? 1 : -1; \
950  } \
951  \
952  KA_TRACE( \
953  20, \
954  (KMP_STR( \
955  func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, stride 0x%llx, " \
956  "returning %d\n", \
957  gtid, *p_lb, *p_ub, stride, status)); \
958  return status; \
959  }
960 
961 LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START),
962  kmp_sch_static)
963 LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT), {})
964 LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START),
965  kmp_sch_dynamic_chunked)
966 LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT), {})
967 LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START),
968  kmp_sch_guided_chunked)
969 LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT), {})
970 LOOP_START_ULL(
971  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_START),
972  kmp_sch_dynamic_chunked)
973 LOOP_NEXT_ULL(
974  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_NEXT), {})
975 LOOP_START_ULL(
976  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_START),
977  kmp_sch_guided_chunked)
978 LOOP_NEXT_ULL(
979  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_NEXT), {})
980 LOOP_RUNTIME_START_ULL(
981  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START), kmp_sch_runtime)
982 LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT), {})
983 LOOP_RUNTIME_START_ULL(
984  KMP_EXPAND_NAME(
985  KMP_API_NAME_GOMP_LOOP_ULL_MAYBE_NONMONOTONIC_RUNTIME_START),
986  kmp_sch_runtime)
987 LOOP_RUNTIME_START_ULL(
988  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_RUNTIME_START),
989  kmp_sch_runtime)
990 LOOP_NEXT_ULL(
991  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_MAYBE_NONMONOTONIC_RUNTIME_NEXT),
992  {})
993 LOOP_NEXT_ULL(
994  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_RUNTIME_NEXT), {})
995 
996 LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START),
997  kmp_ord_static)
998 LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT),
999  { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
1000 LOOP_START_ULL(
1001  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START),
1002  kmp_ord_dynamic_chunked)
1003 LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT),
1004  { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
1005 LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START),
1006  kmp_ord_guided_chunked)
1007 LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT),
1008  { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
1009 LOOP_RUNTIME_START_ULL(
1010  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START),
1011  kmp_ord_runtime)
1012 LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT),
1013  { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
1014 
1015 #define LOOP_DOACROSS_START_ULL(func, schedule) \
1016  int func(unsigned ncounts, unsigned long long *counts, \
1017  unsigned long long chunk_sz, unsigned long long *p_lb, \
1018  unsigned long long *p_ub) { \
1019  int status; \
1020  long long stride, str, lb, ub; \
1021  int gtid = __kmp_entry_gtid(); \
1022  struct kmp_dim *dims = \
1023  (struct kmp_dim *)__kmp_allocate(sizeof(struct kmp_dim) * ncounts); \
1024  MKLOC(loc, KMP_STR(func)); \
1025  for (unsigned i = 0; i < ncounts; ++i) { \
1026  dims[i].lo = 0; \
1027  dims[i].up = counts[i] - 1; \
1028  dims[i].st = 1; \
1029  } \
1030  __kmpc_doacross_init(&loc, gtid, (int)ncounts, dims); \
1031  lb = 0; \
1032  ub = counts[0]; \
1033  str = 1; \
1034  \
1035  KA_TRACE(20, (KMP_STR(func) ": T#%d, lb 0x%llx, ub 0x%llx, str " \
1036  "0x%llx, chunk_sz 0x%llx\n", \
1037  gtid, lb, ub, str, chunk_sz)); \
1038  \
1039  if ((str > 0) ? (lb < ub) : (lb > ub)) { \
1040  KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb, \
1041  (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \
1042  (schedule) != kmp_sch_static); \
1043  status = \
1044  KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb, \
1045  (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \
1046  if (status) { \
1047  KMP_DEBUG_ASSERT(stride == str); \
1048  *p_ub += (str > 0) ? 1 : -1; \
1049  } \
1050  } else { \
1051  status = 0; \
1052  } \
1053  KMP_DOACROSS_FINI(status, gtid); \
1054  \
1055  KA_TRACE( \
1056  20, \
1057  (KMP_STR( \
1058  func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n", \
1059  gtid, *p_lb, *p_ub, status)); \
1060  __kmp_free(dims); \
1061  return status; \
1062  }
1063 
1064 #define LOOP_DOACROSS_RUNTIME_START_ULL(func, schedule) \
1065  int func(unsigned ncounts, unsigned long long *counts, \
1066  unsigned long long *p_lb, unsigned long long *p_ub) { \
1067  int status; \
1068  unsigned long long stride, str, lb, ub; \
1069  unsigned long long chunk_sz = 0; \
1070  int gtid = __kmp_entry_gtid(); \
1071  struct kmp_dim *dims = \
1072  (struct kmp_dim *)__kmp_allocate(sizeof(struct kmp_dim) * ncounts); \
1073  MKLOC(loc, KMP_STR(func)); \
1074  for (unsigned i = 0; i < ncounts; ++i) { \
1075  dims[i].lo = 0; \
1076  dims[i].up = counts[i] - 1; \
1077  dims[i].st = 1; \
1078  } \
1079  __kmpc_doacross_init(&loc, gtid, (int)ncounts, dims); \
1080  lb = 0; \
1081  ub = counts[0]; \
1082  str = 1; \
1083  KA_TRACE(20, (KMP_STR(func) ": T#%d, lb 0x%llx, ub 0x%llx, str " \
1084  "0x%llx, chunk_sz 0x%llx\n", \
1085  gtid, lb, ub, str, chunk_sz)); \
1086  \
1087  if ((str > 0) ? (lb < ub) : (lb > ub)) { \
1088  KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb, \
1089  (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \
1090  TRUE); \
1091  status = \
1092  KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb, \
1093  (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \
1094  if (status) { \
1095  KMP_DEBUG_ASSERT(stride == str); \
1096  *p_ub += (str > 0) ? 1 : -1; \
1097  } \
1098  } else { \
1099  status = 0; \
1100  } \
1101  KMP_DOACROSS_FINI(status, gtid); \
1102  \
1103  KA_TRACE( \
1104  20, \
1105  (KMP_STR( \
1106  func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n", \
1107  gtid, *p_lb, *p_ub, status)); \
1108  __kmp_free(dims); \
1109  return status; \
1110  }
1111 
1112 LOOP_DOACROSS_START_ULL(
1113  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_STATIC_START),
1114  kmp_sch_static)
1115 LOOP_DOACROSS_START_ULL(
1116  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_DYNAMIC_START),
1117  kmp_sch_dynamic_chunked)
1118 LOOP_DOACROSS_START_ULL(
1119  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_GUIDED_START),
1120  kmp_sch_guided_chunked)
1121 LOOP_DOACROSS_RUNTIME_START_ULL(
1122  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START),
1123  kmp_sch_runtime)
1124 
1125 // Combined parallel / loop worksharing constructs
1126 //
1127 // There are no ull versions (yet).
1128 
1129 #define PARALLEL_LOOP_START(func, schedule, ompt_pre, ompt_post) \
1130  void func(void (*task)(void *), void *data, unsigned num_threads, long lb, \
1131  long ub, long str, long chunk_sz) { \
1132  int gtid = __kmp_entry_gtid(); \
1133  MKLOC(loc, KMP_STR(func)); \
1134  KA_TRACE( \
1135  20, \
1136  (KMP_STR( \
1137  func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \
1138  gtid, lb, ub, str, chunk_sz)); \
1139  \
1140  ompt_pre(); \
1141  \
1142  __kmp_GOMP_fork_call(&loc, gtid, num_threads, 0u, task, \
1143  (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, \
1144  9, task, data, num_threads, &loc, (schedule), lb, \
1145  (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz); \
1146  IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid)); \
1147  \
1148  KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
1149  (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \
1150  (schedule) != kmp_sch_static); \
1151  \
1152  ompt_post(); \
1153  \
1154  KA_TRACE(20, (KMP_STR(func) " exit: T#%d\n", gtid)); \
1155  }
1156 
1157 #if OMPT_SUPPORT && OMPT_OPTIONAL
1158 
1159 #define OMPT_LOOP_PRE() \
1160  ompt_frame_t *parent_frame; \
1161  if (ompt_enabled.enabled) { \
1162  __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL); \
1163  parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); \
1164  OMPT_STORE_RETURN_ADDRESS(gtid); \
1165  }
1166 
1167 #define OMPT_LOOP_POST() \
1168  if (ompt_enabled.enabled) { \
1169  parent_frame->enter_frame = ompt_data_none; \
1170  }
1171 
1172 #else
1173 
1174 #define OMPT_LOOP_PRE()
1175 
1176 #define OMPT_LOOP_POST()
1177 
1178 #endif
1179 
1180 PARALLEL_LOOP_START(
1181  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START),
1182  kmp_sch_static, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1183 PARALLEL_LOOP_START(
1184  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START),
1185  kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1186 PARALLEL_LOOP_START(
1187  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START),
1188  kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1189 PARALLEL_LOOP_START(
1190  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START),
1191  kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1192 
1193 // Tasking constructs
1194 
1195 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data,
1196  void (*copy_func)(void *, void *),
1197  long arg_size, long arg_align,
1198  bool if_cond, unsigned gomp_flags,
1199  void **depend) {
1200  MKLOC(loc, "GOMP_task");
1201  int gtid = __kmp_entry_gtid();
1202  kmp_int32 flags = 0;
1203  kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;
1204 
1205  KA_TRACE(20, ("GOMP_task: T#%d\n", gtid));
1206 
1207  // The low-order bit is the "untied" flag
1208  if (!(gomp_flags & KMP_GOMP_TASK_UNTIED_FLAG)) {
1209  input_flags->tiedness = 1;
1210  }
1211  // The second low-order bit is the "final" flag
1212  if (gomp_flags & KMP_GOMP_TASK_FINAL_FLAG) {
1213  input_flags->final = 1;
1214  }
1215  input_flags->native = 1;
1216  // __kmp_task_alloc() sets up all other flags
1217 
1218  if (!if_cond) {
1219  arg_size = 0;
1220  }
1221 
1222  kmp_task_t *task = __kmp_task_alloc(
1223  &loc, gtid, input_flags, sizeof(kmp_task_t),
1224  arg_size ? arg_size + arg_align - 1 : 0, (kmp_routine_entry_t)func);
1225 
1226  if (arg_size > 0) {
1227  if (arg_align > 0) {
1228  task->shareds = (void *)((((size_t)task->shareds) + arg_align - 1) /
1229  arg_align * arg_align);
1230  }
1231  // else error??
1232 
1233  if (copy_func) {
1234  (*copy_func)(task->shareds, data);
1235  } else {
1236  KMP_MEMCPY(task->shareds, data, arg_size);
1237  }
1238  }
1239 
1240 #if OMPT_SUPPORT
1241  kmp_taskdata_t *current_task;
1242  if (ompt_enabled.enabled) {
1243  current_task = __kmp_threads[gtid]->th.th_current_task;
1244  current_task->ompt_task_info.frame.enter_frame.ptr =
1245  OMPT_GET_FRAME_ADDRESS(0);
1246  }
1247  OMPT_STORE_RETURN_ADDRESS(gtid);
1248 #endif
1249 
1250  if (if_cond) {
1251  if (gomp_flags & KMP_GOMP_TASK_DEPENDS_FLAG) {
1252  KMP_ASSERT(depend);
1253  kmp_gomp_depends_info_t gomp_depends(depend);
1254  kmp_int32 ndeps = gomp_depends.get_num_deps();
1255  kmp_depend_info_t dep_list[ndeps];
1256  for (kmp_int32 i = 0; i < ndeps; i++)
1257  dep_list[i] = gomp_depends.get_kmp_depend(i);
1258  kmp_int32 ndeps_cnv;
1259  __kmp_type_convert(ndeps, &ndeps_cnv);
1260  __kmpc_omp_task_with_deps(&loc, gtid, task, ndeps_cnv, dep_list, 0, NULL);
1261  } else {
1262  __kmpc_omp_task(&loc, gtid, task);
1263  }
1264  } else {
1265 #if OMPT_SUPPORT
1266  ompt_thread_info_t oldInfo;
1267  kmp_info_t *thread;
1268  kmp_taskdata_t *taskdata;
1269  if (ompt_enabled.enabled) {
1270  // Store the thread's state and restore it after the task
1271  thread = __kmp_threads[gtid];
1272  taskdata = KMP_TASK_TO_TASKDATA(task);
1273  oldInfo = thread->th.ompt_thread_info;
1274  thread->th.ompt_thread_info.wait_id = 0;
1275  thread->th.ompt_thread_info.state = ompt_state_work_parallel;
1276  taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1277  }
1278  OMPT_STORE_RETURN_ADDRESS(gtid);
1279 #endif
1280  if (gomp_flags & KMP_GOMP_TASK_DEPENDS_FLAG) {
1281  KMP_ASSERT(depend);
1282  kmp_gomp_depends_info_t gomp_depends(depend);
1283  kmp_int32 ndeps = gomp_depends.get_num_deps();
1284  kmp_depend_info_t dep_list[ndeps];
1285  for (kmp_int32 i = 0; i < ndeps; i++)
1286  dep_list[i] = gomp_depends.get_kmp_depend(i);
1287  __kmpc_omp_wait_deps(&loc, gtid, ndeps, dep_list, 0, NULL);
1288  }
1289 
1290  __kmpc_omp_task_begin_if0(&loc, gtid, task);
1291  func(data);
1292  __kmpc_omp_task_complete_if0(&loc, gtid, task);
1293 
1294 #if OMPT_SUPPORT
1295  if (ompt_enabled.enabled) {
1296  thread->th.ompt_thread_info = oldInfo;
1297  taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
1298  }
1299 #endif
1300  }
1301 #if OMPT_SUPPORT
1302  if (ompt_enabled.enabled) {
1303  current_task->ompt_task_info.frame.enter_frame = ompt_data_none;
1304  }
1305 #endif
1306 
1307  KA_TRACE(20, ("GOMP_task exit: T#%d\n", gtid));
1308 }
1309 
1310 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKWAIT)(void) {
1311  MKLOC(loc, "GOMP_taskwait");
1312  int gtid = __kmp_entry_gtid();
1313 
1314 #if OMPT_SUPPORT
1315  OMPT_STORE_RETURN_ADDRESS(gtid);
1316 #endif
1317 
1318  KA_TRACE(20, ("GOMP_taskwait: T#%d\n", gtid));
1319 
1320  __kmpc_omp_taskwait(&loc, gtid);
1321 
1322  KA_TRACE(20, ("GOMP_taskwait exit: T#%d\n", gtid));
1323 }
1324 
1325 // Sections worksharing constructs
1326 //
1327 // For the sections construct, we initialize a dynamically scheduled loop
1328 // worksharing construct with lb 1 and stride 1, and use the iteration #'s
1329 // that it returns as section ids.
1330 //
1331 // There are no special entry points for ordered sections, so we always use
1332 // the dynamically scheduled workshare, even if the sections aren't ordered.
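// For illustration, a construct with three sections is lowered by GCC to
// something roughly like
//   for (unsigned id = GOMP_sections_start(3); id; id = GOMP_sections_next())
//     switch (id) { case 1: ...; case 2: ...; case 3: ...; }
//   GOMP_sections_end();
// so the section ids handed out here are just the 1-based iteration numbers
// of the underlying dynamically scheduled loop (sketch only; actual codegen
// differs in detail).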
1333 
1334 unsigned KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_START)(unsigned count) {
1335  int status;
1336  kmp_int lb, ub, stride;
1337  int gtid = __kmp_entry_gtid();
1338  MKLOC(loc, "GOMP_sections_start");
1339  KA_TRACE(20, ("GOMP_sections_start: T#%d\n", gtid));
1340 
1341  KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE);
1342 
1343  status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, &lb, &ub, &stride);
1344  if (status) {
1345  KMP_DEBUG_ASSERT(stride == 1);
1346  KMP_DEBUG_ASSERT(lb > 0);
1347  KMP_ASSERT(lb == ub);
1348  } else {
1349  lb = 0;
1350  }
1351 
1352  KA_TRACE(20, ("GOMP_sections_start exit: T#%d returning %u\n", gtid,
1353  (unsigned)lb));
1354  return (unsigned)lb;
1355 }
1356 
1357 unsigned KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_NEXT)(void) {
1358  int status;
1359  kmp_int lb, ub, stride;
1360  int gtid = __kmp_get_gtid();
1361  MKLOC(loc, "GOMP_sections_next");
1362  KA_TRACE(20, ("GOMP_sections_next: T#%d\n", gtid));
1363 
1364 #if OMPT_SUPPORT
1365  OMPT_STORE_RETURN_ADDRESS(gtid);
1366 #endif
1367 
1368  status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, &lb, &ub, &stride);
1369  if (status) {
1370  KMP_DEBUG_ASSERT(stride == 1);
1371  KMP_DEBUG_ASSERT(lb > 0);
1372  KMP_ASSERT(lb == ub);
1373  } else {
1374  lb = 0;
1375  }
1376 
1377  KA_TRACE(
1378  20, ("GOMP_sections_next exit: T#%d returning %u\n", gtid, (unsigned)lb));
1379  return (unsigned)lb;
1380 }
1381 
1382 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START)(
1383  void (*task)(void *), void *data, unsigned num_threads, unsigned count) {
1384  int gtid = __kmp_entry_gtid();
1385 
1386 #if OMPT_SUPPORT
1387  ompt_frame_t *parent_frame;
1388 
1389  if (ompt_enabled.enabled) {
1390  __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL);
1391  parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1392  }
1393  OMPT_STORE_RETURN_ADDRESS(gtid);
1394 #endif
1395 
1396  MKLOC(loc, "GOMP_parallel_sections_start");
1397  KA_TRACE(20, ("GOMP_parallel_sections_start: T#%d\n", gtid));
1398 
1399  __kmp_GOMP_fork_call(&loc, gtid, num_threads, 0u, task,
1400  (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9,
1401  task, data, num_threads, &loc, kmp_nm_dynamic_chunked,
1402  (kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1);
1403 
1404 #if OMPT_SUPPORT
1405  if (ompt_enabled.enabled) {
1406  parent_frame->enter_frame = ompt_data_none;
1407  }
1408 #endif
1409 
1410  KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE);
1411 
1412  KA_TRACE(20, ("GOMP_parallel_sections_start exit: T#%d\n", gtid));
1413 }
1414 
1415 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_END)(void) {
1416  int gtid = __kmp_get_gtid();
1417  KA_TRACE(20, ("GOMP_sections_end: T#%d\n", gtid))
1418 
1419 #if OMPT_SUPPORT
1420  ompt_frame_t *ompt_frame;
1421  if (ompt_enabled.enabled) {
1422  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1423  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1424  }
1425  OMPT_STORE_RETURN_ADDRESS(gtid);
1426 #endif
1427  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
1428 #if OMPT_SUPPORT
1429  if (ompt_enabled.enabled) {
1430  ompt_frame->enter_frame = ompt_data_none;
1431  }
1432 #endif
1433 
1434  KA_TRACE(20, ("GOMP_sections_end exit: T#%d\n", gtid))
1435 }
1436 
1437 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT)(void) {
1438  KA_TRACE(20, ("GOMP_sections_end_nowait: T#%d\n", __kmp_get_gtid()))
1439 }
1440 
1441 // libgomp has an empty function for GOMP_taskyield as of 2013-10-10
1442 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKYIELD)(void) {
1443  KA_TRACE(20, ("GOMP_taskyield: T#%d\n", __kmp_get_gtid()))
1444  return;
1445 }
1446 
1447 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL)(void (*task)(void *),
1448  void *data,
1449  unsigned num_threads,
1450  unsigned int flags) {
1451  int gtid = __kmp_entry_gtid();
1452  MKLOC(loc, "GOMP_parallel");
1453  KA_TRACE(20, ("GOMP_parallel: T#%d\n", gtid));
1454 
1455 #if OMPT_SUPPORT
1456  ompt_task_info_t *parent_task_info, *task_info;
1457  if (ompt_enabled.enabled) {
1458  parent_task_info = __ompt_get_task_info_object(0);
1459  parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1460  }
1461  OMPT_STORE_RETURN_ADDRESS(gtid);
1462 #endif
1463  __kmp_GOMP_fork_call(&loc, gtid, num_threads, flags, task,
1464  (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task,
1465  data);
1466 #if OMPT_SUPPORT
1467  if (ompt_enabled.enabled) {
1468  task_info = __ompt_get_task_info_object(0);
1469  task_info->frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1470  }
1471 #endif
1472  task(data);
1473  {
1474 #if OMPT_SUPPORT
1475  OMPT_STORE_RETURN_ADDRESS(gtid);
1476 #endif
1477  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)();
1478  }
1479 #if OMPT_SUPPORT
1480  if (ompt_enabled.enabled) {
1481  task_info->frame.exit_frame = ompt_data_none;
1482  parent_task_info->frame.enter_frame = ompt_data_none;
1483  }
1484 #endif
1485 }
1486 
1487 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_SECTIONS)(void (*task)(void *),
1488  void *data,
1489  unsigned num_threads,
1490  unsigned count,
1491  unsigned flags) {
1492  int gtid = __kmp_entry_gtid();
1493  MKLOC(loc, "GOMP_parallel_sections");
1494  KA_TRACE(20, ("GOMP_parallel_sections: T#%d\n", gtid));
1495 
1496 #if OMPT_SUPPORT
1497  OMPT_STORE_RETURN_ADDRESS(gtid);
1498 #endif
1499 
1500  __kmp_GOMP_fork_call(&loc, gtid, num_threads, flags, task,
1501  (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9,
1502  task, data, num_threads, &loc, kmp_nm_dynamic_chunked,
1503  (kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1);
1504 
1505  {
1506 #if OMPT_SUPPORT
1507  OMPT_STORE_RETURN_ADDRESS(gtid);
1508 #endif
1509 
1510  KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE);
1511  }
1512  task(data);
1513  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)();
1514  KA_TRACE(20, ("GOMP_parallel_sections exit: T#%d\n", gtid));
1515 }
1516 
1517 #define PARALLEL_LOOP(func, schedule, ompt_pre, ompt_post) \
1518  void func(void (*task)(void *), void *data, unsigned num_threads, long lb, \
1519  long ub, long str, long chunk_sz, unsigned flags) { \
1520  int gtid = __kmp_entry_gtid(); \
1521  MKLOC(loc, KMP_STR(func)); \
1522  KA_TRACE( \
1523  20, \
1524  (KMP_STR( \
1525  func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \
1526  gtid, lb, ub, str, chunk_sz)); \
1527  \
1528  ompt_pre(); \
1529  IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
1530  __kmp_GOMP_fork_call(&loc, gtid, num_threads, flags, task, \
1531  (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, \
1532  9, task, data, num_threads, &loc, (schedule), lb, \
1533  (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz); \
1534  \
1535  { \
1536  IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
1537  KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
1538  (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \
1539  (schedule) != kmp_sch_static); \
1540  } \
1541  task(data); \
1542  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(); \
1543  ompt_post(); \
1544  \
1545  KA_TRACE(20, (KMP_STR(func) " exit: T#%d\n", gtid)); \
1546  }
1547 
1548 PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC),
1549  kmp_sch_static, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1550 PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC),
1551  kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1552 PARALLEL_LOOP(
1553  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_GUIDED),
1554  kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1555 PARALLEL_LOOP(
1556  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_DYNAMIC),
1557  kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1558 PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED),
1559  kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1560 PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME),
1561  kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1562 PARALLEL_LOOP(
1563  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_MAYBE_NONMONOTONIC_RUNTIME),
1564  kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1565 PARALLEL_LOOP(
1566  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_RUNTIME),
1567  kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1568 
1569 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_START)(void) {
1570  int gtid = __kmp_entry_gtid();
1571  MKLOC(loc, "GOMP_taskgroup_start");
1572  KA_TRACE(20, ("GOMP_taskgroup_start: T#%d\n", gtid));
1573 
1574 #if OMPT_SUPPORT
1575  OMPT_STORE_RETURN_ADDRESS(gtid);
1576 #endif
1577 
1578  __kmpc_taskgroup(&loc, gtid);
1579 
1580  return;
1581 }
1582 
1583 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_END)(void) {
1584  int gtid = __kmp_get_gtid();
1585  MKLOC(loc, "GOMP_taskgroup_end");
1586  KA_TRACE(20, ("GOMP_taskgroup_end: T#%d\n", gtid));
1587 
1588 #if OMPT_SUPPORT
1589  OMPT_STORE_RETURN_ADDRESS(gtid);
1590 #endif
1591 
1592  __kmpc_end_taskgroup(&loc, gtid);
1593 
1594  return;
1595 }
1596 
1597 static kmp_int32 __kmp_gomp_to_omp_cancellation_kind(int gomp_kind) {
1598  kmp_int32 cncl_kind = 0;
1599  switch (gomp_kind) {
1600  case 1:
1601  cncl_kind = cancel_parallel;
1602  break;
1603  case 2:
1604  cncl_kind = cancel_loop;
1605  break;
1606  case 4:
1607  cncl_kind = cancel_sections;
1608  break;
1609  case 8:
1610  cncl_kind = cancel_taskgroup;
1611  break;
1612  }
1613  return cncl_kind;
1614 }
1615 
1616 // Return true if cancellation should take place, false otherwise
1617 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CANCELLATION_POINT)(int which) {
1618  int gtid = __kmp_get_gtid();
1619  MKLOC(loc, "GOMP_cancellation_point");
1620  KA_TRACE(20, ("GOMP_cancellation_point: T#%d which:%d\n", gtid, which));
1621  kmp_int32 cncl_kind = __kmp_gomp_to_omp_cancellation_kind(which);
1622  return __kmpc_cancellationpoint(&loc, gtid, cncl_kind);
1623 }
1624 
1625 // Return true if cancellation should take place, false otherwise
1626 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CANCEL)(int which, bool do_cancel) {
1627  int gtid = __kmp_get_gtid();
1628  MKLOC(loc, "GOMP_cancel");
1629  KA_TRACE(20, ("GOMP_cancel: T#%d which:%d do_cancel:%d\n", gtid, which,
1630  (int)do_cancel));
1631  kmp_int32 cncl_kind = __kmp_gomp_to_omp_cancellation_kind(which);
1632 
1633  if (do_cancel == FALSE) {
1634  return __kmpc_cancellationpoint(&loc, gtid, cncl_kind);
1635  } else {
1636  return __kmpc_cancel(&loc, gtid, cncl_kind);
1637  }
1638 }
1639 
1640 // Return true if cancellation should take place, false otherwise
1641 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_BARRIER_CANCEL)(void) {
1642  int gtid = __kmp_get_gtid();
1643  KA_TRACE(20, ("GOMP_barrier_cancel: T#%d\n", gtid));
1644  return __kmp_barrier_gomp_cancel(gtid);
1645 }
1646 
1647 // Return true if cancellation should take place, false otherwise
1648 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_END_CANCEL)(void) {
1649  int gtid = __kmp_get_gtid();
1650  KA_TRACE(20, ("GOMP_sections_end_cancel: T#%d\n", gtid));
1651  return __kmp_barrier_gomp_cancel(gtid);
1652 }
1653 
1654 // Return true if cancellation should take place, false otherwise
1655 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_END_CANCEL)(void) {
1656  int gtid = __kmp_get_gtid();
1657  KA_TRACE(20, ("GOMP_loop_end_cancel: T#%d\n", gtid));
1658  return __kmp_barrier_gomp_cancel(gtid);
1659 }
1660 
1661 // All target functions are empty as of 2014-05-29
1662 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TARGET)(int device, void (*fn)(void *),
1663  const void *openmp_target,
1664  size_t mapnum, void **hostaddrs,
1665  size_t *sizes,
1666  unsigned char *kinds) {
1667  return;
1668 }
1669 
1670 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TARGET_DATA)(
1671  int device, const void *openmp_target, size_t mapnum, void **hostaddrs,
1672  size_t *sizes, unsigned char *kinds) {
1673  return;
1674 }
1675 
1676 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TARGET_END_DATA)(void) { return; }
1677 
1678 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TARGET_UPDATE)(
1679  int device, const void *openmp_target, size_t mapnum, void **hostaddrs,
1680  size_t *sizes, unsigned char *kinds) {
1681  return;
1682 }
1683 
1684 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TEAMS)(unsigned int num_teams,
1685  unsigned int thread_limit) {
1686  return;
1687 }
1688 
1689 // Task duplication function which copies src to dest (both are
1690 // preallocated task structures)
1691 static void __kmp_gomp_task_dup(kmp_task_t *dest, kmp_task_t *src,
1692  kmp_int32 last_private) {
1693  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(src);
1694  if (taskdata->td_copy_func) {
1695  (taskdata->td_copy_func)(dest->shareds, src->shareds);
1696  }
1697 }
1698 
1699 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_REGISTER)(
1700  uintptr_t *);
1701 
1702 #ifdef __cplusplus
1703 } // extern "C"
1704 #endif
1705 
1706 template <typename T>
1707 void __GOMP_taskloop(void (*func)(void *), void *data,
1708  void (*copy_func)(void *, void *), long arg_size,
1709  long arg_align, unsigned gomp_flags,
1710  unsigned long num_tasks, int priority, T start, T end,
1711  T step) {
1712  typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);
1713  MKLOC(loc, "GOMP_taskloop");
1714  int sched;
1715  T *loop_bounds;
1716  int gtid = __kmp_entry_gtid();
1717  kmp_int32 flags = 0;
1718  int if_val = gomp_flags & (1u << 10);
1719  int nogroup = gomp_flags & (1u << 11);
1720  int up = gomp_flags & (1u << 8);
1721  int reductions = gomp_flags & (1u << 12);
1722  p_task_dup_t task_dup = NULL;
1723  kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;
1724 #ifdef KMP_DEBUG
1725  {
1726  char *buff;
1727  buff = __kmp_str_format(
1728  "GOMP_taskloop: T#%%d: func:%%p data:%%p copy_func:%%p "
1729  "arg_size:%%ld arg_align:%%ld gomp_flags:0x%%x num_tasks:%%lu "
1730  "priority:%%d start:%%%s end:%%%s step:%%%s\n",
1731  traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec);
1732  KA_TRACE(20, (buff, gtid, func, data, copy_func, arg_size, arg_align,
1733  gomp_flags, num_tasks, priority, start, end, step));
1734  __kmp_str_free(&buff);
1735  }
1736 #endif
1737  KMP_ASSERT((size_t)arg_size >= 2 * sizeof(T));
1738  KMP_ASSERT(arg_align > 0);
1739  // The low-order bit is the "untied" flag
1740  if (!(gomp_flags & 1)) {
1741  input_flags->tiedness = 1;
1742  }
1743  // The second low-order bit is the "final" flag
1744  if (gomp_flags & 2) {
1745  input_flags->final = 1;
1746  }
1747  // Negative step flag
1748  if (!up) {
1749  // If step is flagged as negative but isn't properly sign extended,
1750  // then manually sign extend it. It could be a short, int, or char
1751  // embedded in a long, so no particular cast can be assumed.
1752  if (step > 0) {
1753  for (int i = sizeof(T) * CHAR_BIT - 1; i >= 0L; --i) {
1754  // break at the first 1 bit
1755  if (step & ((T)1 << i))
1756  break;
1757  step |= ((T)1 << i);
1758  }
1759  }
1760  }
1761  input_flags->native = 1;
1762  // Figure out whether grainsize, num_tasks, or neither clause was specified
1763  if (num_tasks > 0) {
1764  if (gomp_flags & (1u << 9))
1765  sched = 1; // grainsize specified
1766  else
1767  sched = 2; // num_tasks specified
1768  // neither grainsize nor num_tasks specified
1769  } else {
1770  sched = 0;
1771  }
1772 
1773  // __kmp_task_alloc() sets up all other flags
1774  kmp_task_t *task =
1775  __kmp_task_alloc(&loc, gtid, input_flags, sizeof(kmp_task_t),
1776  arg_size + arg_align - 1, (kmp_routine_entry_t)func);
1777  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
1778  taskdata->td_copy_func = copy_func;
1779  taskdata->td_size_loop_bounds = sizeof(T);
1780 
1781  // re-align shareds if needed and setup firstprivate copy constructors
1782  // through the task_dup mechanism
1783  task->shareds = (void *)((((size_t)task->shareds) + arg_align - 1) /
1784  arg_align * arg_align);
1785  if (copy_func) {
1786  task_dup = __kmp_gomp_task_dup;
1787  }
1788  KMP_MEMCPY(task->shareds, data, arg_size);
1789 
1790  loop_bounds = (T *)task->shareds;
1791  loop_bounds[0] = start;
1792  loop_bounds[1] = end + (up ? -1 : 1);
1793 
1794  if (!nogroup) {
1795 #if OMPT_SUPPORT && OMPT_OPTIONAL
1796  OMPT_STORE_RETURN_ADDRESS(gtid);
1797 #endif
1798  __kmpc_taskgroup(&loc, gtid);
1799  if (reductions) {
1800  // The data pointer points to lb, ub, then reduction data
1801  struct data_t {
1802  T a, b;
1803  uintptr_t *d;
1804  };
1805  uintptr_t *d = ((data_t *)data)->d;
1806  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_REGISTER)(d);
1807  }
1808  }
1809  __kmpc_taskloop(&loc, gtid, task, if_val, (kmp_uint64 *)&(loop_bounds[0]),
1810  (kmp_uint64 *)&(loop_bounds[1]), (kmp_int64)step, 1, sched,
1811  (kmp_uint64)num_tasks, (void *)task_dup);
1812  if (!nogroup) {
1813 #if OMPT_SUPPORT && OMPT_OPTIONAL
1814  OMPT_STORE_RETURN_ADDRESS(gtid);
1815 #endif
1816  __kmpc_end_taskgroup(&loc, gtid);
1817  }
1818 }
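// Bit layout of gomp_flags as consumed above (only the bits this wrapper
// inspects): bit 0 = untied, bit 1 = final, bit 8 = positive ("up") step,
// bit 9 = grainsize rather than num_tasks when num_tasks > 0, bit 10 =
// if-clause value, bit 11 = nogroup, bit 12 = taskgroup reductions present.
// The first two elements of the shareds block carry the loop bounds handed
// to __kmpc_taskloop(): shareds[0] is the start value and shareds[1] the end
// value, adjusted above so that __kmpc_taskloop() receives an inclusive
// bound. For illustration only (assuming the usual compiler lowering of the
// taskloop construct), a loop such as
//
//   #pragma omp taskloop grainsize(64)
//   for (long i = 0; i < n; ++i)
//     work(i);
//
// would presumably arrive here with num_tasks == 64 and bit 9 set, so the
// logic above selects sched == 1 (grainsize).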
1819 
1820 // 4 byte version of GOMP_doacross_post
1821 // This version needs to create a temporary array which converts 4 byte
1822 // integers into 8 byte integers
1823 template <typename T, bool need_conversion = (sizeof(long) == 4)>
1824 void __kmp_GOMP_doacross_post(T *count);
1825 
1826 template <> void __kmp_GOMP_doacross_post<long, true>(long *count) {
1827  int gtid = __kmp_entry_gtid();
1828  kmp_info_t *th = __kmp_threads[gtid];
1829  MKLOC(loc, "GOMP_doacross_post");
1830  kmp_int64 num_dims = th->th.th_dispatch->th_doacross_info[0];
1831  kmp_int64 *vec = (kmp_int64 *)__kmp_thread_malloc(
1832  th, (size_t)(sizeof(kmp_int64) * num_dims));
1833  for (kmp_int64 i = 0; i < num_dims; ++i) {
1834  vec[i] = (kmp_int64)count[i];
1835  }
1836  __kmpc_doacross_post(&loc, gtid, vec);
1837  __kmp_thread_free(th, vec);
1838 }
1839 
1840 // 8 byte version of GOMP_doacross_post
1841 // This version can just pass in the count array directly instead of creating
1842 // a temporary array
1843 template <> void __kmp_GOMP_doacross_post<long, false>(long *count) {
1844  int gtid = __kmp_entry_gtid();
1845  MKLOC(loc, "GOMP_doacross_post");
1846  __kmpc_doacross_post(&loc, gtid, RCAST(kmp_int64 *, count));
1847 }
1848 
1849 template <typename T> void __kmp_GOMP_doacross_wait(T first, va_list args) {
1850  int gtid = __kmp_entry_gtid();
1851  kmp_info_t *th = __kmp_threads[gtid];
1852  MKLOC(loc, "GOMP_doacross_wait");
1853  kmp_int64 num_dims = th->th.th_dispatch->th_doacross_info[0];
1854  kmp_int64 *vec = (kmp_int64 *)__kmp_thread_malloc(
1855  th, (size_t)(sizeof(kmp_int64) * num_dims));
1856  vec[0] = (kmp_int64)first;
1857  for (kmp_int64 i = 1; i < num_dims; ++i) {
1858  T item = va_arg(args, T);
1859  vec[i] = (kmp_int64)item;
1860  }
1861  __kmpc_doacross_wait(&loc, gtid, vec);
1862  __kmp_thread_free(th, vec);
1863  return;
1864 }
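// For illustration only (a sketch of the usual lowering of ordered depend
// clauses, which the compiler rather than this runtime decides): a doacross
// loop nest such as
//
//   #pragma omp for ordered(2)
//   for (long i = 0; i < n; ++i)
//     for (long j = 0; j < m; ++j) {
//   #pragma omp ordered depend(sink: i - 1, j)
//       work(i, j);
//   #pragma omp ordered depend(source)
//     }
//
// is expected to call GOMP_doacross_wait(i - 1, j) before the dependent work
// and GOMP_doacross_post() with an array holding the current iteration
// vector afterwards; the helpers above widen those values to the kmp_int64
// vectors that __kmpc_doacross_wait() and __kmpc_doacross_post() expect when
// long is 4 bytes.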
1865 
1866 #ifdef __cplusplus
1867 extern "C" {
1868 #endif // __cplusplus
1869 
1870 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKLOOP)(
1871  void (*func)(void *), void *data, void (*copy_func)(void *, void *),
1872  long arg_size, long arg_align, unsigned gomp_flags, unsigned long num_tasks,
1873  int priority, long start, long end, long step) {
1874  __GOMP_taskloop<long>(func, data, copy_func, arg_size, arg_align, gomp_flags,
1875  num_tasks, priority, start, end, step);
1876 }
1877 
1878 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKLOOP_ULL)(
1879  void (*func)(void *), void *data, void (*copy_func)(void *, void *),
1880  long arg_size, long arg_align, unsigned gomp_flags, unsigned long num_tasks,
1881  int priority, unsigned long long start, unsigned long long end,
1882  unsigned long long step) {
1883  __GOMP_taskloop<unsigned long long>(func, data, copy_func, arg_size,
1884  arg_align, gomp_flags, num_tasks,
1885  priority, start, end, step);
1886 }
1887 
1888 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_POST)(long *count) {
1889  __kmp_GOMP_doacross_post(count);
1890 }
1891 
1892 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_WAIT)(long first, ...) {
1893  va_list args;
1894  va_start(args, first);
1895  __kmp_GOMP_doacross_wait<long>(first, args);
1896  va_end(args);
1897 }
1898 
1899 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_ULL_POST)(
1900  unsigned long long *count) {
1901  int gtid = __kmp_entry_gtid();
1902  MKLOC(loc, "GOMP_doacross_ull_post");
1903  __kmpc_doacross_post(&loc, gtid, RCAST(kmp_int64 *, count));
1904 }
1905 
1906 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_ULL_WAIT)(
1907  unsigned long long first, ...) {
1908  va_list args;
1909  va_start(args, first);
1910  __kmp_GOMP_doacross_wait<unsigned long long>(first, args);
1911  va_end(args);
1912 }
1913 
1914 // fn: the function each primary thread of a new team will call
1915 // data: argument to fn
1916 // num_teams, thread_limit: upper bounds on the respective ICVs
1917 // flags: unused
1918 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TEAMS_REG)(void (*fn)(void *),
1919  void *data,
1920  unsigned num_teams,
1921  unsigned thread_limit,
1922  unsigned flags) {
1923  MKLOC(loc, "GOMP_teams_reg");
1924  int gtid = __kmp_entry_gtid();
1925  KA_TRACE(20, ("GOMP_teams_reg: T#%d num_teams=%u thread_limit=%u flag=%u\n",
1926  gtid, num_teams, thread_limit, flags));
1927  __kmpc_push_num_teams(&loc, gtid, num_teams, thread_limit);
1928  __kmpc_fork_teams(&loc, 2, (microtask_t)__kmp_GOMP_microtask_wrapper, fn,
1929  data);
1930  KA_TRACE(20, ("GOMP_teams_reg exit: T#%d\n", gtid));
1931 }
1932 
1933 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKWAIT_DEPEND)(void **depend) {
1934  MKLOC(loc, "GOMP_taskwait_depend");
1935  int gtid = __kmp_entry_gtid();
1936  KA_TRACE(20, ("GOMP_taskwait_depend: T#%d\n", gtid));
1937  kmp_gomp_depends_info_t gomp_depends(depend);
1938  kmp_int32 ndeps = gomp_depends.get_num_deps();
1939  kmp_depend_info_t dep_list[ndeps];
1940  for (kmp_int32 i = 0; i < ndeps; i++)
1941  dep_list[i] = gomp_depends.get_kmp_depend(i);
1942 #if OMPT_SUPPORT
1943  OMPT_STORE_RETURN_ADDRESS(gtid);
1944 #endif
1945  __kmpc_omp_wait_deps(&loc, gtid, ndeps, dep_list, 0, NULL);
1946  KA_TRACE(20, ("GOMP_taskwait_depend exit: T#%d\n", gtid));
1947 }
1948 
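// Layout of the GOMP taskgroup reduction descriptor ("data"), as read and
// written by the register, unregister, and remap routines below: data[0] is
// the number of reduction variables, data[1] the per-thread size of one set
// of privatized copies, data[2] the base address of the privatized copies,
// and data[6] one past their end; for each variable j, the triple starting
// at data[7 + 3*j] holds the original shared address in element 0 and the
// variable's offset within a per-thread block in element 1.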
1949 static inline void
1950 __kmp_GOMP_taskgroup_reduction_register(uintptr_t *data, kmp_taskgroup_t *tg,
1951  int nthreads,
1952  uintptr_t *allocated = nullptr) {
1953  KMP_ASSERT(data);
1954  KMP_ASSERT(nthreads > 0);
1955  // Have private copy pointers point to previously allocated
1956  // reduction data or allocate new data here
1957  if (allocated) {
1958  data[2] = allocated[2];
1959  data[6] = allocated[6];
1960  } else {
1961  data[2] = (uintptr_t)__kmp_allocate(nthreads * data[1]);
1962  data[6] = data[2] + (nthreads * data[1]);
1963  }
1964  if (tg)
1965  tg->gomp_data = data;
1966 }
1967 
1968 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_REGISTER)(
1969  uintptr_t *data) {
1970  int gtid = __kmp_entry_gtid();
1971  KA_TRACE(20, ("GOMP_taskgroup_reduction_register: T#%d\n", gtid));
1972  kmp_info_t *thread = __kmp_threads[gtid];
1973  kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;
1974  int nthreads = thread->th.th_team_nproc;
1975  __kmp_GOMP_taskgroup_reduction_register(data, tg, nthreads);
1976 }
1977 
1978 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_UNREGISTER)(
1979  uintptr_t *data) {
1980  KA_TRACE(20,
1981  ("GOMP_taskgroup_reduction_unregister: T#%d\n", __kmp_get_gtid()));
1982  KMP_ASSERT(data && data[2]);
1983  __kmp_free((void *)data[2]);
1984 }
1985 
1986 // Search through the reduction data and set ptrs[] elements
1987 // to the proper privatized copy addresses
1988 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASK_REDUCTION_REMAP)(size_t cnt,
1989  size_t cntorig,
1990  void **ptrs) {
1991  int gtid = __kmp_entry_gtid();
1992  KA_TRACE(20, ("GOMP_task_reduction_remap: T#%d\n", gtid));
1993  kmp_info_t *thread = __kmp_threads[gtid];
1994  kmp_int32 tid = __kmp_get_tid();
1995  for (size_t i = 0; i < cnt; ++i) {
1996  uintptr_t address = (uintptr_t)ptrs[i];
1997  void *propagated_address = NULL;
1998  void *mapped_address = NULL;
1999  // Check the taskgroups' reduce data
2000  kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;
2001  while (tg) {
2002  uintptr_t *gomp_data = tg->gomp_data;
2003  if (!gomp_data) {
2004  tg = tg->parent;
2005  continue;
2006  }
2007  // Check the shared addresses list
2008  size_t num_vars = (size_t)gomp_data[0];
2009  uintptr_t per_thread_size = gomp_data[1];
2010  uintptr_t reduce_data = gomp_data[2];
2011  uintptr_t end_reduce_data = gomp_data[6];
2012  for (size_t j = 0; j < num_vars; ++j) {
2013  uintptr_t *entry = gomp_data + 7 + 3 * j;
2014  if (entry[0] == address) {
2015  uintptr_t offset = entry[1];
2016  mapped_address =
2017  (void *)(reduce_data + tid * per_thread_size + offset);
2018  if (i < cntorig)
2019  propagated_address = (void *)entry[0];
2020  break;
2021  }
2022  }
2023  if (mapped_address)
2024  break;
2025  // Check if address is within privatized copies range
2026  if (!mapped_address && address >= reduce_data &&
2027  address < end_reduce_data) {
2028  uintptr_t offset = (address - reduce_data) % per_thread_size;
2029  mapped_address = (void *)(reduce_data + tid * per_thread_size + offset);
2030  if (i < cntorig) {
2031  for (size_t j = 0; j < num_vars; ++j) {
2032  uintptr_t *entry = gomp_data + 7 + 3 * j;
2033  if (entry[1] == offset) {
2034  propagated_address = (void *)entry[0];
2035  break;
2036  }
2037  }
2038  }
2039  }
2040  if (mapped_address)
2041  break;
2042  tg = tg->parent;
2043  }
2044  KMP_ASSERT(mapped_address);
2045  ptrs[i] = mapped_address;
2046  if (i < cntorig) {
2047  KMP_ASSERT(propagated_address);
2048  ptrs[cnt + i] = propagated_address;
2049  }
2050  }
2051 }
2052 
2053 static void __kmp_GOMP_init_reductions(int gtid, uintptr_t *data, int is_ws) {
2054  kmp_info_t *thr = __kmp_threads[gtid];
2055  kmp_team_t *team = thr->th.th_team;
2056  // First start a taskgroup
2057  __kmpc_taskgroup(NULL, gtid);
2058  // Then set up the reduction data
2059  void *reduce_data = KMP_ATOMIC_LD_RLX(&team->t.t_tg_reduce_data[is_ws]);
2060  if (reduce_data == NULL &&
2061  __kmp_atomic_compare_store(&team->t.t_tg_reduce_data[is_ws], reduce_data,
2062  (void *)1)) {
2063  // Single thread enters this block to initialize common reduction data
2064  KMP_DEBUG_ASSERT(reduce_data == NULL);
2065  __kmp_GOMP_taskgroup_reduction_register(data, NULL, thr->th.th_team_nproc);
2066  KMP_ATOMIC_ST_REL(&team->t.t_tg_fini_counter[is_ws], 0);
2067  KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[is_ws], (void *)data);
2068  } else {
2069  // Wait for task reduction initialization
2070  while ((reduce_data = KMP_ATOMIC_LD_ACQ(
2071  &team->t.t_tg_reduce_data[is_ws])) == (void *)1) {
2072  KMP_CPU_PAUSE();
2073  }
2074  KMP_DEBUG_ASSERT(reduce_data > (void *)1); // should be valid pointer here
2075  }
2076  // For worksharing constructs, each thread has its own reduction structure.
2077  // Have each reduction structure point to the same privatized copies of the
2078  // variables. For parallel constructs, each thread points to the same
2079  // reduction structure and privatized copies of the variables.
2080  if (is_ws) {
2081  __kmp_GOMP_taskgroup_reduction_register(
2082  data, NULL, thr->th.th_team_nproc,
2083  (uintptr_t *)KMP_ATOMIC_LD_ACQ(&team->t.t_tg_reduce_data[is_ws]));
2084  }
2085  kmp_taskgroup_t *tg = thr->th.th_current_task->td_taskgroup;
2086  tg->gomp_data = data;
2087 }
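// Callers pass is_ws == 0 for parallel reductions (see
// __kmp_GOMP_par_reductions_microtask_wrapper below) and is_ws == 1 for the
// worksharing entry points (GOMP_loop_start, GOMP_sections2_start, and the
// doacross/ordered variants), so each team keeps one
// t_tg_reduce_data/t_tg_fini_counter pair per construct kind.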
2088 
2089 static unsigned
2090 __kmp_GOMP_par_reductions_microtask_wrapper(int *gtid, int *npr,
2091  void (*task)(void *), void *data) {
2092  kmp_info_t *thr = __kmp_threads[*gtid];
2093  kmp_team_t *team = thr->th.th_team;
2094  uintptr_t *reduce_data = *(uintptr_t **)data;
2095  __kmp_GOMP_init_reductions(*gtid, reduce_data, 0);
2096 
2097 #if OMPT_SUPPORT
2098  ompt_frame_t *ompt_frame;
2099  ompt_state_t enclosing_state;
2100 
2101  if (ompt_enabled.enabled) {
2102  // save enclosing task state; set current state for task
2103  enclosing_state = thr->th.ompt_thread_info.state;
2104  thr->th.ompt_thread_info.state = ompt_state_work_parallel;
2105 
2106  // set task frame
2107  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2108  ompt_frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2109  }
2110 #endif
2111 
2112  task(data);
2113 
2114 #if OMPT_SUPPORT
2115  if (ompt_enabled.enabled) {
2116  // clear task frame
2117  ompt_frame->exit_frame = ompt_data_none;
2118 
2119  // restore enclosing state
2120  thr->th.ompt_thread_info.state = enclosing_state;
2121  }
2122 #endif
2123  __kmpc_end_taskgroup(NULL, *gtid);
2124  // If this is the last thread out, reset the team's reduce data;
2125  // the GOMP_taskgroup_reduction_unregister() function will deallocate the
2126  // private copies after the reduction calculations take place.
2127  int count = KMP_ATOMIC_INC(&team->t.t_tg_fini_counter[0]);
2128  if (count == thr->th.th_team_nproc - 1) {
2129  KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[0], NULL);
2130  KMP_ATOMIC_ST_REL(&team->t.t_tg_fini_counter[0], 0);
2131  }
2132  return (unsigned)thr->th.th_team_nproc;
2133 }
2134 
2135 unsigned KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_REDUCTIONS)(
2136  void (*task)(void *), void *data, unsigned num_threads,
2137  unsigned int flags) {
2138  MKLOC(loc, "GOMP_parallel_reductions");
2139  int gtid = __kmp_entry_gtid();
2140  KA_TRACE(20, ("GOMP_parallel_reductions: T#%d\n", gtid));
2141  __kmp_GOMP_fork_call(&loc, gtid, num_threads, flags, task,
2142  (microtask_t)__kmp_GOMP_par_reductions_microtask_wrapper,
2143  2, task, data);
2144  unsigned retval =
2145  __kmp_GOMP_par_reductions_microtask_wrapper(&gtid, NULL, task, data);
2146  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)();
2147  KA_TRACE(20, ("GOMP_parallel_reductions exit: T#%d\n", gtid));
2148  return retval;
2149 }
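// After forking the team above, the calling (primary) thread runs
// __kmp_GOMP_par_reductions_microtask_wrapper() directly for its own copy of
// the parallel body; the wrapper's return value, the team size, is what
// GOMP_parallel_reductions() hands back to its caller.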
2150 
2151 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_START)(
2152  long start, long end, long incr, long sched, long chunk_size, long *istart,
2153  long *iend, uintptr_t *reductions, void **mem) {
2154  int status = 0;
2155  int gtid = __kmp_entry_gtid();
2156  KA_TRACE(20, ("GOMP_loop_start: T#%d, reductions: %p\n", gtid, reductions));
2157  if (reductions)
2158  __kmp_GOMP_init_reductions(gtid, reductions, 1);
2159  if (mem)
2160  KMP_FATAL(GompFeatureNotSupported, "scan");
2161  if (istart == NULL)
2162  return true;
2163  const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic);
2164  long monotonic = sched & MONOTONIC_FLAG;
2165  sched &= ~MONOTONIC_FLAG;
2166  if (sched == 0) {
2167  if (monotonic)
2168  status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_RUNTIME_START)(
2169  start, end, incr, istart, iend);
2170  else
2171  status = KMP_EXPAND_NAME(
2172  KMP_API_NAME_GOMP_LOOP_MAYBE_NONMONOTONIC_RUNTIME_START)(
2173  start, end, incr, istart, iend);
2174  } else if (sched == 1) {
2175  status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_STATIC_START)(
2176  start, end, incr, chunk_size, istart, iend);
2177  } else if (sched == 2) {
2178  if (monotonic)
2179  status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START)(
2180  start, end, incr, chunk_size, istart, iend);
2181  else
2182  status =
2183  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_START)(
2184  start, end, incr, chunk_size, istart, iend);
2185  } else if (sched == 3) {
2186  if (monotonic)
2187  status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_GUIDED_START)(
2188  start, end, incr, chunk_size, istart, iend);
2189  else
2190  status =
2191  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_START)(
2192  start, end, incr, chunk_size, istart, iend);
2193  } else if (sched == 4) {
2194  status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_RUNTIME_START)(
2195  start, end, incr, istart, iend);
2196  } else {
2197  KMP_ASSERT(0);
2198  }
2199  return status;
2200 }
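// In GOMP_loop_start() above and GOMP_loop_ull_start() below, "sched"
// decodes as: 0 = runtime, 1 = static, 2 = dynamic, 3 = guided,
// 4 = nonmonotonic runtime, with kmp_sched_monotonic optionally ORed in;
// the doacross and ordered variants accept only 0 through 3 and ignore the
// monotonic bit. When istart is NULL, these routines only perform the
// reduction and taskgroup setup and report success without scheduling a
// first chunk.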
2201 
2202 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_START)(
2203  bool up, unsigned long long start, unsigned long long end,
2204  unsigned long long incr, long sched, unsigned long long chunk_size,
2205  unsigned long long *istart, unsigned long long *iend, uintptr_t *reductions,
2206  void **mem) {
2207  int status = 0;
2208  int gtid = __kmp_entry_gtid();
2209  KA_TRACE(20,
2210  ("GOMP_loop_ull_start: T#%d, reductions: %p\n", gtid, reductions));
2211  if (reductions)
2212  __kmp_GOMP_init_reductions(gtid, reductions, 1);
2213  if (mem)
2214  KMP_FATAL(GompFeatureNotSupported, "scan");
2215  if (istart == NULL)
2216  return true;
2217  const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic);
2218  long monotonic = sched & MONOTONIC_FLAG;
2219  sched &= ~MONOTONIC_FLAG;
2220  if (sched == 0) {
2221  if (monotonic)
2222  status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START)(
2223  up, start, end, incr, istart, iend);
2224  else
2225  status = KMP_EXPAND_NAME(
2226  KMP_API_NAME_GOMP_LOOP_ULL_MAYBE_NONMONOTONIC_RUNTIME_START)(
2227  up, start, end, incr, istart, iend);
2228  } else if (sched == 1) {
2229  status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START)(
2230  up, start, end, incr, chunk_size, istart, iend);
2231  } else if (sched == 2) {
2232  if (monotonic)
2233  status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START)(
2234  up, start, end, incr, chunk_size, istart, iend);
2235  else
2236  status = KMP_EXPAND_NAME(
2237  KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_START)(
2238  up, start, end, incr, chunk_size, istart, iend);
2239  } else if (sched == 3) {
2240  if (monotonic)
2241  status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START)(
2242  up, start, end, incr, chunk_size, istart, iend);
2243  else
2244  status =
2245  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_START)(
2246  up, start, end, incr, chunk_size, istart, iend);
2247  } else if (sched == 4) {
2248  status =
2249  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_RUNTIME_START)(
2250  up, start, end, incr, istart, iend);
2251  } else {
2252  KMP_ASSERT(0);
2253  }
2254  return status;
2255 }
2256 
2257 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_START)(
2258  unsigned ncounts, long *counts, long sched, long chunk_size, long *istart,
2259  long *iend, uintptr_t *reductions, void **mem) {
2260  int status = 0;
2261  int gtid = __kmp_entry_gtid();
2262  KA_TRACE(20, ("GOMP_loop_doacross_start: T#%d, reductions: %p\n", gtid,
2263  reductions));
2264  if (reductions)
2265  __kmp_GOMP_init_reductions(gtid, reductions, 1);
2266  if (mem)
2267  KMP_FATAL(GompFeatureNotSupported, "scan");
2268  if (istart == NULL)
2269  return true;
2270  // Ignore any monotonic flag
2271  const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic);
2272  sched &= ~MONOTONIC_FLAG;
2273  if (sched == 0) {
2274  status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_RUNTIME_START)(
2275  ncounts, counts, istart, iend);
2276  } else if (sched == 1) {
2277  status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_STATIC_START)(
2278  ncounts, counts, chunk_size, istart, iend);
2279  } else if (sched == 2) {
2280  status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_DYNAMIC_START)(
2281  ncounts, counts, chunk_size, istart, iend);
2282  } else if (sched == 3) {
2283  status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_GUIDED_START)(
2284  ncounts, counts, chunk_size, istart, iend);
2285  } else {
2286  KMP_ASSERT(0);
2287  }
2288  return status;
2289 }
2290 
2291 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_START)(
2292  unsigned ncounts, unsigned long long *counts, long sched,
2293  unsigned long long chunk_size, unsigned long long *istart,
2294  unsigned long long *iend, uintptr_t *reductions, void **mem) {
2295  int status = 0;
2296  int gtid = __kmp_entry_gtid();
2297  KA_TRACE(20, ("GOMP_loop_ull_doacross_start: T#%d, reductions: %p\n", gtid,
2298  reductions));
2299  if (reductions)
2300  __kmp_GOMP_init_reductions(gtid, reductions, 1);
2301  if (mem)
2302  KMP_FATAL(GompFeatureNotSupported, "scan");
2303  if (istart == NULL)
2304  return true;
2305  // Ignore any monotonic flag
2306  const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic);
2307  sched &= ~MONOTONIC_FLAG;
2308  if (sched == 0) {
2309  status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START)(
2310  ncounts, counts, istart, iend);
2311  } else if (sched == 1) {
2312  status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_STATIC_START)(
2313  ncounts, counts, chunk_size, istart, iend);
2314  } else if (sched == 2) {
2315  status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_DYNAMIC_START)(
2316  ncounts, counts, chunk_size, istart, iend);
2317  } else if (sched == 3) {
2318  status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_GUIDED_START)(
2319  ncounts, counts, chunk_size, istart, iend);
2320  } else {
2321  KMP_ASSERT(0);
2322  }
2323  return status;
2324 }
2325 
2326 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_START)(
2327  long start, long end, long incr, long sched, long chunk_size, long *istart,
2328  long *iend, uintptr_t *reductions, void **mem) {
2329  int status = 0;
2330  int gtid = __kmp_entry_gtid();
2331  KA_TRACE(20, ("GOMP_loop_ordered_start: T#%d, reductions: %p\n", gtid,
2332  reductions));
2333  if (reductions)
2334  __kmp_GOMP_init_reductions(gtid, reductions, 1);
2335  if (mem)
2336  KMP_FATAL(GompFeatureNotSupported, "scan");
2337  if (istart == NULL)
2338  return true;
2339  // Ignore any monotonic flag
2340  const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic);
2341  sched &= ~MONOTONIC_FLAG;
2342  if (sched == 0) {
2343  status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START)(
2344  start, end, incr, istart, iend);
2345  } else if (sched == 1) {
2346  status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START)(
2347  start, end, incr, chunk_size, istart, iend);
2348  } else if (sched == 2) {
2349  status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START)(
2350  start, end, incr, chunk_size, istart, iend);
2351  } else if (sched == 3) {
2352  status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START)(
2353  start, end, incr, chunk_size, istart, iend);
2354  } else {
2355  KMP_ASSERT(0);
2356  }
2357  return status;
2358 }
2359 
2360 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_START)(
2361  bool up, unsigned long long start, unsigned long long end,
2362  unsigned long long incr, long sched, unsigned long long chunk_size,
2363  unsigned long long *istart, unsigned long long *iend, uintptr_t *reductions,
2364  void **mem) {
2365  int status = 0;
2366  int gtid = __kmp_entry_gtid();
2367  KA_TRACE(20, ("GOMP_loop_ull_ordered_start: T#%d, reductions: %p\n", gtid,
2368  reductions));
2369  if (reductions)
2370  __kmp_GOMP_init_reductions(gtid, reductions, 1);
2371  if (mem)
2372  KMP_FATAL(GompFeatureNotSupported, "scan");
2373  if (istart == NULL)
2374  return true;
2375  // Ignore any monotonic flag
2376  const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic);
2377  sched &= ~MONOTONIC_FLAG;
2378  if (sched == 0) {
2379  status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START)(
2380  up, start, end, incr, istart, iend);
2381  } else if (sched == 1) {
2382  status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START)(
2383  up, start, end, incr, chunk_size, istart, iend);
2384  } else if (sched == 2) {
2385  status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START)(
2386  up, start, end, incr, chunk_size, istart, iend);
2387  } else if (sched == 3) {
2388  status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START)(
2389  up, start, end, incr, chunk_size, istart, iend);
2390  } else {
2391  KMP_ASSERT(0);
2392  }
2393  return status;
2394 }
2395 
2396 unsigned KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS2_START)(
2397  unsigned count, uintptr_t *reductions, void **mem) {
2398  int gtid = __kmp_entry_gtid();
2399  KA_TRACE(20,
2400  ("GOMP_sections2_start: T#%d, reductions: %p\n", gtid, reductions));
2401  if (reductions)
2402  __kmp_GOMP_init_reductions(gtid, reductions, 1);
2403  if (mem)
2404  KMP_FATAL(GompFeatureNotSupported, "scan");
2405  return KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_START)(count);
2406 }
2407 
2408 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_WORKSHARE_TASK_REDUCTION_UNREGISTER)(
2409  bool cancelled) {
2410  int gtid = __kmp_get_gtid();
2411  MKLOC(loc, "GOMP_workshare_task_reduction_unregister");
2412  KA_TRACE(20, ("GOMP_workshare_task_reduction_unregister: T#%d\n", gtid));
2413  kmp_info_t *thr = __kmp_threads[gtid];
2414  kmp_team_t *team = thr->th.th_team;
2415  __kmpc_end_taskgroup(NULL, gtid);
2416  // If this is the last thread out of the workshare, reset the team's reduce
2417  // data; the GOMP_taskgroup_reduction_unregister() function will deallocate
2418  // the private copies after the reduction calculations take place.
2419  int count = KMP_ATOMIC_INC(&team->t.t_tg_fini_counter[1]);
2420  if (count == thr->th.th_team_nproc - 1) {
2421  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_UNREGISTER)
2422  ((uintptr_t *)KMP_ATOMIC_LD_RLX(&team->t.t_tg_reduce_data[1]));
2423  KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[1], NULL);
2424  KMP_ATOMIC_ST_REL(&team->t.t_tg_fini_counter[1], 0);
2425  }
2426  if (!cancelled) {
2427  __kmpc_barrier(&loc, gtid);
2428  }
2429 }
2430 
2431 /* The following sections of code create aliases for the GOMP_* functions, then
2432  create versioned symbols using the assembler directive .symver. This is only
2433  pertinent for an ELF .so library. The KMP_VERSION_SYMBOL macro is defined in
2434  kmp_os.h */
2435 
2436 #ifdef KMP_USE_VERSION_SYMBOLS
2437 // GOMP_1.0 versioned symbols
2438 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_ATOMIC_END, 10, "GOMP_1.0");
2439 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_ATOMIC_START, 10, "GOMP_1.0");
2440 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_BARRIER, 10, "GOMP_1.0");
2441 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CRITICAL_END, 10, "GOMP_1.0");
2442 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CRITICAL_NAME_END, 10, "GOMP_1.0");
2443 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CRITICAL_NAME_START, 10, "GOMP_1.0");
2444 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CRITICAL_START, 10, "GOMP_1.0");
2445 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT, 10, "GOMP_1.0");
2446 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START, 10, "GOMP_1.0");
2447 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_END, 10, "GOMP_1.0");
2448 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_END_NOWAIT, 10, "GOMP_1.0");
2449 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT, 10, "GOMP_1.0");
2450 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_GUIDED_START, 10, "GOMP_1.0");
2451 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT, 10, "GOMP_1.0");
2452 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START, 10,
2453  "GOMP_1.0");
2454 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT, 10, "GOMP_1.0");
2455 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START, 10, "GOMP_1.0");
2456 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT, 10, "GOMP_1.0");
2457 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START, 10,
2458  "GOMP_1.0");
2459 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT, 10, "GOMP_1.0");
2460 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START, 10, "GOMP_1.0");
2461 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT, 10, "GOMP_1.0");
2462 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_RUNTIME_START, 10, "GOMP_1.0");
2463 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT, 10, "GOMP_1.0");
2464 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_STATIC_START, 10, "GOMP_1.0");
2465 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_ORDERED_END, 10, "GOMP_1.0");
2466 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_ORDERED_START, 10, "GOMP_1.0");
2467 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_END, 10, "GOMP_1.0");
2468 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START, 10,
2469  "GOMP_1.0");
2470 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START, 10,
2471  "GOMP_1.0");
2472 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START, 10,
2473  "GOMP_1.0");
2474 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START, 10,
2475  "GOMP_1.0");
2476 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START, 10, "GOMP_1.0");
2477 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_START, 10, "GOMP_1.0");
2478 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_END, 10, "GOMP_1.0");
2479 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT, 10, "GOMP_1.0");
2480 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_NEXT, 10, "GOMP_1.0");
2481 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_START, 10, "GOMP_1.0");
2482 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SINGLE_COPY_END, 10, "GOMP_1.0");
2483 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SINGLE_COPY_START, 10, "GOMP_1.0");
2484 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SINGLE_START, 10, "GOMP_1.0");
2485 
2486 // GOMP_2.0 versioned symbols
2487 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASK, 20, "GOMP_2.0");
2488 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKWAIT, 20, "GOMP_2.0");
2489 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT, 20, "GOMP_2.0");
2490 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START, 20, "GOMP_2.0");
2491 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT, 20, "GOMP_2.0");
2492 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START, 20, "GOMP_2.0");
2493 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT, 20,
2494  "GOMP_2.0");
2495 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START, 20,
2496  "GOMP_2.0");
2497 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT, 20,
2498  "GOMP_2.0");
2499 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START, 20,
2500  "GOMP_2.0");
2501 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT, 20,
2502  "GOMP_2.0");
2503 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START, 20,
2504  "GOMP_2.0");
2505 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT, 20,
2506  "GOMP_2.0");
2507 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START, 20,
2508  "GOMP_2.0");
2509 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT, 20, "GOMP_2.0");
2510 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START, 20, "GOMP_2.0");
2511 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT, 20, "GOMP_2.0");
2512 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START, 20, "GOMP_2.0");
2513 
2514 // GOMP_3.0 versioned symbols
2515 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKYIELD, 30, "GOMP_3.0");
2516 
2517 // GOMP_4.0 versioned symbols
2518 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL, 40, "GOMP_4.0");
2519 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_SECTIONS, 40, "GOMP_4.0");
2520 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC, 40, "GOMP_4.0");
2521 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED, 40, "GOMP_4.0");
2522 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME, 40, "GOMP_4.0");
2523 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC, 40, "GOMP_4.0");
2524 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKGROUP_START, 40, "GOMP_4.0");
2525 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKGROUP_END, 40, "GOMP_4.0");
2526 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_BARRIER_CANCEL, 40, "GOMP_4.0");
2527 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CANCEL, 40, "GOMP_4.0");
2528 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CANCELLATION_POINT, 40, "GOMP_4.0");
2529 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_END_CANCEL, 40, "GOMP_4.0");
2530 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_END_CANCEL, 40, "GOMP_4.0");
2531 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TARGET, 40, "GOMP_4.0");
2532 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TARGET_DATA, 40, "GOMP_4.0");
2533 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TARGET_END_DATA, 40, "GOMP_4.0");
2534 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TARGET_UPDATE, 40, "GOMP_4.0");
2535 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TEAMS, 40, "GOMP_4.0");
2536 
2537 // GOMP_4.5 versioned symbols
2538 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKLOOP, 45, "GOMP_4.5");
2539 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKLOOP_ULL, 45, "GOMP_4.5");
2540 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_DOACROSS_POST, 45, "GOMP_4.5");
2541 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_DOACROSS_WAIT, 45, "GOMP_4.5");
2542 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_STATIC_START, 45,
2543  "GOMP_4.5");
2544 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_DYNAMIC_START, 45,
2545  "GOMP_4.5");
2546 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_GUIDED_START, 45,
2547  "GOMP_4.5");
2548 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_RUNTIME_START, 45,
2549  "GOMP_4.5");
2550 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_DOACROSS_ULL_POST, 45, "GOMP_4.5");
2551 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_DOACROSS_ULL_WAIT, 45, "GOMP_4.5");
2552 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_STATIC_START, 45,
2553  "GOMP_4.5");
2554 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_DYNAMIC_START, 45,
2555  "GOMP_4.5");
2556 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_GUIDED_START, 45,
2557  "GOMP_4.5");
2558 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START, 45,
2559  "GOMP_4.5");
2560 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_START, 45,
2561  "GOMP_4.5");
2562 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_NEXT, 45,
2563  "GOMP_4.5");
2564 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_START, 45,
2565  "GOMP_4.5");
2566 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_NEXT, 45,
2567  "GOMP_4.5");
2568 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_START, 45,
2569  "GOMP_4.5");
2570 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_NEXT, 45,
2571  "GOMP_4.5");
2572 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_START, 45,
2573  "GOMP_4.5");
2574 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_NEXT, 45,
2575  "GOMP_4.5");
2576 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_DYNAMIC, 45,
2577  "GOMP_4.5");
2578 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_GUIDED, 45,
2579  "GOMP_4.5");
2580 
2581 // GOMP_5.0 versioned symbols
2582 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_MAYBE_NONMONOTONIC_RUNTIME_NEXT, 50,
2583  "GOMP_5.0");
2584 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_MAYBE_NONMONOTONIC_RUNTIME_START, 50,
2585  "GOMP_5.0");
2586 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_RUNTIME_NEXT, 50,
2587  "GOMP_5.0");
2588 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_RUNTIME_START, 50,
2589  "GOMP_5.0");
2590 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_MAYBE_NONMONOTONIC_RUNTIME_NEXT,
2591  50, "GOMP_5.0");
2592 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_MAYBE_NONMONOTONIC_RUNTIME_START,
2593  50, "GOMP_5.0");
2594 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_RUNTIME_NEXT, 50,
2595  "GOMP_5.0");
2596 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_RUNTIME_START, 50,
2597  "GOMP_5.0");
2598 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_RUNTIME, 50,
2599  "GOMP_5.0");
2600 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_MAYBE_NONMONOTONIC_RUNTIME,
2601  50, "GOMP_5.0");
2602 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TEAMS_REG, 50, "GOMP_5.0");
2603 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKWAIT_DEPEND, 50, "GOMP_5.0");
2604 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_REGISTER, 50,
2605  "GOMP_5.0");
2606 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_UNREGISTER, 50,
2607  "GOMP_5.0");
2608 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASK_REDUCTION_REMAP, 50, "GOMP_5.0");
2609 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_REDUCTIONS, 50, "GOMP_5.0");
2610 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_START, 50, "GOMP_5.0");
2611 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_START, 50, "GOMP_5.0");
2612 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_START, 50, "GOMP_5.0");
2613 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_START, 50, "GOMP_5.0");
2614 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_START, 50, "GOMP_5.0");
2615 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_START, 50, "GOMP_5.0");
2616 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS2_START, 50, "GOMP_5.0");
2617 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_WORKSHARE_TASK_REDUCTION_UNREGISTER, 50,
2618  "GOMP_5.0");
2619 #endif // KMP_USE_VERSION_SYMBOLS
2620 
2621 #ifdef __cplusplus
2622 } // extern "C"
2623 #endif // __cplusplus