14 #include "kmp_affinity.h"
15 #include "kmp_atomic.h"
16 #include "kmp_environment.h"
17 #include "kmp_error.h"
21 #include "kmp_settings.h"
22 #include "kmp_stats.h"
24 #include "kmp_wait_release.h"
25 #include "kmp_wrapper_getpid.h"
26 #include "kmp_dispatch.h"
27 #if KMP_USE_HIER_SCHED
28 #include "kmp_dispatch_hier.h"
32 #include "ompt-specific.h"
35 #include "ompd-specific.h"
38 #if OMP_PROFILING_SUPPORT
39 #include "llvm/Support/TimeProfiler.h"
40 static char *ProfileTraceFile =
nullptr;
44 #define KMP_USE_PRCTL 0
#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] =
    KMP_VERSION_PREFIX "API version: 5.0 (201611)";

char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))
kmp_info_t __kmp_monitor;

/* Forward declarations */

void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nNeed);
static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
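/* Determine the gtid of the calling thread. First try thread-local storage
   (TDATA or keyed TLS when __kmp_gtid_mode allows it); otherwise fall back to
   an internal search that matches the current stack address against the
   recorded stack extents of every registered thread. */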
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  for (i = 0; i < __kmp_threads_capacity; i++) {
    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    /* stack grows down -- search through all of the active threads */
    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        /* The only way we can be closer than the allocated stack size is if we
           are running on this thread. */
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  /* get specific to try and determine our gtid */
  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  /* if we haven't been assigned a gtid, then return code */
  if (i < 0)
    return i;

  /* dynamically updated stack window for uber threads to avoid get_specific
     call */
  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  /* Reprint stack bounds for ubermaster since they have been refined */
  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}
int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else
#ifdef KMP_TDATA_GTID
      if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else
#endif
      if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  /* we must be a new uber master sibling thread */
  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}
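/* Verify that the stack assigned to 'th' does not overlap the stack of any
   other registered thread; print the storage map if requested and abort with
   a fatal StackOverlap message when an overlap is detected. */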
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  /* No point in checking ubermaster threads since they use refinement and
     cannot overlap */
  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          /* Print the other stack values before the abort */
          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}
void __kmp_infinite_loop(void) {
  static int done = FALSE;

  while (!done) {
    KMP_YIELD(TRUE);
  }
}

#define MAX_MESSAGE 512
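/* Print one "OMP storage map" line on stderr describing the object that spans
   [p1, p2), and optionally (KMP_PRINT_DATA_PLACEMENT) report which memory node
   each page of the range resides on. */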
void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (gtid >= 0) {
    if (p1 <= p2 && (char *)p2 - (char *)p1 == size) { // pointer to same object
      if (__kmp_storage_map_verbose) {
        node = __kmp_get_host_node(p1);
        if (node < 0) /* doesn't work, so don't try this next time */
          __kmp_storage_map_verbose = FALSE;
        else {
          char *last;
          int lastNode;
          int localProc = __kmp_get_cpu_from_gtid(gtid);

          const int page_size = KMP_GET_PAGE_SIZE();

          p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
          p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
          if (localProc >= 0)
            __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
                                 localProc >> 1);
          else
            __kmp_printf_no_lock("  GTID %d\n", gtid);
#if KMP_USE_PRCTL
          do {
            last = (char *)p1;
            lastNode = node;
            /* This loop collates adjacent pages with the same host node. */
            do {
              (char *)p1 += page_size;
            } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
            __kmp_printf_no_lock("    %p-%p memNode %d\n", last, (char *)p1 - 1,
                                 lastNode);
          } while (p1 <= p2);
#else
          __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                               (char *)p1 + (page_size - 1),
                               __kmp_get_host_node(p1));
          if (p1 < p2) {
            __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                                 (char *)p2 + (page_size - 1),
                                 __kmp_get_host_node(p2));
          }
#endif
        }
      }
    } else
      __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));
  }
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
}
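/* Emit an "OMP warning" message on stderr unless warnings are disabled. */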
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}
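/* Terminate the process after a fatal error: dump the debug buffer if one was
   kept, record SIGABRT in __kmp_global.g.g_abort on Windows so other threads
   see the abnormal termination, and abort. */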
void __kmp_abort_process() {
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }

  if (KMP_OS_WINDOWS) {
    // Let other threads know of abnormal termination.
    __kmp_global.g.g_abort = SIGABRT;
    raise(SIGABRT);
    _exit(3); // Just in case, if signal is ignored, exit anyway.
  } else {
    __kmp_unregister_library();
    abort();
  }

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);
} // __kmp_abort_process

void __kmp_abort_thread(void) {
  __kmp_infinite_loop();
} // __kmp_abort_thread
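/* Print out the storage map for the major thread-private structures inside a
   kmp_info_t: the descriptor, locals, and the per-barrier state. */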
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}
static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);
}
static void __kmp_init_allocator() {
  __kmp_init_memkind();
  __kmp_init_target_mem();
}
static void __kmp_fini_allocator() { __kmp_fini_memkind(); }
BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));
    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    // lpReserved == NULL when FreeLibrary() was called,
    // lpReserved != NULL when the process is terminated.
    if (lpReserved == NULL)
      __kmp_internal_end_library(__kmp_gtid_get_specific());
    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}
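/* __kmp_parallel_deo / __kmp_parallel_dxo implement entry and exit of an
   "ordered" construct inside a parallel region: on entry each thread waits
   until t_ordered.dt.t_value equals its own tid, and on exit it passes the
   token to the next tid in the team. */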
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB();
    KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
             NULL);
    KMP_MB();
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB(); /* Flush all pending memory write invalidates. */

    /* use the tid of the next thread in this team */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);

    KMP_MB(); /* Flush all pending memory write invalidates. */
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
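/* Decide whether the calling thread executes a SINGLE construct. Returns TRUE
   for the thread that wins the atomic race on team->t.t_construct (always
   TRUE in a serialized team) and FALSE for everyone else. */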
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* try to set team count to thread count--success means thread got the
       single block */
    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    }

    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1) {
      // Only report metadata by primary thread of active team at level 1
      __kmp_itt_metadata_single(id_ref);
    }
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }

  if (status) {
    __kmp_itt_single_start(gtid);
  }
  return status;
}

void __kmp_exit_single(int gtid) {
  __kmp_itt_single_end(gtid);

  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}
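/* Work out how many threads the forking (primary) thread may actually reserve
   for the new team: start from the requested set_nthreads and clip it against
   the dynamic-adjustment mode (load balance, thread limit, random), the
   device and contention-group thread limits, and the capacity of the
   __kmp_threads array, issuing a one-time warning when the request cannot be
   honored. */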
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads,
                                 int enter_teams) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);
  kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];

  // If dyn-var is set, dynamically adjust the number of desired threads,
  // according to the method specified by dynamic_mode.
  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ;
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  }

  // Respect KMP_DEVICE_THREAD_LIMIT (i.e. __kmp_max_nth).
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Respect OMP_THREAD_LIMIT
  int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
  int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
  if (cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      max_cg_threads) {
    int tl_nthreads = max_cg_threads - cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Check if the threads array is large enough, or needs expanding.
  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  // If it is not for initializing the hidden helper team, take
  // __kmp_hidden_helper_threads_num out of the capacity because it is
  // included in __kmp_threads_capacity.
  if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
    capacity -= __kmp_hidden_helper_threads_num;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      // The threads array was not expanded enough.
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      // If dyn-var is false, emit a 1-time warning.
      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
  } else {
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                  " %d threads\n",
                  __kmp_get_gtid(), new_nthreads, set_nthreads));
  }
  return new_nthreads;
}
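/* Bind the primary thread to slot 0 of the new team, allocate (or reuse from
   the hot-team pool) the remaining worker threads, propagate the teams-related
   fields and barrier arrival counts to them, and set up affinity places. */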
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
  KMP_MB();

  /* first, let's setup the primary thread */
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

/* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) { // hot teams array is not allocated if
    // KMP_HOT_TEAMS_MAX_LEVEL=0
    int level = team->t.t_active_level - 1; // index in array of hot teams
    if (master_th->th.th_teams_microtask) { // are we inside the teams?
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
        // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
        // team_of_workers before the parallel
      }
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // hot team has already been allocated for given level
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1; // the team is ready to use
      } else {
        use_hot_team = 0; // AC: threads are not allocated yet
        hot_teams[level].hot_team = team; // remember new hot team
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {

    /* install the primary thread */
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    /* now, install the worker threads */
    for (i = 1; i < team->t.t_nproc; i++) {

      /* fork or reallocate a new thread and install it in team */
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      /* align team and thread arrived states */
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
      { // Initialize threads' barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
        }
      }
    }

#if KMP_AFFINITY_SUPPORTED
    __kmp_partition_places(team);
#endif
  }

  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
    for (i = 0; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = team->t.t_threads[i];
      if (thr->th.th_prev_num_threads != team->t.t_nproc ||
          thr->th.th_prev_level != team->t.t_level) {
        team->t.t_display_affinity = 1;
        break;
      }
    }
  }

  KMP_MB();
}
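/* On x86 the FPU control word and MXCSR are inherited by the team: the
   primary thread snapshots them into the team (propagateFPControl) and the
   workers restore them from the team when they differ (updateHWFPControl). */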
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Propagate any changes to the floating point control registers out to the
// team. Avoid unnecessary writes to the team structure's cache line by only
// updating values that actually changed.
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    // Get primary thread's values of FPU control flags (both X87 and vector)
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
    // Other code in the runtime uses this flag to decide whether to restore
    // the registers, so it must be kept up to date.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    // Similarly here. Don't write to this cache line unless we have to.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

// Do the opposite, setting the hardware registers to the updated values from
// the team.
inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
                                     int realloc); // forward declaration
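/* Run a parallel region serialized: push a (possibly freshly allocated)
   serial team onto the calling thread, bump its nesting level and ICVs, and
   notify the OMPT callbacks, without waking any worker threads. */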
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  /* initialize if needed */
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  /* utilize the serialized team held by this thread */
  KMP_DEBUG_ASSERT(serial_team);
  KMP_MB();

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(
        this_thr->th.th_task_team ==
        this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
    KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
                     NULL);
    KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
                  "team %p, new task_team = NULL\n",
                  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    this_thr->th.th_task_team = NULL;
  }

  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    // No proc_bind clause was specified, so use the current value
    // of proc-bind-var for this parallel region.
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
  }
  // Reset for next parallel region
  this_thr->th.th_set_proc_bind = proc_bind_default;

  ompt_data_t ompt_parallel_data = ompt_data_none;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {

    ompt_task_info_t *parent_task_info;
    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);

    parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = 1;

      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          &(parent_task_info->task_data), &(parent_task_info->frame),
          &ompt_parallel_data, team_size,
          ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
    }
  }

  if (this_thr->th.th_team != serial_team) {
    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {
      /* this serial team was already used; allocate a new one */
      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

      new_team =
          __kmp_allocate_team(this_thr->th.th_root, 1, 1,
                              proc_bind, &this_thr->th.th_current_task->td_icvs,
                              0 USE_NESTED_HOT_ARG(NULL));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      /* setup new serialized team and install it */
      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));
    } else {
      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));
    }

    /* we have to initialize this serial team */
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    /* TODO: GEH: do ICVs work for nested serialized teams? Don't we need an
       implicit task for each serialized task represented by
       team->t.t_serialized? */
    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);

    // Thread value exists in the nested nthreads array for the next nested
    // level
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }

    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];
    }

    serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.

    this_thr->th.th_info.ds.ds_tid = 0;

    /* set thread cache values */
    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
    serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save

    propagateFPControl(serial_team);

    /* check if we need to allocate dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_MB();
  } else {
    /* this serialized team is already being used,
       that's fine, just add another nested level */
    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;
    // Thread value exists in the nested nthreads array for the next nested
    // level
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }
    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    /* allocate/push dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    {
      dispatch_private_info_t *disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
      disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
      serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_MB();
  }
  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);

  // Perform the display affinity functionality for serialized parallel
  // regions
  if (__kmp_display_affinity) {
    if (this_thr->th.th_prev_level != serial_team->t.t_level ||
        this_thr->th.th_prev_num_threads != 1) {
      // NULL means use the affinity-format-var ICV
      __kmp_aux_display_affinity(global_tid, NULL);
      this_thr->th.th_prev_level = serial_team->t.t_level;
      this_thr->th.th_prev_num_threads = 1;
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);

  serial_team->t.ompt_team_info.master_return_address = codeptr;
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);

    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
                            &ompt_parallel_data, codeptr);

    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
    // don't use lw_taskteam after linking. content was swaped

    /* OMPT implicit task begin */
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
          OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
          ompt_task_implicit);
      OMPT_CUR_TASK_INFO(this_thr)->thread_num =
          __kmp_tid_from_gtid(global_tid);
    }

    /* OMPT state */
    this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
  }
}
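/* Implements the fork of a parallel region: decide how many threads to use,
   then either serialize the region (nthreads == 1) and run the microtask
   inline, run it through the parent team (parallel closely nested in a teams
   construct), or allocate a new team, fork its worker threads, and run the
   microtask as the primary thread of that team. */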
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context, // Intel, GNU, ...
                    kmp_int32 argc, microtask_t microtask, launch_t invoker,
                    kmp_va_list ap) {
  void **argv;
  int i;
  int master_tid;
  int master_this_cons;
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int nthreads;
  int master_active;
  int master_set_numthreads;
  int level;
  int active_level;
  int teams_level;
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t **p_hot_teams;
#endif
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);

  KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
  if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
    /* Some systems prefer the stack for the root thread(s) to start with */
    /* some gap from the parent stack to prevent false sharing. */
    void *dummy = KMP_ALLOCA(__kmp_stkpadding);
    /* These 2 lines below are so this does not get optimized out */
    if (__kmp_stkpadding > KMP_MAX_STKPADDING)
      __kmp_stkpadding += (short)((kmp_int64)dummy);
  }

  /* initialize if needed */
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  /* setup current data */
  master_th = __kmp_threads[gtid];
  parent_team = master_th->th.th_team;
  master_tid = master_th->th.th_info.ds.ds_tid;
  master_this_cons = master_th->th.th_local.this_construct;
  root = master_th->th.th_root;
  master_active = root->r.r_active;
  master_set_numthreads = master_th->th.th_set_nproc;
  ompt_data_t ompt_parallel_data = ompt_data_none;
  ompt_data_t *parent_task_data;
  ompt_frame_t *ompt_frame;
  ompt_data_t *implicit_task_data;
  void *return_address = NULL;

  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
                                  NULL, NULL);
    return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
  }

  // Assign affinity to the root thread if it hasn't happened yet
  __kmp_assign_root_init_mask();

  // Nested level will be an index in the nested nthreads array
  level = parent_team->t.t_level;
  // used to launch non-serial teams even if nested is not allowed
  active_level = parent_team->t.t_active_level;
  // needed to check nesting inside the teams
  teams_level = master_th->th.th_teams_level;
#if KMP_NESTED_HOT_TEAMS
  p_hot_teams = &master_th->th.th_hot_teams;
  if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
    *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
        sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
    (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
    // it is either actual or not needed (when active_level > 0)
    (*p_hot_teams)[0].hot_team_nth = 1;
  }
#endif

  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = master_set_numthreads
                          ? master_set_numthreads
                          : get__nproc_2(parent_team, master_tid);
      int flags = OMPT_INVOKER(call_context) |
                  ((microtask == (microtask_t)__kmp_teams_master)
                       ? ompt_parallel_league
                       : ompt_parallel_team);
      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
          return_address);
    }
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }

  master_th->th.th_ident = loc;

  if (master_th->th.th_teams_microtask && ap &&
      microtask != (microtask_t)__kmp_teams_master && level == teams_level) {
    // AC: This is start of parallel that is nested inside teams construct.
    // The team is actual (hot), all workers are ready at the fork barrier.
    parent_team->t.t_ident = loc;
    __kmp_alloc_argv_entries(argc, parent_team, TRUE);
    parent_team->t.t_argc = argc;
    argv = (void **)parent_team->t.t_argv;
    for (i = argc - 1; i >= 0; --i)
      *argv++ = va_arg(kmp_va_deref(ap), void *);
    // Increment our nested depth levels, but not increase the serialization
    if (parent_team == master_th->th.th_serial_team) {
      // AC: we are in serialized parallel
      __kmpc_serialized_parallel(loc, gtid);
      KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);

      if (call_context == fork_context_gnu) {
        // AC: need to decrement t_serialized for enquiry functions to work
        // correctly, will restore at join time
        parent_team->t.t_serialized--;
        return TRUE;
      }

      parent_team->t.t_pkfn = microtask;

      void *dummy;
      void **exit_frame_p;

      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                &ompt_parallel_data, return_address);
        exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);

        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
        // don't use lw_taskteam after linking. content was swaped

        /* OMPT implicit task begin */
        implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          OMPT_CUR_TASK_INFO(master_th)->thread_num =
              __kmp_tid_from_gtid(gtid);
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              implicit_task_data, 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }

        /* OMPT state */
        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        exit_frame_p = &dummy;
      }

      // AC: need to decrement t_serialized for enquiry functions to work
      // correctly, will restore at join time
      parent_team->t.t_serialized--;

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv,
                               exit_frame_p);
      }

      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, implicit_task_data, 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }
        ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
              OMPT_INVOKER(call_context) | ompt_parallel_team,
              return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
      return TRUE;
    }

    parent_team->t.t_pkfn = microtask;
    parent_team->t.t_invoke = invoker;
    KMP_ATOMIC_INC(&root->r.r_in_parallel);
    parent_team->t.t_active_level++;
    parent_team->t.t_level++;
    parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save

    if (ompt_enabled.enabled) {
      ompt_lw_taskteam_t lw_taskteam;
      __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                              &ompt_parallel_data, return_address);
      __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);
    }

    /* Change number of threads in the team if requested */
    if (master_set_numthreads) { // The parallel has num_threads clause
      if (master_set_numthreads < master_th->th.th_teams_size.nth) {
        // AC: only can reduce number of threads dynamically, can't increase
        kmp_info_t **other_threads = parent_team->t.t_threads;
        parent_team->t.t_nproc = master_set_numthreads;
        for (i = 0; i < master_set_numthreads; ++i) {
          other_threads[i]->th.th_team_nproc = master_set_numthreads;
        }
        // Keep extra threads hot in the team for possible next parallels
      }
      master_th->th.th_set_nproc = 0;
    }

    if (__kmp_debugging) { // Let the debugger override the number of threads.
      int nth = __kmp_omp_num_threads(loc);
      if (nth > 0) { // 0 means the debugger doesn't want to change it
        master_set_numthreads = nth;
      }
    }

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
         KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames_mode == 3 &&
        parent_team->t.t_active_level == 1 // only report frames at level 1
        && master_th->th.th_teams_size.nteams == 1) {
      kmp_uint64 tmp_time = __itt_get_timestamp();
      master_th->th.th_frame_time = tmp_time;
      parent_team->t.t_region_time = tmp_time;
    }
    if (__itt_stack_caller_create_ptr) {
      // create new stack stitching id before entering fork barrier
      KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
      parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
    }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */

    KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));
    __kmp_internal_fork(loc, gtid, parent_team);
    KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));

    if (call_context == fork_context_gnu)
      return TRUE;

    /* Invoke microtask for PRIMARY thread */
    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));

    if (!parent_team->t.t_invoke(gtid)) {
      KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
    }
    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));

    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

    return TRUE;
  } // Parallel closely nested in teams construct

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
  }

  /* determine how many new threads we can use */
  int enter_teams = 0;
  if (parent_team->t.t_active_level >=
      master_th->th.th_current_task->td_icvs.max_active_levels) {
    nthreads = 1;
  } else {
    enter_teams = ((ap == NULL && active_level == 0) ||
                   (ap && teams_level > 0 && teams_level == level));
    nthreads = master_set_numthreads
                   ? master_set_numthreads
                   : get__nproc_2(parent_team, master_tid);

    // Check if we need to take the forkjoin lock (no need for serialized
    // parallel out of teams construct).
    if (nthreads > 1) {
      if ((get__max_active_levels(master_th) == 1 &&
           (root->r.r_in_parallel && !enter_teams)) ||
          (__kmp_library == library_serial)) {
        KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d"
                      " threads\n",
                      gtid, nthreads));
        nthreads = 1;
      }
    }
    if (nthreads > 1) {
      /* determine how many new threads we can use */
      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
      /* AC: If we execute teams from parallel region (on host), then teams
         should be created but each can only have 1 thread if nesting is
         disabled. If teams called from serial region, then teams and their
         threads should be created regardless of the nesting setting. */
      nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
                                       nthreads, enter_teams);
      if (nthreads == 1) {
        // Free lock for single thread execution here; for multi-thread
        // execution it will be freed later after team of threads created
        // and initialized
        __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      }
    }
  }
  KMP_DEBUG_ASSERT(nthreads > 0);

  // If we temporarily changed the set number of threads then restore it now
  master_th->th.th_set_nproc = 0;
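  /* nthreads == 1: serialize the region and run the microtask inline on the
     primary thread's serial team instead of forking workers. */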
  if (nthreads == 1) {
/* josh todo: hypothetical question: what do we do for OS X*? */
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    void *args[argc];
#else
    void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
#endif

    KA_TRACE(20,
             ("__kmp_fork_call: T#%d serializing parallel region\n", gtid));

    __kmpc_serialized_parallel(loc, gtid);

    master_th->th.th_serial_team->t.t_pkfn = microtask;

    if (call_context == fork_context_intel) {
      /* TODO this sucks, use the compiler itself to pass args! :) */
      master_th->th.th_serial_team->t.t_ident = loc;
      if (!ap) {
        // revert change made in __kmpc_serialized_parallel()
        master_th->th.th_serial_team->t.t_level--;

        void *dummy;
        void **exit_frame_p;
        ompt_task_info_t *task_info;

        ompt_lw_taskteam_t lw_taskteam;

        if (ompt_enabled.enabled) {
          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                  &ompt_parallel_data, return_address);

          __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
          // don't use lw_taskteam after linking. content was swaped

          task_info = OMPT_CUR_TASK_INFO(master_th);
          exit_frame_p = &(task_info->frame.exit_frame.ptr);
          if (ompt_enabled.ompt_callback_implicit_task) {
            OMPT_CUR_TASK_INFO(master_th)->thread_num =
                __kmp_tid_from_gtid(gtid);
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
                &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num,
                ompt_task_implicit);
          }

          /* OMPT state */
          master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
        } else {
          exit_frame_p = &dummy;
        }

        {
          KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
          KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
          __kmp_invoke_microtask(microtask, gtid, 0, argc,
                                 parent_team->t.t_argv, exit_frame_p);
        }

        if (ompt_enabled.enabled) {
          *exit_frame_p = NULL;
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num,
                ompt_task_implicit);
          }
          ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
          __ompt_lw_taskteam_unlink(master_th);
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                &ompt_parallel_data, parent_task_data,
                OMPT_INVOKER(call_context) | ompt_parallel_team,
                return_address);
          }
          master_th->th.ompt_thread_info.state = ompt_state_overhead;
        }
      } else if (microtask == (microtask_t)__kmp_teams_master) {
        KMP_DEBUG_ASSERT(master_th->th.th_team ==
                         master_th->th.th_serial_team);
        team = master_th->th.th_team;
        // team->t.t_pkfn = microtask;
        team->t.t_invoke = invoker;
        __kmp_alloc_argv_entries(argc, team, TRUE);
        team->t.t_argc = argc;
        argv = (void **)team->t.t_argv;
        if (ap) {
          for (i = argc - 1; i >= 0; --i)
            *argv++ = va_arg(kmp_va_deref(ap), void *);
        } else {
          for (i = 0; i < argc; ++i)
            // Get args from parent team for teams construct
            argv[i] = parent_team->t.t_argv[i];
        }
        // AC: revert change made in __kmpc_serialized_parallel()
        //     because initial code in teams should have level=0
        team->t.t_level--;
        // AC: call special invoker for outer "parallel" of teams construct
        invoker(gtid);

        if (ompt_enabled.enabled) {
          ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 0,
                OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
          }
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                &ompt_parallel_data, parent_task_data,
                OMPT_INVOKER(call_context) | ompt_parallel_league,
                return_address);
          }
          master_th->th.ompt_thread_info.state = ompt_state_overhead;
        }
      } else {
        argv = args;
        for (i = argc - 1; i >= 0; --i)
          *argv++ = va_arg(kmp_va_deref(ap), void *);
        KMP_MB();

        void *dummy;
        void **exit_frame_p;
        ompt_task_info_t *task_info;

        ompt_lw_taskteam_t lw_taskteam;

        if (ompt_enabled.enabled) {
          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                  &ompt_parallel_data, return_address);
          __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
          // don't use lw_taskteam after linking. content was swaped
          task_info = OMPT_CUR_TASK_INFO(master_th);
          exit_frame_p = &(task_info->frame.exit_frame.ptr);

          /* OMPT implicit task begin */
          implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
                implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
                ompt_task_implicit);
            OMPT_CUR_TASK_INFO(master_th)->thread_num =
                __kmp_tid_from_gtid(gtid);
          }

          /* OMPT state */
          master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
        } else {
          exit_frame_p = &dummy;
        }

        {
          KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
          KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
          __kmp_invoke_microtask(microtask, gtid, 0, argc, args,
                                 exit_frame_p);
        }

        if (ompt_enabled.enabled) {
          *exit_frame_p = NULL;
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num,
                ompt_task_implicit);
          }

          ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
          __ompt_lw_taskteam_unlink(master_th);
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                &ompt_parallel_data, parent_task_data,
                OMPT_INVOKER(call_context) | ompt_parallel_team,
                return_address);
          }
          master_th->th.ompt_thread_info.state = ompt_state_overhead;
        }
      }
    } else if (call_context == fork_context_gnu) {
      ompt_lw_taskteam_t lwt;
      __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data,
                              return_address);

      lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
      __ompt_lw_taskteam_link(&lwt, master_th, 1);

      // we were called from GNU native code
      KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
      return FALSE;
    } else {
      KMP_ASSERT2(call_context < fork_context_last,
                  "__kmp_fork_call: unknown fork_context parameter");
    }

    KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
    KMP_MB();
    return FALSE;
  } // if (nthreads == 1)

  // GEH: only modify the executing flag in the case when not serialized
  //      serialized case is handled in kmpc_serialized_parallel
  KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
                "curtask=%p, curtask_max_aclevel=%d\n",
                parent_team->t.t_active_level, master_th,
                master_th->th.th_current_task,
                master_th->th.th_current_task->td_icvs.max_active_levels));
  master_th->th.th_current_task->td_flags.executing = 0;

  if (!master_th->th.th_teams_microtask || level > teams_level) {
    /* Increment our nested depth level */
    KMP_ATOMIC_INC(&root->r.r_in_parallel);
  }

  // See if we need to make a copy of the ICVs.
  int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
  if ((level + 1 < __kmp_nested_nth.used) &&
      (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
    nthreads_icv = __kmp_nested_nth.nth[level + 1];
  } else {
    nthreads_icv = 0; // don't update
  }

  // Figure out the proc_bind_policy for the new team.
  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  // proc_bind_default means don't update
  kmp_proc_bind_t proc_bind_icv = proc_bind_default;
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else {
    if (proc_bind == proc_bind_default) {
      // No proc_bind clause specified; use current proc-bind-var for this
      // parallel region.
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    }
    // Figure the value of proc-bind-var for the child threads.
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
    }
  }

  // Reset for next parallel region
  master_th->th.th_set_proc_bind = proc_bind_default;

  if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
    kmp_internal_control_t new_icvs;
    copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
    new_icvs.next = NULL;
    if (nthreads_icv > 0) {
      new_icvs.nproc = nthreads_icv;
    }
    if (proc_bind_icv != proc_bind_default) {
      new_icvs.proc_bind = proc_bind_icv;
    }

    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
                               proc_bind, &new_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
  } else {
    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
                               proc_bind,
                               &master_th->th.th_current_task->td_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
  }
  KF_TRACE(
      10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));
  KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
  KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);
  KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
  KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
  KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
                        return_address);
  KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe
  // TODO: parent_team->t.t_level == INT_MAX ???
  if (!master_th->th.th_teams_microtask || level > teams_level) {
    int new_level = parent_team->t.t_level + 1;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level + 1;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  } else {
    // AC: Do not increase parallel level at start of the teams construct
    int new_level = parent_team->t.t_level;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  }
  kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
  KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);

  KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
  KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);
  // Update the floating point rounding in the team if required.
  propagateFPControl(team);

  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_parallel_begin();

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    // Set primary thread's task team to team's task team. Unless this is hot
    // team, it should be NULL.
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
    KA_TRACE(20, ("__kmp_fork_call: Primary T#%d pushing task_team %p / team "
                  "%p, new task_team %p / team %p\n",
                  __kmp_gtid_from_thread(master_th),
                  master_th->th.th_task_team, parent_team,
                  team->t.t_task_team[master_th->th.th_task_state], team));

    if (active_level || master_th->th.th_task_team) {
      // Take a memo of primary thread's task_state
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      if (master_th->th.th_task_state_top >=
          master_th->th.th_task_state_stack_sz) { // increase size
        kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
        kmp_uint8 *old_stack, *new_stack;
        kmp_uint32 i;
        new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
        for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
          new_stack[i] = master_th->th.th_task_state_memo_stack[i];
        }
        for (i = master_th->th.th_task_state_stack_sz; i < new_size;
             ++i) { // zero-init rest of stack
          new_stack[i] = 0;
        }
        old_stack = master_th->th.th_task_state_memo_stack;
        master_th->th.th_task_state_memo_stack = new_stack;
        master_th->th.th_task_state_stack_sz = new_size;
        __kmp_free(old_stack);
      }
      // Store primary thread's task_state on stack
      master_th->th
          .th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      master_th->th.th_task_state_top++;
#if KMP_NESTED_HOT_TEAMS
      if (master_th->th.th_hot_teams &&
          active_level < __kmp_hot_teams_max_level &&
          team == master_th->th.th_hot_teams[active_level].hot_team) {
        // Restore primary thread's nested state if nested hot team
        master_th->th.th_task_state =
            master_th->th
                .th_task_state_memo_stack[master_th->th.th_task_state_top];
      } else {
#endif
        master_th->th.th_task_state = 0;
#if KMP_NESTED_HOT_TEAMS
      }
#endif
    }
#if !KMP_NESTED_HOT_TEAMS
    KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
                     (team == root->r.r_hot_team));
#endif
  }

  KA_TRACE(
      20,
      ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
       gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
       team->t.t_nproc));
  KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
                   (team->t.t_master_tid == 0 &&
                    (team->t.t_parent == root->r.r_root_team ||
                     team->t.t_parent->t.t_serialized)));
  KMP_MB();

  /* now, setup the arguments */
  argv = (void **)team->t.t_argv;
  if (ap) {
    for (i = argc - 1; i >= 0; --i) {
      void *new_argv = va_arg(kmp_va_deref(ap), void *);
      KMP_CHECK_UPDATE(*argv, new_argv);
      argv++;
    }
  } else {
    for (i = 0; i < argc; ++i) {
      // Get args from parent team for teams construct
      KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
    }
  }

  /* now actually fork the threads */
  KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
  if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
    root->r.r_active = TRUE;

  __kmp_fork_team_threads(root, team, master_th, gtid);
  __kmp_setup_icv_copy(team, nthreads,
                       &master_th->th.th_current_task->td_icvs, loc);

  master_th->th.ompt_thread_info.state = ompt_state_work_parallel;

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

  if (team->t.t_active_level == 1 // only report frames at level 1
      && !master_th->th.th_teams_microtask) { // not in teams construct
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        (__kmp_forkjoin_frames_mode == 3 ||
         __kmp_forkjoin_frames_mode == 1)) {
      kmp_uint64 tmp_time = 0;
      if (__itt_get_timestamp_ptr)
        tmp_time = __itt_get_timestamp();
      // Internal fork - report frame begin
      master_th->th.th_frame_time = tmp_time;
      if (__kmp_forkjoin_frames_mode == 3)
        team->t.t_region_time = tmp_time;
    } else {
      // only one notification scheme (either "submit" or "forking/joined")
      if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
          __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
        __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
      }
    }
  }

  /* now go on and do the work */
  KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
  KMP_MB();
  KF_TRACE(10,
           ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
            root, team, master_th, gtid));

  if (__itt_stack_caller_create_ptr) {
    // create new stack stitching id before entering fork barrier
    if (!enter_teams) {
      KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL);
      team->t.t_stack_id = __kmp_itt_stack_caller_create();
    } else if (parent_team->t.t_serialized) {
      // keep stack stitching id in the serialized parent_team;
      // current team will be used for parallel inside the teams;
      // if parent_team is active, then it already keeps stack stitching id
      // for the league of teams
      KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
      parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
    }
  }

  // AC: skip __kmp_internal_fork at teams construct, let only primary
  // threads execute
  if (ap) {
    __kmp_internal_fork(loc, gtid, team);
    KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, team, master_th, gtid));
  }

  if (call_context == fork_context_gnu) {
    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
    return TRUE;
  }

  /* Invoke microtask for PRIMARY thread */
  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

#if KMP_STATS_ENABLED
  // If beginning a teams construct, then change thread state
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (!ap) {
    KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);
  }
#endif

  if (!team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
  }

#if KMP_STATS_ENABLED
  // If was beginning of a teams construct, then reset thread state
  if (!ap) {
    KMP_SET_THREAD_STATE(previous_state);
  }
#endif

  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

  KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }

  return TRUE;
}
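/* OMPT helpers for the join path: restore the thread state after a join and
   fire the parallel-end callback. */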
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  // restore state outside the region
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? ompt_state_work_serial
                              : ompt_state_work_parallel);
}

static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team, ompt_data_t *parallel_data,
                                   int flags, void *codeptr) {
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_parallel_end) {
    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
        parallel_data, &(task_info->task_data), flags, codeptr);
  }

  task_info->frame.enter_frame = ompt_data_none;
  __kmp_join_restore_state(thread, team);
}
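/* Undo what __kmp_fork_call set up: wait for the workers at the join barrier,
   pop ITT/OMPT state, restore the primary thread's team pointers and ICVs,
   and release (or shrink) the team. */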
void __kmp_join_call(ident_t *loc, int gtid,
                     enum fork_context_e fork_context, int exit_teams) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int master_active;

  KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));

  /* setup current data */
  master_th = __kmp_threads[gtid];
  root = master_th->th.th_root;
  team = master_th->th.th_team;
  parent_team = team->t.t_parent;

  master_th->th.th_ident = loc;

  void *team_microtask = (void *)team->t.t_pkfn;
  // For GOMP interface with serialized parallel, need the
  // __kmpc_end_serialized_parallel to call hooks for OMPT end-implicit-task
  // and end-parallel events.
  if (ompt_enabled.enabled &&
      !(team->t.t_serialized && fork_context == fork_context_gnu)) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }

  if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
    KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
                  "th_task_team = %p\n",
                  __kmp_gtid_from_thread(master_th), team,
                  team->t.t_task_team[master_th->th.th_task_state],
                  master_th->th.th_task_team));
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     team->t.t_task_team[master_th->th.th_task_state]);
  }

  if (team->t.t_serialized) {
    if (master_th->th.th_teams_microtask) {
      // We are in teams construct
      int level = team->t.t_level;
      int tlevel = master_th->th.th_teams_level;
      if (level == tlevel) {
        // AC: we haven't incremented it earlier at start of teams construct,
        //     so do it here - at the end of teams construct
        team->t.t_level++;
      } else if (level == tlevel + 1) {
        // AC: we are exiting parallel inside teams, need to increment
        // serialization in order to restore it in the next call to
        // __kmpc_end_serialized_parallel
        team->t.t_serialized++;
      }
    }
    __kmpc_end_serialized_parallel(loc, gtid);

    if (ompt_enabled.enabled) {
      __kmp_join_restore_state(master_th, parent_team);
    }

    return;
  }

  master_active = team->t.t_master_active;

  if (!exit_teams) {
    // AC: No barrier for internal teams at exit from teams construct.
    //     But there is barrier for external team (league).
    __kmp_internal_join(loc, gtid, team);
    if (__itt_stack_caller_create_ptr) {
      KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
      // destroy the stack stitching id after join barrier
      __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
      team->t.t_stack_id = NULL;
    }
  } else {
    master_th->th.th_task_state =
        0; // AC: no tasking in teams (out of any parallel)
    if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
      KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
      // destroy the stack stitching id on exit from the teams construct
      // if parent_team is active, then the id will be destroyed later on
      // by the primary thread of the league of teams
      __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
      parent_team->t.t_stack_id = NULL;
    }
  }

  KMP_MB();

  ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
  void *codeptr = team->t.ompt_team_info.master_return_address;

  // Mark end of "parallel" region for Intel(R) VTune(TM) analyzer.
  if (team->t.t_active_level == 1 &&
      (!master_th->th.th_teams_microtask || /* not in teams construct */
       master_th->th.th_teams_size.nteams == 1)) {
    master_th->th.th_ident = loc;
    // only one notification scheme (either "submit" or "forking/joined")
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames_mode == 3)
      __kmp_itt_frame_submit(gtid, team->t.t_region_time,
                             master_th->th.th_frame_time, 0, loc,
                             master_th->th.th_team_nproc, 1);
    else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
             !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
      __kmp_itt_region_joined(gtid);
  }

  if (master_th->th.th_teams_microtask && !exit_teams &&
      team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
      team->t.t_level == master_th->th.th_teams_level + 1) {
    // AC: We need to leave the team structure intact at the end of parallel
    // inside the teams construct, so that at the next parallel the same (hot)
    // team works; only adjust nesting levels.
    ompt_data_t ompt_parallel_data = ompt_data_none;
    if (ompt_enabled.enabled) {
      ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
      if (ompt_enabled.ompt_callback_implicit_task) {
        int ompt_team_size = team->t.t_nproc;
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
            OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }
      task_info->frame.exit_frame = ompt_data_none;
      task_info->task_data = ompt_data_none;
      ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
      __ompt_lw_taskteam_unlink(master_th);
    }

    /* Decrement our nested depth level */
    team->t.t_level--;
    team->t.t_active_level--;
    KMP_ATOMIC_DEC(&root->r.r_in_parallel);

    // Restore number of threads in the team if needed.
    if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
      int old_num = master_th->th.th_team_nproc;
      int new_num = master_th->th.th_teams_size.nth;
      kmp_info_t **other_threads = team->t.t_threads;
      team->t.t_nproc = new_num;
      for (int i = 0; i < old_num; ++i) {
        other_threads[i]->th.th_team_nproc = new_num;
      }
      // Adjust states of non-used threads of the team
      for (int i = old_num; i < new_num; ++i) {
        // Re-initialize thread's barrier data.
        KMP_DEBUG_ASSERT(other_threads[i]);
        kmp_balign_t *balign = other_threads[i]->th.th_bar;
        for (int b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
        }
        if (__kmp_tasking_mode != tskm_immediate_exec) {
          // Synchronize thread's task state
          other_threads[i]->th.th_task_state = master_th->th.th_task_state;
        }
      }
    }

    if (ompt_enabled.enabled) {
      __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
                      OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
    }

    return;
  }
2459 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2460 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2462 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2467 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2469 if (!master_th->th.th_teams_microtask ||
2470 team->t.t_level > master_th->th.th_teams_level) {
2472 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2474 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2477 if (ompt_enabled.enabled) {
2478 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2479 if (ompt_enabled.ompt_callback_implicit_task) {
2480 int flags = (team_microtask == (
void *)__kmp_teams_master)
2482 : ompt_task_implicit;
2483 int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
2484 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2485 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2486 OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
2488 task_info->frame.exit_frame = ompt_data_none;
2489 task_info->task_data = ompt_data_none;
2493 KF_TRACE(10, (
"__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
2495 __kmp_pop_current_task_from_thread(master_th);
2497 #if KMP_AFFINITY_SUPPORTED
2499 master_th->th.th_first_place = team->t.t_first_place;
2500 master_th->th.th_last_place = team->t.t_last_place;
2502 master_th->th.th_def_allocator = team->t.t_def_allocator;
2505 if (ompd_state & OMPD_ENABLE_BP)
2506 ompd_bp_parallel_end();
2508 updateHWFPControl(team);
2510 if (root->r.r_active != master_active)
2511 root->r.r_active = master_active;
2513 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2521 master_th->th.th_team = parent_team;
2522 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2523 master_th->th.th_team_master = parent_team->t.t_threads[0];
2524 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2527 if (parent_team->t.t_serialized &&
2528 parent_team != master_th->th.th_serial_team &&
2529 parent_team != root->r.r_root_team) {
2530 __kmp_free_team(root,
2531 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2532 master_th->th.th_serial_team = parent_team;
2535 if (__kmp_tasking_mode != tskm_immediate_exec) {
2536 if (master_th->th.th_task_state_top >
2538 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2540 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
2541 master_th->th.th_task_state;
2542 --master_th->th.th_task_state_top;
2544 master_th->th.th_task_state =
2546 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2549 master_th->th.th_task_team =
2550 parent_team->t.t_task_team[master_th->th.th_task_state];
2552 (
"__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
2553 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
2560 master_th->th.th_current_task->td_flags.executing = 1;
2562 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2566 OMPT_INVOKER(fork_context) |
2567 ((team_microtask == (
void *)__kmp_teams_master) ? ompt_parallel_league
2568 : ompt_parallel_team);
2569 if (ompt_enabled.enabled) {
2570 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
2576 KA_TRACE(20, (
"__kmp_join_call: exit T#%d\n", gtid));
void __kmp_save_internal_controls(kmp_info_t *thread) {

  if (thread->th.th_team != thread->th.th_serial_team) {
    return;
  }
  if (thread->th.th_team->t.t_serialized > 1) {
    int push = 0;

    if (thread->th.th_team->t.t_control_stack_top == NULL) {
      push = 1;
    } else {
      if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
          thread->th.th_team->t.t_serialized) {
        push = 1;
      }
    }
    if (push) { /* push a record on the serial team's stack */
      kmp_internal_control_t *control =
          (kmp_internal_control_t *)__kmp_allocate(
              sizeof(kmp_internal_control_t));

      copy_icvs(control, &thread->th.th_current_task->td_icvs);

      control->serial_nesting_level = thread->th.th_team->t.t_serialized;

      control->next = thread->th.th_team->t.t_control_stack_top;
      thread->th.th_team->t.t_control_stack_top = control;
    }
  }
}
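// Illustrative sketch (not compiled into the runtime; note the #if 0 guard)
// of the "push a copy of the current settings the first time they are about
// to change at a given serialized nesting level" pattern used above. The
// struct and function names here are hypothetical and exist only for
// illustration.
#if 0
struct icv_snapshot {
  int serial_nesting_level; // nesting level this snapshot belongs to
  icv_snapshot *next;       // older snapshots below on the stack
  /* ... copies of the ICVs would live here ... */
};

static void save_once_per_level(icv_snapshot *&top, int level) {
  // Push only if there is no snapshot yet, or the top one belongs to an
  // outer level; repeated saves at the same level reuse the existing record.
  if (top == NULL || top->serial_nesting_level != level) {
    icv_snapshot *s = new icv_snapshot();
    s->serial_nesting_level = level;
    s->next = top;
    top = s;
  }
}
#endif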
/* Changes set_nproc */
void __kmp_set_num_threads(int new_nth, int gtid) {
  kmp_info_t *thread;
  kmp_root_t *root;

  KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (new_nth < 1)
    new_nth = 1;
  else if (new_nth > __kmp_max_nth)
    new_nth = __kmp_max_nth;

  thread = __kmp_threads[gtid];
  if (thread->th.th_current_task->td_icvs.nproc == new_nth)
    return; // nothing to do

  __kmp_save_internal_controls(thread);

  set__nproc(thread, new_nth);

  // If this omp_set_num_threads() call will cause the hot team size to be
  // reduced (in the absence of a num_threads clause), then reduce it now,
  // rather than waiting for the next parallel region.
  root = thread->th.th_root;
  if (__kmp_init_parallel && (!root->r.r_active) &&
      (root->r.r_hot_team->t.t_nproc > new_nth)
#if KMP_NESTED_HOT_TEAMS
      && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
  ) {
    kmp_team_t *hot_team = root->r.r_hot_team;
    int f;

    __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

    // Release the extra threads we don't need any more.
    for (f = new_nth; f < hot_team->t.t_nproc; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        // When decreasing the team size, threads no longer in the team should
        // unref the task team.
        hot_team->t.t_threads[f]->th.th_task_team = NULL;
      }
      __kmp_free_thread(hot_team->t.t_threads[f]);
      hot_team->t.t_threads[f] = NULL;
    }
    hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS
    if (thread->th.th_hot_teams) {
      KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
      thread->th.th_hot_teams[0].hot_team_nth = new_nth;
    }
#endif

    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

    // Update the t_nproc field in the threads that are still active.
    for (f = 0; f < new_nth; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
    }
    // Special flag in case omp_set_num_threads() call.
    hot_team->t.t_size_changed = -1;
  }
}
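// Standalone user-level example (not compiled into the runtime; build it
// separately with -fopenmp). It is assumed here that omp_set_num_threads()
// reaches this routine through the library's entry points; shrinking the
// requested size below the current hot-team size then releases the extra
// workers before the next parallel region, as coded above.
#if 0
#include <omp.h>
#include <stdio.h>
int main() {
  #pragma omp parallel
  { /* first region: the hot team is created at the default size */ }
  omp_set_num_threads(2); // requests a smaller team for subsequent regions
  #pragma omp parallel
  { printf("thread %d of %d\n", omp_get_thread_num(), omp_get_num_threads()); }
  return 0;
}
#endif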
/* Changes max_active_levels */
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
                "%d = (%d)\n",
                gtid, max_active_levels));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate max_active_levels
  if (max_active_levels < 0) {
    KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    // We ignore this call if the user has specified a negative value.
    // The current setting won't be changed; the last valid setting is used.
    KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
                  "max_active_levels for thread %d = (%d)\n",
                  gtid, max_active_levels));
    return;
  }
  if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
    // it's OK, the max_active_levels is within the valid range: [ 0;
    // KMP_MAX_ACTIVE_LEVELS_LIMIT ]. A zero value is allowed.
  } else {
    KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
                KMP_MAX_ACTIVE_LEVELS_LIMIT);
    max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  }
  KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
                "max_active_levels for thread %d = (%d)\n",
                gtid, max_active_levels));

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, max_active_levels);
}
/* Gets max_active_levels */
int __kmp_get_max_active_levels(int gtid) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(thread->th.th_current_task);
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
                "curtask_maxaclevel=%d\n",
                gtid, thread->th.th_current_task,
                thread->th.th_current_task->td_icvs.max_active_levels));
  return thread->th.th_current_task->td_icvs.max_active_levels;
}
// nteams-var per-device ICV
void __kmp_set_num_teams(int num_teams) {
  if (num_teams > 0)
    __kmp_nteams = num_teams;
}
int __kmp_get_max_teams(void) { return __kmp_nteams; }
// teams-thread-limit-var per-device ICV
void __kmp_set_teams_thread_limit(int limit) {
  if (limit > 0)
    __kmp_teams_thread_limit = limit;
}
int __kmp_get_teams_thread_limit(void) { return __kmp_teams_thread_limit; }

KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_info_t *thread;
  kmp_sched_t orig_kind;

  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // Check if the kind parameter is valid, correct if needed.
  orig_kind = kind;
  kind = __kmp_sched_without_mods(kind);

  if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
      (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
    // TODO: Hint needs attention in case we change the default schedule.
    __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
              KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
              __kmp_msg_null);
    kind = kmp_sched_default;
    chunk = 0; // ignore chunk value in case of bad kind
  }

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  if (kind < kmp_sched_upper_std) {
    if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
      // differ static chunked vs. unchunked: chunk should be invalid to
      // indicate unchunked schedule (which is the default)
      thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
    } else {
      thread->th.th_current_task->td_icvs.sched.r_sched_type =
          __kmp_sch_map[kind - kmp_sched_lower - 1];
    }
  } else {
    thread->th.th_current_task->td_icvs.sched.r_sched_type =
        __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
                      kmp_sched_lower - 2];
  }
  __kmp_sched_apply_mods_intkind(
      orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
  if (kind == kmp_sched_auto || chunk < 1) {
    // ignore parameter chunk for schedule auto
    thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    thread->th.th_current_task->td_icvs.sched.chunk = chunk;
  }
}
/* Gets def_sched_var ICV values */
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  kmp_info_t *thread;
  enum sched_type th_type;

  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];

  th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
  switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
  case kmp_sch_static:
  case kmp_sch_static_greedy:
  case kmp_sch_static_balanced:
    *kind = kmp_sched_static;
    __kmp_sched_apply_mods_stdkind(kind, th_type);
    *chunk = 0; // chunk was not set, try to show this fact via zero value
    return;
  case kmp_sch_static_chunked:
    *kind = kmp_sched_static;
    break;
  case kmp_sch_dynamic_chunked:
    *kind = kmp_sched_dynamic;
    break;
  case kmp_sch_guided_chunked:
  case kmp_sch_guided_iterative_chunked:
  case kmp_sch_guided_analytical_chunked:
    *kind = kmp_sched_guided;
    break;
  case kmp_sch_auto:
    *kind = kmp_sched_auto;
    break;
  case kmp_sch_trapezoidal:
    *kind = kmp_sched_trapezoidal;
    break;
#if KMP_STATIC_STEAL_ENABLED
  case kmp_sch_static_steal:
    *kind = kmp_sched_static_steal;
    break;
#endif
  default:
    KMP_FATAL(UnknownSchedulingType, th_type);
  }

  __kmp_sched_apply_mods_stdkind(kind, th_type);
  *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
}
int __kmp_get_ancestor_thread_num(int gtid, int level) {

  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate level
  if (level == 0)
    return 0;
  if (level < 0)
    return -1;
  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;

  if (thr->th.th_teams_microtask) {
    // AC: we are in a teams region where multiple nested teams share a level
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    if (level <= tlevel) { // otherwise the usual algorithm works
      KMP_DEBUG_ASSERT(ii >= tlevel);
      // AC: as we need to pass by the teams league, artificially increase ii
      if (ii == tlevel) {
        ii += 2; // three teams have same level
      } else {
        ii++; // two teams have same level
      }
    }
  }

  if (ii == level)
    return __kmp_tid_from_gtid(gtid);

  dd = team->t.t_serialized;
  level++;
  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if ((team->t.t_serialized) && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      dd = team->t.t_serialized;
      ii--;
    }
  }

  return (dd > 1) ? (0) : (team->t.t_master_tid);
}
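// Standalone user-level sketch (not compiled into the runtime; build it
// separately with -fopenmp) of the query implemented above: level 0 is the
// initial thread, and the ancestor at level 1, queried from a nesting depth
// of 2, is the enclosing outer-team thread. It is assumed here that
// omp_get_ancestor_thread_num() reaches this routine via the library's
// entry points.
#if 0
#include <omp.h>
#include <stdio.h>
int main() {
  omp_set_max_active_levels(2); // allow one level of nested parallelism
  #pragma omp parallel num_threads(2)
  #pragma omp parallel num_threads(2)
  printf("outer ancestor %d, my inner id %d\n",
         omp_get_ancestor_thread_num(1), omp_get_thread_num());
  return 0;
}
#endif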
int __kmp_get_team_size(int gtid, int level) {

  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate level
  if (level == 0)
    return 1;
  if (level < 0)
    return -1;
  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;

  if (thr->th.th_teams_microtask) {
    // AC: we are in a teams region where multiple nested teams share a level
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    if (level <= tlevel) { // otherwise the usual algorithm works
      KMP_DEBUG_ASSERT(ii >= tlevel);
      // AC: as we need to pass by the teams league, artificially increase ii
      if (ii == tlevel) {
        ii += 2; // three teams have same level
      } else {
        ii++; // two teams have same level
      }
    }
  }

  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if (team->t.t_serialized && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      ii--;
    }
  }

  return team->t.t_nproc;
}
kmp_r_sched_t __kmp_get_schedule_global() {
  // This routine exists because the pairs (__kmp_sched, __kmp_chunk) and
  // (__kmp_static, __kmp_guided) may be changed independently, so the
  // combined, up-to-date schedule is assembled here.

  kmp_r_sched_t r_sched;

  // __kmp_sched keeps its original value so the user can set KMP_SCHEDULE
  // multiple times and have different run-time schedules in different roots.
  enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
  enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
  if (s == kmp_sch_static) {
    // replace STATIC with the more detailed schedule (balanced or greedy)
    r_sched.r_sched_type = __kmp_static;
  } else if (s == kmp_sch_guided_chunked) {
    // replace GUIDED with the more detailed schedule (iterative or analytical)
    r_sched.r_sched_type = __kmp_guided;
  } else { // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
    r_sched.r_sched_type = __kmp_sched;
  }
  SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);

  if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
    // __kmp_chunk may be wrong here (if it was never set)
    r_sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    r_sched.chunk = __kmp_chunk;
  }

  return r_sched;
}
/* Allocate a new argv struct if necessary */
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {

  KMP_DEBUG_ASSERT(team);
  if (!realloc || argc > team->t.t_max_argc) {

    KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
                   "current entries=%d\n",
                   team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
    /* if heap space was previously allocated for args, free it */
    if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
      __kmp_free((void *)team->t.t_argv);

    if (argc <= KMP_INLINE_ARGV_ENTRIES) {
      /* use unused space in the cache line for arguments */
      team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv = &team->t.t_inline_argv[0];
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            -1, &team->t.t_inline_argv[0],
            &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
            (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
            team->t.t_id);
      }
    } else {
      /* allocate space for arguments in the heap */
      team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
                               ? KMP_MIN_MALLOC_ARGV_ENTRIES
                               : 2 * argc;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv =
          (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
                                     &team->t.t_argv[team->t.t_max_argc],
                                     sizeof(void *) * team->t.t_max_argc,
                                     "team_%d.t_argv", team->t.t_id);
      }
    }
  }
}
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int i;
  int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
  team->t.t_max_nproc = max_nth;

  /* setup dispatch buffers */
  for (i = 0; i < num_disp_buff; ++i) {
    team->t.t_disp_buffer[i].buffer_index = i;
    team->t.t_disp_buffer[i].doacross_buf_idx = i;
  }
}
static void __kmp_free_team_arrays(kmp_team_t *team) {
  /* Note: this does not free the threads in t_threads */
  int i;
  for (i = 0; i < team->t.t_max_nproc; ++i) {
    if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
      __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
      team->t.t_dispatch[i].th_disp_buffer = NULL;
    }
  }
#if KMP_USE_HIER_SCHED
  __kmp_dispatch_free_hierarchies(team);
#endif
  __kmp_free(team->t.t_threads);
  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  team->t.t_threads = NULL;
  team->t.t_disp_buffer = NULL;
  team->t.t_dispatch = NULL;
  team->t.t_implicit_task_taskdata = 0;
}
static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
  kmp_info_t **oldThreads = team->t.t_threads;

  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  __kmp_allocate_team_arrays(team, max_nth);

  KMP_MEMCPY(team->t.t_threads, oldThreads,
             team->t.t_nproc * sizeof(kmp_info_t *));

  __kmp_free(oldThreads);
}
static kmp_internal_control_t __kmp_get_global_icvs(void) {

  kmp_r_sched_t r_sched =
      __kmp_get_schedule_global(); // get current state of scheduling globals

  KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);

  kmp_internal_control_t g_icvs = {
      0, // int serial_nesting_level
      (kmp_int8)__kmp_global.g.g_dynamic, // dynamic adjustment of threads
      (kmp_int8)__kmp_env_blocktime, // int bt_set; blocktime explicitly set?
      __kmp_dflt_blocktime, // int blocktime
      __kmp_dflt_team_nth, // int nproc; # of threads for next parallel region
      __kmp_cg_max_nth, // int thread_limit
      __kmp_dflt_max_active_levels, // int max_active_levels
      r_sched, // kmp_r_sched_t sched; runtime schedule {sched,chunk} pair
      __kmp_nested_proc_bind.bind_types[0],
      __kmp_default_device,
      NULL // struct kmp_internal_control *next
  };

  return g_icvs;
}
static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {

  kmp_internal_control_t gx_icvs;
  gx_icvs.serial_nesting_level =
      0; // probably =team->t.t_serial like in save_inter_controls
  copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
  gx_icvs.next = NULL;

  return gx_icvs;
}
static void __kmp_initialize_root(kmp_root_t *root) {
  int f;
  kmp_team_t *root_team;
  kmp_team_t *hot_team;
  int hot_team_max_nth;
  kmp_r_sched_t r_sched =
      __kmp_get_schedule_global(); // get current state of scheduling globals
  kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
  KMP_DEBUG_ASSERT(root);
  KMP_ASSERT(!root->r.r_begin);

  /* setup the root state structure */
  __kmp_init_lock(&root->r.r_begin_lock);
  root->r.r_begin = FALSE;
  root->r.r_active = FALSE;
  root->r.r_in_parallel = 0;
  root->r.r_blocktime = __kmp_dflt_blocktime;
#if KMP_AFFINITY_SUPPORTED
  root->r.r_affinity_assigned = FALSE;
#endif

  /* setup the root team for this task */
  KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));

  root_team =
      __kmp_allocate_team(root,
                          1, // new_nproc
                          1, // max_nproc
                          ompt_data_none, // root parallel id
                          __kmp_nested_proc_bind.bind_types[0], &r_icvs,
                          0 // argc
                          USE_NESTED_HOT_ARG(NULL) // primary thread is unknown
      );
  // Non-NULL value should be assigned so the debugger displays the root team.
  TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));

  KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));

  root->r.r_root_team = root_team;
  root_team->t.t_control_stack_top = NULL;

  /* initialize root team */
  root_team->t.t_threads[0] = NULL;
  root_team->t.t_nproc = 1;
  root_team->t.t_serialized = 1;
  root_team->t.t_sched.sched = r_sched.sched;
  KA_TRACE(
      20,
      ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
       root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));

  /* setup the hot team for this task */
  KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));

  hot_team =
      __kmp_allocate_team(root,
                          1, // new_nproc
                          __kmp_dflt_team_nth_ub * 2, // max_nproc
                          ompt_data_none, // root parallel id
                          __kmp_nested_proc_bind.bind_types[0], &r_icvs,
                          0 // argc
                          USE_NESTED_HOT_ARG(NULL) // primary thread is unknown
      );
  KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));

  root->r.r_hot_team = hot_team;
  root_team->t.t_control_stack_top = NULL;

  /* first-time initialization */
  hot_team->t.t_parent = root_team;

  /* initialize hot team */
  hot_team_max_nth = hot_team->t.t_max_nproc;
  for (f = 0; f < hot_team_max_nth; ++f) {
    hot_team->t.t_threads[f] = NULL;
  }
  hot_team->t.t_nproc = 1;
  hot_team->t.t_sched.sched = r_sched.sched;
  hot_team->t.t_size_changed = 0;
}
typedef struct kmp_team_list_item {
  kmp_team_p const *entry;
  struct kmp_team_list_item *next;
} kmp_team_list_item_t;
typedef kmp_team_list_item_t *kmp_team_list_t;

static void __kmp_print_structure_team_accum( // Add team to list of teams.
    kmp_team_list_t list, // List of teams.
    kmp_team_p const *team // Team to add.
) {

  // List must terminate with item where both entry and next are NULL.
  // Team is added to the list only once.
  // List is sorted in ascending order by team id.
  // Team id is *not* a key.

  kmp_team_list_t l;

  KMP_DEBUG_ASSERT(list != NULL);
  if (team == NULL) {
    return;
  }

  __kmp_print_structure_team_accum(list, team->t.t_parent);
  __kmp_print_structure_team_accum(list, team->t.t_next_pool);

  // Search list for the team.
  l = list;
  while (l->next != NULL && l->entry != team) {
    l = l->next;
  }
  if (l->next != NULL) {
    return; // Team has been added before, exit.
  }

  // Team is not found. Search list again for insertion point.
  l = list;
  while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
    l = l->next;
  }

  // Insert team.
  {
    kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
        sizeof(kmp_team_list_item_t));
    *item = *l;
    l->entry = team;
    l->next = item;
  }
}
static void __kmp_print_structure_team(char const *title,
                                       kmp_team_p const *team) {
  __kmp_printf("%s", title);
  if (team != NULL) {
    __kmp_printf("%2x %p\n", team->t.t_id, team);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}

static void __kmp_print_structure_thread(char const *title,
                                         kmp_info_p const *thread) {
  __kmp_printf("%s", title);
  if (thread != NULL) {
    __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}
void __kmp_print_structure(void) {

  kmp_team_list_t list;

  // Initialize list of teams.
  list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
      sizeof(kmp_team_list_item_t));
  list->entry = NULL;
  list->next = NULL;

  __kmp_printf("\n------------------------------\nGlobal Thread "
               "Table\n------------------------------\n");
  {
    int gtid;
    for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
      __kmp_printf("%2d", gtid);
      if (__kmp_threads != NULL) {
        __kmp_printf(" %p", __kmp_threads[gtid]);
      }
      if (__kmp_root != NULL) {
        __kmp_printf(" %p", __kmp_root[gtid]);
      }
      __kmp_printf("\n");
    }
  }

  // Print out __kmp_threads array.
  __kmp_printf("\n------------------------------\nThreads\n--------------------"
               "----------\n");
  if (__kmp_threads != NULL) {
    int gtid;
    for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t const *thread = __kmp_threads[gtid];
      if (thread != NULL) {
        __kmp_printf("GTID %2d %p:\n", gtid, thread);
        __kmp_printf("    Our Root:       %p\n", thread->th.th_root);
        __kmp_print_structure_team("    Our Team:     ", thread->th.th_team);
        __kmp_print_structure_team("    Serial Team:  ",
                                   thread->th.th_serial_team);
        __kmp_printf("    Threads:      %2d\n", thread->th.th_team_nproc);
        __kmp_print_structure_thread("    Primary:      ",
                                     thread->th.th_team_master);
        __kmp_printf("    Serialized?:  %2d\n", thread->th.th_team_serialized);
        __kmp_printf("    Set NProc:    %2d\n", thread->th.th_set_nproc);
        __kmp_printf("    Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
        __kmp_print_structure_thread("    Next in pool: ",
                                     thread->th.th_next_pool);
        __kmp_printf("\n");
        __kmp_print_structure_team_accum(list, thread->th.th_team);
        __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
      }
    }
  } else {
    __kmp_printf("Threads array is not allocated.\n");
  }

  // Print out __kmp_root array.
  __kmp_printf("\n------------------------------\nUbers\n----------------------"
               "--------\n");
  if (__kmp_root != NULL) {
    int gtid;
    for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_root_t const *root = __kmp_root[gtid];
      if (root != NULL) {
        __kmp_printf("GTID %2d %p:\n", gtid, root);
        __kmp_print_structure_team("    Root Team:    ", root->r.r_root_team);
        __kmp_print_structure_team("    Hot Team:     ", root->r.r_hot_team);
        __kmp_print_structure_thread("    Uber Thread:  ",
                                     root->r.r_uber_thread);
        __kmp_printf("    Active?:      %2d\n", root->r.r_active);
        __kmp_printf("    In Parallel:  %2d\n",
                     KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
        __kmp_printf("\n");
        __kmp_print_structure_team_accum(list, root->r.r_root_team);
        __kmp_print_structure_team_accum(list, root->r.r_hot_team);
      }
    }
  } else {
    __kmp_printf("Ubers array is not allocated.\n");
  }

  // Print out accumulated list of teams.
  __kmp_printf("\n------------------------------\nTeams\n----------------------"
               "--------\n");
  while (list->next != NULL) {
    kmp_team_p const *team = list->entry;
    int i;
    __kmp_printf("Team %2x %p:\n", team->t.t_id, team);
    __kmp_print_structure_team("    Parent Team:      ", team->t.t_parent);
    __kmp_printf("    Primary TID:      %2d\n", team->t.t_master_tid);
    __kmp_printf("    Max threads:      %2d\n", team->t.t_max_nproc);
    __kmp_printf("    Levels of serial: %2d\n", team->t.t_serialized);
    __kmp_printf("    Number threads:   %2d\n", team->t.t_nproc);
    for (i = 0; i < team->t.t_nproc; ++i) {
      __kmp_printf("    Thread %2d:      ", i);
      __kmp_print_structure_thread("", team->t.t_threads[i]);
    }
    __kmp_print_structure_team("    Next in pool:     ", team->t.t_next_pool);
    __kmp_printf("\n");
    list = list->next;
  }

  // Print out __kmp_thread_pool and __kmp_team_pool.
  __kmp_printf("\n------------------------------\nPools\n----------------------"
               "--------\n");
  __kmp_print_structure_thread("Thread pool:          ",
                               CCAST(kmp_info_t *, __kmp_thread_pool));
  __kmp_print_structure_team("Team pool:            ",
                             CCAST(kmp_team_t *, __kmp_team_pool));
  __kmp_printf("\n");

  // Free team list.
  while (list != NULL) {
    kmp_team_list_item_t *item = list;
    list = list->next;
    KMP_INTERNAL_FREE(item);
  }
}
static const unsigned __kmp_primes[] = {
    0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
    0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
    0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
    0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
    0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
    0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
    0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
    0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
    0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
    0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
    0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
//---------------------------------------------------------------------------
// __kmp_get_random: Get a random number using a linear congruential method.
unsigned short __kmp_get_random(kmp_info_t *thread) {
  unsigned x = thread->th.th_x;
  unsigned short r = (unsigned short)(x >> 16);

  thread->th.th_x = x * thread->th.th_a + 1;

  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));

  return r;
}

//--------------------------------------------------------
// __kmp_init_random: Initialize a random number generator
void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;

  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
  KA_TRACE(30,
           ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
}
/* Reclaim array entries for root threads that are already dead; returns the
   number reclaimed. These should be reclaimed by the registering thread, not
   the dying thread. */
static int __kmp_reclaim_dead_roots(void) {
  int i, r = 0;

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i) &&
        !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
        !__kmp_root[i]
             ->r.r_active) { // AC: reclaim only roots died in non-active state
      r += __kmp_unregister_root_other_thread(i);
    }
  }
  return r;
}
/* Expand the threads array so it has room for at least nNeed new entries.
   All calls should hold __kmp_forkjoin_lock, so resizing __kmp_threads does
   not need additional protection. Returns the number of entries gained. */
static int __kmp_expand_threads(int nNeed) {
  int added = 0;
  int minimumRequiredCapacity;
  int newCapacity;
  kmp_info_t **newThreads;
  kmp_root_t **newRoot;

#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
  /* only for Windows static library */
  /* reclaim array entries for root threads that are already dead */
  added = __kmp_reclaim_dead_roots();

  if (nNeed) {
    nNeed -= added;
    if (nNeed < 0)
      nNeed = 0;
  }
#endif
  if (nNeed <= 0)
    return added;

  KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);

  /* compute expansion headroom to check if we can expand */
  if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
    /* possible expansion too small -- give up */
    return added;
  }
  minimumRequiredCapacity = __kmp_threads_capacity + nNeed;

  newCapacity = __kmp_threads_capacity;
  do {
    newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
                                                          : __kmp_sys_max_nth;
  } while (newCapacity < minimumRequiredCapacity);
  newThreads = (kmp_info_t **)__kmp_allocate(
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
  newRoot =
      (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
  KMP_MEMCPY(newThreads, __kmp_threads,
             __kmp_threads_capacity * sizeof(kmp_info_t *));
  KMP_MEMCPY(newRoot, __kmp_root,
             __kmp_threads_capacity * sizeof(kmp_root_t *));
  // Publish the new arrays, then free the old threads array.
  kmp_info_t **temp_threads = __kmp_threads;
  *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
  *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
  __kmp_free(temp_threads);
  added += newCapacity - __kmp_threads_capacity;
  *(volatile int *)&__kmp_threads_capacity = newCapacity;

  if (newCapacity > __kmp_tp_capacity) {
    __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
    if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
      __kmp_threadprivate_resize_cache(newCapacity);
    } else { // increase __kmp_tp_capacity to correspond with kmp_threads size
      *(volatile int *)&__kmp_tp_capacity = newCapacity;
    }
    __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
  }

  return added;
}
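// Illustrative sketch (not compiled into the runtime) of the growth policy
// used above: double the capacity until the request fits, but never exceed a
// hard system maximum. Names below are hypothetical; it assumes the current
// capacity is at least 1.
#if 0
static int grow_capacity(int current, int needed, int hard_max) {
  if (hard_max - current < needed)
    return -1; // not enough headroom; caller should give up
  int required = current + needed;
  int cap = current > 0 ? current : 1;
  do {
    // Double, but clamp at the hard maximum.
    cap = cap <= (hard_max >> 1) ? (cap << 1) : hard_max;
  } while (cap < required);
  return cap;
}
#endif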
3608 int __kmp_register_root(
int initial_thread) {
3609 kmp_info_t *root_thread;
3613 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3614 KA_TRACE(20, (
"__kmp_register_root: entered\n"));
3631 capacity = __kmp_threads_capacity;
3632 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3639 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
3640 capacity -= __kmp_hidden_helper_threads_num;
3644 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3645 if (__kmp_tp_cached) {
3646 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3647 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3648 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3650 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3660 if (TCR_4(__kmp_init_hidden_helper_threads)) {
3663 for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL &&
3664 gtid <= __kmp_hidden_helper_threads_num;
3667 KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num);
3668 KA_TRACE(1, (
"__kmp_register_root: found slot in threads array for "
3669 "hidden helper thread: T#%d\n",
3675 if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3678 for (gtid = __kmp_hidden_helper_threads_num + 1;
3679 TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++)
3683 1, (
"__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3684 KMP_ASSERT(gtid < __kmp_threads_capacity);
3689 TCW_4(__kmp_nth, __kmp_nth + 1);
3693 if (__kmp_adjust_gtid_mode) {
3694 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3695 if (TCR_4(__kmp_gtid_mode) != 2) {
3696 TCW_4(__kmp_gtid_mode, 2);
3699 if (TCR_4(__kmp_gtid_mode) != 1) {
3700 TCW_4(__kmp_gtid_mode, 1);
3705 #ifdef KMP_ADJUST_BLOCKTIME
3708 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3709 if (__kmp_nth > __kmp_avail_proc) {
3710 __kmp_zero_bt = TRUE;
3716 if (!(root = __kmp_root[gtid])) {
3717 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(
sizeof(kmp_root_t));
3718 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3721 #if KMP_STATS_ENABLED
3723 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3724 __kmp_stats_thread_ptr->startLife();
3725 KMP_SET_THREAD_STATE(SERIAL_REGION);
3728 __kmp_initialize_root(root);
3731 if (root->r.r_uber_thread) {
3732 root_thread = root->r.r_uber_thread;
3734 root_thread = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
3735 if (__kmp_storage_map) {
3736 __kmp_print_thread_storage_map(root_thread, gtid);
3738 root_thread->th.th_info.ds.ds_gtid = gtid;
3740 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3742 root_thread->th.th_root = root;
3743 if (__kmp_env_consistency_check) {
3744 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3747 __kmp_initialize_fast_memory(root_thread);
3751 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3752 __kmp_initialize_bget(root_thread);
3754 __kmp_init_random(root_thread);
3758 if (!root_thread->th.th_serial_team) {
3759 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3760 KF_TRACE(10, (
"__kmp_register_root: before serial_team\n"));
3761 root_thread->th.th_serial_team = __kmp_allocate_team(
3766 proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3768 KMP_ASSERT(root_thread->th.th_serial_team);
3769 KF_TRACE(10, (
"__kmp_register_root: after serial_team = %p\n",
3770 root_thread->th.th_serial_team));
3773 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3775 root->r.r_root_team->t.t_threads[0] = root_thread;
3776 root->r.r_hot_team->t.t_threads[0] = root_thread;
3777 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3779 root_thread->th.th_serial_team->t.t_serialized = 0;
3780 root->r.r_uber_thread = root_thread;
3783 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3784 TCW_4(__kmp_init_gtid, TRUE);
3787 __kmp_gtid_set_specific(gtid);
3790 __kmp_itt_thread_name(gtid);
3793 #ifdef KMP_TDATA_GTID
3796 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3797 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3799 KA_TRACE(20, (
"__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3801 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3802 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3803 KMP_INIT_BARRIER_STATE));
3806 for (b = 0; b < bs_last_barrier; ++b) {
3807 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3809 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3813 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3814 KMP_INIT_BARRIER_STATE);
3816 #if KMP_AFFINITY_SUPPORTED
3817 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3818 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3819 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3820 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3822 root_thread->th.th_def_allocator = __kmp_def_allocator;
3823 root_thread->th.th_prev_level = 0;
3824 root_thread->th.th_prev_num_threads = 1;
3826 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(
sizeof(kmp_cg_root_t));
3827 tmp->cg_root = root_thread;
3828 tmp->cg_thread_limit = __kmp_cg_max_nth;
3829 tmp->cg_nthreads = 1;
3830 KA_TRACE(100, (
"__kmp_register_root: Thread %p created node %p with"
3831 " cg_nthreads init to 1\n",
3834 root_thread->th.th_cg_roots = tmp;
3836 __kmp_root_counter++;
3839 if (!initial_thread && ompt_enabled.enabled) {
3841 kmp_info_t *root_thread = ompt_get_thread();
3843 ompt_set_thread_state(root_thread, ompt_state_overhead);
3845 if (ompt_enabled.ompt_callback_thread_begin) {
3846 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
3847 ompt_thread_initial, __ompt_get_thread_data_internal());
3849 ompt_data_t *task_data;
3850 ompt_data_t *parallel_data;
3851 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
3853 if (ompt_enabled.ompt_callback_implicit_task) {
3854 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
3855 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
3858 ompt_set_thread_state(root_thread, ompt_state_work_serial);
3862 if (ompd_state & OMPD_ENABLE_BP)
3863 ompd_bp_thread_begin();
3867 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
#if KMP_NESTED_HOT_TEAMS
// Frees the nested hot teams of the given thread at this level and below;
// returns the number of worker threads released.
static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
                                const int max_level) {
  int i, n, nth;
  kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
  if (!hot_teams || !hot_teams[level].hot_team) {
    return 0;
  }
  KMP_DEBUG_ASSERT(level < max_level);
  kmp_team_t *team = hot_teams[level].hot_team;
  nth = hot_teams[level].hot_team_nth;
  n = nth - 1; // primary thread is not freed
  if (level < max_level - 1) {
    for (i = 0; i < nth; ++i) {
      kmp_info_t *th = team->t.t_threads[i];
      n += __kmp_free_hot_teams(root, th, level + 1, max_level);
      if (i > 0 && th->th.th_hot_teams) {
        __kmp_free(th->th.th_hot_teams);
        th->th.th_hot_teams = NULL;
      }
    }
  }
  __kmp_free_team(root, team, NULL);
  return n;
}
#endif
// Resets a root thread and clears its root and hot teams.
// Returns the number of __kmp_threads entries directly and indirectly freed.
static int __kmp_reset_root(int gtid, kmp_root_t *root) {
  kmp_team_t *root_team = root->r.r_root_team;
  kmp_team_t *hot_team = root->r.r_hot_team;
  int n = hot_team->t.t_nproc;
  int i;

  KMP_DEBUG_ASSERT(!root->r.r_active);

  root->r.r_root_team = NULL;
  root->r.r_hot_team = NULL;
  // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team
  // before the call to __kmp_free_team().
  __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
#if KMP_NESTED_HOT_TEAMS
  if (__kmp_hot_teams_max_level >
      0) { // need to free nested hot teams and their threads if any
    for (i = 0; i < hot_team->t.t_nproc; ++i) {
      kmp_info_t *th = hot_team->t.t_threads[i];
      if (__kmp_hot_teams_max_level > 1) {
        n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
      }
      if (th->th.th_hot_teams) {
        __kmp_free(th->th.th_hot_teams);
        th->th.th_hot_teams = NULL;
      }
    }
  }
#endif
  __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));

  // Before we can reap the thread, we need to make certain that all other
  // threads in the teams that had this root as ancestor have stopped trying to
  // steal tasks.
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    __kmp_wait_to_unref_task_teams();
  }

#if KMP_OS_WINDOWS
  /* Close the handle of the root duplicated in __kmp_create_worker */
  KA_TRACE(
      10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
           "\n",
           (LPVOID) & (root->r.r_uber_thread->th),
           root->r.r_uber_thread->th.th_info.ds.ds_thread));
  __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
#endif /* KMP_OS_WINDOWS */

  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_thread_end();

  ompt_data_t *task_data;
  ompt_data_t *parallel_data;
  __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data,
                                NULL);
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
  }
  if (ompt_enabled.ompt_callback_thread_end) {
    ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
        &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
  }

  TCW_4(__kmp_nth,
        __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
  i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
  KA_TRACE(100, ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
                 " to %d\n",
                 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
                 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
  if (i == 1) {
    // need to free the contention group structure
    KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
                     root->r.r_uber_thread->th.th_cg_roots->cg_root);
    KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
    __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
    root->r.r_uber_thread->th.th_cg_roots = NULL;
  }
  __kmp_reap_thread(root->r.r_uber_thread, 1);

  // The root thread cannot be placed in the thread pool, so it is reaped.
  root->r.r_uber_thread = NULL;
  /* mark root as no longer in use */
  root->r.r_begin = FALSE;

  return n;
}
void __kmp_unregister_root_current_thread(int gtid) {
  KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
  /* This lock should be OK: unregister_root_current_thread is never called
     during an abort, only during a normal close. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
                  "exiting T#%d\n",
                  gtid));
    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
    return;
  }
  kmp_root_t *root = __kmp_root[gtid];

  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  KMP_MB();

  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_team_t *team = thread->th.th_team;
  kmp_task_team_t *task_team = thread->th.th_task_team;

  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
    // the runtime is shutting down so we won't report any events
    thread->th.ompt_thread_info.state = ompt_state_undefined;
    __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
  }

  __kmp_reset_root(gtid, root);

  KMP_MB();
  KC_TRACE(10,
           ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
}
/* __kmp_forkjoin_lock must already be held.
   Unregisters a root thread that is not the current thread. Returns the
   number of __kmp_threads entries freed as a result. */
static int __kmp_unregister_root_other_thread(int gtid) {
  kmp_root_t *root = __kmp_root[gtid];
  int r;

  KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  r = __kmp_reset_root(gtid, root);
  KC_TRACE(10,
           ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
  return r;
}
void __kmp_task_info() {

  kmp_int32 gtid = __kmp_entry_gtid();
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *steam = this_thr->th.th_serial_team;
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_printf(
      "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
      "ptask=%p\n",
      gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
      team->t.t_implicit_task_taskdata[tid].td_parent);
}
4079 static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4080 int tid,
int gtid) {
4084 KMP_DEBUG_ASSERT(this_thr != NULL);
4085 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4086 KMP_DEBUG_ASSERT(team);
4087 KMP_DEBUG_ASSERT(team->t.t_threads);
4088 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4089 kmp_info_t *master = team->t.t_threads[0];
4090 KMP_DEBUG_ASSERT(master);
4091 KMP_DEBUG_ASSERT(master->th.th_root);
4095 TCW_SYNC_PTR(this_thr->th.th_team, team);
4097 this_thr->th.th_info.ds.ds_tid = tid;
4098 this_thr->th.th_set_nproc = 0;
4099 if (__kmp_tasking_mode != tskm_immediate_exec)
4102 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4104 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4105 this_thr->th.th_set_proc_bind = proc_bind_default;
4106 #if KMP_AFFINITY_SUPPORTED
4107 this_thr->th.th_new_place = this_thr->th.th_current_place;
4109 this_thr->th.th_root = master->th.th_root;
4112 this_thr->th.th_team_nproc = team->t.t_nproc;
4113 this_thr->th.th_team_master = master;
4114 this_thr->th.th_team_serialized = team->t.t_serialized;
4115 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
4117 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4119 KF_TRACE(10, (
"__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4120 tid, gtid, this_thr, this_thr->th.th_current_task));
4122 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4125 KF_TRACE(10, (
"__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4126 tid, gtid, this_thr, this_thr->th.th_current_task));
4131 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4133 this_thr->th.th_local.this_construct = 0;
4135 if (!this_thr->th.th_pri_common) {
4136 this_thr->th.th_pri_common =
4137 (
struct common_table *)__kmp_allocate(
sizeof(
struct common_table));
4138 if (__kmp_storage_map) {
4139 __kmp_print_storage_map_gtid(
4140 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4141 sizeof(
struct common_table),
"th_%d.th_pri_common\n", gtid);
4143 this_thr->th.th_pri_head = NULL;
4146 if (this_thr != master &&
4147 this_thr->th.th_cg_roots != master->th.th_cg_roots) {
4149 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4150 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
4153 int i = tmp->cg_nthreads--;
4154 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p decrement cg_nthreads"
4155 " on node %p of thread %p to %d\n",
4156 this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
4161 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4163 this_thr->th.th_cg_roots->cg_nthreads++;
4164 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p increment cg_nthreads on"
4165 " node %p of thread %p to %d\n",
4166 this_thr, this_thr->th.th_cg_roots,
4167 this_thr->th.th_cg_roots->cg_root,
4168 this_thr->th.th_cg_roots->cg_nthreads));
4169 this_thr->th.th_current_task->td_icvs.thread_limit =
4170 this_thr->th.th_cg_roots->cg_thread_limit;
4175 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4178 sizeof(dispatch_private_info_t) *
4179 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4180 KD_TRACE(10, (
"__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4181 team->t.t_max_nproc));
4182 KMP_ASSERT(dispatch);
4183 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4184 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4186 dispatch->th_disp_index = 0;
4187 dispatch->th_doacross_buf_idx = 0;
4188 if (!dispatch->th_disp_buffer) {
4189 dispatch->th_disp_buffer =
4190 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4192 if (__kmp_storage_map) {
4193 __kmp_print_storage_map_gtid(
4194 gtid, &dispatch->th_disp_buffer[0],
4195 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4197 : __kmp_dispatch_num_buffers],
4199 "th_%d.th_dispatch.th_disp_buffer "
4200 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4201 gtid, team->t.t_id, gtid);
4204 memset(&dispatch->th_disp_buffer[0],
'\0', disp_size);
4207 dispatch->th_dispatch_pr_current = 0;
4208 dispatch->th_dispatch_sh_current = 0;
4210 dispatch->th_deo_fcn = 0;
4211 dispatch->th_dxo_fcn = 0;
4214 this_thr->th.th_next_pool = NULL;
4216 if (!this_thr->th.th_task_state_memo_stack) {
4218 this_thr->th.th_task_state_memo_stack =
4219 (kmp_uint8 *)__kmp_allocate(4 *
sizeof(kmp_uint8));
4220 this_thr->th.th_task_state_top = 0;
4221 this_thr->th.th_task_state_stack_sz = 4;
4222 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4224 this_thr->th.th_task_state_memo_stack[i] = 0;
4227 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4228 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
4238 kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4240 kmp_team_t *serial_team;
4241 kmp_info_t *new_thr;
4244 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4245 KMP_DEBUG_ASSERT(root && team);
4246 #if !KMP_NESTED_HOT_TEAMS
4247 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
4252 if (__kmp_thread_pool) {
4253 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
4254 __kmp_thread_pool = (
volatile kmp_info_t *)new_thr->th.th_next_pool;
4255 if (new_thr == __kmp_thread_pool_insert_pt) {
4256 __kmp_thread_pool_insert_pt = NULL;
4258 TCW_4(new_thr->th.th_in_pool, FALSE);
4259 __kmp_suspend_initialize_thread(new_thr);
4260 __kmp_lock_suspend_mx(new_thr);
4261 if (new_thr->th.th_active_in_pool == TRUE) {
4262 KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4263 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
4264 new_thr->th.th_active_in_pool = FALSE;
4266 __kmp_unlock_suspend_mx(new_thr);
4268 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d using thread T#%d\n",
4269 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4270 KMP_ASSERT(!new_thr->th.th_team);
4271 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4274 __kmp_initialize_info(new_thr, team, new_tid,
4275 new_thr->th.th_info.ds.ds_gtid);
4276 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4278 TCW_4(__kmp_nth, __kmp_nth + 1);
4280 new_thr->th.th_task_state = 0;
4281 new_thr->th.th_task_state_top = 0;
4282 new_thr->th.th_task_state_stack_sz = 4;
4284 #ifdef KMP_ADJUST_BLOCKTIME
4287 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4288 if (__kmp_nth > __kmp_avail_proc) {
4289 __kmp_zero_bt = TRUE;
4298 kmp_balign_t *balign = new_thr->th.th_bar;
4299 for (b = 0; b < bs_last_barrier; ++b)
4300 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4303 KF_TRACE(10, (
"__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4304 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4311 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4312 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4317 if (!TCR_4(__kmp_init_monitor)) {
4318 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4319 if (!TCR_4(__kmp_init_monitor)) {
4320 KF_TRACE(10, (
"before __kmp_create_monitor\n"));
4321 TCW_4(__kmp_init_monitor, 1);
4322 __kmp_create_monitor(&__kmp_monitor);
4323 KF_TRACE(10, (
"after __kmp_create_monitor\n"));
4334 while (TCR_4(__kmp_init_monitor) < 2) {
4337 KF_TRACE(10, (
"after monitor thread has started\n"));
4340 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4347 int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
4349 : __kmp_hidden_helper_threads_num + 1;
4351 for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
4353 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4356 if (TCR_4(__kmp_init_hidden_helper_threads)) {
4357 KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
4362 new_thr = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
4364 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4366 #if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
4369 __itt_suppress_mark_range(
4370 __itt_suppress_range, __itt_suppress_threading_errors,
4371 &new_thr->th.th_sleep_loc,
sizeof(new_thr->th.th_sleep_loc));
4372 __itt_suppress_mark_range(
4373 __itt_suppress_range, __itt_suppress_threading_errors,
4374 &new_thr->th.th_reap_state,
sizeof(new_thr->th.th_reap_state));
4376 __itt_suppress_mark_range(
4377 __itt_suppress_range, __itt_suppress_threading_errors,
4378 &new_thr->th.th_suspend_init,
sizeof(new_thr->th.th_suspend_init));
4380 __itt_suppress_mark_range(__itt_suppress_range,
4381 __itt_suppress_threading_errors,
4382 &new_thr->th.th_suspend_init_count,
4383 sizeof(new_thr->th.th_suspend_init_count));
4386 __itt_suppress_mark_range(__itt_suppress_range,
4387 __itt_suppress_threading_errors,
4388 CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
4389 sizeof(new_thr->th.th_bar[0].bb.b_go));
4390 __itt_suppress_mark_range(__itt_suppress_range,
4391 __itt_suppress_threading_errors,
4392 CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
4393 sizeof(new_thr->th.th_bar[1].bb.b_go));
4394 __itt_suppress_mark_range(__itt_suppress_range,
4395 __itt_suppress_threading_errors,
4396 CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
4397 sizeof(new_thr->th.th_bar[2].bb.b_go));
4399 if (__kmp_storage_map) {
4400 __kmp_print_thread_storage_map(new_thr, new_gtid);
4405 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4406 KF_TRACE(10, (
"__kmp_allocate_thread: before th_serial/serial_team\n"));
4407 new_thr->th.th_serial_team = serial_team =
4408 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4412 proc_bind_default, &r_icvs,
4413 0 USE_NESTED_HOT_ARG(NULL));
4415 KMP_ASSERT(serial_team);
4416 serial_team->t.t_serialized = 0;
4418 serial_team->t.t_threads[0] = new_thr;
4420 (
"__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4424 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4427 __kmp_initialize_fast_memory(new_thr);
4431 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4432 __kmp_initialize_bget(new_thr);
4435 __kmp_init_random(new_thr);
4439 (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4440 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4443 kmp_balign_t *balign = new_thr->th.th_bar;
4444 for (b = 0; b < bs_last_barrier; ++b) {
4445 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4446 balign[b].bb.team = NULL;
4447 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4448 balign[b].bb.use_oncore_barrier = 0;
4451 new_thr->th.th_spin_here = FALSE;
4452 new_thr->th.th_next_waiting = 0;
4454 new_thr->th.th_blocking =
false;
4457 #if KMP_AFFINITY_SUPPORTED
4458 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4459 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4460 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4461 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4463 new_thr->th.th_def_allocator = __kmp_def_allocator;
4464 new_thr->th.th_prev_level = 0;
4465 new_thr->th.th_prev_num_threads = 1;
4467 TCW_4(new_thr->th.th_in_pool, FALSE);
4468 new_thr->th.th_active_in_pool = FALSE;
4469 TCW_4(new_thr->th.th_active, TRUE);
4477 if (__kmp_adjust_gtid_mode) {
4478 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4479 if (TCR_4(__kmp_gtid_mode) != 2) {
4480 TCW_4(__kmp_gtid_mode, 2);
4483 if (TCR_4(__kmp_gtid_mode) != 1) {
4484 TCW_4(__kmp_gtid_mode, 1);
4489 #ifdef KMP_ADJUST_BLOCKTIME
4492 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4493 if (__kmp_nth > __kmp_avail_proc) {
4494 __kmp_zero_bt = TRUE;
4501 10, (
"__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4502 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4504 (
"__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4506 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
/* Reinitialize team for reuse.
   The hot team code calls this at every fork barrier, so barrier-sensitive
   benchmarks are extremely sensitive to changes in it, especially writes to
   the team struct, which cause a cache invalidation in all threads.
   None of the updates below may write unless the value actually changes. */
static void __kmp_reinitialize_team(kmp_team_t *team,
                                    kmp_internal_control_t *new_icvs,
                                    ident_t *loc) {
  KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
  KMP_DEBUG_ASSERT(team && new_icvs);
  KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);

  KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
  // Copy ICVs to the primary thread's implicit taskdata
  __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
  copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);

  KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
}
/* Initialize the team data structure.
   This assumes t_threads and t_max_nproc are already set.
   The arguments are not touched. */
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc) {
  KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));

  /* verify */
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_MB();

  team->t.t_master_tid = 0; /* not needed */
  team->t.t_serialized = new_nproc > 1 ? 0 : 1;
  team->t.t_nproc = new_nproc;

  /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
  team->t.t_next_pool = NULL;

  TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
  team->t.t_invoke = NULL; /* not needed */

  team->t.t_sched.sched = new_icvs->sched.sched;

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  team->t.t_fp_control_saved = FALSE; /* not needed */
  team->t.t_x87_fpu_control_word = 0; /* not needed */
  team->t.t_mxcsr = 0; /* not needed */
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

  team->t.t_construct = 0;

  team->t.t_ordered.dt.t_value = 0;
  team->t.t_master_active = FALSE;

#ifdef KMP_DEBUG
  team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
#endif
#if KMP_OS_WINDOWS
  team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
#endif

  team->t.t_control_stack_top = NULL;

  __kmp_reinitialize_team(team, new_icvs, loc);

  KMP_MB();
  KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
}
#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
/* Set the thread's affinity mask to the full mask, returning the previous
   mask in old_mask (if non-NULL); no changes to runtime structures. */
static void
__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
  if (KMP_AFFINITY_CAPABLE()) {
    int status;
    if (old_mask != NULL) {
      status = __kmp_get_system_affinity(old_mask, TRUE);
      int error = errno;
      if (status != 0) {
        __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error),
                    __kmp_msg_null);
      }
    }
    __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
  }
}
#endif
4610 #if KMP_AFFINITY_SUPPORTED
4616 static void __kmp_partition_places(kmp_team_t *team,
int update_master_only) {
4618 if (KMP_HIDDEN_HELPER_TEAM(team))
4621 kmp_info_t *master_th = team->t.t_threads[0];
4622 KMP_DEBUG_ASSERT(master_th != NULL);
4623 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4624 int first_place = master_th->th.th_first_place;
4625 int last_place = master_th->th.th_last_place;
4626 int masters_place = master_th->th.th_current_place;
4627 team->t.t_first_place = first_place;
4628 team->t.t_last_place = last_place;
4630 KA_TRACE(20, (
"__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
4631 "bound to place %d partition = [%d,%d]\n",
4632 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4633 team->t.t_id, masters_place, first_place, last_place));
4635 switch (proc_bind) {
4637 case proc_bind_default:
4640 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4643 case proc_bind_primary: {
4645 int n_th = team->t.t_nproc;
4646 for (f = 1; f < n_th; f++) {
4647 kmp_info_t *th = team->t.t_threads[f];
4648 KMP_DEBUG_ASSERT(th != NULL);
4649 th->th.th_first_place = first_place;
4650 th->th.th_last_place = last_place;
4651 th->th.th_new_place = masters_place;
4652 if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
4653 team->t.t_display_affinity != 1) {
4654 team->t.t_display_affinity = 1;
4657 KA_TRACE(100, (
"__kmp_partition_places: primary: T#%d(%d:%d) place %d "
4658 "partition = [%d,%d]\n",
4659 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4660 f, masters_place, first_place, last_place));
4664 case proc_bind_close: {
4666 int n_th = team->t.t_nproc;
4668 if (first_place <= last_place) {
4669 n_places = last_place - first_place + 1;
4671 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4673 if (n_th <= n_places) {
4674 int place = masters_place;
4675 for (f = 1; f < n_th; f++) {
4676 kmp_info_t *th = team->t.t_threads[f];
4677 KMP_DEBUG_ASSERT(th != NULL);
4679 if (place == last_place) {
4680 place = first_place;
4681 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4686 th->th.th_first_place = first_place;
4687 th->th.th_last_place = last_place;
4688 th->th.th_new_place = place;
4689 if (__kmp_display_affinity && place != th->th.th_current_place &&
4690 team->t.t_display_affinity != 1) {
4691 team->t.t_display_affinity = 1;
4694 KA_TRACE(100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d "
4695 "partition = [%d,%d]\n",
4696 __kmp_gtid_from_thread(team->t.t_threads[f]),
4697 team->t.t_id, f, place, first_place, last_place));
4700 int S, rem, gap, s_count;
4701 S = n_th / n_places;
4703 rem = n_th - (S * n_places);
4704 gap = rem > 0 ? n_places / rem : n_places;
4705 int place = masters_place;
4707 for (f = 0; f < n_th; f++) {
4708 kmp_info_t *th = team->t.t_threads[f];
4709 KMP_DEBUG_ASSERT(th != NULL);
4711 th->th.th_first_place = first_place;
4712 th->th.th_last_place = last_place;
4713 th->th.th_new_place = place;
4714 if (__kmp_display_affinity && place != th->th.th_current_place &&
4715 team->t.t_display_affinity != 1) {
4716 team->t.t_display_affinity = 1;
4720 if ((s_count == S) && rem && (gap_ct == gap)) {
4722 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4724 if (place == last_place) {
4725 place = first_place;
4726 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4734 }
else if (s_count == S) {
4735 if (place == last_place) {
4736 place = first_place;
4737 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4747 (
"__kmp_partition_places: close: T#%d(%d:%d) place %d "
4748 "partition = [%d,%d]\n",
4749 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4750 th->th.th_new_place, first_place, last_place));
4752 KMP_DEBUG_ASSERT(place == masters_place);
4756 case proc_bind_spread: {
4758 int n_th = team->t.t_nproc;
4761 if (first_place <= last_place) {
4762 n_places = last_place - first_place + 1;
4764 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4766 if (n_th <= n_places) {
4769 if (n_places !=
static_cast<int>(__kmp_affinity_num_masks)) {
4770 int S = n_places / n_th;
4771 int s_count, rem, gap, gap_ct;
4773 place = masters_place;
4774 rem = n_places - n_th * S;
4775 gap = rem ? n_th / rem : 1;
4778 if (update_master_only == 1)
4780 for (f = 0; f < thidx; f++) {
4781 kmp_info_t *th = team->t.t_threads[f];
4782 KMP_DEBUG_ASSERT(th != NULL);
4784 th->th.th_first_place = place;
4785 th->th.th_new_place = place;
4786 if (__kmp_display_affinity && place != th->th.th_current_place &&
4787 team->t.t_display_affinity != 1) {
4788 team->t.t_display_affinity = 1;
4791 while (s_count < S) {
4792 if (place == last_place) {
4793 place = first_place;
4794 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4801 if (rem && (gap_ct == gap)) {
4802 if (place == last_place) {
4803 place = first_place;
4804 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4812 th->th.th_last_place = place;
4815 if (place == last_place) {
4816 place = first_place;
4817 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4824 ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4825 "partition = [%d,%d], __kmp_affinity_num_masks: %u\n",
4826 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4827 f, th->th.th_new_place, th->th.th_first_place,
4828 th->th.th_last_place, __kmp_affinity_num_masks));
4834 double current = static_cast<double>(masters_place);
4836 (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
4841 if (update_master_only == 1)
4843 for (f = 0; f < thidx; f++) {
4844 first = static_cast<int>(current);
4845 last = static_cast<int>(current + spacing) - 1;
4846 KMP_DEBUG_ASSERT(last >= first);
4847 if (first >= n_places) {
4848 if (masters_place) {
4851 if (first == (masters_place + 1)) {
4852 KMP_DEBUG_ASSERT(f == n_th);
4855 if (last == masters_place) {
4856 KMP_DEBUG_ASSERT(f == (n_th - 1));
4860 KMP_DEBUG_ASSERT(f == n_th);
4865 if (last >= n_places) {
4866 last = (n_places - 1);
4871 KMP_DEBUG_ASSERT(0 <= first);
4872 KMP_DEBUG_ASSERT(n_places > first);
4873 KMP_DEBUG_ASSERT(0 <= last);
4874 KMP_DEBUG_ASSERT(n_places > last);
4875 KMP_DEBUG_ASSERT(last_place >= first_place);
4876 th = team->t.t_threads[f];
4877 KMP_DEBUG_ASSERT(th);
4878 th->th.th_first_place = first;
4879 th->th.th_new_place = place;
4880 th->th.th_last_place = last;
4881 if (__kmp_display_affinity && place != th->th.th_current_place &&
4882 team->t.t_display_affinity != 1) {
4883 team->t.t_display_affinity = 1;
4886 ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4887 "partition = [%d,%d], spacing = %.4f\n",
4888 __kmp_gtid_from_thread(team->t.t_threads[f]),
4889 team->t.t_id, f, th->th.th_new_place,
4890 th->th.th_first_place, th->th.th_last_place, spacing));
4894 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4896 int S, rem, gap, s_count;
4897 S = n_th / n_places;
4899 rem = n_th - (S * n_places);
4900 gap = rem > 0 ? n_places / rem : n_places;
4901 int place = masters_place;
4904 if (update_master_only == 1)
4906 for (f = 0; f < thidx; f++) {
4907 kmp_info_t *th = team->t.t_threads[f];
4908 KMP_DEBUG_ASSERT(th != NULL);
4910 th->th.th_first_place = place;
4911 th->th.th_last_place = place;
4912 th->th.th_new_place = place;
4913 if (__kmp_display_affinity && place != th->th.th_current_place &&
4914 team->t.t_display_affinity != 1) {
4915 team->t.t_display_affinity = 1;
4919 if ((s_count == S) && rem && (gap_ct == gap)) {
4921 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4923 if (place == last_place) {
4924 place = first_place;
4925 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4933 } else if (s_count == S) {
4934 if (place == last_place) {
4935 place = first_place;
4936 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4945 KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4946 "partition = [%d,%d]\n",
4947 __kmp_gtid_from_thread(team->t.t_threads[f]),
4948 team->t.t_id, f, th->th.th_new_place,
4949 th->th.th_first_place, th->th.th_last_place));
4951 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4959 KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
4967 __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
4969 ompt_data_t ompt_parallel_data,
4971 kmp_proc_bind_t new_proc_bind,
4972 kmp_internal_control_t *new_icvs,
4973 int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
4974 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
4977 int use_hot_team = !root->r.r_active;
4980 KA_TRACE(20, ("__kmp_allocate_team: called\n"));
4981 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
4982 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
4985 #if KMP_NESTED_HOT_TEAMS
4986 kmp_hot_team_ptr_t *hot_teams;
4988 team = master->th.th_team;
4989 level = team->t.t_active_level;
4990 if (master->th.th_teams_microtask) {
4991 if (master->th.th_teams_size.nteams > 1 &&
4994 (microtask_t)__kmp_teams_master ||
4995 master->th.th_teams_level <
5001 hot_teams = master->th.th_hot_teams;
5002 if (level < __kmp_hot_teams_max_level && hot_teams &&
5003 hot_teams[level].hot_team) {
5011 KMP_DEBUG_ASSERT(new_nproc == 1);
5015 if (use_hot_team && new_nproc > 1) {
5016 KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
5017 #if KMP_NESTED_HOT_TEAMS
5018 team = hot_teams[level].hot_team;
5020 team = root->r.r_hot_team;
5023 if (__kmp_tasking_mode != tskm_immediate_exec) {
5024 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
5025 "task_team[1] = %p before reinit\n",
5026 team->t.t_task_team[0], team->t.t_task_team[1]));
5033 if (team->t.t_nproc == new_nproc) {
5034 KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n"));
5037 if (team->t.t_size_changed == -1) {
5038 team->t.t_size_changed = 1;
5040 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
5044 kmp_r_sched_t new_sched = new_icvs->sched;
5046 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
5048 __kmp_reinitialize_team(team, new_icvs,
5049 root->r.r_uber_thread->th.th_ident);
5051 KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
5052 team->t.t_threads[0], team));
5053 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5055 #if KMP_AFFINITY_SUPPORTED
5056 if ((team->t.t_size_changed == 0) &&
5057 (team->t.t_proc_bind == new_proc_bind)) {
5058 if (new_proc_bind == proc_bind_spread) {
5059 __kmp_partition_places(
5062 KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "
5063 "proc_bind = %d, partition = [%d,%d]\n",
5064 team->t.t_id, new_proc_bind, team->t.t_first_place,
5065 team->t.t_last_place));
5067 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5068 __kmp_partition_places(team);
5071 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5073 } else if (team->t.t_nproc > new_nproc) {
5075 ("__kmp_allocate_team: decreasing hot team thread count to %d\n",
5078 team->t.t_size_changed = 1;
5079 #if KMP_NESTED_HOT_TEAMS
5080 if (__kmp_hot_teams_mode == 0) {
5083 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
5084 hot_teams[level].hot_team_nth = new_nproc;
5087 for (f = new_nproc; f < team->t.t_nproc; f++) {
5088 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5089 if (__kmp_tasking_mode != tskm_immediate_exec) {
5092 team->t.t_threads[f]->th.th_task_team = NULL;
5094 __kmp_free_thread(team->t.t_threads[f]);
5095 team->t.t_threads[f] = NULL;
5097 #if KMP_NESTED_HOT_TEAMS
5102 for (f = new_nproc; f < team->t.t_nproc; ++f) {
5103 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5104 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
5105 for (int b = 0; b < bs_last_barrier; ++b) {
5106 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
5107 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5109 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
5114 team->t.t_nproc = new_nproc;
5116 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
5117 __kmp_reinitialize_team(team, new_icvs,
5118 root->r.r_uber_thread->th.th_ident);
5121 for (f = 0; f < new_nproc; ++f) {
5122 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5127 KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
5128 team->t.t_threads[0], team));
5130 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5133 for (f = 0; f < team->t.t_nproc; f++) {
5134 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5135 team->t.t_threads[f]->th.th_team_nproc ==
5140 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5141 #if KMP_AFFINITY_SUPPORTED
5142 __kmp_partition_places(team);
5145 #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5146 kmp_affin_mask_t *old_mask;
5147 if (KMP_AFFINITY_CAPABLE()) {
5148 KMP_CPU_ALLOC(old_mask);
5153 ("__kmp_allocate_team: increasing hot team thread count to %d\n",
5156 team->t.t_size_changed = 1;
5158 #if KMP_NESTED_HOT_TEAMS
5159 int avail_threads = hot_teams[level].hot_team_nth;
5160 if (new_nproc < avail_threads)
5161 avail_threads = new_nproc;
5162 kmp_info_t **other_threads = team->t.t_threads;
5163 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5167 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5168 for (b = 0; b < bs_last_barrier; ++b) {
5169 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5170 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5172 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5176 if (hot_teams[level].hot_team_nth >= new_nproc) {
5179 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5180 team->t.t_nproc = new_nproc;
5186 hot_teams[level].hot_team_nth = new_nproc;
5188 if (team->t.t_max_nproc < new_nproc) {
5190 __kmp_reallocate_team_arrays(team, new_nproc);
5191 __kmp_reinitialize_team(team, new_icvs, NULL);
5194 #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5200 __kmp_set_thread_affinity_mask_full_tmp(old_mask);
5204 for (f = team->t.t_nproc; f < new_nproc; f++) {
5205 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5206 KMP_DEBUG_ASSERT(new_worker);
5207 team->t.t_threads[f] = new_worker;
5210 ("__kmp_allocate_team: team %d init T#%d arrived: "
5211 "join=%llu, plain=%llu\n",
5212 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
5213 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5214 team->t.t_bar[bs_plain_barrier].b_arrived));
5218 kmp_balign_t *balign = new_worker->th.th_bar;
5219 for (b = 0; b < bs_last_barrier; ++b) {
5220 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5221 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5222 KMP_BARRIER_PARENT_FLAG);
5224 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5230 #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5231 if (KMP_AFFINITY_CAPABLE()) {
5233 __kmp_set_system_affinity(old_mask, TRUE);
5234 KMP_CPU_FREE(old_mask);
5237 #if KMP_NESTED_HOT_TEAMS
5241 int old_nproc = team->t.t_nproc;
5243 __kmp_initialize_team(team, new_nproc, new_icvs,
5244 root->r.r_uber_thread->th.th_ident);
5247 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5248 for (f = 0; f < team->t.t_nproc; ++f)
5249 __kmp_initialize_info(team->t.t_threads[f], team, f,
5250 __kmp_gtid_from_tid(f, team));
5258 for (f = old_nproc; f < team->t.t_nproc; ++f)
5259 team->t.t_threads[f]->th.th_task_state =
5260 team->t.t_threads[0]->th.th_task_state_memo_stack[level];
5263 kmp_uint8 old_state = team->t.t_threads[0]->th.th_task_state;
5264 for (f = old_nproc; f < team->t.t_nproc; ++f)
5265 team->t.t_threads[f]->th.th_task_state = old_state;
5269 for (f = 0; f < team->t.t_nproc; ++f) {
5270 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5271 team->t.t_threads[f]->th.th_team_nproc ==
5276 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5277 #if KMP_AFFINITY_SUPPORTED
5278 __kmp_partition_places(team);
5282 kmp_info_t *master = team->t.t_threads[0];
5283 if (master->th.th_teams_microtask) {
5284 for (f = 1; f < new_nproc; ++f) {
5286 kmp_info_t *thr = team->t.t_threads[f];
5287 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5288 thr->th.th_teams_level = master->th.th_teams_level;
5289 thr->th.th_teams_size = master->th.th_teams_size;
5292 #if KMP_NESTED_HOT_TEAMS
5296 for (f = 1; f < new_nproc; ++f) {
5297 kmp_info_t *thr = team->t.t_threads[f];
5299 kmp_balign_t *balign = thr->th.th_bar;
5300 for (b = 0; b < bs_last_barrier; ++b) {
5301 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5302 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5304 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5312 __kmp_alloc_argv_entries(argc, team, TRUE);
5313 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5317 KF_TRACE(10, (" hot_team = %p\n", team));
5320 if (__kmp_tasking_mode != tskm_immediate_exec) {
5321 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
5322 "task_team[1] = %p after reinit\n",
5323 team->t.t_task_team[0], team->t.t_task_team[1]));
5328 __ompt_team_assign_id(team, ompt_parallel_data);
5338 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5341 if (team->t.t_max_nproc >= max_nproc) {
5343 __kmp_team_pool = team->t.t_next_pool;
5346 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5348 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "
5349 "task_team[1] %p to NULL\n",
5350 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5351 team->t.t_task_team[0] = NULL;
5352 team->t.t_task_team[1] = NULL;
5355 __kmp_alloc_argv_entries(argc, team, TRUE);
5356 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5359 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5360 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5363 for (b = 0; b < bs_last_barrier; ++b) {
5364 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5366 team->t.t_bar[b].b_master_arrived = 0;
5367 team->t.t_bar[b].b_team_arrived = 0;
5372 team->t.t_proc_bind = new_proc_bind;
5374 KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",
5378 __ompt_team_assign_id(team, ompt_parallel_data);
5390 team = __kmp_reap_team(team);
5391 __kmp_team_pool = team;
5396 team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));
5399 team->t.t_max_nproc = max_nproc;
5402 __kmp_allocate_team_arrays(team, max_nproc);
5404 KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));
5405 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5407 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
5409 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5410 team->t.t_task_team[0] = NULL;
5412 team->t.t_task_team[1] = NULL;
5415 if (__kmp_storage_map) {
5416 __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
5420 __kmp_alloc_argv_entries(argc, team, FALSE);
5421 team->t.t_argc = argc;
5424 ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5425 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5428 for (b = 0; b < bs_last_barrier; ++b) {
5429 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5431 team->t.t_bar[b].b_master_arrived = 0;
5432 team->t.t_bar[b].b_team_arrived = 0;
5437 team->t.t_proc_bind = new_proc_bind;
5440 __ompt_team_assign_id(team, ompt_parallel_data);
5441 team->t.ompt_serialized_team_info = NULL;
5446 KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
5457 void __kmp_free_team(kmp_root_t *root,
5458 kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5460 KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
5464 KMP_DEBUG_ASSERT(root);
5465 KMP_DEBUG_ASSERT(team);
5466 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5467 KMP_DEBUG_ASSERT(team->t.t_threads);
5469 int use_hot_team = team == root->r.r_hot_team;
5470 #if KMP_NESTED_HOT_TEAMS
5472 kmp_hot_team_ptr_t *hot_teams;
5474 level = team->t.t_active_level - 1;
5475 if (master->th.th_teams_microtask) {
5476 if (master->th.th_teams_size.nteams > 1) {
5480 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5481 master->th.th_teams_level == team->t.t_level) {
5486 hot_teams = master->th.th_hot_teams;
5487 if (level < __kmp_hot_teams_max_level) {
5488 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5495 TCW_SYNC_PTR(team->t.t_pkfn,
5498 team->t.t_copyin_counter = 0;
5503 if (!use_hot_team) {
5504 if (__kmp_tasking_mode != tskm_immediate_exec) {
5506 for (f = 1; f < team->t.t_nproc; ++f) {
5507 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5508 kmp_info_t *th = team->t.t_threads[f];
5509 volatile kmp_uint32 *state = &th->th.th_reap_state;
5510 while (*state != KMP_SAFE_TO_REAP) {
5514 if (!__kmp_is_thread_alive(th, &ecode)) {
5515 *state = KMP_SAFE_TO_REAP;
5520 kmp_flag_64<> fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5521 if (fl.is_sleeping())
5522 fl.resume(__kmp_gtid_from_thread(th));
5529 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5530 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5531 if (task_team != NULL) {
5532 for (f = 0; f < team->t.t_nproc; ++f) {
5533 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5534 team->t.t_threads[f]->th.th_task_team = NULL;
5538 ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5539 __kmp_get_gtid(), task_team, team->t.t_id));
5540 #if KMP_NESTED_HOT_TEAMS
5541 __kmp_free_task_team(master, task_team);
5543 team->t.t_task_team[tt_idx] = NULL;
5549 team->t.t_parent = NULL;
5550 team->t.t_level = 0;
5551 team->t.t_active_level = 0;
5554 for (f = 1; f < team->t.t_nproc; ++f) {
5555 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5556 __kmp_free_thread(team->t.t_threads[f]);
5557 team->t.t_threads[f] = NULL;
5562 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
5563 __kmp_team_pool = (volatile kmp_team_t *)team;
5566 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
5567 team->t.t_threads[1]->th.th_cg_roots);
5568 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5570 for (f = 1; f < team->t.t_nproc; ++f) {
5571 kmp_info_t *thr = team->t.t_threads[f];
5572 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5573 thr->th.th_cg_roots->cg_root == thr);
5575 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5576 thr->th.th_cg_roots = tmp->up;
5577 KA_TRACE(100, ("__kmp_free_team: Thread %p popping node %p and moving"
5578 " up to node %p. cg_nthreads was %d\n",
5579 thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
5580 int i = tmp->cg_nthreads--;
5585 if (thr->th.th_cg_roots)
5586 thr->th.th_current_task->td_icvs.thread_limit =
5587 thr->th.th_cg_roots->cg_thread_limit;
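// Note on the loop above: when a non-hot team is freed, each surviving
// worker pops one node off its contention-group stack (th_cg_roots) and
// restores thread_limit from the node underneath, so a thread returned to
// the pool carries the limits of the enclosing contention group rather
// than those of the team that just ended.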
5596 kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5597 kmp_team_t *next_pool = team->t.t_next_pool;
5599 KMP_DEBUG_ASSERT(team);
5600 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5601 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5602 KMP_DEBUG_ASSERT(team->t.t_threads);
5603 KMP_DEBUG_ASSERT(team->t.t_argv);
5608 __kmp_free_team_arrays(team);
5609 if (team->t.t_argv != &team->t.t_inline_argv[0])
5610 __kmp_free((void *)team->t.t_argv);
5642 void __kmp_free_thread(kmp_info_t *this_th) {
5646 KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5647 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
5649 KMP_DEBUG_ASSERT(this_th);
5654 kmp_balign_t *balign = this_th->th.th_bar;
5655 for (b = 0; b < bs_last_barrier; ++b) {
5656 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5657 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5658 balign[b].bb.team = NULL;
5659 balign[b].bb.leaf_kids = 0;
5661 this_th->th.th_task_state = 0;
5662 this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
5665 TCW_PTR(this_th->th.th_team, NULL);
5666 TCW_PTR(this_th->th.th_root, NULL);
5667 TCW_PTR(this_th->th.th_dispatch, NULL);
5669 while (this_th->th.th_cg_roots) {
5670 this_th->th.th_cg_roots->cg_nthreads--;
5671 KA_TRACE(100, ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
5672 " %p of thread %p to %d\n",
5673 this_th, this_th->th.th_cg_roots,
5674 this_th->th.th_cg_roots->cg_root,
5675 this_th->th.th_cg_roots->cg_nthreads));
5676 kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
5677 if (tmp->cg_root == this_th) {
5678 KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
5680 5, ("__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
5681 this_th->th.th_cg_roots = tmp->up;
5684 if (tmp->cg_nthreads == 0) {
5687 this_th->th.th_cg_roots = NULL;
5697 __kmp_free_implicit_task(this_th);
5698 this_th->th.th_current_task = NULL;
5702 gtid = this_th->th.th_info.ds.ds_gtid;
5703 if (__kmp_thread_pool_insert_pt != NULL) {
5704 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5705 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5706 __kmp_thread_pool_insert_pt = NULL;
5715 if (__kmp_thread_pool_insert_pt != NULL) {
5716 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5718 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
5720 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5721 scan = &((*scan)->th.th_next_pool))
5726 TCW_PTR(this_th->th.th_next_pool, *scan);
5727 __kmp_thread_pool_insert_pt = *scan = this_th;
5728 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5729 (this_th->th.th_info.ds.ds_gtid <
5730 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5731 TCW_4(this_th->th.th_in_pool, TRUE);
5732 __kmp_suspend_initialize_thread(this_th);
5733 __kmp_lock_suspend_mx(this_th);
5734 if (this_th->th.th_active == TRUE) {
5735 KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
5736 this_th->th.th_active_in_pool = TRUE;
5740 KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
5743 __kmp_unlock_suspend_mx(this_th);
5745 TCW_4(__kmp_nth, __kmp_nth - 1);
5747 #ifdef KMP_ADJUST_BLOCKTIME
5750 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5751 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5752 if (__kmp_nth <= __kmp_avail_proc) {
5753 __kmp_zero_bt = FALSE;
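// __kmp_free_thread keeps __kmp_thread_pool sorted by ascending gtid so the
// lowest-numbered free thread is reused first; __kmp_thread_pool_insert_pt
// caches the previous insertion point so releases in ascending gtid order
// do not rescan the list from the head. Invariant maintained by the scan
// above (informal): for every node n in the pool,
//   n->th.th_info.ds.ds_gtid < n->th.th_next_pool->th.th_info.ds.ds_gtid.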
5763 void *__kmp_launch_thread(kmp_info_t *this_thr) {
5764 #if OMP_PROFILING_SUPPORT
5765 ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
5767 if (ProfileTraceFile)
5768 llvm::timeTraceProfilerInitialize(500, "libomptarget");
5771 int gtid = this_thr->th.th_info.ds.ds_gtid;
5773 kmp_team_t **volatile pteam;
5776 KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));
5778 if (__kmp_env_consistency_check) {
5779 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
5783 if (ompd_state & OMPD_ENABLE_BP)
5784 ompd_bp_thread_begin();
5788 ompt_data_t *thread_data = nullptr;
5789 if (ompt_enabled.enabled) {
5790 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
5791 *thread_data = ompt_data_none;
5793 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5794 this_thr->th.ompt_thread_info.wait_id = 0;
5795 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
5796 this_thr->th.ompt_thread_info.parallel_flags = 0;
5797 if (ompt_enabled.ompt_callback_thread_begin) {
5798 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
5799 ompt_thread_worker, thread_data);
5801 this_thr->th.ompt_thread_info.state = ompt_state_idle;
5806 while (!TCR_4(__kmp_global.g.g_done)) {
5807 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
5811 KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));
5814 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
5817 if (ompt_enabled.enabled) {
5818 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5822 pteam = &this_thr->th.th_team;
5825 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
5827 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
5830 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5831 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
5832 (*pteam)->t.t_pkfn));
5834 updateHWFPControl(*pteam);
5837 if (ompt_enabled.enabled) {
5838 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
5842 rc = (*pteam)->t.t_invoke(gtid);
5846 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5847 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
5848 (*pteam)->t.t_pkfn));
5851 if (ompt_enabled.enabled) {
5853 __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;
5855 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5859 __kmp_join_barrier(gtid);
5862 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
5865 if (ompd_state & OMPD_ENABLE_BP)
5866 ompd_bp_thread_end();
5870 if (ompt_enabled.ompt_callback_thread_end) {
5871 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
5875 this_thr->th.th_task_team = NULL;
5877 __kmp_common_destroy_gtid(gtid);
5879 KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
5882 #if OMP_PROFILING_SUPPORT
5883 llvm::timeTraceProfilerFinishThread();
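// A worker's lifetime is the loop above: park in the fork barrier until the
// primary thread releases it with a team, run the team's microtask through
// t_invoke, meet the primary thread again in the join barrier, and repeat
// until __kmp_global.g.g_done is set at shutdown.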
5890 void __kmp_internal_end_dest(void *specific_gtid) {
5893 __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, >id);
5895 KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
5899 __kmp_internal_end_thread(gtid);
5902 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
5904 __attribute__((destructor)) void __kmp_internal_end_dtor(void) {
5905 __kmp_internal_end_atexit();
5912 void __kmp_internal_end_atexit(void) {
5913 KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
5937 __kmp_internal_end_library(-1);
5939 __kmp_close_console();
5943 static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
5948 KMP_DEBUG_ASSERT(thread != NULL);
5950 gtid = thread->th.th_info.ds.ds_gtid;
5953 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5956 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
5960 kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
5962 __kmp_release_64(&flag);
5966 __kmp_reap_worker(thread);
5978 if (thread->th.th_active_in_pool) {
5979 thread->th.th_active_in_pool = FALSE;
5980 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
5981 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
5985 __kmp_free_implicit_task(thread);
5989 __kmp_free_fast_memory(thread);
5992 __kmp_suspend_uninitialize_thread(thread);
5994 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
5995 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
6000 #ifdef KMP_ADJUST_BLOCKTIME
6003 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6004 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6005 if (__kmp_nth <= __kmp_avail_proc) {
6006 __kmp_zero_bt = FALSE;
6012 if (__kmp_env_consistency_check) {
6013 if (thread->th.th_cons) {
6014 __kmp_free_cons_stack(thread->th.th_cons);
6015 thread->th.th_cons = NULL;
6019 if (thread->th.th_pri_common != NULL) {
6020 __kmp_free(thread->th.th_pri_common);
6021 thread->th.th_pri_common = NULL;
6024 if (thread->th.th_task_state_memo_stack != NULL) {
6025 __kmp_free(thread->th.th_task_state_memo_stack);
6026 thread->th.th_task_state_memo_stack = NULL;
6030 if (thread->th.th_local.bget_data != NULL) {
6031 __kmp_finalize_bget(thread);
6035 #if KMP_AFFINITY_SUPPORTED
6036 if (thread->th.th_affin_mask != NULL) {
6037 KMP_CPU_FREE(thread->th.th_affin_mask);
6038 thread->th.th_affin_mask = NULL;
6042 #if KMP_USE_HIER_SCHED
6043 if (thread->th.th_hier_bar_data != NULL) {
6044 __kmp_free(thread->th.th_hier_bar_data);
6045 thread->th.th_hier_bar_data = NULL;
6049 __kmp_reap_team(thread->th.th_serial_team);
6050 thread->th.th_serial_team = NULL;
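// Reaping a worker is the inverse of spawning it: wake it out of the fork
// barrier if a finite blocktime still has it waiting there, join the OS
// thread, then release its per-thread state (implicit task, fast memory,
// suspend data, consistency-check stack, affinity mask, hierarchical
// barrier data, serialized team) before clearing its __kmp_threads[] slot.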
6057 static void __kmp_internal_end(void) {
6061 __kmp_unregister_library();
6068 __kmp_reclaim_dead_roots();
6072 for (i = 0; i < __kmp_threads_capacity; i++)
6074 if (__kmp_root[i]->r.r_active)
6077 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6079 if (i < __kmp_threads_capacity) {
6091 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6092 if (TCR_4(__kmp_init_monitor)) {
6093 __kmp_reap_monitor(&__kmp_monitor);
6094 TCW_4(__kmp_init_monitor, 0);
6096 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6097 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
6103 for (i = 0; i < __kmp_threads_capacity; i++) {
6104 if (__kmp_root[i]) {
6107 KMP_ASSERT(!__kmp_root[i]->r.r_active);
6116 while (__kmp_thread_pool != NULL) {
6118 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
6119 __kmp_thread_pool = thread->th.th_next_pool;
6121 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
6122 thread->th.th_next_pool = NULL;
6123 thread->th.th_in_pool = FALSE;
6124 __kmp_reap_thread(thread, 0);
6126 __kmp_thread_pool_insert_pt = NULL;
6129 while (__kmp_team_pool != NULL) {
6131 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
6132 __kmp_team_pool = team->t.t_next_pool;
6134 team->t.t_next_pool = NULL;
6135 __kmp_reap_team(team);
6138 __kmp_reap_task_teams();
6145 for (i = 0; i < __kmp_threads_capacity; i++) {
6146 kmp_info_t *thr = __kmp_threads[i];
6147 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
6152 for (i = 0; i < __kmp_threads_capacity; ++i) {
6159 TCW_SYNC_4(__kmp_init_common, FALSE);
6161 KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n"));
6169 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6170 if (TCR_4(__kmp_init_monitor)) {
6171 __kmp_reap_monitor(&__kmp_monitor);
6172 TCW_4(__kmp_init_monitor, 0);
6174 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6175 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
6178 TCW_4(__kmp_init_gtid, FALSE);
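// __kmp_internal_end performs the full teardown only when no root is still
// active: reap the monitor (when one is in use), drain the thread and team
// pools, reap task teams, and finally drop __kmp_init_common and
// __kmp_init_gtid so a later initialization starts clean. If some root is
// still active it only marks g_done and leaves the reaping to the last
// root to exit.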
6187 void __kmp_internal_end_library(int gtid_req) {
6194 if (__kmp_global.g.g_abort) {
6195 KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));
6199 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6200 KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"));
6205 if (TCR_4(__kmp_init_hidden_helper) &&
6206 !TCR_4(__kmp_hidden_helper_team_done)) {
6207 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6209 __kmp_hidden_helper_main_thread_release();
6211 __kmp_hidden_helper_threads_deinitz_wait();
6217 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6219 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
6220 if (gtid == KMP_GTID_SHUTDOWN) {
6221 KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
6222 "already shutdown\n"));
6224 } else if (gtid == KMP_GTID_MONITOR) {
6225 KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
6226 "registered, or system shutdown\n"));
6228 } else if (gtid == KMP_GTID_DNE) {
6229 KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
6232 } else if (KMP_UBER_GTID(gtid)) {
6234 if (__kmp_root[gtid]->r.r_active) {
6235 __kmp_global.g.g_abort = -1;
6236 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6237 __kmp_unregister_library();
6239 ("__kmp_internal_end_library: root still active, abort T#%d\n",
6245 ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6246 __kmp_unregister_root_current_thread(gtid);
6253 #ifdef DUMP_DEBUG_ON_EXIT
6254 if (__kmp_debug_buf)
6255 __kmp_dump_debug_buffer();
6260 __kmp_unregister_library();
6265 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6268 if (__kmp_global.g.g_abort) {
6269 KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n"));
6271 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6274 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6275 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6284 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6287 __kmp_internal_end();
6289 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6290 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6292 KA_TRACE(10, ("__kmp_internal_end_library: exit\n"));
6294 #ifdef DUMP_DEBUG_ON_EXIT
6295 if (__kmp_debug_buf)
6296 __kmp_dump_debug_buffer();
6300 __kmp_close_console();
6303 __kmp_fini_allocator();
6307 void __kmp_internal_end_thread(int gtid_req) {
6316 if (__kmp_global.g.g_abort) {
6317 KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"));
6321 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6322 KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"));
6327 if (TCR_4(__kmp_init_hidden_helper) &&
6328 !TCR_4(__kmp_hidden_helper_team_done)) {
6329 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6331 __kmp_hidden_helper_main_thread_release();
6333 __kmp_hidden_helper_threads_deinitz_wait();
6340 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6342 ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6343 if (gtid == KMP_GTID_SHUTDOWN) {
6344 KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
6345 "already shutdown\n"));
6347 } else if (gtid == KMP_GTID_MONITOR) {
6348 KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
6349 "registered, or system shutdown\n"));
6351 } else if (gtid == KMP_GTID_DNE) {
6352 KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
6356 } else if (KMP_UBER_GTID(gtid)) {
6358 if (__kmp_root[gtid]->r.r_active) {
6359 __kmp_global.g.g_abort = -1;
6360 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6362 ("__kmp_internal_end_thread: root still active, abort T#%d\n",
6366 KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",
6368 __kmp_unregister_root_current_thread(gtid);
6372 KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6375 __kmp_threads[gtid]->th.th_task_team = NULL;
6379 ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6385 if (__kmp_pause_status != kmp_hard_paused)
6389 KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6394 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6397 if (__kmp_global.g.g_abort) {
6398 KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n"));
6400 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6403 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6404 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6415 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6417 for (i = 0; i < __kmp_threads_capacity; ++i) {
6418 if (KMP_UBER_GTID(i)) {
6421 ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6422 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6423 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6430 __kmp_internal_end();
6432 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6433 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6435 KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req));
6437 #ifdef DUMP_DEBUG_ON_EXIT
6438 if (__kmp_debug_buf)
6439 __kmp_dump_debug_buffer();
6446 static long __kmp_registration_flag = 0;
6448 static char *__kmp_registration_str = NULL;
6451 static inline char *__kmp_reg_status_name() {
6457 #if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB
6458 return __kmp_str_format("__KMP_REGISTERED_LIB_%d_%d", (int)getpid(),
6461 return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
6465 void __kmp_register_library_startup(void) {
6467 char *name = __kmp_reg_status_name();
6473 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
6474 __kmp_initialize_system_tick();
6476 __kmp_read_system_time(&time.dtime);
6477 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
6478 __kmp_registration_str =
6479 __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
6480 __kmp_registration_flag, KMP_LIBRARY_FILE);
6482 KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
6483 __kmp_registration_str));
6489 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
6490 char *shm_name = __kmp_str_format("/%s", name);
6491 int shm_preexist = 0;
6493 int fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0666);
6494 if ((fd1 == -1) && (errno == EEXIST)) {
6497 fd1 = shm_open(shm_name, O_RDWR, 0666);
6500 __kmp_fatal(KMP_MSG(FunctionError, "Can't open SHM"), KMP_ERR(0),
6506 } else if (fd1 == -1) {
6509 __kmp_fatal(KMP_MSG(FunctionError, "Can't open SHM2"), KMP_ERR(errno),
6512 if (shm_preexist == 0) {
6514 if (ftruncate(fd1, SHM_SIZE) == -1) {
6516 __kmp_fatal(KMP_MSG(FunctionError, "Can't set size of SHM"),
6517 KMP_ERR(errno), __kmp_msg_null);
6521 (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd1, 0);
6522 if (data1 == MAP_FAILED) {
6524 __kmp_fatal(KMP_MSG(FunctionError, "Can't map SHM"), KMP_ERR(errno),
6527 if (shm_preexist == 0) {
6528 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
6531 value = __kmp_str_format("%s", data1);
6532 munmap(data1, SHM_SIZE);
6536 __kmp_env_set(name, __kmp_registration_str, 0);
6538 value = __kmp_env_get(name);
6541 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6548 char *flag_addr_str = NULL;
6549 char *flag_val_str = NULL;
6550 char const *file_name = NULL;
6551 __kmp_str_split(tail, '-', &flag_addr_str, &tail);
6552 __kmp_str_split(tail, '-', &flag_val_str, &tail);
6555 unsigned long *flag_addr = 0;
6556 unsigned long flag_val = 0;
6557 KMP_SSCANF(flag_addr_str, "%p", RCAST(void **, &flag_addr));
6558 KMP_SSCANF(flag_val_str, "%lx", &flag_val);
6559 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
6563 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6577 file_name = "unknown library";
6582 char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK");
6583 if (!__kmp_str_match_true(duplicate_ok)) {
6585 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6586 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6588 KMP_INTERNAL_FREE(duplicate_ok);
6589 __kmp_duplicate_library_ok = 1;
6594 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
6596 shm_unlink(shm_name);
6599 __kmp_env_unset(name);
6603 KMP_DEBUG_ASSERT(0);
6607 KMP_INTERNAL_FREE((void *)value);
6608 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
6609 KMP_INTERNAL_FREE((void *)shm_name);
6612 KMP_INTERNAL_FREE((void *)name);
6616 void __kmp_unregister_library(void) {
6618 char *name = __kmp_reg_status_name();
6621 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
6622 char *shm_name = __kmp_str_format("/%s", name);
6623 int fd1 = shm_open(shm_name, O_RDONLY, 0666);
6628 char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
6629 if (data1 != MAP_FAILED) {
6630 value = __kmp_str_format("%s", data1);
6631 munmap(data1, SHM_SIZE);
6635 value = __kmp_env_get(name);
6638 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6639 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6640 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6642 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
6643 shm_unlink(shm_name);
6645 __kmp_env_unset(name);
6649 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
6650 KMP_INTERNAL_FREE(shm_name);
6653 KMP_INTERNAL_FREE(__kmp_registration_str);
6654 KMP_INTERNAL_FREE(value);
6655 KMP_INTERNAL_FREE(name);
6657 __kmp_registration_flag = 0;
6658 __kmp_registration_str = NULL;
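// Registration publishes a record of the form "%p-%lx-%s" (address of
// __kmp_registration_flag, its value, the library file name) either in the
// __KMP_REGISTERED_LIB_<pid> environment variable or, for dynamic builds on
// Unix, in a POSIX shared-memory segment named "/__KMP_REGISTERED_LIB_<pid>".
// A second copy of the runtime loaded into the same process reads the
// record, checks that the advertised flag address is mapped and still holds
// the advertised value, and aborts with the DuplicateLibrary message unless
// KMP_DUPLICATE_LIB_OK is set; __kmp_unregister_library removes the record
// only if it still matches the string this copy wrote.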
6665 #if KMP_MIC_SUPPORTED
6667 static void __kmp_check_mic_type() {
6668 kmp_cpuid_t cpuid_state = {0};
6669 kmp_cpuid_t *cs_p = &cpuid_state;
6670 __kmp_x86_cpuid(1, 0, cs_p);
6672 if ((cs_p->eax & 0xff0) == 0xB10) {
6673 __kmp_mic_type = mic2;
6674 } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
6675 __kmp_mic_type = mic3;
6677 __kmp_mic_type = non_mic;
6684 static void __kmp_user_level_mwait_init() {
6685 struct kmp_cpuid buf;
6686 __kmp_x86_cpuid(7, 0, &buf);
6687 __kmp_umwait_enabled = ((buf.ecx >> 5) & 1) && __kmp_user_level_mwait;
6688 KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
6689 __kmp_umwait_enabled));
6691 #elif KMP_HAVE_MWAIT
6692 #ifndef AT_INTELPHIUSERMWAIT
6695 #define AT_INTELPHIUSERMWAIT 10000
6700 unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
6701 unsigned long getauxval(unsigned long) { return 0; }
6703 static void __kmp_user_level_mwait_init() {
6708 if (__kmp_mic_type == mic3) {
6709 unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
6710 if ((res & 0x1) || __kmp_user_level_mwait) {
6711 __kmp_mwait_enabled = TRUE;
6712 if (__kmp_user_level_mwait) {
6713 KMP_INFORM(EnvMwaitWarn);
6716 __kmp_mwait_enabled = FALSE;
6719 KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
6720 "__kmp_mwait_enabled = %d\n",
6721 __kmp_mic_type, __kmp_mwait_enabled));
6725 static void __kmp_do_serial_initialize(void) {
6729 KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));
6731 KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
6732 KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
6733 KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
6734 KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
6735 KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
6745 __kmp_validate_locks();
6748 __kmp_init_allocator();
6753 __kmp_register_library_startup();
6756 if (TCR_4(__kmp_global.g.g_done)) {
6757 KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n"));
6760 __kmp_global.g.g_abort = 0;
6761 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
6764 #if KMP_USE_ADAPTIVE_LOCKS
6765 #if KMP_DEBUG_ADAPTIVE_LOCKS
6766 __kmp_init_speculative_stats();
6769 #if KMP_STATS_ENABLED
6772 __kmp_init_lock(&__kmp_global_lock);
6773 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
6774 __kmp_init_lock(&__kmp_debug_lock);
6775 __kmp_init_atomic_lock(&__kmp_atomic_lock);
6776 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
6777 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
6778 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
6779 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
6780 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
6781 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
6782 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
6783 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
6784 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
6785 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
6786 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
6787 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
6788 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
6789 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
6791 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
6793 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
6797 __kmp_runtime_initialize();
6799 #if KMP_MIC_SUPPORTED
6800 __kmp_check_mic_type();
6807 __kmp_abort_delay = 0;
6811 __kmp_dflt_team_nth_ub = __kmp_xproc;
6812 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
6813 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6815 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
6816 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6818 __kmp_max_nth = __kmp_sys_max_nth;
6819 __kmp_cg_max_nth = __kmp_sys_max_nth;
6820 __kmp_teams_max_nth = __kmp_xproc;
6821 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
6822 __kmp_teams_max_nth = __kmp_sys_max_nth;
6827 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
6829 __kmp_monitor_wakeups =
6830 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
6831 __kmp_bt_intervals =
6832 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
6835 __kmp_library = library_throughput;
6837 __kmp_static = kmp_sch_static_balanced;
6844 #if KMP_FAST_REDUCTION_BARRIER
6845 #define kmp_reduction_barrier_gather_bb ((int)1)
6846 #define kmp_reduction_barrier_release_bb ((int)1)
6847 #define kmp_reduction_barrier_gather_pat bp_hyper_bar
6848 #define kmp_reduction_barrier_release_pat bp_hyper_bar
6850 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
6851 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
6852 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
6853 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
6854 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
6855 #if KMP_FAST_REDUCTION_BARRIER
6856 if (i == bs_reduction_barrier) {
6858 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
6859 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
6860 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
6861 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
6865 #if KMP_FAST_REDUCTION_BARRIER
6866 #undef kmp_reduction_barrier_release_pat
6867 #undef kmp_reduction_barrier_gather_pat
6868 #undef kmp_reduction_barrier_release_bb
6869 #undef kmp_reduction_barrier_gather_bb
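// The branch bits configured above control barrier fan-in/fan-out: a tree
// or hyper barrier with branch_bits == b gives each non-leaf thread 2^b
// children, so b == 2 means a fan-in of 4 and roughly log4(nproc) gather
// rounds, while the reduction barrier is pinned here to a hyper pattern
// with branch_bits == 1 (fan-in of 2). These are only the built-in
// defaults; KMP_*_BARRIER settings processed later in __kmp_env_initialize
// can still override them.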
6871 #if KMP_MIC_SUPPORTED
6872 if (__kmp_mic_type == mic2) {
6874 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3;
6875 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
6877 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6878 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6880 #if KMP_FAST_REDUCTION_BARRIER
6881 if (__kmp_mic_type == mic2) {
6882 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6883 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6890 __kmp_env_checks = TRUE;
6892 __kmp_env_checks = FALSE;
6896 __kmp_foreign_tp = TRUE;
6898 __kmp_global.g.g_dynamic = FALSE;
6899 __kmp_global.g.g_dynamic_mode = dynamic_default;
6901 __kmp_init_nesting_mode();
6903 __kmp_env_initialize(NULL);
6905 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
6906 __kmp_user_level_mwait_init();
6910 char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
6911 if (__kmp_str_match_true(val)) {
6912 kmp_str_buf_t buffer;
6913 __kmp_str_buf_init(&buffer);
6914 __kmp_i18n_dump_catalog(&buffer);
6915 __kmp_printf("%s", buffer.str);
6916 __kmp_str_buf_free(&buffer);
6918 __kmp_env_free(&val);
6921 __kmp_threads_capacity =
6922 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
6924 __kmp_tp_capacity = __kmp_default_tp_capacity(
6925 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
6930 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
6931 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
6932 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
6933 __kmp_thread_pool = NULL;
6934 __kmp_thread_pool_insert_pt = NULL;
6935 __kmp_team_pool = NULL;
6942 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
6944 __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
6945 __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
6946 sizeof(kmp_info_t *) * __kmp_threads_capacity);
6949 KMP_DEBUG_ASSERT(__kmp_all_nth ==
6951 KMP_DEBUG_ASSERT(__kmp_nth == 0);
6956 gtid = __kmp_register_root(TRUE);
6957 KA_TRACE(10, ("__kmp_do_serial_initialize T#%d\n", gtid));
6958 KMP_ASSERT(KMP_UBER_GTID(gtid));
6959 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
6963 __kmp_common_initialize();
6967 __kmp_register_atfork();
6970 #if !KMP_DYNAMIC_LIB
6974 int rc = atexit(__kmp_internal_end_atexit);
6976 __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
6982 #if KMP_HANDLE_SIGNALS
6988 __kmp_install_signals(FALSE);
6991 __kmp_install_signals(TRUE);
6996 __kmp_init_counter++;
6998 __kmp_init_serial = TRUE;
7000 if (__kmp_settings) {
7004 if (__kmp_display_env || __kmp_display_env_verbose) {
7005 __kmp_env_print_2();
7014 KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"));
7017 void __kmp_serial_initialize(void) {
7018 if (__kmp_init_serial) {
7021 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7022 if (__kmp_init_serial) {
7023 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7026 __kmp_do_serial_initialize();
7027 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7030 static void __kmp_do_middle_initialize(void) {
7032 int prev_dflt_team_nth;
7034 if (!__kmp_init_serial) {
7035 __kmp_do_serial_initialize();
7038 KA_TRACE(10, ("__kmp_middle_initialize: enter\n"));
7042 prev_dflt_team_nth = __kmp_dflt_team_nth;
7044 #if KMP_AFFINITY_SUPPORTED
7047 __kmp_affinity_initialize();
7051 KMP_ASSERT(__kmp_xproc > 0);
7052 if (__kmp_avail_proc == 0) {
7053 __kmp_avail_proc = __kmp_xproc;
7059 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
7060 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
7065 if (__kmp_dflt_team_nth == 0) {
7066 #ifdef KMP_DFLT_NTH_CORES
7068 __kmp_dflt_team_nth = __kmp_ncores;
7069 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7070 "__kmp_ncores (%d)\n",
7071 __kmp_dflt_team_nth));
7074 __kmp_dflt_team_nth = __kmp_avail_proc;
7075 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7076 "__kmp_avail_proc(%d)\n",
7077 __kmp_dflt_team_nth));
7081 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
7082 __kmp_dflt_team_nth = KMP_MIN_NTH;
7084 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
7085 __kmp_dflt_team_nth = __kmp_sys_max_nth;
7088 if (__kmp_nesting_mode > 0)
7089 __kmp_set_nesting_mode_threads();
7093 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
7095 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
7100 for (i = 0; i < __kmp_threads_capacity; i++) {
7101 kmp_info_t *thread = __kmp_threads[i];
7104 if (thread->th.th_current_task->td_icvs.nproc != 0)
7107 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
7112 ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
7113 __kmp_dflt_team_nth));
7115 #ifdef KMP_ADJUST_BLOCKTIME
7117 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
7118 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
7119 if (__kmp_nth > __kmp_avail_proc) {
7120 __kmp_zero_bt = TRUE;
7126 TCW_SYNC_4(__kmp_init_middle, TRUE);
7128 KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"));
7131 void __kmp_middle_initialize(void) {
7132 if (__kmp_init_middle) {
7135 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7136 if (__kmp_init_middle) {
7137 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7140 __kmp_do_middle_initialize();
7141 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7144 void __kmp_parallel_initialize(void) {
7145 int gtid = __kmp_entry_gtid();
7148 if (TCR_4(__kmp_init_parallel))
7150 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7151 if (TCR_4(__kmp_init_parallel)) {
7152 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7157 if (TCR_4(__kmp_global.g.g_done)) {
7160 ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
7161 __kmp_infinite_loop();
7167 if (!__kmp_init_middle) {
7168 __kmp_do_middle_initialize();
7170 __kmp_assign_root_init_mask();
7171 __kmp_resume_if_hard_paused();
7174 KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));
7175 KMP_ASSERT(KMP_UBER_GTID(gtid));
7177 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
7180 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
7181 __kmp_store_mxcsr(&__kmp_init_mxcsr);
7182 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
7186 #if KMP_HANDLE_SIGNALS
7188 __kmp_install_signals(TRUE);
7192 __kmp_suspend_initialize();
7194 #if defined(USE_LOAD_BALANCE)
7195 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7196 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
7199 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7200 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7204 if (__kmp_version) {
7205 __kmp_print_version_2();
7209 TCW_SYNC_4(__kmp_init_parallel, TRUE);
7212 KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));
7214 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7217 void __kmp_hidden_helper_initialize() {
7218 if (TCR_4(__kmp_init_hidden_helper))
7222 if (!TCR_4(__kmp_init_parallel))
7223 __kmp_parallel_initialize();
7227 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7228 if (TCR_4(__kmp_init_hidden_helper)) {
7229 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7234 KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);
7238 TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE);
7241 __kmp_do_initialize_hidden_helper_threads();
7244 __kmp_hidden_helper_threads_initz_wait();
7247 TCW_SYNC_4(__kmp_init_hidden_helper, TRUE);
7249 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
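// Hidden-helper initialization follows the same double-checked locking
// pattern, but delegates the actual thread creation to
// __kmp_do_initialize_hidden_helper_threads and then blocks in
// __kmp_hidden_helper_threads_initz_wait so the helper team is fully up
// before __kmp_init_hidden_helper is published.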
7254 void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
7256 kmp_disp_t *dispatch;
7261 this_thr->th.th_local.this_construct = 0;
7262 #if KMP_CACHE_MANAGE
7263 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
7265 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
7266 KMP_DEBUG_ASSERT(dispatch);
7267 KMP_DEBUG_ASSERT(team->t.t_dispatch);
7271 dispatch->th_disp_index = 0;
7272 dispatch->th_doacross_buf_idx = 0;
7273 if (__kmp_env_consistency_check)
7274 __kmp_push_parallel(gtid, team->t.t_ident);
7279 void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
7281 if (__kmp_env_consistency_check)
7282 __kmp_pop_parallel(gtid, team->t.t_ident);
7284 __kmp_finish_implicit_task(this_thr);
7287 int __kmp_invoke_task_func(int gtid) {
7289 int tid = __kmp_tid_from_gtid(gtid);
7290 kmp_info_t *this_thr = __kmp_threads[gtid];
7291 kmp_team_t *team = this_thr->th.th_team;
7293 __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
7295 if (__itt_stack_caller_create_ptr) {
7297 if (team->t.t_stack_id != NULL) {
7298 __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
7300 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
7301 __kmp_itt_stack_callee_enter(
7302 (__itt_caller)team->t.t_parent->t.t_stack_id);
7306 #if INCLUDE_SSC_MARKS
7307 SSC_MARK_INVOKING();
7312 void **exit_frame_p;
7313 ompt_data_t *my_task_data;
7314 ompt_data_t *my_parallel_data;
7317 if (ompt_enabled.enabled) {
7318 exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
7319 .ompt_task_info.frame.exit_frame.ptr);
7321 exit_frame_p = &dummy;
7325 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
7326 my_parallel_data = &(team->t.ompt_team_info.parallel_data);
7327 if (ompt_enabled.ompt_callback_implicit_task) {
7328 ompt_team_size = team->t.t_nproc;
7329 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7330 ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
7331 __kmp_tid_from_gtid(gtid), ompt_task_implicit);
7332 OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
7336 #if KMP_STATS_ENABLED
7338 if (previous_state == stats_state_e::TEAMS_REGION) {
7339 KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
7341 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
7343 KMP_SET_THREAD_STATE(IMPLICIT_TASK);
7346 rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
7347 tid, (int)team->t.t_argc, (void **)team->t.t_argv
7354 *exit_frame_p = NULL;
7355 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
7358 #if KMP_STATS_ENABLED
7359 if (previous_state == stats_state_e::TEAMS_REGION) {
7360 KMP_SET_THREAD_STATE(previous_state);
7362 KMP_POP_PARTITIONED_TIMER();
7366 if (__itt_stack_caller_create_ptr) {
7368 if (team->t.t_stack_id != NULL) {
7369 __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
7371 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
7372 __kmp_itt_stack_callee_leave(
7373 (__itt_caller)team->t.t_parent->t.t_stack_id);
7377 __kmp_run_after_invoked_task(gtid, tid, this_thr, team);
7382 void __kmp_teams_master(int gtid) {
7384 kmp_info_t *thr = __kmp_threads[gtid];
7385 kmp_team_t *team = thr->th.th_team;
7386 ident_t *loc = team->t.t_ident;
7387 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
7388 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
7389 KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
7390 KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
7391 __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
7394 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
7397 tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
7398 tmp->cg_nthreads = 1;
7399 KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"
7400 " cg_nthreads to 1\n",
7402 tmp->up = thr->th.th_cg_roots;
7403 thr->th.th_cg_roots = tmp;
7407 #if INCLUDE_SSC_MARKS
7410 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
7411 (microtask_t)thr->th.th_teams_microtask,
7412 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
7413 #if INCLUDE_SSC_MARKS
7417 if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
7418 thr->th.th_teams_size.nth = thr->th.th_team_nproc;
7421 __kmp_join_call(loc, gtid
7430 int __kmp_invoke_teams_master(int gtid) {
7431 kmp_info_t *this_thr = __kmp_threads[gtid];
7432 kmp_team_t *team = this_thr->th.th_team;
7434 if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
7435 KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
7436 (void *)__kmp_teams_master);
7438 __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
7440 int tid = __kmp_tid_from_gtid(gtid);
7441 ompt_data_t *task_data =
7442 &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
7443 ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
7444 if (ompt_enabled.ompt_callback_implicit_task) {
7445 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7446 ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
7448 OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
7451 __kmp_teams_master(gtid);
7453 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
7455 __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
7464 void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
7465 kmp_info_t *thr = __kmp_threads[gtid];
7467 if (num_threads > 0)
7468 thr->th.th_set_nproc = num_threads;
7471 static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
7473 KMP_DEBUG_ASSERT(thr);
7475 if (!TCR_4(__kmp_init_middle))
7476 __kmp_middle_initialize();
7477 __kmp_assign_root_init_mask();
7478 KMP_DEBUG_ASSERT(__kmp_avail_proc);
7479 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);
7481 if (num_threads == 0) {
7482 if (__kmp_teams_thread_limit > 0) {
7483 num_threads = __kmp_teams_thread_limit;
7485 num_threads = __kmp_avail_proc / num_teams;
7490 if (num_threads > __kmp_dflt_team_nth) {
7491 num_threads = __kmp_dflt_team_nth;
7493 if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
7494 num_threads = thr->th.th_current_task->td_icvs.thread_limit;
7496 if (num_teams * num_threads > __kmp_teams_max_nth) {
7497 num_threads = __kmp_teams_max_nth / num_teams;
7499 if (num_threads == 0) {
7505 thr->th.th_current_task->td_icvs.thread_limit = num_threads;
7507 if (num_threads > __kmp_dflt_team_nth) {
7508 num_threads = __kmp_dflt_team_nth;
7510 if (num_teams * num_threads > __kmp_teams_max_nth) {
7511 int new_threads = __kmp_teams_max_nth / num_teams;
7512 if (new_threads == 0) {
7515 if (new_threads != num_threads) {
7516 if (!__kmp_reserve_warn) {
7517 __kmp_reserve_warn = 1;
7518 __kmp_msg(kmp_ms_warning,
7519 KMP_MSG(CantFormThrTeam, num_threads, new_threads),
7520 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7523 num_threads = new_threads;
7526 thr->th.th_teams_size.nth = num_threads;
/* Change number of teams and thread limit for the next teams construct */
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                          int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(num_teams >= 0);
  KMP_DEBUG_ASSERT(num_threads >= 0);

  if (num_teams == 0) {
    if (__kmp_nteams > 0) {
      num_teams = __kmp_nteams;
    } else {
      num_teams = 1; // default number of teams is 1
    }
  }
  if (num_teams > __kmp_teams_max_nth) { // too many teams requested?
    if (!__kmp_reserve_warn) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    num_teams = __kmp_teams_max_nth;
  }
  // Set number of teams (number of threads in the outer "parallel" of the
  // teams)
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  __kmp_push_thread_limit(thr, num_teams, num_threads);
}
void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
                             int num_teams_ub, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);
  KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
  KMP_DEBUG_ASSERT(num_threads >= 0);

  if (num_teams_lb > num_teams_ub) {
    __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
                KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null);
  }

  int num_teams = 1; // default number of teams is 1

  if (num_teams_lb == 0 && num_teams_ub > 0)
    num_teams_lb = num_teams_ub;

  if (num_teams_lb == 0 && num_teams_ub == 0) { // no num_teams clause
    num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
    if (num_teams > __kmp_teams_max_nth) {
      if (!__kmp_reserve_warn) {
        __kmp_reserve_warn = 1;
        __kmp_msg(kmp_ms_warning,
                  KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                  KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
      }
      num_teams = __kmp_teams_max_nth;
    }
  } else if (num_teams_lb == num_teams_ub) { // requires exact number of teams
    num_teams = num_teams_ub;
  } else { // num_teams_lb <= num_teams <= num_teams_ub
    if (num_threads == 0) {
      if (num_teams_ub > __kmp_teams_max_nth) {
        num_teams = num_teams_lb;
      } else {
        num_teams = num_teams_ub;
      }
    } else {
      num_teams = (num_threads > __kmp_teams_max_nth)
                      ? num_teams
                      : __kmp_teams_max_nth / num_threads;
      if (num_teams < num_teams_lb) {
        num_teams = num_teams_lb;
      } else if (num_teams > num_teams_ub) {
        num_teams = num_teams_ub;
      }
    }
  }
  // Set number of teams (number of threads in the outer "parallel" of the
  // teams)
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  __kmp_push_thread_limit(thr, num_teams, num_threads);
}
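
/* Usage sketch (illustrative): __kmp_push_num_teams_51() backs the OpenMP 5.1
   form of the num_teams clause with a lower and an upper bound; the runtime
   picks a team count within [lb, ub], subject to __kmp_teams_max_nth. The
   example assumes a compiler that accepts the 5.1 num_teams(lb : ub) syntax.

     #include <omp.h>
     #include <stdio.h>

     int main(void) {
       #pragma omp teams num_teams(2 : 8) thread_limit(4)
       {
         if (omp_get_team_num() == 0)
           printf("league has %d teams\n", omp_get_num_teams());
       }
       return 0;
     }
*/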
// Set the proc_bind policy to use in the following parallel region.
void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
  kmp_info_t *thr = __kmp_threads[gtid];
  thr->th.th_set_proc_bind = proc_bind;
}
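
/* Usage sketch (illustrative): th_set_proc_bind stored here is consumed by the
   affinity code of the next parallel region. The proc_bind clause is the
   per-region trigger; OMP_PROC_BIND / OMP_PLACES set the same policy globally.

     #include <omp.h>
     #include <stdio.h>

     int main(void) {
       // Bind the threads of this one region close to the primary thread.
       #pragma omp parallel proc_bind(close) num_threads(4)
       printf("thread %d runs on place %d\n", omp_get_thread_num(),
              omp_get_place_num());
       return 0;
     }
*/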
/* Launch the worker threads into the microtask. */
void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates.  */

  team->t.t_construct = 0; /* no single directives seen yet */
  team->t.t_ordered.dt.t_value =
      0; /* thread 0 enters the ordered section first */

  /* Reset the identifiers on the dispatch buffers */
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  if (team->t.t_max_nproc > 1) {
    int i;
    for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
      team->t.t_disp_buffer[i].buffer_index = i;
      team->t.t_disp_buffer[i].doacross_buf_idx = i;
    }
  } else {
    team->t.t_disp_buffer[0].buffer_index = 0;
    team->t.t_disp_buffer[0].doacross_buf_idx = 0;
  }

  KMP_MB(); /* Flush all pending memory write invalidates.  */
  KMP_ASSERT(this_thr->th.th_team == team);

#ifdef KMP_DEBUG
  for (int f = 0; f < team->t.t_nproc; f++) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                     team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
  }
#endif /* KMP_DEBUG */

  /* release the worker threads so they may begin working */
  __kmp_fork_barrier(gtid, 0);
}
void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));

#ifdef KMP_DEBUG
  if (__kmp_threads[gtid] &&
      __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
    __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
                 __kmp_threads[gtid]);
    __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
                 "team->t.t_nproc=%d\n",
                 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
                 team->t.t_nproc);
    __kmp_print_structure();
  }
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
#endif /* KMP_DEBUG */

  __kmp_join_barrier(gtid); /* wait for everyone */
#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
    int ds_tid = this_thr->th.th_info.ds.ds_tid;
    ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (KMP_MASTER_TID(ds_tid) &&
        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
      codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;

    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
          codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
          codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, task_data, 0, ds_tid, ompt_task_implicit);
    }
  }
#endif

  KMP_MB(); /* Flush all pending memory write invalidates.  */
  KMP_ASSERT(this_thr->th.th_team == team);
}
#ifdef USE_LOAD_BALANCE

// Return the number of threads actively spinning in the hot team if we are at
// the outermost level of parallelism; otherwise, return 0.
static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
  int i;
  int retval;
  kmp_team_t *hot_team;

  if (root->r.r_active) {
    return 0;
  }
  hot_team = root->r.r_hot_team;
  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
    return hot_team->t.t_nproc - 1; // don't count primary thread
  }

  // Skip the primary thread - it is accounted for elsewhere.
  retval = 0;
  for (i = 1; i < hot_team->t.t_nproc; i++) {
    if (hot_team->t.t_threads[i]->th.th_active) {
      retval++;
    }
  }
  return retval;
}

// Perform an automatic adjustment to the number of threads used by the next
// parallel region.
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
  int retval;
  int pool_active;
  int hot_team_active;
  int team_curr_active;
  int system_active;

  KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
                set_nproc));
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
                       ->th.th_current_task->td_icvs.dynamic == TRUE);
  KMP_DEBUG_ASSERT(set_nproc > 1);

  if (set_nproc == 1) {
    KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
    return 1;
  }

  // Threads active in the thread pool, active in the hot team for this root,
  // and the currently executing thread are available to add to the new team,
  // but currently contribute to the system load and must be accounted for.
  pool_active = __kmp_thread_pool_active_nth;
  hot_team_active = __kmp_active_hot_team_nproc(root);
  team_curr_active = pool_active + hot_team_active + 1;

  // Check the system load.
  system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
  KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
                "hot team active = %d\n",
                system_active, pool_active, hot_team_active));

  if (system_active < 0) {
    // There was an error reading the necessary info from /proc, so use the
    // thread limit algorithm instead.
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
    KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");

    retval = __kmp_avail_proc - __kmp_nth +
             (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (retval > set_nproc) {
      retval = set_nproc;
    }
    if (retval < KMP_MIN_NTH) {
      retval = KMP_MIN_NTH;
    }

    KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
                  retval));
    return retval;
  }

  // There is a slight delay in the load balance algorithm in detecting newly
  // running procs; the system load should be at least as large as the number
  // of active OpenMP threads available to add to the team.
  if (system_active < team_curr_active) {
    system_active = team_curr_active;
  }
  retval = __kmp_avail_proc - system_active + team_curr_active;
  if (retval > set_nproc) {
    retval = set_nproc;
  }
  if (retval < KMP_MIN_NTH) {
    retval = KMP_MIN_NTH;
  }

  KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
  return retval;
}

#endif /* USE_LOAD_BALANCE */
void __kmp_cleanup(void) {
  int f;

  KA_TRACE(10, ("__kmp_cleanup: enter\n"));

  if (TCR_4(__kmp_init_parallel)) {
#if KMP_HANDLE_SIGNALS
    __kmp_remove_signals();
#endif
    TCW_4(__kmp_init_parallel, FALSE);
  }

  if (TCR_4(__kmp_init_middle)) {
#if KMP_AFFINITY_SUPPORTED
    __kmp_affinity_uninitialize();
#endif /* KMP_AFFINITY_SUPPORTED */
    __kmp_cleanup_hierarchy();
    TCW_4(__kmp_init_middle, FALSE);
  }

  KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));

  if (__kmp_init_serial) {
    __kmp_runtime_destroy();
    __kmp_init_serial = FALSE;
  }

  __kmp_cleanup_threadprivate_caches();

  for (f = 0; f < __kmp_threads_capacity; f++) {
    if (__kmp_root[f] != NULL) {
      __kmp_free(__kmp_root[f]);
      __kmp_root[f] = NULL;
    }
  }
  __kmp_free(__kmp_threads);
  // __kmp_threads and __kmp_root were allocated at once, as a single block, so
  // there is no need in freeing __kmp_root.
  __kmp_threads = NULL;
  __kmp_root = NULL;
  __kmp_threads_capacity = 0;

#if KMP_USE_DYNAMIC_LOCK
  __kmp_cleanup_indirect_user_locks();
#else
  __kmp_cleanup_user_locks();
#endif
#if OMPD_SUPPORT
  if (ompd_state) {
    __kmp_free(ompd_env_block);
    ompd_env_block = NULL;
    ompd_env_block_size = 0;
  }
#endif

#if KMP_AFFINITY_SUPPORTED
  KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
  __kmp_cpuinfo_file = NULL;
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_print_speculative_stats();
#endif
#endif
  KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
  __kmp_nested_nth.nth = NULL;
  __kmp_nested_nth.size = 0;
  __kmp_nested_nth.used = 0;
  KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
  __kmp_nested_proc_bind.bind_types = NULL;
  __kmp_nested_proc_bind.size = 0;
  __kmp_nested_proc_bind.used = 0;
  if (__kmp_affinity_format) {
    KMP_INTERNAL_FREE(__kmp_affinity_format);
    __kmp_affinity_format = NULL;
  }

  __kmp_i18n_catclose();

#if KMP_USE_HIER_SCHED
  __kmp_hier_scheds.deallocate();
#endif

#if KMP_STATS_ENABLED
  __kmp_stats_fini();
#endif

  KA_TRACE(10, ("__kmp_cleanup: exit\n"));
}
int __kmp_ignore_mppbeg(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_begin() is no-op.
  return TRUE;
}

int __kmp_ignore_mppend(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_end() is no-op.
  return TRUE;
}
void __kmp_internal_begin(void) {
  int gtid;
  kmp_root_t *root;

  /* this is a very important step as it will register new sibling threads
     and assign these new uber threads a new gtid */
  gtid = __kmp_entry_gtid();
  root = __kmp_threads[gtid]->th.th_root;
  KMP_ASSERT(KMP_UBER_GTID(gtid));

  if (root->r.r_begin)
    return;
  __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
  if (root->r.r_begin) {
    __kmp_release_lock(&root->r.r_begin_lock, gtid);
    return;
  }

  root->r.r_begin = TRUE;

  __kmp_release_lock(&root->r.r_begin_lock, gtid);
}
void __kmp_user_set_library(enum library_type arg) {
  int gtid;
  kmp_root_t *root;
  kmp_info_t *thread;

  /* first, make sure we are initialized so we can get our gtid */
  gtid = __kmp_entry_gtid();
  thread = __kmp_threads[gtid];

  root = thread->th.th_root;

  KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
                library_serial));
  if (root->r.r_in_parallel) { /* Must be called in serial section of top-level
                                  thread */
    KMP_WARNING(SetLibraryIncorrectCall);
    return;
  }

  switch (arg) {
  case library_serial:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, 1);
    break;
  case library_turnaround:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  case library_throughput:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }

  __kmp_aux_set_library(arg);
}
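
/* Usage sketch (illustrative): __kmp_user_set_library() sits behind the
   Intel-compatible extensions declared in this runtime's omp.h
   (kmp_set_library() and the kmp_set_library_* helpers) and behind the
   KMP_LIBRARY environment variable.

     #include <omp.h>
     #include <stdio.h>

     int main(void) {
       // Equivalent to KMP_LIBRARY=throughput: workers spin for the blocktime
       // and then sleep, which suits dedicated (non-shared) machines.
       kmp_set_library_throughput();
       #pragma omp parallel
       {
         #pragma omp single
         printf("running with %d threads\n", omp_get_num_threads());
       }
       return 0;
     }
*/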
void __kmp_aux_set_stacksize(size_t arg) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

#if KMP_OS_DARWIN
  if (arg & (0x1000 - 1)) {
    arg &= ~(0x1000 - 1);
    if (arg + 0x1000) /* check for overflow if we round up */
      arg += 0x1000;
  }
#endif /* KMP_OS_DARWIN */
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* only change the default stacksize before the first parallel region */
  if (!TCR_4(__kmp_init_parallel)) {
    size_t value = arg; /* argument is in bytes */

    if (value < __kmp_sys_min_stksize)
      value = __kmp_sys_min_stksize;
    else if (value > KMP_MAX_STKSIZE)
      value = KMP_MAX_STKSIZE;

    __kmp_stksize = value;

    __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
  }

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
void __kmp_aux_set_library(enum library_type arg) {
  __kmp_library = arg;

  switch (__kmp_library) {
  case library_serial: {
    KMP_INFORM(LibraryIsSerial);
  } break;
  case library_turnaround:
    if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
      __kmp_use_yield = 2; // only yield when oversubscribed
    break;
  case library_throughput:
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
      __kmp_dflt_blocktime = 200;
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }
}
// Getting team information common for all team API
static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
  kmp_info_t *thr = __kmp_entry_thread();
  teams_serialized = 0;
  if (thr->th.th_teams_microtask) {
    kmp_team_t *team = thr->th.th_team;
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    int ii = team->t.t_level;
    teams_serialized = team->t.t_serialized;
    int level = tlevel + 1;
    KMP_DEBUG_ASSERT(ii >= tlevel);
    while (ii > level) {
      for (teams_serialized = team->t.t_serialized;
           (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
      }
      if (team->t.t_serialized && (!teams_serialized)) {
        team = team->t.t_parent;
        continue;
      }
      if (ii > level) {
        team = team->t.t_parent;
        ii--;
      }
    }
    return team;
  }
  return NULL;
}

int __kmp_aux_get_team_num() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1) {
      return 0; // teams region is serialized (1 team of 1 thread)
    } else {
      return team->t.t_master_tid;
    }
  }
  return 0;
}

int __kmp_aux_get_num_teams() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1) {
      return 1;
    } else {
      return team->t.t_parent->t.t_nproc;
    }
  }
  return 1;
}
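
/* Usage sketch (illustrative): __kmp_aux_get_team_num() and
   __kmp_aux_get_num_teams() back the standard omp_get_team_num() and
   omp_get_num_teams() entry points; outside a teams region they report
   team 0 of 1 team.

     #include <omp.h>
     #include <stdio.h>

     int main(void) {
       printf("outside teams: %d of %d\n", omp_get_team_num(),
              omp_get_num_teams()); // prints "0 of 1"
       #pragma omp teams num_teams(3)
       printf("inside teams: %d of %d\n", omp_get_team_num(),
              omp_get_num_teams());
       return 0;
     }
*/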
// Functions for OpenMP 5.0 affinity format API

typedef struct kmp_affinity_format_field_t {
  char short_name; // from spec e.g., L, N, A, etc.
  const char *long_name; // from spec e.g., nesting_level, num_threads, etc.
  char field_format; // data type for snprintf (typically 'd' or 's')
} kmp_affinity_format_field_t;

static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
#if KMP_AFFINITY_SUPPORTED
    {'A', "thread_affinity", 's'},
#endif
    {'t', "team_num", 'd'},
    {'T', "num_teams", 'd'},
    {'L', "nesting_level", 'd'},
    {'n', "thread_num", 'd'},
    {'N', "num_threads", 'd'},
    {'a', "ancestor_tnum", 'd'},
    {'H', "host", 's'},
    {'P', "process_id", 'd'},
    {'i', "native_thread_id", 'd'}};
// Parse a single formatting field of the affinity format string.
// Format is %[[[0].]size]type.  Returns the number of characters consumed.
static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
                                            const char **ptr,
                                            kmp_str_buf_t *field_buffer) {
  int rc, format_index, field_value;
  const char *width_left, *width_right;
  bool pad_zeros, right_justify, parse_long_name, found_valid_name;
  static const int FORMAT_SIZE = 20;
  char format[FORMAT_SIZE] = {0};
  char absolute_short_name = 0;

  KMP_DEBUG_ASSERT(gtid >= 0);
  KMP_DEBUG_ASSERT(th);
  KMP_DEBUG_ASSERT(**ptr == '%');
  KMP_DEBUG_ASSERT(field_buffer);

  __kmp_str_buf_clear(field_buffer);

  // Skip the initial %
  (*ptr)++;

  // Check for %% first
  if (**ptr == '%') {
    __kmp_str_buf_cat(field_buffer, "%", 1);
    (*ptr)++; // skip over the second %
    return 1;
  }

  // Parse field modifiers if they are present
  pad_zeros = false;
  if (**ptr == '0') {
    pad_zeros = true;
    (*ptr)++; // skip over 0
  }
  right_justify = false;
  if (**ptr == '.') {
    right_justify = true;
    (*ptr)++; // skip over .
  }
  // Parse width of field: [width_left, width_right)
  width_left = width_right = NULL;
  if (**ptr >= '0' && **ptr <= '9') {
    width_left = *ptr;
    SKIP_DIGITS(*ptr);
    width_right = *ptr;
  }

  // Create the format for KMP_SNPRINTF based on flags parsed above
  format_index = 0;
  format[format_index++] = '%';
  if (!right_justify)
    format[format_index++] = '-';
  if (pad_zeros)
    format[format_index++] = '0';
  if (width_left && width_right) {
    int i = 0;
    // Only allow 8-digit number widths; this also prevents overflowing format
    while (i < 8 && width_left < width_right) {
      format[format_index++] = *width_left;
      width_left++;
      i++;
    }
  }

  // Parse a name (long or short) and canonicalize it into absolute_short_name
  found_valid_name = false;
  parse_long_name = (**ptr == '{');
  if (parse_long_name)
    (*ptr)++; // skip initial left brace
  for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
                             sizeof(__kmp_affinity_format_table[0]);
       ++i) {
    char short_name = __kmp_affinity_format_table[i].short_name;
    const char *long_name = __kmp_affinity_format_table[i].long_name;
    char field_format = __kmp_affinity_format_table[i].field_format;
    if (parse_long_name) {
      size_t length = KMP_STRLEN(long_name);
      if (strncmp(*ptr, long_name, length) == 0) {
        found_valid_name = true;
        (*ptr) += length; // skip the long name
      }
    } else if (**ptr == short_name) {
      found_valid_name = true;
      (*ptr)++; // skip the short name
    }
    if (found_valid_name) {
      format[format_index++] = field_format;
      format[format_index++] = '\0';
      absolute_short_name = short_name;
      break;
    }
  }
  if (parse_long_name) {
    if (**ptr != '}') {
      absolute_short_name = 0;
    } else {
      (*ptr)++; // skip over the right brace
    }
  }

  // Attempt to fill the buffer with the requested value
  switch (absolute_short_name) {
  case 't':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
    break;
  case 'T':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
    break;
  case 'L':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
    break;
  case 'n':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
    break;
  case 'H': {
    static const int BUFFER_SIZE = 256;
    char buf[BUFFER_SIZE];
    __kmp_expand_host_name(buf, BUFFER_SIZE);
    rc = __kmp_str_buf_print(field_buffer, format, buf);
  } break;
  case 'P':
    rc = __kmp_str_buf_print(field_buffer, format, getpid());
    break;
  case 'i':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
    break;
  case 'N':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
    break;
  case 'a':
    field_value =
        __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
    rc = __kmp_str_buf_print(field_buffer, format, field_value);
    break;
#if KMP_AFFINITY_SUPPORTED
  case 'A': {
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
    rc = __kmp_str_buf_print(field_buffer, format, buf.str);
    __kmp_str_buf_free(&buf);
  } break;
#endif
  default:
    // According to the spec, if an implementation does not have info for a
    // field type, then "undefined" is printed
    rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
    // Skip the field
    if (parse_long_name) {
      SKIP_TOKEN(*ptr);
      if (**ptr == '}')
        (*ptr)++;
    } else {
      (*ptr)++;
    }
  }

  KMP_ASSERT(format_index <= FORMAT_SIZE);
  return rc;
}
// Return number of characters needed to hold the affinity string (not
// including the null byte).  The resultant string is printed to buffer, which
// the caller can then handle, e.g. checking for truncation.
size_t __kmp_aux_capture_affinity(int gtid, const char *format,
                                  kmp_str_buf_t *buffer) {
  const char *parse_ptr;
  size_t retval;
  const kmp_info_t *th;
  kmp_str_buf_t field;

  KMP_DEBUG_ASSERT(buffer);
  KMP_DEBUG_ASSERT(gtid >= 0);

  __kmp_str_buf_init(&field);
  __kmp_str_buf_clear(buffer);

  th = __kmp_threads[gtid];
  retval = 0;

  // If format is NULL or a zero-length string, use the affinity-format-var ICV
  parse_ptr = format;
  if (parse_ptr == NULL || *parse_ptr == '\0') {
    parse_ptr = __kmp_affinity_format;
  }
  KMP_DEBUG_ASSERT(parse_ptr);

  while (*parse_ptr != '\0') {
    // Parse a field
    if (*parse_ptr == '%') {
      // Put field in the buffer
      int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
      __kmp_str_buf_catbuf(buffer, &field);
      retval += rc;
    } else {
      // Put literal character in buffer
      __kmp_str_buf_cat(buffer, parse_ptr, 1);
      retval++;
      parse_ptr++;
    }
  }
  __kmp_str_buf_free(&field);
  return retval;
}
// Displays the affinity string to stdout
void __kmp_aux_display_affinity(int gtid, const char *format) {
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  __kmp_aux_capture_affinity(gtid, format, &buf);
  __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
  __kmp_str_buf_free(&buf);
}
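
/* Usage sketch (illustrative): the capture/display pair above implements the
   OpenMP 5.0 affinity-format API. The format below uses short and long field
   names from __kmp_affinity_format_table; the same string can be supplied via
   OMP_AFFINITY_FORMAT and printed automatically with OMP_DISPLAY_AFFINITY=TRUE.

     #include <omp.h>
     #include <stdio.h>

     int main(void) {
       omp_set_affinity_format("T#%n of %N on host %H, pid %P");
       #pragma omp parallel num_threads(2)
       omp_display_affinity(NULL); // NULL/"" means use affinity-format-var

       char buf[128];
       size_t n = omp_capture_affinity(buf, sizeof(buf), "%{thread_num}");
       printf("captured %zu chars: %s\n", n, buf);
       return 0;
     }
*/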
void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
  int blocktime = arg; /* argument is in milliseconds */
#if KMP_USE_MONITOR
  int bt_intervals;
#endif
  kmp_int8 bt_set;

  __kmp_save_internal_controls(thread);

  /* Normalize and set blocktime for the teams */
  if (blocktime < KMP_MIN_BLOCKTIME)
    blocktime = KMP_MIN_BLOCKTIME;
  else if (blocktime > KMP_MAX_BLOCKTIME)
    blocktime = KMP_MAX_BLOCKTIME;

  set__blocktime_team(thread->th.th_team, tid, blocktime);
  set__blocktime_team(thread->th.th_serial_team, 0, blocktime);

#if KMP_USE_MONITOR
  /* Calculate and set blocktime intervals for the teams */
  bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);

  set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
  set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
#endif

  /* Record that blocktime has been set explicitly */
  bt_set = TRUE;

  set__bt_set_team(thread->th.th_team, tid, bt_set);
  set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
#if KMP_USE_MONITOR
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
                "bt_intervals=%d, monitor_updates=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
                __kmp_monitor_wakeups));
#else
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime));
#endif
}
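
/* Usage sketch (illustrative): __kmp_aux_set_blocktime() is reached from the
   kmp_set_blocktime() extension (and from KMP_BLOCKTIME at startup). Blocktime
   is how long an idle worker spins before going to sleep; in this version the
   argument is in milliseconds and is clamped to
   [KMP_MIN_BLOCKTIME, KMP_MAX_BLOCKTIME].

     #include <omp.h>
     #include <stdio.h>

     int main(void) {
       kmp_set_blocktime(0); // sleep immediately when idle (good when oversubscribed)
       #pragma omp parallel
       {
         #pragma omp single
         printf("blocktime is now %d\n", kmp_get_blocktime());
       }
       return 0;
     }
*/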
void __kmp_aux_set_defaults(char const *str, size_t len) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_env_initialize(str);

  if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print();
  }
} // __kmp_aux_set_defaults
/* internal fast reduction routines */

PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  // Default: critical construct ( lck != NULL, like in current PAROPT ).
  // If ( reduce_data != NULL && reduce_func != NULL ) the tree-reduction
  // method can be selected by the RTL.
  // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method
  // can be selected by the RTL.

  PACKED_REDUCTION_METHOD_T retval;

  int team_size;

  KMP_DEBUG_ASSERT(loc); // it would be nice to test ( loc != 0 )
  KMP_DEBUG_ASSERT(lck); // it would be nice to test ( lck != 0 )

#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED                                 \
  ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE))
#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))

  retval = critical_reduce_block;

  // another choice of getting a team size (with 1 dynamic deference) is slower
  team_size = __kmp_get_team_num_threads(global_tid);
  if (team_size == 1) {

    retval = empty_reduce_block;

  } else {

    int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;

#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 ||                   \
    KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64

#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
    KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD

    int teamsize_cutoff = 4;

#if KMP_MIC_SUPPORTED
    if (__kmp_mic_type != non_mic) {
      teamsize_cutoff = 8;
    }
#endif
    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (tree_available) {
      if (team_size <= teamsize_cutoff) {
        if (atomic_available) {
          retval = atomic_reduce_block;
        }
      } else {
        retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
      }
    } else if (atomic_available) {
      retval = atomic_reduce_block;
    }
#else
#error "Unknown or unsupported OS"
#endif

#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS

#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_HURD

    // basic tuning
    if (atomic_available) {
      if (num_vars <= 2) { // && ( team_size <= 8 ) due to false-sharing ???
        retval = atomic_reduce_block;
      }
    } // otherwise: use critical section

#elif KMP_OS_DARWIN

    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (atomic_available && (num_vars <= 3)) {
      retval = atomic_reduce_block;
    } else if (tree_available) {
      if ((reduce_size > (9 * sizeof(kmp_real64))) &&
          (reduce_size < (2000 * sizeof(kmp_real64)))) {
        retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
      }
    } // otherwise: use critical section

#else
#error "Unknown or unsupported OS"
#endif

#else
#error "Unknown or unsupported architecture"
#endif
  }

  // KMP_FORCE_REDUCTION
  // If the team is serialized (team_size == 1), ignore the forced reduction
  // method and stay with the unsynchronized method (empty_reduce_block).
  if (__kmp_force_reduction_method != reduction_method_not_defined &&
      team_size != 1) {

    PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;

    int atomic_available, tree_available;

    switch ((forced_retval = __kmp_force_reduction_method)) {
    case critical_reduce_block:
      KMP_ASSERT(lck); // lck should be != 0
      break;

    case atomic_reduce_block:
      atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
      if (!atomic_available) {
        KMP_WARNING(RedMethodNotSupported, "atomic");
        forced_retval = critical_reduce_block;
      }
      break;

    case tree_reduce_block:
      tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
      if (!tree_available) {
        KMP_WARNING(RedMethodNotSupported, "tree");
        forced_retval = critical_reduce_block;
      } else {
#if KMP_FAST_REDUCTION_BARRIER
        forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
#endif
      }
      break;

    default:
      KMP_ASSERT(0); // "unsupported method specified"
    }

    retval = forced_retval;
  }

  KA_TRACE(10, ("reduction method selected=%08x\n", retval));

#undef FAST_REDUCTION_TREE_METHOD_GENERATED
#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED

  return (retval);
}

// this function is for testing set/get/determine reduce method
kmp_int32 __kmp_get_reduce_method(void) {
  return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
}
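
/* Usage sketch (illustrative): the selection logic above picks, per reduction,
   between a critical section, atomics, and a tree reduction, based on what the
   compiler generated (reduce_data/reduce_func, the KMP_IDENT_ATOMIC_REDUCE
   flag), the team size, and the architecture; KMP_FORCE_REDUCTION can override
   it. At user level the choice is invisible:

     #include <omp.h>
     #include <stdio.h>

     int main(void) {
       double sum = 0.0;
       #pragma omp parallel for reduction(+ : sum)
       for (int i = 1; i <= 1000; ++i)
         sum += 1.0 / i; // the runtime picks the reduction method here
       printf("harmonic(1000) = %f\n", sum);
       return 0;
     }
*/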
// Soft pause sets up threads to ignore blocktime and just go to sleep.
// Spin-wait code checks __kmp_pause_status and reacts accordingly.
void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }

// Hard pause shuts down the runtime completely; resume happens naturally when
// OpenMP is used subsequently.
void __kmp_hard_pause() {
  __kmp_pause_status = kmp_hard_paused;
  __kmp_internal_end_thread(-1);
}

// Soft resume resets __kmp_pause_status and wakes up the sleeping threads.
void __kmp_resume_if_soft_paused() {
  if (__kmp_pause_status == kmp_soft_paused) {
    __kmp_pause_status = kmp_not_paused;
    for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t *thread = __kmp_threads[gtid];
      if (thread) { // wake it if sleeping
        kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                         thread);
        if (fl.is_sleeping())
          fl.resume(gtid);
        else if (__kmp_try_suspend_mx(thread)) { // got suspend lock
          __kmp_unlock_suspend_mx(thread); // unlock it; it was not sleeping
        } else { // thread holds the lock and may sleep soon
          do { // until either the thread sleeps, or we acquire the lock
            if (fl.is_sleeping()) {
              fl.resume(gtid);
              break;
            } else if (__kmp_try_suspend_mx(thread)) {
              __kmp_unlock_suspend_mx(thread);
              break;
            }
          } while (1);
        }
      }
    }
  }
}

// This function is called via __kmpc_pause_resource. Returns 0 if successful.
int __kmp_pause_resource(kmp_pause_status_t level) {
  if (level == kmp_not_paused) { // requesting resume
    if (__kmp_pause_status == kmp_not_paused) {
      return 1; // the runtime is not paused, so it cannot be resumed
    } else {
      KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
                       __kmp_pause_status == kmp_hard_paused);
      __kmp_pause_status = kmp_not_paused;
      return 0;
    }
  } else if (level == kmp_soft_paused) { // requesting soft pause
    if (__kmp_pause_status != kmp_not_paused) {
      return 1; // already paused
    } else {
      __kmp_soft_pause();
      return 0;
    }
  } else if (level == kmp_hard_paused) { // requesting hard pause
    if (__kmp_pause_status != kmp_not_paused) {
      return 1; // already paused
    } else {
      __kmp_hard_pause();
      return 0;
    }
  } else {
    return 1; // invalid level
  }
}
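
/* Usage sketch (illustrative): __kmp_pause_resource() backs the OpenMP 5.0
   omp_pause_resource() / omp_pause_resource_all() API. A soft pause puts the
   workers to sleep but keeps runtime state; a hard pause tears the runtime
   down until OpenMP is used again.

     #include <omp.h>
     #include <stdio.h>

     int main(void) {
       #pragma omp parallel
       {} // warm up the thread pool

       // Release CPU resources while doing long serial work.
       if (omp_pause_resource_all(omp_pause_soft) != 0)
         printf("soft pause was not accepted\n");

       // Any later parallel region resumes the runtime automatically.
       #pragma omp parallel
       {
         #pragma omp single
         printf("resumed with %d threads\n", omp_get_num_threads());
       }
       return 0;
     }
*/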
void __kmp_omp_display_env(int verbose) {
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial == 0)
    __kmp_do_serial_initialize();
  __kmp_display_env_impl(!verbose, verbose);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
// Hidden helper threads: globals and their initialization
kmp_info_t **__kmp_hidden_helper_threads;
kmp_info_t *__kmp_hidden_helper_main_thread;
std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
#if KMP_OS_LINUX
kmp_int32 __kmp_hidden_helper_threads_num = 8;
kmp_int32 __kmp_enable_hidden_helper = TRUE;
#else
kmp_int32 __kmp_hidden_helper_threads_num = 0;
kmp_int32 __kmp_enable_hidden_helper = FALSE;
#endif

// Number of hidden helper threads that have reached the synchronization point
// in the wrapper function below
std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;

void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) {
  // This is an explicit synchronization on all hidden helper threads in case
  // that when a regular thread pushes a hidden helper task to one hidden
  // helper thread, the thread has not been awakened once since it was released
  // by the main thread after creating the team.
  KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num);
  while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) !=
         __kmp_hidden_helper_threads_num)
    ;

  // If this is the main hidden helper thread, wait on the signal
  if (__kmpc_master(nullptr, *gtid)) {
    // Set the state of the main thread to ready so that the other hidden
    // helper threads can start working.
    TCW_4(__kmp_init_hidden_helper_threads, FALSE);
    __kmp_hidden_helper_initz_release();
    __kmp_hidden_helper_main_thread_wait();
    // Once the main thread is released, wake up the workers for
    // deinitialization.
    for (int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
      __kmp_hidden_helper_worker_thread_signal();
    }
  }
}

void __kmp_hidden_helper_threads_initz_routine() {
  // Create a new root for the hidden helper team/threads
  const int gtid = __kmp_register_root(TRUE);
  __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
  __kmp_hidden_helper_threads = &__kmp_threads[gtid];
  __kmp_hidden_helper_main_thread->th.th_set_nproc =
      __kmp_hidden_helper_threads_num;

  KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0);

  __kmpc_fork_call(nullptr, 0, __kmp_hidden_helper_wrapper_fn);

  // Set the initialization flag to FALSE
  TCW_SYNC_4(__kmp_init_hidden_helper, FALSE);

  __kmp_hidden_helper_threads_deinitz_release();
}
void __kmp_init_nesting_mode() {
  int levels = KMP_HW_LAST;
  __kmp_nesting_mode_nlevels = levels;
  __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int));
  for (int i = 0; i < levels; ++i)
    __kmp_nesting_nth_level[i] = 0;
  if (__kmp_nested_nth.size < levels) {
    __kmp_nested_nth.nth =
        (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int));
    __kmp_nested_nth.size = levels;
  }
}
// When nesting mode is enabled, pick per-level thread counts from topology.
void __kmp_set_nesting_mode_threads() {
  kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()];

  if (__kmp_nesting_mode == 1)
    __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  else if (__kmp_nesting_mode > 1)
    __kmp_nesting_mode_nlevels = __kmp_nesting_mode;

  if (__kmp_topology) { // use topology info
    int loc, hw_level;
    for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
                                loc < __kmp_nesting_mode_nlevels;
         loc++, hw_level++) {
      __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
      if (__kmp_nesting_nth_level[loc] == 1)
        loc--; // skip levels with a ratio of 1
    }
    // Make sure all cores are used
    if (__kmp_nesting_mode > 1 && loc > 1) {
      int core_level = __kmp_topology->get_level(KMP_HW_CORE);
      int num_cores = __kmp_topology->get_count(core_level);
      int upper_levels = 1;
      for (int level = 0; level < loc - 1; ++level)
        upper_levels *= __kmp_nesting_nth_level[level];
      if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
        __kmp_nesting_nth_level[loc - 1] =
            num_cores / __kmp_nesting_nth_level[loc - 2];
    }
    __kmp_nesting_mode_nlevels = loc;
    __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  } else { // no topology info available; provide a reasonable estimate
    if (__kmp_avail_proc >= 4) {
      __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2;
      __kmp_nesting_nth_level[1] = 2;
      __kmp_nesting_mode_nlevels = 2;
    } else {
      __kmp_nesting_nth_level[0] = __kmp_avail_proc;
      __kmp_nesting_mode_nlevels = 1;
    }
    __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  }
  for (int i = 0; i < __kmp_nesting_mode_nlevels; ++i) {
    __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
  }
  set__nproc(thread, __kmp_nesting_nth_level[0]);
  if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > __kmp_nesting_mode)
    __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
  if (get__max_active_levels(thread) > 1) {
    // if max levels was set, set nesting mode levels to the same value
    __kmp_nesting_mode_nlevels = get__max_active_levels(thread);
  }
  if (__kmp_nesting_mode == 1) // turn on nesting for this case only
    set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
}
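
/* Usage sketch (illustrative): the nesting-mode code above derives per-level
   thread counts from the machine topology (the experimental KMP_NESTING_MODE
   setting). A portable way to get a similar shape is an OMP_NUM_THREADS list
   plus OMP_MAX_ACTIVE_LEVELS, e.g. OMP_NUM_THREADS=4,2 OMP_MAX_ACTIVE_LEVELS=2
   with:

     #include <omp.h>
     #include <stdio.h>

     int main(void) {
       #pragma omp parallel // outer level, e.g. one thread per core pair
       #pragma omp parallel // inner level, e.g. hardware threads per core
       {
         #pragma omp critical
         printf("outer %d / inner %d\n", omp_get_ancestor_thread_num(1),
                omp_get_thread_num());
       }
       return 0;
     }
*/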