#include "kmp_error.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    __kmp_assign_root_init_mask();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);

#if KMP_OS_WINDOWS && OMPT_SUPPORT
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());

  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
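
// Illustrative sketch (not part of the runtime): how compiler-generated code
// or a debugging hook might use the query entry points above. The helper name
// is hypothetical; the __kmpc_* calls are the entry points defined in this
// file and declared in kmp.h, and printf assumes <cstdio>.
#if 0
static void report_thread_position(ident_t *loc) {
  kmp_int32 gtid = __kmpc_global_thread_num(loc); // global (library-wide) id
  kmp_int32 all = __kmpc_global_num_threads(loc); // total registered threads
  kmp_int32 tid = __kmpc_bound_thread_num(loc);   // id within the current team
  kmp_int32 nth = __kmpc_bound_num_threads(loc);  // size of the current team
  printf("gtid=%d of %d, tid=%d of %d\n", gtid, all, tid, nth);
}
#endif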
  if (__kmp_par_range == 0) {

  semi2 = strchr(semi2, ';');

  semi2 = strchr(semi2 + 1, ';');

  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {

    if ((*name == '/') || (*name == ';')) {

    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;

  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;

  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;

    return __kmp_par_range < 0;

  return __kmp_entry_thread()->th.th_root->r.r_active;
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_threads(loc, global_tid, num_threads);
void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
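
// Illustrative sketch (not part of the runtime): the call sequence a compiler
// typically emits for "#pragma omp parallel num_threads(4) proc_bind(close)".
// The outlined function and wrapper names are hypothetical; the pushed values
// apply only to the immediately following fork (see __kmpc_fork_call below).
#if 0
extern void outlined_parallel_body(kmp_int32 *gtid, kmp_int32 *btid);
static void lower_parallel_with_clauses(ident_t *loc) {
  kmp_int32 gtid = __kmpc_global_thread_num(loc);
  __kmpc_push_num_threads(loc, gtid, 4);
  __kmpc_push_proc_bind(loc, gtid, (kmp_int32)proc_bind_close);
  __kmpc_fork_call(loc, 0, (kmpc_micro)outlined_parallel_body);
}
#endif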
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);

    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);

  va_start(ap, microtask);

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    kmp_info_t *master_th = __kmp_threads[gtid];
    kmp_team_t *parent_team = master_th->th.th_team;
    ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
      ompt_frame = &(lwt->ompt_task_info.frame);

      int tid = __kmp_tid_from_gtid(gtid);
      ompt_frame = &(
          parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);

    ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);

  OMPT_STORE_RETURN_ADDRESS(gtid);

#if INCLUDE_SSC_MARKS

  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t) microtask,
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,

#if INCLUDE_SSC_MARKS

  __kmp_join_call(loc, gtid

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);

    KMP_POP_PARTITIONED_TIMER();
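
// Illustrative sketch (not part of the runtime): the shape of the outlined
// microtask that __kmpc_fork_call above invokes on each thread of the new
// team. Names are hypothetical; shared variables travel as the variadic
// arguments after the microtask pointer and arrive as extra parameters.
#if 0
static void parallel_region_body(kmp_int32 *gtid, kmp_int32 *btid,
                                 int *shared_counter) {
  /* body of "#pragma omp parallel"; *gtid and *btid identify the worker */
}
static void run_example_region(ident_t *loc, int *counter) {
  __kmpc_fork_call(loc, 1, (kmpc_micro)parallel_region_body, counter);
}
#endif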
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
                              kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
                              kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
                " num_teams_ub=%d num_threads=%d\n",
                global_tid, num_teams_lb, num_teams_ub, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
                          num_threads);
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];

  va_start(ap, microtask);

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);

    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);

  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level;

  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);

  OMPT_STORE_RETURN_ADDRESS(gtid);

  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);

  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(
      loc, gtid, fork_context_intel, argc,
      VOLATILE_CAST(microtask_t) __kmp_teams_master,
      VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
  __kmp_join_call(loc, gtid

  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;

  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);

    KMP_POP_PARTITIONED_TIMER();
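
// Illustrative sketch (not part of the runtime): lowering of
// "#pragma omp teams num_teams(8) thread_limit(4)". The pushed team sizes are
// consumed by the __kmpc_fork_teams call above; the outlined and wrapper
// names are hypothetical.
#if 0
extern void outlined_teams_body(kmp_int32 *gtid, kmp_int32 *btid);
static void lower_teams_construct(ident_t *loc) {
  kmp_int32 gtid = __kmpc_global_thread_num(loc);
  __kmpc_push_num_teams(loc, gtid, /*num_teams=*/8, /*num_threads=*/4);
  __kmpc_fork_teams(loc, 0, (kmpc_micro)outlined_teams_body);
}
#endif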
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
  __kmp_assert_valid_gtid(global_tid);

  OMPT_STORE_RETURN_ADDRESS(global_tid);

  __kmp_serialized_parallel(loc, global_tid);
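
// Illustrative sketch (not part of the runtime): when a parallel region is
// known to run serialized (for example "#pragma omp parallel if(0)"), the
// compiler can bracket the body with these two entry points instead of
// forking. The wrapper name is hypothetical.
#if 0
static void lower_serialized_parallel(ident_t *loc) {
  kmp_int32 gtid = __kmpc_global_thread_num(loc);
  __kmpc_serialized_parallel(loc, gtid);
  /* body of the parallel region executes on the encountering thread */
  __kmpc_end_serialized_parallel(loc, gtid);
}
#endif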
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  __kmp_assert_valid_gtid(global_tid);
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  kmp_task_team_t *task_team = this_thr->th.th_task_team;

  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));

  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);

    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program | ompt_parallel_team,
          OMPT_LOAD_RETURN_ADDRESS(global_tid));

    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;

  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
  serial_team->t.t_level--;

  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);

  dispatch_private_info_t *disp_buffer =
      serial_team->t.t_dispatch->th_disp_buffer;
  serial_team->t.t_dispatch->th_disp_buffer =
      serial_team->t.t_dispatch->th_disp_buffer->next;
  __kmp_free(disp_buffer);

  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator;

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);

    if (ompd_state & OMPD_ENABLE_BP)
      ompd_bp_parallel_end();

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc;
    this_thr->th.th_team_master = serial_team->t.t_parent->t.t_threads[0];
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;
    if (__kmp_tasking_mode != tskm_immediate_exec) {

      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);

  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
  KC_TRACE(10, ("__kmpc_flush: called\n"));

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)

  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);

  if (!__kmp_cpuinfo.sse2) {

#elif KMP_COMPILER_MSVC

  __sync_synchronize();

#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \

#error Unknown or unsupported architecture

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
      KMP_WARNING(ConstructIdentInvalid);

    __kmp_check_barrier(global_tid, ct_barrier, loc);

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);

  OMPT_STORE_RETURN_ADDRESS(global_tid);

  __kmp_threads[global_tid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
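
// Illustrative sketch (not part of the runtime): "#pragma omp barrier" lowers
// to a single call of the entry point above; the wrapper name is hypothetical.
#if 0
static void lower_explicit_barrier(ident_t *loc) {
  kmp_int32 gtid = __kmpc_global_thread_num(loc);
  __kmpc_barrier(loc, gtid);
}
#endif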
  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (KMP_MASTER_GTID(global_tid)) {

    KMP_PUSH_PARTITIONED_TIMER(OMP_master);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_masked) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_masked)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);

      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);

      __kmp_push_sync(global_tid, ct_master, loc, NULL);

      __kmp_check_sync(global_tid, ct_master, loc, NULL);
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));

  if (__kmp_env_consistency_check) {
    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
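
// Illustrative sketch (not part of the runtime): lowering of
// "#pragma omp master". Only the thread for which __kmpc_master returns 1
// runs the body and calls __kmpc_end_master; there is no implied barrier.
// The wrapper name is hypothetical.
#if 0
static void lower_master_construct(ident_t *loc, kmp_int32 gtid) {
  if (__kmpc_master(loc, gtid)) {
    /* master-only body */
    __kmpc_end_master(loc, gtid);
  }
}
#endif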
  KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  tid = __kmp_tid_from_gtid(global_tid);

    KMP_PUSH_PARTITIONED_TIMER(OMP_masked);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_masked) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;
      ompt_callbacks.ompt_callback(ompt_callback_masked)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);

      __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);

      __kmp_push_sync(global_tid, ct_masked, loc, NULL);

      __kmp_check_sync(global_tid, ct_masked, loc, NULL);
  KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));

  if (__kmp_env_consistency_check) {
    __kmp_pop_sync(global_tid, ct_masked, loc);
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  __kmp_itt_ordered_prep(gtid);

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL

  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.enabled) {
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;

    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);

    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {

    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);

  __kmp_itt_ordered_start(gtid);
  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  __kmp_itt_ordered_end(gtid);

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);

    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
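
// Illustrative sketch (not part of the runtime): inside a loop compiled with
// an ordered clause, each "#pragma omp ordered" body is bracketed by the two
// entry points above so iterations retire in sequential order. The iteration
// dispatch bookkeeping is omitted; the wrapper name is hypothetical.
#if 0
static void lower_ordered_body(ident_t *loc, kmp_int32 gtid) {
  __kmpc_ordered(loc, gtid);
  /* ordered body for the current iteration */
  __kmpc_end_ordered(loc, gtid);
}
#endif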
#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {

  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));

  __kmp_itt_critical_creating(ilk->lock, loc);

  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);

    __kmp_itt_critical_destroyed(ilk->lock);

  KMP_DEBUG_ASSERT(*lck != NULL);
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
      KMP_FSYNC_PREPARE(l); \
      KMP_INIT_YIELD(spins); \
      kmp_backoff_t backoff = __kmp_spin_backoff_params; \
        if (TCR_4(__kmp_nth) > \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
          KMP_YIELD_SPIN(spins); \
        __kmp_spin_backoff(&backoff); \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
    KMP_FSYNC_ACQUIRED(l); \

#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \

#define KMP_RELEASE_TAS_LOCK(lock, gtid) \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
#include <sys/syscall.h>

#define FUTEX_WAIT 0

#define FUTEX_WAKE 1

#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    kmp_int32 gtid_code = (gtid + 1) << 1; \
    KMP_FSYNC_PREPARE(ftx); \
    kmp_int32 poll_val; \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
                                         KMP_LOCK_BUSY(1, futex))) { \
        poll_val |= KMP_LOCK_BUSY(1, futex); \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
                        NULL, NULL, 0)) != 0) { \
    KMP_FSYNC_ACQUIRED(ftx); \

#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                                    KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
      KMP_FSYNC_ACQUIRED(ftx); \

#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    KMP_FSYNC_RELEASING(ftx); \
    kmp_int32 poll_val = \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
    if (KMP_LOCK_STRIP(poll_val) & 1) { \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
    KMP_YIELD_OVERSUB(); \
static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);

    __kmp_itt_critical_creating(lck);

    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

      __kmp_itt_critical_destroyed(lck);

      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;

  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;

  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;

    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);
  __kmp_itt_critical_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;

    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
    ti.state = ompt_state_wait_critical;

    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);

  __kmp_acquire_user_lock_with_checks(lck, global_tid);

  __kmp_itt_critical_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {

    ti.state = prev_state;

    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);

  KMP_POP_PARTITIONED_TIMER();

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#if KMP_USE_DYNAMIC_LOCK

static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {

#define KMP_TSX_LOCK(seq) lockseq_##seq

#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)

#define KMP_CPUINFO_RTM 0

  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))

  if (hint & omp_lock_hint_speculative)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;

  return __kmp_user_lock_seq;
#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {

    switch (KMP_EXTRACT_D_TAG(user_lock)) {

      return kmp_mutex_impl_queuing;

      return kmp_mutex_impl_spin;

    case locktag_rtm_spin:
      return kmp_mutex_impl_speculative;

      return kmp_mutex_impl_none;

    ilock = KMP_LOOKUP_I_LOCK(user_lock);

  switch (ilock->type) {

  case locktag_adaptive:
  case locktag_rtm_queuing:
    return kmp_mutex_impl_speculative;

  case locktag_nested_tas:
    return kmp_mutex_impl_spin;

  case locktag_nested_futex:

  case locktag_ticket:
  case locktag_queuing:

  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;

    return kmp_mutex_impl_none;

static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {

    return kmp_mutex_impl_spin;

    return kmp_mutex_impl_queuing;

  case lk_rtm_queuing:

    return kmp_mutex_impl_speculative;

    return kmp_mutex_impl_none;
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {

  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);

  if (KMP_IS_D_LOCK(lockseq)) {
    KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                KMP_GET_D_TAG(lockseq));

    __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));

    __kmp_itt_critical_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;

      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,

#if KMP_USE_INLINED_TAS
    if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);

#elif KMP_USE_INLINED_FUTEX
    if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);

      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);

    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);

    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));

    __kmp_itt_critical_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;

      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,

    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);

  KMP_POP_PARTITIONED_TIMER();

  __kmp_itt_critical_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {

    ti.state = prev_state;

    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
                         kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  int locktag = KMP_EXTRACT_D_TAG(crit);

    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);

    __kmp_itt_critical_releasing(lck);

#if KMP_USE_INLINED_TAS
    if (locktag == locktag_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);

#elif KMP_USE_INLINED_FUTEX
    if (locktag == locktag_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);

      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);

    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);

    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);

    __kmp_itt_critical_releasing(lck);

    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;

  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;

    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_itt_critical_releasing(lck);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#if OMPT_SUPPORT && OMPT_OPTIONAL

  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
        OMPT_LOAD_RETURN_ADDRESS(0));

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);

  OMPT_STORE_RETURN_ADDRESS(global_tid);

  __kmp_threads[global_tid]->th.th_ident = loc;

  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;

  return (status != 0) ? 0 : 1;
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
      KMP_WARNING(ConstructIdentInvalid);

    __kmp_check_barrier(global_tid, ct_barrier, loc);

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);

  OMPT_STORE_RETURN_ADDRESS(global_tid);

  __kmp_threads[global_tid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;

  if (__kmp_env_consistency_check) {

      __kmp_pop_sync(global_tid, ct_master, loc);
  __kmp_assert_valid_gtid(global_tid);
  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);

    KMP_PUSH_PARTITIONED_TIMER(OMP_single);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.enabled) {

      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_executor, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));

      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_end,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_exit_single(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_single_executor, ompt_scope_end,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
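
// Illustrative sketch (not part of the runtime): lowering of
// "#pragma omp single". __kmpc_single returns 1 only on the thread that wins
// the single; that thread alone calls __kmpc_end_single. Without a nowait
// clause the compiler also emits a __kmpc_barrier after the construct. The
// wrapper name is hypothetical.
#if 0
static void lower_single_construct(ident_t *loc, kmp_int32 gtid) {
  if (__kmpc_single(loc, gtid)) {
    /* single body */
    __kmpc_end_single(loc, gtid);
  }
  __kmpc_barrier(loc, gtid); // omitted when the single has nowait
}
#endif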
  KMP_POP_PARTITIONED_TIMER();
  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_work_t ompt_work_type = ompt_work_loop;
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);

      ompt_work_type = ompt_work_loop;

      ompt_work_type = ompt_work_sections;

      ompt_work_type = ompt_work_distribute;

      KMP_DEBUG_ASSERT(ompt_work_type);

    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));

  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(global_tid, ct_pdo, loc);
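
// Illustrative sketch (not part of the runtime): a statically scheduled
// worksharing loop pairs __kmpc_for_static_fini above with a
// __kmpc_for_static_init_* call that is defined outside this file; the init
// signature used below is an assumption for illustration only.
#if 0
static void lower_static_loop(ident_t *loc, kmp_int32 gtid, int *data,
                              kmp_int32 n) {
  kmp_int32 lower = 0, upper = n - 1, stride = 1, last = 0;
  __kmpc_for_static_init_4(loc, gtid, (kmp_int32)kmp_sch_static, &last, &lower,
                           &upper, &stride, /*incr=*/1, /*chunk=*/0);
  for (kmp_int32 i = lower; i <= upper; ++i)
    data[i] = i; // this thread's chunk of the iteration space
  __kmpc_for_static_fini(loc, gtid);
}
#endif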
void ompc_set_num_threads(int arg) {

  __kmp_set_num_threads(arg, __kmp_entry_gtid());

void ompc_set_dynamic(int flag) {

  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__dynamic(thread, flag ? true : false);

void ompc_set_nested(int flag) {

  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);

void ompc_set_max_active_levels(int max_active_levels) {

  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);

void ompc_set_schedule(omp_sched_t kind, int modifier) {

  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);

int ompc_get_ancestor_thread_num(int level) {
  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);

int ompc_get_team_size(int level) {
  return __kmp_get_team_size(__kmp_entry_gtid(), level);
void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();

  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
                         format, KMP_STRLEN(format) + 1);

size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {

  if (!__kmp_init_serial) {
    __kmp_serial_initialize();

  format_size = KMP_STRLEN(__kmp_affinity_format);
  if (buffer && size) {
    __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,

void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {

  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();

  __kmp_assign_root_init_mask();
  gtid = __kmp_get_gtid();
  __kmp_aux_display_affinity(gtid, format);

size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
                                              char const *format) {

  size_t num_required;
  kmp_str_buf_t capture_buf;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();

  __kmp_assign_root_init_mask();
  gtid = __kmp_get_gtid();
  __kmp_str_buf_init(&capture_buf);
  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
  if (buffer && buf_size) {
    __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
                           capture_buf.used + 1);

  __kmp_str_buf_free(&capture_buf);
  return num_required;
void kmpc_set_stacksize(int arg) {

  __kmp_aux_set_stacksize(arg);

void kmpc_set_stacksize_s(size_t arg) {

  __kmp_aux_set_stacksize(arg);

void kmpc_set_blocktime(int arg) {

  gtid = __kmp_entry_gtid();
  tid = __kmp_tid_from_gtid(gtid);
  thread = __kmp_thread_from_gtid(gtid);

  __kmp_aux_set_blocktime(arg, thread, tid);

void kmpc_set_library(int arg) {

  __kmp_user_set_library((enum library_type)arg);

void kmpc_set_defaults(char const *str) {

  __kmp_aux_set_defaults(str, KMP_STRLEN(str));

void kmpc_set_disp_num_buffers(int arg) {

  if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
      arg <= KMP_MAX_DISP_NUM_BUFF) {
    __kmp_dispatch_num_buffers = arg;

int kmpc_set_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED

  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();

  __kmp_assign_root_init_mask();
  return __kmp_aux_set_affinity_mask_proc(proc, mask);

int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED

  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();

  __kmp_assign_root_init_mask();
  return __kmp_aux_unset_affinity_mask_proc(proc, mask);

int kmpc_get_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED

  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();

  __kmp_assign_root_init_mask();
  return __kmp_aux_get_affinity_mask_proc(proc, mask);
void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
                        void *cpy_data, void (*cpy_func)(void *, void *),
                        kmp_int32 didit) {
  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;

  if (__kmp_env_consistency_check) {
      KMP_WARNING(ConstructIdentInvalid);

    *data_ptr = cpy_data;

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);

  OMPT_STORE_RETURN_ADDRESS(gtid);

  __kmp_threads[gtid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);

    (*cpy_func)(cpy_data, *data_ptr);

  OMPT_STORE_RETURN_ADDRESS(gtid);

  __kmp_threads[gtid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
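
// Illustrative sketch (not part of the runtime): lowering of
// "#pragma omp single copyprivate(x)". Every thread calls __kmpc_copyprivate;
// the thread that executed the single passes didit = 1 together with its
// private copy, and the callback broadcasts it into the other threads'
// copies. The callback and wrapper names are hypothetical.
#if 0
static void copy_int(void *dst, void *src) { *(int *)dst = *(int *)src; }
static void lower_single_copyprivate(ident_t *loc, kmp_int32 gtid, int *x) {
  kmp_int32 didit = 0;
  if (__kmpc_single(loc, gtid)) {
    *x = 42; // value produced inside the single
    didit = 1;
    __kmpc_end_single(loc, gtid);
  }
  __kmpc_copyprivate(loc, gtid, sizeof(int), x, copy_int, didit);
}
#endif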
#define INIT_LOCK __kmp_init_user_lock_with_checks
#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
#define ACQUIRE_NESTED_LOCK_TIMED \
  __kmp_acquire_nested_user_lock_with_checks_timed
#define RELEASE_LOCK __kmp_release_user_lock_with_checks
#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
#define TEST_LOCK __kmp_test_user_lock_with_checks
#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
#if KMP_USE_DYNAMIC_LOCK

static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
                                                    kmp_dyna_lockseq_t seq) {
  if (KMP_IS_D_LOCK(seq)) {
    KMP_INIT_D_LOCK(lock, seq);

    __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);

    KMP_INIT_I_LOCK(lock, seq);

    kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
    __kmp_itt_lock_creating(ilk->lock, loc);

static __forceinline void
__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
                               kmp_dyna_lockseq_t seq) {

  if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
      seq == lockseq_rtm_spin || seq == lockseq_adaptive)
    seq = __kmp_user_lock_seq;

    seq = lockseq_nested_tas;

    seq = lockseq_nested_futex;

  case lockseq_ticket:
    seq = lockseq_nested_ticket;

  case lockseq_queuing:
    seq = lockseq_nested_queuing;

    seq = lockseq_nested_drdpa;

    seq = lockseq_nested_queuing;

  KMP_INIT_I_LOCK(lock, seq);

  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
  __kmp_itt_lock_creating(ilk->lock, loc);
void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
                                uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");

  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
                                     void **user_lock, uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");

  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock");

  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  static char const *const func = "omp_init_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);

  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  __kmp_itt_lock_creating(lck);
void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");

  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  static char const *const func = "omp_init_nest_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);

  INIT_NESTED_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  __kmp_itt_lock_creating(lck);
void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
    lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;

    lck = (kmp_user_lock_p)user_lock;

  __kmp_itt_lock_destroyed(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  __kmp_itt_lock_destroyed(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {

  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {

    __kmp_user_lock_free(user_lock, gtid, lck);
void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
  __kmp_itt_lock_destroyed(ilk->lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  __kmp_itt_lock_destroyed(lck);

  DESTROY_NESTED_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {

  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {

    __kmp_user_lock_free(user_lock, gtid, lck);
void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {

#if KMP_USE_DYNAMIC_LOCK
  int tag = KMP_EXTRACT_D_TAG(user_lock);

  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);

#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);

    __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);

  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

    lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");

  __kmp_itt_lock_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)lck, codeptr);

  ACQUIRE_LOCK(lck, gtid);

  __kmp_itt_lock_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
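
// Illustrative sketch (not part of the runtime): the OpenMP lock API
// (omp_init_lock / omp_set_lock / omp_unset_lock / omp_destroy_lock) maps to
// the user-lock entry points in this file. The wrapper name and the local
// storage standing in for an omp_lock_t object are hypothetical.
#if 0
static void lock_roundtrip(ident_t *loc, kmp_int32 gtid) {
  void *lock_storage = NULL; // stands in for an omp_lock_t object
  __kmpc_init_lock(loc, gtid, &lock_storage);
  __kmpc_set_lock(loc, gtid, &lock_storage);
  /* exclusive section */
  __kmpc_unset_lock(loc, gtid, &lock_storage);
  __kmpc_destroy_lock(loc, gtid, &lock_storage);
}
#endif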
void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(user_lock),
          (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  int acquire_status =
      KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)acquire_status;

  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {

        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,

      if (ompt_enabled.ompt_callback_nest_lock) {

        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

    lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");

  __kmp_itt_lock_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,

  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);

  __kmp_itt_lock_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {

        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);

      if (ompt_enabled.ompt_callback_nest_lock) {

        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2854 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2855 #if KMP_USE_DYNAMIC_LOCK
2857 int tag = KMP_EXTRACT_D_TAG(user_lock);
2859 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2861 #if KMP_USE_INLINED_TAS
2862 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2863 KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2865 #elif KMP_USE_INLINED_FUTEX
2866 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2867 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2871 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2874 #if OMPT_SUPPORT && OMPT_OPTIONAL
2876 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2878 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2879 if (ompt_enabled.ompt_callback_mutex_released) {
2880 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2881 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2887 kmp_user_lock_p lck;
2892 if ((__kmp_user_lock_kind == lk_tas) &&
2893     (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2894 #if KMP_OS_LINUX && \
2895 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2898 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2900 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2903 #if OMPT_SUPPORT && OMPT_OPTIONAL
2905 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2907 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2908 if (ompt_enabled.ompt_callback_mutex_released) {
2909 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2910 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2916 lck = (kmp_user_lock_p)user_lock;
2920 else if ((__kmp_user_lock_kind == lk_futex) &&
2921          (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2922 lck = (kmp_user_lock_p)user_lock;
2926 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2930 __kmp_itt_lock_releasing(lck);
2933 RELEASE_LOCK(lck, gtid);
2935 #if OMPT_SUPPORT && OMPT_OPTIONAL
2937 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2939 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2940 if (ompt_enabled.ompt_callback_mutex_released) {
2941 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2942 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2950 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2951 #if KMP_USE_DYNAMIC_LOCK
2954 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2956 int release_status =
2957 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2958 (void)release_status;
2960 #if OMPT_SUPPORT && OMPT_OPTIONAL
2962 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2964 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2965 if (ompt_enabled.enabled) {
2966 if (release_status == KMP_LOCK_RELEASED) {
2967 if (ompt_enabled.ompt_callback_mutex_released) {
2969 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2970 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2973 } else if (ompt_enabled.ompt_callback_nest_lock) {
2975 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2976 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2983 kmp_user_lock_p lck;
2987 if ((__kmp_user_lock_kind == lk_tas) &&
2988     (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2989      OMP_NEST_LOCK_T_SIZE)) {
2990 #if KMP_OS_LINUX && \
2991 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2993 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
2995 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2998 #if OMPT_SUPPORT && OMPT_OPTIONAL
2999 int release_status = KMP_LOCK_STILL_HELD;
3002 if (--(tl->lk.depth_locked) == 0) {
3003 TCW_4(tl->lk.poll, 0);
3004 #if OMPT_SUPPORT && OMPT_OPTIONAL
3005 release_status = KMP_LOCK_RELEASED;
3010 #if OMPT_SUPPORT && OMPT_OPTIONAL
3012 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3014 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3015 if (ompt_enabled.enabled) {
3016 if (release_status == KMP_LOCK_RELEASED) {
3017 if (ompt_enabled.ompt_callback_mutex_released) {
3019 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3020 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3022 } else if (ompt_enabled.ompt_callback_nest_lock) {
3024 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3025 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3032 lck = (kmp_user_lock_p)user_lock;
3036 else if ((__kmp_user_lock_kind == lk_futex) &&
3037          (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3038           OMP_NEST_LOCK_T_SIZE)) {
3039 lck = (kmp_user_lock_p)user_lock;
3043 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
3047 __kmp_itt_lock_releasing(lck);
3051 release_status = RELEASE_NESTED_LOCK(lck, gtid);
3052 #if OMPT_SUPPORT && OMPT_OPTIONAL
3054 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3056 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3057 if (ompt_enabled.enabled) {
3058 if (release_status == KMP_LOCK_RELEASED) {
3059 if (ompt_enabled.ompt_callback_mutex_released) {
3061 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3062 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3064 } else if (ompt_enabled.ompt_callback_nest_lock) {
3066 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3067 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3076 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3079 #if KMP_USE_DYNAMIC_LOCK
3081 int tag = KMP_EXTRACT_D_TAG(user_lock);
3083 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3085 #if OMPT_SUPPORT && OMPT_OPTIONAL
3087 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3089 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3090 if (ompt_enabled.ompt_callback_mutex_acquire) {
3091 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3092 ompt_mutex_lock, omp_lock_hint_none,
3093 __ompt_get_mutex_impl_type(user_lock),
3094 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3097 #if KMP_USE_INLINED_TAS
3098 if (tag == locktag_tas && !__kmp_env_consistency_check) {
3099 KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3101 #elif KMP_USE_INLINED_FUTEX
3102 if (tag == locktag_futex && !__kmp_env_consistency_check) {
3103 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3107 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3111 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3113 #if OMPT_SUPPORT && OMPT_OPTIONAL
3114 if (ompt_enabled.ompt_callback_mutex_acquired) {
3115 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3116 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3122 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3129 kmp_user_lock_p lck;
3132 if ((__kmp_user_lock_kind == lk_tas) &&
3133     (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3134 lck = (kmp_user_lock_p)user_lock;
3137 else if ((__kmp_user_lock_kind == lk_futex) &&
3138          (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3139 lck = (kmp_user_lock_p)user_lock;
3143 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3147 __kmp_itt_lock_acquiring(lck);
3149 #if OMPT_SUPPORT && OMPT_OPTIONAL
3151 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3153 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3154 if (ompt_enabled.ompt_callback_mutex_acquire) {
3155 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3156 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3157 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3161 rc = TEST_LOCK(lck, gtid);
3164 __kmp_itt_lock_acquired(lck);
3166 __kmp_itt_lock_cancelled(lck);
3169 #if OMPT_SUPPORT && OMPT_OPTIONAL
3170 if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3171 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3172 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3176 return (rc ? FTN_TRUE : FTN_FALSE);
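For orientation, the block below is a user-level sketch (not part of kmp_csupport.cpp) of how these entry points are normally reached: a compiler typically lowers the omp.h simple-lock routines to __kmpc_set_lock, __kmpc_unset_lock and __kmpc_test_lock shown above. The exact lowering is compiler-dependent and only assumed here.

#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_lock_t lock;
  int counter = 0;
  omp_init_lock(&lock);
#pragma omp parallel
  {
    /* try first without blocking, cf. __kmpc_test_lock (assumed mapping) */
    if (!omp_test_lock(&lock))
      omp_set_lock(&lock); /* blocking acquire, cf. __kmpc_set_lock */
    counter++;
    omp_unset_lock(&lock); /* cf. __kmpc_unset_lock */
  }
  omp_destroy_lock(&lock);
  printf("counter = %d\n", counter);
  return 0;
}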
3184 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3185 #if KMP_USE_DYNAMIC_LOCK
3188 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3190 #if OMPT_SUPPORT && OMPT_OPTIONAL
3192 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3194 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3195 if (ompt_enabled.ompt_callback_mutex_acquire) {
3196 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3197 ompt_mutex_nest_lock, omp_lock_hint_none,
3198 __ompt_get_mutex_impl_type(user_lock),
3199 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3202 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3205 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3207 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3210 #if OMPT_SUPPORT && OMPT_OPTIONAL
3211 if (ompt_enabled.enabled && rc) {
3213 if (ompt_enabled.ompt_callback_mutex_acquired) {
3215 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3216 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3220 if (ompt_enabled.ompt_callback_nest_lock) {
3222 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3223 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3232 kmp_user_lock_p lck;
3235 if ((__kmp_user_lock_kind == lk_tas) &&
3236     (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3237      OMP_NEST_LOCK_T_SIZE)) {
3238 lck = (kmp_user_lock_p)user_lock;
3241 else if ((__kmp_user_lock_kind == lk_futex) &&
3242          (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3243           OMP_NEST_LOCK_T_SIZE)) {
3244 lck = (kmp_user_lock_p)user_lock;
3248 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3252 __kmp_itt_lock_acquiring(lck);
3255 #if OMPT_SUPPORT && OMPT_OPTIONAL
3257 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3259 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3260 if (ompt_enabled.enabled &&
3261     ompt_enabled.ompt_callback_mutex_acquire) {
3262 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3263 ompt_mutex_nest_lock, omp_lock_hint_none,
3264 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3269 rc = TEST_NESTED_LOCK(lck, gtid);
3272 __kmp_itt_lock_acquired(lck);
3274 __kmp_itt_lock_cancelled(lck);
3277 #if OMPT_SUPPORT && OMPT_OPTIONAL
3278 if (ompt_enabled.enabled && rc) {
3280 if (ompt_enabled.ompt_callback_mutex_acquired) {
3282 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3283 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3286 if (ompt_enabled.ompt_callback_nest_lock) {
3288 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3289 ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
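Similarly, a minimal sketch (not part of this file) of the nestable-lock API that the __kmpc_*_nest_lock entry points serve; re-acquiring a nest lock in the owning thread only increments the nesting depth, which is why the OMPT code above distinguishes the first acquire (KMP_LOCK_ACQUIRED_FIRST) from nested ones. The mapping in the comments is assumed, not mandated.

#include <omp.h>
#include <stdio.h>

static omp_nest_lock_t nlock;

static void add_one(int *value) {
  omp_set_nest_lock(&nlock); /* nested acquire by the owner: depth++ only */
  (*value)++;
  omp_unset_nest_lock(&nlock);
}

int main(void) {
  int value = 0;
  omp_init_nest_lock(&nlock);
#pragma omp parallel
  {
    omp_set_nest_lock(&nlock);   /* cf. __kmpc_set_nest_lock */
    add_one(&value);             /* no self-deadlock */
    omp_unset_nest_lock(&nlock); /* cf. __kmpc_unset_nest_lock */
  }
  omp_destroy_nest_lock(&nlock);
  printf("value = %d\n", value);
  return 0;
}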
3308 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3309 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3311 #define __KMP_GET_REDUCTION_METHOD(gtid) \
3312 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3318 static __forceinline void
3319 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3320                                           kmp_critical_name *crit) {
3326 kmp_user_lock_p lck;
3328 #if KMP_USE_DYNAMIC_LOCK
3330 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3333 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3334 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3335                             KMP_GET_D_TAG(__kmp_user_lock_seq));
3337 __kmp_init_indirect_csptr(crit, loc, global_tid,
3338 KMP_GET_I_TAG(__kmp_user_lock_seq));
3344 if (KMP_EXTRACT_D_TAG(lk) != 0) {
3345 lck = (kmp_user_lock_p)lk;
3346 KMP_DEBUG_ASSERT(lck != NULL);
3347 if (__kmp_env_consistency_check) {
3348 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3350 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3352 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3354 KMP_DEBUG_ASSERT(lck != NULL);
3355 if (__kmp_env_consistency_check) {
3356 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3358 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3366 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3367 lck = (kmp_user_lock_p)crit;
3369 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3371 KMP_DEBUG_ASSERT(lck != NULL);
3373 if (__kmp_env_consistency_check)
3374 __kmp_push_sync(global_tid, ct_critical, loc, lck);
3376 __kmp_acquire_user_lock_with_checks(lck, global_tid);
3382 static __forceinline void
3383 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3384                                         kmp_critical_name *crit) {
3386 kmp_user_lock_p lck;
3388 #if KMP_USE_DYNAMIC_LOCK
3390 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3391 lck = (kmp_user_lock_p)crit;
3392 if (__kmp_env_consistency_check)
3393 __kmp_pop_sync(global_tid, ct_critical, loc);
3394 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3396 kmp_indirect_lock_t *ilk =
3397 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3398 if (__kmp_env_consistency_check)
3399 __kmp_pop_sync(global_tid, ct_critical, loc);
3400 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3408 if (__kmp_base_user_lock_size > 32) {
3409 lck = *((kmp_user_lock_p *)crit);
3410 KMP_ASSERT(lck != NULL);
3412 lck = (kmp_user_lock_p)crit;
3415 if (__kmp_env_consistency_check)
3416 __kmp_pop_sync(global_tid, ct_critical, loc);
3418 __kmp_release_user_lock_with_checks(lck, global_tid);
3423 static __forceinline int
3424 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
                                          int *task_state) {
3429 if (th->th.th_teams_microtask) {
3430 *team_p = team = th->th.th_team;
3431 if (team->t.t_level == th->th.th_teams_level) {
3433 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid);
3435 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3436 th->th.th_team = team->t.t_parent;
3437 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3438 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3439 *task_state = th->th.th_task_state;
3440 th->th.th_task_state = 0;
3448 static __forceinline void
3449 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team,
                                 int task_state) {
3451 th->th.th_info.ds.ds_tid = 0;
3452 th->th.th_team = team;
3453 th->th.th_team_nproc = team->t.t_nproc;
3454 th->th.th_task_team = team->t.t_task_team[task_state];
3455 __kmp_type_convert(task_state, &(th->th.th_task_state));
kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3476                                size_t reduce_size, void *reduce_data,
3477                                void (*reduce_func)(void *lhs_data, void *rhs_data),
3478                                kmp_critical_name *lck) {
3482 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3485 int teams_swapped = 0, task_state;
3486 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3487 __kmp_assert_valid_gtid(global_tid);
3495 if (!TCR_4(__kmp_init_parallel))
3496 __kmp_parallel_initialize();
3498 __kmp_resume_if_soft_paused();
3501 #if KMP_USE_DYNAMIC_LOCK
3502 if (__kmp_env_consistency_check)
3503 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3505 if (__kmp_env_consistency_check)
3506 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3509 th = __kmp_thread_from_gtid(global_tid);
3510 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3528 packed_reduction_method = __kmp_determine_reduction_method(
3529 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3530 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3532 OMPT_REDUCTION_DECL(th, global_tid);
3533 if (packed_reduction_method == critical_reduce_block) {
3535 OMPT_REDUCTION_BEGIN;
3537 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3540 } else if (packed_reduction_method == empty_reduce_block) {
3542 OMPT_REDUCTION_BEGIN;
3548 } else if (packed_reduction_method == atomic_reduce_block) {
3558 if (__kmp_env_consistency_check)
3559 __kmp_pop_sync(global_tid, ct_reduce, loc);
3561 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3562                                  tree_reduce_block)) {
3582 ompt_frame_t *ompt_frame;
3583 if (ompt_enabled.enabled) {
3584 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3585 if (ompt_frame->enter_frame.ptr == NULL)
3586 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3588 OMPT_STORE_RETURN_ADDRESS(global_tid);
3591 __kmp_threads[global_tid]->th.th_ident = loc;
3594 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3595 global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3596 retval = (retval != 0) ? (0) : (1);
3597 #if OMPT_SUPPORT && OMPT_OPTIONAL
3598 if (ompt_enabled.enabled) {
3599 ompt_frame->enter_frame = ompt_data_none;
3605 if (__kmp_env_consistency_check) {
3607 __kmp_pop_sync(global_tid, ct_reduce, loc);
3616 if (teams_swapped) {
3617 __kmp_restore_swapped_teams(th, team, task_state);
3621 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3622  global_tid, packed_reduction_method, retval));
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3636                               kmp_critical_name *lck) {
3638 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3640 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3641 __kmp_assert_valid_gtid(global_tid);
3643 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3645 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3647 if (packed_reduction_method == critical_reduce_block) {
3649 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3652 } else if (packed_reduction_method == empty_reduce_block) {
3659 } else if (packed_reduction_method == atomic_reduce_block) {
3666 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3667                                  tree_reduce_block)) {
3678 if (__kmp_env_consistency_check)
3679 __kmp_pop_sync(global_tid, ct_reduce, loc);
3681 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3682               global_tid, packed_reduction_method));
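As a point of reference, the sketch below (not part of this file) shows the kind of source construct that commonly reaches __kmpc_reduce_nowait/__kmpc_end_reduce_nowait: a reduction on a worksharing loop with nowait, for which the runtime then selects one of the packed reduction methods (critical, empty, atomic or tree) handled above. The lowering is compiler-dependent and assumed here.

#include <omp.h>
#include <stdio.h>

int main(void) {
  double sum = 0.0;
#pragma omp parallel
  {
#pragma omp for reduction(+ : sum) nowait
    for (int i = 1; i <= 1000; ++i)
      sum += 1.0 / i;
  } /* only the parallel region's closing barrier remains */
  printf("H(1000) = %f\n", sum);
  return 0;
}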
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3705                         size_t reduce_size, void *reduce_data,
3706                         void (*reduce_func)(void *lhs_data, void *rhs_data),
3707                         kmp_critical_name *lck) {
3710 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3713 int teams_swapped = 0, task_state;
3715 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3716 __kmp_assert_valid_gtid(global_tid);
3724 if (!TCR_4(__kmp_init_parallel))
3725 __kmp_parallel_initialize();
3727 __kmp_resume_if_soft_paused();
3730 #if KMP_USE_DYNAMIC_LOCK
3731 if (__kmp_env_consistency_check)
3732 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3734 if (__kmp_env_consistency_check)
3735 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3738 th = __kmp_thread_from_gtid(global_tid);
3739 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3741 packed_reduction_method = __kmp_determine_reduction_method(
3742 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3743 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3745 OMPT_REDUCTION_DECL(th, global_tid);
3747 if (packed_reduction_method == critical_reduce_block) {
3749 OMPT_REDUCTION_BEGIN;
3750 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3753 } else if (packed_reduction_method == empty_reduce_block) {
3755 OMPT_REDUCTION_BEGIN;
3760 } else if (packed_reduction_method == atomic_reduce_block) {
3764 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3765                                  tree_reduce_block)) {
3771 ompt_frame_t *ompt_frame;
3772 if (ompt_enabled.enabled) {
3773 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3774 if (ompt_frame->enter_frame.ptr == NULL)
3775 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3777 OMPT_STORE_RETURN_ADDRESS(global_tid);
3780 __kmp_threads[global_tid]->th.th_ident = loc;
3784 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3785 global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3786 retval = (retval != 0) ? (0) : (1);
3787 #if OMPT_SUPPORT && OMPT_OPTIONAL
3788 if (ompt_enabled.enabled) {
3789 ompt_frame->enter_frame = ompt_data_none;
3795 if (__kmp_env_consistency_check) {
3797 __kmp_pop_sync(global_tid, ct_reduce, loc);
3806 if (teams_swapped) {
3807 __kmp_restore_swapped_teams(th, team, task_state);
3811 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3812  global_tid, packed_reduction_method, retval));
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3827                        kmp_critical_name *lck) {
3829 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3832 int teams_swapped = 0, task_state;
3834 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3835 __kmp_assert_valid_gtid(global_tid);
3837 th = __kmp_thread_from_gtid(global_tid);
3838 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3840 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3844 OMPT_REDUCTION_DECL(th, global_tid);
3846 if (packed_reduction_method == critical_reduce_block) {
3847 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3853 ompt_frame_t *ompt_frame;
3854 if (ompt_enabled.enabled) {
3855 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3856 if (ompt_frame->enter_frame.ptr == NULL)
3857 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3859 OMPT_STORE_RETURN_ADDRESS(global_tid);
3862 __kmp_threads[global_tid]->th.th_ident = loc;
3864 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3865 #if OMPT_SUPPORT && OMPT_OPTIONAL
3866 if (ompt_enabled.enabled) {
3867 ompt_frame->enter_frame = ompt_data_none;
3871 } else if (packed_reduction_method == empty_reduce_block) {
3879 ompt_frame_t *ompt_frame;
3880 if (ompt_enabled.enabled) {
3881 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3882 if (ompt_frame->enter_frame.ptr == NULL)
3883 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3885 OMPT_STORE_RETURN_ADDRESS(global_tid);
3888 __kmp_threads[global_tid]->th.th_ident = loc;
3890 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3891 #if OMPT_SUPPORT && OMPT_OPTIONAL
3892 if (ompt_enabled.enabled) {
3893 ompt_frame->enter_frame = ompt_data_none;
3897 } else if (packed_reduction_method == atomic_reduce_block) {
3900 ompt_frame_t *ompt_frame;
3901 if (ompt_enabled.enabled) {
3902 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3903 if (ompt_frame->enter_frame.ptr == NULL)
3904 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3906 OMPT_STORE_RETURN_ADDRESS(global_tid);
3910 __kmp_threads[global_tid]->th.th_ident = loc;
3912 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3913 #if OMPT_SUPPORT && OMPT_OPTIONAL
3914 if (ompt_enabled.enabled) {
3915 ompt_frame->enter_frame = ompt_data_none;
3919 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3920                                  tree_reduce_block)) {
3923 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3931 if (teams_swapped) {
3932 __kmp_restore_swapped_teams(th, team, task_state);
3935 if (__kmp_env_consistency_check)
3936 __kmp_pop_sync(global_tid, ct_reduce, loc);
3938 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3939               global_tid, packed_reduction_method));
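The blocking counterpart, for comparison (again a sketch, not part of this file): without nowait the reduction ends the construct with a barrier, which matches the __kmp_barrier calls issued by __kmpc_end_reduce above for the critical, empty and atomic methods.

#include <stdio.h>

int main(void) {
  long total = 0;
#pragma omp parallel for reduction(+ : total)
  for (int i = 0; i < 1000000; ++i)
    total += i % 7;
  printf("total = %ld\n", total);
  return 0;
}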
3944 #undef __KMP_GET_REDUCTION_METHOD
3945 #undef __KMP_SET_REDUCTION_METHOD
3949 kmp_uint64 __kmpc_get_taskid() {
3954 gtid = __kmp_get_gtid();
3958 thread = __kmp_thread_from_gtid(gtid);
3959 return thread->th.th_current_task->td_task_id;
3963 kmp_uint64 __kmpc_get_parent_taskid() {
3967 kmp_taskdata_t *parent_task;
3969 gtid = __kmp_get_gtid();
3973 thread = __kmp_thread_from_gtid(gtid);
3974 parent_task = thread->th.th_current_task->td_parent;
3975 return (parent_task == NULL ? 0 : parent_task->td_task_id);
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
3991                           const struct kmp_dim *dims) {
3992 __kmp_assert_valid_gtid(gtid);
3994 kmp_int64 last, trace_count;
3995 kmp_info_t *th = __kmp_threads[gtid];
3996 kmp_team_t *team = th->th.th_team;
3998 kmp_disp_t *pr_buf = th->th.th_dispatch;
3999 dispatch_shared_info_t *sh_buf;
4003 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
4004  gtid, num_dims, !team->t.t_serialized));
4005 KMP_DEBUG_ASSERT(dims != NULL);
4006 KMP_DEBUG_ASSERT(num_dims > 0);
4008 if (team->t.t_serialized) {
4009 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
4012 KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
4013 idx = pr_buf->th_doacross_buf_idx++;
4015 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4018 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
4019 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
4020 th, sizeof(kmp_int64) * (4 * num_dims + 1));
4021 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4022 pr_buf->th_doacross_info[0] =
4023 (kmp_int64)num_dims;
4026 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
4027 pr_buf->th_doacross_info[2] = dims[0].lo;
4028 pr_buf->th_doacross_info[3] = dims[0].up;
4029 pr_buf->th_doacross_info[4] = dims[0].st;
4031 for (j = 1; j < num_dims; ++j) {
4034 if (dims[j].st == 1) {
4036 range_length = dims[j].up - dims[j].lo + 1;
4038 if (dims[j].st > 0) {
4039 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
4040 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
4042 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
4044 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
4047 pr_buf->th_doacross_info[last++] = range_length;
4048 pr_buf->th_doacross_info[last++] = dims[j].lo;
4049 pr_buf->th_doacross_info[last++] = dims[j].up;
4050 pr_buf->th_doacross_info[last++] = dims[j].st;
4055 if (dims[0].st == 1) {
4056 trace_count = dims[0].up - dims[0].lo + 1;
4057 } else if (dims[0].st > 0) {
4058 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
4059 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
4061 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
4062 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
4064 for (j = 1; j < num_dims; ++j) {
4065 trace_count *= pr_buf->th_doacross_info[4 * j + 1];
4067 KMP_DEBUG_ASSERT(trace_count > 0);
4071 if (idx != sh_buf->doacross_buf_idx) {
4073 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
4080 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
4081     (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
4083 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
4084     (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
4086 if (flags == NULL) {
4089 (size_t)trace_count / 8 + 8;
4090 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
4092 sh_buf->doacross_flags = flags;
4093 } else if (flags == (kmp_uint32 *)1) {
4096 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
4098 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
4105 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1);
4106 pr_buf->th_doacross_flags =
4107 sh_buf->doacross_flags;
4109 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4112 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4113 __kmp_assert_valid_gtid(gtid);
4117 kmp_int64 iter_number;
4118 kmp_info_t *th = __kmp_threads[gtid];
4119 kmp_team_t *team = th->th.th_team;
4121 kmp_int64 lo, up, st;
4123 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4124 if (team->t.t_serialized) {
4125 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4130 pr_buf = th->th.th_dispatch;
4131 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4132 num_dims = (size_t)pr_buf->th_doacross_info[0];
4133 lo = pr_buf->th_doacross_info[2];
4134 up = pr_buf->th_doacross_info[3];
4135 st = pr_buf->th_doacross_info[4];
4136 #if OMPT_SUPPORT && OMPT_OPTIONAL
4137 ompt_dependence_t deps[num_dims];
4140 if (vec[0] < lo || vec[0] > up) {
4141 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4142               "bounds [%lld,%lld]\n",
4143               gtid, vec[0], lo, up));
4146 iter_number = vec[0] - lo;
4147 } else if (st > 0) {
4148 if (vec[0] < lo || vec[0] > up) {
4149 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4150               "bounds [%lld,%lld]\n",
4151               gtid, vec[0], lo, up));
4154 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4156 if (vec[0] > lo || vec[0] < up) {
4157 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4158               "bounds [%lld,%lld]\n",
4159               gtid, vec[0], lo, up));
4162 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4164 #if OMPT_SUPPORT && OMPT_OPTIONAL
4165 deps[0].variable.value = iter_number;
4166 deps[0].dependence_type = ompt_dependence_type_sink;
4168 for (i = 1; i < num_dims; ++i) {
4171 ln = pr_buf->th_doacross_info[j + 1];
4172 lo = pr_buf->th_doacross_info[j + 2];
4173 up = pr_buf->th_doacross_info[j + 3];
4174 st = pr_buf->th_doacross_info[j + 4];
4176 if (vec[i] < lo || vec[i] > up) {
4177 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4178               "bounds [%lld,%lld]\n",
4179               gtid, vec[i], lo, up));
4183 } else if (st > 0) {
4184 if (vec[i] < lo || vec[i] > up) {
4185 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4186               "bounds [%lld,%lld]\n",
4187               gtid, vec[i], lo, up));
4190 iter = (kmp_uint64)(vec[i] - lo) / st;
4192 if (vec[i] > lo || vec[i] < up) {
4193 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4194               "bounds [%lld,%lld]\n",
4195               gtid, vec[i], lo, up));
4198 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4200 iter_number = iter + ln * iter_number;
4201 #if OMPT_SUPPORT && OMPT_OPTIONAL
4202 deps[i].variable.value = iter;
4203 deps[i].dependence_type = ompt_dependence_type_sink;
4206 shft = iter_number % 32;
4209 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4213 #if OMPT_SUPPORT && OMPT_OPTIONAL
4214 if (ompt_enabled.ompt_callback_dependences) {
4215 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4216 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4220 (
"__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4221 gtid, (iter_number << 5) + shft));
4224 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4225 __kmp_assert_valid_gtid(gtid);
4229 kmp_int64 iter_number;
4230 kmp_info_t *th = __kmp_threads[gtid];
4231 kmp_team_t *team = th->th.th_team;
4235 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4236 if (team->t.t_serialized) {
4237 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4243 pr_buf = th->th.th_dispatch;
4244 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4245 num_dims = (size_t)pr_buf->th_doacross_info[0];
4246 lo = pr_buf->th_doacross_info[2];
4247 st = pr_buf->th_doacross_info[4];
4248 #if OMPT_SUPPORT && OMPT_OPTIONAL
4249 ompt_dependence_t deps[num_dims];
4252 iter_number = vec[0] - lo;
4253 } else if (st > 0) {
4254 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4256 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4258 #if OMPT_SUPPORT && OMPT_OPTIONAL
4259 deps[0].variable.value = iter_number;
4260 deps[0].dependence_type = ompt_dependence_type_source;
4262 for (i = 1; i < num_dims; ++i) {
4265 ln = pr_buf->th_doacross_info[j + 1];
4266 lo = pr_buf->th_doacross_info[j + 2];
4267 st = pr_buf->th_doacross_info[j + 4];
4270 } else if (st > 0) {
4271 iter = (kmp_uint64)(vec[i] - lo) / st;
4273 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4275 iter_number = iter + ln * iter_number;
4276 #if OMPT_SUPPORT && OMPT_OPTIONAL
4277 deps[i].variable.value = iter;
4278 deps[i].dependence_type = ompt_dependence_type_source;
4281 #if OMPT_SUPPORT && OMPT_OPTIONAL
4282 if (ompt_enabled.ompt_callback_dependences) {
4283 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4284 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4287 shft = iter_number % 32;
4291 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4292 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4293 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4294               (iter_number << 5) + shft));
4297 void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4298 __kmp_assert_valid_gtid(gtid);
4300 kmp_info_t *th = __kmp_threads[gtid];
4301 kmp_team_t *team = th->th.th_team;
4302 kmp_disp_t *pr_buf = th->th.th_dispatch;
4304 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4305 if (team->t.t_serialized) {
4306 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4310 num_done = KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
4311 if (num_done == th->th.th_team_nproc) {
4313 int idx = pr_buf->th_doacross_buf_idx - 1;
4314 dispatch_shared_info_t *sh_buf =
4315 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4316 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4317 (kmp_int64)&sh_buf->doacross_num_done);
4318 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4319 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4320 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4321 sh_buf->doacross_flags = NULL;
4322 sh_buf->doacross_num_done = 0;
4323 sh_buf->doacross_buf_idx +=
4324 __kmp_dispatch_num_buffers;
4327 pr_buf->th_doacross_flags = NULL;
4328 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4329 pr_buf->th_doacross_info = NULL;
4330 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4334 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4335 return __kmpc_alloc(__kmp_entry_gtid(), size, allocator);
4338 void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
4339 return __kmpc_calloc(__kmp_entry_gtid(), nmemb, size, allocator);
4342 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
4343                   omp_allocator_handle_t free_allocator) {
4344 return __kmpc_realloc(__kmp_entry_gtid(), ptr, size, allocator,
                           free_allocator);
4348 void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4349 __kmpc_free(__kmp_entry_gtid(), ptr, allocator);
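These four functions are thin wrappers that forward to the __kmpc_* allocator entry points with the calling thread's gtid. A small usage sketch (not part of this file), using only the predefined omp_default_mem_alloc handle:

#include <omp.h>
#include <string.h>

int main(void) {
  int *buf = (int *)omp_alloc(100 * sizeof(int), omp_default_mem_alloc);
  if (buf == NULL)
    return 1;
  memset(buf, 0, 100 * sizeof(int));
  /* grow the buffer; old and new allocations use the same allocator here */
  buf = (int *)omp_realloc(buf, 200 * sizeof(int), omp_default_mem_alloc,
                           omp_default_mem_alloc);
  omp_free(buf, omp_default_mem_alloc);
  return 0;
}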
4352 int __kmpc_get_target_offload(void) {
4353 if (!__kmp_init_serial) {
4354 __kmp_serial_initialize();
4356 return __kmp_target_offload;
4359 int __kmpc_pause_resource(kmp_pause_status_t level) {
4360 if (!__kmp_init_serial) {
4363 return __kmp_pause_resource(level);
4366 void __kmpc_error(ident_t *loc, int severity, const char *message) {
4367 if (!__kmp_init_serial)
4368 __kmp_serial_initialize();
4370 KMP_ASSERT(severity == severity_warning || severity == severity_fatal);
4373 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
4374 ompt_callbacks.ompt_callback(ompt_callback_error)(
4375 (ompt_severity_t)severity, message, KMP_STRLEN(message),
4376 OMPT_GET_RETURN_ADDRESS(0));
4382 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
4384 __kmp_str_format("%s:%s:%s", str_loc.file, str_loc.line, str_loc.col);
4385 __kmp_str_loc_free(&str_loc);
4387 src_loc = __kmp_str_format("unknown");
4390 if (severity == severity_warning)
4391 KMP_WARNING(UserDirectedWarning, src_loc, message);
4393 KMP_FATAL(UserDirectedError, src_loc, message);
4395 __kmp_str_free(&src_loc);
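__kmpc_error backs the OpenMP 5.1 error directive: severity_warning produces a runtime warning carrying the source location, severity_fatal aborts. A sketch of the source-level form (not part of this file; requires a compiler with error-directive support, and the lowering to __kmpc_error is assumed):

#include <stdio.h>

int main(void) {
  /* at run time this emits a warning through the OpenMP runtime and continues */
#pragma omp error at(execution) severity(warning) message("unsupported configuration")
  printf("still running after the warning\n");
  return 0;
}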
4398 #ifdef KMP_USE_VERSION_SYMBOLS
4407 #ifdef omp_set_affinity_format
4408 #undef omp_set_affinity_format
4410 #ifdef omp_get_affinity_format
4411 #undef omp_get_affinity_format
4413 #ifdef omp_display_affinity
4414 #undef omp_display_affinity
4416 #ifdef omp_capture_affinity
4417 #undef omp_capture_affinity
4419 KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
                            "OMP_5.0");
4421 KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
                            "OMP_5.0");
4423 KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
                            "OMP_5.0");
4425 KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
                            "OMP_5.0");