21 #include "kmp_error.h"
24 #include "kmp_stats.h"
26 #if KMP_USE_X87CONTROL
30 #include "kmp_dispatch.h"
31 #if KMP_USE_HIER_SCHED
32 #include "kmp_dispatch_hier.h"
36 #include "ompt-specific.h"
// Error-path "dispatch enter ordered" hook. It is installed as th_deo_fcn
// when the current loop was NOT scheduled as ordered (see __kmp_dispatch_init
// below, original lines 928-929), so being called here means the program hit
// an 'ordered' construct inside a non-ordered worksharing loop. With
// consistency checking on, report it through __kmp_push_sync.
// NOTE(review): fragmentary view — the declaration of 'th' and the closing
// braces are elided between the embedded original line numbers.
42 void __kmp_dispatch_deo_error(
int *gtid_ref,
int *cid_ref,
ident_t *loc_ref) {
45 KMP_DEBUG_ASSERT(gtid_ref);
// Diagnostics only run when KMP_CONSISTENCY_CHECK is enabled at runtime.
47 if (__kmp_env_consistency_check) {
48 th = __kmp_threads[*gtid_ref];
49 if (th->th.th_root->r.r_active &&
50 (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws != ct_none)) {
51 #if KMP_USE_DYNAMIC_LOCK
52 __kmp_push_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref, NULL, 0);
// Non-dynamic-lock builds use the 4-argument __kmp_push_sync overload.
54 __kmp_push_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref, NULL);
// Error-path "dispatch exit ordered" hook, the counterpart of
// __kmp_dispatch_deo_error above (installed as th_dxo_fcn for non-ordered
// loops). Pops the erroneous ordered construct from the consistency-check
// stack that the deo hook pushed.
// NOTE(review): fragmentary view — 'th' declaration and closing braces are
// elided between the embedded original line numbers.
60 void __kmp_dispatch_dxo_error(
int *gtid_ref,
int *cid_ref,
ident_t *loc_ref) {
63 if (__kmp_env_consistency_check) {
64 th = __kmp_threads[*gtid_ref];
65 if (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws != ct_none) {
66 __kmp_pop_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref);
// Fragment of __kmp_get_monotonicity: decide whether a schedule should run
// monotonically (SCHEDULE_MONOTONIC) or may run nonmonotonically
// (SCHEDULE_NONMONOTONIC), based on the schedule's modifier bits and global
// overrides.
// NOTE(review): the first lines of the signature (loc/schedule parameters)
// are elided in this view — only the trailing defaulted parameter is visible.
73 bool use_hier =
false) {
// Default to monotonic; only an explicit nonmonotonic modifier relaxes it.
76 int monotonicity = SCHEDULE_MONOTONIC;
// Code compiled against OpenMP versions before 5.0 keeps the pre-5.0
// monotonic default regardless of modifiers.
80 if (loc != NULL && loc->get_openmp_version() < 50)
81 monotonicity = SCHEDULE_MONOTONIC;
// Hierarchical scheduling and the KMP force-monotonic override both pin
// the schedule to monotonic; otherwise honor the modifier on 'schedule'.
83 if (use_hier || __kmp_force_monotonic)
84 monotonicity = SCHEDULE_MONOTONIC;
85 else if (SCHEDULE_HAS_NONMONOTONIC(schedule))
86 monotonicity = SCHEDULE_NONMONOTONIC;
87 else if (SCHEDULE_HAS_MONOTONIC(schedule))
88 monotonicity = SCHEDULE_MONOTONIC;
93 #if KMP_STATIC_STEAL_ENABLED
// Initialize the per-thread dispatch bookkeeping (pr) for one dynamically
// scheduled worksharing loop: resolve runtime/auto schedules to a concrete
// schedule kind, apply the monotonic/nonmonotonic modifier, compute the trip
// count tc, and fill the schedule-specific parameters pr->u.p.parm1..parm4.
// NOTE(review): fragmentary view — many original lines (declarations, braces,
// #else/#endif branches, KD_TRACE arguments) are elided between the embedded
// original line numbers; comments below describe only what is visible.
119 template <
typename T>
120 void __kmp_dispatch_init_algorithm(
ident_t *loc,
int gtid,
121 dispatch_private_info_template<T> *pr,
123 typename traits_t<T>::signed_t st,
125 kmp_uint64 *cur_chunk,
127 typename traits_t<T>::signed_t chunk,
129 typedef typename traits_t<T>::unsigned_t UT;
130 typedef typename traits_t<T>::floating_t DBL;
// Debug-trace entry (ST typedef and buff are only used by the trace code).
140 typedef typename traits_t<T>::signed_t ST;
144 buff = __kmp_str_format(
"__kmp_dispatch_init_algorithm: T#%%d called "
145 "pr:%%p lb:%%%s ub:%%%s st:%%%s "
146 "schedule:%%d chunk:%%%s nproc:%%%s tid:%%%s\n",
147 traits_t<T>::spec, traits_t<T>::spec,
148 traits_t<ST>::spec, traits_t<ST>::spec,
149 traits_t<T>::spec, traits_t<T>::spec);
150 KD_TRACE(10, (buff, gtid, pr, lb, ub, st, schedule, chunk, nproc, tid));
151 __kmp_str_free(&buff);
155 th = __kmp_threads[gtid];
156 team = th->th.th_team;
157 active = !team->t.t_serialized;
// ITT metadata is reported only by the primary thread of an active level-1
// team outside of teams constructs.
160 int itt_need_metadata_reporting =
161 __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
162 KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
163 team->t.t_active_level == 1;
166 #if KMP_USE_HIER_SCHED
167 use_hier = pr->flags.use_hier;
// Record the (non)monotonic modifier, then strip it so 'schedule' compares
// equal to the unmodified kmp_sch_* enumerators below.
173 monotonicity = __kmp_get_monotonicity(loc, schedule, use_hier);
174 schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule);
178 pr->flags.nomerge = TRUE;
182 pr->flags.nomerge = FALSE;
184 pr->type_size = traits_t<T>::type_size;
186 pr->flags.ordered = TRUE;
190 pr->flags.ordered = FALSE;
// Ordered loops must execute monotonically regardless of modifiers.
193 if (pr->flags.ordered) {
194 monotonicity = SCHEDULE_MONOTONIC;
198 schedule = __kmp_static;
// schedule(runtime): take the schedule kind and chunk from the team ICVs.
200 if (schedule == kmp_sch_runtime) {
203 schedule = team->t.t_sched.r_sched_type;
204 monotonicity = __kmp_get_monotonicity(loc, schedule, use_hier);
205 schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule);
206 if (pr->flags.ordered)
207 monotonicity = SCHEDULE_MONOTONIC;
211 schedule = __kmp_guided;
213 schedule = __kmp_static;
217 chunk = team->t.t_sched.chunk;
226 buff = __kmp_str_format(
"__kmp_dispatch_init_algorithm: T#%%d new: "
227 "schedule:%%d chunk:%%%s\n",
229 KD_TRACE(10, (buff, gtid, schedule, chunk));
230 __kmp_str_free(&buff);
235 schedule = __kmp_guided;
238 chunk = KMP_DEFAULT_CHUNK;
// schedule(auto): mapped to the library's __kmp_auto choice.
244 schedule = __kmp_auto;
249 buff = __kmp_str_format(
250 "__kmp_dispatch_init_algorithm: kmp_sch_auto: T#%%d new: "
251 "schedule:%%d chunk:%%%s\n",
253 KD_TRACE(10, (buff, gtid, schedule, chunk));
254 __kmp_str_free(&buff);
258 #if KMP_STATIC_STEAL_ENABLED
// Nonmonotonic dynamic is upgraded to the static-steal algorithm when the
// build enables it.
260 if (schedule == kmp_sch_dynamic_chunked) {
261 if (monotonicity == SCHEDULE_NONMONOTONIC)
262 schedule = kmp_sch_static_steal;
// The analytical guided algorithm is not used for huge thread counts
// (> 2^20); fall back to the iterative variant with a warning.
266 if (schedule == kmp_sch_guided_analytical_chunked && nproc > 1 << 20) {
267 schedule = kmp_sch_guided_iterative_chunked;
268 KMP_WARNING(DispatchManyThreads);
272 schedule = team->t.t_sched.r_sched_type;
273 monotonicity = __kmp_get_monotonicity(loc, schedule, use_hier);
274 schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule);
278 schedule == __kmp_static) {
279 schedule = kmp_sch_static_balanced_chunked;
284 chunk = team->t.t_sched.chunk * chunk;
294 buff = __kmp_str_format(
295 "__kmp_dispatch_init_algorithm: T#%%d new: schedule:%%d"
298 KD_TRACE(10, (buff, gtid, schedule, chunk));
299 __kmp_str_free(&buff);
303 pr->u.p.parm1 = chunk;
306 "unknown scheduling type");
// A zero loop increment is a user error; diagnose when checking is on.
310 if (__kmp_env_consistency_check) {
312 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited,
313 (pr->flags.ordered ? ct_pdo_ordered : ct_pdo), loc);
// Trip count computed in the unsigned type UT; the (lb-ub)/(-st) form
// handles negative strides without signed overflow.
327 tc = (UT)(lb - ub) / (-st) + 1;
335 tc = (UT)(ub - lb) / st + 1;
341 #if KMP_STATS_ENABLED
342 if (KMP_MASTER_GTID(gtid)) {
353 pr->u.p.last_upper = ub + st;
// Ordered bookkeeping starts with an empty (lower > upper) window.
359 if (pr->flags.ordered) {
360 pr->ordered_bumped = 0;
361 pr->u.p.ordered_lower = 1;
362 pr->u.p.ordered_upper = 0;
367 #if KMP_STATIC_STEAL_ENABLED
// static_steal: pre-split ntc chunks evenly across threads; parm4 names the
// first victim for later stealing. Falls back to dynamic_chunked when there
// are too few chunks (or one thread).
368 case kmp_sch_static_steal: {
372 (
"__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_steal case\n",
375 ntc = (tc % chunk ? 1 : 0) + tc / chunk;
376 if (nproc > 1 && ntc >= nproc) {
379 T small_chunk, extras;
380 kmp_uint32 old = UNUSED;
381 int claimed = pr->steal_flag.compare_exchange_strong(old, CLAIMED);
// Induction types wider than 4 bytes guard count/ub with a real lock
// (steal_lock); presumably narrower types use 64-bit atomics — see the
// union_i4 CAS path in __kmp_dispatch_next_algorithm.
382 if (traits_t<T>::type_size > 4) {
388 pr->u.p.steal_lock = (kmp_lock_t *)__kmp_allocate(
sizeof(kmp_lock_t));
389 __kmp_init_lock(pr->u.p.steal_lock);
391 small_chunk = ntc / nproc;
392 extras = ntc % nproc;
394 init =
id * small_chunk + (
id < extras ? id : extras);
395 pr->u.p.count = init;
397 pr->u.p.ub = init + small_chunk + (
id < extras ? 1 : 0);
// Publish the buffer as stealable only after count/ub are set.
400 KMP_ATOMIC_ST_REL(&pr->steal_flag, READY);
403 KMP_DEBUG_ASSERT(pr->steal_flag == THIEF);
409 pr->u.p.parm3 = nproc;
410 pr->u.p.parm4 = (
id + 1) % nproc;
414 schedule = kmp_sch_dynamic_chunked;
415 KD_TRACE(100, (
"__kmp_dispatch_init_algorithm: T#%d switching to "
416 "kmp_sch_dynamic_chunked\n",
// static_balanced: each thread's contiguous [init, limit] share is fully
// computed here; parm1 records whether this thread owns the last iteration.
423 case kmp_sch_static_balanced: {
428 (
"__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_balanced case\n",
438 pr->u.p.parm1 = (
id == tc - 1);
441 pr->u.p.parm1 = FALSE;
445 T small_chunk = tc / nproc;
446 T extras = tc % nproc;
447 init =
id * small_chunk + (
id < extras ? id : extras);
448 limit = init + small_chunk - (
id < extras ? 0 : 1);
449 pr->u.p.parm1 = (
id == nproc - 1);
455 pr->u.p.parm1 = TRUE;
459 pr->u.p.parm1 = FALSE;
465 if (itt_need_metadata_reporting)
467 *cur_chunk = limit - init + 1;
// Unit stride uses direct offsets; otherwise clamp ub to the loop bound
// (direction of the comparison depends on the sign of st).
470 pr->u.p.lb = lb + init;
471 pr->u.p.ub = lb + limit;
474 T ub_tmp = lb + limit * st;
475 pr->u.p.lb = lb + init * st;
479 pr->u.p.ub = (ub_tmp + st > ub ? ub : ub_tmp);
481 pr->u.p.ub = (ub_tmp + st < ub ? ub : ub_tmp);
484 if (pr->flags.ordered) {
485 pr->u.p.ordered_lower = init;
486 pr->u.p.ordered_upper = limit;
// simd:static — round the per-thread span up to a multiple of chunk, then
// reuse the static_greedy machinery.
490 case kmp_sch_static_balanced_chunked: {
493 KD_TRACE(100, (
"__kmp_dispatch_init_algorithm: T#%d runtime(simd:static)"
494 " -> falling-through to static_greedy\n",
496 schedule = kmp_sch_static_greedy;
498 pr->u.p.parm1 = ((tc + nth - 1) / nth + chunk - 1) & ~(chunk - 1);
// guided (iterative): parm2/parm3 hold the tuning constants; tiny loops
// degrade to dynamic_chunked.
504 case kmp_sch_guided_iterative_chunked: {
507 (
"__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked"
512 if ((2L * chunk + 1) * nproc >= tc) {
514 schedule = kmp_sch_dynamic_chunked;
518 pr->u.p.parm2 = guided_int_param * nproc * (chunk + 1);
// parm3 is reused as raw storage for a double (type-punned via cast).
519 *(
double *)&pr->u.p.parm3 =
520 guided_flt_param / (
double)nproc;
523 KD_TRACE(100, (
"__kmp_dispatch_init_algorithm: T#%d falling-through to "
524 "kmp_sch_static_greedy\n",
526 schedule = kmp_sch_static_greedy;
530 (
"__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n",
// guided (analytical): precompute the cross-over chunk index parm2 where the
// schedule switches formulas, found by binary search on __kmp_pow.
536 case kmp_sch_guided_analytical_chunked: {
537 KD_TRACE(100, (
"__kmp_dispatch_init_algorithm: T#%d "
538 "kmp_sch_guided_analytical_chunked case\n",
542 if ((2L * chunk + 1) * nproc >= tc) {
544 schedule = kmp_sch_dynamic_chunked;
550 #if KMP_USE_X87CONTROL
// Force the x87 FPU to 64-bit mantissa precision for the long double math
// below; the old control word is restored at original line 648.
560 unsigned int oldFpcw = _control87(0, 0);
561 _control87(_PC_64, _MCW_PC);
564 long double target = ((
long double)chunk * 2 + 1) * nproc / tc;
571 x = 1.0 - 0.5 / (double)nproc;
582 ptrdiff_t natural_alignment =
583 (ptrdiff_t)&t.b - (ptrdiff_t)&t - (ptrdiff_t)1;
587 (((ptrdiff_t)&pr->u.p.parm3) & (natural_alignment)) == 0);
592 *(DBL *)&pr->u.p.parm3 = x;
// Exponential search for an upper bound, then binary search for the exact
// cross-over point where x^cross first drops to <= target.
605 p = __kmp_pow<UT>(x, right);
610 }
while (p > target && right < (1 << 27));
618 while (left + 1 < right) {
619 mid = (left + right) / 2;
620 if (__kmp_pow<UT>(x, mid) > target) {
629 KMP_ASSERT(cross && __kmp_pow<UT>(x, cross - 1) > target &&
630 __kmp_pow<UT>(x, cross) <= target);
633 pr->u.p.parm2 = cross;
// 32-bit x86 workaround: read x back through the stored parm3 bits so both
// sides of the comparison use identical rounding.
636 #if ((KMP_OS_LINUX || KMP_OS_WINDOWS) && KMP_ARCH_X86) && (!defined(KMP_I8))
637 #define GUIDED_ANALYTICAL_WORKAROUND (*(DBL *)&pr->u.p.parm3)
639 #define GUIDED_ANALYTICAL_WORKAROUND (x)
643 __kmp_dispatch_guided_remaining(
644 tc, GUIDED_ANALYTICAL_WORKAROUND, cross) -
646 #if KMP_USE_X87CONTROL
648 _control87(oldFpcw, _MCW_PC);
652 KD_TRACE(100, (
"__kmp_dispatch_init_algorithm: T#%d falling-through to "
653 "kmp_sch_static_greedy\n",
655 schedule = kmp_sch_static_greedy;
// static_greedy: one ceil(tc/nproc)-sized piece per thread.
661 case kmp_sch_static_greedy:
664 (
"__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n",
666 pr->u.p.parm1 = (nproc > 1) ? (tc + nproc - 1) / nproc : tc;
// chunked static/dynamic: sanitize parm1 (the chunk) and record the chunk
// count in parm2.
668 case kmp_sch_static_chunked:
669 case kmp_sch_dynamic_chunked:
671 if (pr->u.p.parm1 <= 0)
672 pr->u.p.parm1 = KMP_DEFAULT_CHUNK;
673 else if (pr->u.p.parm1 > tc)
677 pr->u.p.parm2 = (tc / pr->u.p.parm1) + (tc % pr->u.p.parm1 ? 1 : 0);
678 KD_TRACE(100, (
"__kmp_dispatch_init_algorithm: T#%d "
679 "kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n",
// trapezoidal: chunk sizes decrease linearly from parm2 to parm1 over parm3
// chunks, stepping by parm4 per chunk.
682 case kmp_sch_trapezoidal: {
685 T parm1, parm2, parm3, parm4;
687 (
"__kmp_dispatch_init_algorithm: T#%d kmp_sch_trapezoidal case\n",
693 parm2 = (tc / (2 * nproc));
703 }
else if (parm1 > parm2) {
708 parm3 = (parm2 + parm1);
709 parm3 = (2 * tc + parm3 - 1) / parm3;
717 parm4 = (parm2 - parm1) / parm4;
724 pr->u.p.parm1 = parm1;
725 pr->u.p.parm2 = parm2;
726 pr->u.p.parm3 = parm3;
727 pr->u.p.parm4 = parm4;
// Unknown schedule kind: fatal error suggesting a newer runtime library.
732 __kmp_fatal(KMP_MSG(UnknownSchedTypeDetected),
733 KMP_HNT(GetNewerLibrary),
738 pr->schedule = schedule;
741 #if KMP_USE_HIER_SCHED
// Forward declaration: set up hierarchical scheduling from the runtime
// (__kmp_hier_scheds) settings. Explicitly specialized below for each of the
// four supported induction-variable types.
742 template <
typename T>
743 inline void __kmp_dispatch_init_hier_runtime(
ident_t *loc, T lb, T ub,
744 typename traits_t<T>::signed_t st);
// kmp_int32 specialization: 32-bit types take the small_chunks array from
// the global hierarchical-schedule settings.
747 __kmp_dispatch_init_hier_runtime<kmp_int32>(
ident_t *loc, kmp_int32 lb,
748 kmp_int32 ub, kmp_int32 st) {
749 __kmp_dispatch_init_hierarchy<kmp_int32>(
750 loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
751 __kmp_hier_scheds.scheds, __kmp_hier_scheds.small_chunks, lb, ub, st);
// kmp_uint32 specialization: same as the signed 32-bit case (small_chunks),
// but note the stride parameter stays signed (kmp_int32).
755 __kmp_dispatch_init_hier_runtime<kmp_uint32>(
ident_t *loc, kmp_uint32 lb,
756 kmp_uint32 ub, kmp_int32 st) {
757 __kmp_dispatch_init_hierarchy<kmp_uint32>(
758 loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
759 __kmp_hier_scheds.scheds, __kmp_hier_scheds.small_chunks, lb, ub, st);
// kmp_int64 specialization: 64-bit types take the large_chunks array instead
// of small_chunks.
763 __kmp_dispatch_init_hier_runtime<kmp_int64>(
ident_t *loc, kmp_int64 lb,
764 kmp_int64 ub, kmp_int64 st) {
765 __kmp_dispatch_init_hierarchy<kmp_int64>(
766 loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
767 __kmp_hier_scheds.scheds, __kmp_hier_scheds.large_chunks, lb, ub, st);
// kmp_uint64 specialization: unsigned 64-bit, large_chunks, signed stride.
771 __kmp_dispatch_init_hier_runtime<kmp_uint64>(
ident_t *loc, kmp_uint64 lb,
772 kmp_uint64 ub, kmp_int64 st) {
773 __kmp_dispatch_init_hierarchy<kmp_uint64>(
774 loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
775 __kmp_hier_scheds.scheds, __kmp_hier_scheds.large_chunks, lb, ub, st);
// Free the hierarchical-scheduling objects attached to each of the team's
// dispatch buffers (serialized/single-thread teams keep only 2 buffers,
// otherwise __kmp_dispatch_num_buffers).
// NOTE(review): fragmentary view — the assignment target ('sh =') of the
// reinterpret_cast and the surrounding braces are elided.
779 void __kmp_dispatch_free_hierarchies(kmp_team_t *team) {
780 int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
781 for (
int i = 0; i < num_disp_buff; ++i) {
784 reinterpret_cast<dispatch_shared_info_template<kmp_int32>
volatile *
>(
785 &team->t.t_disp_buffer[i]);
// Release the hierarchy's own resources before freeing the object itself.
787 sh->hier->deallocate();
788 __kmp_free(sh->hier);
// Top-level entry for initializing a dynamically scheduled loop for the
// calling thread: pick the private (pr) and shared (sh) dispatch buffers,
// wait until the buffer is free for reuse, run
// __kmp_dispatch_init_algorithm, and install the ordered enter/exit hooks.
// NOTE(review): fragmentary view — the function name/first parameters and
// many intervening lines (declarations, braces, #else/#endif) are elided
// between the embedded original line numbers.
796 template <
typename T>
799 T ub,
typename traits_t<T>::signed_t st,
800 typename traits_t<T>::signed_t chunk,
int push_ws) {
801 typedef typename traits_t<T>::unsigned_t UT;
806 kmp_uint32 my_buffer_index;
807 dispatch_private_info_template<T> *pr;
808 dispatch_shared_info_template<T>
volatile *sh;
// The templated buffer views must overlay the plain structs exactly.
810 KMP_BUILD_ASSERT(
sizeof(dispatch_private_info_template<T>) ==
811 sizeof(dispatch_private_info));
812 KMP_BUILD_ASSERT(
sizeof(dispatch_shared_info_template<UT>) ==
813 sizeof(dispatch_shared_info));
814 __kmp_assert_valid_gtid(gtid);
816 if (!TCR_4(__kmp_init_parallel))
817 __kmp_parallel_initialize();
819 __kmp_resume_if_soft_paused();
821 #if INCLUDE_SSC_MARKS
822 SSC_MARK_DISPATCH_INIT();
825 typedef typename traits_t<T>::signed_t ST;
829 buff = __kmp_str_format(
"__kmp_dispatch_init: T#%%d called: schedule:%%d "
830 "chunk:%%%s lb:%%%s ub:%%%s st:%%%s\n",
831 traits_t<ST>::spec, traits_t<T>::spec,
832 traits_t<T>::spec, traits_t<ST>::spec);
833 KD_TRACE(10, (buff, gtid, schedule, chunk, lb, ub, st));
834 __kmp_str_free(&buff);
838 th = __kmp_threads[gtid];
839 team = th->th.th_team;
840 active = !team->t.t_serialized;
841 th->th.th_ident = loc;
846 if (schedule == __kmp_static) {
852 #if KMP_USE_HIER_SCHED
858 my_buffer_index = th->th.th_dispatch->th_disp_index;
859 pr =
reinterpret_cast<dispatch_private_info_template<T> *
>(
861 ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
862 my_sched = SCHEDULE_WITHOUT_MODIFIERS(my_sched);
// Hierarchical scheduling cannot express ordered semantics; disable it.
867 if (pr->flags.use_hier) {
869 KD_TRACE(100, (
"__kmp_dispatch_init: T#%d ordered loop detected. "
870 "Disabling hierarchical scheduling.\n",
872 pr->flags.use_hier = FALSE;
875 if (schedule == kmp_sch_runtime && __kmp_hier_scheds.size > 0) {
878 if (!ordered && !pr->flags.use_hier)
879 __kmp_dispatch_init_hier_runtime<T>(loc, lb, ub, st);
884 kmp_uint64 cur_chunk = chunk;
// Same ITT-reporting predicate as in __kmp_dispatch_init_algorithm.
885 int itt_need_metadata_reporting =
886 __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
887 KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
888 team->t.t_active_level == 1;
891 pr =
reinterpret_cast<dispatch_private_info_template<T> *
>(
892 th->th.th_dispatch->th_disp_buffer);
894 KMP_DEBUG_ASSERT(th->th.th_dispatch ==
895 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
// Claim the next dispatch buffer slot; buffers are reused round-robin
// modulo __kmp_dispatch_num_buffers.
897 my_buffer_index = th->th.th_dispatch->th_disp_index++;
900 pr =
reinterpret_cast<dispatch_private_info_template<T> *
>(
902 ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
903 sh =
reinterpret_cast<dispatch_shared_info_template<T>
volatile *
>(
904 &team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
905 KD_TRACE(10, (
"__kmp_dispatch_init: T#%d my_buffer_index:%d\n", gtid,
// Spin until the shared buffer has been recycled up to our index, i.e. the
// previous loop that used this slot is fully finished.
907 if (sh->buffer_index != my_buffer_index) {
908 KD_TRACE(100, (
"__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d"
909 " sh->buffer_index:%d\n",
910 gtid, my_buffer_index, sh->buffer_index));
911 __kmp_wait<kmp_uint32>(&sh->buffer_index, my_buffer_index,
912 __kmp_eq<kmp_uint32> USE_ITT_BUILD_ARG(NULL));
915 KD_TRACE(100, (
"__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d "
916 "sh->buffer_index:%d\n",
917 gtid, my_buffer_index, sh->buffer_index));
921 __kmp_dispatch_init_algorithm(loc, gtid, pr, schedule, lb, ub, st,
925 chunk, (T)th->th.th_team_nproc,
926 (T)th->th.th_info.ds.ds_tid);
// Non-ordered loops get the error-reporting ordered hooks; ordered loops
// get the real enter/exit implementations.
928 if (pr->flags.ordered == 0) {
929 th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo_error;
930 th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo_error;
932 th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo<UT>;
933 th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo<UT>;
935 th->th.th_dispatch->th_dispatch_pr_current = (dispatch_private_info_t *)pr;
936 th->th.th_dispatch->th_dispatch_sh_current =
937 CCAST(dispatch_shared_info_t *, (
volatile dispatch_shared_info_t *)sh);
939 if (pr->flags.ordered) {
940 __kmp_itt_ordered_init(gtid);
// Report loop schedule/trip-count metadata to ITT when enabled.
943 if (itt_need_metadata_reporting) {
945 kmp_uint64 schedtype = 0;
947 case kmp_sch_static_chunked:
948 case kmp_sch_static_balanced:
950 case kmp_sch_static_greedy:
951 cur_chunk = pr->u.p.parm1;
953 case kmp_sch_dynamic_chunked:
956 case kmp_sch_guided_iterative_chunked:
957 case kmp_sch_guided_analytical_chunked:
967 __kmp_itt_metadata_loop(loc, schedtype, pr->u.p.tc, cur_chunk);
969 #if KMP_USE_HIER_SCHED
970 if (pr->flags.use_hier) {
972 pr->u.p.ub = pr->u.p.lb = pr->u.p.st = pr->u.p.tc = 0;
982 buff = __kmp_str_format(
983 "__kmp_dispatch_init: T#%%d returning: schedule:%%d ordered:%%%s "
985 " st:%%%s tc:%%%s count:%%%s\n\tordered_lower:%%%s ordered_upper:%%%s"
986 " parm1:%%%s parm2:%%%s parm3:%%%s parm4:%%%s\n",
987 traits_t<UT>::spec, traits_t<T>::spec, traits_t<T>::spec,
988 traits_t<ST>::spec, traits_t<UT>::spec, traits_t<UT>::spec,
989 traits_t<UT>::spec, traits_t<UT>::spec, traits_t<T>::spec,
990 traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec);
991 KD_TRACE(10, (buff, gtid, pr->schedule, pr->flags.ordered, pr->u.p.lb,
992 pr->u.p.ub, pr->u.p.st, pr->u.p.tc, pr->u.p.count,
993 pr->u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1,
994 pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4));
995 __kmp_str_free(&buff);
// OMPT work-begin callback for tools.
998 #if OMPT_SUPPORT && OMPT_OPTIONAL
999 if (ompt_enabled.ompt_callback_work) {
1000 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1001 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1002 ompt_callbacks.ompt_callback(ompt_callback_work)(
1003 ompt_work_loop, ompt_scope_begin, &(team_info->parallel_data),
1004 &(task_info->task_data), pr->u.p.tc, OMPT_LOAD_RETURN_ADDRESS(gtid));
1007 KMP_PUSH_PARTITIONED_TIMER(OMP_loop_dynamic);
// Complete an ordered section for the current chunk: if this thread has not
// already bumped the shared ordered_iteration counter, wait until the
// counter reaches this thread's ordered_lower and then increment it, letting
// the next iteration's owner proceed.
// NOTE(review): fragmentary view — some braces and trace arguments are
// elided between the embedded original line numbers.
1015 template <
typename UT>
1016 static void __kmp_dispatch_finish(
int gtid,
ident_t *loc) {
1017 typedef typename traits_t<UT>::signed_t ST;
1018 __kmp_assert_valid_gtid(gtid);
1019 kmp_info_t *th = __kmp_threads[gtid];
1021 KD_TRACE(100, (
"__kmp_dispatch_finish: T#%d called\n", gtid));
// Nothing to synchronize for a serialized team.
1022 if (!th->th.th_team->t.t_serialized) {
1024 dispatch_private_info_template<UT> *pr =
1025 reinterpret_cast<dispatch_private_info_template<UT> *
>(
1026 th->th.th_dispatch->th_dispatch_pr_current);
1027 dispatch_shared_info_template<UT>
volatile *sh =
1028 reinterpret_cast<dispatch_shared_info_template<UT>
volatile *
>(
1029 th->th.th_dispatch->th_dispatch_sh_current);
1030 KMP_DEBUG_ASSERT(pr);
1031 KMP_DEBUG_ASSERT(sh);
1032 KMP_DEBUG_ASSERT(th->th.th_dispatch ==
1033 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
// Already bumped (the ordered section itself advanced the counter): just
// clear the flag instead of bumping again.
1035 if (pr->ordered_bumped) {
1038 (
"__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
1040 pr->ordered_bumped = 0;
1042 UT lower = pr->u.p.ordered_lower;
1048 buff = __kmp_str_format(
"__kmp_dispatch_finish: T#%%d before wait: "
1049 "ordered_iteration:%%%s lower:%%%s\n",
1050 traits_t<UT>::spec, traits_t<UT>::spec);
1051 KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
1052 __kmp_str_free(&buff);
// Wait for all earlier iterations' ordered sections to finish.
1056 __kmp_wait<UT>(&sh->u.s.ordered_iteration, lower,
1057 __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
1063 buff = __kmp_str_format(
"__kmp_dispatch_finish: T#%%d after wait: "
1064 "ordered_iteration:%%%s lower:%%%s\n",
1065 traits_t<UT>::spec, traits_t<UT>::spec);
1066 KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
1067 __kmp_str_free(&buff);
// Signal completion of our turn to subsequent iterations.
1071 test_then_inc<ST>((
volatile ST *)&sh->u.s.ordered_iteration);
1074 KD_TRACE(100, (
"__kmp_dispatch_finish: T#%d returned\n", gtid));
1077 #ifdef KMP_GOMP_COMPAT
// GOMP-compat variant of __kmp_dispatch_finish: finish an ordered section
// for a whole chunk [ordered_lower, ordered_upper] at once, adding the
// remaining iteration count to the shared ordered_iteration counter instead
// of incrementing by one.
// NOTE(review): fragmentary view — some braces and trace arguments are
// elided between the embedded original line numbers.
1079 template <
typename UT>
1080 static void __kmp_dispatch_finish_chunk(
int gtid,
ident_t *loc) {
1081 typedef typename traits_t<UT>::signed_t ST;
1082 __kmp_assert_valid_gtid(gtid);
1083 kmp_info_t *th = __kmp_threads[gtid];
1085 KD_TRACE(100, (
"__kmp_dispatch_finish_chunk: T#%d called\n", gtid));
1086 if (!th->th.th_team->t.t_serialized) {
1087 dispatch_private_info_template<UT> *pr =
1088 reinterpret_cast<dispatch_private_info_template<UT> *
>(
1089 th->th.th_dispatch->th_dispatch_pr_current);
1090 dispatch_shared_info_template<UT>
volatile *sh =
1091 reinterpret_cast<dispatch_shared_info_template<UT>
volatile *
>(
1092 th->th.th_dispatch->th_dispatch_sh_current);
1093 KMP_DEBUG_ASSERT(pr);
1094 KMP_DEBUG_ASSERT(sh);
1095 KMP_DEBUG_ASSERT(th->th.th_dispatch ==
1096 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
1098 UT lower = pr->u.p.ordered_lower;
1099 UT upper = pr->u.p.ordered_upper;
1100 UT inc = upper - lower + 1;
// If every iteration of the chunk was already bumped individually, there is
// nothing left to add — just reset the counter.
1102 if (pr->ordered_bumped == inc) {
1105 (
"__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
1107 pr->ordered_bumped = 0;
// Otherwise only the not-yet-bumped remainder is added below.
1109 inc -= pr->ordered_bumped;
1115 buff = __kmp_str_format(
1116 "__kmp_dispatch_finish_chunk: T#%%d before wait: "
1117 "ordered_iteration:%%%s lower:%%%s upper:%%%s\n",
1118 traits_t<UT>::spec, traits_t<UT>::spec, traits_t<UT>::spec);
1119 KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower, upper));
1120 __kmp_str_free(&buff);
// Wait until the chunk's first iteration is allowed to complete.
1124 __kmp_wait<UT>(&sh->u.s.ordered_iteration, lower,
1125 __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
1128 KD_TRACE(1000, (
"__kmp_dispatch_finish_chunk: T#%d resetting "
1129 "ordered_bumped to zero\n",
1131 pr->ordered_bumped = 0;
1137 buff = __kmp_str_format(
1138 "__kmp_dispatch_finish_chunk: T#%%d after wait: "
1139 "ordered_iteration:%%%s inc:%%%s lower:%%%s upper:%%%s\n",
1140 traits_t<UT>::spec, traits_t<UT>::spec, traits_t<UT>::spec,
1141 traits_t<UT>::spec);
1143 (buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper));
1144 __kmp_str_free(&buff);
// Advance the shared counter past this whole chunk in one step.
1148 test_then_add<ST>((
volatile ST *)&sh->u.s.ordered_iteration, inc);
1152 KD_TRACE(100, (
"__kmp_dispatch_finish_chunk: T#%d returned\n", gtid));
1157 template <
typename T>
1158 int __kmp_dispatch_next_algorithm(
int gtid,
1159 dispatch_private_info_template<T> *pr,
1160 dispatch_shared_info_template<T>
volatile *sh,
1161 kmp_int32 *p_last, T *p_lb, T *p_ub,
1162 typename traits_t<T>::signed_t *p_st, T nproc,
1164 typedef typename traits_t<T>::unsigned_t UT;
1165 typedef typename traits_t<T>::signed_t ST;
1166 typedef typename traits_t<T>::floating_t DBL;
1171 UT limit, trip, init;
1172 kmp_info_t *th = __kmp_threads[gtid];
1173 kmp_team_t *team = th->th.th_team;
1175 KMP_DEBUG_ASSERT(th->th.th_dispatch ==
1176 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
1177 KMP_DEBUG_ASSERT(pr);
1178 KMP_DEBUG_ASSERT(sh);
1179 KMP_DEBUG_ASSERT(tid >= 0 && tid < nproc);
1185 __kmp_str_format(
"__kmp_dispatch_next_algorithm: T#%%d called pr:%%p "
1186 "sh:%%p nproc:%%%s tid:%%%s\n",
1187 traits_t<T>::spec, traits_t<T>::spec);
1188 KD_TRACE(10, (buff, gtid, pr, sh, nproc, tid));
1189 __kmp_str_free(&buff);
1194 if (pr->u.p.tc == 0) {
1196 (
"__kmp_dispatch_next_algorithm: T#%d early exit trip count is "
1202 switch (pr->schedule) {
1203 #if KMP_STATIC_STEAL_ENABLED
1204 case kmp_sch_static_steal: {
1205 T chunk = pr->u.p.parm1;
1206 UT nchunks = pr->u.p.parm2;
1208 (
"__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_steal case\n",
1211 trip = pr->u.p.tc - 1;
1213 if (traits_t<T>::type_size > 4) {
1216 kmp_lock_t *lck = pr->u.p.steal_lock;
1217 KMP_DEBUG_ASSERT(lck != NULL);
1218 if (pr->u.p.count < (UT)pr->u.p.ub) {
1219 KMP_DEBUG_ASSERT(pr->steal_flag == READY);
1220 __kmp_acquire_lock(lck, gtid);
1222 init = (pr->u.p.count)++;
1223 status = (init < (UT)pr->u.p.ub);
1224 __kmp_release_lock(lck, gtid);
1230 T while_limit = pr->u.p.parm3;
1232 int idx = (th->th.th_dispatch->th_disp_index - 1) %
1233 __kmp_dispatch_num_buffers;
1235 KMP_ATOMIC_ST_REL(&pr->steal_flag, THIEF);
1236 while ((!status) && (while_limit != ++while_index)) {
1237 dispatch_private_info_template<T> *v;
1239 T victimId = pr->u.p.parm4;
1240 T oldVictimId = victimId ? victimId - 1 : nproc - 1;
1241 v =
reinterpret_cast<dispatch_private_info_template<T> *
>(
1242 &team->t.t_dispatch[victimId].th_disp_buffer[idx]);
1243 KMP_DEBUG_ASSERT(v);
1244 while ((v == pr || KMP_ATOMIC_LD_RLX(&v->steal_flag) == THIEF) &&
1245 oldVictimId != victimId) {
1246 victimId = (victimId + 1) % nproc;
1247 v =
reinterpret_cast<dispatch_private_info_template<T> *
>(
1248 &team->t.t_dispatch[victimId].th_disp_buffer[idx]);
1249 KMP_DEBUG_ASSERT(v);
1251 if (v == pr || KMP_ATOMIC_LD_RLX(&v->steal_flag) == THIEF) {
1254 if (KMP_ATOMIC_LD_RLX(&v->steal_flag) == UNUSED) {
1255 kmp_uint32 old = UNUSED;
1257 status = v->steal_flag.compare_exchange_strong(old, THIEF);
1261 T small_chunk, extras;
1262 small_chunk = nchunks / nproc;
1263 extras = nchunks % nproc;
1264 init =
id * small_chunk + (
id < extras ? id : extras);
1265 __kmp_acquire_lock(lck, gtid);
1266 pr->u.p.count = init + 1;
1267 pr->u.p.ub = init + small_chunk + (
id < extras ? 1 : 0);
1268 __kmp_release_lock(lck, gtid);
1269 pr->u.p.parm4 = (
id + 1) % nproc;
1275 buff = __kmp_str_format(
1276 "__kmp_dispatch_next: T#%%d stolen chunks from T#%%d, "
1277 "count:%%%s ub:%%%s\n",
1278 traits_t<UT>::spec, traits_t<T>::spec);
1279 KD_TRACE(10, (buff, gtid,
id, pr->u.p.count, pr->u.p.ub));
1280 __kmp_str_free(&buff);
1284 if (pr->u.p.count < (UT)pr->u.p.ub)
1285 KMP_ATOMIC_ST_REL(&pr->steal_flag, READY);
1289 if (KMP_ATOMIC_LD_RLX(&v->steal_flag) != READY ||
1290 v->u.p.count >= (UT)v->u.p.ub) {
1291 pr->u.p.parm4 = (victimId + 1) % nproc;
1294 lckv = v->u.p.steal_lock;
1295 KMP_ASSERT(lckv != NULL);
1296 __kmp_acquire_lock(lckv, gtid);
1298 if (v->u.p.count >= limit) {
1299 __kmp_release_lock(lckv, gtid);
1300 pr->u.p.parm4 = (victimId + 1) % nproc;
1306 remaining = limit - v->u.p.count;
1307 if (remaining > 7) {
1309 KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen, remaining >> 2);
1310 init = (v->u.p.ub -= (remaining >> 2));
1313 KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen, 1);
1314 init = (v->u.p.ub -= 1);
1316 __kmp_release_lock(lckv, gtid);
1321 buff = __kmp_str_format(
1322 "__kmp_dispatch_next: T#%%d stolen chunks from T#%%d, "
1323 "count:%%%s ub:%%%s\n",
1324 traits_t<UT>::spec, traits_t<UT>::spec);
1325 KD_TRACE(10, (buff, gtid, victimId, init, limit));
1326 __kmp_str_free(&buff);
1329 KMP_DEBUG_ASSERT(init + 1 <= limit);
1330 pr->u.p.parm4 = victimId;
1333 __kmp_acquire_lock(lck, gtid);
1334 pr->u.p.count = init + 1;
1336 __kmp_release_lock(lck, gtid);
1338 if (init + 1 < limit)
1339 KMP_ATOMIC_ST_REL(&pr->steal_flag, READY);
1352 union_i4 vold, vnew;
1353 if (pr->u.p.count < (UT)pr->u.p.ub) {
1354 KMP_DEBUG_ASSERT(pr->steal_flag == READY);
1355 vold.b = *(
volatile kmp_int64 *)(&pr->u.p.count);
1358 while (!KMP_COMPARE_AND_STORE_REL64(
1359 (
volatile kmp_int64 *)&pr->u.p.count,
1360 *VOLATILE_CAST(kmp_int64 *) & vold.b,
1361 *VOLATILE_CAST(kmp_int64 *) & vnew.b)) {
1363 vold.b = *(
volatile kmp_int64 *)(&pr->u.p.count);
1367 init = vold.p.count;
1368 status = (init < (UT)vold.p.ub);
1373 T while_limit = pr->u.p.parm3;
1375 int idx = (th->th.th_dispatch->th_disp_index - 1) %
1376 __kmp_dispatch_num_buffers;
1378 KMP_ATOMIC_ST_REL(&pr->steal_flag, THIEF);
1379 while ((!status) && (while_limit != ++while_index)) {
1380 dispatch_private_info_template<T> *v;
1382 T victimId = pr->u.p.parm4;
1383 T oldVictimId = victimId ? victimId - 1 : nproc - 1;
1384 v =
reinterpret_cast<dispatch_private_info_template<T> *
>(
1385 &team->t.t_dispatch[victimId].th_disp_buffer[idx]);
1386 KMP_DEBUG_ASSERT(v);
1387 while ((v == pr || KMP_ATOMIC_LD_RLX(&v->steal_flag) == THIEF) &&
1388 oldVictimId != victimId) {
1389 victimId = (victimId + 1) % nproc;
1390 v =
reinterpret_cast<dispatch_private_info_template<T> *
>(
1391 &team->t.t_dispatch[victimId].th_disp_buffer[idx]);
1392 KMP_DEBUG_ASSERT(v);
1394 if (v == pr || KMP_ATOMIC_LD_RLX(&v->steal_flag) == THIEF) {
1397 if (KMP_ATOMIC_LD_RLX(&v->steal_flag) == UNUSED) {
1398 kmp_uint32 old = UNUSED;
1400 status = v->steal_flag.compare_exchange_strong(old, THIEF);
1404 T small_chunk, extras;
1405 small_chunk = nchunks / nproc;
1406 extras = nchunks % nproc;
1407 init =
id * small_chunk + (
id < extras ? id : extras);
1408 vnew.p.count = init + 1;
1409 vnew.p.ub = init + small_chunk + (
id < extras ? 1 : 0);
1412 KMP_XCHG_FIXED64((
volatile kmp_int64 *)(&pr->u.p.count), vnew.b);
1414 *(
volatile kmp_int64 *)(&pr->u.p.count) = vnew.b;
1416 pr->u.p.parm4 = (
id + 1) % nproc;
1422 buff = __kmp_str_format(
1423 "__kmp_dispatch_next: T#%%d stolen chunks from T#%%d, "
1424 "count:%%%s ub:%%%s\n",
1425 traits_t<UT>::spec, traits_t<T>::spec);
1426 KD_TRACE(10, (buff, gtid,
id, pr->u.p.count, pr->u.p.ub));
1427 __kmp_str_free(&buff);
1431 if (pr->u.p.count < (UT)pr->u.p.ub)
1432 KMP_ATOMIC_ST_REL(&pr->steal_flag, READY);
1438 vold.b = *(
volatile kmp_int64 *)(&v->u.p.count);
1439 if (KMP_ATOMIC_LD_ACQ(&v->steal_flag) != READY ||
1440 vold.p.count >= (UT)vold.p.ub) {
1441 pr->u.p.parm4 = (victimId + 1) % nproc;
1445 remaining = vold.p.ub - vold.p.count;
1448 if (remaining > 7) {
1449 vnew.p.ub -= remaining >> 2;
1453 KMP_DEBUG_ASSERT(vnew.p.ub * (UT)chunk <= trip);
1454 if (KMP_COMPARE_AND_STORE_REL64(
1455 (
volatile kmp_int64 *)&v->u.p.count,
1456 *VOLATILE_CAST(kmp_int64 *) & vold.b,
1457 *VOLATILE_CAST(kmp_int64 *) & vnew.b)) {
1463 buff = __kmp_str_format(
1464 "__kmp_dispatch_next: T#%%d stolen chunks from T#%%d, "
1465 "count:%%%s ub:%%%s\n",
1466 traits_t<T>::spec, traits_t<T>::spec);
1467 KD_TRACE(10, (buff, gtid, victimId, vnew.p.ub, vold.p.ub));
1468 __kmp_str_free(&buff);
1471 KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen,
1472 vold.p.ub - vnew.p.ub);
1474 pr->u.p.parm4 = victimId;
1477 vold.p.count = init + 1;
1479 KMP_XCHG_FIXED64((
volatile kmp_int64 *)(&pr->u.p.count), vold.b);
1481 *(
volatile kmp_int64 *)(&pr->u.p.count) = vold.b;
1484 if (vold.p.count < (UT)vold.p.ub)
1485 KMP_ATOMIC_ST_REL(&pr->steal_flag, READY);
1501 limit = chunk + init - 1;
1503 KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_chunks, 1);
1505 KMP_DEBUG_ASSERT(init <= trip);
1509 if ((last = (limit >= trip)) != 0)
1515 *p_lb = start + init;
1516 *p_ub = start + limit;
1518 *p_lb = start + init * incr;
1519 *p_ub = start + limit * incr;
1525 case kmp_sch_static_balanced: {
1528 (
"__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_balanced case\n",
1531 if ((status = !pr->u.p.count) != 0) {
1535 last = (pr->u.p.parm1 != 0);
1539 pr->u.p.lb = pr->u.p.ub + pr->u.p.st;
1543 case kmp_sch_static_greedy:
1545 case kmp_sch_static_chunked: {
1548 KD_TRACE(100, (
"__kmp_dispatch_next_algorithm: T#%d "
1549 "kmp_sch_static_[affinity|chunked] case\n",
1551 parm1 = pr->u.p.parm1;
1553 trip = pr->u.p.tc - 1;
1554 init = parm1 * (pr->u.p.count + tid);
1556 if ((status = (init <= trip)) != 0) {
1559 limit = parm1 + init - 1;
1561 if ((last = (limit >= trip)) != 0)
1567 pr->u.p.count += nproc;
1570 *p_lb = start + init;
1571 *p_ub = start + limit;
1573 *p_lb = start + init * incr;
1574 *p_ub = start + limit * incr;
1577 if (pr->flags.ordered) {
1578 pr->u.p.ordered_lower = init;
1579 pr->u.p.ordered_upper = limit;
1585 case kmp_sch_dynamic_chunked: {
1587 UT chunk_size = pr->u.p.parm1;
1588 UT nchunks = pr->u.p.parm2;
1592 (
"__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n",
1595 chunk_number = test_then_inc_acq<ST>((
volatile ST *)&sh->u.s.iteration);
1596 status = (chunk_number < nchunks);
1603 init = chunk_size * chunk_number;
1604 trip = pr->u.p.tc - 1;
1608 if ((last = (trip - init < (UT)chunk_size)))
1611 limit = chunk_size + init - 1;
1617 *p_lb = start + init;
1618 *p_ub = start + limit;
1620 *p_lb = start + init * incr;
1621 *p_ub = start + limit * incr;
1624 if (pr->flags.ordered) {
1625 pr->u.p.ordered_lower = init;
1626 pr->u.p.ordered_upper = limit;
1632 case kmp_sch_guided_iterative_chunked: {
1633 T chunkspec = pr->u.p.parm1;
1634 KD_TRACE(100, (
"__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_chunked "
1641 init = sh->u.s.iteration;
1642 remaining = trip - init;
1643 if (remaining <= 0) {
1652 init = test_then_add<ST>(RCAST(
volatile ST *, &sh->u.s.iteration),
1654 remaining = trip - init;
1655 if (remaining <= 0) {
1660 if ((T)remaining > chunkspec) {
1661 limit = init + chunkspec - 1;
1664 limit = init + remaining - 1;
1669 limit = init + (UT)((
double)remaining *
1670 *(
double *)&pr->u.p.parm3);
1671 if (compare_and_swap<ST>(RCAST(
volatile ST *, &sh->u.s.iteration),
1672 (ST)init, (ST)limit)) {
1684 *p_lb = start + init * incr;
1685 *p_ub = start + limit * incr;
1686 if (pr->flags.ordered) {
1687 pr->u.p.ordered_lower = init;
1688 pr->u.p.ordered_upper = limit;
1702 T chunk = pr->u.p.parm1;
1704 (
"__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_simd case\n",
1710 init = sh->u.s.iteration;
1711 remaining = trip - init;
1712 if (remaining <= 0) {
1716 KMP_DEBUG_ASSERT(init % chunk == 0);
1718 if ((T)remaining < pr->u.p.parm2) {
1721 init = test_then_add<ST>(RCAST(
volatile ST *, &sh->u.s.iteration),
1723 remaining = trip - init;
1724 if (remaining <= 0) {
1729 if ((T)remaining > chunk) {
1730 limit = init + chunk - 1;
1733 limit = init + remaining - 1;
1740 __kmp_type_convert((
double)remaining * (*(
double *)&pr->u.p.parm3),
1742 UT rem = span % chunk;
1744 span += chunk - rem;
1745 limit = init + span;
1746 if (compare_and_swap<ST>(RCAST(
volatile ST *, &sh->u.s.iteration),
1747 (ST)init, (ST)limit)) {
1759 *p_lb = start + init * incr;
1760 *p_ub = start + limit * incr;
1761 if (pr->flags.ordered) {
1762 pr->u.p.ordered_lower = init;
1763 pr->u.p.ordered_upper = limit;
1774 case kmp_sch_guided_analytical_chunked: {
1775 T chunkspec = pr->u.p.parm1;
1777 #if KMP_USE_X87CONTROL
1780 unsigned int oldFpcw;
1781 unsigned int fpcwSet = 0;
1783 KD_TRACE(100, (
"__kmp_dispatch_next_algorithm: T#%d "
1784 "kmp_sch_guided_analytical_chunked case\n",
1789 KMP_DEBUG_ASSERT(nproc > 1);
1790 KMP_DEBUG_ASSERT((2UL * chunkspec + 1) * (UT)nproc < trip);
1794 chunkIdx = test_then_inc_acq<ST>((
volatile ST *)&sh->u.s.iteration);
1795 if (chunkIdx >= (UT)pr->u.p.parm2) {
1798 init = chunkIdx * chunkspec + pr->u.p.count;
1801 if ((status = (init > 0 && init <= trip)) != 0) {
1802 limit = init + chunkspec - 1;
1804 if ((last = (limit >= trip)) != 0)
1814 #if KMP_USE_X87CONTROL
1819 oldFpcw = _control87(0, 0);
1820 _control87(_PC_64, _MCW_PC);
1825 init = __kmp_dispatch_guided_remaining<T>(
1826 trip, *(DBL *)&pr->u.p.parm3, chunkIdx);
1827 KMP_DEBUG_ASSERT(init);
1831 limit = trip - __kmp_dispatch_guided_remaining<T>(
1832 trip, *(DBL *)&pr->u.p.parm3, chunkIdx + 1);
1833 KMP_ASSERT(init <= limit);
1835 KMP_DEBUG_ASSERT(limit <= trip);
1842 #if KMP_USE_X87CONTROL
1846 if (fpcwSet && (oldFpcw & fpcwSet))
1847 _control87(oldFpcw, _MCW_PC);
1854 *p_lb = start + init * incr;
1855 *p_ub = start + limit * incr;
1856 if (pr->flags.ordered) {
1857 pr->u.p.ordered_lower = init;
1858 pr->u.p.ordered_upper = limit;
1869 case kmp_sch_trapezoidal: {
1871 T parm2 = pr->u.p.parm2;
1872 T parm3 = pr->u.p.parm3;
1873 T parm4 = pr->u.p.parm4;
1875 (
"__kmp_dispatch_next_algorithm: T#%d kmp_sch_trapezoidal case\n",
1878 index = test_then_inc<ST>((
volatile ST *)&sh->u.s.iteration);
1880 init = (index * ((2 * parm2) - (index - 1) * parm4)) / 2;
1881 trip = pr->u.p.tc - 1;
1883 if ((status = ((T)index < parm3 && init <= trip)) == 0) {
1890 limit = ((index + 1) * (2 * parm2 - index * parm4)) / 2 - 1;
1893 if ((last = (limit >= trip)) != 0)
1900 *p_lb = start + init;
1901 *p_ub = start + limit;
1903 *p_lb = start + init * incr;
1904 *p_ub = start + limit * incr;
1907 if (pr->flags.ordered) {
1908 pr->u.p.ordered_lower = init;
1909 pr->u.p.ordered_upper = limit;
1916 __kmp_fatal(KMP_MSG(UnknownSchedTypeDetected),
1917 KMP_HNT(GetNewerLibrary),
1925 if (pr->flags.ordered) {
1928 buff = __kmp_str_format(
"__kmp_dispatch_next_algorithm: T#%%d "
1929 "ordered_lower:%%%s ordered_upper:%%%s\n",
1930 traits_t<UT>::spec, traits_t<UT>::spec);
1931 KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper));
1932 __kmp_str_free(&buff);
1937 buff = __kmp_str_format(
1938 "__kmp_dispatch_next_algorithm: T#%%d exit status:%%d p_last:%%d "
1939 "p_lb:%%%s p_ub:%%%s p_st:%%%s\n",
1940 traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
1941 KMP_DEBUG_ASSERT(p_last);
1942 KMP_DEBUG_ASSERT(p_st);
1943 KD_TRACE(10, (buff, gtid, status, *p_last, *p_lb, *p_ub, *p_st));
1944 __kmp_str_free(&buff);
// OMPT support: when a dynamically scheduled loop is exhausted (status == 0),
// report a work-loop scope_end event to any registered OMPT tooling callback;
// compiled to a no-op when OMPT is disabled.
// NOTE(review): extraction appears to have dropped this macro's closing lines
// (braces and the #else) between the two #define lines below — verify against
// the upstream kmp_dispatch.cpp before editing this region.
1953 #if OMPT_SUPPORT && OMPT_OPTIONAL
1954 #define OMPT_LOOP_END \
1955 if (status == 0) { \
1956 if (ompt_enabled.ompt_callback_work) { \
1957 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); \
1958 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); \
1959 ompt_callbacks.ompt_callback(ompt_callback_work)( \
1960 ompt_work_loop, ompt_scope_end, &(team_info->parallel_data), \
1961 &(task_info->task_data), 0, codeptr); \
1966 #define OMPT_LOOP_END
// Stats support: on loop exit compute the per-thread iteration count t from
// the final lower/upper bounds and the stride, record it in the dynamic-loop
// iteration counter, and pop the partitioned timer when the loop is finished;
// compiled to a no-op when KMP_STATS is disabled.
// NOTE(review): several continuation lines of this macro are missing from
// this extraction (intermediate branches, closing braces, the #else) — do not
// edit in place without consulting the upstream file.
1969 #if KMP_STATS_ENABLED
1970 #define KMP_STATS_LOOP_END \
1972 kmp_int64 u, l, t, i; \
1973 l = (kmp_int64)(*p_lb); \
1974 u = (kmp_int64)(*p_ub); \
1975 i = (kmp_int64)(pr->u.p.st); \
1976 if (status == 0) { \
1978 KMP_POP_PARTITIONED_TIMER(); \
1979 } else if (i == 1) { \
1984 } else if (i < 0) { \
1986 t = (l - u) / (-i) + 1; \
1991 t = (u - l) / i + 1; \
1995 KMP_COUNT_VALUE(OMP_loop_dynamic_iterations, t); \
1998 #define KMP_STATS_LOOP_END
// __kmp_dispatch_next: per-thread entry point that hands out the next chunk
// of iterations for a dynamically scheduled loop. Returns nonzero while work
// remains (filling *p_lb/*p_ub/*p_st, and *p_last for the final chunk) and
// zero once this thread's work is exhausted.
// NOTE(review): this extraction has dropped lines throughout (fused original
// line numbers, missing braces/returns/declarations) — treat the structure
// below as partial and confirm against the upstream kmp_dispatch.cpp.
2001 template <
typename T>
2002 static int __kmp_dispatch_next(
ident_t *loc,
int gtid, kmp_int32 *p_last,
2004 typename traits_t<T>::signed_t *p_st
2005 #
if OMPT_SUPPORT && OMPT_OPTIONAL
// Unsigned/signed counterparts of the iteration type T.
2011 typedef typename traits_t<T>::unsigned_t UT;
2012 typedef typename traits_t<T>::signed_t ST;
2017 KMP_TIME_PARTITIONED_BLOCK(OMP_loop_dynamic_scheduling);
2020 dispatch_private_info_template<T> *pr;
2021 __kmp_assert_valid_gtid(gtid);
2022 kmp_info_t *th = __kmp_threads[gtid];
2023 kmp_team_t *team = th->th.th_team;
2025 KMP_DEBUG_ASSERT(p_lb && p_ub && p_st);
2028 (
"__kmp_dispatch_next: T#%d called p_lb:%p p_ub:%p p_st:%p p_last: %p\n",
2029 gtid, p_lb, p_ub, p_st, p_last));
// Serialized team: dispatch from the thread's own disp_buffer without
// touching the shared dispatch buffer.
2031 if (team->t.t_serialized) {
2033 pr =
reinterpret_cast<dispatch_private_info_template<T> *
>(
2034 th->th.th_dispatch->th_disp_buffer);
2035 KMP_DEBUG_ASSERT(pr);
// tc == 0 means no iterations remain for this thread.
2037 if ((status = (pr->u.p.tc != 0)) == 0) {
2044 if (__kmp_env_consistency_check) {
2045 if (pr->pushed_ws != ct_none) {
2046 pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
2049 }
// nomerge dispatch in a serialized team: hand out one chunk per call,
// advancing the private chunk counter.
else if (pr->flags.nomerge) {
2052 UT limit, trip, init;
2054 T chunk = pr->u.p.parm1;
2056 KD_TRACE(100, (
"__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n",
2059 init = chunk * pr->u.p.count++;
2060 trip = pr->u.p.tc - 1;
2062 if ((status = (init <= trip)) == 0) {
2069 if (__kmp_env_consistency_check) {
2070 if (pr->pushed_ws != ct_none) {
2071 pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
2076 limit = chunk + init - 1;
2079 if ((last = (limit >= trip)) != 0) {
2082 pr->u.p.last_upper = pr->u.p.ub;
// Translate iteration numbers into user-space loop bounds (unit-stride
// fast path first, then the general strided case).
2090 *p_lb = start + init;
2091 *p_ub = start + limit;
2093 *p_lb = start + init * incr;
2094 *p_ub = start + limit * incr;
2097 if (pr->flags.ordered) {
2098 pr->u.p.ordered_lower = init;
2099 pr->u.p.ordered_upper = limit;
2104 buff = __kmp_str_format(
"__kmp_dispatch_next: T#%%d "
2105 "ordered_lower:%%%s ordered_upper:%%%s\n",
2106 traits_t<UT>::spec, traits_t<UT>::spec);
2107 KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
2108 pr->u.p.ordered_upper));
2109 __kmp_str_free(&buff);
2119 pr->u.p.last_upper = *p_ub;
2130 buff = __kmp_str_format(
2131 "__kmp_dispatch_next: T#%%d serialized case: p_lb:%%%s "
2132 "p_ub:%%%s p_st:%%%s p_last:%%p %%d returning:%%d\n",
2133 traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
2134 KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, *p_st, p_last,
2135 (p_last ? *p_last : 0), status));
2136 __kmp_str_free(&buff);
2139 #if INCLUDE_SSC_MARKS
2140 SSC_MARK_DISPATCH_NEXT();
// Parallel (non-serialized) path: use the per-team shared dispatch buffer.
2147 dispatch_shared_info_template<T>
volatile *sh;
2149 KMP_DEBUG_ASSERT(th->th.th_dispatch ==
2150 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
2152 pr =
reinterpret_cast<dispatch_private_info_template<T> *
>(
2153 th->th.th_dispatch->th_dispatch_pr_current);
2154 KMP_DEBUG_ASSERT(pr);
2155 sh =
reinterpret_cast<dispatch_shared_info_template<T>
volatile *
>(
2156 th->th.th_dispatch->th_dispatch_sh_current);
2157 KMP_DEBUG_ASSERT(sh);
// Delegate to the hierarchical scheduler when enabled for this loop,
// otherwise to the scheme-specific next-chunk algorithm.
2159 #if KMP_USE_HIER_SCHED
2160 if (pr->flags.use_hier)
2161 status = sh->hier->next(loc, gtid, pr, &last, p_lb, p_ub, p_st);
2164 status = __kmp_dispatch_next_algorithm<T>(gtid, pr, sh, &last, p_lb, p_ub,
2165 p_st, th->th.th_team_nproc,
2166 th->th.th_info.ds.ds_tid);
// No more work: count this thread as done; the last thread to finish
// resets the shared buffer for reuse by a later loop.
2170 num_done = test_then_inc<ST>(&sh->u.s.num_done);
2175 buff = __kmp_str_format(
2176 "__kmp_dispatch_next: T#%%d increment num_done:%%%s\n",
2177 traits_t<ST>::spec);
2178 KD_TRACE(10, (buff, gtid, sh->u.s.num_done));
2179 __kmp_str_free(&buff);
2183 #if KMP_USE_HIER_SCHED
2184 pr->flags.use_hier = FALSE;
2186 if (num_done == th->th.th_team_nproc - 1) {
2187 #if KMP_STATIC_STEAL_ENABLED
// static_steal cleanup: reset each buffer's steal flag and, for >4-byte
// iteration types, destroy the per-buffer steal locks.
2188 if (pr->schedule == kmp_sch_static_steal) {
2190 int idx = (th->th.th_dispatch->th_disp_index - 1) %
2191 __kmp_dispatch_num_buffers;
2193 for (i = 0; i < th->th.th_team_nproc; ++i) {
2194 dispatch_private_info_template<T> *buf =
2195 reinterpret_cast<dispatch_private_info_template<T> *
>(
2196 &team->t.t_dispatch[i].th_disp_buffer[idx]);
2197 KMP_ASSERT(buf->steal_flag == THIEF);
2198 KMP_ATOMIC_ST_RLX(&buf->steal_flag, UNUSED);
2199 if (traits_t<T>::type_size > 4) {
2201 kmp_lock_t *lck = buf->u.p.steal_lock;
2202 KMP_ASSERT(lck != NULL);
2203 __kmp_destroy_lock(lck);
2205 buf->u.p.steal_lock = NULL;
// Reinitialize the shared buffer, then advance buffer_index to publish
// it for the next loop.
2214 sh->u.s.num_done = 0;
2215 sh->u.s.iteration = 0;
2218 if (pr->flags.ordered) {
2219 sh->u.s.ordered_iteration = 0;
2222 sh->buffer_index += __kmp_dispatch_num_buffers;
2223 KD_TRACE(100, (
"__kmp_dispatch_next: T#%d change buffer_index:%d\n",
2224 gtid, sh->buffer_index));
2229 if (__kmp_env_consistency_check) {
2230 if (pr->pushed_ws != ct_none) {
2231 pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
// Detach this thread from the finished dispatch buffers.
2235 th->th.th_dispatch->th_deo_fcn = NULL;
2236 th->th.th_dispatch->th_dxo_fcn = NULL;
2237 th->th.th_dispatch->th_dispatch_sh_current = NULL;
2238 th->th.th_dispatch->th_dispatch_pr_current = NULL;
2242 pr->u.p.last_upper = pr->u.p.ub;
2245 if (p_last != NULL && status != 0)
2253 buff = __kmp_str_format(
2254 "__kmp_dispatch_next: T#%%d normal case: "
2255 "p_lb:%%%s p_ub:%%%s p_st:%%%s p_last:%%p (%%d) returning:%%d\n",
2256 traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
2257 KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last,
2258 (p_last ? *p_last : 0), status));
2259 __kmp_str_free(&buff);
2262 #if INCLUDE_SSC_MARKS
2263 SSC_MARK_DISPATCH_NEXT();
// __kmp_dist_get_bounds: for the distribute (teams) construct, narrow the
// global iteration space [*plower, *pupper] with stride incr down to the
// bounds owned by the calling team, and set *plastiter when this team owns
// the last chunk.
// NOTE(review): extraction dropped lines here (declarations of th/team/
// nteams/team_id/trip_count/upper, several braces, and the function tail) —
// confirm against the upstream kmp_dispatch.cpp.
2270 template <
typename T>
2271 static void __kmp_dist_get_bounds(
ident_t *loc, kmp_int32 gtid,
2272 kmp_int32 *plastiter, T *plower, T *pupper,
2273 typename traits_t<T>::signed_t incr) {
2274 typedef typename traits_t<T>::unsigned_t UT;
2281 KMP_DEBUG_ASSERT(plastiter && plower && pupper);
2282 KE_TRACE(10, (
"__kmpc_dist_get_bounds called (%d)\n", gtid));
2284 typedef typename traits_t<T>::signed_t ST;
2288 buff = __kmp_str_format(
"__kmpc_dist_get_bounds: T#%%d liter=%%d "
2289 "iter=(%%%s, %%%s, %%%s) signed?<%s>\n",
2290 traits_t<T>::spec, traits_t<T>::spec,
2291 traits_t<ST>::spec, traits_t<T>::spec);
2292 KD_TRACE(100, (buff, gtid, *plastiter, *plower, *pupper, incr));
2293 __kmp_str_free(&buff);
// Consistency checks: zero increment and reversed ranges are user errors.
2297 if (__kmp_env_consistency_check) {
2299 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
2302 if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
2312 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
2315 __kmp_assert_valid_gtid(gtid);
2316 th = __kmp_threads[gtid];
2317 team = th->th.th_team;
2318 KMP_DEBUG_ASSERT(th->th.th_teams_microtask);
2319 nteams = th->th.th_teams_size.nteams;
// team_id: this team's index among its sibling teams (master tid in parent).
2320 team_id = team->t.t_master_tid;
2321 KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);
// Trip count: cheap form for unit strides; general strides divide in the
// unsigned domain to avoid signed overflow.
2325 trip_count = *pupper - *plower + 1;
2326 }
else if (incr == -1) {
2327 trip_count = *plower - *pupper + 1;
2328 }
else if (incr > 0) {
2330 trip_count = (UT)(*pupper - *plower) / incr + 1;
2332 trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
// Fewer iterations than teams: one iteration each to the first trip_count
// teams, an empty range (lower past upper) to the rest.
2335 if (trip_count <= nteams) {
2337 __kmp_static == kmp_sch_static_greedy ||
2339 kmp_sch_static_balanced);
2341 if (team_id < trip_count) {
2342 *pupper = *plower = *plower + team_id * incr;
2344 *plower = *pupper + incr;
2346 if (plastiter != NULL)
2347 *plastiter = (team_id == trip_count - 1);
// Balanced split: the first 'extras' teams each take one extra iteration.
2349 if (__kmp_static == kmp_sch_static_balanced) {
2350 UT chunk = trip_count / nteams;
2351 UT extras = trip_count % nteams;
2353 incr * (team_id * chunk + (team_id < extras ? team_id : extras));
2354 *pupper = *plower + chunk * incr - (team_id < extras ? 0 : incr);
2355 if (plastiter != NULL)
2356 *plastiter = (team_id == nteams - 1);
// Greedy split: ceil(trip_count / nteams) iterations per team.
2359 (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
2361 KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
2363 *plower += team_id * chunk_inc_count;
2364 *pupper = *plower + chunk_inc_count - incr;
// Clamp the computed bounds: guard against arithmetic overflow past the
// global 'upper' in either iteration direction.
2367 if (*pupper < *plower)
2368 *pupper = traits_t<T>::max_value;
2369 if (plastiter != NULL)
2370 *plastiter = *plower <= upper && *pupper > upper - incr;
2371 if (*pupper > upper)
2374 if (*pupper > *plower)
2375 *pupper = traits_t<T>::min_value;
2376 if (plastiter != NULL)
2377 *plastiter = *plower >= upper && *pupper < upper - incr;
2378 if (*pupper < upper)
// __kmpc_dispatch_init_4: compiler-visible entry initializing dynamic
// dispatch for a signed 32-bit loop (lb/ub/st/chunk), delegating to the
// templated __kmp_dispatch_init.
// NOTE(review): the first line of each wrapper signature below was lost in
// extraction — only the tail of the parameter list is visible.
2410 kmp_int32 ub, kmp_int32 st, kmp_int32 chunk) {
2411 KMP_DEBUG_ASSERT(__kmp_init_serial);
2412 #if OMPT_SUPPORT && OMPT_OPTIONAL
// Record the caller's return address so OMPT callbacks can report codeptr.
2413 OMPT_STORE_RETURN_ADDRESS(gtid);
2415 __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk,
true);
// __kmpc_dispatch_init_4u: unsigned 32-bit variant.
2422 kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk) {
2423 KMP_DEBUG_ASSERT(__kmp_init_serial);
2424 #if OMPT_SUPPORT && OMPT_OPTIONAL
2425 OMPT_STORE_RETURN_ADDRESS(gtid);
2427 __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk,
true);
// __kmpc_dispatch_init_8: signed 64-bit variant.
2435 kmp_int64 ub, kmp_int64 st, kmp_int64 chunk) {
2436 KMP_DEBUG_ASSERT(__kmp_init_serial);
2437 #if OMPT_SUPPORT && OMPT_OPTIONAL
2438 OMPT_STORE_RETURN_ADDRESS(gtid);
2440 __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk,
true);
// __kmpc_dispatch_init_8u: unsigned 64-bit variant.
2448 kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk) {
2449 KMP_DEBUG_ASSERT(__kmp_init_serial);
2450 #if OMPT_SUPPORT && OMPT_OPTIONAL
2451 OMPT_STORE_RETURN_ADDRESS(gtid);
2453 __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk,
true);
// __kmpc_dist_dispatch_init_4: distribute + dynamic-dispatch init for signed
// 32-bit loops — first narrows the bounds to this team via
// __kmp_dist_get_bounds, then starts dispatch on the team-local range.
// NOTE(review): the leading signature line was lost in extraction.
2467 kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
2469 KMP_DEBUG_ASSERT(__kmp_init_serial);
2470 #if OMPT_SUPPORT && OMPT_OPTIONAL
2471 OMPT_STORE_RETURN_ADDRESS(gtid);
2473 __kmp_dist_get_bounds<kmp_int32>(loc, gtid, p_last, &lb, &ub, st);
2474 __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk,
true);
// Unsigned 32-bit variant.
2477 void __kmpc_dist_dispatch_init_4u(
ident_t *loc, kmp_int32 gtid,
2479 kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
2481 KMP_DEBUG_ASSERT(__kmp_init_serial);
2482 #if OMPT_SUPPORT && OMPT_OPTIONAL
2483 OMPT_STORE_RETURN_ADDRESS(gtid);
2485 __kmp_dist_get_bounds<kmp_uint32>(loc, gtid, p_last, &lb, &ub, st);
2486 __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk,
true);
// Signed 64-bit variant.
2489 void __kmpc_dist_dispatch_init_8(
ident_t *loc, kmp_int32 gtid,
2491 kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
2493 KMP_DEBUG_ASSERT(__kmp_init_serial);
2494 #if OMPT_SUPPORT && OMPT_OPTIONAL
2495 OMPT_STORE_RETURN_ADDRESS(gtid);
2497 __kmp_dist_get_bounds<kmp_int64>(loc, gtid, p_last, &lb, &ub, st);
2498 __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk,
true);
// Unsigned 64-bit variant.
2501 void __kmpc_dist_dispatch_init_8u(
ident_t *loc, kmp_int32 gtid,
2503 kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
2505 KMP_DEBUG_ASSERT(__kmp_init_serial);
2506 #if OMPT_SUPPORT && OMPT_OPTIONAL
2507 OMPT_STORE_RETURN_ADDRESS(gtid);
2509 __kmp_dist_get_bounds<kmp_uint64>(loc, gtid, p_last, &lb, &ub, st);
2510 __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk,
true);
// __kmpc_dispatch_next_4: fetch the next chunk for a signed 32-bit dynamic
// loop; returns nonzero while iterations remain. Forwards to the templated
// __kmp_dispatch_next, passing the OMPT codeptr when tooling is enabled.
// NOTE(review): the leading signature line of each wrapper below was lost in
// extraction.
2527 kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st) {
2528 #if OMPT_SUPPORT && OMPT_OPTIONAL
2529 OMPT_STORE_RETURN_ADDRESS(gtid);
2531 return __kmp_dispatch_next<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st
2532 #
if OMPT_SUPPORT && OMPT_OPTIONAL
2534 OMPT_LOAD_RETURN_ADDRESS(gtid)
// __kmpc_dispatch_next_4u: unsigned 32-bit variant.
2543 kmp_uint32 *p_lb, kmp_uint32 *p_ub,
2545 #if OMPT_SUPPORT && OMPT_OPTIONAL
2546 OMPT_STORE_RETURN_ADDRESS(gtid);
2548 return __kmp_dispatch_next<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st
2549 #
if OMPT_SUPPORT && OMPT_OPTIONAL
2551 OMPT_LOAD_RETURN_ADDRESS(gtid)
// __kmpc_dispatch_next_8: signed 64-bit variant.
2560 kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st) {
2561 #if OMPT_SUPPORT && OMPT_OPTIONAL
2562 OMPT_STORE_RETURN_ADDRESS(gtid);
2564 return __kmp_dispatch_next<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st
2565 #
if OMPT_SUPPORT && OMPT_OPTIONAL
2567 OMPT_LOAD_RETURN_ADDRESS(gtid)
// __kmpc_dispatch_next_8u: unsigned 64-bit variant.
2576 kmp_uint64 *p_lb, kmp_uint64 *p_ub,
2578 #if OMPT_SUPPORT && OMPT_OPTIONAL
2579 OMPT_STORE_RETURN_ADDRESS(gtid);
2581 return __kmp_dispatch_next<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st
2582 #
if OMPT_SUPPORT && OMPT_OPTIONAL
2584 OMPT_LOAD_RETURN_ADDRESS(gtid)
// Bodies of __kmpc_dispatch_fini_4 / _8 / _4u / _8u (in that order): each
// signals completion of an ordered chunk via the templated
// __kmp_dispatch_finish. NOTE(review): the wrapper signatures and braces
// were lost in extraction; only the forwarding calls remain visible.
2596 __kmp_dispatch_finish<kmp_uint32>(gtid, loc);
2603 __kmp_dispatch_finish<kmp_uint64>(gtid, loc);
2610 __kmp_dispatch_finish<kmp_uint32>(gtid, loc);
2617 __kmp_dispatch_finish<kmp_uint64>(gtid, loc);
2624 kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker) {
2625 return value == checker;
2628 kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker) {
2629 return value != checker;
2632 kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker) {
2633 return value < checker;
2636 kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker) {
2637 return value >= checker;
2640 kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker) {
2641 return value <= checker;
// __kmp_wait_4: spin-wait on a 32-bit location until pred(*spinner, checker)
// holds; the value read is captured into r inside the loop condition.
// NOTE(review): the return type, the trailing 'obj' parameter, the local
// declarations of 'spins'/'r', and the final return statement were lost in
// extraction — confirm against the upstream file before editing.
2645 __kmp_wait_4(
volatile kmp_uint32 *spinner, kmp_uint32 checker,
2646 kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
// Local copies of the arguments; we may not belong to a team at this point.
2650 volatile kmp_uint32 *spin = spinner;
2651 kmp_uint32 check = checker;
2653 kmp_uint32 (*f)(kmp_uint32, kmp_uint32) = pred;
2656 KMP_FSYNC_SPIN_INIT(obj, CCAST(kmp_uint32 *, spin));
2657 KMP_INIT_YIELD(spins);
// Spin until the predicate holds, yielding when the machine is
// oversubscribed (KMP_YIELD_OVERSUB_ELSE_SPIN).
2659 while (!f(r = TCR_4(*spin), check)) {
2660 KMP_FSYNC_SPIN_PREPARE(obj);
2665 KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
2667 KMP_FSYNC_SPIN_ACQUIRED(obj);
// __kmp_wait_4_ptr: like __kmp_wait_4 but the predicate receives the spinner
// pointer itself rather than its loaded value; waits until pred(spin, check)
// holds.
// NOTE(review): the trailing 'obj' parameter and the declaration of 'spins'
// were lost in extraction — confirm against the upstream file.
2671 void __kmp_wait_4_ptr(
void *spinner, kmp_uint32 checker,
2672 kmp_uint32 (*pred)(
void *, kmp_uint32),
// Local copies of the arguments; may be called outside a team context.
2676 void *spin = spinner;
2677 kmp_uint32 check = checker;
2679 kmp_uint32 (*f)(
void *, kmp_uint32) = pred;
2681 KMP_FSYNC_SPIN_INIT(obj, spin);
2682 KMP_INIT_YIELD(spins);
// Spin until the predicate holds, yielding when oversubscribed.
2684 while (!f(spin, check)) {
2685 KMP_FSYNC_SPIN_PREPARE(obj);
2688 KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
2690 KMP_FSYNC_SPIN_ACQUIRED(obj);
// GOMP compatibility shims: forward to the templated __kmp_dispatch_init for
// each iteration-type width. NOTE(review): trailing parameters and the final
// argument / closing brace of each call were lost in extraction.
2695 #ifdef KMP_GOMP_COMPAT
2697 void __kmp_aux_dispatch_init_4(
ident_t *loc, kmp_int32 gtid,
2699 kmp_int32 ub, kmp_int32 st, kmp_int32 chunk,
2701 __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk,
// Unsigned 32-bit variant.
2705 void __kmp_aux_dispatch_init_4u(
ident_t *loc, kmp_int32 gtid,
2707 kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk,
2709 __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk,
// Signed 64-bit variant.
2713 void __kmp_aux_dispatch_init_8(
ident_t *loc, kmp_int32 gtid,
2715 kmp_int64 ub, kmp_int64 st, kmp_int64 chunk,
2717 __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk,
// Unsigned 64-bit variant.
2721 void __kmp_aux_dispatch_init_8u(
ident_t *loc, kmp_int32 gtid,
2723 kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk,
2725 __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk,
// GOMP compatibility shims for finishing an ordered chunk: each forwards to
// the templated __kmp_dispatch_finish_chunk with the matching unsigned
// iteration type (32-bit for _4/_4u, 64-bit for _8/_8u).
2729 void __kmp_aux_dispatch_fini_chunk_4(
ident_t *loc, kmp_int32 gtid) {
2730 __kmp_dispatch_finish_chunk<kmp_uint32>(gtid, loc);
2733 void __kmp_aux_dispatch_fini_chunk_8(
ident_t *loc, kmp_int32 gtid) {
2734 __kmp_dispatch_finish_chunk<kmp_uint64>(gtid, loc);
2737 void __kmp_aux_dispatch_fini_chunk_4u(
ident_t *loc, kmp_int32 gtid) {
2738 __kmp_dispatch_finish_chunk<kmp_uint32>(gtid, loc);
2741 void __kmp_aux_dispatch_fini_chunk_8u(
ident_t *loc, kmp_int32 gtid) {
2742 __kmp_dispatch_finish_chunk<kmp_uint64>(gtid, loc);
#define KMP_COUNT_VALUE(name, value)
Adds value to specified timer (name).
#define KMP_COUNT_BLOCK(name)
Increments specified counter (name).
int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st)
void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid)
void __kmpc_dist_dispatch_init_4(ident_t *loc, kmp_int32 gtid, enum sched_type schedule, kmp_int32 *p_last, kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk)
int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st)
int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st)
void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid)
int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st)
void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid)
void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid, enum sched_type schedule, kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk)
void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid, enum sched_type schedule, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk)
void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid, enum sched_type schedule, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk)
void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid)
void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid, enum sched_type schedule, kmp_int64 lb, kmp_int64 ub, kmp_int64 st, kmp_int64 chunk)