#include "kmp_error.h"
#include "kmp_stats.h"
#if KMP_USE_X87CONTROL
#include <float.h>
#endif
#include "kmp_dispatch.h"
#if KMP_USE_HIER_SCHED
#include "kmp_dispatch_hier.h"
#endif

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

void __kmp_dispatch_deo_error(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(gtid_ref);

  if (__kmp_env_consistency_check) {
    th = __kmp_threads[*gtid_ref];
    if (th->th.th_root->r.r_active &&
        (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws != ct_none)) {
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref, NULL, 0);
#else
      __kmp_push_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref, NULL);
#endif
    }
  }
}

void __kmp_dispatch_dxo_error(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  kmp_info_t *th;

  if (__kmp_env_consistency_check) {
    th = __kmp_threads[*gtid_ref];
    if (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws != ct_none) {
      __kmp_pop_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref);
    }
  }
}
// Returns either SCHEDULE_MONOTONIC or SCHEDULE_NONMONOTONIC for the given
// schedule, based on the monotonic/nonmonotonic modifiers and runtime state.
static inline int __kmp_get_monotonicity(ident_t *loc, enum sched_type schedule,
                                         bool use_hier = false) {
  // Default to monotonic.
  int monotonicity = SCHEDULE_MONOTONIC;

  // Executables compiled with OpenMP 4.5 or earlier default to monotonic.
  if (loc->get_openmp_version() < 50)
    monotonicity = SCHEDULE_MONOTONIC;

  if (use_hier || __kmp_force_monotonic)
    monotonicity = SCHEDULE_MONOTONIC;
  else if (SCHEDULE_HAS_NONMONOTONIC(schedule))
    monotonicity = SCHEDULE_NONMONOTONIC;
  else if (SCHEDULE_HAS_MONOTONIC(schedule))
    monotonicity = SCHEDULE_MONOTONIC;

  return monotonicity;
}
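// Note (comment added for clarity, not from the upstream source): with this
// resolution a clause such as schedule(nonmonotonic : dynamic, 4) yields
// SCHEDULE_NONMONOTONIC and may later be remapped to kmp_sch_static_steal,
// while a schedule with no modifier, hierarchical scheduling,
// __kmp_force_monotonic, or a binary compiled against OpenMP 4.5 or earlier
// all resolve to SCHEDULE_MONOTONIC.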
// Initialize a dispatch_private_info_template<T> buffer for a particular
// schedule and chunk.  The loop is described by lb (lower bound), ub (upper
// bound) and st (stride); nproc is the number of threads participating in the
// schedule and tid the calling thread's id within that group (0 <= tid < nproc).
template <typename T>
void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
                                   dispatch_private_info_template<T> *pr,
                                   enum sched_type schedule, T lb, T ub,
                                   typename traits_t<T>::signed_t st,
#if USE_ITT_BUILD
                                   kmp_uint64 *cur_chunk,
#endif
                                   typename traits_t<T>::signed_t chunk,
                                   T nproc, T tid) {
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::floating_t DBL;

  int active;
  T tc;
  kmp_info_t *th;
  kmp_team_t *team;
  int monotonicity;
  bool use_hier;

#ifdef KMP_DEBUG
  typedef typename traits_t<T>::signed_t ST;
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmp_dispatch_init_algorithm: T#%%d called "
        "pr:%%p lb:%%%s ub:%%%s st:%%%s "
        "schedule:%%d chunk:%%%s nproc:%%%s tid:%%%s\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec, traits_t<T>::spec);
    KD_TRACE(10, (buff, gtid, pr, lb, ub, st, schedule, chunk, nproc, tid));
    __kmp_str_free(&buff);
  }
#endif
  /* setup data */
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  active = !team->t.t_serialized;
#if USE_ITT_BUILD
  int itt_need_metadata_reporting =
      __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
      KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1;
#endif
#if KMP_USE_HIER_SCHED
  use_hier = pr->flags.use_hier;
#else
  use_hier = false;
#endif
  /* Pick up the nonmonotonic/monotonic bits from the scheduling type */
  monotonicity = __kmp_get_monotonicity(loc, schedule, use_hier);
  schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule);
  /* Pick up the nomerge/ordered bits from the scheduling type */
  if ((schedule >= kmp_nm_lower) && (schedule < kmp_nm_upper)) {
    pr->flags.nomerge = TRUE;
    schedule =
        (enum sched_type)(((int)schedule) - (kmp_nm_lower - kmp_sch_lower));
  } else {
    pr->flags.nomerge = FALSE;
  }
  pr->type_size = traits_t<T>::type_size; // remember the size of variables
  if (kmp_ord_lower & schedule) {
    pr->flags.ordered = TRUE;
    schedule =
        (enum sched_type)(((int)schedule) - (kmp_ord_lower - kmp_sch_lower));
  } else {
    pr->flags.ordered = FALSE;
  }
  // Ordered overrides nonmonotonic
  if (pr->flags.ordered) {
    monotonicity = SCHEDULE_MONOTONIC;
  }
  if (schedule == kmp_sch_static) {
    schedule = __kmp_static;
  } else {
    if (schedule == kmp_sch_runtime) {
      // Use the scheduling specified by OMP_SCHEDULE (or default if not set)
      schedule = team->t.t_sched.r_sched_type;
      monotonicity = __kmp_get_monotonicity(loc, schedule, use_hier);
      schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule);
      // Detail the schedule if needed
      if (schedule == kmp_sch_guided_chunked) {
        schedule = __kmp_guided;
      } else if (schedule == kmp_sch_static) {
        schedule = __kmp_static;
      }
      // Use the chunk size specified by OMP_SCHEDULE (or default if not set)
      chunk = team->t.t_sched.chunk;
#ifdef KMP_DEBUG
      {
        char *buff;
        buff = __kmp_str_format("__kmp_dispatch_init_algorithm: T#%%d new: "
                                "schedule:%%d chunk:%%%s\n",
                                traits_t<ST>::spec);
        KD_TRACE(10, (buff, gtid, schedule, chunk));
        __kmp_str_free(&buff);
      }
#endif
    } else {
      if (schedule == kmp_sch_guided_chunked) {
        schedule = __kmp_guided;
      }
      if (chunk <= 0) {
        chunk = KMP_DEFAULT_CHUNK;
      }
    }

    if (schedule == kmp_sch_auto) {
      // mapping and differentiation: in the __kmp_do_serial_initialize()
      schedule = __kmp_auto;
#ifdef KMP_DEBUG
      {
        char *buff;
        buff = __kmp_str_format(
            "__kmp_dispatch_init_algorithm: kmp_sch_auto: T#%%d new: "
            "schedule:%%d chunk:%%%s\n",
            traits_t<ST>::spec);
        KD_TRACE(10, (buff, gtid, schedule, chunk));
        __kmp_str_free(&buff);
      }
#endif
    }

#if KMP_STATIC_STEAL_ENABLED
    // map nonmonotonic:dynamic to static steal
    if (schedule == kmp_sch_dynamic_chunked) {
      if (monotonicity == SCHEDULE_NONMONOTONIC)
        schedule = kmp_sch_static_steal;
    }
#endif
    /* guided analytical not safe for too many threads */
    if (schedule == kmp_sch_guided_analytical_chunked && nproc > 1 << 20) {
      schedule = kmp_sch_guided_iterative_chunked;
      KMP_WARNING(DispatchManyThreads);
    }
    if (schedule == kmp_sch_runtime_simd) {
      // compiler provides simd_width in the chunk parameter
      schedule = team->t.t_sched.r_sched_type;
      monotonicity = __kmp_get_monotonicity(loc, schedule, use_hier);
      schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule);
      // Detail the schedule if needed
      if (schedule == kmp_sch_static || schedule == kmp_sch_auto ||
          schedule == __kmp_static) {
        schedule = kmp_sch_static_balanced_chunked;
      } else {
        if (schedule == kmp_sch_guided_chunked || schedule == __kmp_guided)
          schedule = kmp_sch_guided_simd;
        chunk = team->t.t_sched.chunk * chunk;
      }
#ifdef KMP_DEBUG
      {
        char *buff;
        buff = __kmp_str_format(
            "__kmp_dispatch_init_algorithm: T#%%d new: schedule:%%d"
            " chunk:%%%s\n",
            traits_t<ST>::spec);
        KD_TRACE(10, (buff, gtid, schedule, chunk));
        __kmp_str_free(&buff);
      }
#endif
    }
    pr->u.p.parm1 = chunk;
  }
  KMP_ASSERT2((kmp_sch_lower < schedule && schedule < kmp_sch_upper),
              "unknown scheduling type");
  if (__kmp_env_consistency_check) {
    if (st == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited,
                            (pr->flags.ordered ? ct_pdo_ordered : ct_pdo), loc);
    }
  }
  // compute trip count
  if (st == 1) { // most common case
    if (ub >= lb) {
      tc = ub - lb + 1;
    } else { // ub < lb
      tc = 0; // zero-trip
    }
  } else if (st < 0) {
    if (lb >= ub) {
      // Cast to unsigned is needed for loops like (i=2B; i>-2B; i-=1B),
      // where the division needs to be unsigned regardless of the result type
      tc = (UT)(lb - ub) / (-st) + 1;
    } else { // lb < ub
      tc = 0; // zero-trip
    }
  } else { // st > 0
    if (ub >= lb) {
      tc = (UT)(ub - lb) / st + 1;
    } else { // ub < lb
      tc = 0; // zero-trip
    }
  }

#if KMP_STATS_ENABLED
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_dynamic_total_iterations, tc);
  }
#endif

  pr->u.p.lb = lb;
  pr->u.p.ub = ub;
  pr->u.p.st = st;
  pr->u.p.tc = tc;

#if KMP_OS_WINDOWS
  pr->u.p.last_upper = ub + st;
#endif /* KMP_OS_WINDOWS */

  /* NOTE: only the active parallel region(s) has active ordered sections */
  if (active) {
    if (pr->flags.ordered) {
      pr->ordered_bumped = 0;
      pr->u.p.ordered_lower = 1;
      pr->u.p.ordered_upper = 0;
    }
  }
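  // Worked example (comment added for clarity, not upstream): for a loop
  // "for (i = 5; i <= 16; i += 3)" we get lb = 5, ub = 16, st = 3 and the
  // trip count above is tc = (UT)(16 - 5) / 3 + 1 = 4 (i = 5, 8, 11, 14).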
  switch (schedule) {
#if (KMP_STATIC_STEAL_ENABLED)
  case kmp_sch_static_steal: {
    T ntc, init;

    KD_TRACE(100,
             ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_steal case\n",
              gtid));

    ntc = (tc % chunk ? 1 : 0) + tc / chunk;
    if (nproc > 1 && ntc >= nproc) {
      T id = tid;
      T small_chunk, extras;

      small_chunk = ntc / nproc;
      extras = ntc % nproc;

      init = id * small_chunk + (id < extras ? id : extras);
      pr->u.p.count = init;
      pr->u.p.ub = init + small_chunk + (id < extras ? 1 : 0);

      pr->u.p.parm2 = lb;
      // parm3 is the number of times to attempt stealing which is
      // proportional to the number of chunks per thread, capped at nproc
      pr->u.p.parm3 = KMP_MIN(small_chunk + extras, nproc);
      pr->u.p.parm4 = (id + 1) % nproc; // remember neighbour tid
      if (traits_t<T>::type_size > 4) {
        // For 8-byte induction variables use a dynamically allocated
        // per-thread lock to guard the (count, ub) pair; the lock is freed in
        // __kmp_dispatch_next when status == 0.
        KMP_DEBUG_ASSERT(pr->u.p.th_steal_lock == NULL);
        pr->u.p.th_steal_lock =
            (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t));
        __kmp_init_lock(pr->u.p.th_steal_lock);
      }
      break;
    } else {
      /* too few chunks: switching to kmp_sch_dynamic_chunked */
      schedule = kmp_sch_dynamic_chunked;
      KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d switching to "
                     "kmp_sch_dynamic_chunked\n",
                     gtid));
      break;
    } // if
  } // case
#endif
  case kmp_sch_static_balanced: {
    T init, limit;

    KD_TRACE(
        100,
        ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_balanced case\n",
         gtid));

    if (nproc > 1) {
      T id = tid;

      if (tc < nproc) { // too few iterations: use number of iterations
        if (id < tc) {
          init = id;
          limit = id;
          pr->u.p.parm1 = (id == tc - 1); /* parm1 stores *plastiter */
        } else {
          pr->u.p.count = 1; /* means no more chunks to execute */
          pr->u.p.parm1 = FALSE;
          break;
        }
      } else {
        T small_chunk = tc / nproc;
        T extras = tc % nproc;
        init = id * small_chunk + (id < extras ? id : extras);
        limit = init + small_chunk - (id < extras ? 0 : 1);
        pr->u.p.parm1 = (id == nproc - 1);
      }
    } else {
      if (tc > 0) {
        init = 0;
        limit = tc - 1;
        pr->u.p.parm1 = TRUE;
      } else {
        // zero trip count
        pr->u.p.count = 1; /* means no more chunks to execute */
        pr->u.p.parm1 = FALSE;
        break;
      }
    }
#if USE_ITT_BUILD
    // Calculate chunk for metadata report
    if (itt_need_metadata_reporting)
      if (cur_chunk)
        *cur_chunk = limit - init + 1;
#endif
    if (st == 1) {
      pr->u.p.lb = lb + init;
      pr->u.p.ub = lb + limit;
    } else {
      // calculated upper bound, "ub" is user-defined upper bound
      T ub_tmp = lb + limit * st;
      pr->u.p.lb = lb + init * st;
      // adjust upper bound to "ub" if needed, so that MS lastprivate will
      // match it exactly
      if (st > 0)
        pr->u.p.ub = (ub_tmp + st > ub ? ub : ub_tmp);
      else
        pr->u.p.ub = (ub_tmp + st < ub ? ub : ub_tmp);
    }
    if (pr->flags.ordered) {
      pr->u.p.ordered_lower = init;
      pr->u.p.ordered_upper = limit;
    }
    break;
  } // case
  case kmp_sch_static_balanced_chunked: {
    // similar to balanced, but chunk adjusted to be a multiple of simd width
    T nth = nproc;
    KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d runtime(simd:static)"
                   " -> falling-through to static_greedy\n",
                   gtid));
    schedule = kmp_sch_static_greedy;
    if (nth > 1)
      pr->u.p.parm1 = ((tc + nth - 1) / nth + chunk - 1) & ~(chunk - 1);
    else
      pr->u.p.parm1 = tc;
    break;
  } // case
  case kmp_sch_guided_iterative_chunked: {
    KD_TRACE(
        100,
        ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked"
         " case\n",
         gtid));

    if (nproc > 1) {
      if ((2L * chunk + 1) * nproc >= tc) {
        /* chunk size too large, switch to dynamic */
        schedule = kmp_sch_dynamic_chunked;
      } else {
        // when remaining iters become less than parm2 - switch to dynamic
        pr->u.p.parm2 = guided_int_param * nproc * (chunk + 1);
        *(double *)&pr->u.p.parm3 =
            guided_flt_param / (double)nproc; // may occupy parm3 and parm4
      }
    } else {
      KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d falling-through to "
                     "kmp_sch_static_greedy\n",
                     gtid));
      schedule = kmp_sch_static_greedy;
      /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */
      KD_TRACE(
          100,
          ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n",
           gtid));
      pr->u.p.parm1 = tc;
    }
    break;
  } // case
  case kmp_sch_guided_analytical_chunked: {
    KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d "
                   "kmp_sch_guided_analytical_chunked case\n",
                   gtid));

    if (nproc > 1) {
      if ((2L * chunk + 1) * nproc >= tc) {
        /* chunk size too large, switch to dynamic */
        schedule = kmp_sch_dynamic_chunked;
      } else {
        DBL x;

#if KMP_USE_X87CONTROL
        // Windows* OS on IA-32 defaults to 53-bit floating-point precision;
        // the solver below needs 64-bit, so save the FPCW and raise it.
        unsigned int oldFpcw = _control87(0, 0);
        _control87(_PC_64, _MCW_PC);
#endif
        /* value used for comparison in solver for cross-over point */
        long double target = ((long double)chunk * 2 + 1) * nproc / tc;

        /* crossover point--chunk indexes equal to or greater than
           this point switch to dynamic-style scheduling */
        UT cross;

        /* commonly used term: (2 nproc - 1)/(2 nproc) */
        x = 1.0 - 0.5 / (double)nproc;

#ifdef KMP_DEBUG
        { // test natural alignment
          struct _test_a {
            char a;
            union {
              char b;
              DBL d;
            };
          } t;
          ptrdiff_t natural_alignment =
              (ptrdiff_t)&t.b - (ptrdiff_t)&t - (ptrdiff_t)1;
          KMP_DEBUG_ASSERT(
              (((ptrdiff_t)&pr->u.p.parm3) & (natural_alignment)) == 0);
        }
#endif // KMP_DEBUG

        /* save the term in thread private dispatch structure */
        *(DBL *)&pr->u.p.parm3 = x;

        /* solve for the crossover point to the nearest integer i for which
           C_i <= chunk */
        {
          UT left, right, mid;
          long double p;

          /* estimate initial upper and lower bound */
          right = 229;
          p = __kmp_pow<UT>(x, right);
          if (p > target) {
            do {
              p *= p;
              right <<= 1;
            } while (p > target && right < (1 << 27));
            /* lower bound is previous (failed) estimate of upper bound */
            left = right >> 1;
          } else {
            left = 0;
          }

          /* bisection root-finding method */
          while (left + 1 < right) {
            mid = (left + right) / 2;
            if (__kmp_pow<UT>(x, mid) > target) {
              left = mid;
            } else {
              right = mid;
            }
          } // while
          cross = right;
        }
        /* assert sanity of computed crossover point */
        KMP_ASSERT(cross && __kmp_pow<UT>(x, cross - 1) > target &&
                   __kmp_pow<UT>(x, cross) <= target);

        /* save the crossover point in thread private dispatch structure */
        pr->u.p.parm2 = cross;

#if ((KMP_OS_LINUX || KMP_OS_WINDOWS) && KMP_ARCH_X86) && (!defined(KMP_I8))
#define GUIDED_ANALYTICAL_WORKAROUND (*(DBL *)&pr->u.p.parm3)
#else
#define GUIDED_ANALYTICAL_WORKAROUND (x)
#endif
        /* dynamic-style scheduling offset */
        pr->u.p.count = tc - __kmp_dispatch_guided_remaining(
                                 tc, GUIDED_ANALYTICAL_WORKAROUND, cross) -
                        cross * chunk;
#if KMP_USE_X87CONTROL
        // restore FPCW
        _control87(oldFpcw, _MCW_PC);
#endif
      } // if
    } else {
      KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d falling-through to "
                     "kmp_sch_static_greedy\n",
                     gtid));
      schedule = kmp_sch_static_greedy;
      /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */
      pr->u.p.parm1 = tc;
    } // if
  } // case
  break;
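  // Note on the math above (comment added for clarity, not upstream): with
  // x = 1 - 1/(2*nproc) the analytically computed chunk sizes shrink
  // geometrically, and "cross" is the first chunk index at which that size
  // would fall to the requested chunk or below (the smallest i with
  // x^i <= target = (2*chunk + 1) * nproc / tc); chunk indexes at or beyond
  // the crossover are handed out dynamic-style with the user chunk size.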
  case kmp_sch_static_greedy:
    KD_TRACE(
        100,
        ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n",
         gtid));
    pr->u.p.parm1 = (nproc > 1) ? (tc + nproc - 1) / nproc : tc;
    break;
  case kmp_sch_static_chunked:
  case kmp_sch_dynamic_chunked:
    if (pr->u.p.parm1 <= 0)
      pr->u.p.parm1 = KMP_DEFAULT_CHUNK;
    else if (pr->u.p.parm1 > tc)
      pr->u.p.parm1 = tc;
    // Store the total number of chunks to prevent integer overflow during
    // bounds calculations in the get-next-chunk routine.
    pr->u.p.parm2 = (tc / pr->u.p.parm1) + (tc % pr->u.p.parm1 ? 1 : 0);
    KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d "
                   "kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n",
                   gtid));
    break;
  case kmp_sch_trapezoidal: {
    /* TSS: trapezoid self-scheduling, minimum chunk size = parm1 */
    T parm1, parm2, parm3, parm4;
    KD_TRACE(100,
             ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_trapezoidal case\n",
              gtid));

    parm1 = chunk;

    /* F : size of the first cycle */
    parm2 = (tc / (2 * nproc));
    if (parm2 < 1) {
      parm2 = 1;
    }

    /* L : size of the last cycle.  Make sure the last cycle is not larger
       than the first cycle. */
    if (parm1 < 1) {
      parm1 = 1;
    } else if (parm1 > parm2) {
      parm1 = parm2;
    }

    /* N : number of chunks */
    parm3 = (parm2 + parm1);
    parm3 = (2 * tc + parm3 - 1) / parm3;
    if (parm3 < 2) {
      parm3 = 2;
    }

    /* sigma : decreasing chunk size delta */
    parm4 = (parm3 - 1);
    parm4 = (parm2 - parm1) / parm4;

    pr->u.p.parm1 = parm1;
    pr->u.p.parm2 = parm2;
    pr->u.p.parm3 = parm3;
    pr->u.p.parm4 = parm4;
  } // case
  break;
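  // Worked example (comment added for clarity, not upstream): with tc = 1000,
  // nproc = 4 and chunk = 1, the first chunk is parm2 = 125 iterations and the
  // last is parm1 = 1; there are parm3 = 16 chunks and each is
  // parm4 = (125 - 1) / 15 = 8 iterations smaller than the previous one, so
  // the chunk sizes run 125, 117, 109, ... down to the minimum.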
  default: {
    __kmp_fatal(KMP_MSG(UnknownSchedTypeDetected), // Primary message
                KMP_HNT(GetNewerLibrary), // Hint
                __kmp_msg_null // Variadic argument list terminator
    );
  } break;
  } // switch
  pr->schedule = schedule;
}
#if KMP_USE_HIER_SCHED
template <typename T>
inline void __kmp_dispatch_init_hier_runtime(ident_t *loc, T lb, T ub,
                                             typename traits_t<T>::signed_t st);
template <>
inline void
__kmp_dispatch_init_hier_runtime<kmp_int32>(ident_t *loc, kmp_int32 lb,
                                            kmp_int32 ub, kmp_int32 st) {
  __kmp_dispatch_init_hierarchy<kmp_int32>(
      loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
      __kmp_hier_scheds.scheds, __kmp_hier_scheds.small_chunks, lb, ub, st);
}
template <>
inline void
__kmp_dispatch_init_hier_runtime<kmp_uint32>(ident_t *loc, kmp_uint32 lb,
                                             kmp_uint32 ub, kmp_int32 st) {
  __kmp_dispatch_init_hierarchy<kmp_uint32>(
      loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
      __kmp_hier_scheds.scheds, __kmp_hier_scheds.small_chunks, lb, ub, st);
}
template <>
inline void
__kmp_dispatch_init_hier_runtime<kmp_int64>(ident_t *loc, kmp_int64 lb,
                                            kmp_int64 ub, kmp_int64 st) {
  __kmp_dispatch_init_hierarchy<kmp_int64>(
      loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
      __kmp_hier_scheds.scheds, __kmp_hier_scheds.large_chunks, lb, ub, st);
}
template <>
inline void
__kmp_dispatch_init_hier_runtime<kmp_uint64>(ident_t *loc, kmp_uint64 lb,
                                             kmp_uint64 ub, kmp_int64 st) {
  __kmp_dispatch_init_hierarchy<kmp_uint64>(
      loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
      __kmp_hier_scheds.scheds, __kmp_hier_scheds.large_chunks, lb, ub, st);
}

// free all the hierarchy scheduling memory associated with the team
void __kmp_dispatch_free_hierarchies(kmp_team_t *team) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  for (int i = 0; i < num_disp_buff; ++i) {
    // type does not matter here so use kmp_int32
    dispatch_shared_info_template<kmp_int32> volatile *sh =
        reinterpret_cast<dispatch_shared_info_template<kmp_int32> volatile *>(
            &team->t.t_disp_buffer[i]);
    if (sh->hier) {
      sh->hier->deallocate();
      __kmp_free(sh->hier);
    }
  }
}
#endif
// UT - unsigned flavor of T, ST - signed flavor of T,
// DBL - double if sizeof(T)==4, or long double if sizeof(T)==8
template <typename T>
static void
__kmp_dispatch_init(ident_t *loc, int gtid, enum sched_type schedule, T lb,
                    T ub, typename traits_t<T>::signed_t st,
                    typename traits_t<T>::signed_t chunk, int push_ws) {
  typedef typename traits_t<T>::unsigned_t UT;

  int active;
  kmp_info_t *th;
  kmp_team_t *team;
  kmp_uint32 my_buffer_index;
  dispatch_private_info_template<T> *pr;
  dispatch_shared_info_template<T> volatile *sh;

  KMP_BUILD_ASSERT(sizeof(dispatch_private_info_template<T>) ==
                   sizeof(dispatch_private_info));
  KMP_BUILD_ASSERT(sizeof(dispatch_shared_info_template<UT>) ==
                   sizeof(dispatch_shared_info));
  __kmp_assert_valid_gtid(gtid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

#if INCLUDE_SSC_MARKS
  SSC_MARK_DISPATCH_INIT();
#endif
#ifdef KMP_DEBUG
  typedef typename traits_t<T>::signed_t ST;
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_dispatch_init: T#%%d called: schedule:%%d "
                            "chunk:%%%s lb:%%%s ub:%%%s st:%%%s\n",
                            traits_t<ST>::spec, traits_t<T>::spec,
                            traits_t<T>::spec, traits_t<ST>::spec);
    KD_TRACE(10, (buff, gtid, schedule, chunk, lb, ub, st));
    __kmp_str_free(&buff);
  }
#endif
  /* setup data */
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  active = !team->t.t_serialized;
  th->th.th_ident = loc;
  // Any half-decent optimizer will remove this test when the blocks are empty
  // since the macros expand to nothing when statistics are disabled.
  if (schedule == __kmp_static) {
    KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  } else {
    KMP_COUNT_BLOCK(OMP_LOOP_DYNAMIC);
  }

#if KMP_USE_HIER_SCHED
  // Initialize the scheduling hierarchy if requested in OMP_SCHEDULE.
  // Hierarchical scheduling does not work with ordered, so if ordered is
  // detected, revert back to threaded scheduling.
  bool ordered;
  enum sched_type my_sched = schedule;
  my_buffer_index = th->th.th_dispatch->th_disp_index;
  pr = reinterpret_cast<dispatch_private_info_template<T> *>(
      &th->th.th_dispatch
           ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
  my_sched = SCHEDULE_WITHOUT_MODIFIERS(my_sched);
  if ((my_sched >= kmp_nm_lower) && (my_sched < kmp_nm_upper))
    my_sched =
        (enum sched_type)(((int)my_sched) - (kmp_nm_lower - kmp_sch_lower));
  ordered = (kmp_ord_lower & my_sched);
  if (pr->flags.use_hier) {
    if (ordered) {
      KD_TRACE(100, ("__kmp_dispatch_init: T#%d ordered loop detected. "
                     "Disabling hierarchical scheduling.\n",
                     gtid));
      pr->flags.use_hier = FALSE;
    }
  }
  if (schedule == kmp_sch_runtime && __kmp_hier_scheds.size > 0) {
    // Don't use hierarchical scheduling for ordered parallel loops and don't
    // use the runtime hierarchy if one was specified in the program
    if (!ordered && !pr->flags.use_hier)
      __kmp_dispatch_init_hier_runtime<T>(loc, lb, ub, st);
  }
#endif // KMP_USE_HIER_SCHED

#if USE_ITT_BUILD
  kmp_uint64 cur_chunk = chunk;
  int itt_need_metadata_reporting =
      __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
      KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1;
#endif
  if (!active) {
    pr = reinterpret_cast<dispatch_private_info_template<T> *>(
        th->th.th_dispatch->th_disp_buffer); /* top of the stack */
  } else {
    KMP_DEBUG_ASSERT(th->th.th_dispatch ==
                     &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);

    my_buffer_index = th->th.th_dispatch->th_disp_index++;

    /* What happens when number of threads changes, need to resize buffer? */
    pr = reinterpret_cast<dispatch_private_info_template<T> *>(
        &th->th.th_dispatch
             ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
    sh = reinterpret_cast<dispatch_shared_info_template<T> volatile *>(
        &team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
    KD_TRACE(10, ("__kmp_dispatch_init: T#%d my_buffer_index:%d\n", gtid,
                  my_buffer_index));
  }

  __kmp_dispatch_init_algorithm(loc, gtid, pr, schedule, lb, ub, st,
#if USE_ITT_BUILD
                                &cur_chunk,
#endif
                                chunk, (T)th->th.th_team_nproc,
                                (T)th->th.th_info.ds.ds_tid);
  if (active) {
    if (pr->flags.ordered == 0) {
      th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo_error;
      th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo_error;
    } else {
      th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo<UT>;
      th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo<UT>;
    }
  }

  if (active) {
    /* The name of this buffer should be my_buffer_index when it's free to use
     * it */
    KD_TRACE(100, ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d "
                   "sh->buffer_index:%d\n",
                   gtid, my_buffer_index, sh->buffer_index));
    __kmp_wait<kmp_uint32>(&sh->buffer_index, my_buffer_index,
                           __kmp_eq<kmp_uint32> USE_ITT_BUILD_ARG(NULL));
    // Note: KMP_WAIT() cannot be used here: buffer index and
    // my_buffer_index are *always* 32-bit integers.
    KMP_MB(); /* is this necessary? */
    KD_TRACE(100, ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d "
                   "sh->buffer_index:%d\n",
                   gtid, my_buffer_index, sh->buffer_index));

    th->th.th_dispatch->th_dispatch_pr_current = (dispatch_private_info_t *)pr;
    th->th.th_dispatch->th_dispatch_sh_current =
        CCAST(dispatch_shared_info_t *, (volatile dispatch_shared_info_t *)sh);
#if USE_ITT_BUILD
    if (pr->flags.ordered) {
      __kmp_itt_ordered_init(gtid);
    }
    // Report loop metadata
    if (itt_need_metadata_reporting) {
      // Only report metadata by master of active team at level 1
      kmp_uint64 schedtype = 0;
      switch (schedule) {
      case kmp_sch_static_chunked:
      case kmp_sch_static_balanced: // Chunk is calculated in the switch above
        break;
      case kmp_sch_static_greedy:
        cur_chunk = pr->u.p.parm1;
        break;
      case kmp_sch_dynamic_chunked:
        schedtype = 1;
        break;
      case kmp_sch_guided_iterative_chunked:
      case kmp_sch_guided_analytical_chunked:
        schedtype = 2;
        break;
      default:
        schedtype = 3;
        break;
      }
      __kmp_itt_metadata_loop(loc, schedtype, pr->u.p.tc, cur_chunk);
    }
#if KMP_USE_HIER_SCHED
    if (pr->flags.use_hier) {
      pr->u.p.count = 0;
      pr->u.p.ub = pr->u.p.lb = pr->u.p.st = pr->u.p.tc = 0;
    }
#endif // KMP_USE_HIER_SCHED
#endif /* USE_ITT_BUILD */
  }

#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmp_dispatch_init: T#%%d returning: schedule:%%d ordered:%%%s "
        "lb:%%%s ub:%%%s"
        " st:%%%s tc:%%%s count:%%%s\n\tordered_lower:%%%s ordered_upper:%%%s"
        " parm1:%%%s parm2:%%%s parm3:%%%s parm4:%%%s\n",
        traits_t<UT>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<UT>::spec, traits_t<UT>::spec,
        traits_t<UT>::spec, traits_t<UT>::spec, traits_t<T>::spec,
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec);
    KD_TRACE(10, (buff, gtid, pr->schedule, pr->flags.ordered, pr->u.p.lb,
                  pr->u.p.ub, pr->u.p.st, pr->u.p.tc, pr->u.p.count,
                  pr->u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1,
                  pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4));
    __kmp_str_free(&buff);
  }
#endif
#if (KMP_STATIC_STEAL_ENABLED)
  // The static_steal_counter marks which loop the per-thread steal state
  // belongs to; incrementing it signals that other threads may steal from
  // this thread from now on.
  if (pr->schedule == kmp_sch_static_steal) {
    volatile T *p = &pr->u.p.static_steal_counter;
    *p = *p + 1;
  }
#endif // ( KMP_STATIC_STEAL_ENABLED )

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_loop, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), pr->u.p.tc, OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_dynamic);
}
/* For ordered loops, either __kmp_dispatch_finish() is called after every
   iteration, or __kmp_dispatch_finish_chunk() after every chunk.  If the
   ordered section was not entered, the shared ordered iteration counter still
   has to be advanced so that other threads can proceed. */
template <typename UT>
static void __kmp_dispatch_finish(int gtid, ident_t *loc) {
  typedef typename traits_t<UT>::signed_t ST;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];

  KD_TRACE(100, ("__kmp_dispatch_finish: T#%d called\n", gtid));
  if (!th->th.th_team->t.t_serialized) {

    dispatch_private_info_template<UT> *pr =
        reinterpret_cast<dispatch_private_info_template<UT> *>(
            th->th.th_dispatch->th_dispatch_pr_current);
    dispatch_shared_info_template<UT> volatile *sh =
        reinterpret_cast<dispatch_shared_info_template<UT> volatile *>(
            th->th.th_dispatch->th_dispatch_sh_current);
    KMP_DEBUG_ASSERT(pr);
    KMP_DEBUG_ASSERT(sh);
    KMP_DEBUG_ASSERT(th->th.th_dispatch ==
                     &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);

    if (pr->ordered_bumped) {
      KD_TRACE(
          1000,
          ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
           gtid));
      pr->ordered_bumped = 0;
    } else {
      UT lower = pr->u.p.ordered_lower;

#ifdef KMP_DEBUG
      {
        char *buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format("__kmp_dispatch_finish: T#%%d before wait: "
                                "ordered_iteration:%%%s lower:%%%s\n",
                                traits_t<UT>::spec, traits_t<UT>::spec);
        KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
        __kmp_str_free(&buff);
      }
#endif

      __kmp_wait<UT>(&sh->u.s.ordered_iteration, lower,
                     __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
      KMP_MB(); /* is this necessary? */
#ifdef KMP_DEBUG
      {
        char *buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format("__kmp_dispatch_finish: T#%%d after wait: "
                                "ordered_iteration:%%%s lower:%%%s\n",
                                traits_t<UT>::spec, traits_t<UT>::spec);
        KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
        __kmp_str_free(&buff);
      }
#endif

      test_then_inc<ST>((volatile ST *)&sh->u.s.ordered_iteration);
    } // if
  } // if
  KD_TRACE(100, ("__kmp_dispatch_finish: T#%d returned\n", gtid));
}
#ifdef KMP_GOMP_COMPAT

template <typename UT>
static void __kmp_dispatch_finish_chunk(int gtid, ident_t *loc) {
  typedef typename traits_t<UT>::signed_t ST;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];

  KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d called\n", gtid));
  if (!th->th.th_team->t.t_serialized) {
    dispatch_private_info_template<UT> *pr =
        reinterpret_cast<dispatch_private_info_template<UT> *>(
            th->th.th_dispatch->th_dispatch_pr_current);
    dispatch_shared_info_template<UT> volatile *sh =
        reinterpret_cast<dispatch_shared_info_template<UT> volatile *>(
            th->th.th_dispatch->th_dispatch_sh_current);
    KMP_DEBUG_ASSERT(pr);
    KMP_DEBUG_ASSERT(sh);
    KMP_DEBUG_ASSERT(th->th.th_dispatch ==
                     &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);

    UT lower = pr->u.p.ordered_lower;
    UT upper = pr->u.p.ordered_upper;
    UT inc = upper - lower + 1;

    if (pr->ordered_bumped == inc) {
      KD_TRACE(
          1000,
          ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
           gtid));
      pr->ordered_bumped = 0;
    } else {
      inc -= pr->ordered_bumped;

#ifdef KMP_DEBUG
      {
        char *buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmp_dispatch_finish_chunk: T#%%d before wait: "
            "ordered_iteration:%%%s lower:%%%s upper:%%%s\n",
            traits_t<UT>::spec, traits_t<UT>::spec, traits_t<UT>::spec);
        KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower, upper));
        __kmp_str_free(&buff);
      }
#endif

      __kmp_wait<UT>(&sh->u.s.ordered_iteration, lower,
                     __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));

      KMP_MB(); /* is this necessary? */
      KD_TRACE(1000, ("__kmp_dispatch_finish_chunk: T#%d resetting "
                      "ordered_bumped to zero\n",
                      gtid));
      pr->ordered_bumped = 0;
#ifdef KMP_DEBUG
      {
        char *buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmp_dispatch_finish_chunk: T#%%d after wait: "
            "ordered_iteration:%%%s inc:%%%s lower:%%%s upper:%%%s\n",
            traits_t<UT>::spec, traits_t<UT>::spec, traits_t<UT>::spec,
            traits_t<UT>::spec);
        KD_TRACE(1000,
                 (buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper));
        __kmp_str_free(&buff);
      }
#endif

      test_then_add<ST>((volatile ST *)&sh->u.s.ordered_iteration, inc);
    }
  }
  KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d returned\n", gtid));
}

#endif /* KMP_GOMP_COMPAT */
template <typename T>
int __kmp_dispatch_next_algorithm(int gtid,
                                  dispatch_private_info_template<T> *pr,
                                  dispatch_shared_info_template<T> volatile *sh,
                                  kmp_int32 *p_last, T *p_lb, T *p_ub,
                                  typename traits_t<T>::signed_t *p_st, T nproc,
                                  T tid) {
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  typedef typename traits_t<T>::floating_t DBL;
  int status = 0;
  kmp_int32 last = 0;
  T start;
  ST incr;
  UT limit, trip, init;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;

  KMP_DEBUG_ASSERT(th->th.th_dispatch ==
                   &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
  KMP_DEBUG_ASSERT(pr);
  KMP_DEBUG_ASSERT(sh);
  KMP_DEBUG_ASSERT(tid >= 0 && tid < nproc);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_dispatch_next_algorithm: T#%%d called pr:%%p "
                         "sh:%%p nproc:%%%s tid:%%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec);
    KD_TRACE(10, (buff, gtid, pr, sh, nproc, tid));
    __kmp_str_free(&buff);
  }
#endif

  // zero trip count
  if (pr->u.p.tc == 0) {
    KD_TRACE(10,
             ("__kmp_dispatch_next_algorithm: T#%d early exit trip count is "
              "zero status:%d\n",
              gtid, status));
    return 0;
  }

  switch (pr->schedule) {
#if (KMP_STATIC_STEAL_ENABLED)
  case kmp_sch_static_steal: {
    T chunk = pr->u.p.parm1;

    KD_TRACE(100,
             ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_steal case\n",
              gtid));

    trip = pr->u.p.tc - 1;

    if (traits_t<T>::type_size > 4) {
      // use lock for 8-byte induction variable
      kmp_lock_t *lck = pr->u.p.th_steal_lock;
      KMP_DEBUG_ASSERT(lck != NULL);
      if (pr->u.p.count < (UT)pr->u.p.ub) {
        __kmp_acquire_lock(lck, gtid);
        // try to get own chunk of iterations
        init = (pr->u.p.count)++;
        status = (init < (UT)pr->u.p.ub);
        __kmp_release_lock(lck, gtid);
      } else {
        status = 0; // no own chunks
      }
      if (!status) { // try to steal
        kmp_info_t **other_threads = team->t.t_threads;
        T while_limit = pr->u.p.parm3;
        T while_index = 0;
        T id = pr->u.p.static_steal_counter; // loop id
        int idx = (th->th.th_dispatch->th_disp_index - 1) %
                  __kmp_dispatch_num_buffers; // current loop index
        // note: victim thread can potentially execute another loop
        while ((!status) && (while_limit != ++while_index)) {
          dispatch_private_info_template<T> *victim;
          T remaining;
          T victimIdx = pr->u.p.parm4; // start search with parm4 T id
          T oldVictimIdx = victimIdx ? victimIdx - 1 : nproc - 1;
          victim = reinterpret_cast<dispatch_private_info_template<T> *>(
              &other_threads[victimIdx]->th.th_dispatch->th_disp_buffer[idx]);
          KMP_DEBUG_ASSERT(victim);
          while ((victim == pr || id != victim->u.p.static_steal_counter) &&
                 oldVictimIdx != victimIdx) {
            victimIdx = (victimIdx + 1) % nproc;
            victim = reinterpret_cast<dispatch_private_info_template<T> *>(
                &other_threads[victimIdx]->th.th_dispatch->th_disp_buffer[idx]);
            KMP_DEBUG_ASSERT(victim);
          }
          if (victim == pr || id != victim->u.p.static_steal_counter) {
            continue; // no victim ready yet; try once more (nproc attempts)
          }
          if (victim->u.p.count + 2 > (UT)victim->u.p.ub) {
            pr->u.p.parm4 = (victimIdx + 1) % nproc; // shift start tid
            continue; // not enough chunks to steal, go to next victim
          }

          lck = victim->u.p.th_steal_lock;
          KMP_ASSERT(lck != NULL);
          __kmp_acquire_lock(lck, gtid);
          limit = victim->u.p.ub; // keep initial ub
          if (victim->u.p.count >= limit ||
              (remaining = limit - victim->u.p.count) < 2) {
            __kmp_release_lock(lck, gtid);
            pr->u.p.parm4 = (victimIdx + 1) % nproc; // next victim
            continue; // not enough chunks to steal
          }
          // stealing succeeded: reduce victim's ub by 1/4 of undone chunks
          // or by 1
          if (remaining > 3) {
            // steal 1/4 of remaining
            KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen, remaining >> 2);
            init = (victim->u.p.ub -= (remaining >> 2));
          } else {
            // steal 1 chunk of 2 or 3 remaining
            KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen, 1);
            init = (victim->u.p.ub -= 1);
          }
          __kmp_release_lock(lck, gtid);

          KMP_DEBUG_ASSERT(init + 1 <= limit);
          pr->u.p.parm4 = victimIdx; // remember victim to steal from
          status = 1;
          while_index = 0;
          // now update own count and ub with the stolen range minus init chunk
          __kmp_acquire_lock(pr->u.p.th_steal_lock, gtid);
          pr->u.p.count = init + 1;
          pr->u.p.ub = limit;
          __kmp_release_lock(pr->u.p.th_steal_lock, gtid);
        } // while (search for victim)
      } // if (try to find victim and steal)
    } else {
      // 4-byte induction variable: use an 8-byte CAS on the packed (count, ub)
      // pair instead of a lock
      typedef union {
        struct {
          UT count;
          T ub;
        } p;
        kmp_int64 b;
      } union_i4;
      // All operations on 'count' or 'ub' must be combined atomically together
      {
        union_i4 vold, vnew;
        vold.b = *(volatile kmp_int64 *)(&pr->u.p.count);
        vnew = vold;
        vnew.p.count++;
        while (!KMP_COMPARE_AND_STORE_ACQ64(
            (volatile kmp_int64 *)&pr->u.p.count,
            *VOLATILE_CAST(kmp_int64 *) & vold.b,
            *VOLATILE_CAST(kmp_int64 *) & vnew.b)) {
          KMP_CPU_PAUSE();
          vold.b = *(volatile kmp_int64 *)(&pr->u.p.count);
          vnew = vold;
          vnew.p.count++;
        }
        vnew = vold;
        init = vnew.p.count;
        status = (init < (UT)vnew.p.ub);
      }

      if (!status) { // try to steal
        kmp_info_t **other_threads = team->t.t_threads;
        T while_limit = pr->u.p.parm3;
        T while_index = 0;
        T id = pr->u.p.static_steal_counter; // loop id
        int idx = (th->th.th_dispatch->th_disp_index - 1) %
                  __kmp_dispatch_num_buffers; // current loop index
        // note: victim thread can potentially execute another loop
        while ((!status) && (while_limit != ++while_index)) {
          dispatch_private_info_template<T> *victim;
          union_i4 vold, vnew;
          kmp_int32 remaining;
          T victimIdx = pr->u.p.parm4; // start search with parm4 T id
          T oldVictimIdx = victimIdx ? victimIdx - 1 : nproc - 1;
          victim = reinterpret_cast<dispatch_private_info_template<T> *>(
              &other_threads[victimIdx]->th.th_dispatch->th_disp_buffer[idx]);
          KMP_DEBUG_ASSERT(victim);
          while ((victim == pr || id != victim->u.p.static_steal_counter) &&
                 oldVictimIdx != victimIdx) {
            victimIdx = (victimIdx + 1) % nproc;
            victim = reinterpret_cast<dispatch_private_info_template<T> *>(
                &other_threads[victimIdx]->th.th_dispatch->th_disp_buffer[idx]);
            KMP_DEBUG_ASSERT(victim);
          }
          if (victim == pr || id != victim->u.p.static_steal_counter) {
            continue; // no victim ready yet; try once more (nproc attempts)
          }
          pr->u.p.parm4 = victimIdx; // new victim found
          while (1) { // CAS loop if victim has enough chunks to steal
            vold.b = *(volatile kmp_int64 *)(&victim->u.p.count);
            vnew = vold;

            KMP_DEBUG_ASSERT((vnew.p.ub - 1) * (UT)chunk <= trip);
            if (vnew.p.count >= (UT)vnew.p.ub ||
                (remaining = vnew.p.ub - vnew.p.count) < 2) {
              pr->u.p.parm4 = (victimIdx + 1) % nproc; // shift start victim id
              break; // not enough chunks to steal, go to next victim
            }
            if (remaining > 3) {
              // try to steal 1/4 of remaining
              vnew.p.ub -= remaining >> 2;
            } else {
              vnew.p.ub -= 1; // steal 1 chunk of 2 or 3 remaining
            }
            KMP_DEBUG_ASSERT((vnew.p.ub - 1) * (UT)chunk <= trip);
            if (KMP_COMPARE_AND_STORE_ACQ64(
                    (volatile kmp_int64 *)&victim->u.p.count,
                    *VOLATILE_CAST(kmp_int64 *) & vold.b,
                    *VOLATILE_CAST(kmp_int64 *) & vnew.b)) {
              // stealing succeeded
              KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen,
                                        vold.p.ub - vnew.p.ub);
              status = 1;
              while_index = 0;
              // now update own count and ub
              init = vnew.p.ub;
              vold.p.count = init + 1;
#if KMP_ARCH_X86
              KMP_XCHG_FIXED64((volatile kmp_int64 *)(&pr->u.p.count), vold.b);
#else
              *(volatile kmp_int64 *)(&pr->u.p.count) = vold.b;
#endif
              break;
            } // if (check CAS result)
            KMP_CPU_PAUSE(); // CAS failed, repeat attempt
          } // while (try to steal from particular victim)
        } // while (search for victim)
      } // if (try to find victim and steal)
    } // if (4-byte induction variable)
    if (!status) {
      *p_lb = 0;
      *p_ub = 0;
      if (p_st != NULL)
        *p_st = 0;
    } else {
      start = pr->u.p.parm2;
      init *= chunk;
      limit = chunk + init - 1;
      incr = pr->u.p.st;
      KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_chunks, 1);

      KMP_DEBUG_ASSERT(init <= trip);
      if ((last = (limit >= trip)) != 0)
        limit = trip;
      if (p_st != NULL)
        *p_st = incr;

      if (incr == 1) {
        *p_lb = start + init;
        *p_ub = start + limit;
      } else {
        *p_lb = start + init * incr;
        *p_ub = start + limit * incr;
      }

      if (pr->flags.ordered) {
        pr->u.p.ordered_lower = init;
        pr->u.p.ordered_upper = limit;
      } // if
    } // if
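    // Note on the policy above (comment added for clarity, not upstream):
    // each thread starts with an even share of the chunks (set up in
    // __kmp_dispatch_init_algorithm); once its own range is exhausted it
    // walks the other threads' dispatch buffers starting at the slot
    // remembered in parm4, skips victims with fewer than two chunks left,
    // and otherwise takes roughly a quarter of the victim's remaining chunks
    // (or a single chunk when only 2-3 remain). parm3 bounds the number of
    // steal attempts.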
    break;
  } // case
#endif // ( KMP_STATIC_STEAL_ENABLED )
  case kmp_sch_static_balanced: {
    KD_TRACE(
        100,
        ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_balanced case\n",
         gtid));
    /* check if thread has any iteration to do */
    if ((status = !pr->u.p.count) != 0) {
      pr->u.p.count = 1;
      *p_lb = pr->u.p.lb;
      *p_ub = pr->u.p.ub;
      last = (pr->u.p.parm1 != 0);
      if (p_st != NULL)
        *p_st = pr->u.p.st;
    } else { /* no iterations to do */
      pr->u.p.lb = pr->u.p.ub + pr->u.p.st;
    }
  } // case
  break;
  case kmp_sch_static_greedy:
  case kmp_sch_static_chunked: {
    T parm1;

    KD_TRACE(100, ("__kmp_dispatch_next_algorithm: T#%d "
                   "kmp_sch_static_[affinity|chunked] case\n",
                   gtid));
    parm1 = pr->u.p.parm1;

    trip = pr->u.p.tc - 1;
    init = parm1 * (pr->u.p.count + tid);

    if ((status = (init <= trip)) != 0) {
      start = pr->u.p.lb;
      incr = pr->u.p.st;
      limit = parm1 + init - 1;

      if ((last = (limit >= trip)) != 0)
        limit = trip;

      if (p_st != NULL)
        *p_st = incr;

      pr->u.p.count += nproc;

      if (incr == 1) {
        *p_lb = start + init;
        *p_ub = start + limit;
      } else {
        *p_lb = start + init * incr;
        *p_ub = start + limit * incr;
      }

      if (pr->flags.ordered) {
        pr->u.p.ordered_lower = init;
        pr->u.p.ordered_upper = limit;
      } // if
    } // if
  } // case
  break;
  case kmp_sch_dynamic_chunked: {
    UT chunk_number;
    UT chunk_size = pr->u.p.parm1;
    UT nchunks = pr->u.p.parm2;

    KD_TRACE(
        100,
        ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n",
         gtid));

    chunk_number = test_then_inc_acq<ST>((volatile ST *)&sh->u.s.iteration);
    status = (chunk_number < nchunks);
    if (!status) {
      *p_lb = 0;
      *p_ub = 0;
      if (p_st != NULL)
        *p_st = 0;
    } else {
      init = chunk_size * chunk_number;
      trip = pr->u.p.tc - 1;
      start = pr->u.p.lb;
      incr = pr->u.p.st;

      if ((last = (trip - init < (UT)chunk_size)))
        limit = trip;
      else
        limit = chunk_size + init - 1;

      if (p_st != NULL)
        *p_st = incr;

      if (incr == 1) {
        *p_lb = start + init;
        *p_ub = start + limit;
      } else {
        *p_lb = start + init * incr;
        *p_ub = start + limit * incr;
      }

      if (pr->flags.ordered) {
        pr->u.p.ordered_lower = init;
        pr->u.p.ordered_upper = limit;
      } // if
    } // if
  } // case
  break;
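  // Note (comment added for clarity, not upstream): in the dynamic_chunked
  // case above each call claims one chunk by atomically incrementing the
  // shared sh->u.s.iteration counter, so chunk k of a loop with chunk size c
  // covers logical iterations [k*c, min(k*c + c - 1, tc - 1)].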
  case kmp_sch_guided_iterative_chunked: {
    T chunkspec = pr->u.p.parm1;
    KD_TRACE(100, ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_chunked "
                   "iterative case\n",
                   gtid));
    trip = pr->u.p.tc;
    // Start atomic part of computations
    while (1) {
      ST remaining; // signed, because can be < 0
      init = sh->u.s.iteration; // shared value
      remaining = trip - init;
      if (remaining <= 0) { // nothing to do, don't try atomic op
        status = 0;
        break;
      }
      if ((T)remaining < pr->u.p.parm2) {
        // fewer than parm2 iterations left: use dynamic-style schedule
        // atomically increment iterations, get old value
        init = test_then_add<ST>(RCAST(volatile ST *, &sh->u.s.iteration),
                                 (ST)chunkspec);
        remaining = trip - init;
        if (remaining <= 0) {
          status = 0; // all iterations got by other threads
        } else {
          // got some iterations to work on
          status = 1;
          if ((T)remaining > chunkspec) {
            limit = init + chunkspec - 1;
          } else {
            last = 1; // the last chunk
            limit = init + remaining - 1;
          } // if
        } // if
        break;
      } // if
      // claim a fraction of the remaining iterations
      limit = init + (UT)((double)remaining *
                          *(double *)&pr->u.p.parm3);
      if (compare_and_swap<ST>(RCAST(volatile ST *, &sh->u.s.iteration),
                               (ST)init, (ST)limit)) {
        // CAS was successful, chunk obtained
        status = 1;
        --limit;
        break;
      } // if
    } // while
    if (status != 0) {
      start = pr->u.p.lb;
      incr = pr->u.p.st;
      if (p_st != NULL)
        *p_st = incr;
      *p_lb = start + init * incr;
      *p_ub = start + limit * incr;
      if (pr->flags.ordered) {
        pr->u.p.ordered_lower = init;
        pr->u.p.ordered_upper = limit;
      } // if
    } else {
      *p_lb = 0;
      *p_ub = 0;
      if (p_st != NULL)
        *p_st = 0;
    } // if
  } // case
  break;
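  // Note (comment added for clarity, not upstream): while plenty of work
  // remains, each thread claims a fraction of the remaining iterations
  // (remaining scaled by the per-team factor stored in parm3 during
  // __kmp_dispatch_init_algorithm) with a compare_and_swap on
  // sh->u.s.iteration; once fewer than parm2 iterations are left it falls
  // back to plain dynamic-style chunks of the requested size.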
  case kmp_sch_guided_simd: {
    // same as iterative but the current chunk is adjusted to be a multiple of
    // the given chunk (simd width)
    T chunk = pr->u.p.parm1;
    KD_TRACE(100,
             ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_simd case\n",
              gtid));
    trip = pr->u.p.tc;
    // Start atomic part of computations
    while (1) {
      ST remaining; // signed, because can be < 0
      init = sh->u.s.iteration; // shared value
      remaining = trip - init;
      if (remaining <= 0) { // nothing to do, don't try atomic op
        status = 0;
        break;
      }
      KMP_DEBUG_ASSERT(init % chunk == 0);
      // compare with K*nproc*(chunk+1), K=2 by default
      if ((T)remaining < pr->u.p.parm2) {
        // use dynamic-style schedule
        // atomically increment iterations, get old value
        init = test_then_add<ST>(RCAST(volatile ST *, &sh->u.s.iteration),
                                 (ST)chunk);
        remaining = trip - init;
        if (remaining <= 0) {
          status = 0; // all iterations got by other threads
        } else {
          // got some iterations to work on
          status = 1;
          if ((T)remaining > chunk) {
            limit = init + chunk - 1;
          } else {
            last = 1; // the last chunk
            limit = init + remaining - 1;
          } // if
        } // if
        break;
      } // if
      // claim a fraction of the remaining iterations, rounded up to a
      // multiple of chunk
      UT span;
      __kmp_type_convert((double)remaining * (*(double *)&pr->u.p.parm3),
                         &span);
      UT rem = span % chunk;
      if (rem) // adjust so that span % chunk == 0
        span += chunk - rem;
      limit = init + span;
      if (compare_and_swap<ST>(RCAST(volatile ST *, &sh->u.s.iteration),
                               (ST)init, (ST)limit)) {
        // CAS was successful, chunk obtained
        status = 1;
        --limit;
        break;
      } // if
    } // while
    if (status != 0) {
      start = pr->u.p.lb;
      incr = pr->u.p.st;
      if (p_st != NULL)
        *p_st = incr;
      *p_lb = start + init * incr;
      *p_ub = start + limit * incr;
      if (pr->flags.ordered) {
        pr->u.p.ordered_lower = init;
        pr->u.p.ordered_upper = limit;
      } // if
    } else {
      *p_lb = 0;
      *p_ub = 0;
      if (p_st != NULL)
        *p_st = 0;
    } // if
  } // case
  break;
  case kmp_sch_guided_analytical_chunked: {
    T chunkspec = pr->u.p.parm1;
    UT chunkIdx;
#if KMP_USE_X87CONTROL
    /* for storing original FPCW value for Windows* OS on IA-32 architecture */
    unsigned int oldFpcw;
    unsigned int fpcwSet = 0;
#endif
    KD_TRACE(100, ("__kmp_dispatch_next_algorithm: T#%d "
                   "kmp_sch_guided_analytical_chunked case\n",
                   gtid));

    trip = pr->u.p.tc;

    KMP_DEBUG_ASSERT(nproc > 1);
    KMP_DEBUG_ASSERT((2UL * chunkspec + 1) * (UT)nproc < trip);

    while (1) { /* this while loop is a safeguard against unexpected zero
                   chunk sizes */
      chunkIdx = test_then_inc_acq<ST>((volatile ST *)&sh->u.s.iteration);
      if (chunkIdx >= (UT)pr->u.p.parm2) {
        --trip;
        /* use dynamic-style scheduling */
        init = chunkIdx * chunkspec + pr->u.p.count;
        /* need to verify init > 0 in case of overflow in the above
           calculation */
        if ((status = (init > 0 && init <= trip)) != 0) {
          limit = init + chunkspec - 1;

          if ((last = (limit >= trip)) != 0)
            limit = trip;
        }
        break;
      } else {
/* use exponential-style scheduling */
#if KMP_USE_X87CONTROL
        /* If we haven't already done so, save original FPCW and set precision
           to 64-bit, as Windows* OS on IA-32 defaults to 53-bit */
        if (!fpcwSet) {
          oldFpcw = _control87(0, 0);
          _control87(_PC_64, _MCW_PC);
          fpcwSet = 0x30000;
        }
#endif
        if (chunkIdx) {
          init = __kmp_dispatch_guided_remaining<T>(
              trip, *(DBL *)&pr->u.p.parm3, chunkIdx);
          KMP_DEBUG_ASSERT(init);
          init = trip - init;
        } else
          init = 0;
        limit = trip - __kmp_dispatch_guided_remaining<T>(
                           trip, *(DBL *)&pr->u.p.parm3, chunkIdx + 1);
        KMP_ASSERT(init <= limit);
        if (init < limit) {
          KMP_DEBUG_ASSERT(limit <= trip);
          --limit;
          status = 1;
          break;
        } // if
      } // if
    } // while (1)
#if KMP_USE_X87CONTROL
    /* restore FPCW if necessary; check fpcwSet flag first because oldFpcw can
       be uninitialized here */
    if (fpcwSet && (oldFpcw & fpcwSet))
      _control87(oldFpcw, _MCW_PC);
#endif
    if (status != 0) {
      start = pr->u.p.lb;
      incr = pr->u.p.st;
      if (p_st != NULL)
        *p_st = incr;
      *p_lb = start + init * incr;
      *p_ub = start + limit * incr;
      if (pr->flags.ordered) {
        pr->u.p.ordered_lower = init;
        pr->u.p.ordered_upper = limit;
      } // if
    } else {
      *p_lb = 0;
      *p_ub = 0;
      if (p_st != NULL)
        *p_st = 0;
    } // if
  } // case
  break;
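  // Note (comment added for clarity, not upstream): for chunk indexes below
  // the crossover parm2 this case hands out exponentially shrinking ranges
  // computed from __kmp_dispatch_guided_remaining; at and beyond the
  // crossover it degenerates to fixed chunks of chunkspec iterations, offset
  // by the dynamic-style base stored in pr->u.p.count during init.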
  case kmp_sch_trapezoidal: {
    UT index;
    T parm2 = pr->u.p.parm2;
    T parm3 = pr->u.p.parm3;
    T parm4 = pr->u.p.parm4;
    KD_TRACE(100,
             ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_trapezoidal case\n",
              gtid));

    index = test_then_inc<ST>((volatile ST *)&sh->u.s.iteration);

    init = (index * ((2 * parm2) - (index - 1) * parm4)) / 2;
    trip = pr->u.p.tc - 1;

    if ((status = ((T)index < parm3 && init <= trip)) == 0) {
      *p_lb = 0;
      *p_ub = 0;
      if (p_st != NULL)
        *p_st = 0;
    } else {
      start = pr->u.p.lb;
      limit = ((index + 1) * (2 * parm2 - index * parm4)) / 2 - 1;
      incr = pr->u.p.st;

      if ((last = (limit >= trip)) != 0)
        limit = trip;

      if (p_st != NULL)
        *p_st = incr;

      if (incr == 1) {
        *p_lb = start + init;
        *p_ub = start + limit;
      } else {
        *p_lb = start + init * incr;
        *p_ub = start + limit * incr;
      }

      if (pr->flags.ordered) {
        pr->u.p.ordered_lower = init;
        pr->u.p.ordered_upper = limit;
      } // if
    } // if
  } // case
  break;
  default: {
    status = 0; // to avoid complaints on uninitialized variable use
    __kmp_fatal(KMP_MSG(UnknownSchedTypeDetected), // Primary message
                KMP_HNT(GetNewerLibrary), // Hint
                __kmp_msg_null // Variadic argument list terminator
    );
  } break;
  } // switch
  if (p_last)
    *p_last = last;
#ifdef KMP_DEBUG
  if (pr->flags.ordered) {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_dispatch_next_algorithm: T#%%d "
                            "ordered_lower:%%%s ordered_upper:%%%s\n",
                            traits_t<UT>::spec, traits_t<UT>::spec);
    KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper));
    __kmp_str_free(&buff);
  }
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmp_dispatch_next_algorithm: T#%%d exit status:%%d p_last:%%d "
        "p_lb:%%%s p_ub:%%%s p_st:%%%s\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
    KMP_DEBUG_ASSERT(p_last);
    KMP_DEBUG_ASSERT(p_st);
    KD_TRACE(10, (buff, gtid, status, *p_last, *p_lb, *p_ub, *p_st));
    __kmp_str_free(&buff);
  }
#endif
  return status;
}

/* Define a macro for exiting __kmp_dispatch_next().  If status is 0 (no more
   work), tell OMPT the loop is over; in some cases kmp_dispatch_fini() is not
   called. */
#if OMPT_SUPPORT && OMPT_OPTIONAL
#define OMPT_LOOP_END                                                          \
  if (status == 0) {                                                           \
    if (ompt_enabled.ompt_callback_work) {                                     \
      ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);              \
      ompt_task_info_t *task_info = __ompt_get_task_info_object(0);            \
      ompt_callbacks.ompt_callback(ompt_callback_work)(                        \
          ompt_work_loop, ompt_scope_end, &(team_info->parallel_data),         \
          &(task_info->task_data), 0, codeptr);                                \
    }                                                                          \
  }
#else
#define OMPT_LOOP_END // no-op
#endif

#if KMP_STATS_ENABLED
#define KMP_STATS_LOOP_END                                                     \
  {                                                                            \
    kmp_int64 u, l, t, i;                                                      \
    l = (kmp_int64)(*p_lb);                                                    \
    u = (kmp_int64)(*p_ub);                                                    \
    i = (kmp_int64)(pr->u.p.st);                                               \
    if (status == 0) {                                                         \
      t = 0;                                                                   \
      KMP_POP_PARTITIONED_TIMER();                                             \
    } else if (i == 1) {                                                       \
      if (u >= l)                                                              \
        t = u - l + 1;                                                         \
      else                                                                     \
        t = 0;                                                                 \
    } else if (i < 0) {                                                        \
      if (l >= u)                                                              \
        t = (l - u) / (-i) + 1;                                                \
      else                                                                     \
        t = 0;                                                                 \
    } else {                                                                   \
      if (u >= l)                                                              \
        t = (u - l) / i + 1;                                                   \
      else                                                                     \
        t = 0;                                                                 \
    }                                                                          \
    KMP_COUNT_VALUE(OMP_loop_dynamic_iterations, t);                           \
  }
#else
#define KMP_STATS_LOOP_END /* Nothing */
#endif

template <typename T>
static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last,
                               T *p_lb, T *p_ub,
                               typename traits_t<T>::signed_t *p_st
#if OMPT_SUPPORT && OMPT_OPTIONAL
                               ,
                               void *codeptr
#endif
) {

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  KMP_TIME_PARTITIONED_BLOCK(OMP_loop_dynamic_scheduling);

  int status;
  dispatch_private_info_template<T> *pr;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;

  KMP_DEBUG_ASSERT(p_lb && p_ub && p_st); // AC: these cannot be NULL
  KD_TRACE(
      1000,
      ("__kmp_dispatch_next: T#%d called p_lb:%p p_ub:%p p_st:%p p_last: %p\n",
       gtid, p_lb, p_ub, p_st, p_last));
  if (team->t.t_serialized) {
    /* NOTE: serialize this dispatch because we are not at the active level */
    pr = reinterpret_cast<dispatch_private_info_template<T> *>(
        th->th.th_dispatch->th_disp_buffer); /* top of the stack */
    KMP_DEBUG_ASSERT(pr);

    if ((status = (pr->u.p.tc != 0)) == 0) {
      *p_lb = 0;
      *p_ub = 0;
      if (p_st != NULL)
        *p_st = 0;
      if (__kmp_env_consistency_check) {
        if (pr->pushed_ws != ct_none) {
          pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
        }
      }
    } else if (pr->flags.nomerge) {
      kmp_int32 last;
      T start;
      UT limit, trip, init;
      ST incr;
      T chunk = pr->u.p.parm1;

      KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n",
                     gtid));

      init = chunk * pr->u.p.count++;
      trip = pr->u.p.tc - 1;

      if ((status = (init <= trip)) == 0) {
        *p_lb = 0;
        *p_ub = 0;
        if (p_st != NULL)
          *p_st = 0;
        if (__kmp_env_consistency_check) {
          if (pr->pushed_ws != ct_none) {
            pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
          }
        }
      } else {
        start = pr->u.p.lb;
        limit = chunk + init - 1;
        incr = pr->u.p.st;

        if ((last = (limit >= trip)) != 0) {
          limit = trip;
#if KMP_OS_WINDOWS
          pr->u.p.last_upper = pr->u.p.ub;
#endif /* KMP_OS_WINDOWS */
        }
        if (p_last != NULL)
          *p_last = last;
        if (p_st != NULL)
          *p_st = incr;
        if (incr == 1) {
          *p_lb = start + init;
          *p_ub = start + limit;
        } else {
          *p_lb = start + init * incr;
          *p_ub = start + limit * incr;
        }

        if (pr->flags.ordered) {
          pr->u.p.ordered_lower = init;
          pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
          {
            char *buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
                                    "ordered_lower:%%%s ordered_upper:%%%s\n",
                                    traits_t<UT>::spec, traits_t<UT>::spec);
            KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
                            pr->u.p.ordered_upper));
            __kmp_str_free(&buff);
          }
#endif
        } // if
      } // if
    } else {
      pr->u.p.tc = 0;
      *p_lb = pr->u.p.lb;
      *p_ub = pr->u.p.ub;
#if KMP_OS_WINDOWS
      pr->u.p.last_upper = *p_ub;
#endif /* KMP_OS_WINDOWS */
      if (p_last != NULL)
        *p_last = TRUE;
      if (p_st != NULL)
        *p_st = pr->u.p.st;
    } // if
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format(
          "__kmp_dispatch_next: T#%%d serialized case: p_lb:%%%s "
          "p_ub:%%%s p_st:%%%s p_last:%%p %%d returning:%%d\n",
          traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
      KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, *p_st, p_last,
                    (p_last ? *p_last : 0), status));
      __kmp_str_free(&buff);
    }
#endif
#if INCLUDE_SSC_MARKS
    SSC_MARK_DISPATCH_NEXT();
#endif
    OMPT_LOOP_END;
    KMP_STATS_LOOP_END;
    return status;
  } else {
    kmp_int32 last = 0;
    dispatch_shared_info_template<T> volatile *sh;

    KMP_DEBUG_ASSERT(th->th.th_dispatch ==
                     &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);

    pr = reinterpret_cast<dispatch_private_info_template<T> *>(
        th->th.th_dispatch->th_dispatch_pr_current);
    KMP_DEBUG_ASSERT(pr);
    sh = reinterpret_cast<dispatch_shared_info_template<T> volatile *>(
        th->th.th_dispatch->th_dispatch_sh_current);
    KMP_DEBUG_ASSERT(sh);

#if KMP_USE_HIER_SCHED
    if (pr->flags.use_hier)
      status = sh->hier->next(loc, gtid, pr, &last, p_lb, p_ub, p_st);
    else
#endif // KMP_USE_HIER_SCHED
      status = __kmp_dispatch_next_algorithm<T>(gtid, pr, sh, &last, p_lb, p_ub,
                                                p_st, th->th.th_team_nproc,
                                                th->th.th_info.ds.ds_tid);
    // status == 0: no more iterations to execute
    if (status == 0) {
      UT num_done;

      num_done = test_then_inc<ST>((volatile ST *)&sh->u.s.num_done);
#ifdef KMP_DEBUG
      {
        char *buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmp_dispatch_next: T#%%d increment num_done:%%%s\n",
            traits_t<UT>::spec);
        KD_TRACE(10, (buff, gtid, sh->u.s.num_done));
        __kmp_str_free(&buff);
      }
#endif

#if KMP_USE_HIER_SCHED
      pr->flags.use_hier = FALSE;
#endif
      if ((ST)num_done == th->th.th_team_nproc - 1) {
#if (KMP_STATIC_STEAL_ENABLED)
        if (pr->schedule == kmp_sch_static_steal &&
            traits_t<T>::type_size > 4) {
          int i;
          int idx = (th->th.th_dispatch->th_disp_index - 1) %
                    __kmp_dispatch_num_buffers; // current loop index
          kmp_info_t **other_threads = team->t.t_threads;
          // loop complete, safe to destroy locks used for stealing
          for (i = 0; i < th->th.th_team_nproc; ++i) {
            dispatch_private_info_template<T> *buf =
                reinterpret_cast<dispatch_private_info_template<T> *>(
                    &other_threads[i]->th.th_dispatch->th_disp_buffer[idx]);
            kmp_lock_t *lck = buf->u.p.th_steal_lock;
            KMP_ASSERT(lck != NULL);
            __kmp_destroy_lock(lck);
            __kmp_free(lck);
            buf->u.p.th_steal_lock = NULL;
          }
        }
#endif
        /* NOTE: release this buffer to be reused */

        KMP_MB(); /* Flush all pending memory write invalidates.  */

        sh->u.s.num_done = 0;
        sh->u.s.iteration = 0;

        /* TODO replace with general release procedure? */
        if (pr->flags.ordered) {
          sh->u.s.ordered_iteration = 0;
        }

        KMP_MB(); /* Flush all pending memory write invalidates.  */

        sh->buffer_index += __kmp_dispatch_num_buffers;
        KD_TRACE(100, ("__kmp_dispatch_next: T#%d change buffer_index:%d\n",
                       gtid, sh->buffer_index));

        KMP_MB(); /* Flush all pending memory write invalidates.  */
      } // if
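      // Note (comment added for clarity, not upstream): the last thread to
      // finish the loop resets the shared buffer and bumps sh->buffer_index
      // by __kmp_dispatch_num_buffers; __kmp_dispatch_init of a later loop
      // waits on that value before reusing the slot, which keeps the
      // round-robin dispatch buffers race-free.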
      if (__kmp_env_consistency_check) {
        if (pr->pushed_ws != ct_none) {
          pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
        }
      }

      th->th.th_dispatch->th_deo_fcn = NULL;
      th->th.th_dispatch->th_dxo_fcn = NULL;
      th->th.th_dispatch->th_dispatch_sh_current = NULL;
      th->th.th_dispatch->th_dispatch_pr_current = NULL;
    } // if (status == 0)
#if KMP_OS_WINDOWS
    else if (last) {
      pr->u.p.last_upper = pr->u.p.ub;
    }
#endif /* KMP_OS_WINDOWS */
    if (p_last != NULL && status != 0)
      *p_last = last;
  } // if

#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmp_dispatch_next: T#%%d normal case: "
        "p_lb:%%%s p_ub:%%%s p_st:%%%s p_last:%%p (%%d) returning:%%d\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
    KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last,
                  (p_last ? *p_last : 0), status));
    __kmp_str_free(&buff);
  }
#endif
#if INCLUDE_SSC_MARKS
  SSC_MARK_DISPATCH_NEXT();
#endif
  OMPT_LOOP_END;
  KMP_STATS_LOOP_END;
  return status;
}
template <typename T>
static void __kmp_dist_get_bounds(ident_t *loc, kmp_int32 gtid,
                                  kmp_int32 *plastiter, T *plower, T *pupper,
                                  typename traits_t<T>::signed_t incr) {
  typedef typename traits_t<T>::unsigned_t UT;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper);
  KE_TRACE(10, ("__kmpc_dist_get_bounds called (%d)\n", gtid));
#ifdef KMP_DEBUG
  typedef typename traits_t<T>::signed_t ST;
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_dist_get_bounds: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *plastiter, *plower, *pupper, incr));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal (compilers only catch some of these cases)
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  __kmp_assert_valid_gtid(gtid);
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only some teams get single iteration, others get nothing
    if (team_id < trip_count) {
      *pupper = *plower = *plower + team_id * incr;
    } else {
      *plower = *pupper + incr; // zero-trip loop
    }
    if (plastiter != NULL)
      *plastiter = (team_id == trip_count - 1);
  } else {
    if (__kmp_static == kmp_sch_static_balanced) {
      UT chunk = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunk + (team_id < extras ? team_id : extras));
      *pupper = *plower + chunk * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupper = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupper < *plower)
          *pupper = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupper > upper - incr;
        if (*pupper > upper)
          *pupper = upper;
      } else {
        if (*pupper > *plower)
          *pupper = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupper < upper - incr;
        if (*pupper < upper)
          *pupper = upper;
      }
    }
  }
}
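// Worked example (comment added for clarity, not upstream): for a distribute
// loop over i = 0..99 (incr = 1, trip_count = 100) with nteams = 3 and
// balanced static scheduling, chunk = 33 and extras = 1, so team 0 gets
// [0,33], team 1 gets [34,66], team 2 gets [67,99], and only team 2 reports
// *plastiter.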
void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
                            enum sched_type schedule, kmp_int32 lb,
                            kmp_int32 ub, kmp_int32 st, kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
                             enum sched_type schedule, kmp_uint32 lb,
                             kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
                            enum sched_type schedule, kmp_int64 lb,
                            kmp_int64 ub, kmp_int64 st, kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
                             enum sched_type schedule, kmp_uint64 lb,
                             kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, true);
}
void __kmpc_dist_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
                                 enum sched_type schedule, kmp_int32 *p_last,
                                 kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
                                 kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dist_get_bounds<kmp_int32>(loc, gtid, p_last, &lb, &ub, st);
  __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

void __kmpc_dist_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
                                  enum sched_type schedule, kmp_int32 *p_last,
                                  kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
                                  kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dist_get_bounds<kmp_uint32>(loc, gtid, p_last, &lb, &ub, st);
  __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

void __kmpc_dist_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
                                 enum sched_type schedule, kmp_int32 *p_last,
                                 kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
                                 kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dist_get_bounds<kmp_int64>(loc, gtid, p_last, &lb, &ub, st);
  __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

void __kmpc_dist_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
                                  enum sched_type schedule, kmp_int32 *p_last,
                                  kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
                                  kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dist_get_bounds<kmp_uint64>(loc, gtid, p_last, &lb, &ub, st);
  __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, true);
}
int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                           kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  return __kmp_dispatch_next<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                        ,
                                        OMPT_LOAD_RETURN_ADDRESS(gtid)
#endif
  );
}

int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                            kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                            kmp_int32 *p_st) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  return __kmp_dispatch_next<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                         ,
                                         OMPT_LOAD_RETURN_ADDRESS(gtid)
#endif
  );
}

int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                           kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  return __kmp_dispatch_next<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                        ,
                                        OMPT_LOAD_RETURN_ADDRESS(gtid)
#endif
  );
}

int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                            kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                            kmp_int64 *p_st) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  return __kmp_dispatch_next<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                         ,
                                         OMPT_LOAD_RETURN_ADDRESS(gtid)
#endif
  );
}

void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish<kmp_uint32>(gtid, loc);
}

void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish<kmp_uint64>(gtid, loc);
}

void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish<kmp_uint32>(gtid, loc);
}

void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish<kmp_uint64>(gtid, loc);
}
kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker) {
  return value == checker;
}

kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker) {
  return value != checker;
}

kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker) {
  return value < checker;
}

kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker) {
  return value >= checker;
}

kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker) {
  return value <= checker;
}
kmp_uint32
__kmp_wait_4(volatile kmp_uint32 *spinner, kmp_uint32 checker,
             kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
             void *obj // Higher-level synchronization object, or NULL.
) {
  // note: we may not belong to a team at this point
  volatile kmp_uint32 *spin = spinner;
  kmp_uint32 check = checker;
  kmp_uint32 spins;
  kmp_uint32 (*f)(kmp_uint32, kmp_uint32) = pred;
  kmp_uint32 r;

  KMP_FSYNC_SPIN_INIT(obj, CCAST(kmp_uint32 *, spin));
  KMP_INIT_YIELD(spins);
  // main wait spin loop
  while (!f(r = TCR_4(*spin), check)) {
    KMP_FSYNC_SPIN_PREPARE(obj);
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
  }
  KMP_FSYNC_SPIN_ACQUIRED(obj);
  return r;
}

void __kmp_wait_4_ptr(void *spinner, kmp_uint32 checker,
                      kmp_uint32 (*pred)(void *, kmp_uint32),
                      void *obj // Higher-level synchronization object, or NULL.
) {
  // note: we may not belong to a team at this point
  void *spin = spinner;
  kmp_uint32 check = checker;
  kmp_uint32 spins;
  kmp_uint32 (*f)(void *, kmp_uint32) = pred;

  KMP_FSYNC_SPIN_INIT(obj, spin);
  KMP_INIT_YIELD(spins);
  // main wait spin loop
  while (!f(spin, check)) {
    KMP_FSYNC_SPIN_PREPARE(obj);
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
  }
  KMP_FSYNC_SPIN_ACQUIRED(obj);
}
#ifdef KMP_GOMP_COMPAT

void __kmp_aux_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
                               enum sched_type schedule, kmp_int32 lb,
                               kmp_int32 ub, kmp_int32 st, kmp_int32 chunk,
                               int push_ws) {
  __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk,
                                 push_ws);
}

void __kmp_aux_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
                                enum sched_type schedule, kmp_uint32 lb,
                                kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk,
                                int push_ws) {
  __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk,
                                  push_ws);
}

void __kmp_aux_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
                               enum sched_type schedule, kmp_int64 lb,
                               kmp_int64 ub, kmp_int64 st, kmp_int64 chunk,
                               int push_ws) {
  __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk,
                                 push_ws);
}

void __kmp_aux_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
                                enum sched_type schedule, kmp_uint64 lb,
                                kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk,
                                int push_ws) {
  __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk,
                                  push_ws);
}

void __kmp_aux_dispatch_fini_chunk_4(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish_chunk<kmp_uint32>(gtid, loc);
}

void __kmp_aux_dispatch_fini_chunk_8(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish_chunk<kmp_uint64>(gtid, loc);
}

void __kmp_aux_dispatch_fini_chunk_4u(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish_chunk<kmp_uint32>(gtid, loc);
}

void __kmp_aux_dispatch_fini_chunk_8u(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish_chunk<kmp_uint64>(gtid, loc);
}

#endif /* KMP_GOMP_COMPAT */