LLVM OpenMP* Runtime Library
kmp_csupport.cpp
1 /*
2  * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #define __KMP_IMP
14 #include "omp.h" /* extern "C" declarations of user-visible routines */
15 #include "kmp.h"
16 #include "kmp_error.h"
17 #include "kmp_i18n.h"
18 #include "kmp_itt.h"
19 #include "kmp_lock.h"
20 #include "kmp_stats.h"
21 #include "ompt-specific.h"
22 
23 #define MAX_MESSAGE 512
24 
25 // flags will be used in the future, e.g., to implement openmp_strict library
26 // restrictions
27 
36 void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
37  // By default __kmpc_begin() is no-op.
38  char *env;
39  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
40  __kmp_str_match_true(env)) {
41  __kmp_middle_initialize();
42  __kmp_assign_root_init_mask();
43  KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
44  } else if (__kmp_ignore_mppbeg() == FALSE) {
45  // By default __kmp_ignore_mppbeg() returns TRUE.
46  __kmp_internal_begin();
47  KC_TRACE(10, ("__kmpc_begin: called\n"));
48  }
49 }
50 
59 void __kmpc_end(ident_t *loc) {
60  // By default, __kmp_ignore_mppend() returns TRUE, which makes the __kmpc_end()
61  // call a no-op. However, this can be overridden with the KMP_IGNORE_MPPEND
62  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
63  // returns FALSE and __kmpc_end() will unregister this root (which can cause
64  // library shutdown).
65  if (__kmp_ignore_mppend() == FALSE) {
66  KC_TRACE(10, ("__kmpc_end: called\n"));
67  KA_TRACE(30, ("__kmpc_end\n"));
68 
69  __kmp_internal_end_thread(-1);
70  }
71 #if KMP_OS_WINDOWS && OMPT_SUPPORT
72  // Normal exit process on Windows does not allow worker threads of the final
73  // parallel region to finish reporting their events, so shutting down the
74  // library here fixes the issue at least for the cases where __kmpc_end() is
75  // placed properly.
76  if (ompt_enabled.enabled)
77  __kmp_internal_end_library(__kmp_gtid_get_specific());
78 #endif
79 }
80 
99 kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
100  kmp_int32 gtid = __kmp_entry_gtid();
101 
102  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
103 
104  return gtid;
105 }
106 
121 kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
122  KC_TRACE(10,
123  ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
124 
125  return TCR_4(__kmp_all_nth);
126 }
127 
134 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
135  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
136  return __kmp_tid_from_gtid(__kmp_entry_gtid());
137 }
138 
144 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
145  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
146 
147  return __kmp_entry_thread()->th.th_team->t.t_nproc;
148 }
149 
156 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
157 #ifndef KMP_DEBUG
158 
159  return TRUE;
160 
161 #else
162 
163  const char *semi2;
164  const char *semi3;
165  int line_no;
166 
167  if (__kmp_par_range == 0) {
168  return TRUE;
169  }
170  semi2 = loc->psource;
171  if (semi2 == NULL) {
172  return TRUE;
173  }
174  semi2 = strchr(semi2, ';');
175  if (semi2 == NULL) {
176  return TRUE;
177  }
178  semi2 = strchr(semi2 + 1, ';');
179  if (semi2 == NULL) {
180  return TRUE;
181  }
182  if (__kmp_par_range_filename[0]) {
183  const char *name = semi2 - 1;
184  while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
185  name--;
186  }
187  if ((*name == '/') || (*name == ';')) {
188  name++;
189  }
190  if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
191  return __kmp_par_range < 0;
192  }
193  }
194  semi3 = strchr(semi2 + 1, ';');
195  if (__kmp_par_range_routine[0]) {
196  if ((semi3 != NULL) && (semi3 > semi2) &&
197  (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
198  return __kmp_par_range < 0;
199  }
200  }
201  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
202  if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
203  return __kmp_par_range > 0;
204  }
205  return __kmp_par_range < 0;
206  }
207  return TRUE;
208 
209 #endif /* KMP_DEBUG */
210 }
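// Worked example (illustrative, not part of the runtime): loc->psource is a
// semicolon-separated string of the form ";file;routine;line;column;;", e.g.
//   ";foo.c;bar;42;17;;"
// For that string the filtering above compares __kmp_par_range_filename
// against "foo.c", __kmp_par_range_routine against "bar", and the parallel
// range bounds against line 42.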
211 
218 kmp_int32 __kmpc_in_parallel(ident_t *loc) {
219  return __kmp_entry_thread()->th.th_root->r.r_active;
220 }
221 
231 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
232  kmp_int32 num_threads) {
233  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
234  global_tid, num_threads));
235  __kmp_assert_valid_gtid(global_tid);
236  __kmp_push_num_threads(loc, global_tid, num_threads);
237 }
238 
239 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
240  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
241  /* the num_threads are automatically popped */
242 }
243 
244 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
245  kmp_int32 proc_bind) {
246  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
247  proc_bind));
248  __kmp_assert_valid_gtid(global_tid);
249  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
250 }
251 
262 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
263  int gtid = __kmp_entry_gtid();
264 
265 #if (KMP_STATS_ENABLED)
266  // If we were in a serial region, then stop the serial timer, record
267  // the event, and start parallel region timer
268  stats_state_e previous_state = KMP_GET_THREAD_STATE();
269  if (previous_state == stats_state_e::SERIAL_REGION) {
270  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
271  } else {
272  KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
273  }
274  int inParallel = __kmpc_in_parallel(loc);
275  if (inParallel) {
276  KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
277  } else {
278  KMP_COUNT_BLOCK(OMP_PARALLEL);
279  }
280 #endif
281 
282  // maybe saving thr_state is enough here
283  {
284  va_list ap;
285  va_start(ap, microtask);
286 
287 #if OMPT_SUPPORT
288  ompt_frame_t *ompt_frame;
289  if (ompt_enabled.enabled) {
290  kmp_info_t *master_th = __kmp_threads[gtid];
291  ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
292  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
293  }
294  OMPT_STORE_RETURN_ADDRESS(gtid);
295 #endif
296 
297 #if INCLUDE_SSC_MARKS
298  SSC_MARK_FORKING();
299 #endif
300  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
301  VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
302  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
303  kmp_va_addr_of(ap));
304 #if INCLUDE_SSC_MARKS
305  SSC_MARK_JOINING();
306 #endif
307  __kmp_join_call(loc, gtid
308 #if OMPT_SUPPORT
309  ,
310  fork_context_intel
311 #endif
312  );
313 
314  va_end(ap);
315 
316 #if OMPT_SUPPORT
317  if (ompt_enabled.enabled) {
318  ompt_frame->enter_frame = ompt_data_none;
319  }
320 #endif
321  }
322 
323 #if KMP_STATS_ENABLED
324  if (previous_state == stats_state_e::SERIAL_REGION) {
325  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
326  KMP_SET_THREAD_STATE(previous_state);
327  } else {
328  KMP_POP_PARTITIONED_TIMER();
329  }
330 #endif // KMP_STATS_ENABLED
331 }
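// Illustrative sketch (hypothetical, not runtime code): roughly how a compiler
// could lower "#pragma omp parallel num_threads(4)" onto this entry point. The
// names loc_example, outlined_parallel and lowered_caller are invented for the
// example; real codegen (e.g. clang's) differs in detail.
//
//   static ident_t loc_example = {0, KMP_IDENT_KMPC, 0, 0, ";foo.c;bar;42;17;;"};
//
//   // outlined parallel-region body, matching the kmpc_micro signature
//   static void outlined_parallel(kmp_int32 *global_tid, kmp_int32 *bound_tid,
//                                 int *shared_x) {
//     (*shared_x)++; // user code operating on a shared variable
//   }
//
//   void lowered_caller(void) {
//     int x = 0;
//     kmp_int32 gtid = __kmpc_global_thread_num(&loc_example);
//     __kmpc_push_num_threads(&loc_example, gtid, 4); // num_threads(4) clause
//     __kmpc_fork_call(&loc_example, /*argc=*/1, (kmpc_micro)outlined_parallel,
//                      &x);
//   }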
332 
343 void __kmpc_fork_call_if(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
344  kmp_int32 cond, void *args) {
345  int gtid = __kmp_entry_gtid();
346  if (cond) {
347  if (args)
348  __kmpc_fork_call(loc, argc, microtask, args);
349  else
350  __kmpc_fork_call(loc, argc, microtask);
351  } else {
352  __kmpc_serialized_parallel(loc, gtid);
353 
354 #if OMPT_SUPPORT
355  void *exit_frame_ptr;
356 #endif
357 
358  if (args)
359  __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
360  /*npr=*/0,
361  /*argc=*/1, &args
362 #if OMPT_SUPPORT
363  ,
364  &exit_frame_ptr
365 #endif
366  );
367  else
368  __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
369  /*npr=*/0,
370  /*argc=*/0,
371  /*args=*/nullptr
372 #if OMPT_SUPPORT
373  ,
374  &exit_frame_ptr
375 #endif
376  );
377 
378    __kmpc_end_serialized_parallel(loc, gtid);
379  }
380 }
381 
393 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
394  kmp_int32 num_teams, kmp_int32 num_threads) {
395  KA_TRACE(20,
396  ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
397  global_tid, num_teams, num_threads));
398  __kmp_assert_valid_gtid(global_tid);
399  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
400 }
401 
412 void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid,
413  kmp_int32 thread_limit) {
414  __kmp_assert_valid_gtid(global_tid);
415  kmp_info_t *thread = __kmp_threads[global_tid];
416  if (thread_limit > 0)
417  thread->th.th_current_task->td_icvs.task_thread_limit = thread_limit;
418 }
419 
436 void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
437  kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
438  kmp_int32 num_threads) {
439  KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
440  " num_teams_ub=%d num_threads=%d\n",
441  global_tid, num_teams_lb, num_teams_ub, num_threads));
442  __kmp_assert_valid_gtid(global_tid);
443  __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
444  num_threads);
445 }
446 
457 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
458  ...) {
459  int gtid = __kmp_entry_gtid();
460  kmp_info_t *this_thr = __kmp_threads[gtid];
461  va_list ap;
462  va_start(ap, microtask);
463 
464 #if KMP_STATS_ENABLED
465  KMP_COUNT_BLOCK(OMP_TEAMS);
466  stats_state_e previous_state = KMP_GET_THREAD_STATE();
467  if (previous_state == stats_state_e::SERIAL_REGION) {
468  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
469  } else {
470  KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
471  }
472 #endif
473 
474  // remember teams entry point and nesting level
475  this_thr->th.th_teams_microtask = microtask;
476  this_thr->th.th_teams_level =
477  this_thr->th.th_team->t.t_level; // AC: can be >0 on host
478 
479 #if OMPT_SUPPORT
480  kmp_team_t *parent_team = this_thr->th.th_team;
481  int tid = __kmp_tid_from_gtid(gtid);
482  if (ompt_enabled.enabled) {
483  parent_team->t.t_implicit_task_taskdata[tid]
484  .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
485  }
486  OMPT_STORE_RETURN_ADDRESS(gtid);
487 #endif
488 
489  // check if __kmpc_push_num_teams was called; if not, set the default
490  // number of teams
491  if (this_thr->th.th_teams_size.nteams == 0) {
492  __kmp_push_num_teams(loc, gtid, 0, 0);
493  }
494  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
495  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
496  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
497 
498  __kmp_fork_call(
499  loc, gtid, fork_context_intel, argc,
500  VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
501  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
502  __kmp_join_call(loc, gtid
503 #if OMPT_SUPPORT
504  ,
505  fork_context_intel
506 #endif
507  );
508 
509  // Pop current CG root off list
510  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
511  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
512  this_thr->th.th_cg_roots = tmp->up;
513  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
514  " to node %p. cg_nthreads was %d\n",
515  this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
516  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
517  int i = tmp->cg_nthreads--;
518  if (i == 1) { // check if we are the last thread in CG (not always the case)
519  __kmp_free(tmp);
520  }
521  // Restore current task's thread_limit from CG root
522  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
523  this_thr->th.th_current_task->td_icvs.thread_limit =
524  this_thr->th.th_cg_roots->cg_thread_limit;
525 
526  this_thr->th.th_teams_microtask = NULL;
527  this_thr->th.th_teams_level = 0;
528  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
529  va_end(ap);
530 #if KMP_STATS_ENABLED
531  if (previous_state == stats_state_e::SERIAL_REGION) {
532  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
533  KMP_SET_THREAD_STATE(previous_state);
534  } else {
535  KMP_POP_PARTITIONED_TIMER();
536  }
537 #endif // KMP_STATS_ENABLED
538 }
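// Illustrative sketch (hypothetical): a possible lowering of
// "#pragma omp teams num_teams(2) thread_limit(8)" onto these entry points,
// reusing the invented loc_example from the sketch after __kmpc_fork_call.
//
//   static void outlined_teams(kmp_int32 *global_tid, kmp_int32 *bound_tid) {
//     /* teams-region body */
//   }
//
//   void lowered_teams(void) {
//     kmp_int32 gtid = __kmpc_global_thread_num(&loc_example);
//     __kmpc_push_num_teams(&loc_example, gtid, /*num_teams=*/2,
//                           /*num_threads=*/8);
//     __kmpc_fork_teams(&loc_example, /*argc=*/0, (kmpc_micro)outlined_teams);
//   }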
539 
540 // I don't think this function should ever have been exported.
541 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
542 // openmp code ever called it, but it's been exported from the RTL for so
543 // long that I'm afraid to remove the definition.
544 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
545 
558 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
559  // The implementation is now in kmp_runtime.cpp so that it can share static
560  // functions with kmp_fork_call since the tasks to be done are similar in
561  // each case.
562  __kmp_assert_valid_gtid(global_tid);
563 #if OMPT_SUPPORT
564  OMPT_STORE_RETURN_ADDRESS(global_tid);
565 #endif
566  __kmp_serialized_parallel(loc, global_tid);
567 }
568 
576 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
577  kmp_internal_control_t *top;
578  kmp_info_t *this_thr;
579  kmp_team_t *serial_team;
580 
581  KC_TRACE(10,
582  ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
583 
584  /* skip all this code for autopar serialized loops since it results in
585  unacceptable overhead */
586  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
587  return;
588 
589  // Not autopar code
590  __kmp_assert_valid_gtid(global_tid);
591  if (!TCR_4(__kmp_init_parallel))
592  __kmp_parallel_initialize();
593 
594  __kmp_resume_if_soft_paused();
595 
596  this_thr = __kmp_threads[global_tid];
597  serial_team = this_thr->th.th_serial_team;
598 
599  kmp_task_team_t *task_team = this_thr->th.th_task_team;
600  // we need to wait for the proxy tasks before finishing the thread
601  if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
602  task_team->tt.tt_hidden_helper_task_encountered))
603  __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
604 
605  KMP_MB();
606  KMP_DEBUG_ASSERT(serial_team);
607  KMP_ASSERT(serial_team->t.t_serialized);
608  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
609  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
610  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
611  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
612 
613 #if OMPT_SUPPORT
614  if (ompt_enabled.enabled &&
615  this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
616  OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
617  if (ompt_enabled.ompt_callback_implicit_task) {
618  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
619  ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
620  OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
621  }
622 
623  // reset/clear the task id only after unlinking the task
624  ompt_data_t *parent_task_data;
625  __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
626 
627  if (ompt_enabled.ompt_callback_parallel_end) {
628  ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
629  &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
630  ompt_parallel_invoker_program | ompt_parallel_team,
631  OMPT_LOAD_RETURN_ADDRESS(global_tid));
632  }
633  __ompt_lw_taskteam_unlink(this_thr);
634  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
635  }
636 #endif
637 
638  /* If necessary, pop the internal control stack values and replace the team
639  * values */
640  top = serial_team->t.t_control_stack_top;
641  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
642  copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
643  serial_team->t.t_control_stack_top = top->next;
644  __kmp_free(top);
645  }
646 
647  /* pop dispatch buffers stack */
648  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
649  {
650  dispatch_private_info_t *disp_buffer =
651  serial_team->t.t_dispatch->th_disp_buffer;
652  serial_team->t.t_dispatch->th_disp_buffer =
653  serial_team->t.t_dispatch->th_disp_buffer->next;
654  __kmp_free(disp_buffer);
655  }
656  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
657 
658  --serial_team->t.t_serialized;
659  if (serial_team->t.t_serialized == 0) {
660 
661  /* return to the parallel section */
662 
663 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
664  if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
665  __kmp_clear_x87_fpu_status_word();
666  __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
667  __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
668  }
669 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
670 
671  __kmp_pop_current_task_from_thread(this_thr);
672 #if OMPD_SUPPORT
673  if (ompd_state & OMPD_ENABLE_BP)
674  ompd_bp_parallel_end();
675 #endif
676 
677  this_thr->th.th_team = serial_team->t.t_parent;
678  this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
679 
680  /* restore values cached in the thread */
681  this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
682  this_thr->th.th_team_master =
683  serial_team->t.t_parent->t.t_threads[0]; /* JPH */
684  this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
685 
686  /* TODO the below shouldn't need to be adjusted for serialized teams */
687  this_thr->th.th_dispatch =
688  &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
689 
690  KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
691  this_thr->th.th_current_task->td_flags.executing = 1;
692 
693  if (__kmp_tasking_mode != tskm_immediate_exec) {
694  // Copy the task team from the new child / old parent team to the thread.
695  this_thr->th.th_task_team =
696  this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
697  KA_TRACE(20,
698  ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
699  "team %p\n",
700  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
701  }
702 #if KMP_AFFINITY_SUPPORTED
703  if (this_thr->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
704  __kmp_reset_root_init_mask(global_tid);
705  }
706 #endif
707  } else {
708  if (__kmp_tasking_mode != tskm_immediate_exec) {
709  KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
710  "depth of serial team %p to %d\n",
711  global_tid, serial_team, serial_team->t.t_serialized));
712  }
713  }
714 
715  serial_team->t.t_level--;
716  if (__kmp_env_consistency_check)
717  __kmp_pop_parallel(global_tid, NULL);
718 #if OMPT_SUPPORT
719  if (ompt_enabled.enabled)
720  this_thr->th.ompt_thread_info.state =
721  ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
722  : ompt_state_work_parallel);
723 #endif
724 }
725 
734 void __kmpc_flush(ident_t *loc) {
735  KC_TRACE(10, ("__kmpc_flush: called\n"));
736 
737  /* need an explicit __mf() here since we use volatile instead in the library */
738  KMP_MFENCE(); /* Flush all pending memory write invalidates. */
739 
740 #if OMPT_SUPPORT && OMPT_OPTIONAL
741  if (ompt_enabled.ompt_callback_flush) {
742  ompt_callbacks.ompt_callback(ompt_callback_flush)(
743  __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
744  }
745 #endif
746 }
747 
748 /* -------------------------------------------------------------------------- */
756 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
757  KMP_COUNT_BLOCK(OMP_BARRIER);
758  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
759  __kmp_assert_valid_gtid(global_tid);
760 
761  if (!TCR_4(__kmp_init_parallel))
762  __kmp_parallel_initialize();
763 
764  __kmp_resume_if_soft_paused();
765 
766  if (__kmp_env_consistency_check) {
767  if (loc == 0) {
768  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
769  }
770  __kmp_check_barrier(global_tid, ct_barrier, loc);
771  }
772 
773 #if OMPT_SUPPORT
774  ompt_frame_t *ompt_frame;
775  if (ompt_enabled.enabled) {
776  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
777  if (ompt_frame->enter_frame.ptr == NULL)
778  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
779  }
780  OMPT_STORE_RETURN_ADDRESS(global_tid);
781 #endif
782  __kmp_threads[global_tid]->th.th_ident = loc;
783  // TODO: explicit barrier_wait_id:
784  // this function is called when 'barrier' directive is present or
785  // implicit barrier at the end of a worksharing construct.
786  // 1) better to add a per-thread barrier counter to a thread data structure
787  // 2) set to 0 when a new team is created
788  // 3) no sync is required
789 
790  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
791 #if OMPT_SUPPORT && OMPT_OPTIONAL
792  if (ompt_enabled.enabled) {
793  ompt_frame->enter_frame = ompt_data_none;
794  }
795 #endif
796 }
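// Illustrative sketch (hypothetical): an explicit "#pragma omp barrier" inside
// a parallel region would typically lower to a single call such as
//
//   __kmpc_barrier(&loc_example, __kmpc_global_thread_num(&loc_example));
//
// using the invented loc_example from the earlier sketch; implicit barriers at
// the end of worksharing constructs reach this entry point as well.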
797 
798 /* The BARRIER for a MASTER section is always explicit */
805 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
806  int status = 0;
807 
808  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
809  __kmp_assert_valid_gtid(global_tid);
810 
811  if (!TCR_4(__kmp_init_parallel))
812  __kmp_parallel_initialize();
813 
814  __kmp_resume_if_soft_paused();
815 
816  if (KMP_MASTER_GTID(global_tid)) {
817  KMP_COUNT_BLOCK(OMP_MASTER);
818  KMP_PUSH_PARTITIONED_TIMER(OMP_master);
819  status = 1;
820  }
821 
822 #if OMPT_SUPPORT && OMPT_OPTIONAL
823  if (status) {
824  if (ompt_enabled.ompt_callback_masked) {
825  kmp_info_t *this_thr = __kmp_threads[global_tid];
826  kmp_team_t *team = this_thr->th.th_team;
827 
828  int tid = __kmp_tid_from_gtid(global_tid);
829  ompt_callbacks.ompt_callback(ompt_callback_masked)(
830  ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
831  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
832  OMPT_GET_RETURN_ADDRESS(0));
833  }
834  }
835 #endif
836 
837  if (__kmp_env_consistency_check) {
838 #if KMP_USE_DYNAMIC_LOCK
839  if (status)
840  __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
841  else
842  __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
843 #else
844  if (status)
845  __kmp_push_sync(global_tid, ct_master, loc, NULL);
846  else
847  __kmp_check_sync(global_tid, ct_master, loc, NULL);
848 #endif
849  }
850 
851  return status;
852 }
853 
862 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
863  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
864  __kmp_assert_valid_gtid(global_tid);
865  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
866  KMP_POP_PARTITIONED_TIMER();
867 
868 #if OMPT_SUPPORT && OMPT_OPTIONAL
869  kmp_info_t *this_thr = __kmp_threads[global_tid];
870  kmp_team_t *team = this_thr->th.th_team;
871  if (ompt_enabled.ompt_callback_masked) {
872  int tid = __kmp_tid_from_gtid(global_tid);
873  ompt_callbacks.ompt_callback(ompt_callback_masked)(
874  ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
875  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
876  OMPT_GET_RETURN_ADDRESS(0));
877  }
878 #endif
879 
880  if (__kmp_env_consistency_check) {
881  if (KMP_MASTER_GTID(global_tid))
882  __kmp_pop_sync(global_tid, ct_master, loc);
883  }
884 }
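// Illustrative sketch (hypothetical): "#pragma omp master" lowers to a guarded
// block; only the thread that received status 1 calls __kmpc_end_master.
// loc_example is the invented ident_t from the earlier sketch.
//
//   kmp_int32 gtid = __kmpc_global_thread_num(&loc_example);
//   if (__kmpc_master(&loc_example, gtid)) {
//     /* master-only code */
//     __kmpc_end_master(&loc_example, gtid);
//   }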
885 
894 kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
895  int status = 0;
896  int tid;
897  KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
898  __kmp_assert_valid_gtid(global_tid);
899 
900  if (!TCR_4(__kmp_init_parallel))
901  __kmp_parallel_initialize();
902 
903  __kmp_resume_if_soft_paused();
904 
905  tid = __kmp_tid_from_gtid(global_tid);
906  if (tid == filter) {
907  KMP_COUNT_BLOCK(OMP_MASKED);
908  KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
909  status = 1;
910  }
911 
912 #if OMPT_SUPPORT && OMPT_OPTIONAL
913  if (status) {
914  if (ompt_enabled.ompt_callback_masked) {
915  kmp_info_t *this_thr = __kmp_threads[global_tid];
916  kmp_team_t *team = this_thr->th.th_team;
917  ompt_callbacks.ompt_callback(ompt_callback_masked)(
918  ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
919  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
920  OMPT_GET_RETURN_ADDRESS(0));
921  }
922  }
923 #endif
924 
925  if (__kmp_env_consistency_check) {
926 #if KMP_USE_DYNAMIC_LOCK
927  if (status)
928  __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
929  else
930  __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
931 #else
932  if (status)
933  __kmp_push_sync(global_tid, ct_masked, loc, NULL);
934  else
935  __kmp_check_sync(global_tid, ct_masked, loc, NULL);
936 #endif
937  }
938 
939  return status;
940 }
941 
950 void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
951  KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
952  __kmp_assert_valid_gtid(global_tid);
953  KMP_POP_PARTITIONED_TIMER();
954 
955 #if OMPT_SUPPORT && OMPT_OPTIONAL
956  kmp_info_t *this_thr = __kmp_threads[global_tid];
957  kmp_team_t *team = this_thr->th.th_team;
958  if (ompt_enabled.ompt_callback_masked) {
959  int tid = __kmp_tid_from_gtid(global_tid);
960  ompt_callbacks.ompt_callback(ompt_callback_masked)(
961  ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
962  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
963  OMPT_GET_RETURN_ADDRESS(0));
964  }
965 #endif
966 
967  if (__kmp_env_consistency_check) {
968  __kmp_pop_sync(global_tid, ct_masked, loc);
969  }
970 }
971 
979 void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
980  int cid = 0;
981  kmp_info_t *th;
982  KMP_DEBUG_ASSERT(__kmp_init_serial);
983 
984  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
985  __kmp_assert_valid_gtid(gtid);
986 
987  if (!TCR_4(__kmp_init_parallel))
988  __kmp_parallel_initialize();
989 
990  __kmp_resume_if_soft_paused();
991 
992 #if USE_ITT_BUILD
993  __kmp_itt_ordered_prep(gtid);
994 // TODO: ordered_wait_id
995 #endif /* USE_ITT_BUILD */
996 
997  th = __kmp_threads[gtid];
998 
999 #if OMPT_SUPPORT && OMPT_OPTIONAL
1000  kmp_team_t *team;
1001  ompt_wait_id_t lck;
1002  void *codeptr_ra;
1003  OMPT_STORE_RETURN_ADDRESS(gtid);
1004  if (ompt_enabled.enabled) {
1005  team = __kmp_team_from_gtid(gtid);
1006  lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
1007  /* OMPT state update */
1008  th->th.ompt_thread_info.wait_id = lck;
1009  th->th.ompt_thread_info.state = ompt_state_wait_ordered;
1010 
1011  /* OMPT event callback */
1012  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
1013  if (ompt_enabled.ompt_callback_mutex_acquire) {
1014  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1015  ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
1016  codeptr_ra);
1017  }
1018  }
1019 #endif
1020 
1021  if (th->th.th_dispatch->th_deo_fcn != 0)
1022  (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
1023  else
1024  __kmp_parallel_deo(&gtid, &cid, loc);
1025 
1026 #if OMPT_SUPPORT && OMPT_OPTIONAL
1027  if (ompt_enabled.enabled) {
1028  /* OMPT state update */
1029  th->th.ompt_thread_info.state = ompt_state_work_parallel;
1030  th->th.ompt_thread_info.wait_id = 0;
1031 
1032  /* OMPT event callback */
1033  if (ompt_enabled.ompt_callback_mutex_acquired) {
1034  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1035  ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1036  }
1037  }
1038 #endif
1039 
1040 #if USE_ITT_BUILD
1041  __kmp_itt_ordered_start(gtid);
1042 #endif /* USE_ITT_BUILD */
1043 }
1044 
1052 void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
1053  int cid = 0;
1054  kmp_info_t *th;
1055 
1056  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
1057  __kmp_assert_valid_gtid(gtid);
1058 
1059 #if USE_ITT_BUILD
1060  __kmp_itt_ordered_end(gtid);
1061 // TODO: ordered_wait_id
1062 #endif /* USE_ITT_BUILD */
1063 
1064  th = __kmp_threads[gtid];
1065 
1066  if (th->th.th_dispatch->th_dxo_fcn != 0)
1067  (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
1068  else
1069  __kmp_parallel_dxo(&gtid, &cid, loc);
1070 
1071 #if OMPT_SUPPORT && OMPT_OPTIONAL
1072  OMPT_STORE_RETURN_ADDRESS(gtid);
1073  if (ompt_enabled.ompt_callback_mutex_released) {
1074  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1075  ompt_mutex_ordered,
1076  (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
1077  ->t.t_ordered.dt.t_value,
1078  OMPT_LOAD_RETURN_ADDRESS(gtid));
1079  }
1080 #endif
1081 }
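// Illustrative sketch (hypothetical): inside each iteration of a loop with an
// ordered clause, an "#pragma omp ordered" block brackets the user code with
// this pair of calls (loc_example and gtid as in the earlier sketches).
//
//   kmp_int32 gtid = __kmpc_global_thread_num(&loc_example);
//   /* per iteration, inside the chunk assigned by the loop scheduler: */
//   __kmpc_ordered(&loc_example, gtid);
//   /* ordered block, executed in iteration order */
//   __kmpc_end_ordered(&loc_example, gtid);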
1082 
1083 #if KMP_USE_DYNAMIC_LOCK
1084 
1085 static __forceinline void
1086 __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
1087  kmp_int32 gtid, kmp_indirect_locktag_t tag) {
1088  // Pointer to the allocated indirect lock is written to crit, while indexing
1089  // is ignored.
1090  void *idx;
1091  kmp_indirect_lock_t **lck;
1092  lck = (kmp_indirect_lock_t **)crit;
1093  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
1094  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
1095  KMP_SET_I_LOCK_LOCATION(ilk, loc);
1096  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
1097  KA_TRACE(20,
1098  ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
1099 #if USE_ITT_BUILD
1100  __kmp_itt_critical_creating(ilk->lock, loc);
1101 #endif
1102  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
1103  if (status == 0) {
1104 #if USE_ITT_BUILD
1105  __kmp_itt_critical_destroyed(ilk->lock);
1106 #endif
1107  // We don't really need to destroy the unclaimed lock here since it will be
1108  // cleaned up at program exit.
1109  // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
1110  }
1111  KMP_DEBUG_ASSERT(*lck != NULL);
1112 }
1113 
1114 // Fast-path acquire tas lock
1115 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
1116  { \
1117  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1118  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1119  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1120  if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1121  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
1122  kmp_uint32 spins; \
1123  KMP_FSYNC_PREPARE(l); \
1124  KMP_INIT_YIELD(spins); \
1125  kmp_backoff_t backoff = __kmp_spin_backoff_params; \
1126  do { \
1127  if (TCR_4(__kmp_nth) > \
1128  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
1129  KMP_YIELD(TRUE); \
1130  } else { \
1131  KMP_YIELD_SPIN(spins); \
1132  } \
1133  __kmp_spin_backoff(&backoff); \
1134  } while ( \
1135  KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1136  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
1137  } \
1138  KMP_FSYNC_ACQUIRED(l); \
1139  }
1140 
1141 // Fast-path test tas lock
1142 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
1143  { \
1144  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1145  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1146  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1147  rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
1148  __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1149  }
1150 
1151 // Fast-path release tas lock
1152 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1153  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
1154 
1155 #if KMP_USE_FUTEX
1156 
1157 #include <sys/syscall.h>
1158 #include <unistd.h>
1159 #ifndef FUTEX_WAIT
1160 #define FUTEX_WAIT 0
1161 #endif
1162 #ifndef FUTEX_WAKE
1163 #define FUTEX_WAKE 1
1164 #endif
1165 
1166 // Fast-path acquire futex lock
1167 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1168  { \
1169  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1170  kmp_int32 gtid_code = (gtid + 1) << 1; \
1171  KMP_MB(); \
1172  KMP_FSYNC_PREPARE(ftx); \
1173  kmp_int32 poll_val; \
1174  while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1175  &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1176  KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1177  kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1178  if (!cond) { \
1179  if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1180  poll_val | \
1181  KMP_LOCK_BUSY(1, futex))) { \
1182  continue; \
1183  } \
1184  poll_val |= KMP_LOCK_BUSY(1, futex); \
1185  } \
1186  kmp_int32 rc; \
1187  if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1188  NULL, NULL, 0)) != 0) { \
1189  continue; \
1190  } \
1191  gtid_code |= 1; \
1192  } \
1193  KMP_FSYNC_ACQUIRED(ftx); \
1194  }
1195 
1196 // Fast-path test futex lock
1197 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1198  { \
1199  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1200  if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1201  KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
1202  KMP_FSYNC_ACQUIRED(ftx); \
1203  rc = TRUE; \
1204  } else { \
1205  rc = FALSE; \
1206  } \
1207  }
1208 
1209 // Fast-path release futex lock
1210 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1211  { \
1212  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1213  KMP_MB(); \
1214  KMP_FSYNC_RELEASING(ftx); \
1215  kmp_int32 poll_val = \
1216  KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1217  if (KMP_LOCK_STRIP(poll_val) & 1) { \
1218  syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1219  KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1220  } \
1221  KMP_MB(); \
1222  KMP_YIELD_OVERSUB(); \
1223  }
1224 
1225 #endif // KMP_USE_FUTEX
1226 
1227 #else // KMP_USE_DYNAMIC_LOCK
1228 
1229 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1230  ident_t const *loc,
1231  kmp_int32 gtid) {
1232  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1233 
1234  // Because of the double-check, the following load doesn't need to be volatile
1235  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1236 
1237  if (lck == NULL) {
1238  void *idx;
1239 
1240  // Allocate & initialize the lock.
1241  // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1242  lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1243  __kmp_init_user_lock_with_checks(lck);
1244  __kmp_set_user_lock_location(lck, loc);
1245 #if USE_ITT_BUILD
1246  __kmp_itt_critical_creating(lck);
1247 // __kmp_itt_critical_creating() should be called *before* the first usage
1248 // of the underlying lock. It is the only place where we can guarantee it. There
1249 // is a chance the lock will be destroyed with no usage, but that is not a
1250 // problem, because this is not a real event seen by the user but rather sets a
1251 // name for the object (lock). See more details in kmp_itt.h.
1252 #endif /* USE_ITT_BUILD */
1253 
1254  // Use a cmpxchg instruction to slam the start of the critical section with
1255  // the lock pointer. If another thread beat us to it, deallocate the lock,
1256  // and use the lock that the other thread allocated.
1257  int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1258 
1259  if (status == 0) {
1260 // Deallocate the lock and reload the value.
1261 #if USE_ITT_BUILD
1262  __kmp_itt_critical_destroyed(lck);
1263 // Let ITT know the lock is destroyed and the same memory location may be reused
1264 // for another purpose.
1265 #endif /* USE_ITT_BUILD */
1266  __kmp_destroy_user_lock_with_checks(lck);
1267  __kmp_user_lock_free(&idx, gtid, lck);
1268  lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1269  KMP_DEBUG_ASSERT(lck != NULL);
1270  }
1271  }
1272  return lck;
1273 }
1274 
1275 #endif // KMP_USE_DYNAMIC_LOCK
1276 
1287 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1288  kmp_critical_name *crit) {
1289 #if KMP_USE_DYNAMIC_LOCK
1290 #if OMPT_SUPPORT && OMPT_OPTIONAL
1291  OMPT_STORE_RETURN_ADDRESS(global_tid);
1292 #endif // OMPT_SUPPORT
1293  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1294 #else
1295  KMP_COUNT_BLOCK(OMP_CRITICAL);
1296 #if OMPT_SUPPORT && OMPT_OPTIONAL
1297  ompt_state_t prev_state = ompt_state_undefined;
1298  ompt_thread_info_t ti;
1299 #endif
1300  kmp_user_lock_p lck;
1301 
1302  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1303  __kmp_assert_valid_gtid(global_tid);
1304 
1305  // TODO: add THR_OVHD_STATE
1306 
1307  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1308  KMP_CHECK_USER_LOCK_INIT();
1309 
1310  if ((__kmp_user_lock_kind == lk_tas) &&
1311  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1312  lck = (kmp_user_lock_p)crit;
1313  }
1314 #if KMP_USE_FUTEX
1315  else if ((__kmp_user_lock_kind == lk_futex) &&
1316  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1317  lck = (kmp_user_lock_p)crit;
1318  }
1319 #endif
1320  else { // ticket, queuing or drdpa
1321  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1322  }
1323 
1324  if (__kmp_env_consistency_check)
1325  __kmp_push_sync(global_tid, ct_critical, loc, lck);
1326 
1327  // Since the critical directive binds to all threads, not just the current
1328  // team, we have to check this even if we are in a serialized team.
1329  // Also, even if we are the uber thread, we still have to acquire the lock,
1330  // as we have to contend with sibling threads.
1331 
1332 #if USE_ITT_BUILD
1333  __kmp_itt_critical_acquiring(lck);
1334 #endif /* USE_ITT_BUILD */
1335 #if OMPT_SUPPORT && OMPT_OPTIONAL
1336  OMPT_STORE_RETURN_ADDRESS(global_tid);
1337  void *codeptr_ra = NULL;
1338  if (ompt_enabled.enabled) {
1339  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1340  /* OMPT state update */
1341  prev_state = ti.state;
1342  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1343  ti.state = ompt_state_wait_critical;
1344 
1345  /* OMPT event callback */
1346  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1347  if (ompt_enabled.ompt_callback_mutex_acquire) {
1348  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1349  ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1350  (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1351  }
1352  }
1353 #endif
1354  // The value of 'crit' should be usable as a critical_id of the critical
1355  // section directive.
1356  __kmp_acquire_user_lock_with_checks(lck, global_tid);
1357 
1358 #if USE_ITT_BUILD
1359  __kmp_itt_critical_acquired(lck);
1360 #endif /* USE_ITT_BUILD */
1361 #if OMPT_SUPPORT && OMPT_OPTIONAL
1362  if (ompt_enabled.enabled) {
1363  /* OMPT state update */
1364  ti.state = prev_state;
1365  ti.wait_id = 0;
1366 
1367  /* OMPT event callback */
1368  if (ompt_enabled.ompt_callback_mutex_acquired) {
1369  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1370  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1371  }
1372  }
1373 #endif
1374  KMP_POP_PARTITIONED_TIMER();
1375 
1376  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1377  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1378 #endif // KMP_USE_DYNAMIC_LOCK
1379 }
1380 
1381 #if KMP_USE_DYNAMIC_LOCK
1382 
1383 // Converts the given hint to an internal lock implementation
1384 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1385 #if KMP_USE_TSX
1386 #define KMP_TSX_LOCK(seq) lockseq_##seq
1387 #else
1388 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1389 #endif
1390 
1391 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1392 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
1393 #else
1394 #define KMP_CPUINFO_RTM 0
1395 #endif
1396 
1397  // Hints that do not require further logic
1398  if (hint & kmp_lock_hint_hle)
1399  return KMP_TSX_LOCK(hle);
1400  if (hint & kmp_lock_hint_rtm)
1401  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
1402  if (hint & kmp_lock_hint_adaptive)
1403  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1404 
1405  // Rule out conflicting hints first by returning the default lock
1406  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1407  return __kmp_user_lock_seq;
1408  if ((hint & omp_lock_hint_speculative) &&
1409  (hint & omp_lock_hint_nonspeculative))
1410  return __kmp_user_lock_seq;
1411 
1412  // Do not even consider speculation when it appears to be contended
1413  if (hint & omp_lock_hint_contended)
1414  return lockseq_queuing;
1415 
1416  // Uncontended lock without speculation
1417  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1418  return lockseq_tas;
1419 
1420  // Use RTM lock for speculation
1421  if (hint & omp_lock_hint_speculative)
1422  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;
1423 
1424  return __kmp_user_lock_seq;
1425 }
1426 
1427 #if OMPT_SUPPORT && OMPT_OPTIONAL
1428 #if KMP_USE_DYNAMIC_LOCK
1429 static kmp_mutex_impl_t
1430 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1431  if (user_lock) {
1432  switch (KMP_EXTRACT_D_TAG(user_lock)) {
1433  case 0:
1434  break;
1435 #if KMP_USE_FUTEX
1436  case locktag_futex:
1437  return kmp_mutex_impl_queuing;
1438 #endif
1439  case locktag_tas:
1440  return kmp_mutex_impl_spin;
1441 #if KMP_USE_TSX
1442  case locktag_hle:
1443  case locktag_rtm_spin:
1444  return kmp_mutex_impl_speculative;
1445 #endif
1446  default:
1447  return kmp_mutex_impl_none;
1448  }
1449  ilock = KMP_LOOKUP_I_LOCK(user_lock);
1450  }
1451  KMP_ASSERT(ilock);
1452  switch (ilock->type) {
1453 #if KMP_USE_TSX
1454  case locktag_adaptive:
1455  case locktag_rtm_queuing:
1456  return kmp_mutex_impl_speculative;
1457 #endif
1458  case locktag_nested_tas:
1459  return kmp_mutex_impl_spin;
1460 #if KMP_USE_FUTEX
1461  case locktag_nested_futex:
1462 #endif
1463  case locktag_ticket:
1464  case locktag_queuing:
1465  case locktag_drdpa:
1466  case locktag_nested_ticket:
1467  case locktag_nested_queuing:
1468  case locktag_nested_drdpa:
1469  return kmp_mutex_impl_queuing;
1470  default:
1471  return kmp_mutex_impl_none;
1472  }
1473 }
1474 #else
1475 // For locks without dynamic binding
1476 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1477  switch (__kmp_user_lock_kind) {
1478  case lk_tas:
1479  return kmp_mutex_impl_spin;
1480 #if KMP_USE_FUTEX
1481  case lk_futex:
1482 #endif
1483  case lk_ticket:
1484  case lk_queuing:
1485  case lk_drdpa:
1486  return kmp_mutex_impl_queuing;
1487 #if KMP_USE_TSX
1488  case lk_hle:
1489  case lk_rtm_queuing:
1490  case lk_rtm_spin:
1491  case lk_adaptive:
1492  return kmp_mutex_impl_speculative;
1493 #endif
1494  default:
1495  return kmp_mutex_impl_none;
1496  }
1497 }
1498 #endif // KMP_USE_DYNAMIC_LOCK
1499 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
1500 
1514 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1515  kmp_critical_name *crit, uint32_t hint) {
1516  KMP_COUNT_BLOCK(OMP_CRITICAL);
1517  kmp_user_lock_p lck;
1518 #if OMPT_SUPPORT && OMPT_OPTIONAL
1519  ompt_state_t prev_state = ompt_state_undefined;
1520  ompt_thread_info_t ti;
1521  // This is the case if called from __kmpc_critical:
1522  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1523  if (!codeptr)
1524  codeptr = OMPT_GET_RETURN_ADDRESS(0);
1525 #endif
1526 
1527  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1528  __kmp_assert_valid_gtid(global_tid);
1529 
1530  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1531  // Check if it is initialized.
1532  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1533  kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
1534  if (*lk == 0) {
1535  if (KMP_IS_D_LOCK(lockseq)) {
1536  KMP_COMPARE_AND_STORE_ACQ32(
1537  (volatile kmp_int32 *)&((kmp_base_tas_lock_t *)crit)->poll, 0,
1538  KMP_GET_D_TAG(lockseq));
1539  } else {
1540  __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
1541  }
1542  }
1543  // Branch for accessing the actual lock object and set operation. This
1544  // branching is inevitable since this lock initialization does not follow the
1545  // normal dispatch path (lock table is not used).
1546  if (KMP_EXTRACT_D_TAG(lk) != 0) {
1547  lck = (kmp_user_lock_p)lk;
1548  if (__kmp_env_consistency_check) {
1549  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1550  __kmp_map_hint_to_lock(hint));
1551  }
1552 #if USE_ITT_BUILD
1553  __kmp_itt_critical_acquiring(lck);
1554 #endif
1555 #if OMPT_SUPPORT && OMPT_OPTIONAL
1556  if (ompt_enabled.enabled) {
1557  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1558  /* OMPT state update */
1559  prev_state = ti.state;
1560  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1561  ti.state = ompt_state_wait_critical;
1562 
1563  /* OMPT event callback */
1564  if (ompt_enabled.ompt_callback_mutex_acquire) {
1565  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1566  ompt_mutex_critical, (unsigned int)hint,
1567  __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1568  codeptr);
1569  }
1570  }
1571 #endif
1572 #if KMP_USE_INLINED_TAS
1573  if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
1574  KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1575  } else
1576 #elif KMP_USE_INLINED_FUTEX
1577  if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
1578  KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1579  } else
1580 #endif
1581  {
1582  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1583  }
1584  } else {
1585  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1586  lck = ilk->lock;
1587  if (__kmp_env_consistency_check) {
1588  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1589  __kmp_map_hint_to_lock(hint));
1590  }
1591 #if USE_ITT_BUILD
1592  __kmp_itt_critical_acquiring(lck);
1593 #endif
1594 #if OMPT_SUPPORT && OMPT_OPTIONAL
1595  if (ompt_enabled.enabled) {
1596  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1597  /* OMPT state update */
1598  prev_state = ti.state;
1599  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1600  ti.state = ompt_state_wait_critical;
1601 
1602  /* OMPT event callback */
1603  if (ompt_enabled.ompt_callback_mutex_acquire) {
1604  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1605  ompt_mutex_critical, (unsigned int)hint,
1606  __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1607  codeptr);
1608  }
1609  }
1610 #endif
1611  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1612  }
1613  KMP_POP_PARTITIONED_TIMER();
1614 
1615 #if USE_ITT_BUILD
1616  __kmp_itt_critical_acquired(lck);
1617 #endif /* USE_ITT_BUILD */
1618 #if OMPT_SUPPORT && OMPT_OPTIONAL
1619  if (ompt_enabled.enabled) {
1620  /* OMPT state update */
1621  ti.state = prev_state;
1622  ti.wait_id = 0;
1623 
1624  /* OMPT event callback */
1625  if (ompt_enabled.ompt_callback_mutex_acquired) {
1626  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1627  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1628  }
1629  }
1630 #endif
1631 
1632  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1633  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1634 } // __kmpc_critical_with_hint
1635 
1636 #endif // KMP_USE_DYNAMIC_LOCK
1637 
1647 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1648  kmp_critical_name *crit) {
1649  kmp_user_lock_p lck;
1650 
1651  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1652 
1653 #if KMP_USE_DYNAMIC_LOCK
1654  int locktag = KMP_EXTRACT_D_TAG(crit);
1655  if (locktag) {
1656  lck = (kmp_user_lock_p)crit;
1657  KMP_ASSERT(lck != NULL);
1658  if (__kmp_env_consistency_check) {
1659  __kmp_pop_sync(global_tid, ct_critical, loc);
1660  }
1661 #if USE_ITT_BUILD
1662  __kmp_itt_critical_releasing(lck);
1663 #endif
1664 #if KMP_USE_INLINED_TAS
1665  if (locktag == locktag_tas && !__kmp_env_consistency_check) {
1666  KMP_RELEASE_TAS_LOCK(lck, global_tid);
1667  } else
1668 #elif KMP_USE_INLINED_FUTEX
1669  if (locktag == locktag_futex && !__kmp_env_consistency_check) {
1670  KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1671  } else
1672 #endif
1673  {
1674  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1675  }
1676  } else {
1677  kmp_indirect_lock_t *ilk =
1678  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1679  KMP_ASSERT(ilk != NULL);
1680  lck = ilk->lock;
1681  if (__kmp_env_consistency_check) {
1682  __kmp_pop_sync(global_tid, ct_critical, loc);
1683  }
1684 #if USE_ITT_BUILD
1685  __kmp_itt_critical_releasing(lck);
1686 #endif
1687  KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1688  }
1689 
1690 #else // KMP_USE_DYNAMIC_LOCK
1691 
1692  if ((__kmp_user_lock_kind == lk_tas) &&
1693  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1694  lck = (kmp_user_lock_p)crit;
1695  }
1696 #if KMP_USE_FUTEX
1697  else if ((__kmp_user_lock_kind == lk_futex) &&
1698  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1699  lck = (kmp_user_lock_p)crit;
1700  }
1701 #endif
1702  else { // ticket, queuing or drdpa
1703  lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1704  }
1705 
1706  KMP_ASSERT(lck != NULL);
1707 
1708  if (__kmp_env_consistency_check)
1709  __kmp_pop_sync(global_tid, ct_critical, loc);
1710 
1711 #if USE_ITT_BUILD
1712  __kmp_itt_critical_releasing(lck);
1713 #endif /* USE_ITT_BUILD */
1714  // The value of 'crit' should be usable as a critical_id of the critical
1715  // section directive.
1716  __kmp_release_user_lock_with_checks(lck, global_tid);
1717 
1718 #endif // KMP_USE_DYNAMIC_LOCK
1719 
1720 #if OMPT_SUPPORT && OMPT_OPTIONAL
1721  /* OMPT release event triggers after lock is released; place here to trigger
1722  * for all #if branches */
1723  OMPT_STORE_RETURN_ADDRESS(global_tid);
1724  if (ompt_enabled.ompt_callback_mutex_released) {
1725  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1726  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1727  OMPT_LOAD_RETURN_ADDRESS(0));
1728  }
1729 #endif
1730 
1731  KMP_POP_PARTITIONED_TIMER();
1732  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1733 }
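// Illustrative sketch (hypothetical): "#pragma omp critical" is typically
// lowered around a statically allocated, zero-initialized kmp_critical_name
// that the runtime lazily turns into a lock (see the code above). The names
// crit_example, gtid and loc_example are invented for the example.
//
//   static kmp_critical_name crit_example = {0};
//
//   kmp_int32 gtid = __kmpc_global_thread_num(&loc_example);
//   __kmpc_critical(&loc_example, gtid, &crit_example);
//   /* critical-section body */
//   __kmpc_end_critical(&loc_example, gtid, &crit_example);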
1734 
1744 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1745  int status;
1746  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1747  __kmp_assert_valid_gtid(global_tid);
1748 
1749  if (!TCR_4(__kmp_init_parallel))
1750  __kmp_parallel_initialize();
1751 
1752  __kmp_resume_if_soft_paused();
1753 
1754  if (__kmp_env_consistency_check)
1755  __kmp_check_barrier(global_tid, ct_barrier, loc);
1756 
1757 #if OMPT_SUPPORT
1758  ompt_frame_t *ompt_frame;
1759  if (ompt_enabled.enabled) {
1760  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1761  if (ompt_frame->enter_frame.ptr == NULL)
1762  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1763  }
1764  OMPT_STORE_RETURN_ADDRESS(global_tid);
1765 #endif
1766 #if USE_ITT_NOTIFY
1767  __kmp_threads[global_tid]->th.th_ident = loc;
1768 #endif
1769  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1770 #if OMPT_SUPPORT && OMPT_OPTIONAL
1771  if (ompt_enabled.enabled) {
1772  ompt_frame->enter_frame = ompt_data_none;
1773  }
1774 #endif
1775 
1776  return (status != 0) ? 0 : 1;
1777 }
1778 
1788 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1789  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1790  __kmp_assert_valid_gtid(global_tid);
1791  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1792 }
1793 
1804 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1805  kmp_int32 ret;
1806  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1807  __kmp_assert_valid_gtid(global_tid);
1808 
1809  if (!TCR_4(__kmp_init_parallel))
1810  __kmp_parallel_initialize();
1811 
1812  __kmp_resume_if_soft_paused();
1813 
1814  if (__kmp_env_consistency_check) {
1815  if (loc == 0) {
1816  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1817  }
1818  __kmp_check_barrier(global_tid, ct_barrier, loc);
1819  }
1820 
1821 #if OMPT_SUPPORT
1822  ompt_frame_t *ompt_frame;
1823  if (ompt_enabled.enabled) {
1824  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1825  if (ompt_frame->enter_frame.ptr == NULL)
1826  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1827  }
1828  OMPT_STORE_RETURN_ADDRESS(global_tid);
1829 #endif
1830 #if USE_ITT_NOTIFY
1831  __kmp_threads[global_tid]->th.th_ident = loc;
1832 #endif
1833  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1834 #if OMPT_SUPPORT && OMPT_OPTIONAL
1835  if (ompt_enabled.enabled) {
1836  ompt_frame->enter_frame = ompt_data_none;
1837  }
1838 #endif
1839 
1840  ret = __kmpc_master(loc, global_tid);
1841 
1842  if (__kmp_env_consistency_check) {
1843  /* there's no __kmpc_end_master called; so the (stats) */
1844  /* actions of __kmpc_end_master are done here */
1845  if (ret) {
1846  /* only one thread should do the pop since only */
1847  /* one did the push (see __kmpc_master()) */
1848  __kmp_pop_sync(global_tid, ct_master, loc);
1849  }
1850  }
1851 
1852  return (ret);
1853 }
1854 
1855 /* The BARRIER for a SINGLE process section is always explicit */
1867 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1868  __kmp_assert_valid_gtid(global_tid);
1869  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1870 
1871  if (rc) {
1872  // We are going to execute the single statement, so we should count it.
1873  KMP_COUNT_BLOCK(OMP_SINGLE);
1874  KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1875  }
1876 
1877 #if OMPT_SUPPORT && OMPT_OPTIONAL
1878  kmp_info_t *this_thr = __kmp_threads[global_tid];
1879  kmp_team_t *team = this_thr->th.th_team;
1880  int tid = __kmp_tid_from_gtid(global_tid);
1881 
1882  if (ompt_enabled.enabled) {
1883  if (rc) {
1884  if (ompt_enabled.ompt_callback_work) {
1885  ompt_callbacks.ompt_callback(ompt_callback_work)(
1886  ompt_work_single_executor, ompt_scope_begin,
1887  &(team->t.ompt_team_info.parallel_data),
1888  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1889  1, OMPT_GET_RETURN_ADDRESS(0));
1890  }
1891  } else {
1892  if (ompt_enabled.ompt_callback_work) {
1893  ompt_callbacks.ompt_callback(ompt_callback_work)(
1894  ompt_work_single_other, ompt_scope_begin,
1895  &(team->t.ompt_team_info.parallel_data),
1896  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1897  1, OMPT_GET_RETURN_ADDRESS(0));
1898  ompt_callbacks.ompt_callback(ompt_callback_work)(
1899  ompt_work_single_other, ompt_scope_end,
1900  &(team->t.ompt_team_info.parallel_data),
1901  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1902  1, OMPT_GET_RETURN_ADDRESS(0));
1903  }
1904  }
1905  }
1906 #endif
1907 
1908  return rc;
1909 }
1910 
1920 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1921  __kmp_assert_valid_gtid(global_tid);
1922  __kmp_exit_single(global_tid);
1923  KMP_POP_PARTITIONED_TIMER();
1924 
1925 #if OMPT_SUPPORT && OMPT_OPTIONAL
1926  kmp_info_t *this_thr = __kmp_threads[global_tid];
1927  kmp_team_t *team = this_thr->th.th_team;
1928  int tid = __kmp_tid_from_gtid(global_tid);
1929 
1930  if (ompt_enabled.ompt_callback_work) {
1931  ompt_callbacks.ompt_callback(ompt_callback_work)(
1932  ompt_work_single_executor, ompt_scope_end,
1933  &(team->t.ompt_team_info.parallel_data),
1934  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1935  OMPT_GET_RETURN_ADDRESS(0));
1936  }
1937 #endif
1938 }
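// Illustrative sketch (hypothetical): "#pragma omp single" lowers to a guarded
// block; without a nowait clause the compiler also emits the closing barrier.
// loc_example and gtid are as in the earlier sketches.
//
//   if (__kmpc_single(&loc_example, gtid)) {
//     /* single body, executed by one thread */
//     __kmpc_end_single(&loc_example, gtid);
//   }
//   __kmpc_barrier(&loc_example, gtid); // omitted when nowait is present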
1939 
1947 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1948  KMP_POP_PARTITIONED_TIMER();
1949  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1950 
1951 #if OMPT_SUPPORT && OMPT_OPTIONAL
1952  if (ompt_enabled.ompt_callback_work) {
1953  ompt_work_t ompt_work_type = ompt_work_loop;
1954  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1955  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1956  // Determine workshare type
1957  if (loc != NULL) {
1958  if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1959  ompt_work_type = ompt_work_loop;
1960  } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1961  ompt_work_type = ompt_work_sections;
1962  } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1963  ompt_work_type = ompt_work_distribute;
1964  } else {
1965  // use default set above.
1966  // a warning about this case is provided in __kmpc_for_static_init
1967  }
1968  KMP_DEBUG_ASSERT(ompt_work_type);
1969  }
1970  ompt_callbacks.ompt_callback(ompt_callback_work)(
1971  ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1972  &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1973  }
1974 #endif
1975  if (__kmp_env_consistency_check)
1976  __kmp_pop_workshare(global_tid, ct_pdo, loc);
1977 }
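// Illustrative sketch (hypothetical): a statically scheduled worksharing loop
// is bracketed by __kmpc_for_static_init_* (defined in kmp_sched.cpp) and the
// __kmpc_for_static_fini call above; the bounds/stride names are invented.
//
//   kmp_int32 last = 0, lower = 0, upper = 99, stride = 1;
//   __kmpc_for_static_init_4(&loc_example, gtid, /*kmp_sch_static*/ 34, &last,
//                            &lower, &upper, &stride, /*incr=*/1, /*chunk=*/1);
//   for (kmp_int32 i = lower; i <= upper; ++i) {
//     /* loop body for this thread's chunk */
//   }
//   __kmpc_for_static_fini(&loc_example, gtid);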
1978 
1979 // User routines which take C-style arguments (call by value)
1980 // different from the Fortran equivalent routines
1981 
1982 void ompc_set_num_threads(int arg) {
1983  // !!!!! TODO: check the per-task binding
1984  __kmp_set_num_threads(arg, __kmp_entry_gtid());
1985 }
1986 
1987 void ompc_set_dynamic(int flag) {
1988  kmp_info_t *thread;
1989 
1990  /* For the thread-private implementation of the internal controls */
1991  thread = __kmp_entry_thread();
1992 
1993  __kmp_save_internal_controls(thread);
1994 
1995  set__dynamic(thread, flag ? true : false);
1996 }
1997 
1998 void ompc_set_nested(int flag) {
1999  kmp_info_t *thread;
2000 
2001  /* For the thread-private internal controls implementation */
2002  thread = __kmp_entry_thread();
2003 
2004  __kmp_save_internal_controls(thread);
2005 
2006  set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
2007 }
2008 
2009 void ompc_set_max_active_levels(int max_active_levels) {
2010  /* TO DO */
2011  /* we want per-task implementation of this internal control */
2012 
2013  /* For the per-thread internal controls implementation */
2014  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
2015 }
2016 
2017 void ompc_set_schedule(omp_sched_t kind, int modifier) {
2018  // !!!!! TODO: check the per-task binding
2019  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
2020 }
2021 
2022 int ompc_get_ancestor_thread_num(int level) {
2023  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
2024 }
2025 
2026 int ompc_get_team_size(int level) {
2027  return __kmp_get_team_size(__kmp_entry_gtid(), level);
2028 }
2029 
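// Editor's illustrative sketch (not part of the runtime): the ompc_* wrappers
// above mirror the standard omp_set_*/omp_get_* ICV API; both end up in the
// same __kmp_* internals. A minimal user-level usage, kept under #if 0 so it
// is never compiled into the library (the function name is hypothetical):
#if 0
#include <omp.h>
static void example_icv_usage(void) {
  omp_set_num_threads(4);                  // same effect as ompc_set_num_threads(4)
  omp_set_dynamic(0);                      // same effect as ompc_set_dynamic(0)
  omp_set_max_active_levels(2);            // same effect as ompc_set_max_active_levels(2)
  omp_set_schedule(omp_sched_dynamic, 16); // same effect as ompc_set_schedule(...)
  // Query the parent thread's id one nesting level up, if any.
  int parent = omp_get_ancestor_thread_num(omp_get_level() - 1);
  (void)parent;
}
#endif
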
2030 /* OpenMP 5.0 Affinity Format API */
2031 void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
2032  if (!__kmp_init_serial) {
2033  __kmp_serial_initialize();
2034  }
2035  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
2036  format, KMP_STRLEN(format) + 1);
2037 }
2038 
2039 size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {
2040  size_t format_size;
2041  if (!__kmp_init_serial) {
2042  __kmp_serial_initialize();
2043  }
2044  format_size = KMP_STRLEN(__kmp_affinity_format);
2045  if (buffer && size) {
2046  __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
2047  format_size + 1);
2048  }
2049  return format_size;
2050 }
2051 
2052 void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
2053  int gtid;
2054  if (!TCR_4(__kmp_init_middle)) {
2055  __kmp_middle_initialize();
2056  }
2057  __kmp_assign_root_init_mask();
2058  gtid = __kmp_get_gtid();
2059 #if KMP_AFFINITY_SUPPORTED
2060  if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
2061  __kmp_affinity.flags.reset) {
2062  __kmp_reset_root_init_mask(gtid);
2063  }
2064 #endif
2065  __kmp_aux_display_affinity(gtid, format);
2066 }
2067 
2068 size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
2069  char const *format) {
2070  int gtid;
2071  size_t num_required;
2072  kmp_str_buf_t capture_buf;
2073  if (!TCR_4(__kmp_init_middle)) {
2074  __kmp_middle_initialize();
2075  }
2076  __kmp_assign_root_init_mask();
2077  gtid = __kmp_get_gtid();
2078 #if KMP_AFFINITY_SUPPORTED
2079  if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
2080  __kmp_affinity.flags.reset) {
2081  __kmp_reset_root_init_mask(gtid);
2082  }
2083 #endif
2084  __kmp_str_buf_init(&capture_buf);
2085  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
2086  if (buffer && buf_size) {
2087  __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
2088  capture_buf.used + 1);
2089  }
2090  __kmp_str_buf_free(&capture_buf);
2091  return num_required;
2092 }
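
// Editor's illustrative sketch (not part of the runtime): the four routines
// above back the OpenMP 5.0 affinity-format API (omp_set_affinity_format,
// omp_get_affinity_format, omp_display_affinity, omp_capture_affinity). The
// format string and the helper name below are examples only; kept under #if 0:
#if 0
#include <omp.h>
#include <stdio.h>
static void example_affinity_format(void) {
  // Field specifiers follow the OpenMP 5.0 affinity format
  // (e.g. %n = thread number, %A = affinity, %H = host).
  omp_set_affinity_format("OMP: thread %n bound to %A on %H");
  char buf[128];
  // A NULL format means "use the current affinity-format-var".
  size_t needed = omp_capture_affinity(buf, sizeof(buf), NULL);
  if (needed >= sizeof(buf)) // return value excludes the terminating NUL
    fprintf(stderr, "affinity string truncated (need %zu bytes)\n", needed + 1);
  omp_display_affinity(NULL); // print using the current format
}
#endif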
2093 
2094 void kmpc_set_stacksize(int arg) {
2095  // __kmp_aux_set_stacksize initializes the library if needed
2096  __kmp_aux_set_stacksize(arg);
2097 }
2098 
2099 void kmpc_set_stacksize_s(size_t arg) {
2100  // __kmp_aux_set_stacksize initializes the library if needed
2101  __kmp_aux_set_stacksize(arg);
2102 }
2103 
2104 void kmpc_set_blocktime(int arg) {
2105  int gtid, tid, bt = arg;
2106  kmp_info_t *thread;
2107 
2108  gtid = __kmp_entry_gtid();
2109  tid = __kmp_tid_from_gtid(gtid);
2110  thread = __kmp_thread_from_gtid(gtid);
2111 
2112  __kmp_aux_convert_blocktime(&bt);
2113  __kmp_aux_set_blocktime(bt, thread, tid);
2114 }
2115 
2116 void kmpc_set_library(int arg) {
2117  // __kmp_user_set_library initializes the library if needed
2118  __kmp_user_set_library((enum library_type)arg);
2119 }
2120 
2121 void kmpc_set_defaults(char const *str) {
2122  // __kmp_aux_set_defaults initializes the library if needed
2123  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
2124 }
2125 
2126 void kmpc_set_disp_num_buffers(int arg) {
2127  // ignore after initialization because some teams have already
2128  // allocated dispatch buffers
2129  if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
2130  arg <= KMP_MAX_DISP_NUM_BUFF) {
2131  __kmp_dispatch_num_buffers = arg;
2132  }
2133 }
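
// Editor's illustrative sketch (not part of the runtime): the kmpc_set_*
// routines above sit behind the kmp_set_* extension API declared in omp.h on
// Intel/LLVM builds. A hedged usage sketch (all values are examples only, and
// such settings are normally applied before other OpenMP activity):
#if 0
#include <omp.h>
static void example_kmp_service_calls(void) {
  kmp_set_stacksize_s(4 * 1024 * 1024);     // 4 MiB worker stacks (example value)
  kmp_set_blocktime(0);                     // idle workers sleep immediately (example value)
  kmp_set_defaults("KMP_AFFINITY=compact"); // same syntax as the environment variables
}
#endif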
2134 
2135 int kmpc_set_affinity_mask_proc(int proc, void **mask) {
2136 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2137  return -1;
2138 #else
2139  if (!TCR_4(__kmp_init_middle)) {
2140  __kmp_middle_initialize();
2141  }
2142  __kmp_assign_root_init_mask();
2143  return __kmp_aux_set_affinity_mask_proc(proc, mask);
2144 #endif
2145 }
2146 
2147 int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
2148 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2149  return -1;
2150 #else
2151  if (!TCR_4(__kmp_init_middle)) {
2152  __kmp_middle_initialize();
2153  }
2154  __kmp_assign_root_init_mask();
2155  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2156 #endif
2157 }
2158 
2159 int kmpc_get_affinity_mask_proc(int proc, void **mask) {
2160 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2161  return -1;
2162 #else
2163  if (!TCR_4(__kmp_init_middle)) {
2164  __kmp_middle_initialize();
2165  }
2166  __kmp_assign_root_init_mask();
2167  return __kmp_aux_get_affinity_mask_proc(proc, mask);
2168 #endif
2169 }
2170 
2171 /* -------------------------------------------------------------------------- */
2216 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2217  void *cpy_data, void (*cpy_func)(void *, void *),
2218  kmp_int32 didit) {
2219  void **data_ptr;
2220  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2221  __kmp_assert_valid_gtid(gtid);
2222 
2223  KMP_MB();
2224 
2225  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2226 
2227  if (__kmp_env_consistency_check) {
2228  if (loc == 0) {
2229  KMP_WARNING(ConstructIdentInvalid);
2230  }
2231  }
2232 
2233  // ToDo: Optimize the following two barriers into some kind of split barrier
2234 
2235  if (didit)
2236  *data_ptr = cpy_data;
2237 
2238 #if OMPT_SUPPORT
2239  ompt_frame_t *ompt_frame;
2240  if (ompt_enabled.enabled) {
2241  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2242  if (ompt_frame->enter_frame.ptr == NULL)
2243  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2244  }
2245  OMPT_STORE_RETURN_ADDRESS(gtid);
2246 #endif
2247 /* This barrier is not a barrier region boundary */
2248 #if USE_ITT_NOTIFY
2249  __kmp_threads[gtid]->th.th_ident = loc;
2250 #endif
2251  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2252 
2253  if (!didit)
2254  (*cpy_func)(cpy_data, *data_ptr);
2255 
2256  // Consider next barrier a user-visible barrier for barrier region boundaries
2257  // Nesting checks are already handled by the single construct checks
2258  {
2259 #if OMPT_SUPPORT
2260  OMPT_STORE_RETURN_ADDRESS(gtid);
2261 #endif
2262 #if USE_ITT_NOTIFY
2263  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2264 // tasks can overwrite the location)
2265 #endif
2266  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2267 #if OMPT_SUPPORT && OMPT_OPTIONAL
2268  if (ompt_enabled.enabled) {
2269  ompt_frame->enter_frame = ompt_data_none;
2270  }
2271 #endif
2272  }
2273 }
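
// Editor's illustrative sketch (not part of the runtime): schematic of how a
// compiler might lower "#pragma omp single copyprivate(x)" onto
// __kmpc_single/__kmpc_copyprivate. Helper names (copy_x, compute,
// lowered_single_copyprivate) are hypothetical; real codegen is
// compiler-specific. Kept under #if 0 so it is never compiled into the library.
#if 0
extern int compute(void); // stands in for the body of the single region

static void copy_x(void *dst, void *src) { *(int *)dst = *(int *)src; }

static void lowered_single_copyprivate(ident_t *loc, kmp_int32 gtid, int *x) {
  kmp_int32 did_it = __kmpc_single(loc, gtid);
  if (did_it)
    *x = compute(); // exactly one thread executes the single body
  // Every thread calls copyprivate with its own &x; the executing thread's
  // pointer is published, and the barriers inside __kmpc_copyprivate let the
  // other threads run copy_x(their_x, executor_x) before the data goes away.
  __kmpc_copyprivate(loc, gtid, sizeof(*x), x, copy_x, did_it);
}
#endif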
2274 
2275 /* --------------------------------------------------------------------------*/
2292 void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) {
2293  void **data_ptr;
2294 
2295  KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid));
2296 
2297  KMP_MB();
2298 
2299  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2300 
2301  if (__kmp_env_consistency_check) {
2302  if (loc == 0) {
2303  KMP_WARNING(ConstructIdentInvalid);
2304  }
2305  }
2306 
2307  // ToDo: Optimize the following barrier
2308 
2309  if (cpy_data)
2310  *data_ptr = cpy_data;
2311 
2312 #if OMPT_SUPPORT
2313  ompt_frame_t *ompt_frame;
2314  if (ompt_enabled.enabled) {
2315  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2316  if (ompt_frame->enter_frame.ptr == NULL)
2317  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2318  OMPT_STORE_RETURN_ADDRESS(gtid);
2319  }
2320 #endif
2321 /* This barrier is not a barrier region boundary */
2322 #if USE_ITT_NOTIFY
2323  __kmp_threads[gtid]->th.th_ident = loc;
2324 #endif
2325  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2326 
2327  return *data_ptr;
2328 }
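
// Editor's note (illustrative, not part of the runtime): in the "light"
// variant the executing thread passes its data pointer, everyone gets that
// pointer back after a single barrier, and the caller performs the copy
// itself instead of supplying a callback. A hedged sketch (the function name
// is hypothetical), kept under #if 0:
#if 0
static void lowered_single_copyprivate_light(ident_t *loc, kmp_int32 gtid,
                                             int *x, kmp_int32 did_it) {
  // did_it != 0 only on the thread that executed the single region; the other
  // threads pass NULL so they do not overwrite the published pointer.
  int *src = (int *)__kmpc_copyprivate_light(loc, gtid, did_it ? x : NULL);
  if (!did_it)
    *x = *src; // copy from the executor's instance
  // (further synchronization after the copy is the construct's responsibility;
  //  this sketch is simplified)
}
#endif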
2329 
2330 /* -------------------------------------------------------------------------- */
2331 
2332 #define INIT_LOCK __kmp_init_user_lock_with_checks
2333 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2334 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2335 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2336 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2337 #define ACQUIRE_NESTED_LOCK_TIMED \
2338  __kmp_acquire_nested_user_lock_with_checks_timed
2339 #define RELEASE_LOCK __kmp_release_user_lock_with_checks
2340 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2341 #define TEST_LOCK __kmp_test_user_lock_with_checks
2342 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2343 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2344 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2345 
2346 // TODO: Make check abort messages use location info & pass it into
2347 // with_checks routines
2348 
2349 #if KMP_USE_DYNAMIC_LOCK
2350 
2351 // internal lock initializer
2352 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2353  kmp_dyna_lockseq_t seq) {
2354  if (KMP_IS_D_LOCK(seq)) {
2355  KMP_INIT_D_LOCK(lock, seq);
2356 #if USE_ITT_BUILD
2357  __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2358 #endif
2359  } else {
2360  KMP_INIT_I_LOCK(lock, seq);
2361 #if USE_ITT_BUILD
2362  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2363  __kmp_itt_lock_creating(ilk->lock, loc);
2364 #endif
2365  }
2366 }
2367 
2368 // internal nest lock initializer
2369 static __forceinline void
2370 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2371  kmp_dyna_lockseq_t seq) {
2372 #if KMP_USE_TSX
2373  // Don't have nested lock implementation for speculative locks
2374  if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
2375  seq == lockseq_rtm_spin || seq == lockseq_adaptive)
2376  seq = __kmp_user_lock_seq;
2377 #endif
2378  switch (seq) {
2379  case lockseq_tas:
2380  seq = lockseq_nested_tas;
2381  break;
2382 #if KMP_USE_FUTEX
2383  case lockseq_futex:
2384  seq = lockseq_nested_futex;
2385  break;
2386 #endif
2387  case lockseq_ticket:
2388  seq = lockseq_nested_ticket;
2389  break;
2390  case lockseq_queuing:
2391  seq = lockseq_nested_queuing;
2392  break;
2393  case lockseq_drdpa:
2394  seq = lockseq_nested_drdpa;
2395  break;
2396  default:
2397  seq = lockseq_nested_queuing;
2398  }
2399  KMP_INIT_I_LOCK(lock, seq);
2400 #if USE_ITT_BUILD
2401  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2402  __kmp_itt_lock_creating(ilk->lock, loc);
2403 #endif
2404 }
2405 
2406 /* initialize the lock with a hint */
2407 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2408  uintptr_t hint) {
2409  KMP_DEBUG_ASSERT(__kmp_init_serial);
2410  if (__kmp_env_consistency_check && user_lock == NULL) {
2411  KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2412  }
2413 
2414  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2415 
2416 #if OMPT_SUPPORT && OMPT_OPTIONAL
2417  // This is the case, if called from omp_init_lock_with_hint:
2418  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2419  if (!codeptr)
2420  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2421  if (ompt_enabled.ompt_callback_lock_init) {
2422  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2423  ompt_mutex_lock, (omp_lock_hint_t)hint,
2424  __ompt_get_mutex_impl_type(user_lock),
2425  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2426  }
2427 #endif
2428 }
2429 
2430 /* initialize the lock with a hint */
2431 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2432  void **user_lock, uintptr_t hint) {
2433  KMP_DEBUG_ASSERT(__kmp_init_serial);
2434  if (__kmp_env_consistency_check && user_lock == NULL) {
2435  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2436  }
2437 
2438  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2439 
2440 #if OMPT_SUPPORT && OMPT_OPTIONAL
2441  // This is the case, if called from omp_init_lock_with_hint:
2442  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2443  if (!codeptr)
2444  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2445  if (ompt_enabled.ompt_callback_lock_init) {
2446  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2447  ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2448  __ompt_get_mutex_impl_type(user_lock),
2449  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2450  }
2451 #endif
2452 }
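
// Editor's illustrative sketch (not part of the runtime): user-level OpenMP
// lock-with-hint usage that reaches the initializers above, either directly
// under compilers that lower the lock API to __kmpc_* entry points or through
// the omp_init_lock_with_hint wrapper. Kept under #if 0:
#if 0
#include <omp.h>
static void example_lock_with_hint(void) {
  omp_lock_t l;
  // The hint is advisory; e.g. a speculative hint may select a TSX-based lock
  // where available and otherwise fall back to a regular lock.
  omp_init_lock_with_hint(&l, omp_lock_hint_speculative);
  omp_set_lock(&l);
  omp_unset_lock(&l);
  omp_destroy_lock(&l);
}
#endif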
2453 
2454 #endif // KMP_USE_DYNAMIC_LOCK
2455 
2456 /* initialize the lock */
2457 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2458 #if KMP_USE_DYNAMIC_LOCK
2459 
2460  KMP_DEBUG_ASSERT(__kmp_init_serial);
2461  if (__kmp_env_consistency_check && user_lock == NULL) {
2462  KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2463  }
2464  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2465 
2466 #if OMPT_SUPPORT && OMPT_OPTIONAL
2467  // This is the case, if called from omp_init_lock_with_hint:
2468  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2469  if (!codeptr)
2470  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2471  if (ompt_enabled.ompt_callback_lock_init) {
2472  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2473  ompt_mutex_lock, omp_lock_hint_none,
2474  __ompt_get_mutex_impl_type(user_lock),
2475  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2476  }
2477 #endif
2478 
2479 #else // KMP_USE_DYNAMIC_LOCK
2480 
2481  static char const *const func = "omp_init_lock";
2482  kmp_user_lock_p lck;
2483  KMP_DEBUG_ASSERT(__kmp_init_serial);
2484 
2485  if (__kmp_env_consistency_check) {
2486  if (user_lock == NULL) {
2487  KMP_FATAL(LockIsUninitialized, func);
2488  }
2489  }
2490 
2491  KMP_CHECK_USER_LOCK_INIT();
2492 
2493  if ((__kmp_user_lock_kind == lk_tas) &&
2494  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2495  lck = (kmp_user_lock_p)user_lock;
2496  }
2497 #if KMP_USE_FUTEX
2498  else if ((__kmp_user_lock_kind == lk_futex) &&
2499  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2500  lck = (kmp_user_lock_p)user_lock;
2501  }
2502 #endif
2503  else {
2504  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2505  }
2506  INIT_LOCK(lck);
2507  __kmp_set_user_lock_location(lck, loc);
2508 
2509 #if OMPT_SUPPORT && OMPT_OPTIONAL
2510  // This is the case, if called from omp_init_lock_with_hint:
2511  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2512  if (!codeptr)
2513  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2514  if (ompt_enabled.ompt_callback_lock_init) {
2515  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2516  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2517  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2518  }
2519 #endif
2520 
2521 #if USE_ITT_BUILD
2522  __kmp_itt_lock_creating(lck);
2523 #endif /* USE_ITT_BUILD */
2524 
2525 #endif // KMP_USE_DYNAMIC_LOCK
2526 } // __kmpc_init_lock
2527 
2528 /* initialize the lock */
2529 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2530 #if KMP_USE_DYNAMIC_LOCK
2531 
2532  KMP_DEBUG_ASSERT(__kmp_init_serial);
2533  if (__kmp_env_consistency_check && user_lock == NULL) {
2534  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2535  }
2536  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2537 
2538 #if OMPT_SUPPORT && OMPT_OPTIONAL
2539  // This is the case, if called from omp_init_lock_with_hint:
2540  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2541  if (!codeptr)
2542  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2543  if (ompt_enabled.ompt_callback_lock_init) {
2544  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2545  ompt_mutex_nest_lock, omp_lock_hint_none,
2546  __ompt_get_mutex_impl_type(user_lock),
2547  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2548  }
2549 #endif
2550 
2551 #else // KMP_USE_DYNAMIC_LOCK
2552 
2553  static char const *const func = "omp_init_nest_lock";
2554  kmp_user_lock_p lck;
2555  KMP_DEBUG_ASSERT(__kmp_init_serial);
2556 
2557  if (__kmp_env_consistency_check) {
2558  if (user_lock == NULL) {
2559  KMP_FATAL(LockIsUninitialized, func);
2560  }
2561  }
2562 
2563  KMP_CHECK_USER_LOCK_INIT();
2564 
2565  if ((__kmp_user_lock_kind == lk_tas) &&
2566  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2567  OMP_NEST_LOCK_T_SIZE)) {
2568  lck = (kmp_user_lock_p)user_lock;
2569  }
2570 #if KMP_USE_FUTEX
2571  else if ((__kmp_user_lock_kind == lk_futex) &&
2572  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2573  OMP_NEST_LOCK_T_SIZE)) {
2574  lck = (kmp_user_lock_p)user_lock;
2575  }
2576 #endif
2577  else {
2578  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2579  }
2580 
2581  INIT_NESTED_LOCK(lck);
2582  __kmp_set_user_lock_location(lck, loc);
2583 
2584 #if OMPT_SUPPORT && OMPT_OPTIONAL
2585  // This is the case, if called from omp_init_lock_with_hint:
2586  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2587  if (!codeptr)
2588  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2589  if (ompt_enabled.ompt_callback_lock_init) {
2590  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2591  ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2592  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2593  }
2594 #endif
2595 
2596 #if USE_ITT_BUILD
2597  __kmp_itt_lock_creating(lck);
2598 #endif /* USE_ITT_BUILD */
2599 
2600 #endif // KMP_USE_DYNAMIC_LOCK
2601 } // __kmpc_init_nest_lock
2602 
2603 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2604 #if KMP_USE_DYNAMIC_LOCK
2605 
2606 #if USE_ITT_BUILD
2607  kmp_user_lock_p lck;
2608  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2609  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2610  } else {
2611  lck = (kmp_user_lock_p)user_lock;
2612  }
2613  __kmp_itt_lock_destroyed(lck);
2614 #endif
2615 #if OMPT_SUPPORT && OMPT_OPTIONAL
2616  // This is the case, if called from omp_init_lock_with_hint:
2617  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2618  if (!codeptr)
2619  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2620  if (ompt_enabled.ompt_callback_lock_destroy) {
2621  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2622  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2623  }
2624 #endif
2625  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2626 #else
2627  kmp_user_lock_p lck;
2628 
2629  if ((__kmp_user_lock_kind == lk_tas) &&
2630  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2631  lck = (kmp_user_lock_p)user_lock;
2632  }
2633 #if KMP_USE_FUTEX
2634  else if ((__kmp_user_lock_kind == lk_futex) &&
2635  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2636  lck = (kmp_user_lock_p)user_lock;
2637  }
2638 #endif
2639  else {
2640  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2641  }
2642 
2643 #if OMPT_SUPPORT && OMPT_OPTIONAL
2644  // This is the case, if called from omp_init_lock_with_hint:
2645  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2646  if (!codeptr)
2647  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2648  if (ompt_enabled.ompt_callback_lock_destroy) {
2649  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2650  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2651  }
2652 #endif
2653 
2654 #if USE_ITT_BUILD
2655  __kmp_itt_lock_destroyed(lck);
2656 #endif /* USE_ITT_BUILD */
2657  DESTROY_LOCK(lck);
2658 
2659  if ((__kmp_user_lock_kind == lk_tas) &&
2660  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2661  ;
2662  }
2663 #if KMP_USE_FUTEX
2664  else if ((__kmp_user_lock_kind == lk_futex) &&
2665  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2666  ;
2667  }
2668 #endif
2669  else {
2670  __kmp_user_lock_free(user_lock, gtid, lck);
2671  }
2672 #endif // KMP_USE_DYNAMIC_LOCK
2673 } // __kmpc_destroy_lock
2674 
2675 /* destroy the lock */
2676 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2677 #if KMP_USE_DYNAMIC_LOCK
2678 
2679 #if USE_ITT_BUILD
2680  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2681  __kmp_itt_lock_destroyed(ilk->lock);
2682 #endif
2683 #if OMPT_SUPPORT && OMPT_OPTIONAL
2684  // This is the case, if called from omp_init_lock_with_hint:
2685  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2686  if (!codeptr)
2687  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2688  if (ompt_enabled.ompt_callback_lock_destroy) {
2689  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2690  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2691  }
2692 #endif
2693  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2694 
2695 #else // KMP_USE_DYNAMIC_LOCK
2696 
2697  kmp_user_lock_p lck;
2698 
2699  if ((__kmp_user_lock_kind == lk_tas) &&
2700  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2701  OMP_NEST_LOCK_T_SIZE)) {
2702  lck = (kmp_user_lock_p)user_lock;
2703  }
2704 #if KMP_USE_FUTEX
2705  else if ((__kmp_user_lock_kind == lk_futex) &&
2706  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2707  OMP_NEST_LOCK_T_SIZE)) {
2708  lck = (kmp_user_lock_p)user_lock;
2709  }
2710 #endif
2711  else {
2712  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2713  }
2714 
2715 #if OMPT_SUPPORT && OMPT_OPTIONAL
2716  // This is the case, if called from omp_init_lock_with_hint:
2717  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2718  if (!codeptr)
2719  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2720  if (ompt_enabled.ompt_callback_lock_destroy) {
2721  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2722  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2723  }
2724 #endif
2725 
2726 #if USE_ITT_BUILD
2727  __kmp_itt_lock_destroyed(lck);
2728 #endif /* USE_ITT_BUILD */
2729 
2730  DESTROY_NESTED_LOCK(lck);
2731 
2732  if ((__kmp_user_lock_kind == lk_tas) &&
2733  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2734  OMP_NEST_LOCK_T_SIZE)) {
2735  ;
2736  }
2737 #if KMP_USE_FUTEX
2738  else if ((__kmp_user_lock_kind == lk_futex) &&
2739  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2740  OMP_NEST_LOCK_T_SIZE)) {
2741  ;
2742  }
2743 #endif
2744  else {
2745  __kmp_user_lock_free(user_lock, gtid, lck);
2746  }
2747 #endif // KMP_USE_DYNAMIC_LOCK
2748 } // __kmpc_destroy_nest_lock
2749 
2750 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2751  KMP_COUNT_BLOCK(OMP_set_lock);
2752 #if KMP_USE_DYNAMIC_LOCK
2753  int tag = KMP_EXTRACT_D_TAG(user_lock);
2754 #if USE_ITT_BUILD
2755  __kmp_itt_lock_acquiring(
2756  (kmp_user_lock_p)
2757  user_lock); // itt function will get to the right lock object.
2758 #endif
2759 #if OMPT_SUPPORT && OMPT_OPTIONAL
2760  // This is the case, if called from omp_init_lock_with_hint:
2761  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2762  if (!codeptr)
2763  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2764  if (ompt_enabled.ompt_callback_mutex_acquire) {
2765  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2766  ompt_mutex_lock, omp_lock_hint_none,
2767  __ompt_get_mutex_impl_type(user_lock),
2768  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2769  }
2770 #endif
2771 #if KMP_USE_INLINED_TAS
2772  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2773  KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2774  } else
2775 #elif KMP_USE_INLINED_FUTEX
2776  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2777  KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2778  } else
2779 #endif
2780  {
2781  __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2782  }
2783 #if USE_ITT_BUILD
2784  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2785 #endif
2786 #if OMPT_SUPPORT && OMPT_OPTIONAL
2787  if (ompt_enabled.ompt_callback_mutex_acquired) {
2788  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2789  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2790  }
2791 #endif
2792 
2793 #else // KMP_USE_DYNAMIC_LOCK
2794 
2795  kmp_user_lock_p lck;
2796 
2797  if ((__kmp_user_lock_kind == lk_tas) &&
2798  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2799  lck = (kmp_user_lock_p)user_lock;
2800  }
2801 #if KMP_USE_FUTEX
2802  else if ((__kmp_user_lock_kind == lk_futex) &&
2803  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2804  lck = (kmp_user_lock_p)user_lock;
2805  }
2806 #endif
2807  else {
2808  lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2809  }
2810 
2811 #if USE_ITT_BUILD
2812  __kmp_itt_lock_acquiring(lck);
2813 #endif /* USE_ITT_BUILD */
2814 #if OMPT_SUPPORT && OMPT_OPTIONAL
2815  // This is the case, if called from omp_init_lock_with_hint:
2816  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2817  if (!codeptr)
2818  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2819  if (ompt_enabled.ompt_callback_mutex_acquire) {
2820  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2821  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2822  (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2823  }
2824 #endif
2825 
2826  ACQUIRE_LOCK(lck, gtid);
2827 
2828 #if USE_ITT_BUILD
2829  __kmp_itt_lock_acquired(lck);
2830 #endif /* USE_ITT_BUILD */
2831 
2832 #if OMPT_SUPPORT && OMPT_OPTIONAL
2833  if (ompt_enabled.ompt_callback_mutex_acquired) {
2834  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2835  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2836  }
2837 #endif
2838 
2839 #endif // KMP_USE_DYNAMIC_LOCK
2840 }
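
// Editor's illustrative sketch (not part of the runtime): a typical user-level
// pattern that exercises the __kmpc_set_lock path above (and the matching
// unset path defined later in this file) from multiple threads. The function
// name is hypothetical; kept under #if 0:
#if 0
#include <omp.h>
static int example_locked_counter(void) {
  omp_lock_t l;
  int count = 0;
  omp_init_lock(&l);
  #pragma omp parallel shared(count)
  {
    omp_set_lock(&l);   // serializes the update; a contended caller blocks here
    ++count;
    omp_unset_lock(&l);
  }
  omp_destroy_lock(&l);
  return count; // equals the number of threads in the team
}
#endif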
2841 
2842 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2843 #if KMP_USE_DYNAMIC_LOCK
2844 
2845 #if USE_ITT_BUILD
2846  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2847 #endif
2848 #if OMPT_SUPPORT && OMPT_OPTIONAL
2849  // This is the case, if called from omp_init_lock_with_hint:
2850  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2851  if (!codeptr)
2852  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2853  if (ompt_enabled.enabled) {
2854  if (ompt_enabled.ompt_callback_mutex_acquire) {
2855  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2856  ompt_mutex_nest_lock, omp_lock_hint_none,
2857  __ompt_get_mutex_impl_type(user_lock),
2858  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2859  }
2860  }
2861 #endif
2862  int acquire_status =
2863  KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2864  (void)acquire_status;
2865 #if USE_ITT_BUILD
2866  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2867 #endif
2868 
2869 #if OMPT_SUPPORT && OMPT_OPTIONAL
2870  if (ompt_enabled.enabled) {
2871  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2872  if (ompt_enabled.ompt_callback_mutex_acquired) {
2873  // lock_first
2874  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2875  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2876  codeptr);
2877  }
2878  } else {
2879  if (ompt_enabled.ompt_callback_nest_lock) {
2880  // lock_next
2881  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2882  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2883  }
2884  }
2885  }
2886 #endif
2887 
2888 #else // KMP_USE_DYNAMIC_LOCK
2889  int acquire_status;
2890  kmp_user_lock_p lck;
2891 
2892  if ((__kmp_user_lock_kind == lk_tas) &&
2893  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2894  OMP_NEST_LOCK_T_SIZE)) {
2895  lck = (kmp_user_lock_p)user_lock;
2896  }
2897 #if KMP_USE_FUTEX
2898  else if ((__kmp_user_lock_kind == lk_futex) &&
2899  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2900  OMP_NEST_LOCK_T_SIZE)) {
2901  lck = (kmp_user_lock_p)user_lock;
2902  }
2903 #endif
2904  else {
2905  lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2906  }
2907 
2908 #if USE_ITT_BUILD
2909  __kmp_itt_lock_acquiring(lck);
2910 #endif /* USE_ITT_BUILD */
2911 #if OMPT_SUPPORT && OMPT_OPTIONAL
2912  // This is the case, if called from omp_init_lock_with_hint:
2913  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2914  if (!codeptr)
2915  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2916  if (ompt_enabled.enabled) {
2917  if (ompt_enabled.ompt_callback_mutex_acquire) {
2918  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2919  ompt_mutex_nest_lock, omp_lock_hint_none,
2920  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2921  codeptr);
2922  }
2923  }
2924 #endif
2925 
2926  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2927 
2928 #if USE_ITT_BUILD
2929  __kmp_itt_lock_acquired(lck);
2930 #endif /* USE_ITT_BUILD */
2931 
2932 #if OMPT_SUPPORT && OMPT_OPTIONAL
2933  if (ompt_enabled.enabled) {
2934  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2935  if (ompt_enabled.ompt_callback_mutex_acquired) {
2936  // lock_first
2937  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2938  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2939  }
2940  } else {
2941  if (ompt_enabled.ompt_callback_nest_lock) {
2942  // lock_next
2943  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2944  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2945  }
2946  }
2947  }
2948 #endif
2949 
2950 #endif // KMP_USE_DYNAMIC_LOCK
2951 }
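
// Editor's illustrative sketch (not part of the runtime): nestable locks may be
// re-acquired by the owning thread; each acquisition bumps a depth count and
// only the matching final release frees the lock, which is what the OMPT
// "lock_first"/"lock_next" distinction above reports. Kept under #if 0
// (omp_init_nest_lock/omp_destroy_nest_lock calls omitted for brevity):
#if 0
#include <omp.h>
static omp_nest_lock_t nl;

static void inner(void) {
  omp_set_nest_lock(&nl);   // same owner: depth 1 -> 2, no deadlock
  /* ... */
  omp_unset_nest_lock(&nl); // depth 2 -> 1, lock still held
}

static void outer(void) {
  omp_set_nest_lock(&nl);   // depth 0 -> 1 (first acquisition)
  inner();
  omp_unset_nest_lock(&nl); // depth 1 -> 0, lock released
}
#endif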
2952 
2953 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2954 #if KMP_USE_DYNAMIC_LOCK
2955 
2956  int tag = KMP_EXTRACT_D_TAG(user_lock);
2957 #if USE_ITT_BUILD
2958  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2959 #endif
2960 #if KMP_USE_INLINED_TAS
2961  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2962  KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2963  } else
2964 #elif KMP_USE_INLINED_FUTEX
2965  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2966  KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2967  } else
2968 #endif
2969  {
2970  __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2971  }
2972 
2973 #if OMPT_SUPPORT && OMPT_OPTIONAL
2974  // This is the case, if called from omp_init_lock_with_hint:
2975  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2976  if (!codeptr)
2977  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2978  if (ompt_enabled.ompt_callback_mutex_released) {
2979  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2980  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2981  }
2982 #endif
2983 
2984 #else // KMP_USE_DYNAMIC_LOCK
2985 
2986  kmp_user_lock_p lck;
2987 
2988  /* Can't use serial interval since not block structured */
2989  /* release the lock */
2990 
2991  if ((__kmp_user_lock_kind == lk_tas) &&
2992  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2993 #if KMP_OS_LINUX && \
2994  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2995 // "fast" path implemented to fix customer performance issue
2996 #if USE_ITT_BUILD
2997  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2998 #endif /* USE_ITT_BUILD */
2999  TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
3000  KMP_MB();
3001 
3002 #if OMPT_SUPPORT && OMPT_OPTIONAL
3003  // This is the case, if called from omp_init_lock_with_hint:
3004  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3005  if (!codeptr)
3006  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3007  if (ompt_enabled.ompt_callback_mutex_released) {
3008  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3009  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3010  }
3011 #endif
3012 
3013  return;
3014 #else
3015  lck = (kmp_user_lock_p)user_lock;
3016 #endif
3017  }
3018 #if KMP_USE_FUTEX
3019  else if ((__kmp_user_lock_kind == lk_futex) &&
3020  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3021  lck = (kmp_user_lock_p)user_lock;
3022  }
3023 #endif
3024  else {
3025  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
3026  }
3027 
3028 #if USE_ITT_BUILD
3029  __kmp_itt_lock_releasing(lck);
3030 #endif /* USE_ITT_BUILD */
3031 
3032  RELEASE_LOCK(lck, gtid);
3033 
3034 #if OMPT_SUPPORT && OMPT_OPTIONAL
3035  // This is the case, if called from omp_init_lock_with_hint:
3036  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3037  if (!codeptr)
3038  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3039  if (ompt_enabled.ompt_callback_mutex_released) {
3040  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3041  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3042  }
3043 #endif
3044 
3045 #endif // KMP_USE_DYNAMIC_LOCK
3046 }
3047 
3048 /* release the lock */
3049 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3050 #if KMP_USE_DYNAMIC_LOCK
3051 
3052 #if USE_ITT_BUILD
3053  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3054 #endif
3055  int release_status =
3056  KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
3057  (void)release_status;
3058 
3059 #if OMPT_SUPPORT && OMPT_OPTIONAL
3060  // This is the case, if called from omp_init_lock_with_hint:
3061  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3062  if (!codeptr)
3063  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3064  if (ompt_enabled.enabled) {
3065  if (release_status == KMP_LOCK_RELEASED) {
3066  if (ompt_enabled.ompt_callback_mutex_released) {
3067  // release_lock_last
3068  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3069  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3070  codeptr);
3071  }
3072  } else if (ompt_enabled.ompt_callback_nest_lock) {
3073  // release_lock_prev
3074  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3075  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3076  }
3077  }
3078 #endif
3079 
3080 #else // KMP_USE_DYNAMIC_LOCK
3081 
3082  kmp_user_lock_p lck;
3083 
3084  /* Can't use serial interval since not block structured */
3085 
3086  if ((__kmp_user_lock_kind == lk_tas) &&
3087  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3088  OMP_NEST_LOCK_T_SIZE)) {
3089 #if KMP_OS_LINUX && \
3090  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
3091  // "fast" path implemented to fix customer performance issue
3092  kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
3093 #if USE_ITT_BUILD
3094  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3095 #endif /* USE_ITT_BUILD */
3096 
3097 #if OMPT_SUPPORT && OMPT_OPTIONAL
3098  int release_status = KMP_LOCK_STILL_HELD;
3099 #endif
3100 
3101  if (--(tl->lk.depth_locked) == 0) {
3102  TCW_4(tl->lk.poll, 0);
3103 #if OMPT_SUPPORT && OMPT_OPTIONAL
3104  release_status = KMP_LOCK_RELEASED;
3105 #endif
3106  }
3107  KMP_MB();
3108 
3109 #if OMPT_SUPPORT && OMPT_OPTIONAL
3110  // This is the case, if called from omp_init_lock_with_hint:
3111  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3112  if (!codeptr)
3113  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3114  if (ompt_enabled.enabled) {
3115  if (release_status == KMP_LOCK_RELEASED) {
3116  if (ompt_enabled.ompt_callback_mutex_released) {
3117  // release_lock_last
3118  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3119  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3120  }
3121  } else if (ompt_enabled.ompt_callback_nest_lock) {
3122  // release_lock_previous
3123  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3124  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3125  }
3126  }
3127 #endif
3128 
3129  return;
3130 #else
3131  lck = (kmp_user_lock_p)user_lock;
3132 #endif
3133  }
3134 #if KMP_USE_FUTEX
3135  else if ((__kmp_user_lock_kind == lk_futex) &&
3136  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3137  OMP_NEST_LOCK_T_SIZE)) {
3138  lck = (kmp_user_lock_p)user_lock;
3139  }
3140 #endif
3141  else {
3142  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
3143  }
3144 
3145 #if USE_ITT_BUILD
3146  __kmp_itt_lock_releasing(lck);
3147 #endif /* USE_ITT_BUILD */
3148 
3149  int release_status;
3150  release_status = RELEASE_NESTED_LOCK(lck, gtid);
3151 #if OMPT_SUPPORT && OMPT_OPTIONAL
3152  // This is the case, if called from omp_init_lock_with_hint:
3153  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3154  if (!codeptr)
3155  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3156  if (ompt_enabled.enabled) {
3157  if (release_status == KMP_LOCK_RELEASED) {
3158  if (ompt_enabled.ompt_callback_mutex_released) {
3159  // release_lock_last
3160  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3161  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3162  }
3163  } else if (ompt_enabled.ompt_callback_nest_lock) {
3164  // release_lock_previous
3165  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3166  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3167  }
3168  }
3169 #endif
3170 
3171 #endif // KMP_USE_DYNAMIC_LOCK
3172 }
3173 
3174 /* try to acquire the lock */
3175 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3176  KMP_COUNT_BLOCK(OMP_test_lock);
3177 
3178 #if KMP_USE_DYNAMIC_LOCK
3179  int rc;
3180  int tag = KMP_EXTRACT_D_TAG(user_lock);
3181 #if USE_ITT_BUILD
3182  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3183 #endif
3184 #if OMPT_SUPPORT && OMPT_OPTIONAL
3185  // This is the case, if called from omp_init_lock_with_hint:
3186  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3187  if (!codeptr)
3188  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3189  if (ompt_enabled.ompt_callback_mutex_acquire) {
3190  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3191  ompt_mutex_test_lock, omp_lock_hint_none,
3192  __ompt_get_mutex_impl_type(user_lock),
3193  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3194  }
3195 #endif
3196 #if KMP_USE_INLINED_TAS
3197  if (tag == locktag_tas && !__kmp_env_consistency_check) {
3198  KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3199  } else
3200 #elif KMP_USE_INLINED_FUTEX
3201  if (tag == locktag_futex && !__kmp_env_consistency_check) {
3202  KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3203  } else
3204 #endif
3205  {
3206  rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3207  }
3208  if (rc) {
3209 #if USE_ITT_BUILD
3210  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3211 #endif
3212 #if OMPT_SUPPORT && OMPT_OPTIONAL
3213  if (ompt_enabled.ompt_callback_mutex_acquired) {
3214  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3215  ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3216  }
3217 #endif
3218  return FTN_TRUE;
3219  } else {
3220 #if USE_ITT_BUILD
3221  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3222 #endif
3223  return FTN_FALSE;
3224  }
3225 
3226 #else // KMP_USE_DYNAMIC_LOCK
3227 
3228  kmp_user_lock_p lck;
3229  int rc;
3230 
3231  if ((__kmp_user_lock_kind == lk_tas) &&
3232  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3233  lck = (kmp_user_lock_p)user_lock;
3234  }
3235 #if KMP_USE_FUTEX
3236  else if ((__kmp_user_lock_kind == lk_futex) &&
3237  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3238  lck = (kmp_user_lock_p)user_lock;
3239  }
3240 #endif
3241  else {
3242  lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3243  }
3244 
3245 #if USE_ITT_BUILD
3246  __kmp_itt_lock_acquiring(lck);
3247 #endif /* USE_ITT_BUILD */
3248 #if OMPT_SUPPORT && OMPT_OPTIONAL
3249  // This is the case, if called from omp_init_lock_with_hint:
3250  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3251  if (!codeptr)
3252  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3253  if (ompt_enabled.ompt_callback_mutex_acquire) {
3254  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3255  ompt_mutex_test_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3256  (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3257  }
3258 #endif
3259 
3260  rc = TEST_LOCK(lck, gtid);
3261 #if USE_ITT_BUILD
3262  if (rc) {
3263  __kmp_itt_lock_acquired(lck);
3264  } else {
3265  __kmp_itt_lock_cancelled(lck);
3266  }
3267 #endif /* USE_ITT_BUILD */
3268 #if OMPT_SUPPORT && OMPT_OPTIONAL
3269  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3270  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3271  ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3272  }
3273 #endif
3274 
3275  return (rc ? FTN_TRUE : FTN_FALSE);
3276 
3277  /* Can't use serial interval since not block structured */
3278 
3279 #endif // KMP_USE_DYNAMIC_LOCK
3280 }
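
// Editor's illustrative sketch (not part of the runtime): omp_test_lock is the
// non-blocking counterpart of omp_set_lock; it reaches the test path above and
// returns nonzero only when the lock was actually acquired. Kept under #if 0:
#if 0
#include <omp.h>
static void example_test_lock(omp_lock_t *l, int *shared_work) {
  if (omp_test_lock(l)) {
    ++*shared_work;     // got the lock without blocking
    omp_unset_lock(l);
  } else {
    /* lock busy: do something else instead of waiting */
  }
}
#endif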
3281 
3282 /* try to acquire the lock */
3283 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3284 #if KMP_USE_DYNAMIC_LOCK
3285  int rc;
3286 #if USE_ITT_BUILD
3287  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3288 #endif
3289 #if OMPT_SUPPORT && OMPT_OPTIONAL
3290  // This is the case, if called from omp_init_lock_with_hint:
3291  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3292  if (!codeptr)
3293  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3294  if (ompt_enabled.ompt_callback_mutex_acquire) {
3295  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3296  ompt_mutex_test_nest_lock, omp_lock_hint_none,
3297  __ompt_get_mutex_impl_type(user_lock),
3298  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3299  }
3300 #endif
3301  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3302 #if USE_ITT_BUILD
3303  if (rc) {
3304  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3305  } else {
3306  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3307  }
3308 #endif
3309 #if OMPT_SUPPORT && OMPT_OPTIONAL
3310  if (ompt_enabled.enabled && rc) {
3311  if (rc == 1) {
3312  if (ompt_enabled.ompt_callback_mutex_acquired) {
3313  // lock_first
3314  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3315  ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3316  codeptr);
3317  }
3318  } else {
3319  if (ompt_enabled.ompt_callback_nest_lock) {
3320  // lock_next
3321  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3322  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3323  }
3324  }
3325  }
3326 #endif
3327  return rc;
3328 
3329 #else // KMP_USE_DYNAMIC_LOCK
3330 
3331  kmp_user_lock_p lck;
3332  int rc;
3333 
3334  if ((__kmp_user_lock_kind == lk_tas) &&
3335  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3336  OMP_NEST_LOCK_T_SIZE)) {
3337  lck = (kmp_user_lock_p)user_lock;
3338  }
3339 #if KMP_USE_FUTEX
3340  else if ((__kmp_user_lock_kind == lk_futex) &&
3341  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3342  OMP_NEST_LOCK_T_SIZE)) {
3343  lck = (kmp_user_lock_p)user_lock;
3344  }
3345 #endif
3346  else {
3347  lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3348  }
3349 
3350 #if USE_ITT_BUILD
3351  __kmp_itt_lock_acquiring(lck);
3352 #endif /* USE_ITT_BUILD */
3353 
3354 #if OMPT_SUPPORT && OMPT_OPTIONAL
3355  // This is the case, if called from omp_init_lock_with_hint:
3356  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3357  if (!codeptr)
3358  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3359  if (ompt_enabled.enabled &&
3360      ompt_enabled.ompt_callback_mutex_acquire) {
3361  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3362  ompt_mutex_test_nest_lock, omp_lock_hint_none,
3363  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3364  codeptr);
3365  }
3366 #endif
3367 
3368  rc = TEST_NESTED_LOCK(lck, gtid);
3369 #if USE_ITT_BUILD
3370  if (rc) {
3371  __kmp_itt_lock_acquired(lck);
3372  } else {
3373  __kmp_itt_lock_cancelled(lck);
3374  }
3375 #endif /* USE_ITT_BUILD */
3376 #if OMPT_SUPPORT && OMPT_OPTIONAL
3377  if (ompt_enabled.enabled && rc) {
3378  if (rc == 1) {
3379  if (ompt_enabled.ompt_callback_mutex_acquired) {
3380  // lock_first
3381  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3382  ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3383  }
3384  } else {
3385  if (ompt_enabled.ompt_callback_nest_lock) {
3386  // lock_next
3387  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3388  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3389  }
3390  }
3391  }
3392 #endif
3393  return rc;
3394 
3395  /* Can't use serial interval since not block structured */
3396 
3397 #endif // KMP_USE_DYNAMIC_LOCK
3398 }
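
// Editor's illustrative sketch (not part of the runtime): for nestable locks
// the test routine returns the new nesting depth on success (1 on the first
// acquisition, >1 on re-acquisition by the owner) and 0 on failure, matching
// the rc == 1 "lock_first" vs. "lock_next" handling above. Kept under #if 0:
#if 0
#include <omp.h>
static void example_test_nest_lock(omp_nest_lock_t *nl) {
  int depth = omp_test_nest_lock(nl);
  if (depth == 0)
    return;             // lock held by another thread
  /* depth == 1: first acquisition; depth > 1: this thread already owned it */
  omp_unset_nest_lock(nl);
}
#endif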
3399 
3400 // Interface to fast scalable reduce methods routines
3401 
3402 // keep the selected method in a thread local structure for cross-function
3403 // usage: will be used in __kmpc_end_reduce* functions;
3404 // another solution: to re-determine the method one more time in
3405 // __kmpc_end_reduce* functions (new prototype required then)
3406 // AT: which solution is better?
3407 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3408  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3409 
3410 #define __KMP_GET_REDUCTION_METHOD(gtid) \
3411  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3412 
3413 // description of the packed_reduction_method variable: look at the macros in
3414 // kmp.h
3415 
3416 // used in a critical section reduce block
3417 static __forceinline void
3418 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3419  kmp_critical_name *crit) {
3420 
3421  // this lock was visible to a customer and to the threading profile tool as a
3422  // serial overhead span (although it's used for an internal purpose only)
3423  // why was it visible in previous implementation?
3424  // should we keep it visible in new reduce block?
3425  kmp_user_lock_p lck;
3426 
3427 #if KMP_USE_DYNAMIC_LOCK
3428 
3429  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3430  // Check if it is initialized.
3431  if (*lk == 0) {
3432  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3433  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3434  KMP_GET_D_TAG(__kmp_user_lock_seq));
3435  } else {
3436  __kmp_init_indirect_csptr(crit, loc, global_tid,
3437  KMP_GET_I_TAG(__kmp_user_lock_seq));
3438  }
3439  }
3440  // Branch for accessing the actual lock object and set operation. This
3441  // branching is inevitable since this lock initialization does not follow the
3442  // normal dispatch path (lock table is not used).
3443  if (KMP_EXTRACT_D_TAG(lk) != 0) {
3444  lck = (kmp_user_lock_p)lk;
3445  KMP_DEBUG_ASSERT(lck != NULL);
3446  if (__kmp_env_consistency_check) {
3447  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3448  }
3449  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3450  } else {
3451  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3452  lck = ilk->lock;
3453  KMP_DEBUG_ASSERT(lck != NULL);
3454  if (__kmp_env_consistency_check) {
3455  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3456  }
3457  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3458  }
3459 
3460 #else // KMP_USE_DYNAMIC_LOCK
3461 
3462  // We know that the fast reduction code is only emitted by Intel compilers
3463  // with 32 byte critical sections. If there isn't enough space, then we
3464  // have to use a pointer.
3465  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3466  lck = (kmp_user_lock_p)crit;
3467  } else {
3468  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3469  }
3470  KMP_DEBUG_ASSERT(lck != NULL);
3471 
3472  if (__kmp_env_consistency_check)
3473  __kmp_push_sync(global_tid, ct_critical, loc, lck);
3474 
3475  __kmp_acquire_user_lock_with_checks(lck, global_tid);
3476 
3477 #endif // KMP_USE_DYNAMIC_LOCK
3478 }
3479 
3480 // used in a critical section reduce block
3481 static __forceinline void
3482 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3483  kmp_critical_name *crit) {
3484 
3485  kmp_user_lock_p lck;
3486 
3487 #if KMP_USE_DYNAMIC_LOCK
3488 
3489  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3490  lck = (kmp_user_lock_p)crit;
3491  if (__kmp_env_consistency_check)
3492  __kmp_pop_sync(global_tid, ct_critical, loc);
3493  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3494  } else {
3495  kmp_indirect_lock_t *ilk =
3496  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3497  if (__kmp_env_consistency_check)
3498  __kmp_pop_sync(global_tid, ct_critical, loc);
3499  KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3500  }
3501 
3502 #else // KMP_USE_DYNAMIC_LOCK
3503 
3504  // We know that the fast reduction code is only emitted by Intel compilers
3505  // with 32 byte critical sections. If there isn't enough space, then we have
3506  // to use a pointer.
3507  if (__kmp_base_user_lock_size > 32) {
3508  lck = *((kmp_user_lock_p *)crit);
3509  KMP_ASSERT(lck != NULL);
3510  } else {
3511  lck = (kmp_user_lock_p)crit;
3512  }
3513 
3514  if (__kmp_env_consistency_check)
3515  __kmp_pop_sync(global_tid, ct_critical, loc);
3516 
3517  __kmp_release_user_lock_with_checks(lck, global_tid);
3518 
3519 #endif // KMP_USE_DYNAMIC_LOCK
3520 } // __kmp_end_critical_section_reduce_block
3521 
3522 static __forceinline int
3523 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3524  int *task_state) {
3525  kmp_team_t *team;
3526 
3527  // Check whether we are inside the teams construct.
3528  if (th->th.th_teams_microtask) {
3529  *team_p = team = th->th.th_team;
3530  if (team->t.t_level == th->th.th_teams_level) {
3531  // This is reduction at teams construct.
3532  KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3533  // Let's swap teams temporarily for the reduction.
3534  th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3535  th->th.th_team = team->t.t_parent;
3536  th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3537  th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3538  *task_state = th->th.th_task_state;
3539  th->th.th_task_state = 0;
3540 
3541  return 1;
3542  }
3543  }
3544  return 0;
3545 }
3546 
3547 static __forceinline void
3548 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3549  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3550  th->th.th_info.ds.ds_tid = 0;
3551  th->th.th_team = team;
3552  th->th.th_team_nproc = team->t.t_nproc;
3553  th->th.th_task_team = team->t.t_task_team[task_state];
3554  __kmp_type_convert(task_state, &(th->th.th_task_state));
3555 }
3556 
3557 /* 2.a.i. Reduce Block without a terminating barrier */
3573 kmp_int32
3574 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3575  size_t reduce_size, void *reduce_data,
3576  void (*reduce_func)(void *lhs_data, void *rhs_data),
3577  kmp_critical_name *lck) {
3578 
3579  KMP_COUNT_BLOCK(REDUCE_nowait);
3580  int retval = 0;
3581  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3582  kmp_info_t *th;
3583  kmp_team_t *team;
3584  int teams_swapped = 0, task_state;
3585  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3586  __kmp_assert_valid_gtid(global_tid);
3587 
3588  // why do we need this initialization here at all?
3589  // Reduction clause cannot be used as a stand-alone directive.
3590 
3591  // do not call __kmp_serial_initialize(), it will be called by
3592  // __kmp_parallel_initialize() if needed
3593  // possible detection of false-positive race by the threadchecker ???
3594  if (!TCR_4(__kmp_init_parallel))
3595  __kmp_parallel_initialize();
3596 
3597  __kmp_resume_if_soft_paused();
3598 
3599 // check correctness of reduce block nesting
3600 #if KMP_USE_DYNAMIC_LOCK
3601  if (__kmp_env_consistency_check)
3602  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3603 #else
3604  if (__kmp_env_consistency_check)
3605  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3606 #endif
3607 
3608  th = __kmp_thread_from_gtid(global_tid);
3609  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3610 
3611  // packed_reduction_method value will be reused by __kmp_end_reduce* function,
3612  // the value should be kept in a variable
3613  // the variable should be either a construct-specific or thread-specific
3614  // property, not a team specific property
3615  // (a thread can reach the next reduce block on the next construct, reduce
3616  // method may differ on the next construct)
3617  // an ident_t "loc" parameter could be used as a construct-specific property
3618  // (what if loc == 0?)
3619  // (if both construct-specific and team-specific variables were shared,
3620  // then unnecessary extra syncs would be needed)
3621  // a thread-specific variable is better regarding two issues above (next
3622  // construct and extra syncs)
3623  // a thread-specific "th_local.reduction_method" variable is used currently
3624  // each thread executes the 'determine' and 'set' lines (restricting this to
3625  // one thread would only add unnecessary extra syncs)
3626 
3627  packed_reduction_method = __kmp_determine_reduction_method(
3628  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3629  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3630 
3631  OMPT_REDUCTION_DECL(th, global_tid);
3632  if (packed_reduction_method == critical_reduce_block) {
3633 
3634  OMPT_REDUCTION_BEGIN;
3635 
3636  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3637  retval = 1;
3638 
3639  } else if (packed_reduction_method == empty_reduce_block) {
3640 
3641  OMPT_REDUCTION_BEGIN;
3642 
3643  // usage: if team size == 1, no synchronization is required (Intel
3644  // platforms only)
3645  retval = 1;
3646 
3647  } else if (packed_reduction_method == atomic_reduce_block) {
3648 
3649  retval = 2;
3650 
3651  // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3652  // won't be called by the code gen)
3653  // (it's not quite good, because the checking block has been closed by
3654  // this 'pop',
3655  // but atomic operation has not been executed yet, will be executed
3656  // slightly later, literally on next instruction)
3657  if (__kmp_env_consistency_check)
3658  __kmp_pop_sync(global_tid, ct_reduce, loc);
3659 
3660  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3661  tree_reduce_block)) {
3662 
3663 // AT: performance issue: a real barrier here
3664 // AT: (if primary thread is slow, other threads are blocked here waiting for
3665 // the primary thread to come and release them)
3666 // AT: (it's not what a customer might expect when specifying the NOWAIT clause)
3667 // AT: (specifying NOWAIT won't result in a performance improvement; it will
3668 // be confusing to a customer)
3669 // AT: another implementation of *barrier_gather*nowait() (or some other design)
3670 // might go faster and be more in line with the sense of NOWAIT
3671 // AT: TO DO: run the EPCC benchmark and compare times
3672 
3673 // this barrier should be invisible to a customer and to the threading profile
3674 // tool (it's neither a terminating barrier nor customer's code, it's
3675 // used for an internal purpose)
3676 #if OMPT_SUPPORT
3677  // JP: can this barrier potentially lead to task scheduling?
3678  // JP: as long as there is a barrier in the implementation, OMPT should and
3679  // will provide the barrier events, so we set up the necessary frame/return
3680  // addresses.
3681  ompt_frame_t *ompt_frame;
3682  if (ompt_enabled.enabled) {
3683  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3684  if (ompt_frame->enter_frame.ptr == NULL)
3685  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3686  }
3687  OMPT_STORE_RETURN_ADDRESS(global_tid);
3688 #endif
3689 #if USE_ITT_NOTIFY
3690  __kmp_threads[global_tid]->th.th_ident = loc;
3691 #endif
3692  retval =
3693  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3694  global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3695  retval = (retval != 0) ? (0) : (1);
3696 #if OMPT_SUPPORT && OMPT_OPTIONAL
3697  if (ompt_enabled.enabled) {
3698  ompt_frame->enter_frame = ompt_data_none;
3699  }
3700 #endif
3701 
3702  // all other workers except primary thread should do this pop here
3703  // ( none of other workers will get to __kmpc_end_reduce_nowait() )
3704  if (__kmp_env_consistency_check) {
3705  if (retval == 0) {
3706  __kmp_pop_sync(global_tid, ct_reduce, loc);
3707  }
3708  }
3709 
3710  } else {
3711 
3712  // should never reach this block
3713  KMP_ASSERT(0); // "unexpected method"
3714  }
3715  if (teams_swapped) {
3716  __kmp_restore_swapped_teams(th, team, task_state);
3717  }
3718  KA_TRACE(
3719  10,
3720  ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3721  global_tid, packed_reduction_method, retval));
3722 
3723  return retval;
3724 }
3725 
3734 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3735  kmp_critical_name *lck) {
3736 
3737  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3738 
3739  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3740  __kmp_assert_valid_gtid(global_tid);
3741 
3742  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3743 
3744  OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3745 
3746  if (packed_reduction_method == critical_reduce_block) {
3747 
3748  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3749  OMPT_REDUCTION_END;
3750 
3751  } else if (packed_reduction_method == empty_reduce_block) {
3752 
3753  // if team size == 1, no synchronization is required
3754  // (Intel platforms only)
3755 
3756  OMPT_REDUCTION_END;
3757 
3758  } else if (packed_reduction_method == atomic_reduce_block) {
3759 
3760  // neither the primary thread nor the workers should get here
3761  // (code gen does not generate this call in case 2: atomic reduce block)
3762  // actually it would be better to remove this 'else if' entirely;
3763  // after removal this value would be caught by the 'else' and assert
3764 
3765  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3766  tree_reduce_block)) {
3767 
3768  // only primary thread gets here
3769  // OMPT: tree reduction is annotated in the barrier code
3770 
3771  } else {
3772 
3773  // should never reach this block
3774  KMP_ASSERT(0); // "unexpected method"
3775  }
3776 
3777  if (__kmp_env_consistency_check)
3778  __kmp_pop_sync(global_tid, ct_reduce, loc);
3779 
3780  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3781  global_tid, packed_reduction_method));
3782 
3783  return;
3784 }
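
// --- Illustrative sketch (not part of the runtime source) -------------------
// Rough shape of the epilogue a compiler might emit for a worksharing
// construct with `reduction(+ : sum) nowait`, based on the return-value
// convention of __kmpc_reduce_nowait() above:
//   1 - combine the partial value here, then call __kmpc_end_reduce_nowait()
//   2 - combine with an atomic update; __kmpc_end_reduce_nowait() is not
//       called (see the atomic_reduce_block comment above)
//   0 - nothing to do on this thread (non-primary worker of a tree reduction)
// The names my_lock, my_add_func, sum and sum_private are hypothetical
// placeholders, not symbols defined by this library.
#if 0
static kmp_critical_name my_lock; // zero-initialized lock storage

static void my_add_func(void *lhs_data, void *rhs_data) {
  *(kmp_int64 *)lhs_data += *(kmp_int64 *)rhs_data; // combine partial sums
}

static void emitted_reduction_epilogue(ident_t *loc, kmp_int32 gtid,
                                       kmp_int64 *sum, kmp_int64 sum_private) {
  kmp_int32 rc =
      __kmpc_reduce_nowait(loc, gtid, /*num_vars=*/1, sizeof(kmp_int64),
                           &sum_private, my_add_func, &my_lock);
  if (rc == 1) { // critical/empty/tree method: combine, then close the block
    *sum += sum_private;
    __kmpc_end_reduce_nowait(loc, gtid, &my_lock);
  } else if (rc == 2) { // atomic method: combine atomically, no end call
    __atomic_fetch_add(sum, sum_private, __ATOMIC_RELAXED);
  }
}
#endif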
3785 
3786 /* 2.a.ii. Reduce Block with a terminating barrier */
3787 
3803 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3804  size_t reduce_size, void *reduce_data,
3805  void (*reduce_func)(void *lhs_data, void *rhs_data),
3806  kmp_critical_name *lck) {
3807  KMP_COUNT_BLOCK(REDUCE_wait);
3808  int retval = 0;
3809  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3810  kmp_info_t *th;
3811  kmp_team_t *team;
3812  int teams_swapped = 0, task_state;
3813 
3814  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3815  __kmp_assert_valid_gtid(global_tid);
3816 
3817  // why do we need this initialization here at all?
3818  // A reduction clause cannot be a stand-alone directive.
3819 
3820  // do not call __kmp_serial_initialize(); it will be called by
3821  // __kmp_parallel_initialize() if needed
3822  // (possible false-positive race detection by the thread checker?)
3823  if (!TCR_4(__kmp_init_parallel))
3824  __kmp_parallel_initialize();
3825 
3826  __kmp_resume_if_soft_paused();
3827 
3828 // check correctness of reduce block nesting
3829 #if KMP_USE_DYNAMIC_LOCK
3830  if (__kmp_env_consistency_check)
3831  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3832 #else
3833  if (__kmp_env_consistency_check)
3834  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3835 #endif
3836 
3837  th = __kmp_thread_from_gtid(global_tid);
3838  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3839 
3840  packed_reduction_method = __kmp_determine_reduction_method(
3841  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3842  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3843 
3844  OMPT_REDUCTION_DECL(th, global_tid);
3845 
3846  if (packed_reduction_method == critical_reduce_block) {
3847 
3848  OMPT_REDUCTION_BEGIN;
3849  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3850  retval = 1;
3851 
3852  } else if (packed_reduction_method == empty_reduce_block) {
3853 
3854  OMPT_REDUCTION_BEGIN;
3855  // if team size == 1, no synchronization is required
3856  // (Intel platforms only)
3857  retval = 1;
3858 
3859  } else if (packed_reduction_method == atomic_reduce_block) {
3860 
3861  retval = 2;
3862 
3863  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3864  tree_reduce_block)) {
3865 
3866 // case tree_reduce_block:
3867 // this barrier should be visible to a customer and to the threading profile
3868 // tool (it's a terminating barrier on constructs if NOWAIT is not specified)
3869 #if OMPT_SUPPORT
3870  ompt_frame_t *ompt_frame;
3871  if (ompt_enabled.enabled) {
3872  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3873  if (ompt_frame->enter_frame.ptr == NULL)
3874  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3875  }
3876  OMPT_STORE_RETURN_ADDRESS(global_tid);
3877 #endif
3878 #if USE_ITT_NOTIFY
3879  __kmp_threads[global_tid]->th.th_ident =
3880  loc; // needed for correct notification of frames
3881 #endif
3882  retval =
3883  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3884  global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3885  retval = (retval != 0) ? (0) : (1);
3886 #if OMPT_SUPPORT && OMPT_OPTIONAL
3887  if (ompt_enabled.enabled) {
3888  ompt_frame->enter_frame = ompt_data_none;
3889  }
3890 #endif
3891 
3892  // all workers except the primary thread should do this pop here
3893  // (none of the workers except the primary will enter __kmpc_end_reduce())
3894  if (__kmp_env_consistency_check) {
3895  if (retval == 0) { // 0: all other workers; 1: primary thread
3896  __kmp_pop_sync(global_tid, ct_reduce, loc);
3897  }
3898  }
3899 
3900  } else {
3901 
3902  // should never reach this block
3903  KMP_ASSERT(0); // "unexpected method"
3904  }
3905  if (teams_swapped) {
3906  __kmp_restore_swapped_teams(th, team, task_state);
3907  }
3908 
3909  KA_TRACE(10,
3910  ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3911  global_tid, packed_reduction_method, retval));
3912  return retval;
3913 }
3914 
3925 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3926  kmp_critical_name *lck) {
3927 
3928  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3929  kmp_info_t *th;
3930  kmp_team_t *team;
3931  int teams_swapped = 0, task_state;
3932 
3933  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3934  __kmp_assert_valid_gtid(global_tid);
3935 
3936  th = __kmp_thread_from_gtid(global_tid);
3937  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3938 
3939  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3940 
3941  // this barrier should be visible to a customer and to the threading profile
3942  // tool (it's a terminating barrier on constructs if NOWAIT is not specified)
3943  OMPT_REDUCTION_DECL(th, global_tid);
3944 
3945  if (packed_reduction_method == critical_reduce_block) {
3946  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3947 
3948  OMPT_REDUCTION_END;
3949 
3950 // TODO: implicit barrier: should be exposed
3951 #if OMPT_SUPPORT
3952  ompt_frame_t *ompt_frame;
3953  if (ompt_enabled.enabled) {
3954  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3955  if (ompt_frame->enter_frame.ptr == NULL)
3956  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3957  }
3958  OMPT_STORE_RETURN_ADDRESS(global_tid);
3959 #endif
3960 #if USE_ITT_NOTIFY
3961  __kmp_threads[global_tid]->th.th_ident = loc;
3962 #endif
3963  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3964 #if OMPT_SUPPORT && OMPT_OPTIONAL
3965  if (ompt_enabled.enabled) {
3966  ompt_frame->enter_frame = ompt_data_none;
3967  }
3968 #endif
3969 
3970  } else if (packed_reduction_method == empty_reduce_block) {
3971 
3972  OMPT_REDUCTION_END;
3973 
3974 // if team size == 1, no synchronization is required (Intel platforms only)
3975 
3976 // TODO: implicit barrier: should be exposed
3977 #if OMPT_SUPPORT
3978  ompt_frame_t *ompt_frame;
3979  if (ompt_enabled.enabled) {
3980  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3981  if (ompt_frame->enter_frame.ptr == NULL)
3982  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3983  }
3984  OMPT_STORE_RETURN_ADDRESS(global_tid);
3985 #endif
3986 #if USE_ITT_NOTIFY
3987  __kmp_threads[global_tid]->th.th_ident = loc;
3988 #endif
3989  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3990 #if OMPT_SUPPORT && OMPT_OPTIONAL
3991  if (ompt_enabled.enabled) {
3992  ompt_frame->enter_frame = ompt_data_none;
3993  }
3994 #endif
3995 
3996  } else if (packed_reduction_method == atomic_reduce_block) {
3997 
3998 #if OMPT_SUPPORT
3999  ompt_frame_t *ompt_frame;
4000  if (ompt_enabled.enabled) {
4001  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
4002  if (ompt_frame->enter_frame.ptr == NULL)
4003  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
4004  }
4005  OMPT_STORE_RETURN_ADDRESS(global_tid);
4006 #endif
4007 // TODO: implicit barrier: should be exposed
4008 #if USE_ITT_NOTIFY
4009  __kmp_threads[global_tid]->th.th_ident = loc;
4010 #endif
4011  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
4012 #if OMPT_SUPPORT && OMPT_OPTIONAL
4013  if (ompt_enabled.enabled) {
4014  ompt_frame->enter_frame = ompt_data_none;
4015  }
4016 #endif
4017 
4018  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
4019  tree_reduce_block)) {
4020 
4021  // only primary thread executes here (primary releases all other workers)
4022  __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
4023  global_tid);
4024 
4025  } else {
4026 
4027  // should never reach this block
4028  KMP_ASSERT(0); // "unexpected method"
4029  }
4030  if (teams_swapped) {
4031  __kmp_restore_swapped_teams(th, team, task_state);
4032  }
4033 
4034  if (__kmp_env_consistency_check)
4035  __kmp_pop_sync(global_tid, ct_reduce, loc);
4036 
4037  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
4038  global_tid, packed_reduction_method));
4039 
4040  return;
4041 }
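
// --- Illustrative sketch (not part of the runtime source) -------------------
// For the blocking variant (no `nowait`) the emitted epilogue might take the
// same shape, except that __kmpc_end_reduce() also supplies the construct's
// terminating barrier, so it is reached in the critical/empty and atomic
// cases too; only the non-primary workers of a tree reduction (retval 0),
// which are released inside __kmp_barrier(), skip it. Variable names are the
// hypothetical placeholders from the sketch after __kmpc_end_reduce_nowait().
#if 0
  kmp_int32 rc = __kmpc_reduce(loc, gtid, 1, sizeof(kmp_int64), &sum_private,
                               my_add_func, &my_lock);
  if (rc == 1) {
    *sum += sum_private; // combine (under the runtime's critical section for
                         // the critical_reduce_block method)
    __kmpc_end_reduce(loc, gtid, &my_lock);
  } else if (rc == 2) {
    __atomic_fetch_add(sum, sum_private, __ATOMIC_RELAXED);
    __kmpc_end_reduce(loc, gtid, &my_lock); // also provides the barrier
  }
#endif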
4042 
4043 #undef __KMP_GET_REDUCTION_METHOD
4044 #undef __KMP_SET_REDUCTION_METHOD
4045 
4046 /* end of interface to fast scalable reduce routines */
4047 
4048 kmp_uint64 __kmpc_get_taskid() {
4049 
4050  kmp_int32 gtid;
4051  kmp_info_t *thread;
4052 
4053  gtid = __kmp_get_gtid();
4054  if (gtid < 0) {
4055  return 0;
4056  }
4057  thread = __kmp_thread_from_gtid(gtid);
4058  return thread->th.th_current_task->td_task_id;
4059 
4060 } // __kmpc_get_taskid
4061 
4062 kmp_uint64 __kmpc_get_parent_taskid() {
4063 
4064  kmp_int32 gtid;
4065  kmp_info_t *thread;
4066  kmp_taskdata_t *parent_task;
4067 
4068  gtid = __kmp_get_gtid();
4069  if (gtid < 0) {
4070  return 0;
4071  }
4072  thread = __kmp_thread_from_gtid(gtid);
4073  parent_task = thread->th.th_current_task->td_parent;
4074  return (parent_task == NULL ? 0 : parent_task->td_task_id);
4075 
4076 } // __kmpc_get_parent_taskid
4077 
4089 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
4090  const struct kmp_dim *dims) {
4091  __kmp_assert_valid_gtid(gtid);
4092  int j, idx;
4093  kmp_int64 last, trace_count;
4094  kmp_info_t *th = __kmp_threads[gtid];
4095  kmp_team_t *team = th->th.th_team;
4096  kmp_uint32 *flags;
4097  kmp_disp_t *pr_buf = th->th.th_dispatch;
4098  dispatch_shared_info_t *sh_buf;
4099 
4100  KA_TRACE(
4101  20,
4102  ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
4103  gtid, num_dims, !team->t.t_serialized));
4104  KMP_DEBUG_ASSERT(dims != NULL);
4105  KMP_DEBUG_ASSERT(num_dims > 0);
4106 
4107  if (team->t.t_serialized) {
4108  KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
4109  return; // no dependencies if team is serialized
4110  }
4111  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
4112  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
4113  // the next loop
4114  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4115 
4116  // Save bounds info into allocated private buffer
4117  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
4118  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
4119  th, sizeof(kmp_int64) * (4 * num_dims + 1));
4120  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4121  pr_buf->th_doacross_info[0] =
4122  (kmp_int64)num_dims; // first element is number of dimensions
4123  // Also save the address of num_done so it can be accessed later without
4124  // knowing the buffer index
4125  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
4126  pr_buf->th_doacross_info[2] = dims[0].lo;
4127  pr_buf->th_doacross_info[3] = dims[0].up;
4128  pr_buf->th_doacross_info[4] = dims[0].st;
4129  last = 5;
4130  for (j = 1; j < num_dims; ++j) {
4131  kmp_int64
4132  range_length; // keeps the range of each dimension except the first, dims[0]
4133  if (dims[j].st == 1) { // most common case
4134  // AC: should we care about ranges bigger than LLONG_MAX? (not for now)
4135  range_length = dims[j].up - dims[j].lo + 1;
4136  } else {
4137  if (dims[j].st > 0) {
4138  KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
4139  range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
4140  } else { // negative increment
4141  KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
4142  range_length =
4143  (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
4144  }
4145  }
4146  pr_buf->th_doacross_info[last++] = range_length;
4147  pr_buf->th_doacross_info[last++] = dims[j].lo;
4148  pr_buf->th_doacross_info[last++] = dims[j].up;
4149  pr_buf->th_doacross_info[last++] = dims[j].st;
4150  }
4151 
4152  // Compute total trip count.
4153  // Start with range of dims[0] which we don't need to keep in the buffer.
4154  if (dims[0].st == 1) { // most common case
4155  trace_count = dims[0].up - dims[0].lo + 1;
4156  } else if (dims[0].st > 0) {
4157  KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
4158  trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
4159  } else { // negative increment
4160  KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
4161  trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
4162  }
4163  for (j = 1; j < num_dims; ++j) {
4164  trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
4165  }
4166  KMP_DEBUG_ASSERT(trace_count > 0);
4167 
4168  // Check that the shared buffer is not still occupied by a previous loop
4169  // (the one that used buffer index idx - __kmp_dispatch_num_buffers)
4170  if (idx != sh_buf->doacross_buf_idx) {
4171  // Shared buffer is occupied, wait for it to be free
4172  __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
4173  __kmp_eq_4, NULL);
4174  }
4175 #if KMP_32_BIT_ARCH
4176  // Check if we are the first thread. After the CAS the first thread gets 0,
4177  // others get 1 if initialization is in progress, allocated pointer otherwise.
4178  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
4179  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
4180  (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
4181 #else
4182  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
4183  (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
4184 #endif
4185  if (flags == NULL) {
4186  // we are the first thread, allocate the array of flags
4187  size_t size =
4188  (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration
4189  flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
4190  KMP_MB();
4191  sh_buf->doacross_flags = flags;
4192  } else if (flags == (kmp_uint32 *)1) {
4193 #if KMP_32_BIT_ARCH
4194  // initialization is still in progress, need to wait
4195  while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
4196 #else
4197  while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
4198 #endif
4199  KMP_YIELD(TRUE);
4200  KMP_MB();
4201  } else {
4202  KMP_MB();
4203  }
4204  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
4205  pr_buf->th_doacross_flags =
4206  sh_buf->doacross_flags; // save private copy in order to not
4207  // touch shared buffer on each iteration
4208  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4209 }
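
// --- Layout note (illustrative, not part of the runtime source) -------------
// The private th_doacross_info buffer filled above has the following layout
// of kmp_int64 elements:
//   [0]                 number of dimensions (num_dims)
//   [1]                 address of sh_buf->doacross_num_done
//   [2], [3], [4]       dims[0].lo, dims[0].up, dims[0].st
//   [4*j+1 .. 4*j+4]    range_length, lo, up, st of each dimension j >= 1
// __kmpc_doacross_wait()/__kmpc_doacross_post() below use this data to
// linearize an iteration vector into a single number; a minimal sketch for
// the st == 1 case (the hypothetical helper is not part of the library):
#if 0
static kmp_int64 linearize_iteration(const kmp_int64 *info,
                                     const kmp_int64 *vec) {
  kmp_int64 iter_number = vec[0] - info[2]; // dimension 0: vec[0] - lo
  for (kmp_int64 j = 1; j < info[0]; ++j) {
    kmp_int64 ln = info[4 * j + 1];            // range length of dimension j
    kmp_int64 iter = vec[j] - info[4 * j + 2]; // vec[j] - lo, assuming st == 1
    iter_number = iter + ln * iter_number;     // row-major linearization
  }
  // the corresponding flag is bit (iter_number % 32) of 32-bit word
  // (iter_number >> 5) in the shared doacross_flags array
  return iter_number;
}
#endif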
4210 
4211 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4212  __kmp_assert_valid_gtid(gtid);
4213  kmp_int64 shft;
4214  size_t num_dims, i;
4215  kmp_uint32 flag;
4216  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4217  kmp_info_t *th = __kmp_threads[gtid];
4218  kmp_team_t *team = th->th.th_team;
4219  kmp_disp_t *pr_buf;
4220  kmp_int64 lo, up, st;
4221 
4222  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4223  if (team->t.t_serialized) {
4224  KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4225  return; // no dependencies if team is serialized
4226  }
4227 
4228  // calculate sequential iteration number and check out-of-bounds condition
4229  pr_buf = th->th.th_dispatch;
4230  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4231  num_dims = (size_t)pr_buf->th_doacross_info[0];
4232  lo = pr_buf->th_doacross_info[2];
4233  up = pr_buf->th_doacross_info[3];
4234  st = pr_buf->th_doacross_info[4];
4235 #if OMPT_SUPPORT && OMPT_OPTIONAL
4236  ompt_dependence_t deps[num_dims];
4237 #endif
4238  if (st == 1) { // most common case
4239  if (vec[0] < lo || vec[0] > up) {
4240  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4241  "bounds [%lld,%lld]\n",
4242  gtid, vec[0], lo, up));
4243  return;
4244  }
4245  iter_number = vec[0] - lo;
4246  } else if (st > 0) {
4247  if (vec[0] < lo || vec[0] > up) {
4248  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4249  "bounds [%lld,%lld]\n",
4250  gtid, vec[0], lo, up));
4251  return;
4252  }
4253  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4254  } else { // negative increment
4255  if (vec[0] > lo || vec[0] < up) {
4256  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4257  "bounds [%lld,%lld]\n",
4258  gtid, vec[0], lo, up));
4259  return;
4260  }
4261  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4262  }
4263 #if OMPT_SUPPORT && OMPT_OPTIONAL
4264  deps[0].variable.value = iter_number;
4265  deps[0].dependence_type = ompt_dependence_type_sink;
4266 #endif
4267  for (i = 1; i < num_dims; ++i) {
4268  kmp_int64 iter, ln;
4269  size_t j = i * 4;
4270  ln = pr_buf->th_doacross_info[j + 1];
4271  lo = pr_buf->th_doacross_info[j + 2];
4272  up = pr_buf->th_doacross_info[j + 3];
4273  st = pr_buf->th_doacross_info[j + 4];
4274  if (st == 1) {
4275  if (vec[i] < lo || vec[i] > up) {
4276  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4277  "bounds [%lld,%lld]\n",
4278  gtid, vec[i], lo, up));
4279  return;
4280  }
4281  iter = vec[i] - lo;
4282  } else if (st > 0) {
4283  if (vec[i] < lo || vec[i] > up) {
4284  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4285  "bounds [%lld,%lld]\n",
4286  gtid, vec[i], lo, up));
4287  return;
4288  }
4289  iter = (kmp_uint64)(vec[i] - lo) / st;
4290  } else { // st < 0
4291  if (vec[i] > lo || vec[i] < up) {
4292  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4293  "bounds [%lld,%lld]\n",
4294  gtid, vec[i], lo, up));
4295  return;
4296  }
4297  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4298  }
4299  iter_number = iter + ln * iter_number;
4300 #if OMPT_SUPPORT && OMPT_OPTIONAL
4301  deps[i].variable.value = iter;
4302  deps[i].dependence_type = ompt_dependence_type_sink;
4303 #endif
4304  }
4305  shft = iter_number % 32; // use 32-bit granularity
4306  iter_number >>= 5; // divided by 32
4307  flag = 1 << shft;
4308  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4309  KMP_YIELD(TRUE);
4310  }
4311  KMP_MB();
4312 #if OMPT_SUPPORT && OMPT_OPTIONAL
4313  if (ompt_enabled.ompt_callback_dependences) {
4314  ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4315  &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4316  }
4317 #endif
4318  KA_TRACE(20,
4319  ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4320  gtid, (iter_number << 5) + shft));
4321 }
4322 
4323 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4324  __kmp_assert_valid_gtid(gtid);
4325  kmp_int64 shft;
4326  size_t num_dims, i;
4327  kmp_uint32 flag;
4328  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4329  kmp_info_t *th = __kmp_threads[gtid];
4330  kmp_team_t *team = th->th.th_team;
4331  kmp_disp_t *pr_buf;
4332  kmp_int64 lo, st;
4333 
4334  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4335  if (team->t.t_serialized) {
4336  KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4337  return; // no dependencies if team is serialized
4338  }
4339 
4340  // calculate sequential iteration number (same as in "wait" but no
4341  // out-of-bounds checks)
4342  pr_buf = th->th.th_dispatch;
4343  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4344  num_dims = (size_t)pr_buf->th_doacross_info[0];
4345  lo = pr_buf->th_doacross_info[2];
4346  st = pr_buf->th_doacross_info[4];
4347 #if OMPT_SUPPORT && OMPT_OPTIONAL
4348  ompt_dependence_t deps[num_dims];
4349 #endif
4350  if (st == 1) { // most common case
4351  iter_number = vec[0] - lo;
4352  } else if (st > 0) {
4353  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4354  } else { // negative increment
4355  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4356  }
4357 #if OMPT_SUPPORT && OMPT_OPTIONAL
4358  deps[0].variable.value = iter_number;
4359  deps[0].dependence_type = ompt_dependence_type_source;
4360 #endif
4361  for (i = 1; i < num_dims; ++i) {
4362  kmp_int64 iter, ln;
4363  size_t j = i * 4;
4364  ln = pr_buf->th_doacross_info[j + 1];
4365  lo = pr_buf->th_doacross_info[j + 2];
4366  st = pr_buf->th_doacross_info[j + 4];
4367  if (st == 1) {
4368  iter = vec[i] - lo;
4369  } else if (st > 0) {
4370  iter = (kmp_uint64)(vec[i] - lo) / st;
4371  } else { // st < 0
4372  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4373  }
4374  iter_number = iter + ln * iter_number;
4375 #if OMPT_SUPPORT && OMPT_OPTIONAL
4376  deps[i].variable.value = iter;
4377  deps[i].dependence_type = ompt_dependence_type_source;
4378 #endif
4379  }
4380 #if OMPT_SUPPORT && OMPT_OPTIONAL
4381  if (ompt_enabled.ompt_callback_dependences) {
4382  ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4383  &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4384  }
4385 #endif
4386  shft = iter_number % 32; // use 32-bit granularity
4387  iter_number >>= 5; // divided by 32
4388  flag = 1 << shft;
4389  KMP_MB();
4390  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4391  KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4392  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4393  (iter_number << 5) + shft));
4394 }
4395 
4396 void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4397  __kmp_assert_valid_gtid(gtid);
4398  kmp_int32 num_done;
4399  kmp_info_t *th = __kmp_threads[gtid];
4400  kmp_team_t *team = th->th.th_team;
4401  kmp_disp_t *pr_buf = th->th.th_dispatch;
4402 
4403  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4404  if (team->t.t_serialized) {
4405  KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4406  return; // nothing to do
4407  }
4408  num_done =
4409  KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
4410  if (num_done == th->th.th_team_nproc) {
4411  // we are the last thread, need to free shared resources
4412  int idx = pr_buf->th_doacross_buf_idx - 1;
4413  dispatch_shared_info_t *sh_buf =
4414  &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4415  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4416  (kmp_int64)&sh_buf->doacross_num_done);
4417  KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4418  KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4419  __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4420  sh_buf->doacross_flags = NULL;
4421  sh_buf->doacross_num_done = 0;
4422  sh_buf->doacross_buf_idx +=
4423  __kmp_dispatch_num_buffers; // free buffer for future re-use
4424  }
4425  // free private resources (need to keep buffer index forever)
4426  pr_buf->th_doacross_flags = NULL;
4427  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4428  pr_buf->th_doacross_info = NULL;
4429  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4430 }
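
// --- Illustrative sketch (not part of the runtime source) -------------------
// Rough shape of the calls a compiler might emit for a doacross loop such as
//     #pragma omp for ordered(1)
//     for (i = lo; i <= up; i++) {
//       #pragma omp ordered depend(sink : i - 1)
//       ...                      // consume result of iteration i - 1
//       #pragma omp ordered depend(source)
//     }
// The helper name emitted_doacross_chunk and its locals are hypothetical; the
// out-of-range sink for the first iteration is simply ignored by
// __kmpc_doacross_wait() (see the bounds checks above).
#if 0
static void emitted_doacross_chunk(ident_t *loc, int gtid, kmp_int64 lo,
                                   kmp_int64 up, kmp_int64 chunk_lo,
                                   kmp_int64 chunk_up) {
  struct kmp_dim dim;
  dim.lo = lo;
  dim.up = up;
  dim.st = 1;
  __kmpc_doacross_init(loc, gtid, /*num_dims=*/1, &dim);
  for (kmp_int64 i = chunk_lo; i <= chunk_up; ++i) { // this thread's chunk
    kmp_int64 sink_vec[1] = {i - 1};
    __kmpc_doacross_wait(loc, gtid, sink_vec); // wait until i-1 is posted
    /* ... loop body ... */
    kmp_int64 source_vec[1] = {i};
    __kmpc_doacross_post(loc, gtid, source_vec); // publish iteration i
  }
  __kmpc_doacross_fini(loc, gtid);
}
#endif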
4431 
4432 /* OpenMP 5.1 Memory Management routines */
4433 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4434  return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
4435 }
4436 
4437 void *omp_aligned_alloc(size_t align, size_t size,
4438  omp_allocator_handle_t allocator) {
4439  return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
4440 }
4441 
4442 void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
4443  return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
4444 }
4445 
4446 void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
4447  omp_allocator_handle_t allocator) {
4448  return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
4449 }
4450 
4451 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
4452  omp_allocator_handle_t free_allocator) {
4453  return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
4454  free_allocator);
4455 }
4456 
4457 void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4458  ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
4459 }
4460 /* end of OpenMP 5.1 Memory Management routines */
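
// --- Illustrative usage (not part of the runtime source) --------------------
// These entry points implement the OpenMP allocator API declared in omp.h.
// A user program might call them as shown below; omp_default_mem_alloc is one
// of the predefined allocator handles from the OpenMP specification, and the
// function name is hypothetical.
#if 0
#include <omp.h>

static double *allocator_example(size_t n) {
  // 64-byte aligned, zero-initialized buffer from the default memory space
  double *a = (double *)omp_aligned_calloc(64, n, sizeof(double),
                                           omp_default_mem_alloc);
  if (a == NULL)
    return NULL;
  // grow the buffer, keeping the same allocator for allocation and free
  double *b = (double *)omp_realloc(a, 2 * n * sizeof(double),
                                    omp_default_mem_alloc,
                                    omp_default_mem_alloc);
  if (b == NULL) {
    omp_free(a, omp_default_mem_alloc);
    return NULL;
  }
  return b; // caller releases with omp_free(b, omp_default_mem_alloc)
}
#endif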
4461 
4462 int __kmpc_get_target_offload(void) {
4463  if (!__kmp_init_serial) {
4464  __kmp_serial_initialize();
4465  }
4466  return __kmp_target_offload;
4467 }
4468 
4469 int __kmpc_pause_resource(kmp_pause_status_t level) {
4470  if (!__kmp_init_serial) {
4471  return 1; // Can't pause if runtime is not initialized
4472  }
4473  return __kmp_pause_resource(level);
4474 }
4475 
4476 void __kmpc_error(ident_t *loc, int severity, const char *message) {
4477  if (!__kmp_init_serial)
4478  __kmp_serial_initialize();
4479 
4480  KMP_ASSERT(severity == severity_warning || severity == severity_fatal);
4481 
4482 #if OMPT_SUPPORT
4483  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
4484  ompt_callbacks.ompt_callback(ompt_callback_error)(
4485  (ompt_severity_t)severity, message, KMP_STRLEN(message),
4486  OMPT_GET_RETURN_ADDRESS(0));
4487  }
4488 #endif // OMPT_SUPPORT
4489 
4490  char *src_loc;
4491  if (loc && loc->psource) {
4492  kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
4493  src_loc =
4494  __kmp_str_format("%s:%d:%d", str_loc.file, str_loc.line, str_loc.col);
4495  __kmp_str_loc_free(&str_loc);
4496  } else {
4497  src_loc = __kmp_str_format("unknown");
4498  }
4499 
4500  if (severity == severity_warning)
4501  KMP_WARNING(UserDirectedWarning, src_loc, message);
4502  else
4503  KMP_FATAL(UserDirectedError, src_loc, message);
4504 
4505  __kmp_str_free(&src_loc);
4506 }
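
// --- Illustrative sketch (not part of the runtime source) -------------------
// A runtime error directive such as
//     #pragma omp error at(execution) severity(warning) message("check input")
// might be lowered by a compiler into a call of this form; my_loc stands for
// a hypothetical compiler-generated source-location descriptor.
#if 0
  __kmpc_error(&my_loc, severity_warning, "check input");
#endif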
4507 
4508 // Mark the beginning of a scope directive.
4509 void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4510 // reserved is for future extension of the scope directive and is not used.
4511 #if OMPT_SUPPORT && OMPT_OPTIONAL
4512  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4513  kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4514  int tid = __kmp_tid_from_gtid(gtid);
4515  ompt_callbacks.ompt_callback(ompt_callback_work)(
4516  ompt_work_scope, ompt_scope_begin,
4517  &(team->t.ompt_team_info.parallel_data),
4518  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4519  OMPT_GET_RETURN_ADDRESS(0));
4520  }
4521 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
4522 }
4523 
4524 // Mark the end of a scope directive.
4525 void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4526 // reserved is for future extension of the scope directive and is not used.
4527 #if OMPT_SUPPORT && OMPT_OPTIONAL
4528  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4529  kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4530  int tid = __kmp_tid_from_gtid(gtid);
4531  ompt_callbacks.ompt_callback(ompt_callback_work)(
4532  ompt_work_scope, ompt_scope_end,
4533  &(team->t.ompt_team_info.parallel_data),
4534  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4535  OMPT_GET_RETURN_ADDRESS(0));
4536  }
4537 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
4538 }
4539 
4540 #ifdef KMP_USE_VERSION_SYMBOLS
4541 // For GOMP compatibility there are two versions of each omp_* API.
4542 // One is the plain C symbol and one is the Fortran symbol with an appended
4543 // underscore. When we implement a specific ompc_* version of an omp_*
4544 // function, we want the plain GOMP versioned symbol to alias the ompc_* version
4545 // instead of the Fortran versions in kmp_ftn_entry.h
4546 extern "C" {
4547 // Have to undef these from omp.h so they aren't translated into
4548 // their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below
4549 #ifdef omp_set_affinity_format
4550 #undef omp_set_affinity_format
4551 #endif
4552 #ifdef omp_get_affinity_format
4553 #undef omp_get_affinity_format
4554 #endif
4555 #ifdef omp_display_affinity
4556 #undef omp_display_affinity
4557 #endif
4558 #ifdef omp_capture_affinity
4559 #undef omp_capture_affinity
4560 #endif
4561 KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
4562  "OMP_5.0");
4563 KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
4564  "OMP_5.0");
4565 KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
4566  "OMP_5.0");
4567 KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
4568  "OMP_5.0");
4569 } // extern "C"
4570 #endif