LLVM OpenMP* Runtime Library
kmp_atomic.cpp
1 /*
2  * kmp_atomic.cpp -- ATOMIC implementation routines
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "kmp_atomic.h"
14 #include "kmp.h" // TRUE, asm routines prototypes
15 
16 typedef unsigned char uchar;
17 typedef unsigned short ushort;
18 
561 /*
562  * Global vars
563  */
564 
565 #ifndef KMP_GOMP_COMPAT
566 int __kmp_atomic_mode = 1; // Intel perf
567 #else
568 int __kmp_atomic_mode = 2; // GOMP compatibility
569 #endif /* KMP_GOMP_COMPAT */
570 
571 KMP_ALIGN(128)
572 
573 // Control access to all user coded atomics in Gnu compat mode
574 kmp_atomic_lock_t __kmp_atomic_lock;
575 // Control access to all user coded atomics for 1-byte fixed data types
576 kmp_atomic_lock_t __kmp_atomic_lock_1i;
577 // Control access to all user coded atomics for 2-byte fixed data types
578 kmp_atomic_lock_t __kmp_atomic_lock_2i;
579 // Control access to all user coded atomics for 4-byte fixed data types
580 kmp_atomic_lock_t __kmp_atomic_lock_4i;
581 // Control access to all user coded atomics for kmp_real32 data type
582 kmp_atomic_lock_t __kmp_atomic_lock_4r;
583 // Control access to all user coded atomics for 8-byte fixed data types
584 kmp_atomic_lock_t __kmp_atomic_lock_8i;
585 // Control access to all user coded atomics for kmp_real64 data type
586 kmp_atomic_lock_t __kmp_atomic_lock_8r;
587 // Control access to all user coded atomics for complex byte data type
588 kmp_atomic_lock_t __kmp_atomic_lock_8c;
589 // Control access to all user coded atomics for long double data type
590 kmp_atomic_lock_t __kmp_atomic_lock_10r;
591 // Control access to all user coded atomics for _Quad data type
592 kmp_atomic_lock_t __kmp_atomic_lock_16r;
593 // Control access to all user coded atomics for double complex data type
594 kmp_atomic_lock_t __kmp_atomic_lock_16c;
595 // Control access to all user coded atomics for long double complex type
596 kmp_atomic_lock_t __kmp_atomic_lock_20c;
597 // Control access to all user coded atomics for _Quad complex data type
598 kmp_atomic_lock_t __kmp_atomic_lock_32c;
599 
600 /* 2007-03-02:
601  Without the "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have
602  a bug on *_32 and *_32e. This is just a temporary workaround for the
603  problem. The right solution seems to be to write the OP_CMPXCHG and
604  MIN_MAX_CMPXCHG routines in assembly language. */
605 #define KMP_ATOMIC_VOLATILE volatile
606 
607 #if (KMP_ARCH_X86) && KMP_HAVE_QUAD
608 
609 static inline Quad_a4_t operator+(Quad_a4_t &lhs, Quad_a4_t &rhs) {
610  return lhs.q + rhs.q;
611 }
612 static inline Quad_a4_t operator-(Quad_a4_t &lhs, Quad_a4_t &rhs) {
613  return lhs.q - rhs.q;
614 }
615 static inline Quad_a4_t operator*(Quad_a4_t &lhs, Quad_a4_t &rhs) {
616  return lhs.q * rhs.q;
617 }
618 static inline Quad_a4_t operator/(Quad_a4_t &lhs, Quad_a4_t &rhs) {
619  return lhs.q / rhs.q;
620 }
621 static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
622  return lhs.q < rhs.q;
623 }
624 static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
625  return lhs.q > rhs.q;
626 }
627 
628 static inline Quad_a16_t operator+(Quad_a16_t &lhs, Quad_a16_t &rhs) {
629  return lhs.q + rhs.q;
630 }
631 static inline Quad_a16_t operator-(Quad_a16_t &lhs, Quad_a16_t &rhs) {
632  return lhs.q - rhs.q;
633 }
634 static inline Quad_a16_t operator*(Quad_a16_t &lhs, Quad_a16_t &rhs) {
635  return lhs.q * rhs.q;
636 }
637 static inline Quad_a16_t operator/(Quad_a16_t &lhs, Quad_a16_t &rhs) {
638  return lhs.q / rhs.q;
639 }
640 static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
641  return lhs.q < rhs.q;
642 }
643 static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
644  return lhs.q > rhs.q;
645 }
646 
647 static inline kmp_cmplx128_a4_t operator+(kmp_cmplx128_a4_t &lhs,
648  kmp_cmplx128_a4_t &rhs) {
649  return lhs.q + rhs.q;
650 }
651 static inline kmp_cmplx128_a4_t operator-(kmp_cmplx128_a4_t &lhs,
652  kmp_cmplx128_a4_t &rhs) {
653  return lhs.q - rhs.q;
654 }
655 static inline kmp_cmplx128_a4_t operator*(kmp_cmplx128_a4_t &lhs,
656  kmp_cmplx128_a4_t &rhs) {
657  return lhs.q * rhs.q;
658 }
659 static inline kmp_cmplx128_a4_t operator/(kmp_cmplx128_a4_t &lhs,
660  kmp_cmplx128_a4_t &rhs) {
661  return lhs.q / rhs.q;
662 }
663 
664 static inline kmp_cmplx128_a16_t operator+(kmp_cmplx128_a16_t &lhs,
665  kmp_cmplx128_a16_t &rhs) {
666  return lhs.q + rhs.q;
667 }
668 static inline kmp_cmplx128_a16_t operator-(kmp_cmplx128_a16_t &lhs,
669  kmp_cmplx128_a16_t &rhs) {
670  return lhs.q - rhs.q;
671 }
672 static inline kmp_cmplx128_a16_t operator*(kmp_cmplx128_a16_t &lhs,
673  kmp_cmplx128_a16_t &rhs) {
674  return lhs.q * rhs.q;
675 }
676 static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs,
677  kmp_cmplx128_a16_t &rhs) {
678  return lhs.q / rhs.q;
679 }
680 
681 #endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD
682 
683 // ATOMIC implementation routines -----------------------------------------
684 // One routine for each operation and operand type.
685 // All routine declarations look like
686 // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
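//
// A minimal usage sketch (illustrative only; the exact lowering is chosen by
// the compiler, and the names "loc" and "gtid" below are hypothetical): for
//     #pragma omp atomic
//     x += y;                            // kmp_int32 x, y
// the compiler is expected to emit a call such as
//     __kmpc_atomic_fixed4_add(&loc, gtid, &x, y);
// where loc is an ident_t describing the source location and gtid is the
// global thread id of the calling thread.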
687 
688 #define KMP_CHECK_GTID \
689  if (gtid == KMP_GTID_UNKNOWN) { \
690  gtid = __kmp_entry_gtid(); \
691  } // check and get gtid when needed
692 
693 // Beginning of a definition (provides name, parameters, debug trace)
694 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
695 // fixed)
696 // OP_ID - operation identifier (add, sub, mul, ...)
697 // TYPE - operands' type
698 #define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
699  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
700  TYPE *lhs, TYPE rhs) { \
701  KMP_DEBUG_ASSERT(__kmp_init_serial); \
702  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
703 
704 // ------------------------------------------------------------------------
705 // Lock variables used for critical sections for various size operands
706 #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
707 #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
708 #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
709 #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
710 #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
711 #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
712 #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
713 #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
714 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
715 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
716 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
717 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
718 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
719 
720 // ------------------------------------------------------------------------
721 // Operation on *lhs, rhs bound by critical section
722 // OP - operator (it's supposed to contain an assignment)
723 // LCK_ID - lock identifier
724 // Note: don't check gtid as it should always be valid
725 // 1- and 2-byte operands: a valid gtid is expected; others: check before this macro
726 #define OP_CRITICAL(OP, LCK_ID) \
727  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
728  \
729  (*lhs) OP(rhs); \
730  \
731  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
732 
733 #define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \
734  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
735  (*lhs) = (TYPE)((*lhs)OP((TYPE)rhs)); \
736  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
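
// As an illustration (sketch only), OP_UPDATE_CRITICAL(kmp_real32, +, 4r)
// expands to roughly:
//     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4r, gtid);
//     (*lhs) = (kmp_real32)((*lhs) + ((kmp_real32)rhs));
//     __kmp_release_atomic_lock(&__kmp_atomic_lock_4r, gtid);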
737 
738 // ------------------------------------------------------------------------
739 // For GNU compatibility, we may need to use a critical section,
740 // even though it is not required by the ISA.
741 //
742 // On IA-32 architecture, all atomic operations except for fixed 4 byte add,
743 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
744 // critical section. On Intel(R) 64, all atomic operations are done with fetch
745 // and add or compare and exchange. Therefore, the FLAG parameter to this
746 // macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions that
747 // require a critical section, where we predict that they will be implemented
748 // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
749 //
750 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
751 // the FLAG parameter should always be 1. If we know that we will be using
752 // a critical section, then we want to make certain that we use the generic
753 // lock __kmp_atomic_lock to protect the atomic update, and not one of the
754 // locks that are specialized based upon the size or type of the data.
755 //
756 // If FLAG is 0, then we rely on dead code elimination by the build
757 // compiler to get rid of the unused block of code and save a needless
758 // branch at runtime.
759 
760 #ifdef KMP_GOMP_COMPAT
761 #define OP_GOMP_CRITICAL(OP, FLAG) \
762  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
763  KMP_CHECK_GTID; \
764  OP_CRITICAL(OP, 0); \
765  return; \
766  }
767 
768 #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG) \
769  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
770  KMP_CHECK_GTID; \
771  OP_UPDATE_CRITICAL(TYPE, OP, 0); \
772  return; \
773  }
774 #else
775 #define OP_GOMP_CRITICAL(OP, FLAG)
776 #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG)
777 #endif /* KMP_GOMP_COMPAT */
778 
779 #if KMP_MIC
780 #define KMP_DO_PAUSE _mm_delay_32(1)
781 #else
782 #define KMP_DO_PAUSE
783 #endif /* KMP_MIC */
784 
785 // ------------------------------------------------------------------------
786 // Operation on *lhs, rhs using "compare_and_store" routine
787 // TYPE - operands' type
788 // BITS - size in bits, used to distinguish low level calls
789 // OP - operator
790 #define OP_CMPXCHG(TYPE, BITS, OP) \
791  { \
792  TYPE old_value, new_value; \
793  old_value = *(TYPE volatile *)lhs; \
794  new_value = (TYPE)(old_value OP((TYPE)rhs)); \
795  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
796  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
797  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
798  KMP_DO_PAUSE; \
799  \
800  old_value = *(TYPE volatile *)lhs; \
801  new_value = (TYPE)(old_value OP((TYPE)rhs)); \
802  } \
803  }
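
// Illustrative sketch of OP_CMPXCHG(kmp_real32, 32, +): compute the new value,
// then retry until the 32-bit compare-and-store succeeds, reinterpreting the
// float bits as a kmp_int32 (casts simplified here):
//     kmp_real32 old_value = *(kmp_real32 volatile *)lhs;
//     kmp_real32 new_value = old_value + (kmp_real32)rhs;
//     while (!KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs,
//                                         *(kmp_int32 *)&old_value,
//                                         *(kmp_int32 *)&new_value)) {
//       KMP_DO_PAUSE;
//       old_value = *(kmp_real32 volatile *)lhs;
//       new_value = old_value + (kmp_real32)rhs;
//     }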
804 
805 #if USE_CMPXCHG_FIX
806 // 2007-06-25:
807 // workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
808 // and win_32e are affected (I verified the asm). The compiler ignores the
809 // volatile qualifier of the temp_val in the OP_CMPXCHG macro. This is a
810 // compiler problem; the related tracker is C76005, targeted to 11.0. I
811 // verified the asm of the workaround.
812 #define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
813  { \
814  struct _sss { \
815  TYPE cmp; \
816  kmp_int##BITS *vvv; \
817  }; \
818  struct _sss old_value, new_value; \
819  old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
820  new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
821  *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
822  new_value.cmp = (TYPE)(old_value.cmp OP rhs); \
823  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
824  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
825  *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
826  KMP_DO_PAUSE; \
827  \
828  *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
829  new_value.cmp = (TYPE)(old_value.cmp OP rhs); \
830  } \
831  }
832 // end of the first part of the workaround for C78287
833 #endif // USE_CMPXCHG_FIX
834 
835 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
836 
837 // ------------------------------------------------------------------------
838 // X86 or X86_64: no alignment problems ====================================
839 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
840  GOMP_FLAG) \
841  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
842  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
843  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
844  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
845  }
846 // -------------------------------------------------------------------------
847 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
848  GOMP_FLAG) \
849  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
850  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
851  OP_CMPXCHG(TYPE, BITS, OP) \
852  }
853 #if USE_CMPXCHG_FIX
854 // -------------------------------------------------------------------------
855 // workaround for C78287 (complex(kind=4) data type)
856 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
857  MASK, GOMP_FLAG) \
858  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
859  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
860  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
861  }
862 // end of the second part of the workaround for C78287
863 #endif // USE_CMPXCHG_FIX
864 
865 #else
866 // -------------------------------------------------------------------------
867 // Code for other architectures that don't handle unaligned accesses.
868 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
869  GOMP_FLAG) \
870  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
871  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
872  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
873  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
874  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
875  } else { \
876  KMP_CHECK_GTID; \
877  OP_UPDATE_CRITICAL(TYPE, OP, \
878  LCK_ID) /* unaligned address - use critical */ \
879  } \
880  }
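// Note on MASK: it is the low-address mask used for the alignment test; the
// macro pastes it after "0x", so MASK == 7 checks 8-byte alignment:
//     if (!((kmp_uintptr_t)lhs & 0x7)) { /* aligned: lock-free path */ }
//     else                             { /* unaligned: critical section */ }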
881 // -------------------------------------------------------------------------
882 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
883  GOMP_FLAG) \
884  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
885  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
886  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
887  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
888  } else { \
889  KMP_CHECK_GTID; \
890  OP_UPDATE_CRITICAL(TYPE, OP, \
891  LCK_ID) /* unaligned address - use critical */ \
892  } \
893  }
894 #if USE_CMPXCHG_FIX
895 // -------------------------------------------------------------------------
896 // workaround for C78287 (complex(kind=4) data type)
897 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
898  MASK, GOMP_FLAG) \
899  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
900  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
901  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
902  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
903  } else { \
904  KMP_CHECK_GTID; \
905  OP_UPDATE_CRITICAL(TYPE, OP, \
906  LCK_ID) /* unaligned address - use critical */ \
907  } \
908  }
909 // end of the second part of the workaround for C78287
910 #endif // USE_CMPXCHG_FIX
911 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
912 
913 // Routines for ATOMIC 4-byte operands addition and subtraction
914 ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
915  0) // __kmpc_atomic_fixed4_add
916 ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
917  0) // __kmpc_atomic_fixed4_sub
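// As an illustration, the ATOMIC_FIXED_ADD(fixed4, add, ...) invocation above
// generates roughly (on X86/X86_64; trace/assert details omitted):
//     void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid, kmp_int32 *lhs,
//                                   kmp_int32 rhs) {
//       KMP_TEST_THEN_ADD32(lhs, +rhs); // lock-free fetch-and-add
//     }
// Its GOMP_FLAG of 0 means no GOMP-compatibility critical-section fallback is
// needed for this operation.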
918 
919 ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
920  KMP_ARCH_X86) // __kmpc_atomic_float4_add
921 ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
922  KMP_ARCH_X86) // __kmpc_atomic_float4_sub
923 
924 // Routines for ATOMIC 8-byte operands addition and subtraction
925 ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
926  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
927 ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
928  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub
929 
930 ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
931  KMP_ARCH_X86) // __kmpc_atomic_float8_add
932 ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
933  KMP_ARCH_X86) // __kmpc_atomic_float8_sub
934 
935 // ------------------------------------------------------------------------
936 // Entries definition for integer operands
937 // TYPE_ID - operands type and size (fixed4, float4)
938 // OP_ID - operation identifier (add, sub, mul, ...)
939 // TYPE - operand type
940 // BITS - size in bits, used to distinguish low level calls
941 // OP - operator (used in critical section)
942 // LCK_ID - lock identifier, used to possibly distinguish lock variable
943 // MASK - used for alignment check
944 
945 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
946 // ------------------------------------------------------------------------
947 // Routines for ATOMIC integer operands, other operators
948 // ------------------------------------------------------------------------
949 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
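// How to read one of these invocations (sketch): for example,
//     ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0, KMP_ARCH_X86)
// generates __kmpc_atomic_fixed1_add, which applies "+" to an 8-bit signed
// integer via KMP_COMPARE_AND_STORE_ACQ8; "1i" selects __kmp_atomic_lock_1i
// when an unaligned-address fallback is needed, "0" is the alignment mask, and
// the GOMP_FLAG of KMP_ARCH_X86 enables the GOMP-compatibility path on IA-32.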
950 ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
951  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
952 ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
953  0) // __kmpc_atomic_fixed1_andb
954 ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
955  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
956 ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
957  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
958 ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
959  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
960 ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
961  0) // __kmpc_atomic_fixed1_orb
962 ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
963  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
964 ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
965  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
966 ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
967  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
968 ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
969  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
970 ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
971  0) // __kmpc_atomic_fixed1_xor
972 ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
973  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
974 ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
975  0) // __kmpc_atomic_fixed2_andb
976 ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
977  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
978 ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
979  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
980 ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
981  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
982 ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
983  0) // __kmpc_atomic_fixed2_orb
984 ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
985  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
986 ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
987  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
988 ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
989  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
990 ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
991  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
992 ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
993  0) // __kmpc_atomic_fixed2_xor
994 ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
995  0) // __kmpc_atomic_fixed4_andb
996 ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
997  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
998 ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
999  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
1000 ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
1001  KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
1002 ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
1003  0) // __kmpc_atomic_fixed4_orb
1004 ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
1005  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
1006 ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
1007  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
1008 ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
1009  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
1010 ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
1011  0) // __kmpc_atomic_fixed4_xor
1012 ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
1013  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
1014 ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
1015  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
1016 ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
1017  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
1018 ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
1019  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
1020 ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
1021  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
1022 ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
1023  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
1024 ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
1025  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
1026 ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
1027  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
1028 ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
1029  KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
1030 ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
1031  KMP_ARCH_X86) // __kmpc_atomic_float4_div
1032 ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
1033  KMP_ARCH_X86) // __kmpc_atomic_float4_mul
1034 ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
1035  KMP_ARCH_X86) // __kmpc_atomic_float8_div
1036 ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
1037  KMP_ARCH_X86) // __kmpc_atomic_float8_mul
1038 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
1039 
1040 /* ------------------------------------------------------------------------ */
1041 /* Routines for C/C++ Reduction operators && and || */
1042 
1043 // ------------------------------------------------------------------------
1044 // Need separate macros for &&, || because there is no combined assignment
1045 // TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
1046 #define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1047  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1048  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1049  OP_CRITICAL(= *lhs OP, LCK_ID) \
1050  }
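// Net effect (sketch): __kmpc_atomic_fixed4_andl generated below atomically
// performs
//     *lhs = *lhs && rhs;   // result is 0 or 1
// Because C has no compound-assignment form of && or ||, the generic update
// macros cannot be reused and the operator is spelled out as "= *lhs OP".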
1051 
1052 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1053 
1054 // ------------------------------------------------------------------------
1055 // X86 or X86_64: no alignment problems ===================================
1056 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1057  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1058  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1059  OP_CMPXCHG(TYPE, BITS, OP) \
1060  }
1061 
1062 #else
1063 // ------------------------------------------------------------------------
1064 // Code for other architectures that don't handle unaligned accesses.
1065 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1066  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1067  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1068  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1069  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1070  } else { \
1071  KMP_CHECK_GTID; \
1072  OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \
1073  } \
1074  }
1075 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1076 
1077 ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
1078  KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
1079 ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
1080  KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
1081 ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
1082  KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
1083 ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
1084  KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
1085 ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
1086  0) // __kmpc_atomic_fixed4_andl
1087 ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
1088  0) // __kmpc_atomic_fixed4_orl
1089 ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
1090  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
1091 ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
1092  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl
1093 
1094 /* ------------------------------------------------------------------------- */
1095 /* Routines for Fortran operators that have no direct C counterpart: */
1096 /* MAX, MIN, .EQV., .NEQV. */
1097 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */
1098 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */
1099 
1100 // -------------------------------------------------------------------------
1101 // MIN and MAX need separate macros
1102 // OP - comparison operator used to check whether any action is needed
1103 #define MIN_MAX_CRITSECT(OP, LCK_ID) \
1104  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1105  \
1106  if (*lhs OP rhs) { /* still need actions? */ \
1107  *lhs = rhs; \
1108  } \
1109  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1110 
1111 // -------------------------------------------------------------------------
1112 #ifdef KMP_GOMP_COMPAT
1113 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG) \
1114  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1115  KMP_CHECK_GTID; \
1116  MIN_MAX_CRITSECT(OP, 0); \
1117  return; \
1118  }
1119 #else
1120 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
1121 #endif /* KMP_GOMP_COMPAT */
1122 
1123 // -------------------------------------------------------------------------
1124 #define MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
1125  { \
1126  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1127  TYPE old_value; \
1128  temp_val = *lhs; \
1129  old_value = temp_val; \
1130  while (old_value OP rhs && /* still need actions? */ \
1131  !KMP_COMPARE_AND_STORE_ACQ##BITS( \
1132  (kmp_int##BITS *)lhs, \
1133  *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
1134  *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
1135  temp_val = *lhs; \
1136  old_value = temp_val; \
1137  } \
1138  }
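// A sketch of what MIN_MAX_CMPXCHG amounts to for __kmpc_atomic_fixed4_max
// (OP is "<", casts simplified):
//     kmp_int32 old_value = *lhs;
//     while (old_value < rhs &&        // still need the update?
//            !KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs,
//                                         *(kmp_int32 *)&old_value,
//                                         *(kmp_int32 *)&rhs)) {
//       old_value = *lhs;              // lost the race: re-read and re-check
//     }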
1139 
1140 // -------------------------------------------------------------------------
1141 // 1-byte, 2-byte operands - use critical section
1142 #define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1143  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1144  if (*lhs OP rhs) { /* need actions? */ \
1145  GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1146  MIN_MAX_CRITSECT(OP, LCK_ID) \
1147  } \
1148  }
1149 
1150 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1151 
1152 // -------------------------------------------------------------------------
1153 // X86 or X86_64: no alignment problems ====================================
1154 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1155  GOMP_FLAG) \
1156  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1157  if (*lhs OP rhs) { \
1158  GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1159  MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
1160  } \
1161  }
1162 
1163 #else
1164 // -------------------------------------------------------------------------
1165 // Code for other architectures that don't handle unaligned accesses.
1166 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1167  GOMP_FLAG) \
1168  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1169  if (*lhs OP rhs) { \
1170  GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1171  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1172  MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1173  } else { \
1174  KMP_CHECK_GTID; \
1175  MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */ \
1176  } \
1177  } \
1178  }
1179 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1180 
1181 MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
1182  KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
1183 MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
1184  KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
1185 MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
1186  KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
1187 MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
1188  KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
1189 MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
1190  0) // __kmpc_atomic_fixed4_max
1191 MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
1192  0) // __kmpc_atomic_fixed4_min
1193 MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
1194  KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
1195 MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
1196  KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
1197 MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
1198  KMP_ARCH_X86) // __kmpc_atomic_float4_max
1199 MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
1200  KMP_ARCH_X86) // __kmpc_atomic_float4_min
1201 MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
1202  KMP_ARCH_X86) // __kmpc_atomic_float8_max
1203 MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
1204  KMP_ARCH_X86) // __kmpc_atomic_float8_min
1205 #if KMP_HAVE_QUAD
1206 MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
1207  1) // __kmpc_atomic_float16_max
1208 MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
1209  1) // __kmpc_atomic_float16_min
1210 #if (KMP_ARCH_X86)
1211 MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
1212  1) // __kmpc_atomic_float16_max_a16
1213 MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
1214  1) // __kmpc_atomic_float16_min_a16
1215 #endif // (KMP_ARCH_X86)
1216 #endif // KMP_HAVE_QUAD
1217 // ------------------------------------------------------------------------
1218 // .EQV. needs separate macros because it requires the bitwise complement (~)
1219 // OP is ignored for critical sections; "^= ~" is used instead
1220 #define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1221  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1222  OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
1223  OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* send assignment and complement */ \
1224  }
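// Net effect (sketch): for the fixed4 entries below, "eqv" atomically stores
//     *lhs = *lhs ^ ~rhs;              // == ~(*lhs ^ rhs), bitwise .EQV.
// while "neqv" is a plain xor update (*lhs ^= rhs) and can reuse the generic
// ATOMIC_CMPXCHG macro; only .EQV. needs these dedicated macros because the
// critical-section form has no single compound operator and must use "^= ~".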
1225 
1226 // ------------------------------------------------------------------------
1227 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1228 // ------------------------------------------------------------------------
1229 // X86 or X86_64: no alignment problems ===================================
1230 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1231  GOMP_FLAG) \
1232  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1233  OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
1234  OP_CMPXCHG(TYPE, BITS, OP) \
1235  }
1236 // ------------------------------------------------------------------------
1237 #else
1238 // ------------------------------------------------------------------------
1239 // Code for other architectures that don't handle unaligned accesses.
1240 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1241  GOMP_FLAG) \
1242  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1243  OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) \
1244  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1245  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1246  } else { \
1247  KMP_CHECK_GTID; \
1248  OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* unaligned address - use critical */ \
1249  } \
1250  }
1251 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1252 
1253 ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
1254  KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
1255 ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
1256  KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
1257 ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
1258  KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
1259 ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
1260  KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
1261 ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
1262  KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
1263 ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
1264  KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
1265 ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
1266  KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
1267 ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
1268  KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv
1269 
1270 // ------------------------------------------------------------------------
1271 // Routines for Extended types: long double, _Quad, complex flavours (use
1272 // critical section)
1273 // TYPE_ID, OP_ID, TYPE - detailed above
1274 // OP - operator
1275 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1276 #define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1277  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1278  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
1279  OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
1280  }
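// Illustrative expansion (trace/assert details omitted): the
// ATOMIC_CRITICAL(float10, add, long double, +, 10r, 1) invocation below
// produces roughly
//     void __kmpc_atomic_float10_add(ident_t *id_ref, int gtid,
//                                    long double *lhs, long double rhs) {
//       // GOMP-compatibility check first (GOMP_FLAG == 1), then:
//       __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
//       (*lhs) = (long double)((*lhs) + ((long double)rhs));
//       __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
//     }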
1281 
1282 /* ------------------------------------------------------------------------- */
1283 // routines for long double type
1284 ATOMIC_CRITICAL(float10, add, long double, +, 10r,
1285  1) // __kmpc_atomic_float10_add
1286 ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
1287  1) // __kmpc_atomic_float10_sub
1288 ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
1289  1) // __kmpc_atomic_float10_mul
1290 ATOMIC_CRITICAL(float10, div, long double, /, 10r,
1291  1) // __kmpc_atomic_float10_div
1292 #if KMP_HAVE_QUAD
1293 // routines for _Quad type
1294 ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
1295  1) // __kmpc_atomic_float16_add
1296 ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
1297  1) // __kmpc_atomic_float16_sub
1298 ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
1299  1) // __kmpc_atomic_float16_mul
1300 ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
1301  1) // __kmpc_atomic_float16_div
1302 #if (KMP_ARCH_X86)
1303 ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
1304  1) // __kmpc_atomic_float16_add_a16
1305 ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
1306  1) // __kmpc_atomic_float16_sub_a16
1307 ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
1308  1) // __kmpc_atomic_float16_mul_a16
1309 ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
1310  1) // __kmpc_atomic_float16_div_a16
1311 #endif // (KMP_ARCH_X86)
1312 #endif // KMP_HAVE_QUAD
1313 // routines for complex types
1314 
1315 #if USE_CMPXCHG_FIX
1316 // workaround for C78287 (complex(kind=4) data type)
1317 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
1318  1) // __kmpc_atomic_cmplx4_add
1319 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
1320  1) // __kmpc_atomic_cmplx4_sub
1321 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
1322  1) // __kmpc_atomic_cmplx4_mul
1323 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
1324  1) // __kmpc_atomic_cmplx4_div
1325 // end of the workaround for C78287
1326 #else
1327 ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
1328 ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
1329 ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
1330 ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
1331 #endif // USE_CMPXCHG_FIX
1332 
1333 ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
1334 ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
1335 ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
1336 ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
1337 ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
1338  1) // __kmpc_atomic_cmplx10_add
1339 ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
1340  1) // __kmpc_atomic_cmplx10_sub
1341 ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
1342  1) // __kmpc_atomic_cmplx10_mul
1343 ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
1344  1) // __kmpc_atomic_cmplx10_div
1345 #if KMP_HAVE_QUAD
1346 ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
1347  1) // __kmpc_atomic_cmplx16_add
1348 ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
1349  1) // __kmpc_atomic_cmplx16_sub
1350 ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
1351  1) // __kmpc_atomic_cmplx16_mul
1352 ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
1353  1) // __kmpc_atomic_cmplx16_div
1354 #if (KMP_ARCH_X86)
1355 ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
1356  1) // __kmpc_atomic_cmplx16_add_a16
1357 ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1358  1) // __kmpc_atomic_cmplx16_sub_a16
1359 ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
1360  1) // __kmpc_atomic_cmplx16_mul_a16
1361 ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1362  1) // __kmpc_atomic_cmplx16_div_a16
1363 #endif // (KMP_ARCH_X86)
1364 #endif // KMP_HAVE_QUAD
1365 
1366 // OpenMP 4.0: x = expr binop x for non-commutative operations.
1367 // Supported only on IA-32 architecture and Intel(R) 64
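// For example (sketch; "loc" and "gtid" are hypothetical names), for
//     #pragma omp atomic
//     x = expr / x;                    // double x;
// the compiler is expected to call
//     __kmpc_atomic_float8_div_rev(&loc, gtid, &x, expr);
// which atomically stores rhs / (*lhs) into *lhs - note the reversed operand
// order compared to __kmpc_atomic_float8_div.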
1368 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1369 
1370 // ------------------------------------------------------------------------
1371 // Operation on *lhs, rhs bound by critical section
1372 // OP - operator (it's supposed to contain an assignment)
1373 // LCK_ID - lock identifier
1374 // Note: don't check gtid as it should always be valid
1375 // 1- and 2-byte operands: a valid gtid is expected; others: check before this macro
1376 #define OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
1377  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1378  \
1379  (*lhs) = (TYPE)((rhs)OP(*lhs)); \
1380  \
1381  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1382 
1383 #ifdef KMP_GOMP_COMPAT
1384 #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG) \
1385  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1386  KMP_CHECK_GTID; \
1387  OP_CRITICAL_REV(TYPE, OP, 0); \
1388  return; \
1389  }
1390 
1391 #else
1392 #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)
1393 #endif /* KMP_GOMP_COMPAT */
1394 
1395 // Beginning of a definition (provides name, parameters, debug trace)
1396 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1397 // fixed)
1398 // OP_ID - operation identifier (add, sub, mul, ...)
1399 // TYPE - operands' type
1400 #define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1401  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \
1402  TYPE *lhs, TYPE rhs) { \
1403  KMP_DEBUG_ASSERT(__kmp_init_serial); \
1404  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));
1405 
1406 // ------------------------------------------------------------------------
1407 // Operation on *lhs, rhs using "compare_and_store" routine
1408 // TYPE - operands' type
1409 // BITS - size in bits, used to distinguish low level calls
1410 // OP - operator
1411 // Note: temp_val introduced in order to force the compiler to read
1412 // *lhs only once (w/o it the compiler reads *lhs twice)
1413 #define OP_CMPXCHG_REV(TYPE, BITS, OP) \
1414  { \
1415  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1416  TYPE old_value, new_value; \
1417  temp_val = *lhs; \
1418  old_value = temp_val; \
1419  new_value = (TYPE)(rhs OP old_value); \
1420  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
1421  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
1422  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
1423  KMP_DO_PAUSE; \
1424  \
1425  temp_val = *lhs; \
1426  old_value = temp_val; \
1427  new_value = (TYPE)(rhs OP old_value); \
1428  } \
1429  }
1430 
1431 // -------------------------------------------------------------------------
1432 #define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \
1433  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
1434  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1435  OP_CMPXCHG_REV(TYPE, BITS, OP) \
1436  }
1437 
1438 // ------------------------------------------------------------------------
1439 // Entries definition for integer operands
1440 // TYPE_ID - operands type and size (fixed4, float4)
1441 // OP_ID - operation identifier (add, sub, mul, ...)
1442 // TYPE - operand type
1443 // BITS - size in bits, used to distinguish low level calls
1444 // OP - operator (used in critical section)
1445 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1446 
1447 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
1448 // ------------------------------------------------------------------------
1449 // Routines for ATOMIC integer operands, other operators
1450 // ------------------------------------------------------------------------
1451 // TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
1452 ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
1453  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
1454 ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
1455  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
1456 ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
1457  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
1458 ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
1459  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
1460 ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
1461  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
1462 ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
1463  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev
1464 
1465 ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
1466  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
1467 ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
1468  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
1469 ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
1470  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
1471 ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
1472  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
1473 ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
1474  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
1475 ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
1476  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev
1477 
1478 ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
1479  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
1480 ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
1481  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
1482 ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
1483  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
1484 ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
1485  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
1486 ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
1487  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
1488 ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
1489  KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev
1490 
1491 ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
1492  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
1493 ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
1494  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
1495 ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
1496  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
1497 ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
1498  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
1499 ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
1500  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
1501 ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
1502  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev
1503 
1504 ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
1505  KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
1506 ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
1507  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev
1508 
1509 ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
1510  KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
1511 ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
1512  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
1513 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG
1514 
1515 // ------------------------------------------------------------------------
1516 // Routines for Extended types: long double, _Quad, complex flavours (use
1517 // critical section)
1518 // TYPE_ID, OP_ID, TYPE - detailed above
1519 // OP - operator
1520 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1521 #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1522  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
1523  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1524  OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
1525  }
1526 
1527 /* ------------------------------------------------------------------------- */
1528 // routines for long double type
1529 ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
1530  1) // __kmpc_atomic_float10_sub_rev
1531 ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
1532  1) // __kmpc_atomic_float10_div_rev
1533 #if KMP_HAVE_QUAD
1534 // routines for _Quad type
1535 ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
1536  1) // __kmpc_atomic_float16_sub_rev
1537 ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
1538  1) // __kmpc_atomic_float16_div_rev
1539 #if (KMP_ARCH_X86)
1540 ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
1541  1) // __kmpc_atomic_float16_sub_a16_rev
1542 ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
1543  1) // __kmpc_atomic_float16_div_a16_rev
1544 #endif // KMP_ARCH_X86
1545 #endif // KMP_HAVE_QUAD
1546 
1547 // routines for complex types
1548 ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
1549  1) // __kmpc_atomic_cmplx4_sub_rev
1550 ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
1551  1) // __kmpc_atomic_cmplx4_div_rev
1552 ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
1553  1) // __kmpc_atomic_cmplx8_sub_rev
1554 ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
1555  1) // __kmpc_atomic_cmplx8_div_rev
1556 ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
1557  1) // __kmpc_atomic_cmplx10_sub_rev
1558 ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
1559  1) // __kmpc_atomic_cmplx10_div_rev
1560 #if KMP_HAVE_QUAD
1561 ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
1562  1) // __kmpc_atomic_cmplx16_sub_rev
1563 ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
1564  1) // __kmpc_atomic_cmplx16_div_rev
1565 #if (KMP_ARCH_X86)
1566 ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1567  1) // __kmpc_atomic_cmplx16_sub_a16_rev
1568 ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1569  1) // __kmpc_atomic_cmplx16_div_a16_rev
1570 #endif // KMP_ARCH_X86
1571 #endif // KMP_HAVE_QUAD
1572 
1573 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1574 // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1575 
1576 /* ------------------------------------------------------------------------ */
1577 /* Routines for mixed types of LHS and RHS, when RHS is "larger" */
1578 /* Note: in order to reduce the total number of type combinations, it is */
1579 /* assumed that the compiler converts RHS to the longest floating type, */
1580 /* that is _Quad, before calling any of these routines. */
1581 /* The conversion to _Quad is done by the compiler during the calculation, */
1582 /* and the conversion back to TYPE happens before the assignment, like: */
1583 /* *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) */
1584 /* A performance penalty is expected because of software emulation. */
1585 /* ------------------------------------------------------------------------ */
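
// Usage sketch (illustrative; "loc" and "gtid" are hypothetical names): for
//     float x; _Quad q;
//     #pragma omp atomic
//     x *= q;
// the compiler is expected to call
//     __kmpc_atomic_float4_mul_fp(&loc, gtid, &x, q);
// which atomically multiplies x by q, with the conversions described in the
// note above.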
1586 
1587 #define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1588  void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
1589  ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \
1590  KMP_DEBUG_ASSERT(__kmp_init_serial); \
1591  KA_TRACE(100, \
1592  ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
1593  gtid));
1594 
1595 // -------------------------------------------------------------------------
1596 #define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \
1597  GOMP_FLAG) \
1598  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1599  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
1600  OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
1601  }
1602 
1603 // -------------------------------------------------------------------------
1604 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1605 // -------------------------------------------------------------------------
1606 // X86 or X86_64: no alignment problems ====================================
1607 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1608  LCK_ID, MASK, GOMP_FLAG) \
1609  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1610  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1611  OP_CMPXCHG(TYPE, BITS, OP) \
1612  }
1613 // -------------------------------------------------------------------------
1614 #else
1615 // ------------------------------------------------------------------------
1616 // Code for other architectures that don't handle unaligned accesses.
1617 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1618  LCK_ID, MASK, GOMP_FLAG) \
1619  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1620  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1621  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1622  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1623  } else { \
1624  KMP_CHECK_GTID; \
1625  OP_UPDATE_CRITICAL(TYPE, OP, \
1626  LCK_ID) /* unaligned address - use critical */ \
1627  } \
1628  }
1629 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1630 
1631 // -------------------------------------------------------------------------
1632 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1633 // -------------------------------------------------------------------------
1634 #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
1635  RTYPE, LCK_ID, MASK, GOMP_FLAG) \
1636  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1637  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1638  OP_CMPXCHG_REV(TYPE, BITS, OP) \
1639  }
1640 #define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
1641  LCK_ID, GOMP_FLAG) \
1642  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1643  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1644  OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
1645  }
1646 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1647 
1648 // RHS=float8
1649 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
1650  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
1651 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
1652  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
1653 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
1654  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
1655 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
1656  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
1657 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
1658  0) // __kmpc_atomic_fixed4_mul_float8
1659 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
1660  0) // __kmpc_atomic_fixed4_div_float8
1661 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
1662  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
1663 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
1664  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
1665 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
1666  KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
1667 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
1668  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
1669 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
1670  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
1671 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
1672  KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8
1673 
1674 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not
1675 // use them)
1676 #if KMP_HAVE_QUAD
1677 ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
1678  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
1679 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
1680  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
1681 ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
1682  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
1683 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
1684  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
1685 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
1686  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
1687 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
1688  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
1689 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
1690  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
1691 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
1692  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp
1693 
1694 ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
1695  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
1696 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
1697  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
1698 ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
1699  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
1700 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
1701  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
1702 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
1703  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
1704 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
1705  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
1706 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
1707  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
1708 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
1709  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp
1710 
1711 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
1712  0) // __kmpc_atomic_fixed4_add_fp
1713 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
1714  0) // __kmpc_atomic_fixed4u_add_fp
1715 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
1716  0) // __kmpc_atomic_fixed4_sub_fp
1717 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
1718  0) // __kmpc_atomic_fixed4u_sub_fp
1719 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
1720  0) // __kmpc_atomic_fixed4_mul_fp
1721 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
1722  0) // __kmpc_atomic_fixed4u_mul_fp
1723 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
1724  0) // __kmpc_atomic_fixed4_div_fp
1725 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
1726  0) // __kmpc_atomic_fixed4u_div_fp
1727 
1728 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
1729  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
1730 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
1731  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
1732 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
1733  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
1734 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
1735  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
1736 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
1737  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
1738 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
1739  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
1740 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
1741  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
1742 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
1743  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp
1744 
1745 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
1746  KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
1747 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
1748  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
1749 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
1750  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
1751 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
1752  KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp
1753 
1754 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
1755  KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
1756 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
1757  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
1758 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
1759  KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
1760 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
1761  KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp
1762 
1763 ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
1764  1) // __kmpc_atomic_float10_add_fp
1765 ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
1766  1) // __kmpc_atomic_float10_sub_fp
1767 ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
1768  1) // __kmpc_atomic_float10_mul_fp
1769 ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
1770  1) // __kmpc_atomic_float10_div_fp
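// Illustrative use (a sketch; x, q, loc and gtid are placeholder names): the
// *_fp entries above serve an atomic update whose right-hand side has _Quad
// type, e.g. for
//   kmp_int32 x;  _Quad q;
//   #pragma omp atomic
//   x = x + q;
// a compiler targeting this interface would emit a call along the lines of
//   __kmpc_atomic_fixed4_add_fp(&loc, gtid, &x, q);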
1771 
1772 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1773 // Reverse operations
1774 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
1775  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
1776 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
1777  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
1778 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
1779  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
1780 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
1781  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp
1782 
1783 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
1784  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
1785 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
1786  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
1787 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
1788  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
1789 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
1790  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp
1791 
1792 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1793  0) // __kmpc_atomic_fixed4_sub_rev_fp
1794 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1795  0) // __kmpc_atomic_fixed4u_sub_rev_fp
1796 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
1797  0) // __kmpc_atomic_fixed4_div_rev_fp
1798 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
1799  0) // __kmpc_atomic_fixed4u_div_rev_fp
1800 
1801 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1802  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
1803 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1804  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
1805 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
1806  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
1807 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
1808  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp
1809 
1810 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
1811  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
1812 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
1813  KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp
1814 
1815 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
1816  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
1817 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
1818  KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp
1819 
1820 ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
1821  1) // __kmpc_atomic_float10_sub_rev_fp
1822 ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
1823  1) // __kmpc_atomic_float10_div_rev_fp
1824 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1825 
1826 #endif // KMP_HAVE_QUAD
1827 
1828 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1829 // ------------------------------------------------------------------------
1830 // X86 or X86_64: no alignment problems ====================================
1831 #if USE_CMPXCHG_FIX
1832 // workaround for C78287 (complex(kind=4) data type)
1833 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1834  LCK_ID, MASK, GOMP_FLAG) \
1835  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1836  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1837  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
1838  }
1839 // end of the second part of the workaround for C78287
1840 #else
1841 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1842  LCK_ID, MASK, GOMP_FLAG) \
1843  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1844  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1845  OP_CMPXCHG(TYPE, BITS, OP) \
1846  }
1847 #endif // USE_CMPXCHG_FIX
1848 #else
1849 // ------------------------------------------------------------------------
1850 // Code for other architectures that don't handle unaligned accesses.
1851 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1852  LCK_ID, MASK, GOMP_FLAG) \
1853  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1854  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1855  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1856  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1857  } else { \
1858  KMP_CHECK_GTID; \
1859  OP_UPDATE_CRITICAL(TYPE, OP, \
1860  LCK_ID) /* unaligned address - use critical */ \
1861  } \
1862  }
1863 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1864 
1865 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
1866  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
1867 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
1868  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
1869 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
1870  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
1871 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
1872  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
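// Conceptual sketch (not the literal expansion): on IA-32/Intel(R) 64 the
// cmplx4-with-cmplx8 entries above update the 8-byte kmp_cmplx32 object with
// a 64-bit compare-and-store loop, roughly
//   do { old = *lhs; new = (kmp_cmplx32)(old OP rhs); }
//   while (!compare_and_store_64(lhs, old, new));
// other architectures take the lock-free path only when lhs passes the MASK
// alignment test and otherwise fall back to the 8c critical section.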
1873 
1874 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1875 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1876 
1877 // ------------------------------------------------------------------------
1878 // Atomic READ routines
1879 
1880 // ------------------------------------------------------------------------
1881 // Beginning of a definition (provides name, parameters, debug trace)
1882 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1883 // fixed)
1884 // OP_ID - operation identifier (add, sub, mul, ...)
1885 // TYPE - operands' type
1886 #define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1887  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
1888  TYPE *loc) { \
1889  KMP_DEBUG_ASSERT(__kmp_init_serial); \
1890  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1891 
1892 // ------------------------------------------------------------------------
1893 // Atomic read of *loc using the "compare_and_store_ret" routine
1894 // TYPE - operands' type
1895 // BITS - size in bits, used to distinguish low level calls
1896 // OP - operator
1897 // Note: temp_val introduced in order to force the compiler to read
1898 // *loc only once (w/o it the compiler reads *loc twice)
1899 // TODO: check if it is still necessary
1900 // Return the old value regardless of the result of the "compare & swap" operation
1901 #define OP_CMPXCHG_READ(TYPE, BITS, OP) \
1902  { \
1903  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1904  union f_i_union { \
1905  TYPE f_val; \
1906  kmp_int##BITS i_val; \
1907  }; \
1908  union f_i_union old_value; \
1909  temp_val = *loc; \
1910  old_value.f_val = temp_val; \
1911  old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \
1912  (kmp_int##BITS *)loc, \
1913  *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \
1914  *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \
1915  new_value = old_value.f_val; \
1916  return new_value; \
1917  }
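// Conceptual sketch (not the literal expansion): OP_CMPXCHG_READ implements an
// atomic load by issuing a compare-and-store whose "expected" and "new" values
// are both the value just read, roughly
//   old.i = KMP_COMPARE_AND_STORE_RET<BITS>(loc, old.i, old.i);
// the exchange either rewrites the value already present or fails, so *loc is
// never modified, while the value returned by the primitive is an atomic
// snapshot of *loc, reinterpreted as TYPE through the f_i_union.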
1918 
1919 // -------------------------------------------------------------------------
1920 // Read of *loc performed under the critical section
1921 // OP - operator (it's supposed to contain an assignment)
1922 // LCK_ID - lock identifier
1923 // Note: gtid is not checked here as it should always be valid
1924 // 1- and 2-byte operands expect a valid gtid; other types check it before this macro
1925 #define OP_CRITICAL_READ(OP, LCK_ID) \
1926  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1927  \
1928  new_value = (*loc); \
1929  \
1930  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1931 
1932 // -------------------------------------------------------------------------
1933 #ifdef KMP_GOMP_COMPAT
1934 #define OP_GOMP_CRITICAL_READ(OP, FLAG) \
1935  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1936  KMP_CHECK_GTID; \
1937  OP_CRITICAL_READ(OP, 0); \
1938  return new_value; \
1939  }
1940 #else
1941 #define OP_GOMP_CRITICAL_READ(OP, FLAG)
1942 #endif /* KMP_GOMP_COMPAT */
1943 
1944 // -------------------------------------------------------------------------
1945 #define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1946  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1947  TYPE new_value; \
1948  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1949  new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \
1950  return new_value; \
1951  }
1952 // -------------------------------------------------------------------------
1953 #define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1954  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1955  TYPE new_value; \
1956  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1957  OP_CMPXCHG_READ(TYPE, BITS, OP) \
1958  }
1959 // ------------------------------------------------------------------------
1960 // Routines for Extended types: long double, _Quad, complex flavours (use
1961 // critical section)
1962 // TYPE_ID, OP_ID, TYPE - detailed above
1963 // OP - operator
1964 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1965 #define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1966  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1967  TYPE new_value; \
1968  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \
1969  OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \
1970  return new_value; \
1971  }
1972 
1973 // ------------------------------------------------------------------------
1974 // Fix for cmplx4 read (CQ220361) on Windows* OS. The regular routine that
1975 // returns the value by value doesn't work there,
1976 // so the read value is returned through an additional out parameter.
1977 #if (KMP_OS_WINDOWS)
1978 
1979 #define OP_CRITICAL_READ_WRK(OP, LCK_ID) \
1980  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1981  \
1982  (*out) = (*loc); \
1983  \
1984  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1985 // ------------------------------------------------------------------------
1986 #ifdef KMP_GOMP_COMPAT
1987 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \
1988  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1989  KMP_CHECK_GTID; \
1990  OP_CRITICAL_READ_WRK(OP, 0); \
1991  }
1992 #else
1993 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
1994 #endif /* KMP_GOMP_COMPAT */
1995 // ------------------------------------------------------------------------
1996 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
1997  void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
1998  TYPE *loc) { \
1999  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2000  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2001 
2002 // ------------------------------------------------------------------------
2003 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2004  ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
2005  OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \
2006  OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \
2007  }
2008 
2009 #endif // KMP_OS_WINDOWS
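// Illustrative note (a sketch): on Windows* OS the cmplx4 read entry generated
// below therefore has the out-parameter form
//   void __kmpc_atomic_cmplx4_rd(kmp_cmplx32 *out, ident_t *id_ref, int gtid,
//                                kmp_cmplx32 *loc);
// instead of returning the kmp_cmplx32 value directly as on other platforms.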
2010 
2011 // ------------------------------------------------------------------------
2012 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2013 ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
2014 ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
2015  KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
2016 ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
2017  KMP_ARCH_X86) // __kmpc_atomic_float4_rd
2018 ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
2019  KMP_ARCH_X86) // __kmpc_atomic_float8_rd
2020 
2021 // !!! TODO: Remove lock operations for "char" since it can't be non-atomic
2022 ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
2023  KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
2024 ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
2025  KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd
2026 
2027 ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
2028  1) // __kmpc_atomic_float10_rd
2029 #if KMP_HAVE_QUAD
2030 ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
2031  1) // __kmpc_atomic_float16_rd
2032 #endif // KMP_HAVE_QUAD
2033 
2034 // Fix for CQ220361 on Windows* OS
2035 #if (KMP_OS_WINDOWS)
2036 ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
2037  1) // __kmpc_atomic_cmplx4_rd
2038 #else
2039 ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
2040  1) // __kmpc_atomic_cmplx4_rd
2041 #endif // (KMP_OS_WINDOWS)
2042 ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
2043  1) // __kmpc_atomic_cmplx8_rd
2044 ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
2045  1) // __kmpc_atomic_cmplx10_rd
2046 #if KMP_HAVE_QUAD
2047 ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
2048  1) // __kmpc_atomic_cmplx16_rd
2049 #if (KMP_ARCH_X86)
2050 ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
2051  1) // __kmpc_atomic_float16_a16_rd
2052 ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
2053  1) // __kmpc_atomic_cmplx16_a16_rd
2054 #endif // (KMP_ARCH_X86)
2055 #endif // KMP_HAVE_QUAD
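// Illustrative use (a sketch; v, x, loc and gtid are placeholder names): for
//   #pragma omp atomic read
//   v = x;
// with kmp_int32 x, a compiler targeting the entries above emits roughly
//   v = __kmpc_atomic_fixed4_rd(&loc, gtid, &x);
// which performs an atomic fetch-and-add of 0 on x and returns the result.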
2056 
2057 // ------------------------------------------------------------------------
2058 // Atomic WRITE routines
2059 
2060 #define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2061  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2062  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2063  KMP_XCHG_FIXED##BITS(lhs, rhs); \
2064  }
2065 // ------------------------------------------------------------------------
2066 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2067  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2068  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2069  KMP_XCHG_REAL##BITS(lhs, rhs); \
2070  }
2071 
2072 // ------------------------------------------------------------------------
2073 // Operation on *lhs, rhs using "compare_and_store" routine
2074 // TYPE - operands' type
2075 // BITS - size in bits, used to distinguish low level calls
2076 // OP - operator
2077 // Note: temp_val introduced in order to force the compiler to read
2078 // *lhs only once (w/o it the compiler reads *lhs twice)
2079 #define OP_CMPXCHG_WR(TYPE, BITS, OP) \
2080  { \
2081  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2082  TYPE old_value, new_value; \
2083  temp_val = *lhs; \
2084  old_value = temp_val; \
2085  new_value = rhs; \
2086  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2087  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2088  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2089  temp_val = *lhs; \
2090  old_value = temp_val; \
2091  new_value = rhs; \
2092  } \
2093  }
2094 
2095 // -------------------------------------------------------------------------
2096 #define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2097  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2098  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2099  OP_CMPXCHG_WR(TYPE, BITS, OP) \
2100  }
2101 
2102 // ------------------------------------------------------------------------
2103 // Routines for Extended types: long double, _Quad, complex flavours (use
2104 // critical section)
2105 // TYPE_ID, OP_ID, TYPE - detailed above
2106 // OP - operator
2107 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2108 #define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2109  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2110  OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \
2111  OP_CRITICAL(OP, LCK_ID) /* send assignment */ \
2112  }
2113 // -------------------------------------------------------------------------
2114 
2115 ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
2116  KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
2117 ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
2118  KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
2119 ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
2120  KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
2121 #if (KMP_ARCH_X86)
2122 ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
2123  KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2124 #else
2125 ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
2126  KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2127 #endif // (KMP_ARCH_X86)
2128 
2129 ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
2130  KMP_ARCH_X86) // __kmpc_atomic_float4_wr
2131 #if (KMP_ARCH_X86)
2132 ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
2133  KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2134 #else
2135 ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
2136  KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2137 #endif // (KMP_ARCH_X86)
2138 
2139 ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
2140  1) // __kmpc_atomic_float10_wr
2141 #if KMP_HAVE_QUAD
2142 ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
2143  1) // __kmpc_atomic_float16_wr
2144 #endif // KMP_HAVE_QUAD
2145 ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
2146 ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
2147  1) // __kmpc_atomic_cmplx8_wr
2148 ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
2149  1) // __kmpc_atomic_cmplx10_wr
2150 #if KMP_HAVE_QUAD
2151 ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
2152  1) // __kmpc_atomic_cmplx16_wr
2153 #if (KMP_ARCH_X86)
2154 ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
2155  1) // __kmpc_atomic_float16_a16_wr
2156 ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
2157  1) // __kmpc_atomic_cmplx16_a16_wr
2158 #endif // (KMP_ARCH_X86)
2159 #endif // KMP_HAVE_QUAD
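// Illustrative use (a sketch; x, expr, loc and gtid are placeholder names): for
//   #pragma omp atomic write
//   x = expr;
// with kmp_int32 x, a compiler targeting the entries above emits roughly
//   __kmpc_atomic_fixed4_wr(&loc, gtid, &x, expr);
// which stores expr with an atomic exchange, or, for 8-byte operands on IA-32,
// with the compare-and-store loop of OP_CMPXCHG_WR.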
2160 
2161 // ------------------------------------------------------------------------
2162 // Atomic CAPTURE routines
2163 
2164 // Beginning of a definition (provides name, parameters, debug trace)
2165 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2166 // fixed)
2167 // OP_ID - operation identifier (add, sub, mul, ...)
2168 // TYPE - operands' type
2169 #define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
2170  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
2171  TYPE *lhs, TYPE rhs, int flag) { \
2172  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2173  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2174 
2175 // -------------------------------------------------------------------------
2176 // Operation on *lhs, rhs bound by critical section
2177 // OP - operator (it's supposed to contain an assignment)
2178 // LCK_ID - lock identifier
2179 // Note: gtid is not checked here as it should always be valid
2180 // 1- and 2-byte operands expect a valid gtid; other types check it before this macro
2181 #define OP_CRITICAL_CPT(OP, LCK_ID) \
2182  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2183  \
2184  if (flag) { \
2185  (*lhs) OP rhs; \
2186  new_value = (*lhs); \
2187  } else { \
2188  new_value = (*lhs); \
2189  (*lhs) OP rhs; \
2190  } \
2191  \
2192  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2193  return new_value;
2194 
2195 #define OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) \
2196  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2197  \
2198  if (flag) { \
2199  (*lhs) = (TYPE)((*lhs)OP rhs); \
2200  new_value = (*lhs); \
2201  } else { \
2202  new_value = (*lhs); \
2203  (*lhs) = (TYPE)((*lhs)OP rhs); \
2204  } \
2205  \
2206  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2207  return new_value;
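// Illustrative use (a sketch; v, x, expr, loc and gtid are placeholder names):
// the flag argument selects which value is captured. For
//   #pragma omp atomic capture
//   { v = x; x = x + expr; }   // capture the pre-update value
// a compiler targeting this interface emits roughly
//   v = __kmpc_atomic_fixed4_add_cpt(&loc, gtid, &x, expr, 0);
// while the form  v = x += expr;  captures the updated value and passes
// flag == 1.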
2208 
2209 // ------------------------------------------------------------------------
2210 #ifdef KMP_GOMP_COMPAT
2211 #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG) \
2212  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2213  KMP_CHECK_GTID; \
2214  OP_UPDATE_CRITICAL_CPT(TYPE, OP, 0); \
2215  }
2216 #else
2217 #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG)
2218 #endif /* KMP_GOMP_COMPAT */
2219 
2220 // ------------------------------------------------------------------------
2221 // Operation on *lhs, rhs using "compare_and_store" routine
2222 // TYPE - operands' type
2223 // BITS - size in bits, used to distinguish low level calls
2224 // OP - operator
2225 // Note: temp_val introduced in order to force the compiler to read
2226 // *lhs only once (w/o it the compiler reads *lhs twice)
2227 #define OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2228  { \
2229  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2230  TYPE old_value, new_value; \
2231  temp_val = *lhs; \
2232  old_value = temp_val; \
2233  new_value = (TYPE)(old_value OP rhs); \
2234  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2235  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2236  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2237  temp_val = *lhs; \
2238  old_value = temp_val; \
2239  new_value = (TYPE)(old_value OP rhs); \
2240  } \
2241  if (flag) { \
2242  return new_value; \
2243  } else \
2244  return old_value; \
2245  }
2246 
2247 // -------------------------------------------------------------------------
2248 #define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2249  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2250  TYPE new_value; \
2251  (void)new_value; \
2252  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2253  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2254  }
2255 
2256 // -------------------------------------------------------------------------
2257 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2258  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2259  TYPE old_value, new_value; \
2260  (void)new_value; \
2261  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2262  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
2263  old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
2264  if (flag) { \
2265  return old_value OP rhs; \
2266  } else \
2267  return old_value; \
2268  }
2269 // -------------------------------------------------------------------------
2270 
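// Conceptual sketch (not the literal expansion): the fetch-and-add based
// captures below, e.g. __kmpc_atomic_fixed4_add_cpt, reduce to
//   old = atomic fetch-and-add of (+/-)rhs on *lhs;
//   return flag ? old OP rhs : old;
// i.e. the captured "new" value is recomputed from the fetched old value
// rather than re-read from memory.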
2271 ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
2272  0) // __kmpc_atomic_fixed4_add_cpt
2273 ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
2274  0) // __kmpc_atomic_fixed4_sub_cpt
2275 ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
2276  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
2277 ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
2278  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt
2279 
2280 ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
2281  KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
2282 ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
2283  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
2284 ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
2285  KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
2286 ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
2287  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt
2288 
2289 // ------------------------------------------------------------------------
2290 // Entries definition for integer operands
2291 // TYPE_ID - operands type and size (fixed4, float4)
2292 // OP_ID - operation identifier (add, sub, mul, ...)
2293 // TYPE - operand type
2294 // BITS - size in bits, used to distinguish low level calls
2295 // OP - operator (used in critical section)
2296 // TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
2297 // ------------------------------------------------------------------------
2298 // Routines for ATOMIC integer operands, other operators
2299 // ------------------------------------------------------------------------
2300 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2301 ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
2302  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
2303 ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
2304  0) // __kmpc_atomic_fixed1_andb_cpt
2305 ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
2306  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
2307 ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
2308  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
2309 ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
2310  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
2311 ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
2312  0) // __kmpc_atomic_fixed1_orb_cpt
2313 ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
2314  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
2315 ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
2316  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
2317 ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
2318  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
2319 ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
2320  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
2321 ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
2322  0) // __kmpc_atomic_fixed1_xor_cpt
2323 ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
2324  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
2325 ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
2326  0) // __kmpc_atomic_fixed2_andb_cpt
2327 ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
2328  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
2329 ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
2330  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
2331 ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
2332  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
2333 ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
2334  0) // __kmpc_atomic_fixed2_orb_cpt
2335 ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
2336  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
2337 ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
2338  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
2339 ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
2340  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
2341 ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
2342  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
2343 ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
2344  0) // __kmpc_atomic_fixed2_xor_cpt
2345 ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
2346  0) // __kmpc_atomic_fixed4_andb_cpt
2347 ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
2348  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
2349 ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
2350  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
2351 ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
2352  KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
2353 ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
2354  0) // __kmpc_atomic_fixed4_orb_cpt
2355 ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
2356  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
2357 ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
2358  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
2359 ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
2360  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
2361 ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
2362  0) // __kmpc_atomic_fixed4_xor_cpt
2363 ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
2364  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
2365 ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
2366  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
2367 ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
2368  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
2369 ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
2370  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
2371 ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
2372  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
2373 ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
2374  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
2375 ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
2376  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
2377 ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
2378  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
2379 ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
2380  KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
2381 ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
2382  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
2383 ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
2384  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
2385 ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
2386  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
2387 ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
2388  KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
2389 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2390 
2391 // CAPTURE routines for mixed types RHS=float16
2392 #if KMP_HAVE_QUAD
2393 
2394 // Beginning of a definition (provides name, parameters, debug trace)
2395 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2396 // fixed)
2397 // OP_ID - operation identifier (add, sub, mul, ...)
2398 // TYPE - operands' type
2399 #define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2400  TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
2401  ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \
2402  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2403  KA_TRACE(100, \
2404  ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
2405  gtid));
2406 
2407 // -------------------------------------------------------------------------
2408 #define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
2409  RTYPE, LCK_ID, MASK, GOMP_FLAG) \
2410  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2411  TYPE new_value; \
2412  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2413  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2414  }
2415 
2416 // -------------------------------------------------------------------------
2417 #define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
2418  LCK_ID, GOMP_FLAG) \
2419  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2420  TYPE new_value; \
2421  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
2422  OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
2423  }
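// Illustrative use (a sketch; x, q, v, loc and gtid are placeholder names): the
// *_cpt_fp entries below capture an integer or float lhs updated by a _Quad
// right-hand side, e.g.
//   kmp_int32 x;  _Quad q;
//   kmp_int32 v = __kmpc_atomic_fixed4_add_cpt_fp(&loc, gtid, &x, q, 1);
// which is the lowering of  v = x += q;  (flag == 1 captures the new value).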
2424 
2425 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
2426  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
2427 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
2428  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
2429 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2430  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
2431 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2432  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
2433 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2434  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
2435 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2436  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
2437 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
2438  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
2439 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
2440  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp
2441 
2442 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
2443  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
2444 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
2445  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
2446 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2447  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
2448 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2449  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
2450 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2451  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
2452 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2453  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
2454 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
2455  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
2456 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
2457  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp
2458 
2459 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2460  0) // __kmpc_atomic_fixed4_add_cpt_fp
2461 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2462  0) // __kmpc_atomic_fixed4u_add_cpt_fp
2463 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2464  0) // __kmpc_atomic_fixed4_sub_cpt_fp
2465 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2466  0) // __kmpc_atomic_fixed4u_sub_cpt_fp
2467 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2468  0) // __kmpc_atomic_fixed4_mul_cpt_fp
2469 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2470  0) // __kmpc_atomic_fixed4u_mul_cpt_fp
2471 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2472  0) // __kmpc_atomic_fixed4_div_cpt_fp
2473 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2474  0) // __kmpc_atomic_fixed4u_div_cpt_fp
2475 
2476 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2477  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
2478 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2479  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
2480 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2481  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
2482 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2483  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
2484 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2485  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
2486 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2487  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
2488 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2489  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
2490 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2491  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp
2492 
2493 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
2494  KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
2495 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
2496  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
2497 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
2498  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
2499 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
2500  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp
2501 
2502 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
2503  KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
2504 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
2505  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
2506 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
2507  KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
2508 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
2509  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp
2510 
2511 ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
2512  1) // __kmpc_atomic_float10_add_cpt_fp
2513 ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
2514  1) // __kmpc_atomic_float10_sub_cpt_fp
2515 ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
2516  1) // __kmpc_atomic_float10_mul_cpt_fp
2517 ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
2518  1) // __kmpc_atomic_float10_div_cpt_fp
2519 
2520 #endif // KMP_HAVE_QUAD
2521 
2522 // ------------------------------------------------------------------------
2523 // Routines for C/C++ Reduction operators && and ||
2524 
2525 // -------------------------------------------------------------------------
2526 // Operation on *lhs, rhs bound by critical section
2527 // OP - operator (it's supposed to contain an assignment)
2528 // LCK_ID - lock identifier
2529 // Note: gtid is not checked here as it should always be valid
2530 // 1- and 2-byte operands expect a valid gtid; other types check it before this macro
2531 #define OP_CRITICAL_L_CPT(OP, LCK_ID) \
2532  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2533  \
2534  if (flag) { \
2535  new_value OP rhs; \
2536  (*lhs) = new_value; \
2537  } else { \
2538  new_value = (*lhs); \
2539  (*lhs) OP rhs; \
2540  } \
2541  \
2542  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2543 
2544 // ------------------------------------------------------------------------
2545 #ifdef KMP_GOMP_COMPAT
2546 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \
2547  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2548  KMP_CHECK_GTID; \
2549  OP_CRITICAL_L_CPT(OP, 0); \
2550  return new_value; \
2551  }
2552 #else
2553 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
2554 #endif /* KMP_GOMP_COMPAT */
2555 
2556 // ------------------------------------------------------------------------
2557 // Need separate macros for && and ||: there is no combined-assignment operator for them
2558 #define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2559  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2560  TYPE new_value; \
2561  (void)new_value; \
2562  OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \
2563  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2564  }
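// Illustrative note (a sketch): since C has no "&&=" / "||=" operator, the GOMP
// critical path is handed the operator fragment "= *lhs OP", so that inside
// OP_CRITICAL_L_CPT the text  new_value OP rhs  expands to
//   new_value = *lhs && rhs;   // (or ||)
// while the lock-free path reuses OP_CMPXCHG_CPT with OP being && or ||.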
2565 
2566 ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
2567  KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
2568 ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
2569  KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
2570 ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
2571  KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
2572 ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
2573  KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
2574 ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
2575  0) // __kmpc_atomic_fixed4_andl_cpt
2576 ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
2577  0) // __kmpc_atomic_fixed4_orl_cpt
2578 ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
2579  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
2580 ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
2581  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt
2582 
2583 // -------------------------------------------------------------------------
2584 // Routines for Fortran operators that have no direct C counterpart:
2585 // MAX, MIN, .EQV., .NEQV.
2586 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
2587 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
2588 
2589 // -------------------------------------------------------------------------
2590 // MIN and MAX need separate macros
2591 // OP - comparison operator used to check whether an update is still needed
2592 #define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
2593  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2594  \
2595  if (*lhs OP rhs) { /* still need actions? */ \
2596  old_value = *lhs; \
2597  *lhs = rhs; \
2598  if (flag) \
2599  new_value = rhs; \
2600  else \
2601  new_value = old_value; \
2602  } else { \
2603  new_value = *lhs; \
2604  } \
2605  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2606  return new_value;
2607 
2608 // -------------------------------------------------------------------------
2609 #ifdef KMP_GOMP_COMPAT
2610 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \
2611  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2612  KMP_CHECK_GTID; \
2613  MIN_MAX_CRITSECT_CPT(OP, 0); \
2614  }
2615 #else
2616 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
2617 #endif /* KMP_GOMP_COMPAT */
2618 
2619 // -------------------------------------------------------------------------
2620 #define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
2621  { \
2622  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2623  /*TYPE old_value; */ \
2624  temp_val = *lhs; \
2625  old_value = temp_val; \
2626  while (old_value OP rhs && /* still need actions? */ \
2627  !KMP_COMPARE_AND_STORE_ACQ##BITS( \
2628  (kmp_int##BITS *)lhs, \
2629  *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2630  *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
2631  temp_val = *lhs; \
2632  old_value = temp_val; \
2633  } \
2634  if (flag) \
2635  return rhs; \
2636  else \
2637  return old_value; \
2638  }
2639 
2640 // -------------------------------------------------------------------------
2641 // 1-byte, 2-byte operands - use critical section
2642 #define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2643  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2644  TYPE new_value, old_value; \
2645  if (*lhs OP rhs) { /* need actions? */ \
2646  GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
2647  MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
2648  } \
2649  return *lhs; \
2650  }
2651 
2652 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2653  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2654  TYPE new_value, old_value; \
2655  (void)new_value; \
2656  if (*lhs OP rhs) { \
2657  GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
2658  MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
2659  } \
2660  return *lhs; \
2661  }
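// Illustrative note (a sketch; v, x, loc and gtid are placeholder names): OP is
// the comparison that tests whether an update is still required: for max_cpt it
// is "<" (store rhs only while *lhs < rhs), for min_cpt it is ">". For example
//   v = __kmpc_atomic_fixed4_max_cpt(&loc, gtid, &x, rhs, 1);
// returns the resulting maximum, while flag == 0 returns the previous value.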
2662 
2663 MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
2664  KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
2665 MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
2666  KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
2667 MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
2668  KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
2669 MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
2670  KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
2671 MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
2672  0) // __kmpc_atomic_fixed4_max_cpt
2673 MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
2674  0) // __kmpc_atomic_fixed4_min_cpt
2675 MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
2676  KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
2677 MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
2678  KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
2679 MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
2680  KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
2681 MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
2682  KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
2683 MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
2684  KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
2685 MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
2686  KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
2687 #if KMP_HAVE_QUAD
2688 MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
2689  1) // __kmpc_atomic_float16_max_cpt
2690 MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
2691  1) // __kmpc_atomic_float16_min_cpt
2692 #if (KMP_ARCH_X86)
2693 MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
2694  1) // __kmpc_atomic_float16_max_a16_cpt
2695 MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
2696  1) // __kmpc_atomic_float16_min_a16_cpt
2697 #endif // (KMP_ARCH_X86)
2698 #endif // KMP_HAVE_QUAD
2699 
2700 // ------------------------------------------------------------------------
2701 #ifdef KMP_GOMP_COMPAT
2702 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \
2703  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2704  KMP_CHECK_GTID; \
2705  OP_CRITICAL_CPT(OP, 0); \
2706  }
2707 #else
2708 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
2709 #endif /* KMP_GOMP_COMPAT */
2710 // ------------------------------------------------------------------------
2711 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2712  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2713  TYPE new_value; \
2714  (void)new_value; \
2715  OP_GOMP_CRITICAL_EQV_CPT(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
2716  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2717  }
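// Illustrative note (a sketch): Fortran .NEQV. maps directly onto bitwise XOR,
// while .EQV. is its complement and is built from the composite operator "^~",
// so OP_CMPXCHG_CPT computes
//   new_value = old_value ^ ~rhs;   // == ~(old_value ^ rhs), bitwise equivalence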
2718 
2719 // ------------------------------------------------------------------------
2720 
2721 ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
2722  KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
2723 ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
2724  KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
2725 ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
2726  KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
2727 ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
2728  KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
2729 ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
2730  KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
2731 ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
2732  KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
2733 ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
2734  KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
2735 ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
2736  KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt
2737 
2738 // ------------------------------------------------------------------------
2739 // Routines for Extended types: long double, _Quad, complex flavours (use
2740 // critical section)
2741 // TYPE_ID, OP_ID, TYPE - detailed above
2742 // OP - operator
2743 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2744 #define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2745  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2746  TYPE new_value; \
2747  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
2748  OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
2749  }
2750 
2751 // ------------------------------------------------------------------------
2752 // Workaround for cmplx4. Regular routines that return the value don't work
2753 // on Win_32e, so captured values are returned through an additional parameter.
2754 #define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \
2755  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2756  \
2757  if (flag) { \
2758  (*lhs) OP rhs; \
2759  (*out) = (*lhs); \
2760  } else { \
2761  (*out) = (*lhs); \
2762  (*lhs) OP rhs; \
2763  } \
2764  \
2765  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2766  return;
2767 // ------------------------------------------------------------------------
2768 
2769 #ifdef KMP_GOMP_COMPAT
2770 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \
2771  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2772  KMP_CHECK_GTID; \
2773  OP_CRITICAL_CPT_WRK(OP## =, 0); \
2774  }
2775 #else
2776 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
2777 #endif /* KMP_GOMP_COMPAT */
2778 // ------------------------------------------------------------------------
2779 
2780 #define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
2781  void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
2782  TYPE rhs, TYPE *out, int flag) { \
2783  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2784  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2785 // ------------------------------------------------------------------------
2786 
2787 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2788  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
2789  OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \
2790  OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \
2791  }
2792 // The end of workaround for cmplx4
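// Illustrative note (a sketch): the cmplx4 capture entries generated with
// ATOMIC_CRITICAL_CPT_WRK below return void and deliver the captured value
// through the extra out parameter, e.g.
//   void __kmpc_atomic_cmplx4_add_cpt(ident_t *id_ref, int gtid,
//                                     kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                     kmp_cmplx32 *out, int flag);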
2793 
2794 /* ------------------------------------------------------------------------- */
2795 // routines for long double type
2796 ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
2797  1) // __kmpc_atomic_float10_add_cpt
2798 ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
2799  1) // __kmpc_atomic_float10_sub_cpt
2800 ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
2801  1) // __kmpc_atomic_float10_mul_cpt
2802 ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
2803  1) // __kmpc_atomic_float10_div_cpt
2804 #if KMP_HAVE_QUAD
2805 // routines for _Quad type
2806 ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
2807  1) // __kmpc_atomic_float16_add_cpt
2808 ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
2809  1) // __kmpc_atomic_float16_sub_cpt
2810 ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
2811  1) // __kmpc_atomic_float16_mul_cpt
2812 ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
2813  1) // __kmpc_atomic_float16_div_cpt
2814 #if (KMP_ARCH_X86)
2815 ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
2816  1) // __kmpc_atomic_float16_add_a16_cpt
2817 ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
2818  1) // __kmpc_atomic_float16_sub_a16_cpt
2819 ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
2820  1) // __kmpc_atomic_float16_mul_a16_cpt
2821 ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
2822  1) // __kmpc_atomic_float16_div_a16_cpt
2823 #endif // (KMP_ARCH_X86)
2824 #endif // KMP_HAVE_QUAD
2825 
2826 // routines for complex types
2827 
2828 // cmplx4 routines to return void
2829 ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
2830  1) // __kmpc_atomic_cmplx4_add_cpt
2831 ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
2832  1) // __kmpc_atomic_cmplx4_sub_cpt
2833 ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
2834  1) // __kmpc_atomic_cmplx4_mul_cpt
2835 ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
2836  1) // __kmpc_atomic_cmplx4_div_cpt
2837 
2838 ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
2839  1) // __kmpc_atomic_cmplx8_add_cpt
2840 ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
2841  1) // __kmpc_atomic_cmplx8_sub_cpt
2842 ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
2843  1) // __kmpc_atomic_cmplx8_mul_cpt
2844 ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
2845  1) // __kmpc_atomic_cmplx8_div_cpt
2846 ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
2847  1) // __kmpc_atomic_cmplx10_add_cpt
2848 ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
2849  1) // __kmpc_atomic_cmplx10_sub_cpt
2850 ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
2851  1) // __kmpc_atomic_cmplx10_mul_cpt
2852 ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
2853  1) // __kmpc_atomic_cmplx10_div_cpt
2854 #if KMP_HAVE_QUAD
2855 ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
2856  1) // __kmpc_atomic_cmplx16_add_cpt
2857 ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
2858  1) // __kmpc_atomic_cmplx16_sub_cpt
2859 ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
2860  1) // __kmpc_atomic_cmplx16_mul_cpt
2861 ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
2862  1) // __kmpc_atomic_cmplx16_div_cpt
2863 #if (KMP_ARCH_X86)
2864 ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
2865  1) // __kmpc_atomic_cmplx16_add_a16_cpt
2866 ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
2867  1) // __kmpc_atomic_cmplx16_sub_a16_cpt
2868 ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
2869  1) // __kmpc_atomic_cmplx16_mul_a16_cpt
2870 ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
2871  1) // __kmpc_atomic_cmplx16_div_a16_cpt
2872 #endif // (KMP_ARCH_X86)
2873 #endif // KMP_HAVE_QUAD
2874 
2875 // OpenMP 4.0, for non-commutative operations:  v = x = expr binop x;
2876 //   { v = x; x = expr binop x; }   { x = expr binop x; v = x; }
2877 // Supported only on IA-32 architecture and Intel(R) 64
2878 
2879 // -------------------------------------------------------------------------
2880 // Operation on *lhs, rhs bound by critical section
2881 // OP - operator (it's supposed to contain an assignment)
2882 // LCK_ID - lock identifier
2883 // Note: gtid is not checked here as it should always be valid
2884 // 1- and 2-byte operands expect a valid gtid; other types check it before this macro
2885 #define OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \
2886  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2887  \
2888  if (flag) { \
2889  /*temp_val = (*lhs);*/ \
2890  (*lhs) = (TYPE)((rhs)OP(*lhs)); \
2891  new_value = (*lhs); \
2892  } else { \
2893  new_value = (*lhs); \
2894  (*lhs) = (TYPE)((rhs)OP(*lhs)); \
2895  } \
2896  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2897  return new_value;
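// Illustrative use (a sketch; v, x, expr, loc and gtid are placeholder names):
// the *_cpt_rev entries apply the operands in reverse order, x = expr OP x. For
//   #pragma omp atomic capture
//   { v = x; x = expr - x; }
// with kmp_int32 x, a compiler targeting this interface emits roughly
//   v = __kmpc_atomic_fixed4_sub_cpt_rev(&loc, gtid, &x, expr, 0);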
2898 
2899 // ------------------------------------------------------------------------
2900 #ifdef KMP_GOMP_COMPAT
2901 #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG) \
2902  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2903  KMP_CHECK_GTID; \
2904  OP_CRITICAL_CPT_REV(TYPE, OP, 0); \
2905  }
2906 #else
2907 #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG)
2908 #endif /* KMP_GOMP_COMPAT */
2909 
2910 // ------------------------------------------------------------------------
2911 // Operation on *lhs, rhs using "compare_and_store" routine
2912 // TYPE - operands' type
2913 // BITS - size in bits, used to distinguish low level calls
2914 // OP - operator
2915 // Note: temp_val introduced in order to force the compiler to read
2916 // *lhs only once (w/o it the compiler reads *lhs twice)
2917 #define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
2918  { \
2919  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2920  TYPE old_value, new_value; \
2921  temp_val = *lhs; \
2922  old_value = temp_val; \
2923  new_value = (TYPE)(rhs OP old_value); \
2924  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2925  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2926  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2927  temp_val = *lhs; \
2928  old_value = temp_val; \
2929  new_value = (TYPE)(rhs OP old_value); \
2930  } \
2931  if (flag) { \
2932  return new_value; \
2933  } else \
2934  return old_value; \
2935  }
2936 
2937 // -------------------------------------------------------------------------
2938 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2939  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2940  TYPE new_value; \
2941  (void)new_value; \
2942  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
2943  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
2944  }
2945 
2946 ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
2947  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
2948 ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
2949  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
2950 ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
2951  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
2952 ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
2953  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
2954 ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
2955  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
2956 ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
2957  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
2958 ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
2959  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
2960 ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
2961  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
2962 ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
2963  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
2964 ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
2965  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
2966 ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
2967  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
2968 ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
2969  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
2970 ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
2971  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
2972 ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
2973  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
2974 ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
2975  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
2976 ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
2977  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
2978 ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
2979  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
2980 ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
2981  KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
2982 ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
2983  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
2984 ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
2985  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
2986 ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
2987  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
2988 ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
2989  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
2990 ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
2991  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
2992 ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
2993  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
2994 ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
2995  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
2996 ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
2997  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
2998 ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
2999  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
3000 ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
3001  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
3002 // TYPE_ID, OP_ID, TYPE, OP, GOMP_FLAG
3003 
3004 // ------------------------------------------------------------------------
3005 // Routines for Extended types: long double, _Quad, complex flavours (use
3006 // critical section)
3007 // TYPE_ID, OP_ID, TYPE - detailed above
3008 // OP - operator
3009 // LCK_ID - lock identifier, used to select the lock variable
3010 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
3011  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
3012  TYPE new_value; \
3013  /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \
3014  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
3015  OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \
3016  }
3017 
3018 /* ------------------------------------------------------------------------- */
3019 // routines for long double type
3020 ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
3021  1) // __kmpc_atomic_float10_sub_cpt_rev
3022 ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
3023  1) // __kmpc_atomic_float10_div_cpt_rev
3024 #if KMP_HAVE_QUAD
3025 // routines for _Quad type
3026 ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
3027  1) // __kmpc_atomic_float16_sub_cpt_rev
3028 ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
3029  1) // __kmpc_atomic_float16_div_cpt_rev
3030 #if (KMP_ARCH_X86)
3031 ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
3032  1) // __kmpc_atomic_float16_sub_a16_cpt_rev
3033 ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
3034  1) // __kmpc_atomic_float16_div_a16_cpt_rev
3035 #endif // (KMP_ARCH_X86)
3036 #endif // KMP_HAVE_QUAD
3037 
3038 // routines for complex types
3039 
3040 // ------------------------------------------------------------------------
3041 // Workaround for cmplx4. Regular routines with a return value don't work
3042 // on Win_32e, so the captured value is returned through an additional parameter.
3043 #define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
3044  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3045  \
3046  if (flag) { \
3047  (*lhs) = (rhs)OP(*lhs); \
3048  (*out) = (*lhs); \
3049  } else { \
3050  (*out) = (*lhs); \
3051  (*lhs) = (rhs)OP(*lhs); \
3052  } \
3053  \
3054  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3055  return;
3056 // ------------------------------------------------------------------------
3057 
3058 #ifdef KMP_GOMP_COMPAT
3059 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \
3060  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3061  KMP_CHECK_GTID; \
3062  OP_CRITICAL_CPT_REV_WRK(OP, 0); \
3063  }
3064 #else
3065 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
3066 #endif /* KMP_GOMP_COMPAT */
3067 // ------------------------------------------------------------------------
3068 
3069 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \
3070  GOMP_FLAG) \
3071  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
3072  OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \
3073  OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
3074  }
3075 // The end of workaround for cmplx4
3076 
3077 // !!! TODO: check if we need to return void for cmplx4 routines
3078 // cmplx4 routines to return void
3079 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
3080  1) // __kmpc_atomic_cmplx4_sub_cpt_rev
3081 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
3082  1) // __kmpc_atomic_cmplx4_div_cpt_rev
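// Illustrative sketch only: the cmplx4 workaround routines generated just
// above return void and hand the captured value back through the extra 'out'
// parameter. The parameter order is assumed to match kmp_atomic.h, and the
// "example_" name is hypothetical.
#if 0
static void example_cmplx4_sub_cpt_rev(ident_t *loc, int gtid, kmp_cmplx32 *x,
                                       kmp_cmplx32 expr, kmp_cmplx32 *v) {
  // Performs *x = expr - *x; flag != 0 stores the updated value into *v,
  // flag == 0 stores the value observed before the update.
  __kmpc_atomic_cmplx4_sub_cpt_rev(loc, gtid, x, expr, v, 1);
}
#endif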
3083 
3084 ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
3085  1) // __kmpc_atomic_cmplx8_sub_cpt_rev
3086 ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
3087  1) // __kmpc_atomic_cmplx8_div_cpt_rev
3088 ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
3089  1) // __kmpc_atomic_cmplx10_sub_cpt_rev
3090 ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
3091  1) // __kmpc_atomic_cmplx10_div_cpt_rev
3092 #if KMP_HAVE_QUAD
3093 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
3094  1) // __kmpc_atomic_cmplx16_sub_cpt_rev
3095 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
3096  1) // __kmpc_atomic_cmplx16_div_cpt_rev
3097 #if (KMP_ARCH_X86)
3098 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
3099  1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
3100 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
3101  1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
3102 #endif // (KMP_ARCH_X86)
3103 #endif // KMP_HAVE_QUAD
3104 
3105 // Capture reverse for mixed type: RHS=float16
3106 #if KMP_HAVE_QUAD
3107 
3108 // Beginning of a definition (provides name, parameters, debug trace)
3109 // TYPE_ID - operands' type and size (fixed* for signed, fixed*u for
3110 // unsigned fixed-width integers)
3111 // OP_ID - operation identifier (add, sub, mul, ...)
3112 // TYPE - operands' type
3113 // -------------------------------------------------------------------------
3114 #define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
3115  RTYPE, LCK_ID, MASK, GOMP_FLAG) \
3116  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
3117  TYPE new_value; \
3118  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
3119  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
3120  }
3121 
3122 // -------------------------------------------------------------------------
3123 #define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
3124  LCK_ID, GOMP_FLAG) \
3125  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
3126  TYPE new_value; \
3127  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) /* send assignment */ \
3128  OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) /* send assignment */ \
3129  }
3130 
3131 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3132  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
3133 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3134  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
3135 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3136  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
3137 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3138  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp
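// Illustrative sketch only: a mixed-type ("_fp") entry point keeps the type of
// the updated location while taking a _Quad right-hand side. The parameter
// order is assumed to match kmp_atomic.h, and the "example_" name is
// hypothetical.
#if 0
static char example_fixed1_sub_cpt_rev_fp(ident_t *loc, int gtid, char *x,
                                          _Quad expr) {
  // Performs *x = (char)(expr - *x); flag != 0 returns the updated value.
  return __kmpc_atomic_fixed1_sub_cpt_rev_fp(loc, gtid, x, expr, 1);
}
#endif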
3139 
3140 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
3141  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
3142 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
3143  1,
3144  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
3145 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
3146  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
3147 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
3148  1,
3149  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp
3150 
3151 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
3152  3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
3153 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
3154  4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
3155 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
3156  3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
3157 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
3158  4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp
3159 
3160 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
3161  7,
3162  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
3163 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
3164  8i, 7,
3165  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
3166 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
3167  7,
3168  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
3169 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
3170  8i, 7,
3171  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp
3172 
3173 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
3174  4r, 3,
3175  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
3176 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
3177  4r, 3,
3178  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp
3179 
3180 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
3181  8r, 7,
3182  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
3183 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
3184  8r, 7,
3185  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp
3186 
3187 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
3188  10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
3189 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
3190  10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp
3191 
3192 #endif // KMP_HAVE_QUAD
3193 
3194 // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
3195 
3196 #define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3197  TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
3198  TYPE rhs) { \
3199  KMP_DEBUG_ASSERT(__kmp_init_serial); \
3200  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3201 
3202 #define CRITICAL_SWP(LCK_ID) \
3203  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3204  \
3205  old_value = (*lhs); \
3206  (*lhs) = rhs; \
3207  \
3208  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3209  return old_value;
3210 
3211 // ------------------------------------------------------------------------
3212 #ifdef KMP_GOMP_COMPAT
3213 #define GOMP_CRITICAL_SWP(FLAG) \
3214  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3215  KMP_CHECK_GTID; \
3216  CRITICAL_SWP(0); \
3217  }
3218 #else
3219 #define GOMP_CRITICAL_SWP(FLAG)
3220 #endif /* KMP_GOMP_COMPAT */
3221 
3222 #define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3223  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3224  TYPE old_value; \
3225  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3226  old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \
3227  return old_value; \
3228  }
3229 // ------------------------------------------------------------------------
3230 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3231  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3232  TYPE old_value; \
3233  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3234  old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \
3235  return old_value; \
3236  }
3237 
3238 // ------------------------------------------------------------------------
3239 #define CMPXCHG_SWP(TYPE, BITS) \
3240  { \
3241  TYPE KMP_ATOMIC_VOLATILE temp_val; \
3242  TYPE old_value, new_value; \
3243  temp_val = *lhs; \
3244  old_value = temp_val; \
3245  new_value = rhs; \
3246  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
3247  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
3248  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
3249  temp_val = *lhs; \
3250  old_value = temp_val; \
3251  new_value = rhs; \
3252  } \
3253  return old_value; \
3254  }
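// Illustrative sketch only: CMPXCHG_SWP retries a compare-and-swap on the
// integer image of the location (kmp_int##BITS), which is what lets the same
// loop swap float data. In portable C++ the pattern corresponds roughly to the
// sketch below (assumes <atomic>; the "example_" name is hypothetical).
#if 0
static kmp_real64 example_cmpxchg_swp(std::atomic<kmp_real64> *x,
                                      kmp_real64 rhs) {
  kmp_real64 old_value = x->load();
  // On failure, compare_exchange_weak reloads old_value, mirroring the
  // "temp_val = *lhs; old_value = temp_val;" refresh in the macro.
  while (!x->compare_exchange_weak(old_value, rhs)) {
  }
  return old_value; // the captured (pre-swap) value
}
#endif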
3255 
3256 // -------------------------------------------------------------------------
3257 #define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3258  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3259  TYPE old_value; \
3260  (void)old_value; \
3261  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3262  CMPXCHG_SWP(TYPE, BITS) \
3263  }
3264 
3265 ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
3266 ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
3267 ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp
3268 
3269 ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
3270  KMP_ARCH_X86) // __kmpc_atomic_float4_swp
3271 
3272 #if (KMP_ARCH_X86)
3273 ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
3274  KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3275 ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
3276  KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3277 #else
3278 ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3279 ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
3280  KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3281 #endif // (KMP_ARCH_X86)
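// Illustrative sketch only: how a compiler might lower the capture-write form
// for a 32-bit integer through the swp entry point generated above; the
// "example_" name is hypothetical.
#if 0
// #pragma omp atomic capture
// { v = x; x = expr; }
static kmp_int32 example_swp(ident_t *loc, int gtid, kmp_int32 *x,
                             kmp_int32 expr) {
  return __kmpc_atomic_fixed4_swp(loc, gtid, x, expr); // returns the old *x
}
#endif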
3282 
3283 // ------------------------------------------------------------------------
3284 // Routines for Extended types: long double, _Quad, complex flavours (use
3285 // critical section)
3286 #define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
3287  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3288  TYPE old_value; \
3289  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3290  CRITICAL_SWP(LCK_ID) \
3291  }
3292 
3293 // ------------------------------------------------------------------------
3294 // !!! TODO: check if we need to return void for cmplx4 routines
3295 // Workaround for cmplx4. Regular routines with a return value don't work
3296 // on Win_32e, so the captured value is returned through an additional parameter.
3297 
3298 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
3299  void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
3300  TYPE rhs, TYPE *out) { \
3301  KMP_DEBUG_ASSERT(__kmp_init_serial); \
3302  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3303 
3304 #define CRITICAL_SWP_WRK(LCK_ID) \
3305  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3306  \
3307  tmp = (*lhs); \
3308  (*lhs) = (rhs); \
3309  (*out) = tmp; \
3310  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3311  return;
3312 // ------------------------------------------------------------------------
3313 
3314 #ifdef KMP_GOMP_COMPAT
3315 #define GOMP_CRITICAL_SWP_WRK(FLAG) \
3316  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3317  KMP_CHECK_GTID; \
3318  CRITICAL_SWP_WRK(0); \
3319  }
3320 #else
3321 #define GOMP_CRITICAL_SWP_WRK(FLAG)
3322 #endif /* KMP_GOMP_COMPAT */
3323 // ------------------------------------------------------------------------
3324 
3325 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
3326  ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
3327  TYPE tmp; \
3328  GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
3329  CRITICAL_SWP_WRK(LCK_ID) \
3330  }
3331 // The end of workaround for cmplx4
3332 
3333 ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
3334 #if KMP_HAVE_QUAD
3335 ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
3336 #endif // KMP_HAVE_QUAD
3337 // cmplx4 routine to return void
3338 ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp
3339 
3340 // ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) //
3341 // __kmpc_atomic_cmplx4_swp
3342 
3343 ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
3344 ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
3345 #if KMP_HAVE_QUAD
3346 ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
3347 #if (KMP_ARCH_X86)
3348 ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
3349  1) // __kmpc_atomic_float16_a16_swp
3350 ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
3351  1) // __kmpc_atomic_cmplx16_a16_swp
3352 #endif // (KMP_ARCH_X86)
3353 #endif // KMP_HAVE_QUAD
3354 
3355 // End of OpenMP 4.0 Capture
3356 
3357 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3358 
3359 #undef OP_CRITICAL
3360 
3361 /* ------------------------------------------------------------------------ */
3362 /* Generic atomic routines */
3363 
3364 void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3365  void (*f)(void *, void *, void *)) {
3366  KMP_DEBUG_ASSERT(__kmp_init_serial);
3367 
3368  if (
3369 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3370  FALSE /* must use lock */
3371 #else
3372  TRUE
3373 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3374  ) {
3375  kmp_int8 old_value, new_value;
3376 
3377  old_value = *(kmp_int8 *)lhs;
3378  (*f)(&new_value, &old_value, rhs);
3379 
3380  /* TODO: Should this be acquire or release? */
3381  while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
3382  *(kmp_int8 *)&new_value)) {
3383  KMP_CPU_PAUSE();
3384 
3385  old_value = *(kmp_int8 *)lhs;
3386  (*f)(&new_value, &old_value, rhs);
3387  }
3388 
3389  return;
3390  } else {
3391  // All 1-byte data is of integer data type.
3392 
3393 #ifdef KMP_GOMP_COMPAT
3394  if (__kmp_atomic_mode == 2) {
3395  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3396  } else
3397 #endif /* KMP_GOMP_COMPAT */
3398  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3399 
3400  (*f)(lhs, lhs, rhs);
3401 
3402 #ifdef KMP_GOMP_COMPAT
3403  if (__kmp_atomic_mode == 2) {
3404  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3405  } else
3406 #endif /* KMP_GOMP_COMPAT */
3407  __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3408  }
3409 }
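// Illustrative sketch only: user-coded atomics reach the generic routines with
// a callback that computes *dst = *lhs OP *rhs, as the calls to (*f) above
// show; the "example_" names are hypothetical.
#if 0
static void example_add_1byte(void *dst, void *lhs, void *rhs) {
  *(kmp_int8 *)dst = (kmp_int8)(*(kmp_int8 *)lhs + *(kmp_int8 *)rhs);
}
static void example_generic_atomic_1(ident_t *loc, kmp_int8 *x, kmp_int8 expr) {
  int gtid = __kmp_entry_gtid();
  __kmpc_atomic_1(loc, gtid, x, &expr, example_add_1byte);
}
#endif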
3410 
3411 void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3412  void (*f)(void *, void *, void *)) {
3413  if (
3414 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3415  FALSE /* must use lock */
3416 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3417  TRUE /* no alignment problems */
3418 #else
3419  !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
3420 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3421  ) {
3422  kmp_int16 old_value, new_value;
3423 
3424  old_value = *(kmp_int16 *)lhs;
3425  (*f)(&new_value, &old_value, rhs);
3426 
3427  /* TODO: Should this be acquire or release? */
3428  while (!KMP_COMPARE_AND_STORE_ACQ16(
3429  (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
3430  KMP_CPU_PAUSE();
3431 
3432  old_value = *(kmp_int16 *)lhs;
3433  (*f)(&new_value, &old_value, rhs);
3434  }
3435 
3436  return;
3437  } else {
3438  // All 2-byte data is of integer data type.
3439 
3440 #ifdef KMP_GOMP_COMPAT
3441  if (__kmp_atomic_mode == 2) {
3442  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3443  } else
3444 #endif /* KMP_GOMP_COMPAT */
3445  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3446 
3447  (*f)(lhs, lhs, rhs);
3448 
3449 #ifdef KMP_GOMP_COMPAT
3450  if (__kmp_atomic_mode == 2) {
3451  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3452  } else
3453 #endif /* KMP_GOMP_COMPAT */
3454  __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3455  }
3456 }
3457 
3458 void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3459  void (*f)(void *, void *, void *)) {
3460  KMP_DEBUG_ASSERT(__kmp_init_serial);
3461 
3462  if (
3463 // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
3464 // GOMP compatibility is broken if this routine is called for floats.
3465 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3466  TRUE /* no alignment problems */
3467 #else
3468  !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
3469 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3470  ) {
3471  kmp_int32 old_value, new_value;
3472 
3473  old_value = *(kmp_int32 *)lhs;
3474  (*f)(&new_value, &old_value, rhs);
3475 
3476  /* TODO: Should this be acquire or release? */
3477  while (!KMP_COMPARE_AND_STORE_ACQ32(
3478  (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
3479  KMP_CPU_PAUSE();
3480 
3481  old_value = *(kmp_int32 *)lhs;
3482  (*f)(&new_value, &old_value, rhs);
3483  }
3484 
3485  return;
3486  } else {
3487  // Use __kmp_atomic_lock_4i for all 4-byte data,
3488  // even if it isn't of integer data type.
3489 
3490 #ifdef KMP_GOMP_COMPAT
3491  if (__kmp_atomic_mode == 2) {
3492  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3493  } else
3494 #endif /* KMP_GOMP_COMPAT */
3495  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3496 
3497  (*f)(lhs, lhs, rhs);
3498 
3499 #ifdef KMP_GOMP_COMPAT
3500  if (__kmp_atomic_mode == 2) {
3501  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3502  } else
3503 #endif /* KMP_GOMP_COMPAT */
3504  __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3505  }
3506 }
3507 
3508 void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3509  void (*f)(void *, void *, void *)) {
3510  KMP_DEBUG_ASSERT(__kmp_init_serial);
3511  if (
3512 
3513 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3514  FALSE /* must use lock */
3515 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3516  TRUE /* no alignment problems */
3517 #else
3518  !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
3519 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3520  ) {
3521  kmp_int64 old_value, new_value;
3522 
3523  old_value = *(kmp_int64 *)lhs;
3524  (*f)(&new_value, &old_value, rhs);
3525  /* TODO: Should this be acquire or release? */
3526  while (!KMP_COMPARE_AND_STORE_ACQ64(
3527  (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
3528  KMP_CPU_PAUSE();
3529 
3530  old_value = *(kmp_int64 *)lhs;
3531  (*f)(&new_value, &old_value, rhs);
3532  }
3533 
3534  return;
3535  } else {
3536  // Use __kmp_atomic_lock_8i for all 8-byte data,
3537  // even if it isn't of integer data type.
3538 
3539 #ifdef KMP_GOMP_COMPAT
3540  if (__kmp_atomic_mode == 2) {
3541  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3542  } else
3543 #endif /* KMP_GOMP_COMPAT */
3544  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3545 
3546  (*f)(lhs, lhs, rhs);
3547 
3548 #ifdef KMP_GOMP_COMPAT
3549  if (__kmp_atomic_mode == 2) {
3550  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3551  } else
3552 #endif /* KMP_GOMP_COMPAT */
3553  __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3554  }
3555 }
3556 
3557 void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3558  void (*f)(void *, void *, void *)) {
3559  KMP_DEBUG_ASSERT(__kmp_init_serial);
3560 
3561 #ifdef KMP_GOMP_COMPAT
3562  if (__kmp_atomic_mode == 2) {
3563  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3564  } else
3565 #endif /* KMP_GOMP_COMPAT */
3566  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3567 
3568  (*f)(lhs, lhs, rhs);
3569 
3570 #ifdef KMP_GOMP_COMPAT
3571  if (__kmp_atomic_mode == 2) {
3572  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3573  } else
3574 #endif /* KMP_GOMP_COMPAT */
3575  __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3576 }
3577 
3578 void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3579  void (*f)(void *, void *, void *)) {
3580  KMP_DEBUG_ASSERT(__kmp_init_serial);
3581 
3582 #ifdef KMP_GOMP_COMPAT
3583  if (__kmp_atomic_mode == 2) {
3584  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3585  } else
3586 #endif /* KMP_GOMP_COMPAT */
3587  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3588 
3589  (*f)(lhs, lhs, rhs);
3590 
3591 #ifdef KMP_GOMP_COMPAT
3592  if (__kmp_atomic_mode == 2) {
3593  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3594  } else
3595 #endif /* KMP_GOMP_COMPAT */
3596  __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3597 }
3598 
3599 void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3600  void (*f)(void *, void *, void *)) {
3601  KMP_DEBUG_ASSERT(__kmp_init_serial);
3602 
3603 #ifdef KMP_GOMP_COMPAT
3604  if (__kmp_atomic_mode == 2) {
3605  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3606  } else
3607 #endif /* KMP_GOMP_COMPAT */
3608  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3609 
3610  (*f)(lhs, lhs, rhs);
3611 
3612 #ifdef KMP_GOMP_COMPAT
3613  if (__kmp_atomic_mode == 2) {
3614  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3615  } else
3616 #endif /* KMP_GOMP_COMPAT */
3617  __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3618 }
3619 
3620 void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3621  void (*f)(void *, void *, void *)) {
3622  KMP_DEBUG_ASSERT(__kmp_init_serial);
3623 
3624 #ifdef KMP_GOMP_COMPAT
3625  if (__kmp_atomic_mode == 2) {
3626  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3627  } else
3628 #endif /* KMP_GOMP_COMPAT */
3629  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3630 
3631  (*f)(lhs, lhs, rhs);
3632 
3633 #ifdef KMP_GOMP_COMPAT
3634  if (__kmp_atomic_mode == 2) {
3635  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3636  } else
3637 #endif /* KMP_GOMP_COMPAT */
3638  __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3639 }
3640 
3641 // AC: same two routines as GOMP_atomic_start/end, but will be called by our
3642 // compiler; duplicated to avoid using third-party names in pure Intel code
3643 // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
3644 void __kmpc_atomic_start(void) {
3645  int gtid = __kmp_entry_gtid();
3646  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
3647  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3648 }
3649 
3650 void __kmpc_atomic_end(void) {
3651  int gtid = __kmp_get_gtid();
3652  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
3653  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3654 }
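// Illustrative sketch only: analogous to GOMP_atomic_start/end, a compiler can
// bracket an update that has no specialized entry point with these two calls;
// the "example_" name is hypothetical.
#if 0
static void example_start_end(double *x) {
  __kmpc_atomic_start();
  *x = *x * 2.0; // the update itself runs under the global atomic lock
  __kmpc_atomic_end();
}
#endif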
3655 
3660 // end of file