// core/stdarch/crates/core_arch/src/x86/sha.rs

1use crate::core_arch::{simd::*, x86::*};
2
// Raw LLVM builtins backing the hardware SHA/SM instructions wrapped below.
// Each `link_name` names the LLVM intrinsic directly; the safe public
// wrappers in this file are responsible for enabling the matching target
// feature before calling one of these.
#[allow(improper_ctypes)]
unsafe extern "C" {
    // SHA-1 and SHA-256 instructions (`sha` target feature).
    #[link_name = "llvm.x86.sha1msg1"]
    fn sha1msg1(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sha1msg2"]
    fn sha1msg2(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sha1nexte"]
    fn sha1nexte(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sha1rnds4"]
    fn sha1rnds4(a: i32x4, b: i32x4, c: i8) -> i32x4;
    #[link_name = "llvm.x86.sha256msg1"]
    fn sha256msg1(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sha256msg2"]
    fn sha256msg2(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sha256rnds2"]
    fn sha256rnds2(a: i32x4, b: i32x4, k: i32x4) -> i32x4;
    // SHA-512 instructions (`sha512` target feature); operate on 256-bit vectors.
    #[link_name = "llvm.x86.vsha512msg1"]
    fn vsha512msg1(a: i64x4, b: i64x2) -> i64x4;
    #[link_name = "llvm.x86.vsha512msg2"]
    fn vsha512msg2(a: i64x4, b: i64x4) -> i64x4;
    #[link_name = "llvm.x86.vsha512rnds2"]
    fn vsha512rnds2(a: i64x4, b: i64x4, k: i64x2) -> i64x4;
    // SM3 hash instructions (`sm3` target feature).
    #[link_name = "llvm.x86.vsm3msg1"]
    fn vsm3msg1(a: i32x4, b: i32x4, c: i32x4) -> i32x4;
    #[link_name = "llvm.x86.vsm3msg2"]
    fn vsm3msg2(a: i32x4, b: i32x4, c: i32x4) -> i32x4;
    #[link_name = "llvm.x86.vsm3rnds2"]
    fn vsm3rnds2(a: i32x4, b: i32x4, c: i32x4, d: i32) -> i32x4;
    // SM4 cipher instructions (`sm4` target feature); 128- and 256-bit forms.
    #[link_name = "llvm.x86.vsm4key4128"]
    fn vsm4key4128(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.vsm4key4256"]
    fn vsm4key4256(a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.vsm4rnds4128"]
    fn vsm4rnds4128(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.vsm4rnds4256"]
    fn vsm4rnds4256(a: i32x8, b: i32x8) -> i32x8;
}
40
41#[cfg(test)]
42use stdarch_test::assert_instr;
43
44/// Performs an intermediate calculation for the next four SHA1 message values
45/// (unsigned 32-bit integers) using previous message values from `a` and `b`,
46/// and returning the result.
47///
48/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1msg1_epu32)
49#[inline]
50#[target_feature(enable = "sha")]
51#[cfg_attr(test, assert_instr(sha1msg1))]
52#[stable(feature = "simd_x86", since = "1.27.0")]
53pub fn _mm_sha1msg1_epu32(a: __m128i, b: __m128i) -> __m128i {
54    unsafe { transmute(sha1msg1(a.as_i32x4(), b.as_i32x4())) }
55}
56
57/// Performs the final calculation for the next four SHA1 message values
58/// (unsigned 32-bit integers) using the intermediate result in `a` and the
59/// previous message values in `b`, and returns the result.
60///
61/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1msg2_epu32)
62#[inline]
63#[target_feature(enable = "sha")]
64#[cfg_attr(test, assert_instr(sha1msg2))]
65#[stable(feature = "simd_x86", since = "1.27.0")]
66pub fn _mm_sha1msg2_epu32(a: __m128i, b: __m128i) -> __m128i {
67    unsafe { transmute(sha1msg2(a.as_i32x4(), b.as_i32x4())) }
68}
69
70/// Calculate SHA1 state variable E after four rounds of operation from the
71/// current SHA1 state variable `a`, add that value to the scheduled values
72/// (unsigned 32-bit integers) in `b`, and returns the result.
73///
74/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1nexte_epu32)
75#[inline]
76#[target_feature(enable = "sha")]
77#[cfg_attr(test, assert_instr(sha1nexte))]
78#[stable(feature = "simd_x86", since = "1.27.0")]
79pub fn _mm_sha1nexte_epu32(a: __m128i, b: __m128i) -> __m128i {
80    unsafe { transmute(sha1nexte(a.as_i32x4(), b.as_i32x4())) }
81}
82
83/// Performs four rounds of SHA1 operation using an initial SHA1 state (A,B,C,D)
84/// from `a` and some pre-computed sum of the next 4 round message values
85/// (unsigned 32-bit integers), and state variable E from `b`, and return the
86/// updated SHA1 state (A,B,C,D). `FUNC` contains the logic functions and round
87/// constants.
88///
89/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1rnds4_epu32)
90#[inline]
91#[target_feature(enable = "sha")]
92#[cfg_attr(test, assert_instr(sha1rnds4, FUNC = 0))]
93#[rustc_legacy_const_generics(2)]
94#[stable(feature = "simd_x86", since = "1.27.0")]
95pub fn _mm_sha1rnds4_epu32<const FUNC: i32>(a: __m128i, b: __m128i) -> __m128i {
96    static_assert_uimm_bits!(FUNC, 2);
97    unsafe { transmute(sha1rnds4(a.as_i32x4(), b.as_i32x4(), FUNC as i8)) }
98}
99
100/// Performs an intermediate calculation for the next four SHA256 message values
101/// (unsigned 32-bit integers) using previous message values from `a` and `b`,
102/// and return the result.
103///
104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256msg1_epu32)
105#[inline]
106#[target_feature(enable = "sha")]
107#[cfg_attr(test, assert_instr(sha256msg1))]
108#[stable(feature = "simd_x86", since = "1.27.0")]
109pub fn _mm_sha256msg1_epu32(a: __m128i, b: __m128i) -> __m128i {
110    unsafe { transmute(sha256msg1(a.as_i32x4(), b.as_i32x4())) }
111}
112
113/// Performs the final calculation for the next four SHA256 message values
114/// (unsigned 32-bit integers) using previous message values from `a` and `b`,
115/// and return the result.
116///
117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256msg2_epu32)
118#[inline]
119#[target_feature(enable = "sha")]
120#[cfg_attr(test, assert_instr(sha256msg2))]
121#[stable(feature = "simd_x86", since = "1.27.0")]
122pub fn _mm_sha256msg2_epu32(a: __m128i, b: __m128i) -> __m128i {
123    unsafe { transmute(sha256msg2(a.as_i32x4(), b.as_i32x4())) }
124}
125
126/// Performs 2 rounds of SHA256 operation using an initial SHA256 state
127/// (C,D,G,H) from `a`, an initial SHA256 state (A,B,E,F) from `b`, and a
128/// pre-computed sum of the next 2 round message values (unsigned 32-bit
129/// integers) and the corresponding round constants from `k`, and store the
130/// updated SHA256 state (A,B,E,F) in dst.
131///
132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256rnds2_epu32)
133#[inline]
134#[target_feature(enable = "sha")]
135#[cfg_attr(test, assert_instr(sha256rnds2))]
136#[stable(feature = "simd_x86", since = "1.27.0")]
137pub fn _mm_sha256rnds2_epu32(a: __m128i, b: __m128i, k: __m128i) -> __m128i {
138    unsafe { transmute(sha256rnds2(a.as_i32x4(), b.as_i32x4(), k.as_i32x4())) }
139}
140
141/// This intrinsic is one of the two SHA512 message scheduling instructions.
142/// The intrinsic performs an intermediate calculation for the next four SHA512
143/// message qwords. The calculated results are stored in dst.
144///
145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sha512msg1_epi64)
146#[inline]
147#[target_feature(enable = "sha512,avx")]
148#[cfg_attr(test, assert_instr(vsha512msg1))]
149#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
150pub fn _mm256_sha512msg1_epi64(a: __m256i, b: __m128i) -> __m256i {
151    unsafe { transmute(vsha512msg1(a.as_i64x4(), b.as_i64x2())) }
152}
153
154/// This intrinsic is one of the two SHA512 message scheduling instructions.
155/// The intrinsic performs the final calculation for the next four SHA512 message
156/// qwords. The calculated results are stored in dst.
157///
158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sha512msg2_epi64)
159#[inline]
160#[target_feature(enable = "sha512,avx")]
161#[cfg_attr(test, assert_instr(vsha512msg2))]
162#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
163pub fn _mm256_sha512msg2_epi64(a: __m256i, b: __m256i) -> __m256i {
164    unsafe { transmute(vsha512msg2(a.as_i64x4(), b.as_i64x4())) }
165}
166
167/// This intrinsic performs two rounds of SHA512 operation using initial SHA512 state
168/// `(C,D,G,H)` from `a`, an initial SHA512 state `(A,B,E,F)` from `b`, and a
169/// pre-computed sum of the next two round message qwords and the corresponding
170/// round constants from `c` (only the two lower qwords of the third operand). The
171/// updated SHA512 state `(A,B,E,F)` is written to dst, and dst can be used as the
172/// updated state `(C,D,G,H)` in later rounds.
173///
174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sha512rnds2_epi64)
175#[inline]
176#[target_feature(enable = "sha512,avx")]
177#[cfg_attr(test, assert_instr(vsha512rnds2))]
178#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
179pub fn _mm256_sha512rnds2_epi64(a: __m256i, b: __m256i, k: __m128i) -> __m256i {
180    unsafe { transmute(vsha512rnds2(a.as_i64x4(), b.as_i64x4(), k.as_i64x2())) }
181}
182
183/// This is one of the two SM3 message scheduling intrinsics. The intrinsic performs
184/// an initial calculation for the next four SM3 message words. The calculated results
185/// are stored in dst.
186///
187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sm3msg1_epi32)
188#[inline]
189#[target_feature(enable = "sm3,avx")]
190#[cfg_attr(test, assert_instr(vsm3msg1))]
191#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
192pub fn _mm_sm3msg1_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
193    unsafe { transmute(vsm3msg1(a.as_i32x4(), b.as_i32x4(), c.as_i32x4())) }
194}
195
196/// This is one of the two SM3 message scheduling intrinsics. The intrinsic performs
197/// the final calculation for the next four SM3 message words. The calculated results
198/// are stored in dst.
199///
200/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sm3msg2_epi32)
201#[inline]
202#[target_feature(enable = "sm3,avx")]
203#[cfg_attr(test, assert_instr(vsm3msg2))]
204#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
205pub fn _mm_sm3msg2_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
206    unsafe { transmute(vsm3msg2(a.as_i32x4(), b.as_i32x4(), c.as_i32x4())) }
207}
208
209/// The intrinsic performs two rounds of SM3 operation using initial SM3 state `(C, D, G, H)`
210/// from `a`, an initial SM3 states `(A, B, E, F)` from `b` and a pre-computed words from the
211/// `c`. `a` with initial SM3 state of `(C, D, G, H)` assumes input of non-rotated left variables
212/// from previous state. The updated SM3 state `(A, B, E, F)` is written to `a`. The `imm8`
213/// should contain the even round number for the first of the two rounds computed by this instruction.
214/// The computation masks the `imm8` value by ANDing it with `0x3E` so that only even round numbers
215/// from 0 through 62 are used for this operation. The calculated results are stored in dst.
216///
217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sm3rnds2_epi32)
218#[inline]
219#[target_feature(enable = "sm3,avx")]
220#[cfg_attr(test, assert_instr(vsm3rnds2, IMM8 = 0))]
221#[rustc_legacy_const_generics(3)]
222#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
223pub fn _mm_sm3rnds2_epi32<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
224    static_assert!(
225        IMM8 == (IMM8 & 0x3e),
226        "IMM8 must be an even number in the range `0..=62`"
227    );
228    unsafe { transmute(vsm3rnds2(a.as_i32x4(), b.as_i32x4(), c.as_i32x4(), IMM8)) }
229}
230
231/// This intrinsic performs four rounds of SM4 key expansion. The intrinsic operates on independent
232/// 128-bit lanes. The calculated results are stored in dst.
233///
234/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sm4key4_epi32)
235#[inline]
236#[target_feature(enable = "sm4,avx")]
237#[cfg_attr(test, assert_instr(vsm4key4))]
238#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
239pub fn _mm_sm4key4_epi32(a: __m128i, b: __m128i) -> __m128i {
240    unsafe { transmute(vsm4key4128(a.as_i32x4(), b.as_i32x4())) }
241}
242
243/// This intrinsic performs four rounds of SM4 key expansion. The intrinsic operates on independent
244/// 128-bit lanes. The calculated results are stored in dst.
245///
246/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sm4key4_epi32)
247#[inline]
248#[target_feature(enable = "sm4,avx")]
249#[cfg_attr(test, assert_instr(vsm4key4))]
250#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
251pub fn _mm256_sm4key4_epi32(a: __m256i, b: __m256i) -> __m256i {
252    unsafe { transmute(vsm4key4256(a.as_i32x8(), b.as_i32x8())) }
253}
254
255/// This intrinsic performs four rounds of SM4 encryption. The intrinsic operates on independent
256/// 128-bit lanes. The calculated results are stored in dst.
257///
258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sm4rnds4_epi32)
259#[inline]
260#[target_feature(enable = "sm4,avx")]
261#[cfg_attr(test, assert_instr(vsm4rnds4))]
262#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
263pub fn _mm_sm4rnds4_epi32(a: __m128i, b: __m128i) -> __m128i {
264    unsafe { transmute(vsm4rnds4128(a.as_i32x4(), b.as_i32x4())) }
265}
266
267/// This intrinsic performs four rounds of SM4 encryption. The intrinsic operates on independent
268/// 128-bit lanes. The calculated results are stored in dst.
269///
270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sm4rnds4_epi32)
271#[inline]
272#[target_feature(enable = "sm4,avx")]
273#[cfg_attr(test, assert_instr(vsm4rnds4))]
274#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
275pub fn _mm256_sm4rnds4_epi32(a: __m256i, b: __m256i) -> __m256i {
276    unsafe { transmute(vsm4rnds4256(a.as_i32x8(), b.as_i32x8())) }
277}
278
#[cfg(test)]
mod tests {
    use crate::{
        core_arch::{simd::*, x86::*},
        hint::black_box,
    };
    use stdarch_test::simd_test;

    // The SHA-1/SHA-256 tests below feed fixed inputs — the first eight
    // SHA-256 round constants — through the instruction and compare against
    // precomputed results of the hardware instruction.
    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha1msg1_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0x98829f34f74ad457, 0xda2b1a44d0b5ad3c);
        let r = _mm_sha1msg1_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha1msg2_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0xf714b202d863d47d, 0x90c30d946b3d3b35);
        let r = _mm_sha1msg2_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha1nexte_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0x2589d5be923f82a4, 0x59f111f13956c25b);
        let r = _mm_sha1nexte_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    // Exercises all four values of the FUNC immediate, i.e. all four SHA-1
    // round-function/constant variants.
    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha1rnds4_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0x32b13cd8322f5268, 0xc54420862bd9246f);
        let r = _mm_sha1rnds4_epu32::<0>(a, b);
        assert_eq_m128i(r, expected);

        let expected = _mm_set_epi64x(0x6d4c43e56a3c25d9, 0xa7e00fb775cbd3fe);
        let r = _mm_sha1rnds4_epu32::<1>(a, b);
        assert_eq_m128i(r, expected);

        let expected = _mm_set_epi64x(0xb304e383c01222f4, 0x66f6b3b1f89d8001);
        let r = _mm_sha1rnds4_epu32::<2>(a, b);
        assert_eq_m128i(r, expected);

        let expected = _mm_set_epi64x(0x8189b758bfabfa79, 0xdb08f6e78cae098b);
        let r = _mm_sha1rnds4_epu32::<3>(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha256msg1_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0xeb84973fd5cda67d, 0x2857b88f406b09ee);
        let r = _mm_sha256msg1_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha256msg2_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0xb58777ce887fd851, 0x15d1ec8b73ac8450);
        let r = _mm_sha256msg2_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha256rnds2_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        // Only the low two dwords of `k` feed the two rounds; the upper
        // half is left zero.
        let k = _mm_set_epi64x(0, 0x12835b01d807aa98);
        let expected = _mm_set_epi64x(0xd3063037effb15ea, 0x187ee3db0d6d1d19);
        let r = _mm_sha256rnds2_epu32(a, b, k);
        assert_eq_m128i(r, expected);
    }

    // Arbitrary but fixed 64-bit bit patterns used as packed inputs for the
    // SHA-512 tests below.
    static DATA_64: [u64; 10] = [
        0x0011223344556677,
        0x8899aabbccddeeff,
        0xffeeddccbbaa9988,
        0x7766554433221100,
        0x0123456789abcdef,
        0xfedcba9876543210,
        0x02468ace13579bdf,
        0xfdb97531eca86420,
        0x048c159d26ae37bf,
        0xfb73ea62d951c840,
    ];

    #[simd_test(enable = "sha512,avx")]
    unsafe fn test_mm256_sha512msg1_epi64() {
        // Scalar model of the SHA-512 small sigma-0 function (FIPS 180-4).
        fn s0(word: u64) -> u64 {
            word.rotate_right(1) ^ word.rotate_right(8) ^ (word >> 7)
        }

        let A = &DATA_64[0..4];
        let B = &DATA_64[4..6];

        let a = _mm256_loadu_si256(A.as_ptr().cast());
        let b = _mm_loadu_si128(B.as_ptr().cast());

        let r = _mm256_sha512msg1_epi64(a, b);

        // Expected value computed lane by lane with the scalar model.
        let e = _mm256_setr_epi64x(
            A[0].wrapping_add(s0(A[1])) as _,
            A[1].wrapping_add(s0(A[2])) as _,
            A[2].wrapping_add(s0(A[3])) as _,
            A[3].wrapping_add(s0(B[0])) as _,
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "sha512,avx")]
    unsafe fn test_mm256_sha512msg2_epi64() {
        // Scalar model of the SHA-512 small sigma-1 function (FIPS 180-4).
        fn s1(word: u64) -> u64 {
            word.rotate_right(19) ^ word.rotate_right(61) ^ (word >> 6)
        }

        let A = &DATA_64[0..4];
        let B = &DATA_64[4..8];

        let a = _mm256_loadu_si256(A.as_ptr().cast());
        let b = _mm256_loadu_si256(B.as_ptr().cast());

        let r = _mm256_sha512msg2_epi64(a, b);

        // The upper two lanes depend on the freshly computed lower lanes.
        let e0 = A[0].wrapping_add(s1(B[2]));
        let e1 = A[1].wrapping_add(s1(B[3]));
        let e = _mm256_setr_epi64x(
            e0 as _,
            e1 as _,
            A[2].wrapping_add(s1(e0)) as _,
            A[3].wrapping_add(s1(e1)) as _,
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "sha512,avx")]
    unsafe fn test_mm256_sha512rnds2_epi64() {
        // Scalar models of the SHA-512 big Sigma-0/Sigma-1, Maj and Ch
        // functions (FIPS 180-4).
        fn cap_sigma0(word: u64) -> u64 {
            word.rotate_right(28) ^ word.rotate_right(34) ^ word.rotate_right(39)
        }

        fn cap_sigma1(word: u64) -> u64 {
            word.rotate_right(14) ^ word.rotate_right(18) ^ word.rotate_right(41)
        }

        fn maj(a: u64, b: u64, c: u64) -> u64 {
            (a & b) ^ (a & c) ^ (b & c)
        }

        fn ch(e: u64, f: u64, g: u64) -> u64 {
            (e & f) ^ (g & !e)
        }

        let A = &DATA_64[0..4];
        let B = &DATA_64[4..8];
        let K = &DATA_64[8..10];

        let a = _mm256_loadu_si256(A.as_ptr().cast());
        let b = _mm256_loadu_si256(B.as_ptr().cast());
        let k = _mm_loadu_si128(K.as_ptr().cast());

        let r = _mm256_sha512rnds2_epi64(a, b, k);

        // Scalar reference: two SHA-512 rounds over the repacked working
        // variables; `rotate_right(1)` shifts them for the next round.
        let mut array = [B[3], B[2], A[3], A[2], B[1], B[0], A[1], A[0]];
        for i in 0..2 {
            let new_d = ch(array[4], array[5], array[6])
                .wrapping_add(cap_sigma1(array[4]))
                .wrapping_add(K[i])
                .wrapping_add(array[7]);
            array[7] = new_d
                .wrapping_add(maj(array[0], array[1], array[2]))
                .wrapping_add(cap_sigma0(array[0]));
            array[3] = new_d.wrapping_add(array[3]);
            array.rotate_right(1);
        }
        let e = _mm256_setr_epi64x(array[5] as _, array[4] as _, array[1] as _, array[0] as _);

        assert_eq_m256i(r, e);
    }

    // Arbitrary but fixed 32-bit bit patterns used as packed inputs for the
    // SM3/SM4 tests below.
    static DATA_32: [u32; 16] = [
        0x00112233, 0x44556677, 0x8899aabb, 0xccddeeff, 0xffeeddcc, 0xbbaa9988, 0x77665544,
        0x33221100, 0x01234567, 0x89abcdef, 0xfedcba98, 0x76543210, 0x02468ace, 0x13579bdf,
        0xfdb97531, 0xeca86420,
    ];

    #[simd_test(enable = "sm3,avx")]
    unsafe fn test_mm_sm3msg1_epi32() {
        // Scalar model of the SM3 permutation function P1.
        fn p1(x: u32) -> u32 {
            x ^ x.rotate_left(15) ^ x.rotate_left(23)
        }
        let A = &DATA_32[0..4];
        let B = &DATA_32[4..8];
        let C = &DATA_32[8..12];

        let a = _mm_loadu_si128(A.as_ptr().cast());
        let b = _mm_loadu_si128(B.as_ptr().cast());
        let c = _mm_loadu_si128(C.as_ptr().cast());

        let r = _mm_sm3msg1_epi32(a, b, c);

        // NOTE: the top lane omits the rotated `b` term, matching the
        // instruction's handling of the final element.
        let e = _mm_setr_epi32(
            p1(A[0] ^ C[0] ^ B[0].rotate_left(15)) as _,
            p1(A[1] ^ C[1] ^ B[1].rotate_left(15)) as _,
            p1(A[2] ^ C[2] ^ B[2].rotate_left(15)) as _,
            p1(A[3] ^ C[3]) as _,
        );

        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sm3,avx")]
    unsafe fn test_mm_sm3msg2_epi32() {
        let A = &DATA_32[0..4];
        let B = &DATA_32[4..8];
        let C = &DATA_32[8..12];

        let a = _mm_loadu_si128(A.as_ptr().cast());
        let b = _mm_loadu_si128(B.as_ptr().cast());
        let c = _mm_loadu_si128(C.as_ptr().cast());

        let r = _mm_sm3msg2_epi32(a, b, c);

        // The top lane additionally folds in rotations of the freshly
        // computed bottom lane `e0`.
        let e0 = B[0].rotate_left(7) ^ C[0] ^ A[0];
        let e = _mm_setr_epi32(
            e0 as _,
            (B[1].rotate_left(7) ^ C[1] ^ A[1]) as _,
            (B[2].rotate_left(7) ^ C[2] ^ A[2]) as _,
            (B[3].rotate_left(7)
                ^ C[3]
                ^ A[3]
                ^ e0.rotate_left(6)
                ^ e0.rotate_left(15)
                ^ e0.rotate_left(30)) as _,
        );

        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sm3,avx")]
    unsafe fn test_mm_sm3rnds2_epi32() {
        // Scalar model of the SM3 permutation function P0.
        fn p0(x: u32) -> u32 {
            x ^ x.rotate_left(9) ^ x.rotate_left(17)
        }
        // SM3 boolean functions FF and GG; both switch definition at
        // round 16.
        fn ff(x: u32, y: u32, z: u32, round: u32) -> u32 {
            if round < 16 {
                x ^ y ^ z
            } else {
                (x & y) | (x & z) | (y & z)
            }
        }
        fn gg(x: u32, y: u32, z: u32, round: u32) -> u32 {
            if round < 16 {
                x ^ y ^ z
            } else {
                (x & y) | (!x & z)
            }
        }

        // Even round number passed as the instruction's immediate.
        const ROUND: u32 = 30;

        let A = &DATA_32[0..4];
        let B = &DATA_32[4..8];
        let C = &DATA_32[8..12];

        let a = _mm_loadu_si128(A.as_ptr().cast());
        let b = _mm_loadu_si128(B.as_ptr().cast());
        let c = _mm_loadu_si128(C.as_ptr().cast());

        let r = _mm_sm3rnds2_epi32::<{ ROUND as i32 }>(a, b, c);

        // SM3 round constant T_j: one value for rounds 0-15, another for
        // rounds 16-63.
        let CONST: u32 = if ROUND < 16 { 0x79cc4519 } else { 0x7a879d8a };

        // Scalar working variables; the lanes taken from `a` enter
        // pre-rotated per the instruction's input convention.
        let mut array = [
            B[3],
            B[2],
            A[3].rotate_left(9),
            A[2].rotate_left(9),
            B[1],
            B[0],
            A[1].rotate_left(19),
            A[0].rotate_left(19),
        ];

        // Textbook two-round SM3 compression step.
        for i in 0..2 {
            let s1 = array[0]
                .rotate_left(12)
                .wrapping_add(array[4])
                .wrapping_add(CONST.rotate_left(ROUND as u32 + i as u32))
                .rotate_left(7);
            let s2 = s1 ^ array[0].rotate_left(12);

            let t1 = ff(array[0], array[1], array[2], ROUND)
                .wrapping_add(array[3])
                .wrapping_add(s2)
                .wrapping_add(C[i] ^ C[i + 2]);
            let t2 = gg(array[4], array[5], array[6], ROUND)
                .wrapping_add(array[7])
                .wrapping_add(s1)
                .wrapping_add(C[i]);

            array[3] = array[2];
            array[2] = array[1].rotate_left(9);
            array[1] = array[0];
            array[0] = t1;
            array[7] = array[6];
            array[6] = array[5].rotate_left(19);
            array[5] = array[4];
            array[4] = p0(t2);
        }

        let e = _mm_setr_epi32(array[5] as _, array[4] as _, array[1] as _, array[0] as _);

        assert_eq_m128i(r, e);
    }

    // Scalar model of the SM4 non-linear transformation tau: the fixed SM4
    // S-box applied to each of the four bytes of the word. Shared by the
    // key-expansion and encryption tests below.
    fn lower_t(x: u32) -> u32 {
        static SBOX: [u8; 256] = [
            0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7, 0x16, 0xB6, 0x14, 0xC2, 0x28, 0xFB,
            0x2C, 0x05, 0x2B, 0x67, 0x9A, 0x76, 0x2A, 0xBE, 0x04, 0xC3, 0xAA, 0x44, 0x13, 0x26,
            0x49, 0x86, 0x06, 0x99, 0x9C, 0x42, 0x50, 0xF4, 0x91, 0xEF, 0x98, 0x7A, 0x33, 0x54,
            0x0B, 0x43, 0xED, 0xCF, 0xAC, 0x62, 0xE4, 0xB3, 0x1C, 0xA9, 0xC9, 0x08, 0xE8, 0x95,
            0x80, 0xDF, 0x94, 0xFA, 0x75, 0x8F, 0x3F, 0xA6, 0x47, 0x07, 0xA7, 0xFC, 0xF3, 0x73,
            0x17, 0xBA, 0x83, 0x59, 0x3C, 0x19, 0xE6, 0x85, 0x4F, 0xA8, 0x68, 0x6B, 0x81, 0xB2,
            0x71, 0x64, 0xDA, 0x8B, 0xF8, 0xEB, 0x0F, 0x4B, 0x70, 0x56, 0x9D, 0x35, 0x1E, 0x24,
            0x0E, 0x5E, 0x63, 0x58, 0xD1, 0xA2, 0x25, 0x22, 0x7C, 0x3B, 0x01, 0x21, 0x78, 0x87,
            0xD4, 0x00, 0x46, 0x57, 0x9F, 0xD3, 0x27, 0x52, 0x4C, 0x36, 0x02, 0xE7, 0xA0, 0xC4,
            0xC8, 0x9E, 0xEA, 0xBF, 0x8A, 0xD2, 0x40, 0xC7, 0x38, 0xB5, 0xA3, 0xF7, 0xF2, 0xCE,
            0xF9, 0x61, 0x15, 0xA1, 0xE0, 0xAE, 0x5D, 0xA4, 0x9B, 0x34, 0x1A, 0x55, 0xAD, 0x93,
            0x32, 0x30, 0xF5, 0x8C, 0xB1, 0xE3, 0x1D, 0xF6, 0xE2, 0x2E, 0x82, 0x66, 0xCA, 0x60,
            0xC0, 0x29, 0x23, 0xAB, 0x0D, 0x53, 0x4E, 0x6F, 0xD5, 0xDB, 0x37, 0x45, 0xDE, 0xFD,
            0x8E, 0x2F, 0x03, 0xFF, 0x6A, 0x72, 0x6D, 0x6C, 0x5B, 0x51, 0x8D, 0x1B, 0xAF, 0x92,
            0xBB, 0xDD, 0xBC, 0x7F, 0x11, 0xD9, 0x5C, 0x41, 0x1F, 0x10, 0x5A, 0xD8, 0x0A, 0xC1,
            0x31, 0x88, 0xA5, 0xCD, 0x7B, 0xBD, 0x2D, 0x74, 0xD0, 0x12, 0xB8, 0xE5, 0xB4, 0xB0,
            0x89, 0x69, 0x97, 0x4A, 0x0C, 0x96, 0x77, 0x7E, 0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E,
            0xC6, 0x84, 0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20, 0x79, 0xEE, 0x5F, 0x3E,
            0xD7, 0xCB, 0x39, 0x48,
        ];

        ((SBOX[(x >> 24) as usize] as u32) << 24)
            | ((SBOX[((x >> 16) & 0xff) as usize] as u32) << 16)
            | ((SBOX[((x >> 8) & 0xff) as usize] as u32) << 8)
            | (SBOX[(x & 0xff) as usize] as u32)
    }

    #[simd_test(enable = "sm4,avx")]
    unsafe fn test_mm_sm4key4_epi32() {
        // Linear transform L' and round function used by SM4 key expansion.
        fn l_key(x: u32) -> u32 {
            x ^ x.rotate_left(13) ^ x.rotate_left(23)
        }
        fn f_key(x0: u32, x1: u32, x2: u32, x3: u32, rk: u32) -> u32 {
            x0 ^ l_key(lower_t(x1 ^ x2 ^ x3 ^ rk))
        }

        let A = &DATA_32[0..4];
        let B = &DATA_32[4..8];

        let a = _mm_loadu_si128(A.as_ptr().cast());
        let b = _mm_loadu_si128(B.as_ptr().cast());

        let r = _mm_sm4key4_epi32(a, b);

        // Four chained key-expansion steps; each output feeds the next.
        let e0 = f_key(A[0], A[1], A[2], A[3], B[0]);
        let e1 = f_key(A[1], A[2], A[3], e0, B[1]);
        let e2 = f_key(A[2], A[3], e0, e1, B[2]);
        let e3 = f_key(A[3], e0, e1, e2, B[3]);
        let e = _mm_setr_epi32(e0 as _, e1 as _, e2 as _, e3 as _);

        assert_eq_m128i(r, e);
    }

    // The 256-bit form works per 128-bit lane, so the expected value is two
    // independent results of the 128-bit intrinsic.
    #[simd_test(enable = "sm4,avx")]
    unsafe fn test_mm256_sm4key4_epi32() {
        let a_low = _mm_loadu_si128(DATA_32.as_ptr().cast());
        let a_high = _mm_loadu_si128(DATA_32[4..].as_ptr().cast());
        let b_low = _mm_loadu_si128(DATA_32[8..].as_ptr().cast());
        let b_high = _mm_loadu_si128(DATA_32[12..].as_ptr().cast());

        let a = _mm256_set_m128i(a_high, a_low);
        let b = _mm256_set_m128i(b_high, b_low);

        let r = _mm256_sm4key4_epi32(a, b);

        let e_low = _mm_sm4key4_epi32(a_low, b_low);
        let e_high = _mm_sm4key4_epi32(a_high, b_high);
        let e = _mm256_set_m128i(e_high, e_low);

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "sm4,avx")]
    unsafe fn test_mm_sm4rnds4_epi32() {
        // Linear transform L and round function used by SM4 encryption.
        fn l_rnd(x: u32) -> u32 {
            x ^ x.rotate_left(2) ^ x.rotate_left(10) ^ x.rotate_left(18) ^ x.rotate_left(24)
        }
        fn f_rnd(x0: u32, x1: u32, x2: u32, x3: u32, rk: u32) -> u32 {
            x0 ^ l_rnd(lower_t(x1 ^ x2 ^ x3 ^ rk))
        }

        let A = &DATA_32[0..4];
        let B = &DATA_32[4..8];

        let a = _mm_loadu_si128(A.as_ptr().cast());
        let b = _mm_loadu_si128(B.as_ptr().cast());

        let r = _mm_sm4rnds4_epi32(a, b);

        // Four chained encryption rounds; each output feeds the next.
        let e0 = f_rnd(A[0], A[1], A[2], A[3], B[0]);
        let e1 = f_rnd(A[1], A[2], A[3], e0, B[1]);
        let e2 = f_rnd(A[2], A[3], e0, e1, B[2]);
        let e3 = f_rnd(A[3], e0, e1, e2, B[3]);
        let e = _mm_setr_epi32(e0 as _, e1 as _, e2 as _, e3 as _);

        assert_eq_m128i(r, e);
    }

    // The 256-bit form works per 128-bit lane, so the expected value is two
    // independent results of the 128-bit intrinsic.
    #[simd_test(enable = "sm4,avx")]
    unsafe fn test_mm256_sm4rnds4_epi32() {
        let a_low = _mm_loadu_si128(DATA_32.as_ptr().cast());
        let a_high = _mm_loadu_si128(DATA_32[4..].as_ptr().cast());
        let b_low = _mm_loadu_si128(DATA_32[8..].as_ptr().cast());
        let b_high = _mm_loadu_si128(DATA_32[12..].as_ptr().cast());

        let a = _mm256_set_m128i(a_high, a_low);
        let b = _mm256_set_m128i(b_high, b_low);

        let r = _mm256_sm4rnds4_epi32(a, b);

        let e_low = _mm_sm4rnds4_epi32(a_low, b_low);
        let e_high = _mm_sm4rnds4_epi32(a_high, b_high);
        let e = _mm256_set_m128i(e_high, e_low);

        assert_eq_m256i(r, e);
    }
}