core\stdarch\crates\core_arch\src/
simd.rs

1//! Internal `#[repr(simd)]` types
2
3#![allow(non_camel_case_types)]
4
// Declares a crate-internal `#[repr(simd)]` vector type named `$id` wrapping
// `[$elem_type; $len]`, together with its constructors, lane accessors and
// `PartialEq`/`Debug` impls. The trailing identifier list supplies the
// parameter names of `new`, one per lane.
macro_rules! simd_ty {
    ($id:ident [$elem_type:ty ; $len:literal]: $($param_name:ident),*) => {
        #[repr(simd)]
        #[derive(Copy, Clone)]
        pub(crate) struct $id([$elem_type; $len]);

        #[allow(clippy::use_self)]
        impl $id {
            /// The vector whose storage is entirely zero bytes.
            pub(crate) const ZERO: Self = unsafe { crate::mem::zeroed() };

            /// Builds a vector from one argument per lane, given in lane order.
            #[inline(always)]
            pub(crate) const fn new($($param_name: $elem_type),*) -> Self {
                Self::from_array([$($param_name),*])
            }

            /// Wraps an array of lanes as a vector, keeping the lane order.
            #[inline(always)]
            pub(crate) const fn from_array(elements: [$elem_type; $len]) -> Self {
                $id(elements)
            }

            /// Builds a vector with `value` in every lane.
            // FIXME: Workaround rust@60637
            #[inline(always)]
            pub(crate) fn splat(value: $elem_type) -> Self {
                // Single-lane helper vector that the shuffle broadcasts from.
                #[derive(Copy, Clone)]
                #[repr(simd)]
                struct Single([$elem_type; 1]);
                let lane = Single([value]);
                // SAFETY: every shuffle index is 0, which is in-bounds for
                // the one-element input vectors.
                unsafe { simd_shuffle!(lane, lane, [0; $len]) }
            }

            /// Returns the lane at position `index`.
            ///
            /// `index` is a runtime value rather than a constant, so this is
            /// not efficient. Use for testing only.
            // FIXME: Workaround rust@60637
            #[inline(always)]
            pub(crate) fn extract(&self, index: usize) -> $elem_type {
                let lanes = self.as_array();
                lanes[index]
            }

            /// Borrows the lanes of this vector as a plain array.
            #[inline]
            pub(crate) fn as_array(&self) -> &[$elem_type; $len] {
                let array_ptr = (self as *const Self).cast::<[$elem_type; $len]>();
                // SAFETY: The leading bytes of a simd type can always be read
                // as an array of its lanes. Some widths may carry extra
                // padding after the lanes, but the array view never reaches
                // into it.
                unsafe { &*array_ptr }
            }
        }

        impl core::cmp::PartialEq for $id {
            /// Lane-wise equality, delegated to the array view.
            #[inline]
            fn eq(&self, other: &Self) -> bool {
                let lhs = self.as_array();
                let rhs = other.as_array();
                lhs == rhs
            }
        }

        impl core::fmt::Debug for $id {
            /// Formats the vector through `debug_simd_finish`, passing the
            /// type name and the lanes.
            #[inline]
            fn fmt(&self, formatter: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
                let lanes = self.as_array();
                debug_simd_finish(formatter, stringify!($id), lanes)
            }
        }
    }
}
71
// Declares a crate-internal `#[repr(simd)]` mask type named `$id` wrapping
// `[$elem_type; $len]`. Its constructors take `bool`s and store each one as
// an all-zeros (`false`) or all-ones (`true`) lane. The trailing identifier
// list supplies the parameter names of `new`, one per lane.
macro_rules! simd_m_ty {
    ($id:ident [$elem_type:ident ; $len:literal]: $($param_name:ident),*) => {
        #[repr(simd)]
        #[derive(Copy, Clone)]
        pub(crate) struct $id([$elem_type; $len]);

        #[allow(clippy::use_self)]
        impl $id {
            /// Maps `false` to a lane of all-zero bits and `true` to a lane
            /// of all-one bits.
            #[inline(always)]
            const fn bool_to_internal(x: bool) -> $elem_type {
                if x {
                    !(0 as $elem_type)
                } else {
                    0 as $elem_type
                }
            }

            /// Builds a mask from one `bool` per lane, given in lane order.
            #[inline(always)]
            pub(crate) const fn new($($param_name: bool),*) -> Self {
                let lanes = [$(Self::bool_to_internal($param_name)),*];
                $id(lanes)
            }

            /// Builds a mask with every lane set according to `value`.
            // FIXME: Workaround rust@60637
            #[inline(always)]
            pub(crate) fn splat(value: bool) -> Self {
                // Single-lane helper vector that the shuffle broadcasts from.
                #[derive(Copy, Clone)]
                #[repr(simd)]
                struct Single([$elem_type; 1]);
                let lane = Single([Self::bool_to_internal(value)]);
                // SAFETY: every shuffle index is 0, which is in-bounds for
                // the one-element input vectors.
                unsafe { simd_shuffle!(lane, lane, [0; $len]) }
            }

            /// Borrows the lanes of this mask as a plain array.
            #[inline]
            pub(crate) fn as_array(&self) -> &[$elem_type; $len] {
                let array_ptr = (self as *const Self).cast::<[$elem_type; $len]>();
                // SAFETY: The leading bytes of a simd type can always be read
                // as an array of its lanes. Some widths may carry extra
                // padding after the lanes, but the array view never reaches
                // into it.
                unsafe { &*array_ptr }
            }
        }

        impl core::cmp::PartialEq for $id {
            /// Lane-wise equality, delegated to the array view.
            #[inline]
            fn eq(&self, other: &Self) -> bool {
                let lhs = self.as_array();
                let rhs = other.as_array();
                lhs == rhs
            }
        }

        impl core::fmt::Debug for $id {
            /// Formats the mask through `debug_simd_finish`, passing the
            /// type name and the raw lanes.
            #[inline]
            fn fmt(&self, formatter: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
                let lanes = self.as_array();
                debug_simd_finish(formatter, stringify!($id), lanes)
            }
        }
    }
}
128
// 16-bit wide types:
// Two 8-bit lanes each, unsigned and signed.

simd_ty!(u8x2[u8;2]: x0, x1);
simd_ty!(i8x2[i8;2]: x0, x1);

// 32-bit wide types:
// Four 8-bit lanes or two 16-bit lanes, unsigned first and then signed.

simd_ty!(u8x4[u8;4]: x0, x1, x2, x3);
simd_ty!(u16x2[u16;2]: x0, x1);

simd_ty!(i8x4[i8;4]: x0, x1, x2, x3);
simd_ty!(i16x2[i16;2]: x0, x1);
141
142// 64-bit wide types:
143
144simd_ty!(
145    u8x8[u8;8]:
146    x0,
147    x1,
148    x2,
149    x3,
150    x4,
151    x5,
152    x6,
153    x7
154);
155simd_ty!(u16x4[u16;4]: x0, x1, x2, x3);
156simd_ty!(u32x2[u32;2]: x0, x1);
157simd_ty!(u64x1[u64;1]: x1);
158
159simd_ty!(
160    i8x8[i8;8]:
161    x0,
162    x1,
163    x2,
164    x3,
165    x4,
166    x5,
167    x6,
168    x7
169);
170simd_ty!(i16x4[i16;4]: x0, x1, x2, x3);
171simd_ty!(i32x2[i32;2]: x0, x1);
172simd_ty!(i64x1[i64;1]: x1);
173
174simd_ty!(f32x2[f32;2]: x0, x1);
175simd_ty!(f64x1[f64;1]: x1);
176
177// 128-bit wide types:
178
179simd_ty!(
180    u8x16[u8;16]:
181    x0,
182    x1,
183    x2,
184    x3,
185    x4,
186    x5,
187    x6,
188    x7,
189    x8,
190    x9,
191    x10,
192    x11,
193    x12,
194    x13,
195    x14,
196    x15
197);
198simd_ty!(
199    u16x8[u16;8]:
200    x0,
201    x1,
202    x2,
203    x3,
204    x4,
205    x5,
206    x6,
207    x7
208);
209simd_ty!(u32x4[u32;4]: x0, x1, x2, x3);
210simd_ty!(u64x2[u64;2]: x0, x1);
211
212simd_ty!(
213    i8x16[i8;16]:
214    x0,
215    x1,
216    x2,
217    x3,
218    x4,
219    x5,
220    x6,
221    x7,
222    x8,
223    x9,
224    x10,
225    x11,
226    x12,
227    x13,
228    x14,
229    x15
230);
231simd_ty!(
232    i16x8[i16;8]:
233    x0,
234    x1,
235    x2,
236    x3,
237    x4,
238    x5,
239    x6,
240    x7
241);
242simd_ty!(i32x4[i32;4]: x0, x1, x2, x3);
243simd_ty!(i64x2[i64;2]: x0, x1);
244
245simd_ty!(f16x4[f16;4]: x0, x1, x2, x3);
246
247simd_ty!(
248    f16x8[f16;8]:
249    x0,
250    x1,
251    x2,
252    x3,
253    x4,
254    x5,
255    x6,
256    x7
257);
258simd_ty!(f32x4[f32;4]: x0, x1, x2, x3);
259simd_ty!(f64x2[f64;2]: x0, x1);
260
261simd_m_ty!(
262    m8x16[i8;16]:
263    x0,
264    x1,
265    x2,
266    x3,
267    x4,
268    x5,
269    x6,
270    x7,
271    x8,
272    x9,
273    x10,
274    x11,
275    x12,
276    x13,
277    x14,
278    x15
279);
280simd_m_ty!(
281    m16x8[i16;8]:
282    x0,
283    x1,
284    x2,
285    x3,
286    x4,
287    x5,
288    x6,
289    x7
290);
291simd_m_ty!(m32x4[i32;4]: x0, x1, x2, x3);
292simd_m_ty!(m64x2[i64;2]: x0, x1);
293
294// 256-bit wide types:
295
296simd_ty!(
297    u8x32[u8;32]:
298    x0,
299    x1,
300    x2,
301    x3,
302    x4,
303    x5,
304    x6,
305    x7,
306    x8,
307    x9,
308    x10,
309    x11,
310    x12,
311    x13,
312    x14,
313    x15,
314    x16,
315    x17,
316    x18,
317    x19,
318    x20,
319    x21,
320    x22,
321    x23,
322    x24,
323    x25,
324    x26,
325    x27,
326    x28,
327    x29,
328    x30,
329    x31
330);
331simd_ty!(
332    u16x16[u16;16]:
333    x0,
334    x1,
335    x2,
336    x3,
337    x4,
338    x5,
339    x6,
340    x7,
341    x8,
342    x9,
343    x10,
344    x11,
345    x12,
346    x13,
347    x14,
348    x15
349);
350simd_ty!(
351    u32x8[u32;8]:
352    x0,
353    x1,
354    x2,
355    x3,
356    x4,
357    x5,
358    x6,
359    x7
360);
361simd_ty!(u64x4[u64;4]: x0, x1, x2, x3);
362
363simd_ty!(
364    i8x32[i8;32]:
365    x0,
366    x1,
367    x2,
368    x3,
369    x4,
370    x5,
371    x6,
372    x7,
373    x8,
374    x9,
375    x10,
376    x11,
377    x12,
378    x13,
379    x14,
380    x15,
381    x16,
382    x17,
383    x18,
384    x19,
385    x20,
386    x21,
387    x22,
388    x23,
389    x24,
390    x25,
391    x26,
392    x27,
393    x28,
394    x29,
395    x30,
396    x31
397);
398simd_ty!(
399    i16x16[i16;16]:
400    x0,
401    x1,
402    x2,
403    x3,
404    x4,
405    x5,
406    x6,
407    x7,
408    x8,
409    x9,
410    x10,
411    x11,
412    x12,
413    x13,
414    x14,
415    x15
416);
417simd_ty!(
418    i32x8[i32;8]:
419    x0,
420    x1,
421    x2,
422    x3,
423    x4,
424    x5,
425    x6,
426    x7
427);
428simd_ty!(i64x4[i64;4]: x0, x1, x2, x3);
429
430simd_ty!(
431    f16x16[f16;16]:
432    x0,
433    x1,
434    x2,
435    x3,
436    x4,
437    x5,
438    x6,
439    x7,
440    x8,
441    x9,
442    x10,
443    x11,
444    x12,
445    x13,
446    x14,
447    x15
448);
449simd_ty!(
450    f32x8[f32;8]:
451    x0,
452    x1,
453    x2,
454    x3,
455    x4,
456    x5,
457    x6,
458    x7
459);
460simd_ty!(f64x4[f64;4]: x0, x1, x2, x3);
461
462simd_m_ty!(
463    m8x32[i8;32]:
464    x0,
465    x1,
466    x2,
467    x3,
468    x4,
469    x5,
470    x6,
471    x7,
472    x8,
473    x9,
474    x10,
475    x11,
476    x12,
477    x13,
478    x14,
479    x15,
480    x16,
481    x17,
482    x18,
483    x19,
484    x20,
485    x21,
486    x22,
487    x23,
488    x24,
489    x25,
490    x26,
491    x27,
492    x28,
493    x29,
494    x30,
495    x31
496);
497simd_m_ty!(
498    m16x16[i16;16]:
499    x0,
500    x1,
501    x2,
502    x3,
503    x4,
504    x5,
505    x6,
506    x7,
507    x8,
508    x9,
509    x10,
510    x11,
511    x12,
512    x13,
513    x14,
514    x15
515);
516simd_m_ty!(
517    m32x8[i32;8]:
518    x0,
519    x1,
520    x2,
521    x3,
522    x4,
523    x5,
524    x6,
525    x7
526);
527
528// 512-bit wide types:
529
530simd_ty!(
531    i8x64[i8;64]:
532    x0,
533    x1,
534    x2,
535    x3,
536    x4,
537    x5,
538    x6,
539    x7,
540    x8,
541    x9,
542    x10,
543    x11,
544    x12,
545    x13,
546    x14,
547    x15,
548    x16,
549    x17,
550    x18,
551    x19,
552    x20,
553    x21,
554    x22,
555    x23,
556    x24,
557    x25,
558    x26,
559    x27,
560    x28,
561    x29,
562    x30,
563    x31,
564    x32,
565    x33,
566    x34,
567    x35,
568    x36,
569    x37,
570    x38,
571    x39,
572    x40,
573    x41,
574    x42,
575    x43,
576    x44,
577    x45,
578    x46,
579    x47,
580    x48,
581    x49,
582    x50,
583    x51,
584    x52,
585    x53,
586    x54,
587    x55,
588    x56,
589    x57,
590    x58,
591    x59,
592    x60,
593    x61,
594    x62,
595    x63
596);
597
598simd_ty!(
599    u8x64[u8;64]:
600    x0,
601    x1,
602    x2,
603    x3,
604    x4,
605    x5,
606    x6,
607    x7,
608    x8,
609    x9,
610    x10,
611    x11,
612    x12,
613    x13,
614    x14,
615    x15,
616    x16,
617    x17,
618    x18,
619    x19,
620    x20,
621    x21,
622    x22,
623    x23,
624    x24,
625    x25,
626    x26,
627    x27,
628    x28,
629    x29,
630    x30,
631    x31,
632    x32,
633    x33,
634    x34,
635    x35,
636    x36,
637    x37,
638    x38,
639    x39,
640    x40,
641    x41,
642    x42,
643    x43,
644    x44,
645    x45,
646    x46,
647    x47,
648    x48,
649    x49,
650    x50,
651    x51,
652    x52,
653    x53,
654    x54,
655    x55,
656    x56,
657    x57,
658    x58,
659    x59,
660    x60,
661    x61,
662    x62,
663    x63
664);
665
666simd_ty!(
667    i16x32[i16;32]:
668    x0,
669    x1,
670    x2,
671    x3,
672    x4,
673    x5,
674    x6,
675    x7,
676    x8,
677    x9,
678    x10,
679    x11,
680    x12,
681    x13,
682    x14,
683    x15,
684    x16,
685    x17,
686    x18,
687    x19,
688    x20,
689    x21,
690    x22,
691    x23,
692    x24,
693    x25,
694    x26,
695    x27,
696    x28,
697    x29,
698    x30,
699    x31
700);
701
702simd_ty!(
703    u16x32[u16;32]:
704    x0,
705    x1,
706    x2,
707    x3,
708    x4,
709    x5,
710    x6,
711    x7,
712    x8,
713    x9,
714    x10,
715    x11,
716    x12,
717    x13,
718    x14,
719    x15,
720    x16,
721    x17,
722    x18,
723    x19,
724    x20,
725    x21,
726    x22,
727    x23,
728    x24,
729    x25,
730    x26,
731    x27,
732    x28,
733    x29,
734    x30,
735    x31
736);
737
738simd_ty!(
739    i32x16[i32;16]:
740    x0,
741    x1,
742    x2,
743    x3,
744    x4,
745    x5,
746    x6,
747    x7,
748    x8,
749    x9,
750    x10,
751    x11,
752    x12,
753    x13,
754    x14,
755    x15
756);
757
758simd_ty!(
759    u32x16[u32;16]:
760    x0,
761    x1,
762    x2,
763    x3,
764    x4,
765    x5,
766    x6,
767    x7,
768    x8,
769    x9,
770    x10,
771    x11,
772    x12,
773    x13,
774    x14,
775    x15
776);
777
778simd_ty!(
779    f16x32[f16;32]:
780    x0,
781    x1,
782    x2,
783    x3,
784    x4,
785    x5,
786    x6,
787    x7,
788    x8,
789    x9,
790    x10,
791    x11,
792    x12,
793    x13,
794    x14,
795    x15,
796    x16,
797    x17,
798    x18,
799    x19,
800    x20,
801    x21,
802    x22,
803    x23,
804    x24,
805    x25,
806    x26,
807    x27,
808    x28,
809    x29,
810    x30,
811    x31
812);
813simd_ty!(
814    f32x16[f32;16]:
815    x0,
816    x1,
817    x2,
818    x3,
819    x4,
820    x5,
821    x6,
822    x7,
823    x8,
824    x9,
825    x10,
826    x11,
827    x12,
828    x13,
829    x14,
830    x15
831);
832
833simd_ty!(
834    i64x8[i64;8]:
835    x0,
836    x1,
837    x2,
838    x3,
839    x4,
840    x5,
841    x6,
842    x7
843);
844
845simd_ty!(
846    u64x8[u64;8]:
847    x0,
848    x1,
849    x2,
850    x3,
851    x4,
852    x5,
853    x6,
854    x7
855);
856
857simd_ty!(
858    f64x8[f64;8]:
859    x0,
860    x1,
861    x2,
862    x3,
863    x4,
864    x5,
865    x6,
866    x7
867);
868
869// 1024-bit wide types:
870simd_ty!(
871    u16x64[u16;64]:
872    x0,
873    x1,
874    x2,
875    x3,
876    x4,
877    x5,
878    x6,
879    x7,
880    x8,
881    x9,
882    x10,
883    x11,
884    x12,
885    x13,
886    x14,
887    x15,
888    x16,
889    x17,
890    x18,
891    x19,
892    x20,
893    x21,
894    x22,
895    x23,
896    x24,
897    x25,
898    x26,
899    x27,
900    x28,
901    x29,
902    x30,
903    x31,
904    x32,
905    x33,
906    x34,
907    x35,
908    x36,
909    x37,
910    x38,
911    x39,
912    x40,
913    x41,
914    x42,
915    x43,
916    x44,
917    x45,
918    x46,
919    x47,
920    x48,
921    x49,
922    x50,
923    x51,
924    x52,
925    x53,
926    x54,
927    x55,
928    x56,
929    x57,
930    x58,
931    x59,
932    x60,
933    x61,
934    x62,
935    x63
936);
937simd_ty!(
938    i32x32[i32;32]:
939    x0,
940    x1,
941    x2,
942    x3,
943    x4,
944    x5,
945    x6,
946    x7,
947    x8,
948    x9,
949    x10,
950    x11,
951    x12,
952    x13,
953    x14,
954    x15,
955    x16,
956    x17,
957    x18,
958    x19,
959    x20,
960    x21,
961    x22,
962    x23,
963    x24,
964    x25,
965    x26,
966    x27,
967    x28,
968    x29,
969    x30,
970    x31
971);
972simd_ty!(
973    u32x32[u32;32]:
974    x0,
975    x1,
976    x2,
977    x3,
978    x4,
979    x5,
980    x6,
981    x7,
982    x8,
983    x9,
984    x10,
985    x11,
986    x12,
987    x13,
988    x14,
989    x15,
990    x16,
991    x17,
992    x18,
993    x19,
994    x20,
995    x21,
996    x22,
997    x23,
998    x24,
999    x25,
1000    x26,
1001    x27,
1002    x28,
1003    x29,
1004    x30,
1005    x31
1006);
1007
1008/// Used to continue `Debug`ging SIMD types as `MySimd(1, 2, 3, 4)`, as they
1009/// were before moving to array-based simd.
1010#[inline]
1011pub(crate) fn debug_simd_finish<T: crate::fmt::Debug, const N: usize>(
1012    formatter: &mut crate::fmt::Formatter<'_>,
1013    type_name: &str,
1014    array: &[T; N],
1015) -> crate::fmt::Result {
1016    crate::fmt::Formatter::debug_tuple_fields_finish(
1017        formatter,
1018        type_name,
1019        &crate::array::from_fn::<&dyn crate::fmt::Debug, N, _>(|i| &array[i]),
1020    )
1021}