From c8342feef25facca923532e4ff4fe29cf81b52f5 Mon Sep 17 00:00:00 2001
From: Ralf Jung
Date: Sat, 17 Feb 2024 09:43:49 +0100
Subject: [PATCH 1/2] put the idx arguments of simd_insert and simd_extract into const blocks

---
 .../core_arch/src/aarch64/neon/generated.rs | 400 +++++------
 crates/core_arch/src/aarch64/neon/mod.rs | 24 +-
 .../src/arm_shared/neon/generated.rs | 108 +--
 crates/core_arch/src/arm_shared/neon/mod.rs | 104 +--
 crates/core_arch/src/macros.rs | 17 +
 crates/core_arch/src/wasm32/simd128.rs | 32 +-
 crates/core_arch/src/x86/avx.rs | 8 +-
 crates/core_arch/src/x86/avx2.rs | 10 +-
 crates/core_arch/src/x86/avx512f.rs | 638 +++++++++---------
 crates/core_arch/src/x86/sse.rs | 4 +-
 crates/core_arch/src/x86/sse2.rs | 40 +-
 crates/core_arch/src/x86/sse41.rs | 10 +-
 crates/core_arch/src/x86_64/avx.rs | 2 +-
 crates/core_arch/src/x86_64/avx2.rs | 2 +-
 crates/core_arch/src/x86_64/avx512f.rs | 8 +-
 crates/core_arch/src/x86_64/sse2.rs | 4 +-
 crates/core_arch/src/x86_64/sse41.rs | 4 +-
 crates/stdarch-gen/neon.spec | 150 ++--
 18 files changed, 791 insertions(+), 774 deletions(-)

diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs
index 196c6f34e4..ac960c657e 100644
--- a/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/crates/core_arch/src/aarch64/neon/generated.rs
@@ -177,7 +177,7 @@ pub unsafe fn vabdq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[cfg_attr(test, assert_instr(fabd))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vabds_f32(a: f32, b: f32) -> f32 {
-    simd_extract(vabd_f32(vdup_n_f32(a), vdup_n_f32(b)), 0)
+    simd_extract!(vabd_f32(vdup_n_f32(a), vdup_n_f32(b)), 0)
 }
 
 /// Floating-point absolute difference
@@ -188,7 +188,7 @@ pub unsafe fn vabds_f32(a: f32, b: f32) -> f32 {
 #[cfg_attr(test, assert_instr(fabd))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vabdd_f64(a: f64, b: f64) -> f64 {
-    simd_extract(vabd_f64(vdup_n_f64(a), vdup_n_f64(b)), 0)
+    simd_extract!(vabd_f64(vdup_n_f64(a), vdup_n_f64(b)), 0)
 }
 
 /// Unsigned Absolute difference Long
@@ -390,7 +390,7 @@ pub unsafe fn vceqd_u64(a: u64, b: u64) -> u64 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vceqs_f32(a: f32, b: f32) -> u32 {
-    simd_extract(vceq_f32(vdup_n_f32(a), vdup_n_f32(b)), 0)
+    simd_extract!(vceq_f32(vdup_n_f32(a), vdup_n_f32(b)), 0)
 }
 
 /// Floating-point compare equal
@@ -401,7 +401,7 @@ pub unsafe fn vceqs_f32(a: f32, b: f32) -> u32 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vceqd_f64(a: f64, b: f64) -> u64 {
-    simd_extract(vceq_f64(vdup_n_f64(a), vdup_n_f64(b)), 0)
+    simd_extract!(vceq_f64(vdup_n_f64(a), vdup_n_f64(b)), 0)
 }
 
 /// Signed compare bitwise equal to zero
@@ -722,7 +722,7 @@ pub unsafe fn vceqzd_u64(a: u64) -> u64 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vceqzs_f32(a: f32) -> u32 {
-    simd_extract(vceqz_f32(vdup_n_f32(a)), 0)
+    simd_extract!(vceqz_f32(vdup_n_f32(a)), 0)
 }
 
 /// Floating-point compare bitwise equal to zero
@@ -733,7 +733,7 @@ pub unsafe fn vceqzs_f32(a: f32) -> u32 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vceqzd_f64(a: f64) -> u64 {
-    simd_extract(vceqz_f64(vdup_n_f64(a)), 0)
+    simd_extract!(vceqz_f64(vdup_n_f64(a)), 0)
 }
 
 /// Signed compare bitwise Test bits nonzero
@@ -876,7 +876,7 @@ pub unsafe
fn vuqaddd_s64(a: i64, b: u64) -> i64 { #[cfg_attr(test, assert_instr(suqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vuqaddb_s8(a: i8, b: u8) -> i8 { - simd_extract(vuqadd_s8(vdup_n_s8(a), vdup_n_u8(b)), 0) + simd_extract!(vuqadd_s8(vdup_n_s8(a), vdup_n_u8(b)), 0) } /// Signed saturating accumulate of unsigned value @@ -887,7 +887,7 @@ pub unsafe fn vuqaddb_s8(a: i8, b: u8) -> i8 { #[cfg_attr(test, assert_instr(suqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vuqaddh_s16(a: i16, b: u16) -> i16 { - simd_extract(vuqadd_s16(vdup_n_s16(a), vdup_n_u16(b)), 0) + simd_extract!(vuqadd_s16(vdup_n_s16(a), vdup_n_u16(b)), 0) } /// Floating-point absolute value @@ -1008,7 +1008,7 @@ pub unsafe fn vcgtd_u64(a: u64, b: u64) -> u64 { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcgts_f32(a: f32, b: f32) -> u32 { - simd_extract(vcgt_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) + simd_extract!(vcgt_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) } /// Floating-point compare greater than @@ -1019,7 +1019,7 @@ pub unsafe fn vcgts_f32(a: f32, b: f32) -> u32 { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcgtd_f64(a: f64, b: f64) -> u64 { - simd_extract(vcgt_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) + simd_extract!(vcgt_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) } /// Compare signed less than @@ -1118,7 +1118,7 @@ pub unsafe fn vcltd_u64(a: u64, b: u64) -> u64 { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vclts_f32(a: f32, b: f32) -> u32 { - simd_extract(vclt_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) + simd_extract!(vclt_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) } /// Floating-point compare less than @@ -1129,7 +1129,7 @@ pub unsafe fn vclts_f32(a: f32, b: f32) -> u32 { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcltd_f64(a: f64, b: f64) -> u64 { - simd_extract(vclt_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) + simd_extract!(vclt_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) } /// Compare signed less than or equal @@ -1184,7 +1184,7 @@ pub unsafe fn vcged_u64(a: u64, b: u64) -> u64 { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcges_f32(a: f32, b: f32) -> u32 { - simd_extract(vcge_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) + simd_extract!(vcge_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) } /// Floating-point compare greater than or equal @@ -1195,7 +1195,7 @@ pub unsafe fn vcges_f32(a: f32, b: f32) -> u32 { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcged_f64(a: f64, b: f64) -> u64 { - simd_extract(vcge_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) + simd_extract!(vcge_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) } /// Compare unsigned less than or equal @@ -1272,7 +1272,7 @@ pub unsafe fn vcled_u64(a: u64, b: u64) -> u64 { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcles_f32(a: f32, b: f32) -> u32 { - simd_extract(vcle_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) + simd_extract!(vcle_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) } /// Floating-point compare less than or equal @@ -1283,7 +1283,7 @@ pub unsafe fn vcles_f32(a: f32, b: f32) -> u32 { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcled_f64(a: f64, b: f64) -> u64 
{ - simd_extract(vcle_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) + simd_extract!(vcle_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) } /// Compare signed greater than or equal @@ -1515,7 +1515,7 @@ pub unsafe fn vcgezd_s64(a: i64) -> u64 { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcgezs_f32(a: f32) -> u32 { - simd_extract(vcgez_f32(vdup_n_f32(a)), 0) + simd_extract!(vcgez_f32(vdup_n_f32(a)), 0) } /// Floating-point compare greater than or equal to zero @@ -1526,7 +1526,7 @@ pub unsafe fn vcgezs_f32(a: f32) -> u32 { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcgezd_f64(a: f64) -> u64 { - simd_extract(vcgez_f64(vdup_n_f64(a)), 0) + simd_extract!(vcgez_f64(vdup_n_f64(a)), 0) } /// Compare signed greater than zero @@ -1692,7 +1692,7 @@ pub unsafe fn vcgtzd_s64(a: i64) -> u64 { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcgtzs_f32(a: f32) -> u32 { - simd_extract(vcgtz_f32(vdup_n_f32(a)), 0) + simd_extract!(vcgtz_f32(vdup_n_f32(a)), 0) } /// Floating-point compare greater than zero @@ -1703,7 +1703,7 @@ pub unsafe fn vcgtzs_f32(a: f32) -> u32 { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcgtzd_f64(a: f64) -> u64 { - simd_extract(vcgtz_f64(vdup_n_f64(a)), 0) + simd_extract!(vcgtz_f64(vdup_n_f64(a)), 0) } /// Compare signed less than or equal to zero @@ -1869,7 +1869,7 @@ pub unsafe fn vclezd_s64(a: i64) -> u64 { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vclezs_f32(a: f32) -> u32 { - simd_extract(vclez_f32(vdup_n_f32(a)), 0) + simd_extract!(vclez_f32(vdup_n_f32(a)), 0) } /// Floating-point compare less than or equal to zero @@ -1880,7 +1880,7 @@ pub unsafe fn vclezs_f32(a: f32) -> u32 { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vclezd_f64(a: f64) -> u64 { - simd_extract(vclez_f64(vdup_n_f64(a)), 0) + simd_extract!(vclez_f64(vdup_n_f64(a)), 0) } /// Compare signed less than zero @@ -2046,7 +2046,7 @@ pub unsafe fn vcltzd_s64(a: i64) -> u64 { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcltzs_f32(a: f32) -> u32 { - simd_extract(vcltz_f32(vdup_n_f32(a)), 0) + simd_extract!(vcltz_f32(vdup_n_f32(a)), 0) } /// Floating-point compare less than zero @@ -2057,7 +2057,7 @@ pub unsafe fn vcltzs_f32(a: f32) -> u32 { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcltzd_f64(a: f64) -> u64 { - simd_extract(vcltz_f64(vdup_n_f64(a)), 0) + simd_extract!(vcltz_f64(vdup_n_f64(a)), 0) } /// Floating-point absolute compare greater than @@ -3394,7 +3394,7 @@ pub unsafe fn vcvtx_f32_f64(a: float64x2_t) -> float32x2_t { #[cfg_attr(test, assert_instr(fcvtxn))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcvtxd_f32_f64(a: f64) -> f32 { - simd_extract(vcvtx_f32_f64(vdupq_n_f64(a)), 0) + simd_extract!(vcvtx_f32_f64(vdupq_n_f64(a)), 0) } /// Floating-point convert to lower precision narrow, rounding to odd @@ -4704,7 +4704,7 @@ pub unsafe fn vdup_lane_f64(a: float64x1_t) -> float64x1_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vdup_laneq_p64(a: poly64x2_t) -> poly64x1_t { static_assert_uimm_bits!(N, 1); - transmute::(simd_extract(a, N as u32)) + 
transmute::(simd_extract!(a, N as u32)) } /// Set all vector lanes to the same value @@ -4717,7 +4717,7 @@ pub unsafe fn vdup_laneq_p64(a: poly64x2_t) -> poly64x1_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vdup_laneq_f64(a: float64x2_t) -> float64x1_t { static_assert_uimm_bits!(N, 1); - transmute::(simd_extract(a, N as u32)) + transmute::(simd_extract!(a, N as u32)) } /// Set all vector lanes to the same value @@ -4730,7 +4730,7 @@ pub unsafe fn vdup_laneq_f64(a: float64x2_t) -> float64x1_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vdupb_lane_s8(a: int8x8_t) -> i8 { static_assert_uimm_bits!(N, 3); - simd_extract(a, N as u32) + simd_extract!(a, N as u32) } /// Set all vector lanes to the same value @@ -4743,7 +4743,7 @@ pub unsafe fn vdupb_lane_s8(a: int8x8_t) -> i8 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vdupb_laneq_s8(a: int8x16_t) -> i8 { static_assert_uimm_bits!(N, 4); - simd_extract(a, N as u32) + simd_extract!(a, N as u32) } /// Set all vector lanes to the same value @@ -4756,7 +4756,7 @@ pub unsafe fn vdupb_laneq_s8(a: int8x16_t) -> i8 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vduph_lane_s16(a: int16x4_t) -> i16 { static_assert_uimm_bits!(N, 2); - simd_extract(a, N as u32) + simd_extract!(a, N as u32) } /// Set all vector lanes to the same value @@ -4769,7 +4769,7 @@ pub unsafe fn vduph_lane_s16(a: int16x4_t) -> i16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vduph_laneq_s16(a: int16x8_t) -> i16 { static_assert_uimm_bits!(N, 3); - simd_extract(a, N as u32) + simd_extract!(a, N as u32) } /// Set all vector lanes to the same value @@ -4782,7 +4782,7 @@ pub unsafe fn vduph_laneq_s16(a: int16x8_t) -> i16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vdups_lane_s32(a: int32x2_t) -> i32 { static_assert_uimm_bits!(N, 1); - simd_extract(a, N as u32) + simd_extract!(a, N as u32) } /// Set all vector lanes to the same value @@ -4795,7 +4795,7 @@ pub unsafe fn vdups_lane_s32(a: int32x2_t) -> i32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vdups_laneq_s32(a: int32x4_t) -> i32 { static_assert_uimm_bits!(N, 2); - simd_extract(a, N as u32) + simd_extract!(a, N as u32) } /// Set all vector lanes to the same value @@ -4808,7 +4808,7 @@ pub unsafe fn vdups_laneq_s32(a: int32x4_t) -> i32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vdupd_lane_s64(a: int64x1_t) -> i64 { static_assert!(N == 0); - simd_extract(a, N as u32) + simd_extract!(a, N as u32) } /// Set all vector lanes to the same value @@ -4821,7 +4821,7 @@ pub unsafe fn vdupd_lane_s64(a: int64x1_t) -> i64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vdupd_laneq_s64(a: int64x2_t) -> i64 { static_assert_uimm_bits!(N, 1); - simd_extract(a, N as u32) + simd_extract!(a, N as u32) } /// Set all vector lanes to the same value @@ -4834,7 +4834,7 @@ pub unsafe fn vdupd_laneq_s64(a: int64x2_t) -> i64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vdupb_lane_u8(a: uint8x8_t) -> u8 { static_assert_uimm_bits!(N, 3); - simd_extract(a, N as u32) + simd_extract!(a, N as u32) } /// Set all vector lanes to the same value @@ -4847,7 +4847,7 @@ pub unsafe fn vdupb_lane_u8(a: uint8x8_t) -> u8 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vdupb_laneq_u8(a: uint8x16_t) -> u8 { static_assert_uimm_bits!(N, 4); - simd_extract(a, N as u32) + 
simd_extract!(a, N as u32) } /// Set all vector lanes to the same value @@ -4860,7 +4860,7 @@ pub unsafe fn vdupb_laneq_u8(a: uint8x16_t) -> u8 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vduph_lane_u16(a: uint16x4_t) -> u16 { static_assert_uimm_bits!(N, 2); - simd_extract(a, N as u32) + simd_extract!(a, N as u32) } /// Set all vector lanes to the same value @@ -4873,7 +4873,7 @@ pub unsafe fn vduph_lane_u16(a: uint16x4_t) -> u16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vduph_laneq_u16(a: uint16x8_t) -> u16 { static_assert_uimm_bits!(N, 3); - simd_extract(a, N as u32) + simd_extract!(a, N as u32) } /// Set all vector lanes to the same value @@ -4886,7 +4886,7 @@ pub unsafe fn vduph_laneq_u16(a: uint16x8_t) -> u16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vdups_lane_u32(a: uint32x2_t) -> u32 { static_assert_uimm_bits!(N, 1); - simd_extract(a, N as u32) + simd_extract!(a, N as u32) } /// Set all vector lanes to the same value @@ -4899,7 +4899,7 @@ pub unsafe fn vdups_lane_u32(a: uint32x2_t) -> u32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vdups_laneq_u32(a: uint32x4_t) -> u32 { static_assert_uimm_bits!(N, 2); - simd_extract(a, N as u32) + simd_extract!(a, N as u32) } /// Set all vector lanes to the same value @@ -4912,7 +4912,7 @@ pub unsafe fn vdups_laneq_u32(a: uint32x4_t) -> u32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vdupd_lane_u64(a: uint64x1_t) -> u64 { static_assert!(N == 0); - simd_extract(a, N as u32) + simd_extract!(a, N as u32) } /// Set all vector lanes to the same value @@ -4925,7 +4925,7 @@ pub unsafe fn vdupd_lane_u64(a: uint64x1_t) -> u64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vdupd_laneq_u64(a: uint64x2_t) -> u64 { static_assert_uimm_bits!(N, 1); - simd_extract(a, N as u32) + simd_extract!(a, N as u32) } /// Set all vector lanes to the same value @@ -4938,7 +4938,7 @@ pub unsafe fn vdupd_laneq_u64(a: uint64x2_t) -> u64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vdupb_lane_p8(a: poly8x8_t) -> p8 { static_assert_uimm_bits!(N, 3); - simd_extract(a, N as u32) + simd_extract!(a, N as u32) } /// Set all vector lanes to the same value @@ -4951,7 +4951,7 @@ pub unsafe fn vdupb_lane_p8(a: poly8x8_t) -> p8 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vdupb_laneq_p8(a: poly8x16_t) -> p8 { static_assert_uimm_bits!(N, 4); - simd_extract(a, N as u32) + simd_extract!(a, N as u32) } /// Set all vector lanes to the same value @@ -4964,7 +4964,7 @@ pub unsafe fn vdupb_laneq_p8(a: poly8x16_t) -> p8 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vduph_lane_p16(a: poly16x4_t) -> p16 { static_assert_uimm_bits!(N, 2); - simd_extract(a, N as u32) + simd_extract!(a, N as u32) } /// Set all vector lanes to the same value @@ -4977,7 +4977,7 @@ pub unsafe fn vduph_lane_p16(a: poly16x4_t) -> p16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vduph_laneq_p16(a: poly16x8_t) -> p16 { static_assert_uimm_bits!(N, 3); - simd_extract(a, N as u32) + simd_extract!(a, N as u32) } /// Set all vector lanes to the same value @@ -4990,7 +4990,7 @@ pub unsafe fn vduph_laneq_p16(a: poly16x8_t) -> p16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vdups_lane_f32(a: float32x2_t) -> f32 { static_assert_uimm_bits!(N, 1); - simd_extract(a, N as u32) + simd_extract!(a, N as u32) } /// Set all vector lanes 
to the same value @@ -5003,7 +5003,7 @@ pub unsafe fn vdups_lane_f32(a: float32x2_t) -> f32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vdups_laneq_f32(a: float32x4_t) -> f32 { static_assert_uimm_bits!(N, 2); - simd_extract(a, N as u32) + simd_extract!(a, N as u32) } /// Set all vector lanes to the same value @@ -5016,7 +5016,7 @@ pub unsafe fn vdups_laneq_f32(a: float32x4_t) -> f32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vdupd_lane_f64(a: float64x1_t) -> f64 { static_assert!(N == 0); - simd_extract(a, N as u32) + simd_extract!(a, N as u32) } /// Set all vector lanes to the same value @@ -5029,7 +5029,7 @@ pub unsafe fn vdupd_lane_f64(a: float64x1_t) -> f64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vdupd_laneq_f64(a: float64x2_t) -> f64 { static_assert_uimm_bits!(N, 1); - simd_extract(a, N as u32) + simd_extract!(a, N as u32) } /// Extract vector from pair of vectors @@ -5729,7 +5729,7 @@ pub unsafe fn vqnegq_s64(a: int64x2_t) -> int64x2_t { #[cfg_attr(test, assert_instr(sqneg))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqnegb_s8(a: i8) -> i8 { - simd_extract(vqneg_s8(vdup_n_s8(a)), 0) + simd_extract!(vqneg_s8(vdup_n_s8(a)), 0) } /// Signed saturating negate @@ -5740,7 +5740,7 @@ pub unsafe fn vqnegb_s8(a: i8) -> i8 { #[cfg_attr(test, assert_instr(sqneg))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqnegh_s16(a: i16) -> i16 { - simd_extract(vqneg_s16(vdup_n_s16(a)), 0) + simd_extract!(vqneg_s16(vdup_n_s16(a)), 0) } /// Signed saturating negate @@ -5751,7 +5751,7 @@ pub unsafe fn vqnegh_s16(a: i16) -> i16 { #[cfg_attr(test, assert_instr(sqneg))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqnegs_s32(a: i32) -> i32 { - simd_extract(vqneg_s32(vdup_n_s32(a)), 0) + simd_extract!(vqneg_s32(vdup_n_s32(a)), 0) } /// Signed saturating negate @@ -5762,7 +5762,7 @@ pub unsafe fn vqnegs_s32(a: i32) -> i32 { #[cfg_attr(test, assert_instr(sqneg))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqnegd_s64(a: i64) -> i64 { - simd_extract(vqneg_s64(vdup_n_s64(a)), 0) + simd_extract!(vqneg_s64(vdup_n_s64(a)), 0) } /// Saturating subtract @@ -5775,7 +5775,7 @@ pub unsafe fn vqnegd_s64(a: i64) -> i64 { pub unsafe fn vqsubb_s8(a: i8, b: i8) -> i8 { let a: int8x8_t = vdup_n_s8(a); let b: int8x8_t = vdup_n_s8(b); - simd_extract(vqsub_s8(a, b), 0) + simd_extract!(vqsub_s8(a, b), 0) } /// Saturating subtract @@ -5788,7 +5788,7 @@ pub unsafe fn vqsubb_s8(a: i8, b: i8) -> i8 { pub unsafe fn vqsubh_s16(a: i16, b: i16) -> i16 { let a: int16x4_t = vdup_n_s16(a); let b: int16x4_t = vdup_n_s16(b); - simd_extract(vqsub_s16(a, b), 0) + simd_extract!(vqsub_s16(a, b), 0) } /// Saturating subtract @@ -5801,7 +5801,7 @@ pub unsafe fn vqsubh_s16(a: i16, b: i16) -> i16 { pub unsafe fn vqsubb_u8(a: u8, b: u8) -> u8 { let a: uint8x8_t = vdup_n_u8(a); let b: uint8x8_t = vdup_n_u8(b); - simd_extract(vqsub_u8(a, b), 0) + simd_extract!(vqsub_u8(a, b), 0) } /// Saturating subtract @@ -5814,7 +5814,7 @@ pub unsafe fn vqsubb_u8(a: u8, b: u8) -> u8 { pub unsafe fn vqsubh_u16(a: u16, b: u16) -> u16 { let a: uint16x4_t = vdup_n_u16(a); let b: uint16x4_t = vdup_n_u16(b); - simd_extract(vqsub_u16(a, b), 0) + simd_extract!(vqsub_u16(a, b), 0) } /// Saturating subtract @@ -6399,7 +6399,7 @@ pub unsafe fn vrndiq_f64(a: float64x2_t) -> float64x2_t { pub unsafe fn vqaddb_s8(a: i8, b: i8) -> i8 { let a: int8x8_t = vdup_n_s8(a); let b: int8x8_t = 
vdup_n_s8(b); - simd_extract(vqadd_s8(a, b), 0) + simd_extract!(vqadd_s8(a, b), 0) } /// Saturating add @@ -6412,7 +6412,7 @@ pub unsafe fn vqaddb_s8(a: i8, b: i8) -> i8 { pub unsafe fn vqaddh_s16(a: i16, b: i16) -> i16 { let a: int16x4_t = vdup_n_s16(a); let b: int16x4_t = vdup_n_s16(b); - simd_extract(vqadd_s16(a, b), 0) + simd_extract!(vqadd_s16(a, b), 0) } /// Saturating add @@ -6425,7 +6425,7 @@ pub unsafe fn vqaddh_s16(a: i16, b: i16) -> i16 { pub unsafe fn vqaddb_u8(a: u8, b: u8) -> u8 { let a: uint8x8_t = vdup_n_u8(a); let b: uint8x8_t = vdup_n_u8(b); - simd_extract(vqadd_u8(a, b), 0) + simd_extract!(vqadd_u8(a, b), 0) } /// Saturating add @@ -6438,7 +6438,7 @@ pub unsafe fn vqaddb_u8(a: u8, b: u8) -> u8 { pub unsafe fn vqaddh_u16(a: u16, b: u16) -> u16 { let a: uint16x4_t = vdup_n_u16(a); let b: uint16x4_t = vdup_n_u16(b); - simd_extract(vqadd_u16(a, b), 0) + simd_extract!(vqadd_u16(a, b), 0) } /// Saturating add @@ -7535,7 +7535,7 @@ pub unsafe fn vld4q_lane_f64(a: *const f64, b: float64x2x4_t) - #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vst1_lane_f64(a: *mut f64, b: float64x1_t) { static_assert!(LANE == 0); - *a = simd_extract(b, LANE as u32); + *a = simd_extract!(b, LANE as u32); } /// Store multiple single-element structures from one, two, three, or four registers @@ -7548,7 +7548,7 @@ pub unsafe fn vst1_lane_f64(a: *mut f64, b: float64x1_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vst1q_lane_f64(a: *mut f64, b: float64x2_t) { static_assert_uimm_bits!(LANE, 1); - *a = simd_extract(b, LANE as u32); + *a = simd_extract!(b, LANE as u32); } /// Store multiple single-element structures to one, two, three, or four registers @@ -8475,7 +8475,7 @@ pub unsafe fn vmulq_n_f64(a: float64x2_t, b: f64) -> float64x2_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vmul_lane_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { static_assert!(LANE == 0); - simd_mul(a, transmute::(simd_extract(b, LANE as u32))) + simd_mul(a, transmute::(simd_extract!(b, LANE as u32))) } /// Floating-point multiply @@ -8488,7 +8488,7 @@ pub unsafe fn vmul_lane_f64(a: float64x1_t, b: float64x1_t) -> #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vmul_laneq_f64(a: float64x1_t, b: float64x2_t) -> float64x1_t { static_assert_uimm_bits!(LANE, 1); - simd_mul(a, transmute::(simd_extract(b, LANE as u32))) + simd_mul(a, transmute::(simd_extract!(b, LANE as u32))) } /// Floating-point multiply @@ -8527,7 +8527,7 @@ pub unsafe fn vmulq_laneq_f64(a: float64x2_t, b: float64x2_t) - #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vmuls_lane_f32(a: f32, b: float32x2_t) -> f32 { static_assert_uimm_bits!(LANE, 1); - let b: f32 = simd_extract(b, LANE as u32); + let b: f32 = simd_extract!(b, LANE as u32); a * b } @@ -8541,7 +8541,7 @@ pub unsafe fn vmuls_lane_f32(a: f32, b: float32x2_t) -> f32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vmuls_laneq_f32(a: f32, b: float32x4_t) -> f32 { static_assert_uimm_bits!(LANE, 2); - let b: f32 = simd_extract(b, LANE as u32); + let b: f32 = simd_extract!(b, LANE as u32); a * b } @@ -8555,7 +8555,7 @@ pub unsafe fn vmuls_laneq_f32(a: f32, b: float32x4_t) -> f32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vmuld_lane_f64(a: f64, b: float64x1_t) -> f64 { static_assert!(LANE == 0); - let b: f64 = simd_extract(b, LANE as u32); + let b: f64 = simd_extract!(b, LANE as u32); a * b } @@ -8569,7 +8569,7 @@ pub unsafe 
fn vmuld_lane_f64(a: f64, b: float64x1_t) -> f64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vmuld_laneq_f64(a: f64, b: float64x2_t) -> f64 { static_assert_uimm_bits!(LANE, 1); - let b: f64 = simd_extract(b, LANE as u32); + let b: f64 = simd_extract!(b, LANE as u32); a * b } @@ -8688,7 +8688,7 @@ pub unsafe fn vmull_high_p8(a: poly8x16_t, b: poly8x16_t) -> poly16x8_t { #[cfg_attr(test, assert_instr(pmull))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vmull_high_p64(a: poly64x2_t, b: poly64x2_t) -> p128 { - vmull_p64(simd_extract(a, 1), simd_extract(b, 1)) + vmull_p64(simd_extract!(a, 1), simd_extract!(b, 1)) } /// Multiply long @@ -8913,7 +8913,7 @@ pub unsafe fn vmulxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vmulx_lane_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { static_assert!(LANE == 0); - vmulx_f64(a, transmute::(simd_extract(b, LANE as u32))) + vmulx_f64(a, transmute::(simd_extract!(b, LANE as u32))) } /// Floating-point multiply extended @@ -8926,7 +8926,7 @@ pub unsafe fn vmulx_lane_f64(a: float64x1_t, b: float64x1_t) -> #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vmulx_laneq_f64(a: float64x1_t, b: float64x2_t) -> float64x1_t { static_assert_uimm_bits!(LANE, 1); - vmulx_f64(a, transmute::(simd_extract(b, LANE as u32))) + vmulx_f64(a, transmute::(simd_extract!(b, LANE as u32))) } /// Floating-point multiply extended @@ -9049,7 +9049,7 @@ pub unsafe fn vmulxd_f64(a: f64, b: f64) -> f64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vmulxs_lane_f32(a: f32, b: float32x2_t) -> f32 { static_assert_uimm_bits!(LANE, 1); - vmulxs_f32(a, simd_extract(b, LANE as u32)) + vmulxs_f32(a, simd_extract!(b, LANE as u32)) } /// Floating-point multiply extended @@ -9062,7 +9062,7 @@ pub unsafe fn vmulxs_lane_f32(a: f32, b: float32x2_t) -> f32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vmulxs_laneq_f32(a: f32, b: float32x4_t) -> f32 { static_assert_uimm_bits!(LANE, 2); - vmulxs_f32(a, simd_extract(b, LANE as u32)) + vmulxs_f32(a, simd_extract!(b, LANE as u32)) } /// Floating-point multiply extended @@ -9075,7 +9075,7 @@ pub unsafe fn vmulxs_laneq_f32(a: f32, b: float32x4_t) -> f32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vmulxd_lane_f64(a: f64, b: float64x1_t) -> f64 { static_assert!(LANE == 0); - vmulxd_f64(a, simd_extract(b, LANE as u32)) + vmulxd_f64(a, simd_extract!(b, LANE as u32)) } /// Floating-point multiply extended @@ -9088,7 +9088,7 @@ pub unsafe fn vmulxd_lane_f64(a: f64, b: float64x1_t) -> f64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vmulxd_laneq_f64(a: f64, b: float64x2_t) -> f64 { static_assert_uimm_bits!(LANE, 1); - vmulxd_f64(a, simd_extract(b, LANE as u32)) + vmulxd_f64(a, simd_extract!(b, LANE as u32)) } /// Floating-point fused Multiply-Add to accumulator(vector) @@ -9155,7 +9155,7 @@ pub unsafe fn vfmaq_n_f64(a: float64x2_t, b: float64x2_t, c: f64) -> float64x2_t #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vfma_lane_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { static_assert_uimm_bits!(LANE, 1); - vfma_f32(a, b, vdup_n_f32(simd_extract(c, LANE as u32))) + vfma_f32(a, b, vdup_n_f32(simd_extract!(c, LANE as u32))) } /// Floating-point fused multiply-add to accumulator @@ -9168,7 +9168,7 @@ pub unsafe fn vfma_lane_f32(a: float32x2_t, b: 
float32x2_t, c: #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vfma_laneq_f32(a: float32x2_t, b: float32x2_t, c: float32x4_t) -> float32x2_t { static_assert_uimm_bits!(LANE, 2); - vfma_f32(a, b, vdup_n_f32(simd_extract(c, LANE as u32))) + vfma_f32(a, b, vdup_n_f32(simd_extract!(c, LANE as u32))) } /// Floating-point fused multiply-add to accumulator @@ -9181,7 +9181,7 @@ pub unsafe fn vfma_laneq_f32(a: float32x2_t, b: float32x2_t, c: #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vfmaq_lane_f32(a: float32x4_t, b: float32x4_t, c: float32x2_t) -> float32x4_t { static_assert_uimm_bits!(LANE, 1); - vfmaq_f32(a, b, vdupq_n_f32(simd_extract(c, LANE as u32))) + vfmaq_f32(a, b, vdupq_n_f32(simd_extract!(c, LANE as u32))) } /// Floating-point fused multiply-add to accumulator @@ -9194,7 +9194,7 @@ pub unsafe fn vfmaq_lane_f32(a: float32x4_t, b: float32x4_t, c: #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vfmaq_laneq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { static_assert_uimm_bits!(LANE, 2); - vfmaq_f32(a, b, vdupq_n_f32(simd_extract(c, LANE as u32))) + vfmaq_f32(a, b, vdupq_n_f32(simd_extract!(c, LANE as u32))) } /// Floating-point fused multiply-add to accumulator @@ -9207,7 +9207,7 @@ pub unsafe fn vfmaq_laneq_f32(a: float32x4_t, b: float32x4_t, c #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vfma_lane_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t { static_assert!(LANE == 0); - vfma_f64(a, b, vdup_n_f64(simd_extract(c, LANE as u32))) + vfma_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32))) } /// Floating-point fused multiply-add to accumulator @@ -9220,7 +9220,7 @@ pub unsafe fn vfma_lane_f64(a: float64x1_t, b: float64x1_t, c: #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vfma_laneq_f64(a: float64x1_t, b: float64x1_t, c: float64x2_t) -> float64x1_t { static_assert_uimm_bits!(LANE, 1); - vfma_f64(a, b, vdup_n_f64(simd_extract(c, LANE as u32))) + vfma_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32))) } /// Floating-point fused multiply-add to accumulator @@ -9233,7 +9233,7 @@ pub unsafe fn vfma_laneq_f64(a: float64x1_t, b: float64x1_t, c: #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vfmaq_lane_f64(a: float64x2_t, b: float64x2_t, c: float64x1_t) -> float64x2_t { static_assert!(LANE == 0); - vfmaq_f64(a, b, vdupq_n_f64(simd_extract(c, LANE as u32))) + vfmaq_f64(a, b, vdupq_n_f64(simd_extract!(c, LANE as u32))) } /// Floating-point fused multiply-add to accumulator @@ -9246,7 +9246,7 @@ pub unsafe fn vfmaq_lane_f64(a: float64x2_t, b: float64x2_t, c: #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vfmaq_laneq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { static_assert_uimm_bits!(LANE, 1); - vfmaq_f64(a, b, vdupq_n_f64(simd_extract(c, LANE as u32))) + vfmaq_f64(a, b, vdupq_n_f64(simd_extract!(c, LANE as u32))) } /// Floating-point fused multiply-add to accumulator @@ -9264,7 +9264,7 @@ pub unsafe fn vfmas_lane_f32(a: f32, b: f32, c: float32x2_t) -> fn vfmas_lane_f32_(a: f32, b: f32, c: f32) -> f32; } static_assert_uimm_bits!(LANE, 1); - let c: f32 = simd_extract(c, LANE as u32); + let c: f32 = simd_extract!(c, LANE as u32); vfmas_lane_f32_(b, c, a) } @@ -9283,7 +9283,7 @@ pub unsafe fn vfmas_laneq_f32(a: f32, b: f32, c: float32x4_t) - fn vfmas_laneq_f32_(a: f32, b: f32, c: f32) -> f32; } static_assert_uimm_bits!(LANE, 2); - let c: f32 = simd_extract(c, LANE 
as u32); + let c: f32 = simd_extract!(c, LANE as u32); vfmas_laneq_f32_(b, c, a) } @@ -9302,7 +9302,7 @@ pub unsafe fn vfmad_lane_f64(a: f64, b: f64, c: float64x1_t) -> fn vfmad_lane_f64_(a: f64, b: f64, c: f64) -> f64; } static_assert!(LANE == 0); - let c: f64 = simd_extract(c, LANE as u32); + let c: f64 = simd_extract!(c, LANE as u32); vfmad_lane_f64_(b, c, a) } @@ -9321,7 +9321,7 @@ pub unsafe fn vfmad_laneq_f64(a: f64, b: f64, c: float64x2_t) - fn vfmad_laneq_f64_(a: f64, b: f64, c: f64) -> f64; } static_assert_uimm_bits!(LANE, 1); - let c: f64 = simd_extract(c, LANE as u32); + let c: f64 = simd_extract!(c, LANE as u32); vfmad_laneq_f64_(b, c, a) } @@ -9381,7 +9381,7 @@ pub unsafe fn vfmsq_n_f64(a: float64x2_t, b: float64x2_t, c: f64) -> float64x2_t #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vfms_lane_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { static_assert_uimm_bits!(LANE, 1); - vfms_f32(a, b, vdup_n_f32(simd_extract(c, LANE as u32))) + vfms_f32(a, b, vdup_n_f32(simd_extract!(c, LANE as u32))) } /// Floating-point fused multiply-subtract to accumulator @@ -9394,7 +9394,7 @@ pub unsafe fn vfms_lane_f32(a: float32x2_t, b: float32x2_t, c: #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vfms_laneq_f32(a: float32x2_t, b: float32x2_t, c: float32x4_t) -> float32x2_t { static_assert_uimm_bits!(LANE, 2); - vfms_f32(a, b, vdup_n_f32(simd_extract(c, LANE as u32))) + vfms_f32(a, b, vdup_n_f32(simd_extract!(c, LANE as u32))) } /// Floating-point fused multiply-subtract to accumulator @@ -9407,7 +9407,7 @@ pub unsafe fn vfms_laneq_f32(a: float32x2_t, b: float32x2_t, c: #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vfmsq_lane_f32(a: float32x4_t, b: float32x4_t, c: float32x2_t) -> float32x4_t { static_assert_uimm_bits!(LANE, 1); - vfmsq_f32(a, b, vdupq_n_f32(simd_extract(c, LANE as u32))) + vfmsq_f32(a, b, vdupq_n_f32(simd_extract!(c, LANE as u32))) } /// Floating-point fused multiply-subtract to accumulator @@ -9420,7 +9420,7 @@ pub unsafe fn vfmsq_lane_f32(a: float32x4_t, b: float32x4_t, c: #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vfmsq_laneq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { static_assert_uimm_bits!(LANE, 2); - vfmsq_f32(a, b, vdupq_n_f32(simd_extract(c, LANE as u32))) + vfmsq_f32(a, b, vdupq_n_f32(simd_extract!(c, LANE as u32))) } /// Floating-point fused multiply-subtract to accumulator @@ -9433,7 +9433,7 @@ pub unsafe fn vfmsq_laneq_f32(a: float32x4_t, b: float32x4_t, c #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vfms_lane_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t { static_assert!(LANE == 0); - vfms_f64(a, b, vdup_n_f64(simd_extract(c, LANE as u32))) + vfms_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32))) } /// Floating-point fused multiply-subtract to accumulator @@ -9446,7 +9446,7 @@ pub unsafe fn vfms_lane_f64(a: float64x1_t, b: float64x1_t, c: #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vfms_laneq_f64(a: float64x1_t, b: float64x1_t, c: float64x2_t) -> float64x1_t { static_assert_uimm_bits!(LANE, 1); - vfms_f64(a, b, vdup_n_f64(simd_extract(c, LANE as u32))) + vfms_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32))) } /// Floating-point fused multiply-subtract to accumulator @@ -9459,7 +9459,7 @@ pub unsafe fn vfms_laneq_f64(a: float64x1_t, b: float64x1_t, c: #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn 
vfmsq_lane_f64(a: float64x2_t, b: float64x2_t, c: float64x1_t) -> float64x2_t { static_assert!(LANE == 0); - vfmsq_f64(a, b, vdupq_n_f64(simd_extract(c, LANE as u32))) + vfmsq_f64(a, b, vdupq_n_f64(simd_extract!(c, LANE as u32))) } /// Floating-point fused multiply-subtract to accumulator @@ -9472,7 +9472,7 @@ pub unsafe fn vfmsq_lane_f64(a: float64x2_t, b: float64x2_t, c: #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vfmsq_laneq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { static_assert_uimm_bits!(LANE, 1); - vfmsq_f64(a, b, vdupq_n_f64(simd_extract(c, LANE as u32))) + vfmsq_f64(a, b, vdupq_n_f64(simd_extract!(c, LANE as u32))) } /// Floating-point fused multiply-subtract to accumulator @@ -11119,8 +11119,8 @@ pub unsafe fn vpaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { #[cfg_attr(test, assert_instr(nop))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vpadds_f32(a: float32x2_t) -> f32 { - let a1: f32 = simd_extract(a, 0); - let a2: f32 = simd_extract(a, 1); + let a1: f32 = simd_extract!(a, 0); + let a2: f32 = simd_extract!(a, 1); a1 + a2 } @@ -11132,8 +11132,8 @@ pub unsafe fn vpadds_f32(a: float32x2_t) -> f32 { #[cfg_attr(test, assert_instr(nop))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vpaddd_f64(a: float64x2_t) -> f64 { - let a1: f64 = simd_extract(a, 0); - let a2: f64 = simd_extract(a, 1); + let a1: f64 = simd_extract!(a, 0); + let a2: f64 = simd_extract!(a, 1); a1 + a2 } @@ -11259,7 +11259,7 @@ pub unsafe fn vpminqd_f64(a: float64x2_t) -> f64 { pub unsafe fn vqdmullh_s16(a: i16, b: i16) -> i32 { let a: int16x4_t = vdup_n_s16(a); let b: int16x4_t = vdup_n_s16(b); - simd_extract(vqdmull_s16(a, b), 0) + simd_extract!(vqdmull_s16(a, b), 0) } /// Signed saturating doubling multiply long @@ -11368,7 +11368,7 @@ pub unsafe fn vqdmull_laneq_s32(a: int32x2_t, b: int32x4_t) -> int #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqdmullh_lane_s16(a: i16, b: int16x4_t) -> i32 { static_assert_uimm_bits!(N, 2); - let b: i16 = simd_extract(b, N as u32); + let b: i16 = simd_extract!(b, N as u32); vqdmullh_s16(a, b) } @@ -11382,7 +11382,7 @@ pub unsafe fn vqdmullh_lane_s16(a: i16, b: int16x4_t) -> i32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqdmullh_laneq_s16(a: i16, b: int16x8_t) -> i32 { static_assert_uimm_bits!(N, 3); - let b: i16 = simd_extract(b, N as u32); + let b: i16 = simd_extract!(b, N as u32); vqdmullh_s16(a, b) } @@ -11396,7 +11396,7 @@ pub unsafe fn vqdmullh_laneq_s16(a: i16, b: int16x8_t) -> i32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqdmulls_lane_s32(a: i32, b: int32x2_t) -> i64 { static_assert_uimm_bits!(N, 1); - let b: i32 = simd_extract(b, N as u32); + let b: i32 = simd_extract!(b, N as u32); vqdmulls_s32(a, b) } @@ -11410,7 +11410,7 @@ pub unsafe fn vqdmulls_lane_s32(a: i32, b: int32x2_t) -> i64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqdmulls_laneq_s32(a: i32, b: int32x4_t) -> i64 { static_assert_uimm_bits!(N, 2); - let b: i32 = simd_extract(b, N as u32); + let b: i32 = simd_extract!(b, N as u32); vqdmulls_s32(a, b) } @@ -11605,7 +11605,7 @@ pub unsafe fn vqdmlal_high_laneq_s32(a: int64x2_t, b: int32x4_t, c #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqdmlalh_s16(a: i32, b: i16, c: i16) -> i32 { let x: int32x4_t = vqdmull_s16(vdup_n_s16(b), vdup_n_s16(c)); - vqadds_s32(a, simd_extract(x, 0)) + vqadds_s32(a, 
simd_extract!(x, 0)) } /// Signed saturating doubling multiply-add long @@ -11630,7 +11630,7 @@ pub unsafe fn vqdmlals_s32(a: i64, b: i32, c: i32) -> i64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqdmlalh_lane_s16(a: i32, b: i16, c: int16x4_t) -> i32 { static_assert_uimm_bits!(LANE, 2); - vqdmlalh_s16(a, b, simd_extract(c, LANE as u32)) + vqdmlalh_s16(a, b, simd_extract!(c, LANE as u32)) } /// Signed saturating doubling multiply-add long @@ -11643,7 +11643,7 @@ pub unsafe fn vqdmlalh_lane_s16(a: i32, b: i16, c: int16x4_t) - #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqdmlalh_laneq_s16(a: i32, b: i16, c: int16x8_t) -> i32 { static_assert_uimm_bits!(LANE, 3); - vqdmlalh_s16(a, b, simd_extract(c, LANE as u32)) + vqdmlalh_s16(a, b, simd_extract!(c, LANE as u32)) } /// Signed saturating doubling multiply-add long @@ -11656,7 +11656,7 @@ pub unsafe fn vqdmlalh_laneq_s16(a: i32, b: i16, c: int16x8_t) #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqdmlals_lane_s32(a: i64, b: i32, c: int32x2_t) -> i64 { static_assert_uimm_bits!(LANE, 1); - vqdmlals_s32(a, b, simd_extract(c, LANE as u32)) + vqdmlals_s32(a, b, simd_extract!(c, LANE as u32)) } /// Signed saturating doubling multiply-add long @@ -11669,7 +11669,7 @@ pub unsafe fn vqdmlals_lane_s32(a: i64, b: i32, c: int32x2_t) - #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqdmlals_laneq_s32(a: i64, b: i32, c: int32x4_t) -> i64 { static_assert_uimm_bits!(LANE, 2); - vqdmlals_s32(a, b, simd_extract(c, LANE as u32)) + vqdmlals_s32(a, b, simd_extract!(c, LANE as u32)) } /// Signed saturating doubling multiply-subtract long @@ -11803,7 +11803,7 @@ pub unsafe fn vqdmlsl_high_laneq_s32(a: int64x2_t, b: int32x4_t, c #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqdmlslh_s16(a: i32, b: i16, c: i16) -> i32 { let x: int32x4_t = vqdmull_s16(vdup_n_s16(b), vdup_n_s16(c)); - vqsubs_s32(a, simd_extract(x, 0)) + vqsubs_s32(a, simd_extract!(x, 0)) } /// Signed saturating doubling multiply-subtract long @@ -11828,7 +11828,7 @@ pub unsafe fn vqdmlsls_s32(a: i64, b: i32, c: i32) -> i64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqdmlslh_lane_s16(a: i32, b: i16, c: int16x4_t) -> i32 { static_assert_uimm_bits!(LANE, 2); - vqdmlslh_s16(a, b, simd_extract(c, LANE as u32)) + vqdmlslh_s16(a, b, simd_extract!(c, LANE as u32)) } /// Signed saturating doubling multiply-subtract long @@ -11841,7 +11841,7 @@ pub unsafe fn vqdmlslh_lane_s16(a: i32, b: i16, c: int16x4_t) - #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqdmlslh_laneq_s16(a: i32, b: i16, c: int16x8_t) -> i32 { static_assert_uimm_bits!(LANE, 3); - vqdmlslh_s16(a, b, simd_extract(c, LANE as u32)) + vqdmlslh_s16(a, b, simd_extract!(c, LANE as u32)) } /// Signed saturating doubling multiply-subtract long @@ -11854,7 +11854,7 @@ pub unsafe fn vqdmlslh_laneq_s16(a: i32, b: i16, c: int16x8_t) #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqdmlsls_lane_s32(a: i64, b: i32, c: int32x2_t) -> i64 { static_assert_uimm_bits!(LANE, 1); - vqdmlsls_s32(a, b, simd_extract(c, LANE as u32)) + vqdmlsls_s32(a, b, simd_extract!(c, LANE as u32)) } /// Signed saturating doubling multiply-subtract long @@ -11867,7 +11867,7 @@ pub unsafe fn vqdmlsls_lane_s32(a: i64, b: i32, c: int32x2_t) - #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqdmlsls_laneq_s32(a: i64, b: i32, c: int32x4_t) -> i64 { 
static_assert_uimm_bits!(LANE, 2); - vqdmlsls_s32(a, b, simd_extract(c, LANE as u32)) + vqdmlsls_s32(a, b, simd_extract!(c, LANE as u32)) } /// Signed saturating doubling multiply returning high half @@ -11880,7 +11880,7 @@ pub unsafe fn vqdmlsls_laneq_s32(a: i64, b: i32, c: int32x4_t) pub unsafe fn vqdmulhh_s16(a: i16, b: i16) -> i16 { let a: int16x4_t = vdup_n_s16(a); let b: int16x4_t = vdup_n_s16(b); - simd_extract(vqdmulh_s16(a, b), 0) + simd_extract!(vqdmulh_s16(a, b), 0) } /// Signed saturating doubling multiply returning high half @@ -11893,7 +11893,7 @@ pub unsafe fn vqdmulhh_s16(a: i16, b: i16) -> i16 { pub unsafe fn vqdmulhs_s32(a: i32, b: i32) -> i32 { let a: int32x2_t = vdup_n_s32(a); let b: int32x2_t = vdup_n_s32(b); - simd_extract(vqdmulh_s32(a, b), 0) + simd_extract!(vqdmulh_s32(a, b), 0) } /// Signed saturating doubling multiply returning high half @@ -11906,7 +11906,7 @@ pub unsafe fn vqdmulhs_s32(a: i32, b: i32) -> i32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqdmulhh_lane_s16(a: i16, b: int16x4_t) -> i16 { static_assert_uimm_bits!(N, 2); - let b: i16 = simd_extract(b, N as u32); + let b: i16 = simd_extract!(b, N as u32); vqdmulhh_s16(a, b) } @@ -11920,7 +11920,7 @@ pub unsafe fn vqdmulhh_lane_s16(a: i16, b: int16x4_t) -> i16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqdmulhh_laneq_s16(a: i16, b: int16x8_t) -> i16 { static_assert_uimm_bits!(N, 3); - let b: i16 = simd_extract(b, N as u32); + let b: i16 = simd_extract!(b, N as u32); vqdmulhh_s16(a, b) } @@ -11934,7 +11934,7 @@ pub unsafe fn vqdmulhh_laneq_s16(a: i16, b: int16x8_t) -> i16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqdmulhs_lane_s32(a: i32, b: int32x2_t) -> i32 { static_assert_uimm_bits!(N, 1); - let b: i32 = simd_extract(b, N as u32); + let b: i32 = simd_extract!(b, N as u32); vqdmulhs_s32(a, b) } @@ -11948,7 +11948,7 @@ pub unsafe fn vqdmulhs_lane_s32(a: i32, b: int32x2_t) -> i32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqdmulhs_laneq_s32(a: i32, b: int32x4_t) -> i32 { static_assert_uimm_bits!(N, 2); - let b: i32 = simd_extract(b, N as u32); + let b: i32 = simd_extract!(b, N as u32); vqdmulhs_s32(a, b) } @@ -11962,7 +11962,7 @@ pub unsafe fn vqdmulhs_laneq_s32(a: i32, b: int32x4_t) -> i32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqdmulh_lane_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { static_assert_uimm_bits!(LANE, 2); - vqdmulh_s16(a, vdup_n_s16(simd_extract(b, LANE as u32))) + vqdmulh_s16(a, vdup_n_s16(simd_extract!(b, LANE as u32))) } /// Vector saturating doubling multiply high by scalar @@ -11975,7 +11975,7 @@ pub unsafe fn vqdmulh_lane_s16(a: int16x4_t, b: int16x4_t) -> i #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqdmulhq_lane_s16(a: int16x8_t, b: int16x4_t) -> int16x8_t { static_assert_uimm_bits!(LANE, 2); - vqdmulhq_s16(a, vdupq_n_s16(simd_extract(b, LANE as u32))) + vqdmulhq_s16(a, vdupq_n_s16(simd_extract!(b, LANE as u32))) } /// Vector saturating doubling multiply high by scalar @@ -11988,7 +11988,7 @@ pub unsafe fn vqdmulhq_lane_s16(a: int16x8_t, b: int16x4_t) -> #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqdmulh_lane_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 1); - vqdmulh_s32(a, vdup_n_s32(simd_extract(b, LANE as u32))) + vqdmulh_s32(a, vdup_n_s32(simd_extract!(b, LANE as u32))) } /// Vector saturating doubling multiply high by scalar @@ -12001,7 
+12001,7 @@ pub unsafe fn vqdmulh_lane_s32(a: int32x2_t, b: int32x2_t) -> i #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqdmulhq_lane_s32(a: int32x4_t, b: int32x2_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 1); - vqdmulhq_s32(a, vdupq_n_s32(simd_extract(b, LANE as u32))) + vqdmulhq_s32(a, vdupq_n_s32(simd_extract!(b, LANE as u32))) } /// Saturating extract narrow @@ -12012,7 +12012,7 @@ pub unsafe fn vqdmulhq_lane_s32(a: int32x4_t, b: int32x2_t) -> #[cfg_attr(test, assert_instr(sqxtn))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqmovnh_s16(a: i16) -> i8 { - simd_extract(vqmovn_s16(vdupq_n_s16(a)), 0) + simd_extract!(vqmovn_s16(vdupq_n_s16(a)), 0) } /// Saturating extract narrow @@ -12023,7 +12023,7 @@ pub unsafe fn vqmovnh_s16(a: i16) -> i8 { #[cfg_attr(test, assert_instr(sqxtn))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqmovns_s32(a: i32) -> i16 { - simd_extract(vqmovn_s32(vdupq_n_s32(a)), 0) + simd_extract!(vqmovn_s32(vdupq_n_s32(a)), 0) } /// Saturating extract narrow @@ -12034,7 +12034,7 @@ pub unsafe fn vqmovns_s32(a: i32) -> i16 { #[cfg_attr(test, assert_instr(uqxtn))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqmovnh_u16(a: u16) -> u8 { - simd_extract(vqmovn_u16(vdupq_n_u16(a)), 0) + simd_extract!(vqmovn_u16(vdupq_n_u16(a)), 0) } /// Saturating extract narrow @@ -12045,7 +12045,7 @@ pub unsafe fn vqmovnh_u16(a: u16) -> u8 { #[cfg_attr(test, assert_instr(uqxtn))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqmovns_u32(a: u32) -> u16 { - simd_extract(vqmovn_u32(vdupq_n_u32(a)), 0) + simd_extract!(vqmovn_u32(vdupq_n_u32(a)), 0) } /// Saturating extract narrow @@ -12154,7 +12154,7 @@ pub unsafe fn vqmovn_high_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { #[cfg_attr(test, assert_instr(sqxtun))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqmovunh_s16(a: i16) -> u8 { - simd_extract(vqmovun_s16(vdupq_n_s16(a)), 0) + simd_extract!(vqmovun_s16(vdupq_n_s16(a)), 0) } /// Signed saturating extract unsigned narrow @@ -12165,7 +12165,7 @@ pub unsafe fn vqmovunh_s16(a: i16) -> u8 { #[cfg_attr(test, assert_instr(sqxtun))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqmovuns_s32(a: i32) -> u16 { - simd_extract(vqmovun_s32(vdupq_n_s32(a)), 0) + simd_extract!(vqmovun_s32(vdupq_n_s32(a)), 0) } /// Signed saturating extract unsigned narrow @@ -12176,7 +12176,7 @@ pub unsafe fn vqmovuns_s32(a: i32) -> u16 { #[cfg_attr(test, assert_instr(sqxtun))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqmovund_s64(a: i64) -> u32 { - simd_extract(vqmovun_s64(vdupq_n_s64(a)), 0) + simd_extract!(vqmovun_s64(vdupq_n_s64(a)), 0) } /// Signed saturating extract unsigned narrow @@ -12220,7 +12220,7 @@ pub unsafe fn vqmovun_high_s64(a: uint32x2_t, b: int64x2_t) -> uint32x4_t { #[cfg_attr(test, assert_instr(sqrdmulh))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqrdmulhh_s16(a: i16, b: i16) -> i16 { - simd_extract(vqrdmulh_s16(vdup_n_s16(a), vdup_n_s16(b)), 0) + simd_extract!(vqrdmulh_s16(vdup_n_s16(a), vdup_n_s16(b)), 0) } /// Signed saturating rounding doubling multiply returning high half @@ -12231,7 +12231,7 @@ pub unsafe fn vqrdmulhh_s16(a: i16, b: i16) -> i16 { #[cfg_attr(test, assert_instr(sqrdmulh))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqrdmulhs_s32(a: i32, b: i32) -> i32 { - simd_extract(vqrdmulh_s32(vdup_n_s32(a), 
vdup_n_s32(b)), 0) + simd_extract!(vqrdmulh_s32(vdup_n_s32(a), vdup_n_s32(b)), 0) } /// Signed saturating rounding doubling multiply returning high half @@ -12244,7 +12244,7 @@ pub unsafe fn vqrdmulhs_s32(a: i32, b: i32) -> i32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqrdmulhh_lane_s16(a: i16, b: int16x4_t) -> i16 { static_assert_uimm_bits!(LANE, 2); - vqrdmulhh_s16(a, simd_extract(b, LANE as u32)) + vqrdmulhh_s16(a, simd_extract!(b, LANE as u32)) } /// Signed saturating rounding doubling multiply returning high half @@ -12257,7 +12257,7 @@ pub unsafe fn vqrdmulhh_lane_s16(a: i16, b: int16x4_t) -> i16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqrdmulhh_laneq_s16(a: i16, b: int16x8_t) -> i16 { static_assert_uimm_bits!(LANE, 3); - vqrdmulhh_s16(a, simd_extract(b, LANE as u32)) + vqrdmulhh_s16(a, simd_extract!(b, LANE as u32)) } /// Signed saturating rounding doubling multiply returning high half @@ -12270,7 +12270,7 @@ pub unsafe fn vqrdmulhh_laneq_s16(a: i16, b: int16x8_t) -> i16 #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqrdmulhs_lane_s32(a: i32, b: int32x2_t) -> i32 { static_assert_uimm_bits!(LANE, 1); - vqrdmulhs_s32(a, simd_extract(b, LANE as u32)) + vqrdmulhs_s32(a, simd_extract!(b, LANE as u32)) } /// Signed saturating rounding doubling multiply returning high half @@ -12283,7 +12283,7 @@ pub unsafe fn vqrdmulhs_lane_s32(a: i32, b: int32x2_t) -> i32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqrdmulhs_laneq_s32(a: i32, b: int32x4_t) -> i32 { static_assert_uimm_bits!(LANE, 2); - vqrdmulhs_s32(a, simd_extract(b, LANE as u32)) + vqrdmulhs_s32(a, simd_extract!(b, LANE as u32)) } /// Signed saturating rounding doubling multiply accumulate returning high half @@ -12361,7 +12361,7 @@ pub unsafe fn vqrdmlahh_s16(a: i16, b: i16, c: i16) -> i16 { let a: int16x4_t = vdup_n_s16(a); let b: int16x4_t = vdup_n_s16(b); let c: int16x4_t = vdup_n_s16(c); - simd_extract(vqrdmlah_s16(a, b, c), 0) + simd_extract!(vqrdmlah_s16(a, b, c), 0) } /// Signed saturating rounding doubling multiply accumulate returning high half @@ -12375,7 +12375,7 @@ pub unsafe fn vqrdmlahs_s32(a: i32, b: i32, c: i32) -> i32 { let a: int32x2_t = vdup_n_s32(a); let b: int32x2_t = vdup_n_s32(b); let c: int32x2_t = vdup_n_s32(c); - simd_extract(vqrdmlah_s32(a, b, c), 0) + simd_extract!(vqrdmlah_s32(a, b, c), 0) } /// Signed saturating rounding doubling multiply accumulate returning high half @@ -12500,7 +12500,7 @@ pub unsafe fn vqrdmlahq_laneq_s32(a: int32x4_t, b: int32x4_t, c #[stable(feature = "rdm_intrinsics", since = "1.62.0")] pub unsafe fn vqrdmlahh_lane_s16(a: i16, b: i16, c: int16x4_t) -> i16 { static_assert_uimm_bits!(LANE, 2); - vqrdmlahh_s16(a, b, simd_extract(c, LANE as u32)) + vqrdmlahh_s16(a, b, simd_extract!(c, LANE as u32)) } /// Signed saturating rounding doubling multiply accumulate returning high half @@ -12513,7 +12513,7 @@ pub unsafe fn vqrdmlahh_lane_s16(a: i16, b: i16, c: int16x4_t) #[stable(feature = "rdm_intrinsics", since = "1.62.0")] pub unsafe fn vqrdmlahh_laneq_s16(a: i16, b: i16, c: int16x8_t) -> i16 { static_assert_uimm_bits!(LANE, 3); - vqrdmlahh_s16(a, b, simd_extract(c, LANE as u32)) + vqrdmlahh_s16(a, b, simd_extract!(c, LANE as u32)) } /// Signed saturating rounding doubling multiply accumulate returning high half @@ -12526,7 +12526,7 @@ pub unsafe fn vqrdmlahh_laneq_s16(a: i16, b: i16, c: int16x8_t) #[stable(feature = "rdm_intrinsics", since = "1.62.0")] pub unsafe fn 
vqrdmlahs_lane_s32(a: i32, b: i32, c: int32x2_t) -> i32 { static_assert_uimm_bits!(LANE, 1); - vqrdmlahs_s32(a, b, simd_extract(c, LANE as u32)) + vqrdmlahs_s32(a, b, simd_extract!(c, LANE as u32)) } /// Signed saturating rounding doubling multiply accumulate returning high half @@ -12539,7 +12539,7 @@ pub unsafe fn vqrdmlahs_lane_s32(a: i32, b: i32, c: int32x2_t) #[stable(feature = "rdm_intrinsics", since = "1.62.0")] pub unsafe fn vqrdmlahs_laneq_s32(a: i32, b: i32, c: int32x4_t) -> i32 { static_assert_uimm_bits!(LANE, 2); - vqrdmlahs_s32(a, b, simd_extract(c, LANE as u32)) + vqrdmlahs_s32(a, b, simd_extract!(c, LANE as u32)) } /// Signed saturating rounding doubling multiply subtract returning high half @@ -12617,7 +12617,7 @@ pub unsafe fn vqrdmlshh_s16(a: i16, b: i16, c: i16) -> i16 { let a: int16x4_t = vdup_n_s16(a); let b: int16x4_t = vdup_n_s16(b); let c: int16x4_t = vdup_n_s16(c); - simd_extract(vqrdmlsh_s16(a, b, c), 0) + simd_extract!(vqrdmlsh_s16(a, b, c), 0) } /// Signed saturating rounding doubling multiply subtract returning high half @@ -12631,7 +12631,7 @@ pub unsafe fn vqrdmlshs_s32(a: i32, b: i32, c: i32) -> i32 { let a: int32x2_t = vdup_n_s32(a); let b: int32x2_t = vdup_n_s32(b); let c: int32x2_t = vdup_n_s32(c); - simd_extract(vqrdmlsh_s32(a, b, c), 0) + simd_extract!(vqrdmlsh_s32(a, b, c), 0) } /// Signed saturating rounding doubling multiply subtract returning high half @@ -12756,7 +12756,7 @@ pub unsafe fn vqrdmlshq_laneq_s32(a: int32x4_t, b: int32x4_t, c #[stable(feature = "rdm_intrinsics", since = "1.62.0")] pub unsafe fn vqrdmlshh_lane_s16(a: i16, b: i16, c: int16x4_t) -> i16 { static_assert_uimm_bits!(LANE, 2); - vqrdmlshh_s16(a, b, simd_extract(c, LANE as u32)) + vqrdmlshh_s16(a, b, simd_extract!(c, LANE as u32)) } /// Signed saturating rounding doubling multiply subtract returning high half @@ -12769,7 +12769,7 @@ pub unsafe fn vqrdmlshh_lane_s16(a: i16, b: i16, c: int16x4_t) #[stable(feature = "rdm_intrinsics", since = "1.62.0")] pub unsafe fn vqrdmlshh_laneq_s16(a: i16, b: i16, c: int16x8_t) -> i16 { static_assert_uimm_bits!(LANE, 3); - vqrdmlshh_s16(a, b, simd_extract(c, LANE as u32)) + vqrdmlshh_s16(a, b, simd_extract!(c, LANE as u32)) } /// Signed saturating rounding doubling multiply subtract returning high half @@ -12782,7 +12782,7 @@ pub unsafe fn vqrdmlshh_laneq_s16(a: i16, b: i16, c: int16x8_t) #[stable(feature = "rdm_intrinsics", since = "1.62.0")] pub unsafe fn vqrdmlshs_lane_s32(a: i32, b: i32, c: int32x2_t) -> i32 { static_assert_uimm_bits!(LANE, 1); - vqrdmlshs_s32(a, b, simd_extract(c, LANE as u32)) + vqrdmlshs_s32(a, b, simd_extract!(c, LANE as u32)) } /// Signed saturating rounding doubling multiply subtract returning high half @@ -12795,7 +12795,7 @@ pub unsafe fn vqrdmlshs_lane_s32(a: i32, b: i32, c: int32x2_t) #[stable(feature = "rdm_intrinsics", since = "1.62.0")] pub unsafe fn vqrdmlshs_laneq_s32(a: i32, b: i32, c: int32x4_t) -> i32 { static_assert_uimm_bits!(LANE, 2); - vqrdmlshs_s32(a, b, simd_extract(c, LANE as u32)) + vqrdmlshs_s32(a, b, simd_extract!(c, LANE as u32)) } /// Signed saturating rounding shift left @@ -12840,7 +12840,7 @@ pub unsafe fn vqrshld_s64(a: i64, b: i64) -> i64 { pub unsafe fn vqrshlb_s8(a: i8, b: i8) -> i8 { let a: int8x8_t = vdup_n_s8(a); let b: int8x8_t = vdup_n_s8(b); - simd_extract(vqrshl_s8(a, b), 0) + simd_extract!(vqrshl_s8(a, b), 0) } /// Signed saturating rounding shift left @@ -12853,7 +12853,7 @@ pub unsafe fn vqrshlb_s8(a: i8, b: i8) -> i8 { pub unsafe fn vqrshlh_s16(a: i16, b: i16) -> i16 { let a: 
int16x4_t = vdup_n_s16(a); let b: int16x4_t = vdup_n_s16(b); - simd_extract(vqrshl_s16(a, b), 0) + simd_extract!(vqrshl_s16(a, b), 0) } /// Unsigned signed saturating rounding shift left @@ -12898,7 +12898,7 @@ pub unsafe fn vqrshld_u64(a: u64, b: i64) -> u64 { pub unsafe fn vqrshlb_u8(a: u8, b: i8) -> u8 { let a: uint8x8_t = vdup_n_u8(a); let b: int8x8_t = vdup_n_s8(b); - simd_extract(vqrshl_u8(a, b), 0) + simd_extract!(vqrshl_u8(a, b), 0) } /// Unsigned signed saturating rounding shift left @@ -12911,7 +12911,7 @@ pub unsafe fn vqrshlb_u8(a: u8, b: i8) -> u8 { pub unsafe fn vqrshlh_u16(a: u16, b: i16) -> u16 { let a: uint16x4_t = vdup_n_u16(a); let b: int16x4_t = vdup_n_s16(b); - simd_extract(vqrshl_u16(a, b), 0) + simd_extract!(vqrshl_u16(a, b), 0) } /// Signed saturating rounded shift right narrow @@ -12925,7 +12925,7 @@ pub unsafe fn vqrshlh_u16(a: u16, b: i16) -> u16 { pub unsafe fn vqrshrnh_n_s16(a: i16) -> i8 { static_assert!(N >= 1 && N <= 8); let a: int16x8_t = vdupq_n_s16(a); - simd_extract(vqrshrn_n_s16::(a), 0) + simd_extract!(vqrshrn_n_s16::(a), 0) } /// Signed saturating rounded shift right narrow @@ -12939,7 +12939,7 @@ pub unsafe fn vqrshrnh_n_s16(a: i16) -> i8 { pub unsafe fn vqrshrns_n_s32(a: i32) -> i16 { static_assert!(N >= 1 && N <= 16); let a: int32x4_t = vdupq_n_s32(a); - simd_extract(vqrshrn_n_s32::(a), 0) + simd_extract!(vqrshrn_n_s32::(a), 0) } /// Signed saturating rounded shift right narrow @@ -12953,7 +12953,7 @@ pub unsafe fn vqrshrns_n_s32(a: i32) -> i16 { pub unsafe fn vqrshrnd_n_s64(a: i64) -> i32 { static_assert!(N >= 1 && N <= 32); let a: int64x2_t = vdupq_n_s64(a); - simd_extract(vqrshrn_n_s64::(a), 0) + simd_extract!(vqrshrn_n_s64::(a), 0) } /// Signed saturating rounded shift right narrow @@ -13006,7 +13006,7 @@ pub unsafe fn vqrshrn_high_n_s64(a: int32x2_t, b: int64x2_t) -> in pub unsafe fn vqrshrnh_n_u16(a: u16) -> u8 { static_assert!(N >= 1 && N <= 8); let a: uint16x8_t = vdupq_n_u16(a); - simd_extract(vqrshrn_n_u16::(a), 0) + simd_extract!(vqrshrn_n_u16::(a), 0) } /// Unsigned saturating rounded shift right narrow @@ -13020,7 +13020,7 @@ pub unsafe fn vqrshrnh_n_u16(a: u16) -> u8 { pub unsafe fn vqrshrns_n_u32(a: u32) -> u16 { static_assert!(N >= 1 && N <= 16); let a: uint32x4_t = vdupq_n_u32(a); - simd_extract(vqrshrn_n_u32::(a), 0) + simd_extract!(vqrshrn_n_u32::(a), 0) } /// Unsigned saturating rounded shift right narrow @@ -13034,7 +13034,7 @@ pub unsafe fn vqrshrns_n_u32(a: u32) -> u16 { pub unsafe fn vqrshrnd_n_u64(a: u64) -> u32 { static_assert!(N >= 1 && N <= 32); let a: uint64x2_t = vdupq_n_u64(a); - simd_extract(vqrshrn_n_u64::(a), 0) + simd_extract!(vqrshrn_n_u64::(a), 0) } /// Unsigned saturating rounded shift right narrow @@ -13087,7 +13087,7 @@ pub unsafe fn vqrshrn_high_n_u64(a: uint32x2_t, b: uint64x2_t) -> pub unsafe fn vqrshrunh_n_s16(a: i16) -> u8 { static_assert!(N >= 1 && N <= 8); let a: int16x8_t = vdupq_n_s16(a); - simd_extract(vqrshrun_n_s16::(a), 0) + simd_extract!(vqrshrun_n_s16::(a), 0) } /// Signed saturating rounded shift right unsigned narrow @@ -13101,7 +13101,7 @@ pub unsafe fn vqrshrunh_n_s16(a: i16) -> u8 { pub unsafe fn vqrshruns_n_s32(a: i32) -> u16 { static_assert!(N >= 1 && N <= 16); let a: int32x4_t = vdupq_n_s32(a); - simd_extract(vqrshrun_n_s32::(a), 0) + simd_extract!(vqrshrun_n_s32::(a), 0) } /// Signed saturating rounded shift right unsigned narrow @@ -13115,7 +13115,7 @@ pub unsafe fn vqrshruns_n_s32(a: i32) -> u16 { pub unsafe fn vqrshrund_n_s64(a: i64) -> u32 { static_assert!(N >= 1 && N <= 32); let 
a: int64x2_t = vdupq_n_s64(a); - simd_extract(vqrshrun_n_s64::(a), 0) + simd_extract!(vqrshrun_n_s64::(a), 0) } /// Signed saturating rounded shift right unsigned narrow @@ -13182,7 +13182,7 @@ pub unsafe fn vqshld_s64(a: i64, b: i64) -> i64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqshlb_s8(a: i8, b: i8) -> i8 { let c: int8x8_t = vqshl_s8(vdup_n_s8(a), vdup_n_s8(b)); - simd_extract(c, 0) + simd_extract!(c, 0) } /// Signed saturating shift left @@ -13194,7 +13194,7 @@ pub unsafe fn vqshlb_s8(a: i8, b: i8) -> i8 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqshlh_s16(a: i16, b: i16) -> i16 { let c: int16x4_t = vqshl_s16(vdup_n_s16(a), vdup_n_s16(b)); - simd_extract(c, 0) + simd_extract!(c, 0) } /// Signed saturating shift left @@ -13206,7 +13206,7 @@ pub unsafe fn vqshlh_s16(a: i16, b: i16) -> i16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqshls_s32(a: i32, b: i32) -> i32 { let c: int32x2_t = vqshl_s32(vdup_n_s32(a), vdup_n_s32(b)); - simd_extract(c, 0) + simd_extract!(c, 0) } /// Unsigned saturating shift left @@ -13234,7 +13234,7 @@ pub unsafe fn vqshld_u64(a: u64, b: i64) -> u64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqshlb_u8(a: u8, b: i8) -> u8 { let c: uint8x8_t = vqshl_u8(vdup_n_u8(a), vdup_n_s8(b)); - simd_extract(c, 0) + simd_extract!(c, 0) } /// Unsigned saturating shift left @@ -13246,7 +13246,7 @@ pub unsafe fn vqshlb_u8(a: u8, b: i8) -> u8 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqshlh_u16(a: u16, b: i16) -> u16 { let c: uint16x4_t = vqshl_u16(vdup_n_u16(a), vdup_n_s16(b)); - simd_extract(c, 0) + simd_extract!(c, 0) } /// Unsigned saturating shift left @@ -13258,7 +13258,7 @@ pub unsafe fn vqshlh_u16(a: u16, b: i16) -> u16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqshls_u32(a: u32, b: i32) -> u32 { let c: uint32x2_t = vqshl_u32(vdup_n_u32(a), vdup_n_s32(b)); - simd_extract(c, 0) + simd_extract!(c, 0) } /// Signed saturating shift left @@ -13271,7 +13271,7 @@ pub unsafe fn vqshls_u32(a: u32, b: i32) -> u32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqshlb_n_s8(a: i8) -> i8 { static_assert_uimm_bits!(N, 3); - simd_extract(vqshl_n_s8::(vdup_n_s8(a)), 0) + simd_extract!(vqshl_n_s8::(vdup_n_s8(a)), 0) } /// Signed saturating shift left @@ -13284,7 +13284,7 @@ pub unsafe fn vqshlb_n_s8(a: i8) -> i8 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqshlh_n_s16(a: i16) -> i16 { static_assert_uimm_bits!(N, 4); - simd_extract(vqshl_n_s16::(vdup_n_s16(a)), 0) + simd_extract!(vqshl_n_s16::(vdup_n_s16(a)), 0) } /// Signed saturating shift left @@ -13297,7 +13297,7 @@ pub unsafe fn vqshlh_n_s16(a: i16) -> i16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqshls_n_s32(a: i32) -> i32 { static_assert_uimm_bits!(N, 5); - simd_extract(vqshl_n_s32::(vdup_n_s32(a)), 0) + simd_extract!(vqshl_n_s32::(vdup_n_s32(a)), 0) } /// Signed saturating shift left @@ -13310,7 +13310,7 @@ pub unsafe fn vqshls_n_s32(a: i32) -> i32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqshld_n_s64(a: i64) -> i64 { static_assert_uimm_bits!(N, 6); - simd_extract(vqshl_n_s64::(vdup_n_s64(a)), 0) + simd_extract!(vqshl_n_s64::(vdup_n_s64(a)), 0) } /// Unsigned saturating shift left @@ -13323,7 +13323,7 @@ pub unsafe fn vqshld_n_s64(a: i64) -> i64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqshlb_n_u8(a: 
u8) -> u8 { static_assert_uimm_bits!(N, 3); - simd_extract(vqshl_n_u8::(vdup_n_u8(a)), 0) + simd_extract!(vqshl_n_u8::(vdup_n_u8(a)), 0) } /// Unsigned saturating shift left @@ -13336,7 +13336,7 @@ pub unsafe fn vqshlb_n_u8(a: u8) -> u8 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqshlh_n_u16(a: u16) -> u16 { static_assert_uimm_bits!(N, 4); - simd_extract(vqshl_n_u16::(vdup_n_u16(a)), 0) + simd_extract!(vqshl_n_u16::(vdup_n_u16(a)), 0) } /// Unsigned saturating shift left @@ -13349,7 +13349,7 @@ pub unsafe fn vqshlh_n_u16(a: u16) -> u16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqshls_n_u32(a: u32) -> u32 { static_assert_uimm_bits!(N, 5); - simd_extract(vqshl_n_u32::(vdup_n_u32(a)), 0) + simd_extract!(vqshl_n_u32::(vdup_n_u32(a)), 0) } /// Unsigned saturating shift left @@ -13362,7 +13362,7 @@ pub unsafe fn vqshls_n_u32(a: u32) -> u32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqshld_n_u64(a: u64) -> u64 { static_assert_uimm_bits!(N, 6); - simd_extract(vqshl_n_u64::(vdup_n_u64(a)), 0) + simd_extract!(vqshl_n_u64::(vdup_n_u64(a)), 0) } /// Signed saturating shift left unsigned @@ -13375,7 +13375,7 @@ pub unsafe fn vqshld_n_u64(a: u64) -> u64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqshlub_n_s8(a: i8) -> u8 { static_assert_uimm_bits!(N, 3); - simd_extract(vqshlu_n_s8::(vdup_n_s8(a)), 0) + simd_extract!(vqshlu_n_s8::(vdup_n_s8(a)), 0) } /// Signed saturating shift left unsigned @@ -13388,7 +13388,7 @@ pub unsafe fn vqshlub_n_s8(a: i8) -> u8 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqshluh_n_s16(a: i16) -> u16 { static_assert_uimm_bits!(N, 4); - simd_extract(vqshlu_n_s16::(vdup_n_s16(a)), 0) + simd_extract!(vqshlu_n_s16::(vdup_n_s16(a)), 0) } /// Signed saturating shift left unsigned @@ -13401,7 +13401,7 @@ pub unsafe fn vqshluh_n_s16(a: i16) -> u16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqshlus_n_s32(a: i32) -> u32 { static_assert_uimm_bits!(N, 5); - simd_extract(vqshlu_n_s32::(vdup_n_s32(a)), 0) + simd_extract!(vqshlu_n_s32::(vdup_n_s32(a)), 0) } /// Signed saturating shift left unsigned @@ -13414,7 +13414,7 @@ pub unsafe fn vqshlus_n_s32(a: i32) -> u32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqshlud_n_s64(a: i64) -> u64 { static_assert_uimm_bits!(N, 6); - simd_extract(vqshlu_n_s64::(vdup_n_s64(a)), 0) + simd_extract!(vqshlu_n_s64::(vdup_n_s64(a)), 0) } /// Signed saturating shift right narrow @@ -13445,7 +13445,7 @@ pub unsafe fn vqshrnd_n_s64(a: i64) -> i32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqshrnh_n_s16(a: i16) -> i8 { static_assert!(N >= 1 && N <= 8); - simd_extract(vqshrn_n_s16::(vdupq_n_s16(a)), 0) + simd_extract!(vqshrn_n_s16::(vdupq_n_s16(a)), 0) } /// Signed saturating shift right narrow @@ -13458,7 +13458,7 @@ pub unsafe fn vqshrnh_n_s16(a: i16) -> i8 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqshrns_n_s32(a: i32) -> i16 { static_assert!(N >= 1 && N <= 16); - simd_extract(vqshrn_n_s32::(vdupq_n_s32(a)), 0) + simd_extract!(vqshrn_n_s32::(vdupq_n_s32(a)), 0) } /// Signed saturating shift right narrow @@ -13528,7 +13528,7 @@ pub unsafe fn vqshrnd_n_u64(a: u64) -> u32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqshrnh_n_u16(a: u16) -> u8 { static_assert!(N >= 1 && N <= 8); - simd_extract(vqshrn_n_u16::(vdupq_n_u16(a)), 0) + simd_extract!(vqshrn_n_u16::(vdupq_n_u16(a)), 
0) } /// Unsigned saturating shift right narrow @@ -13541,7 +13541,7 @@ pub unsafe fn vqshrnh_n_u16(a: u16) -> u8 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqshrns_n_u32(a: u32) -> u16 { static_assert!(N >= 1 && N <= 16); - simd_extract(vqshrn_n_u32::(vdupq_n_u32(a)), 0) + simd_extract!(vqshrn_n_u32::(vdupq_n_u32(a)), 0) } /// Unsigned saturating shift right narrow @@ -13593,7 +13593,7 @@ pub unsafe fn vqshrn_high_n_u64(a: uint32x2_t, b: uint64x2_t) -> u #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqshrunh_n_s16(a: i16) -> u8 { static_assert!(N >= 1 && N <= 8); - simd_extract(vqshrun_n_s16::(vdupq_n_s16(a)), 0) + simd_extract!(vqshrun_n_s16::(vdupq_n_s16(a)), 0) } /// Signed saturating shift right unsigned narrow @@ -13606,7 +13606,7 @@ pub unsafe fn vqshrunh_n_s16(a: i16) -> u8 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqshruns_n_s32(a: i32) -> u16 { static_assert!(N >= 1 && N <= 16); - simd_extract(vqshrun_n_s32::(vdupq_n_s32(a)), 0) + simd_extract!(vqshrun_n_s32::(vdupq_n_s32(a)), 0) } /// Signed saturating shift right unsigned narrow @@ -13619,7 +13619,7 @@ pub unsafe fn vqshruns_n_s32(a: i32) -> u16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqshrund_n_s64(a: i64) -> u32 { static_assert!(N >= 1 && N <= 32); - simd_extract(vqshrun_n_s64::(vdupq_n_s64(a)), 0) + simd_extract!(vqshrun_n_s64::(vdupq_n_s64(a)), 0) } /// Signed saturating shift right unsigned narrow @@ -13669,7 +13669,7 @@ pub unsafe fn vqshrun_high_n_s64(a: uint32x2_t, b: int64x2_t) -> u #[cfg_attr(test, assert_instr(usqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vsqaddb_u8(a: u8, b: i8) -> u8 { - simd_extract(vsqadd_u8(vdup_n_u8(a), vdup_n_s8(b)), 0) + simd_extract!(vsqadd_u8(vdup_n_u8(a), vdup_n_s8(b)), 0) } /// Unsigned saturating accumulate of signed value @@ -13680,7 +13680,7 @@ pub unsafe fn vsqaddb_u8(a: u8, b: i8) -> u8 { #[cfg_attr(test, assert_instr(usqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vsqaddh_u16(a: u16, b: i16) -> u16 { - simd_extract(vsqadd_u16(vdup_n_u16(a), vdup_n_s16(b)), 0) + simd_extract!(vsqadd_u16(vdup_n_u16(a), vdup_n_s16(b)), 0) } /// Unsigned saturating accumulate of signed value @@ -14975,7 +14975,7 @@ pub unsafe fn vrsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> u #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vset_lane_f64(a: f64, b: float64x1_t) -> float64x1_t { static_assert!(LANE == 0); - simd_insert(b, LANE as u32, a) + simd_insert!(b, LANE as u32, a) } /// Insert vector element from another vector element @@ -14988,7 +14988,7 @@ pub unsafe fn vset_lane_f64(a: f64, b: float64x1_t) -> float64x #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vsetq_lane_f64(a: f64, b: float64x2_t) -> float64x2_t { static_assert_uimm_bits!(LANE, 1); - simd_insert(b, LANE as u32, a) + simd_insert!(b, LANE as u32, a) } /// Signed Shift left @@ -15396,7 +15396,7 @@ pub unsafe fn vrnd32x_f64(a: float64x1_t) -> float64x1_t { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.frint32x.f64")] fn vrnd32x_f64_(a: f64) -> f64; } - transmute(vrnd32x_f64_(simd_extract(a, 0))) + transmute(vrnd32x_f64_(simd_extract!(a, 0))) } /// Floating-point round to 32-bit integer toward zero @@ -15460,7 +15460,7 @@ pub unsafe fn vrnd32z_f64(a: float64x1_t) -> float64x1_t { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.frint32z.f64")] fn vrnd32z_f64_(a: f64) 
-> f64; } - transmute(vrnd32z_f64_(simd_extract(a, 0))) + transmute(vrnd32z_f64_(simd_extract!(a, 0))) } /// Floating-point round to 64-bit integer, using current rounding mode @@ -15524,7 +15524,7 @@ pub unsafe fn vrnd64x_f64(a: float64x1_t) -> float64x1_t { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.frint64x.f64")] fn vrnd64x_f64_(a: f64) -> f64; } - transmute(vrnd64x_f64_(simd_extract(a, 0))) + transmute(vrnd64x_f64_(simd_extract!(a, 0))) } /// Floating-point round to 64-bit integer toward zero @@ -15588,7 +15588,7 @@ pub unsafe fn vrnd64z_f64(a: float64x1_t) -> float64x1_t { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.frint64z.f64")] fn vrnd64z_f64_(a: f64) -> f64; } - transmute(vrnd64z_f64_(simd_extract(a, 0))) + transmute(vrnd64z_f64_(simd_extract!(a, 0))) } /// Transpose vectors @@ -17170,7 +17170,7 @@ pub unsafe fn vqabsq_s64(a: int64x2_t) -> int64x2_t { #[cfg_attr(test, assert_instr(sqabs))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqabsb_s8(a: i8) -> i8 { - simd_extract(vqabs_s8(vdup_n_s8(a)), 0) + simd_extract!(vqabs_s8(vdup_n_s8(a)), 0) } /// Signed saturating absolute value @@ -17181,7 +17181,7 @@ pub unsafe fn vqabsb_s8(a: i8) -> i8 { #[cfg_attr(test, assert_instr(sqabs))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vqabsh_s16(a: i16) -> i16 { - simd_extract(vqabs_s16(vdup_n_s16(a)), 0) + simd_extract!(vqabs_s16(vdup_n_s16(a)), 0) } /// Signed saturating absolute value diff --git a/crates/core_arch/src/aarch64/neon/mod.rs b/crates/core_arch/src/aarch64/neon/mod.rs index 567eeb37cc..7556f2915e 100644 --- a/crates/core_arch/src/aarch64/neon/mod.rs +++ b/crates/core_arch/src/aarch64/neon/mod.rs @@ -436,7 +436,7 @@ pub unsafe fn vcopy_laneq_s64( ) -> int64x1_t { static_assert!(LANE1 == 0); static_assert_uimm_bits!(LANE2, 1); - transmute::(simd_extract(b, LANE2 as u32)) + transmute::(simd_extract!(b, LANE2 as u32)) } /// Duplicate vector element to vector or scalar @@ -451,7 +451,7 @@ pub unsafe fn vcopy_laneq_u64( ) -> uint64x1_t { static_assert!(LANE1 == 0); static_assert_uimm_bits!(LANE2, 1); - transmute::(simd_extract(b, LANE2 as u32)) + transmute::(simd_extract!(b, LANE2 as u32)) } /// Duplicate vector element to vector or scalar @@ -466,7 +466,7 @@ pub unsafe fn vcopy_laneq_p64( ) -> poly64x1_t { static_assert!(LANE1 == 0); static_assert_uimm_bits!(LANE2, 1); - transmute::(simd_extract(b, LANE2 as u32)) + transmute::(simd_extract!(b, LANE2 as u32)) } /// Duplicate vector element to vector or scalar @@ -481,7 +481,7 @@ pub unsafe fn vcopy_laneq_f64( ) -> float64x1_t { static_assert!(LANE1 == 0); static_assert_uimm_bits!(LANE2, 1); - transmute::(simd_extract(b, LANE2 as u32)) + transmute::(simd_extract!(b, LANE2 as u32)) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -749,7 +749,7 @@ pub unsafe fn vld1q_dup_f64(ptr: *const f64) -> float64x2_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vld1_lane_f64(ptr: *const f64, src: float64x1_t) -> float64x1_t { static_assert!(LANE == 0); - simd_insert(src, LANE as u32, *ptr) + simd_insert!(src, LANE as u32, *ptr) } /// Load one single-element structure to one lane of one register. 
@@ -760,7 +760,7 @@ pub unsafe fn vld1_lane_f64(ptr: *const f64, src: float64x1_t) #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vld1q_lane_f64(ptr: *const f64, src: float64x2_t) -> float64x2_t { static_assert_uimm_bits!(LANE, 1); - simd_insert(src, LANE as u32, *ptr) + simd_insert!(src, LANE as u32, *ptr) } /// Store multiple single-element structures from one, two, three, or four registers. @@ -2038,7 +2038,7 @@ pub unsafe fn vmovq_n_f64(value: f64) -> float64x2_t { #[cfg_attr(test, assert_instr(mov))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vget_high_f64(a: float64x2_t) -> float64x1_t { - float64x1_t(simd_extract(a, 1)) + float64x1_t(simd_extract!(a, 1)) } /// Duplicate vector element to vector or scalar @@ -2047,7 +2047,7 @@ pub unsafe fn vget_high_f64(a: float64x2_t) -> float64x1_t { #[cfg_attr(test, assert_instr(ext))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vget_high_p64(a: poly64x2_t) -> poly64x1_t { - transmute(u64x1::new(simd_extract(a, 1))) + transmute(u64x1::new(simd_extract!(a, 1))) } /// Duplicate vector element to vector or scalar @@ -2056,7 +2056,7 @@ pub unsafe fn vget_high_p64(a: poly64x2_t) -> poly64x1_t { #[cfg_attr(test, assert_instr(nop))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vget_low_f64(a: float64x2_t) -> float64x1_t { - float64x1_t(simd_extract(a, 0)) + float64x1_t(simd_extract!(a, 0)) } /// Duplicate vector element to vector or scalar @@ -2065,7 +2065,7 @@ pub unsafe fn vget_low_f64(a: float64x2_t) -> float64x1_t { #[cfg_attr(test, assert_instr(nop))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vget_low_p64(a: poly64x2_t) -> poly64x1_t { - transmute(u64x1::new(simd_extract(a, 0))) + transmute(u64x1::new(simd_extract!(a, 0))) } /// Duplicate vector element to vector or scalar @@ -2076,7 +2076,7 @@ pub unsafe fn vget_low_p64(a: poly64x2_t) -> poly64x1_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 0))] pub unsafe fn vget_lane_f64(v: float64x1_t) -> f64 { static_assert!(IMM5 == 0); - simd_extract(v, IMM5 as u32) + simd_extract!(v, IMM5 as u32) } /// Duplicate vector element to vector or scalar @@ -2087,7 +2087,7 @@ pub unsafe fn vget_lane_f64(v: float64x1_t) -> f64 { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 0))] pub unsafe fn vgetq_lane_f64(v: float64x2_t) -> f64 { static_assert_uimm_bits!(IMM5, 1); - simd_extract(v, IMM5 as u32) + simd_extract!(v, IMM5 as u32) } /// Vector combine diff --git a/crates/core_arch/src/arm_shared/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs index 0c0fd53e18..631c302db9 100644 --- a/crates/core_arch/src/arm_shared/neon/generated.rs +++ b/crates/core_arch/src/arm_shared/neon/generated.rs @@ -4141,7 +4141,7 @@ pub unsafe fn vdup_lane_u64(a: uint64x1_t) -> uint64x1_t { #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vdup_laneq_s64(a: int64x2_t) -> int64x1_t { static_assert_uimm_bits!(N, 1); - transmute::(simd_extract(a, N as u32)) + transmute::(simd_extract!(a, N as u32)) } /// Set all vector lanes to the same value @@ -4157,7 +4157,7 @@ pub unsafe fn vdup_laneq_s64(a: int64x2_t) -> int64x1_t { #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vdup_laneq_u64(a: uint64x2_t) -> uint64x1_t { static_assert_uimm_bits!(N, 1); - transmute::(simd_extract(a, N as u32)) + 
transmute::(simd_extract!(a, N as u32)) } /// Extract vector from pair of vectors @@ -13117,7 +13117,7 @@ vld4q_lane_f32_(b.0, b.1, b.2, b.3, LANE as i64, a as _) #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vst1_lane_s8(a: *mut i8, b: int8x8_t) { static_assert_uimm_bits!(LANE, 3); - *a = simd_extract(b, LANE as u32); + *a = simd_extract!(b, LANE as u32); } /// Store multiple single-element structures from one, two, three, or four registers @@ -13133,7 +13133,7 @@ pub unsafe fn vst1_lane_s8(a: *mut i8, b: int8x8_t) { #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vst1_lane_s16(a: *mut i16, b: int16x4_t) { static_assert_uimm_bits!(LANE, 2); - *a = simd_extract(b, LANE as u32); + *a = simd_extract!(b, LANE as u32); } /// Store multiple single-element structures from one, two, three, or four registers @@ -13149,7 +13149,7 @@ pub unsafe fn vst1_lane_s16(a: *mut i16, b: int16x4_t) { #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vst1_lane_s32(a: *mut i32, b: int32x2_t) { static_assert_uimm_bits!(LANE, 1); - *a = simd_extract(b, LANE as u32); + *a = simd_extract!(b, LANE as u32); } /// Store multiple single-element structures from one, two, three, or four registers @@ -13165,7 +13165,7 @@ pub unsafe fn vst1_lane_s32(a: *mut i32, b: int32x2_t) { #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vst1_lane_s64(a: *mut i64, b: int64x1_t) { static_assert!(LANE == 0); - *a = simd_extract(b, LANE as u32); + *a = simd_extract!(b, LANE as u32); } /// Store multiple single-element structures from one, two, three, or four registers @@ -13181,7 +13181,7 @@ pub unsafe fn vst1_lane_s64(a: *mut i64, b: int64x1_t) { #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vst1q_lane_s8(a: *mut i8, b: int8x16_t) { static_assert_uimm_bits!(LANE, 4); - *a = simd_extract(b, LANE as u32); + *a = simd_extract!(b, LANE as u32); } /// Store multiple single-element structures from one, two, three, or four registers @@ -13197,7 +13197,7 @@ pub unsafe fn vst1q_lane_s8(a: *mut i8, b: int8x16_t) { #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vst1q_lane_s16(a: *mut i16, b: int16x8_t) { static_assert_uimm_bits!(LANE, 3); - *a = simd_extract(b, LANE as u32); + *a = simd_extract!(b, LANE as u32); } /// Store multiple single-element structures from one, two, three, or four registers @@ -13213,7 +13213,7 @@ pub unsafe fn vst1q_lane_s16(a: *mut i16, b: int16x8_t) { #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vst1q_lane_s32(a: *mut i32, b: int32x4_t) { static_assert_uimm_bits!(LANE, 2); - *a = simd_extract(b, LANE as u32); + *a = simd_extract!(b, LANE as u32); } /// Store multiple single-element structures from one, two, three, or four registers @@ -13229,7 +13229,7 @@ pub unsafe fn vst1q_lane_s32(a: *mut i32, b: int32x4_t) { #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vst1q_lane_s64(a: *mut i64, b: int64x2_t) { static_assert_uimm_bits!(LANE, 1); - *a = simd_extract(b, LANE as u32); + *a = simd_extract!(b, LANE as u32); } /// Store multiple single-element structures from one, two, three, or 
four registers @@ -13245,7 +13245,7 @@ pub unsafe fn vst1q_lane_s64(a: *mut i64, b: int64x2_t) { #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vst1_lane_u8(a: *mut u8, b: uint8x8_t) { static_assert_uimm_bits!(LANE, 3); - *a = simd_extract(b, LANE as u32); + *a = simd_extract!(b, LANE as u32); } /// Store multiple single-element structures from one, two, three, or four registers @@ -13261,7 +13261,7 @@ pub unsafe fn vst1_lane_u8(a: *mut u8, b: uint8x8_t) { #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vst1_lane_u16(a: *mut u16, b: uint16x4_t) { static_assert_uimm_bits!(LANE, 2); - *a = simd_extract(b, LANE as u32); + *a = simd_extract!(b, LANE as u32); } /// Store multiple single-element structures from one, two, three, or four registers @@ -13277,7 +13277,7 @@ pub unsafe fn vst1_lane_u16(a: *mut u16, b: uint16x4_t) { #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vst1_lane_u32(a: *mut u32, b: uint32x2_t) { static_assert_uimm_bits!(LANE, 1); - *a = simd_extract(b, LANE as u32); + *a = simd_extract!(b, LANE as u32); } /// Store multiple single-element structures from one, two, three, or four registers @@ -13293,7 +13293,7 @@ pub unsafe fn vst1_lane_u32(a: *mut u32, b: uint32x2_t) { #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vst1_lane_u64(a: *mut u64, b: uint64x1_t) { static_assert!(LANE == 0); - *a = simd_extract(b, LANE as u32); + *a = simd_extract!(b, LANE as u32); } /// Store multiple single-element structures from one, two, three, or four registers @@ -13309,7 +13309,7 @@ pub unsafe fn vst1_lane_u64(a: *mut u64, b: uint64x1_t) { #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vst1q_lane_u8(a: *mut u8, b: uint8x16_t) { static_assert_uimm_bits!(LANE, 4); - *a = simd_extract(b, LANE as u32); + *a = simd_extract!(b, LANE as u32); } /// Store multiple single-element structures from one, two, three, or four registers @@ -13325,7 +13325,7 @@ pub unsafe fn vst1q_lane_u8(a: *mut u8, b: uint8x16_t) { #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vst1q_lane_u16(a: *mut u16, b: uint16x8_t) { static_assert_uimm_bits!(LANE, 3); - *a = simd_extract(b, LANE as u32); + *a = simd_extract!(b, LANE as u32); } /// Store multiple single-element structures from one, two, three, or four registers @@ -13341,7 +13341,7 @@ pub unsafe fn vst1q_lane_u16(a: *mut u16, b: uint16x8_t) { #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vst1q_lane_u32(a: *mut u32, b: uint32x4_t) { static_assert_uimm_bits!(LANE, 2); - *a = simd_extract(b, LANE as u32); + *a = simd_extract!(b, LANE as u32); } /// Store multiple single-element structures from one, two, three, or four registers @@ -13357,7 +13357,7 @@ pub unsafe fn vst1q_lane_u32(a: *mut u32, b: uint32x4_t) { #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vst1q_lane_u64(a: *mut u64, b: uint64x2_t) { static_assert_uimm_bits!(LANE, 1); - *a = simd_extract(b, LANE as u32); + *a = simd_extract!(b, LANE as u32); } /// Store multiple single-element structures from one, two, three, or four registers @@ -13373,7 +13373,7 @@ pub unsafe fn 
vst1q_lane_u64(a: *mut u64, b: uint64x2_t) { #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vst1_lane_p8(a: *mut p8, b: poly8x8_t) { static_assert_uimm_bits!(LANE, 3); - *a = simd_extract(b, LANE as u32); + *a = simd_extract!(b, LANE as u32); } /// Store multiple single-element structures from one, two, three, or four registers @@ -13389,7 +13389,7 @@ pub unsafe fn vst1_lane_p8(a: *mut p8, b: poly8x8_t) { #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vst1_lane_p16(a: *mut p16, b: poly16x4_t) { static_assert_uimm_bits!(LANE, 2); - *a = simd_extract(b, LANE as u32); + *a = simd_extract!(b, LANE as u32); } /// Store multiple single-element structures from one, two, three, or four registers @@ -13405,7 +13405,7 @@ pub unsafe fn vst1_lane_p16(a: *mut p16, b: poly16x4_t) { #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vst1q_lane_p8(a: *mut p8, b: poly8x16_t) { static_assert_uimm_bits!(LANE, 4); - *a = simd_extract(b, LANE as u32); + *a = simd_extract!(b, LANE as u32); } /// Store multiple single-element structures from one, two, three, or four registers @@ -13421,7 +13421,7 @@ pub unsafe fn vst1q_lane_p8(a: *mut p8, b: poly8x16_t) { #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vst1q_lane_p16(a: *mut p16, b: poly16x8_t) { static_assert_uimm_bits!(LANE, 3); - *a = simd_extract(b, LANE as u32); + *a = simd_extract!(b, LANE as u32); } /// Store multiple single-element structures from one, two, three, or four registers @@ -13437,7 +13437,7 @@ pub unsafe fn vst1q_lane_p16(a: *mut p16, b: poly16x8_t) { #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vst1_lane_p64(a: *mut p64, b: poly64x1_t) { static_assert!(LANE == 0); - *a = simd_extract(b, LANE as u32); + *a = simd_extract!(b, LANE as u32); } /// Store multiple single-element structures from one, two, three, or four registers @@ -13453,7 +13453,7 @@ pub unsafe fn vst1_lane_p64(a: *mut p64, b: poly64x1_t) { #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vst1q_lane_p64(a: *mut p64, b: poly64x2_t) { static_assert_uimm_bits!(LANE, 1); - *a = simd_extract(b, LANE as u32); + *a = simd_extract!(b, LANE as u32); } /// Store multiple single-element structures from one, two, three, or four registers @@ -13469,7 +13469,7 @@ pub unsafe fn vst1q_lane_p64(a: *mut p64, b: poly64x2_t) { #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vst1_lane_f32(a: *mut f32, b: float32x2_t) { static_assert_uimm_bits!(LANE, 1); - *a = simd_extract(b, LANE as u32); + *a = simd_extract!(b, LANE as u32); } /// Store multiple single-element structures from one, two, three, or four registers @@ -13485,7 +13485,7 @@ pub unsafe fn vst1_lane_f32(a: *mut f32, b: float32x2_t) { #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vst1q_lane_f32(a: *mut f32, b: float32x4_t) { static_assert_uimm_bits!(LANE, 2); - *a = simd_extract(b, LANE as u32); + *a = simd_extract!(b, LANE as u32); } /// Store multiple single-element structures from one, two, three, or four registers @@ -21151,7 +21151,7 @@ pub unsafe fn vqdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t 
{ #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vqdmulhq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { static_assert_uimm_bits!(LANE, 3); - vqdmulhq_s16(a, vdupq_n_s16(simd_extract(b, LANE as u32))) + vqdmulhq_s16(a, vdupq_n_s16(simd_extract!(b, LANE as u32))) } /// Vector saturating doubling multiply high by scalar @@ -21167,7 +21167,7 @@ pub unsafe fn vqdmulhq_laneq_s16(a: int16x8_t, b: int16x8_t) -> #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vqdmulh_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x4_t { static_assert_uimm_bits!(LANE, 3); - vqdmulh_s16(a, vdup_n_s16(simd_extract(b, LANE as u32))) + vqdmulh_s16(a, vdup_n_s16(simd_extract!(b, LANE as u32))) } /// Vector saturating doubling multiply high by scalar @@ -21183,7 +21183,7 @@ pub unsafe fn vqdmulh_laneq_s16(a: int16x4_t, b: int16x8_t) -> #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vqdmulhq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 2); - vqdmulhq_s32(a, vdupq_n_s32(simd_extract(b, LANE as u32))) + vqdmulhq_s32(a, vdupq_n_s32(simd_extract!(b, LANE as u32))) } /// Vector saturating doubling multiply high by scalar @@ -21199,7 +21199,7 @@ pub unsafe fn vqdmulhq_laneq_s32(a: int32x4_t, b: int32x4_t) -> #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vqdmulh_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 2); - vqdmulh_s32(a, vdup_n_s32(simd_extract(b, LANE as u32))) + vqdmulh_s32(a, vdup_n_s32(simd_extract!(b, LANE as u32))) } /// Signed saturating extract narrow @@ -28751,7 +28751,7 @@ pub unsafe fn vrsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vset_lane_s8(a: i8, b: int8x8_t) -> int8x8_t { static_assert_uimm_bits!(LANE, 3); - simd_insert(b, LANE as u32, a) + simd_insert!(b, LANE as u32, a) } /// Insert vector element from another vector element @@ -28767,7 +28767,7 @@ pub unsafe fn vset_lane_s8(a: i8, b: int8x8_t) -> int8x8_t { #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vset_lane_s16(a: i16, b: int16x4_t) -> int16x4_t { static_assert_uimm_bits!(LANE, 2); - simd_insert(b, LANE as u32, a) + simd_insert!(b, LANE as u32, a) } /// Insert vector element from another vector element @@ -28783,7 +28783,7 @@ pub unsafe fn vset_lane_s16(a: i16, b: int16x4_t) -> int16x4_t #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vset_lane_s32(a: i32, b: int32x2_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 1); - simd_insert(b, LANE as u32, a) + simd_insert!(b, LANE as u32, a) } /// Insert vector element from another vector element @@ -28799,7 +28799,7 @@ pub unsafe fn vset_lane_s32(a: i32, b: int32x2_t) -> int32x2_t #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vset_lane_s64(a: i64, b: int64x1_t) -> int64x1_t { static_assert!(LANE == 0); - simd_insert(b, LANE as u32, a) + simd_insert!(b, LANE as u32, a) } /// Insert vector element from another vector element @@ -28815,7 +28815,7 @@ pub unsafe fn vset_lane_s64(a: i64, b: int64x1_t) -> int64x1_t 
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vset_lane_u8(a: u8, b: uint8x8_t) -> uint8x8_t { static_assert_uimm_bits!(LANE, 3); - simd_insert(b, LANE as u32, a) + simd_insert!(b, LANE as u32, a) } /// Insert vector element from another vector element @@ -28831,7 +28831,7 @@ pub unsafe fn vset_lane_u8(a: u8, b: uint8x8_t) -> uint8x8_t { #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vset_lane_u16(a: u16, b: uint16x4_t) -> uint16x4_t { static_assert_uimm_bits!(LANE, 2); - simd_insert(b, LANE as u32, a) + simd_insert!(b, LANE as u32, a) } /// Insert vector element from another vector element @@ -28847,7 +28847,7 @@ pub unsafe fn vset_lane_u16(a: u16, b: uint16x4_t) -> uint16x4_ #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vset_lane_u32(a: u32, b: uint32x2_t) -> uint32x2_t { static_assert_uimm_bits!(LANE, 1); - simd_insert(b, LANE as u32, a) + simd_insert!(b, LANE as u32, a) } /// Insert vector element from another vector element @@ -28863,7 +28863,7 @@ pub unsafe fn vset_lane_u32(a: u32, b: uint32x2_t) -> uint32x2_ #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vset_lane_u64(a: u64, b: uint64x1_t) -> uint64x1_t { static_assert!(LANE == 0); - simd_insert(b, LANE as u32, a) + simd_insert!(b, LANE as u32, a) } /// Insert vector element from another vector element @@ -28879,7 +28879,7 @@ pub unsafe fn vset_lane_u64(a: u64, b: uint64x1_t) -> uint64x1_ #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vset_lane_p8(a: p8, b: poly8x8_t) -> poly8x8_t { static_assert_uimm_bits!(LANE, 3); - simd_insert(b, LANE as u32, a) + simd_insert!(b, LANE as u32, a) } /// Insert vector element from another vector element @@ -28895,7 +28895,7 @@ pub unsafe fn vset_lane_p8(a: p8, b: poly8x8_t) -> poly8x8_t { #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vset_lane_p16(a: p16, b: poly16x4_t) -> poly16x4_t { static_assert_uimm_bits!(LANE, 2); - simd_insert(b, LANE as u32, a) + simd_insert!(b, LANE as u32, a) } /// Insert vector element from another vector element @@ -28911,7 +28911,7 @@ pub unsafe fn vset_lane_p16(a: p16, b: poly16x4_t) -> poly16x4_ #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vset_lane_p64(a: p64, b: poly64x1_t) -> poly64x1_t { static_assert!(LANE == 0); - simd_insert(b, LANE as u32, a) + simd_insert!(b, LANE as u32, a) } /// Insert vector element from another vector element @@ -28927,7 +28927,7 @@ pub unsafe fn vset_lane_p64(a: p64, b: poly64x1_t) -> poly64x1_ #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vsetq_lane_s8(a: i8, b: int8x16_t) -> int8x16_t { static_assert_uimm_bits!(LANE, 4); - simd_insert(b, LANE as u32, a) + simd_insert!(b, LANE as u32, a) } /// Insert vector element from another vector element @@ -28943,7 +28943,7 @@ pub unsafe fn vsetq_lane_s8(a: i8, b: int8x16_t) -> int8x16_t { #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vsetq_lane_s16(a: i16, b: int16x8_t) -> int16x8_t { static_assert_uimm_bits!(LANE, 3); - simd_insert(b, LANE as u32, a) + simd_insert!(b, LANE as 
u32, a) } /// Insert vector element from another vector element @@ -28959,7 +28959,7 @@ pub unsafe fn vsetq_lane_s16(a: i16, b: int16x8_t) -> int16x8_t #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vsetq_lane_s32(a: i32, b: int32x4_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 2); - simd_insert(b, LANE as u32, a) + simd_insert!(b, LANE as u32, a) } /// Insert vector element from another vector element @@ -28975,7 +28975,7 @@ pub unsafe fn vsetq_lane_s32(a: i32, b: int32x4_t) -> int32x4_t #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vsetq_lane_s64(a: i64, b: int64x2_t) -> int64x2_t { static_assert_uimm_bits!(LANE, 1); - simd_insert(b, LANE as u32, a) + simd_insert!(b, LANE as u32, a) } /// Insert vector element from another vector element @@ -28991,7 +28991,7 @@ pub unsafe fn vsetq_lane_s64(a: i64, b: int64x2_t) -> int64x2_t #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vsetq_lane_u8(a: u8, b: uint8x16_t) -> uint8x16_t { static_assert_uimm_bits!(LANE, 4); - simd_insert(b, LANE as u32, a) + simd_insert!(b, LANE as u32, a) } /// Insert vector element from another vector element @@ -29007,7 +29007,7 @@ pub unsafe fn vsetq_lane_u8(a: u8, b: uint8x16_t) -> uint8x16_t #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vsetq_lane_u16(a: u16, b: uint16x8_t) -> uint16x8_t { static_assert_uimm_bits!(LANE, 3); - simd_insert(b, LANE as u32, a) + simd_insert!(b, LANE as u32, a) } /// Insert vector element from another vector element @@ -29023,7 +29023,7 @@ pub unsafe fn vsetq_lane_u16(a: u16, b: uint16x8_t) -> uint16x8 #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vsetq_lane_u32(a: u32, b: uint32x4_t) -> uint32x4_t { static_assert_uimm_bits!(LANE, 2); - simd_insert(b, LANE as u32, a) + simd_insert!(b, LANE as u32, a) } /// Insert vector element from another vector element @@ -29039,7 +29039,7 @@ pub unsafe fn vsetq_lane_u32(a: u32, b: uint32x4_t) -> uint32x4 #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vsetq_lane_u64(a: u64, b: uint64x2_t) -> uint64x2_t { static_assert_uimm_bits!(LANE, 1); - simd_insert(b, LANE as u32, a) + simd_insert!(b, LANE as u32, a) } /// Insert vector element from another vector element @@ -29055,7 +29055,7 @@ pub unsafe fn vsetq_lane_u64(a: u64, b: uint64x2_t) -> uint64x2 #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vsetq_lane_p8(a: p8, b: poly8x16_t) -> poly8x16_t { static_assert_uimm_bits!(LANE, 4); - simd_insert(b, LANE as u32, a) + simd_insert!(b, LANE as u32, a) } /// Insert vector element from another vector element @@ -29071,7 +29071,7 @@ pub unsafe fn vsetq_lane_p8(a: p8, b: poly8x16_t) -> poly8x16_t #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vsetq_lane_p16(a: p16, b: poly16x8_t) -> poly16x8_t { static_assert_uimm_bits!(LANE, 3); - simd_insert(b, LANE as u32, a) + simd_insert!(b, LANE as u32, a) } /// Insert vector element from another vector element @@ -29087,7 +29087,7 @@ pub unsafe fn vsetq_lane_p16(a: p16, b: poly16x8_t) -> poly16x8 #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", 
issue = "111800"))] pub unsafe fn vsetq_lane_p64(a: p64, b: poly64x2_t) -> poly64x2_t { static_assert_uimm_bits!(LANE, 1); - simd_insert(b, LANE as u32, a) + simd_insert!(b, LANE as u32, a) } /// Insert vector element from another vector element @@ -29103,7 +29103,7 @@ pub unsafe fn vsetq_lane_p64(a: p64, b: poly64x2_t) -> poly64x2 #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vset_lane_f32(a: f32, b: float32x2_t) -> float32x2_t { static_assert_uimm_bits!(LANE, 1); - simd_insert(b, LANE as u32, a) + simd_insert!(b, LANE as u32, a) } /// Insert vector element from another vector element @@ -29119,7 +29119,7 @@ pub unsafe fn vset_lane_f32(a: f32, b: float32x2_t) -> float32x #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub unsafe fn vsetq_lane_f32(a: f32, b: float32x4_t) -> float32x4_t { static_assert_uimm_bits!(LANE, 2); - simd_insert(b, LANE as u32, a) + simd_insert!(b, LANE as u32, a) } /// Signed Shift left diff --git a/crates/core_arch/src/arm_shared/neon/mod.rs b/crates/core_arch/src/arm_shared/neon/mod.rs index 2d12f5e99b..12da187067 100644 --- a/crates/core_arch/src/arm_shared/neon/mod.rs +++ b/crates/core_arch/src/arm_shared/neon/mod.rs @@ -1294,7 +1294,7 @@ extern "unadjusted" { )] pub unsafe fn vld1_lane_s8(ptr: *const i8, src: int8x8_t) -> int8x8_t { static_assert_uimm_bits!(LANE, 3); - simd_insert(src, LANE as u32, *ptr) + simd_insert!(src, LANE as u32, *ptr) } /// Load one single-element structure to one lane of one register. @@ -1314,7 +1314,7 @@ pub unsafe fn vld1_lane_s8(ptr: *const i8, src: int8x8_t) -> in )] pub unsafe fn vld1q_lane_s8(ptr: *const i8, src: int8x16_t) -> int8x16_t { static_assert_uimm_bits!(LANE, 4); - simd_insert(src, LANE as u32, *ptr) + simd_insert!(src, LANE as u32, *ptr) } /// Load one single-element structure to one lane of one register. @@ -1334,7 +1334,7 @@ pub unsafe fn vld1q_lane_s8(ptr: *const i8, src: int8x16_t) -> )] pub unsafe fn vld1_lane_s16(ptr: *const i16, src: int16x4_t) -> int16x4_t { static_assert_uimm_bits!(LANE, 2); - simd_insert(src, LANE as u32, *ptr) + simd_insert!(src, LANE as u32, *ptr) } /// Load one single-element structure to one lane of one register. @@ -1354,7 +1354,7 @@ pub unsafe fn vld1_lane_s16(ptr: *const i16, src: int16x4_t) -> )] pub unsafe fn vld1q_lane_s16(ptr: *const i16, src: int16x8_t) -> int16x8_t { static_assert_uimm_bits!(LANE, 3); - simd_insert(src, LANE as u32, *ptr) + simd_insert!(src, LANE as u32, *ptr) } /// Load one single-element structure to one lane of one register. @@ -1374,7 +1374,7 @@ pub unsafe fn vld1q_lane_s16(ptr: *const i16, src: int16x8_t) - )] pub unsafe fn vld1_lane_s32(ptr: *const i32, src: int32x2_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 1); - simd_insert(src, LANE as u32, *ptr) + simd_insert!(src, LANE as u32, *ptr) } /// Load one single-element structure to one lane of one register. @@ -1394,7 +1394,7 @@ pub unsafe fn vld1_lane_s32(ptr: *const i32, src: int32x2_t) -> )] pub unsafe fn vld1q_lane_s32(ptr: *const i32, src: int32x4_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 2); - simd_insert(src, LANE as u32, *ptr) + simd_insert!(src, LANE as u32, *ptr) } /// Load one single-element structure to one lane of one register. 
@@ -1414,7 +1414,7 @@ pub unsafe fn vld1q_lane_s32(ptr: *const i32, src: int32x4_t) - )] pub unsafe fn vld1_lane_s64(ptr: *const i64, src: int64x1_t) -> int64x1_t { static_assert!(LANE == 0); - simd_insert(src, LANE as u32, *ptr) + simd_insert!(src, LANE as u32, *ptr) } /// Load one single-element structure to one lane of one register. @@ -1434,7 +1434,7 @@ pub unsafe fn vld1_lane_s64(ptr: *const i64, src: int64x1_t) -> )] pub unsafe fn vld1q_lane_s64(ptr: *const i64, src: int64x2_t) -> int64x2_t { static_assert_uimm_bits!(LANE, 1); - simd_insert(src, LANE as u32, *ptr) + simd_insert!(src, LANE as u32, *ptr) } /// Load one single-element structure to one lane of one register. @@ -1454,7 +1454,7 @@ pub unsafe fn vld1q_lane_s64(ptr: *const i64, src: int64x2_t) - )] pub unsafe fn vld1_lane_u8(ptr: *const u8, src: uint8x8_t) -> uint8x8_t { static_assert_uimm_bits!(LANE, 3); - simd_insert(src, LANE as u32, *ptr) + simd_insert!(src, LANE as u32, *ptr) } /// Load one single-element structure to one lane of one register. @@ -1474,7 +1474,7 @@ pub unsafe fn vld1_lane_u8(ptr: *const u8, src: uint8x8_t) -> u )] pub unsafe fn vld1q_lane_u8(ptr: *const u8, src: uint8x16_t) -> uint8x16_t { static_assert_uimm_bits!(LANE, 4); - simd_insert(src, LANE as u32, *ptr) + simd_insert!(src, LANE as u32, *ptr) } /// Load one single-element structure to one lane of one register. @@ -1494,7 +1494,7 @@ pub unsafe fn vld1q_lane_u8(ptr: *const u8, src: uint8x16_t) -> )] pub unsafe fn vld1_lane_u16(ptr: *const u16, src: uint16x4_t) -> uint16x4_t { static_assert_uimm_bits!(LANE, 2); - simd_insert(src, LANE as u32, *ptr) + simd_insert!(src, LANE as u32, *ptr) } /// Load one single-element structure to one lane of one register. @@ -1514,7 +1514,7 @@ pub unsafe fn vld1_lane_u16(ptr: *const u16, src: uint16x4_t) - )] pub unsafe fn vld1q_lane_u16(ptr: *const u16, src: uint16x8_t) -> uint16x8_t { static_assert_uimm_bits!(LANE, 3); - simd_insert(src, LANE as u32, *ptr) + simd_insert!(src, LANE as u32, *ptr) } /// Load one single-element structure to one lane of one register. @@ -1534,7 +1534,7 @@ pub unsafe fn vld1q_lane_u16(ptr: *const u16, src: uint16x8_t) )] pub unsafe fn vld1_lane_u32(ptr: *const u32, src: uint32x2_t) -> uint32x2_t { static_assert_uimm_bits!(LANE, 1); - simd_insert(src, LANE as u32, *ptr) + simd_insert!(src, LANE as u32, *ptr) } /// Load one single-element structure to one lane of one register. @@ -1554,7 +1554,7 @@ pub unsafe fn vld1_lane_u32(ptr: *const u32, src: uint32x2_t) - )] pub unsafe fn vld1q_lane_u32(ptr: *const u32, src: uint32x4_t) -> uint32x4_t { static_assert_uimm_bits!(LANE, 2); - simd_insert(src, LANE as u32, *ptr) + simd_insert!(src, LANE as u32, *ptr) } /// Load one single-element structure to one lane of one register. @@ -1574,7 +1574,7 @@ pub unsafe fn vld1q_lane_u32(ptr: *const u32, src: uint32x4_t) )] pub unsafe fn vld1_lane_u64(ptr: *const u64, src: uint64x1_t) -> uint64x1_t { static_assert!(LANE == 0); - simd_insert(src, LANE as u32, *ptr) + simd_insert!(src, LANE as u32, *ptr) } /// Load one single-element structure to one lane of one register. @@ -1594,7 +1594,7 @@ pub unsafe fn vld1_lane_u64(ptr: *const u64, src: uint64x1_t) - )] pub unsafe fn vld1q_lane_u64(ptr: *const u64, src: uint64x2_t) -> uint64x2_t { static_assert_uimm_bits!(LANE, 1); - simd_insert(src, LANE as u32, *ptr) + simd_insert!(src, LANE as u32, *ptr) } /// Load one single-element structure to one lane of one register. 
@@ -1614,7 +1614,7 @@ pub unsafe fn vld1q_lane_u64(ptr: *const u64, src: uint64x2_t) )] pub unsafe fn vld1_lane_p8(ptr: *const p8, src: poly8x8_t) -> poly8x8_t { static_assert_uimm_bits!(LANE, 3); - simd_insert(src, LANE as u32, *ptr) + simd_insert!(src, LANE as u32, *ptr) } /// Load one single-element structure to one lane of one register. @@ -1634,7 +1634,7 @@ pub unsafe fn vld1_lane_p8(ptr: *const p8, src: poly8x8_t) -> p )] pub unsafe fn vld1q_lane_p8(ptr: *const p8, src: poly8x16_t) -> poly8x16_t { static_assert_uimm_bits!(LANE, 4); - simd_insert(src, LANE as u32, *ptr) + simd_insert!(src, LANE as u32, *ptr) } /// Load one single-element structure to one lane of one register. @@ -1654,7 +1654,7 @@ pub unsafe fn vld1q_lane_p8(ptr: *const p8, src: poly8x16_t) -> )] pub unsafe fn vld1_lane_p16(ptr: *const p16, src: poly16x4_t) -> poly16x4_t { static_assert_uimm_bits!(LANE, 2); - simd_insert(src, LANE as u32, *ptr) + simd_insert!(src, LANE as u32, *ptr) } /// Load one single-element structure to one lane of one register. @@ -1674,7 +1674,7 @@ pub unsafe fn vld1_lane_p16(ptr: *const p16, src: poly16x4_t) - )] pub unsafe fn vld1q_lane_p16(ptr: *const p16, src: poly16x8_t) -> poly16x8_t { static_assert_uimm_bits!(LANE, 3); - simd_insert(src, LANE as u32, *ptr) + simd_insert!(src, LANE as u32, *ptr) } /// Load one single-element structure to one lane of one register. @@ -1696,7 +1696,7 @@ pub unsafe fn vld1q_lane_p16(ptr: *const p16, src: poly16x8_t) )] pub unsafe fn vld1_lane_p64(ptr: *const p64, src: poly64x1_t) -> poly64x1_t { static_assert!(LANE == 0); - simd_insert(src, LANE as u32, *ptr) + simd_insert!(src, LANE as u32, *ptr) } /// Load one single-element structure to one lane of one register. @@ -1718,7 +1718,7 @@ pub unsafe fn vld1_lane_p64(ptr: *const p64, src: poly64x1_t) - )] pub unsafe fn vld1q_lane_p64(ptr: *const p64, src: poly64x2_t) -> poly64x2_t { static_assert_uimm_bits!(LANE, 1); - simd_insert(src, LANE as u32, *ptr) + simd_insert!(src, LANE as u32, *ptr) } /// Load one single-element structure to one lane of one register. @@ -1738,7 +1738,7 @@ pub unsafe fn vld1q_lane_p64(ptr: *const p64, src: poly64x2_t) )] pub unsafe fn vld1_lane_f32(ptr: *const f32, src: float32x2_t) -> float32x2_t { static_assert_uimm_bits!(LANE, 1); - simd_insert(src, LANE as u32, *ptr) + simd_insert!(src, LANE as u32, *ptr) } /// Load one single-element structure to one lane of one register. @@ -1758,7 +1758,7 @@ pub unsafe fn vld1_lane_f32(ptr: *const f32, src: float32x2_t) )] pub unsafe fn vld1q_lane_f32(ptr: *const f32, src: float32x4_t) -> float32x4_t { static_assert_uimm_bits!(LANE, 2); - simd_insert(src, LANE as u32, *ptr) + simd_insert!(src, LANE as u32, *ptr) } /// Load one single-element structure and Replicate to all lanes (of one register). 
@@ -5918,7 +5918,7 @@ pub unsafe fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { )] pub unsafe fn vgetq_lane_u64(v: uint64x2_t) -> u64 { static_assert_uimm_bits!(IMM5, 1); - simd_extract(v, IMM5 as u32) + simd_extract!(v, IMM5 as u32) } /// Move vector element to general-purpose register @@ -5937,7 +5937,7 @@ pub unsafe fn vgetq_lane_u64(v: uint64x2_t) -> u64 { )] pub unsafe fn vget_lane_u64(v: uint64x1_t) -> u64 { static_assert!(IMM5 == 0); - simd_extract(v, 0) + simd_extract!(v, 0) } /// Move vector element to general-purpose register @@ -5956,7 +5956,7 @@ pub unsafe fn vget_lane_u64(v: uint64x1_t) -> u64 { )] pub unsafe fn vget_lane_u16(v: uint16x4_t) -> u16 { static_assert_uimm_bits!(IMM5, 2); - simd_extract(v, IMM5 as u32) + simd_extract!(v, IMM5 as u32) } /// Move vector element to general-purpose register @@ -5975,7 +5975,7 @@ pub unsafe fn vget_lane_u16(v: uint16x4_t) -> u16 { )] pub unsafe fn vget_lane_s16(v: int16x4_t) -> i16 { static_assert_uimm_bits!(IMM5, 2); - simd_extract(v, IMM5 as u32) + simd_extract!(v, IMM5 as u32) } /// Move vector element to general-purpose register @@ -5994,7 +5994,7 @@ pub unsafe fn vget_lane_s16(v: int16x4_t) -> i16 { )] pub unsafe fn vget_lane_p16(v: poly16x4_t) -> p16 { static_assert_uimm_bits!(IMM5, 2); - simd_extract(v, IMM5 as u32) + simd_extract!(v, IMM5 as u32) } /// Move vector element to general-purpose register @@ -6013,7 +6013,7 @@ pub unsafe fn vget_lane_p16(v: poly16x4_t) -> p16 { )] pub unsafe fn vget_lane_u32(v: uint32x2_t) -> u32 { static_assert_uimm_bits!(IMM5, 1); - simd_extract(v, IMM5 as u32) + simd_extract!(v, IMM5 as u32) } /// Move vector element to general-purpose register @@ -6032,7 +6032,7 @@ pub unsafe fn vget_lane_u32(v: uint32x2_t) -> u32 { )] pub unsafe fn vget_lane_s32(v: int32x2_t) -> i32 { static_assert_uimm_bits!(IMM5, 1); - simd_extract(v, IMM5 as u32) + simd_extract!(v, IMM5 as u32) } /// Duplicate vector element to vector or scalar @@ -6051,7 +6051,7 @@ pub unsafe fn vget_lane_s32(v: int32x2_t) -> i32 { )] pub unsafe fn vget_lane_f32(v: float32x2_t) -> f32 { static_assert_uimm_bits!(IMM5, 1); - simd_extract(v, IMM5 as u32) + simd_extract!(v, IMM5 as u32) } /// Duplicate vector element to vector or scalar @@ -6070,7 +6070,7 @@ pub unsafe fn vget_lane_f32(v: float32x2_t) -> f32 { )] pub unsafe fn vgetq_lane_f32(v: float32x4_t) -> f32 { static_assert_uimm_bits!(IMM5, 2); - simd_extract(v, IMM5 as u32) + simd_extract!(v, IMM5 as u32) } /// Move vector element to general-purpose register @@ -6089,7 +6089,7 @@ pub unsafe fn vgetq_lane_f32(v: float32x4_t) -> f32 { )] pub unsafe fn vget_lane_p64(v: poly64x1_t) -> p64 { static_assert!(IMM5 == 0); - simd_extract(v, IMM5 as u32) + simd_extract!(v, IMM5 as u32) } /// Move vector element to general-purpose register @@ -6108,7 +6108,7 @@ pub unsafe fn vget_lane_p64(v: poly64x1_t) -> p64 { )] pub unsafe fn vgetq_lane_p64(v: poly64x2_t) -> p64 { static_assert_uimm_bits!(IMM5, 1); - simd_extract(v, IMM5 as u32) + simd_extract!(v, IMM5 as u32) } /// Move vector element to general-purpose register @@ -6127,7 +6127,7 @@ pub unsafe fn vgetq_lane_p64(v: poly64x2_t) -> p64 { )] pub unsafe fn vget_lane_s64(v: int64x1_t) -> i64 { static_assert!(IMM5 == 0); - simd_extract(v, IMM5 as u32) + simd_extract!(v, IMM5 as u32) } /// Move vector element to general-purpose register @@ -6146,7 +6146,7 @@ pub unsafe fn vget_lane_s64(v: int64x1_t) -> i64 { )] pub unsafe fn vgetq_lane_s64(v: int64x2_t) -> i64 { static_assert_uimm_bits!(IMM5, 1); - simd_extract(v, IMM5 as u32) + 
simd_extract!(v, IMM5 as u32) } /// Move vector element to general-purpose register @@ -6165,7 +6165,7 @@ pub unsafe fn vgetq_lane_s64(v: int64x2_t) -> i64 { )] pub unsafe fn vgetq_lane_u16(v: uint16x8_t) -> u16 { static_assert_uimm_bits!(IMM5, 3); - simd_extract(v, IMM5 as u32) + simd_extract!(v, IMM5 as u32) } /// Move vector element to general-purpose register @@ -6184,7 +6184,7 @@ pub unsafe fn vgetq_lane_u16(v: uint16x8_t) -> u16 { )] pub unsafe fn vgetq_lane_u32(v: uint32x4_t) -> u32 { static_assert_uimm_bits!(IMM5, 2); - simd_extract(v, IMM5 as u32) + simd_extract!(v, IMM5 as u32) } /// Move vector element to general-purpose register @@ -6203,7 +6203,7 @@ pub unsafe fn vgetq_lane_u32(v: uint32x4_t) -> u32 { )] pub unsafe fn vgetq_lane_s16(v: int16x8_t) -> i16 { static_assert_uimm_bits!(IMM5, 3); - simd_extract(v, IMM5 as u32) + simd_extract!(v, IMM5 as u32) } /// Move vector element to general-purpose register @@ -6222,7 +6222,7 @@ pub unsafe fn vgetq_lane_s16(v: int16x8_t) -> i16 { )] pub unsafe fn vgetq_lane_p16(v: poly16x8_t) -> p16 { static_assert_uimm_bits!(IMM5, 3); - simd_extract(v, IMM5 as u32) + simd_extract!(v, IMM5 as u32) } /// Move vector element to general-purpose register @@ -6241,7 +6241,7 @@ pub unsafe fn vgetq_lane_p16(v: poly16x8_t) -> p16 { )] pub unsafe fn vgetq_lane_s32(v: int32x4_t) -> i32 { static_assert_uimm_bits!(IMM5, 2); - simd_extract(v, IMM5 as u32) + simd_extract!(v, IMM5 as u32) } /// Move vector element to general-purpose register @@ -6260,7 +6260,7 @@ pub unsafe fn vgetq_lane_s32(v: int32x4_t) -> i32 { )] pub unsafe fn vget_lane_u8(v: uint8x8_t) -> u8 { static_assert_uimm_bits!(IMM5, 3); - simd_extract(v, IMM5 as u32) + simd_extract!(v, IMM5 as u32) } /// Move vector element to general-purpose register @@ -6279,7 +6279,7 @@ pub unsafe fn vget_lane_u8(v: uint8x8_t) -> u8 { )] pub unsafe fn vget_lane_s8(v: int8x8_t) -> i8 { static_assert_uimm_bits!(IMM5, 3); - simd_extract(v, IMM5 as u32) + simd_extract!(v, IMM5 as u32) } /// Move vector element to general-purpose register @@ -6298,7 +6298,7 @@ pub unsafe fn vget_lane_s8(v: int8x8_t) -> i8 { )] pub unsafe fn vget_lane_p8(v: poly8x8_t) -> p8 { static_assert_uimm_bits!(IMM5, 3); - simd_extract(v, IMM5 as u32) + simd_extract!(v, IMM5 as u32) } /// Move vector element to general-purpose register @@ -6317,7 +6317,7 @@ pub unsafe fn vget_lane_p8(v: poly8x8_t) -> p8 { )] pub unsafe fn vgetq_lane_u8(v: uint8x16_t) -> u8 { static_assert_uimm_bits!(IMM5, 4); - simd_extract(v, IMM5 as u32) + simd_extract!(v, IMM5 as u32) } /// Move vector element to general-purpose register @@ -6336,7 +6336,7 @@ pub unsafe fn vgetq_lane_u8(v: uint8x16_t) -> u8 { )] pub unsafe fn vgetq_lane_s8(v: int8x16_t) -> i8 { static_assert_uimm_bits!(IMM5, 4); - simd_extract(v, IMM5 as u32) + simd_extract!(v, IMM5 as u32) } /// Move vector element to general-purpose register @@ -6355,7 +6355,7 @@ pub unsafe fn vgetq_lane_s8(v: int8x16_t) -> i8 { )] pub unsafe fn vgetq_lane_p8(v: poly8x16_t) -> p8 { static_assert_uimm_bits!(IMM5, 4); - simd_extract(v, IMM5 as u32) + simd_extract!(v, IMM5 as u32) } /// Duplicate vector element to vector or scalar @@ -6427,7 +6427,7 @@ pub unsafe fn vget_high_s32(a: int32x4_t) -> int32x2_t { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub unsafe fn vget_high_s64(a: int64x2_t) -> int64x1_t { - int64x1_t(simd_extract(a, 1)) + int64x1_t(simd_extract!(a, 1)) } /// Duplicate vector element to vector or scalar @@ -6499,7 +6499,7 @@ pub unsafe fn vget_high_u32(a: uint32x4_t) -> uint32x2_t { 
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vget_high_u64(a: uint64x2_t) -> uint64x1_t {
-    uint64x1_t(simd_extract(a, 1))
+    uint64x1_t(simd_extract!(a, 1))
 }
 
 /// Duplicate vector element to vector or scalar
@@ -6621,7 +6621,7 @@ pub unsafe fn vget_low_s32(a: int32x4_t) -> int32x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vget_low_s64(a: int64x2_t) -> int64x1_t {
-    int64x1_t(simd_extract(a, 0))
+    int64x1_t(simd_extract!(a, 0))
 }
 
 /// Duplicate vector element to vector or scalar
@@ -6689,7 +6689,7 @@ pub unsafe fn vget_low_u32(a: uint32x4_t) -> uint32x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vget_low_u64(a: uint64x2_t) -> uint64x1_t {
-    uint64x1_t(simd_extract(a, 0))
+    uint64x1_t(simd_extract!(a, 0))
 }
 
 /// Duplicate vector element to vector or scalar
diff --git a/crates/core_arch/src/macros.rs b/crates/core_arch/src/macros.rs
index 56d922b0fd..4c3bbc9395 100644
--- a/crates/core_arch/src/macros.rs
+++ b/crates/core_arch/src/macros.rs
@@ -76,3 +76,20 @@ macro_rules! simd_shuffle {
         )
     }};
 }
+
+#[allow(unused)]
+macro_rules! simd_insert {
+    ($x:expr, $idx:expr, $val:expr $(,)?) => {{
+        simd_insert($x, const { $idx }, $val)
+    }};
+}
+
+#[allow(unused)]
+macro_rules! simd_extract {
+    ($x:expr, $idx:expr $(,)?) => {{
+        simd_extract($x, const { $idx })
+    }};
+    ($x:expr, $idx:expr, $ty:ty $(,)?) => {{
+        simd_extract::<_, $ty>($x, const { $idx })
+    }};
+}
diff --git a/crates/core_arch/src/wasm32/simd128.rs b/crates/core_arch/src/wasm32/simd128.rs
index 4819195dc6..f376bdbe63 100644
--- a/crates/core_arch/src/wasm32/simd128.rs
+++ b/crates/core_arch/src/wasm32/simd128.rs
@@ -1088,7 +1088,7 @@ pub use i64x2_shuffle as u64x2_shuffle;
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn i8x16_extract_lane<const N: usize>(a: v128) -> i8 {
     static_assert!(N < 16);
-    unsafe { simd_extract(a.as_i8x16(), N as u32) }
+    unsafe { simd_extract!(a.as_i8x16(), N as u32) }
 }
 
 /// Extracts a lane from a 128-bit vector interpreted as 16 packed u8 numbers.
@@ -1102,7 +1102,7 @@ pub fn i8x16_extract_lane<const N: usize>(a: v128) -> i8 {
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn u8x16_extract_lane<const N: usize>(a: v128) -> u8 {
     static_assert!(N < 16);
-    unsafe { simd_extract(a.as_u8x16(), N as u32) }
+    unsafe { simd_extract!(a.as_u8x16(), N as u32) }
 }
 
 /// Replaces a lane from a 128-bit vector interpreted as 16 packed i8 numbers.
@@ -1116,7 +1116,7 @@ pub fn u8x16_extract_lane<const N: usize>(a: v128) -> u8 {
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn i8x16_replace_lane<const N: usize>(a: v128, val: i8) -> v128 {
     static_assert!(N < 16);
-    unsafe { simd_insert(a.as_i8x16(), N as u32, val).v128() }
+    unsafe { simd_insert!(a.as_i8x16(), N as u32, val).v128() }
 }
 
 /// Replaces a lane from a 128-bit vector interpreted as 16 packed u8 numbers.
@@ -1130,7 +1130,7 @@ pub fn i8x16_replace_lane<const N: usize>(a: v128, val: i8) -> v128 {
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn u8x16_replace_lane<const N: usize>(a: v128, val: u8) -> v128 {
     static_assert!(N < 16);
-    unsafe { simd_insert(a.as_u8x16(), N as u32, val).v128() }
+    unsafe { simd_insert!(a.as_u8x16(), N as u32, val).v128() }
 }
 
 /// Extracts a lane from a 128-bit vector interpreted as 8 packed i16 numbers.
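The two macros added to crates/core_arch/src/macros.rs above are the core of this change: `simd_insert!` and `simd_extract!` forward to the `simd_insert`/`simd_extract` intrinsics but wrap the index argument in an inline `const { ... }` block, so the index is forced to be a compile-time constant expression at every call site instead of travelling as an ordinary runtime value. Below is a minimal sketch of the same pattern outside of stdarch, assuming a toolchain where inline `const { ... }` blocks are available (they were still feature-gated on stable when this patch was written); `take_lane` is a hypothetical stand-in for the intrinsic and is not part of this patch.

// Sketch of the const-block pattern used by `simd_insert!` / `simd_extract!`.
// `take_lane` stands in for an intrinsic that expects a constant index.
fn take_lane(v: [u32; 4], idx: u32) -> u32 {
    v[idx as usize]
}

macro_rules! take_lane {
    ($v:expr, $idx:expr $(,)?) => {{
        // `const { ... }` forces `$idx` to be evaluated at compile time;
        // a non-constant index fails to compile at the call site instead of
        // reaching the callee as a runtime value.
        take_lane($v, const { $idx })
    }};
}

fn main() {
    let v = [10u32, 20, 30, 40];
    // Any constant expression is accepted, e.g. a const generic such as
    // `LANE as u32` in the NEON intrinsics, or a literal computation:
    let x = take_lane!(v, 1 + 2);
    assert_eq!(x, 40);
}

The three-argument form `simd_extract!(x, idx, ty)` seen in the x86 hunks works the same way, additionally pinning the element type via `simd_extract::<_, $ty>`.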
@@ -1144,7 +1144,7 @@ pub fn u8x16_replace_lane(a: v128, val: u8) -> v128 { #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn i16x8_extract_lane(a: v128) -> i16 { static_assert!(N < 8); - unsafe { simd_extract(a.as_i16x8(), N as u32) } + unsafe { simd_extract!(a.as_i16x8(), N as u32) } } /// Extracts a lane from a 128-bit vector interpreted as 8 packed u16 numbers. @@ -1158,7 +1158,7 @@ pub fn i16x8_extract_lane(a: v128) -> i16 { #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn u16x8_extract_lane(a: v128) -> u16 { static_assert!(N < 8); - unsafe { simd_extract(a.as_u16x8(), N as u32) } + unsafe { simd_extract!(a.as_u16x8(), N as u32) } } /// Replaces a lane from a 128-bit vector interpreted as 8 packed i16 numbers. @@ -1172,7 +1172,7 @@ pub fn u16x8_extract_lane(a: v128) -> u16 { #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn i16x8_replace_lane(a: v128, val: i16) -> v128 { static_assert!(N < 8); - unsafe { simd_insert(a.as_i16x8(), N as u32, val).v128() } + unsafe { simd_insert!(a.as_i16x8(), N as u32, val).v128() } } /// Replaces a lane from a 128-bit vector interpreted as 8 packed u16 numbers. @@ -1186,7 +1186,7 @@ pub fn i16x8_replace_lane(a: v128, val: i16) -> v128 { #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn u16x8_replace_lane(a: v128, val: u16) -> v128 { static_assert!(N < 8); - unsafe { simd_insert(a.as_u16x8(), N as u32, val).v128() } + unsafe { simd_insert!(a.as_u16x8(), N as u32, val).v128() } } /// Extracts a lane from a 128-bit vector interpreted as 4 packed i32 numbers. @@ -1200,7 +1200,7 @@ pub fn u16x8_replace_lane(a: v128, val: u16) -> v128 { #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn i32x4_extract_lane(a: v128) -> i32 { static_assert!(N < 4); - unsafe { simd_extract(a.as_i32x4(), N as u32) } + unsafe { simd_extract!(a.as_i32x4(), N as u32) } } /// Extracts a lane from a 128-bit vector interpreted as 4 packed u32 numbers. @@ -1226,7 +1226,7 @@ pub fn u32x4_extract_lane(a: v128) -> u32 { #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn i32x4_replace_lane(a: v128, val: i32) -> v128 { static_assert!(N < 4); - unsafe { simd_insert(a.as_i32x4(), N as u32, val).v128() } + unsafe { simd_insert!(a.as_i32x4(), N as u32, val).v128() } } /// Replaces a lane from a 128-bit vector interpreted as 4 packed u32 numbers. @@ -1252,7 +1252,7 @@ pub fn u32x4_replace_lane(a: v128, val: u32) -> v128 { #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn i64x2_extract_lane(a: v128) -> i64 { static_assert!(N < 2); - unsafe { simd_extract(a.as_i64x2(), N as u32) } + unsafe { simd_extract!(a.as_i64x2(), N as u32) } } /// Extracts a lane from a 128-bit vector interpreted as 2 packed u64 numbers. @@ -1278,7 +1278,7 @@ pub fn u64x2_extract_lane(a: v128) -> u64 { #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn i64x2_replace_lane(a: v128, val: i64) -> v128 { static_assert!(N < 2); - unsafe { simd_insert(a.as_i64x2(), N as u32, val).v128() } + unsafe { simd_insert!(a.as_i64x2(), N as u32, val).v128() } } /// Replaces a lane from a 128-bit vector interpreted as 2 packed u64 numbers. @@ -1304,7 +1304,7 @@ pub fn u64x2_replace_lane(a: v128, val: u64) -> v128 { #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn f32x4_extract_lane(a: v128) -> f32 { static_assert!(N < 4); - unsafe { simd_extract(a.as_f32x4(), N as u32) } + unsafe { simd_extract!(a.as_f32x4(), N as u32) } } /// Replaces a lane from a 128-bit vector interpreted as 4 packed f32 numbers. 
@@ -1318,7 +1318,7 @@ pub fn f32x4_extract_lane(a: v128) -> f32 { #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn f32x4_replace_lane(a: v128, val: f32) -> v128 { static_assert!(N < 4); - unsafe { simd_insert(a.as_f32x4(), N as u32, val).v128() } + unsafe { simd_insert!(a.as_f32x4(), N as u32, val).v128() } } /// Extracts a lane from a 128-bit vector interpreted as 2 packed f64 numbers. @@ -1332,7 +1332,7 @@ pub fn f32x4_replace_lane(a: v128, val: f32) -> v128 { #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn f64x2_extract_lane(a: v128) -> f64 { static_assert!(N < 2); - unsafe { simd_extract(a.as_f64x2(), N as u32) } + unsafe { simd_extract!(a.as_f64x2(), N as u32) } } /// Replaces a lane from a 128-bit vector interpreted as 2 packed f64 numbers. @@ -1346,7 +1346,7 @@ pub fn f64x2_extract_lane(a: v128) -> f64 { #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn f64x2_replace_lane(a: v128, val: f64) -> v128 { static_assert!(N < 2); - unsafe { simd_insert(a.as_f64x2(), N as u32, val).v128() } + unsafe { simd_insert!(a.as_f64x2(), N as u32, val).v128() } } /// Returns a new vector with lanes selected from the lanes of the first input diff --git a/crates/core_arch/src/x86/avx.rs b/crates/core_arch/src/x86/avx.rs index 9a05ef620e..72eb43a5c1 100644 --- a/crates/core_arch/src/x86/avx.rs +++ b/crates/core_arch/src/x86/avx.rs @@ -1329,7 +1329,7 @@ pub unsafe fn _mm256_insertf128_si256(a: __m256i, b: __m128i) - #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_insert_epi8(a: __m256i, i: i8) -> __m256i { static_assert_uimm_bits!(INDEX, 5); - transmute(simd_insert(a.as_i8x32(), INDEX as u32, i)) + transmute(simd_insert!(a.as_i8x32(), INDEX as u32, i)) } /// Copies `a` to result, and inserts the 16-bit integer `i` into result @@ -1343,7 +1343,7 @@ pub unsafe fn _mm256_insert_epi8(a: __m256i, i: i8) -> __m256i #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_insert_epi16(a: __m256i, i: i16) -> __m256i { static_assert_uimm_bits!(INDEX, 4); - transmute(simd_insert(a.as_i16x16(), INDEX as u32, i)) + transmute(simd_insert!(a.as_i16x16(), INDEX as u32, i)) } /// Copies `a` to result, and inserts the 32-bit integer `i` into result @@ -1357,7 +1357,7 @@ pub unsafe fn _mm256_insert_epi16(a: __m256i, i: i16) -> __m25 #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_insert_epi32(a: __m256i, i: i32) -> __m256i { static_assert_uimm_bits!(INDEX, 3); - transmute(simd_insert(a.as_i32x8(), INDEX as u32, i)) + transmute(simd_insert!(a.as_i32x8(), INDEX as u32, i)) } /// Loads 256-bits (composed of 4 packed double-precision (64-bit) @@ -2914,7 +2914,7 @@ pub unsafe fn _mm256_storeu2_m128i(hiaddr: *mut __m128i, loaddr: *mut __m128i, a //#[cfg_attr(test, assert_instr(movss))] FIXME #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtss_f32(a: __m256) -> f32 { - simd_extract(a, 0) + simd_extract!(a, 0) } // LLVM intrinsics used in the above functions diff --git a/crates/core_arch/src/x86/avx2.rs b/crates/core_arch/src/x86/avx2.rs index c4a117424d..1f7a0b771a 100644 --- a/crates/core_arch/src/x86/avx2.rs +++ b/crates/core_arch/src/x86/avx2.rs @@ -3586,7 +3586,7 @@ pub unsafe fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_extract_epi8(a: __m256i) -> i32 { static_assert_uimm_bits!(INDEX, 5); - simd_extract::<_, u8>(a.as_u8x32(), INDEX as u32) as i32 + simd_extract!(a.as_u8x32(), INDEX as u32, u8) as i32 } /// Extracts a 16-bit integer 
from `a`, selected with `INDEX`. Returns a 32-bit @@ -3602,7 +3602,7 @@ pub unsafe fn _mm256_extract_epi8(a: __m256i) -> i32 { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_extract_epi16(a: __m256i) -> i32 { static_assert_uimm_bits!(INDEX, 4); - simd_extract::<_, u16>(a.as_u16x16(), INDEX as u32) as i32 + simd_extract!(a.as_u16x16(), INDEX as u32, u16) as i32 } /// Extracts a 32-bit integer from `a`, selected with `INDEX`. @@ -3615,7 +3615,7 @@ pub unsafe fn _mm256_extract_epi16(a: __m256i) -> i32 { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_extract_epi32(a: __m256i) -> i32 { static_assert_uimm_bits!(INDEX, 3); - simd_extract(a.as_i32x8(), INDEX as u32) + simd_extract!(a.as_i32x8(), INDEX as u32) } /// Returns the first element of the input vector of `[4 x double]`. @@ -3626,7 +3626,7 @@ pub unsafe fn _mm256_extract_epi32(a: __m256i) -> i32 { //#[cfg_attr(test, assert_instr(movsd))] FIXME #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtsd_f64(a: __m256d) -> f64 { - simd_extract(a, 0) + simd_extract!(a, 0) } /// Returns the first element of the input vector of `[8 x i32]`. @@ -3636,7 +3636,7 @@ pub unsafe fn _mm256_cvtsd_f64(a: __m256d) -> f64 { #[target_feature(enable = "avx2")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtsi256_si32(a: __m256i) -> i32 { - simd_extract(a.as_i32x8(), 0) + simd_extract!(a.as_i32x8(), 0) } #[allow(improper_ctypes)] diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index af95505547..3def4a39d6 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -25556,7 +25556,7 @@ pub unsafe fn _mm512_castsi512_pd(a: __m512i) -> __m512d { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(vmovd))] pub unsafe fn _mm512_cvtsi512_si32(a: __m512i) -> i32 { - let extract: i32 = simd_extract(a.as_i32x16(), 0); + let extract: i32 = simd_extract!(a.as_i32x16(), 0); extract } @@ -34622,12 +34622,12 @@ pub unsafe fn _mm512_set_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovss))] pub unsafe fn _mm_mask_move_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { - let extractsrc: f32 = simd_extract(src, 0); + let extractsrc: f32 = simd_extract!(src, 0); let mut mov: f32 = extractsrc; if (k & 0b00000001) != 0 { - mov = simd_extract(b, 0); + mov = simd_extract!(b, 0); } - simd_insert(a, 0, mov) + simd_insert!(a, 0, mov) } /// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. @@ -34640,9 +34640,9 @@ pub unsafe fn _mm_mask_move_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) - pub unsafe fn _mm_maskz_move_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { let mut mov: f32 = 0.; if (k & 0b00000001) != 0 { - mov = simd_extract(b, 0); + mov = simd_extract!(b, 0); } - simd_insert(a, 0, mov) + simd_insert!(a, 0, mov) } /// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
@@ -34653,12 +34653,12 @@ pub unsafe fn _mm_maskz_move_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovsd))] pub unsafe fn _mm_mask_move_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let extractsrc: f64 = simd_extract(src, 0); + let extractsrc: f64 = simd_extract!(src, 0); let mut mov: f64 = extractsrc; if (k & 0b00000001) != 0 { - mov = simd_extract(b, 0); + mov = simd_extract!(b, 0); } - simd_insert(a, 0, mov) + simd_insert!(a, 0, mov) } /// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. @@ -34671,9 +34671,9 @@ pub unsafe fn _mm_mask_move_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d pub unsafe fn _mm_maskz_move_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { let mut mov: f64 = 0.; if (k & 0b00000001) != 0 { - mov = simd_extract(b, 0); + mov = simd_extract!(b, 0); } - simd_insert(a, 0, mov) + simd_insert!(a, 0, mov) } /// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. @@ -34684,14 +34684,14 @@ pub unsafe fn _mm_maskz_move_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vaddss))] pub unsafe fn _mm_mask_add_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { - let extractsrc: f32 = simd_extract(src, 0); + let extractsrc: f32 = simd_extract!(src, 0); let mut add: f32 = extractsrc; if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract(a, 0); - let extractb: f32 = simd_extract(b, 0); + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); add = extracta + extractb; } - simd_insert(a, 0, add) + simd_insert!(a, 0, add) } /// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. @@ -34704,11 +34704,11 @@ pub unsafe fn _mm_mask_add_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> pub unsafe fn _mm_maskz_add_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { let mut add: f32 = 0.; if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract(a, 0); - let extractb: f32 = simd_extract(b, 0); + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); add = extracta + extractb; } - simd_insert(a, 0, add) + simd_insert!(a, 0, add) } /// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
@@ -34719,14 +34719,14 @@ pub unsafe fn _mm_maskz_add_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vaddsd))] pub unsafe fn _mm_mask_add_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let extractsrc: f64 = simd_extract(src, 0); + let extractsrc: f64 = simd_extract!(src, 0); let mut add: f64 = extractsrc; if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract(a, 0); - let extractb: f64 = simd_extract(b, 0); + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); add = extracta + extractb; } - simd_insert(a, 0, add) + simd_insert!(a, 0, add) } /// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. @@ -34739,11 +34739,11 @@ pub unsafe fn _mm_mask_add_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) pub unsafe fn _mm_maskz_add_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { let mut add: f64 = 0.; if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract(a, 0); - let extractb: f64 = simd_extract(b, 0); + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); add = extracta + extractb; } - simd_insert(a, 0, add) + simd_insert!(a, 0, add) } /// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. @@ -34754,14 +34754,14 @@ pub unsafe fn _mm_maskz_add_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsubss))] pub unsafe fn _mm_mask_sub_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { - let extractsrc: f32 = simd_extract(src, 0); + let extractsrc: f32 = simd_extract!(src, 0); let mut add: f32 = extractsrc; if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract(a, 0); - let extractb: f32 = simd_extract(b, 0); + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); add = extracta - extractb; } - simd_insert(a, 0, add) + simd_insert!(a, 0, add) } /// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. 
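The masked scalar helpers in these hunks all share one shape: the low lane is recomputed only when bit 0 of `k` is set, otherwise it falls back to `src` (writemask) or to zero (zeromask), and the upper lanes of `a` are carried through unchanged. A rough standalone model of just the low-lane behaviour, with plain `f32` values standing in for `__m128` and hypothetical helper names:

fn mask_add_lane(src: f32, k: u8, a: f32, b: f32) -> f32 {
    if k & 1 != 0 { a + b } else { src } // writemask: keep the value from `src`
}

fn maskz_add_lane(k: u8, a: f32, b: f32) -> f32 {
    if k & 1 != 0 { a + b } else { 0.0 } // zeromask: zero the lane instead
}

fn main() {
    assert_eq!(mask_add_lane(9.0, 0b0, 1.0, 2.0), 9.0);
    assert_eq!(mask_add_lane(9.0, 0b1, 1.0, 2.0), 3.0);
    assert_eq!(maskz_add_lane(0b0, 1.0, 2.0), 0.0);
}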
@@ -34774,11 +34774,11 @@ pub unsafe fn _mm_mask_sub_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> pub unsafe fn _mm_maskz_sub_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { let mut add: f32 = 0.; if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract(a, 0); - let extractb: f32 = simd_extract(b, 0); + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); add = extracta - extractb; } - simd_insert(a, 0, add) + simd_insert!(a, 0, add) } /// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. @@ -34789,14 +34789,14 @@ pub unsafe fn _mm_maskz_sub_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsubsd))] pub unsafe fn _mm_mask_sub_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let extractsrc: f64 = simd_extract(src, 0); + let extractsrc: f64 = simd_extract!(src, 0); let mut add: f64 = extractsrc; if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract(a, 0); - let extractb: f64 = simd_extract(b, 0); + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); add = extracta - extractb; } - simd_insert(a, 0, add) + simd_insert!(a, 0, add) } /// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. @@ -34809,11 +34809,11 @@ pub unsafe fn _mm_mask_sub_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) pub unsafe fn _mm_maskz_sub_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { let mut add: f64 = 0.; if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract(a, 0); - let extractb: f64 = simd_extract(b, 0); + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); add = extracta - extractb; } - simd_insert(a, 0, add) + simd_insert!(a, 0, add) } /// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. 
@@ -34824,14 +34824,14 @@ pub unsafe fn _mm_maskz_sub_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmulss))] pub unsafe fn _mm_mask_mul_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { - let extractsrc: f32 = simd_extract(src, 0); + let extractsrc: f32 = simd_extract!(src, 0); let mut add: f32 = extractsrc; if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract(a, 0); - let extractb: f32 = simd_extract(b, 0); + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); add = extracta * extractb; } - simd_insert(a, 0, add) + simd_insert!(a, 0, add) } /// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. @@ -34844,11 +34844,11 @@ pub unsafe fn _mm_mask_mul_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> pub unsafe fn _mm_maskz_mul_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { let mut add: f32 = 0.; if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract(a, 0); - let extractb: f32 = simd_extract(b, 0); + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); add = extracta * extractb; } - simd_insert(a, 0, add) + simd_insert!(a, 0, add) } /// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. @@ -34859,14 +34859,14 @@ pub unsafe fn _mm_maskz_mul_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmulsd))] pub unsafe fn _mm_mask_mul_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let extractsrc: f64 = simd_extract(src, 0); + let extractsrc: f64 = simd_extract!(src, 0); let mut add: f64 = extractsrc; if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract(a, 0); - let extractb: f64 = simd_extract(b, 0); + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); add = extracta * extractb; } - simd_insert(a, 0, add) + simd_insert!(a, 0, add) } /// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. @@ -34879,11 +34879,11 @@ pub unsafe fn _mm_mask_mul_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) pub unsafe fn _mm_maskz_mul_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { let mut add: f64 = 0.; if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract(a, 0); - let extractb: f64 = simd_extract(b, 0); + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); add = extracta * extractb; } - simd_insert(a, 0, add) + simd_insert!(a, 0, add) } /// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. 
@@ -34894,14 +34894,14 @@ pub unsafe fn _mm_maskz_mul_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vdivss))] pub unsafe fn _mm_mask_div_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { - let extractsrc: f32 = simd_extract(src, 0); + let extractsrc: f32 = simd_extract!(src, 0); let mut add: f32 = extractsrc; if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract(a, 0); - let extractb: f32 = simd_extract(b, 0); + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); add = extracta / extractb; } - simd_insert(a, 0, add) + simd_insert!(a, 0, add) } /// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. @@ -34914,11 +34914,11 @@ pub unsafe fn _mm_mask_div_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> pub unsafe fn _mm_maskz_div_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { let mut add: f32 = 0.; if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract(a, 0); - let extractb: f32 = simd_extract(b, 0); + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); add = extracta / extractb; } - simd_insert(a, 0, add) + simd_insert!(a, 0, add) } /// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. @@ -34929,14 +34929,14 @@ pub unsafe fn _mm_maskz_div_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vdivsd))] pub unsafe fn _mm_mask_div_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let extractsrc: f64 = simd_extract(src, 0); + let extractsrc: f64 = simd_extract!(src, 0); let mut add: f64 = extractsrc; if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract(a, 0); - let extractb: f64 = simd_extract(b, 0); + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); add = extracta / extractb; } - simd_insert(a, 0, add) + simd_insert!(a, 0, add) } /// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
@@ -34949,11 +34949,11 @@ pub unsafe fn _mm_mask_div_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) pub unsafe fn _mm_maskz_div_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { let mut add: f64 = 0.; if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract(a, 0); - let extractb: f64 = simd_extract(b, 0); + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); add = extracta / extractb; } - simd_insert(a, 0, add) + simd_insert!(a, 0, add) } /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. @@ -35904,13 +35904,13 @@ pub unsafe fn _mm_maskz_scalef_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd213ss))] pub unsafe fn _mm_mask_fmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { - let mut fmadd: f32 = simd_extract(a, 0); + let mut fmadd: f32 = simd_extract!(a, 0); if (k & 0b00000001) != 0 { - let extractb: f32 = simd_extract(b, 0); - let extractc: f32 = simd_extract(c, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); fmadd = vfmadd132ss(fmadd, extractb, extractc, _MM_FROUND_CUR_DIRECTION); } - simd_insert(a, 0, fmadd) + simd_insert!(a, 0, fmadd) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. @@ -35923,12 +35923,12 @@ pub unsafe fn _mm_mask_fmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> pub unsafe fn _mm_maskz_fmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { let mut fmadd: f32 = 0.; if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract(a, 0); - let extractb: f32 = simd_extract(b, 0); - let extractc: f32 = simd_extract(c, 0); + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); fmadd = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION); } - simd_insert(a, 0, fmadd) + simd_insert!(a, 0, fmadd) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst. 
@@ -35939,13 +35939,13 @@ pub unsafe fn _mm_maskz_fmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd213ss))] pub unsafe fn _mm_mask3_fmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { - let mut fmadd: f32 = simd_extract(c, 0); + let mut fmadd: f32 = simd_extract!(c, 0); if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract(a, 0); - let extractb: f32 = simd_extract(b, 0); + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); fmadd = vfmadd132ss(extracta, extractb, fmadd, _MM_FROUND_CUR_DIRECTION); } - simd_insert(c, 0, fmadd) + simd_insert!(c, 0, fmadd) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. @@ -35956,13 +35956,13 @@ pub unsafe fn _mm_mask3_fmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd213sd))] pub unsafe fn _mm_mask_fmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { - let mut fmadd: f64 = simd_extract(a, 0); + let mut fmadd: f64 = simd_extract!(a, 0); if (k & 0b00000001) != 0 { - let extractb: f64 = simd_extract(b, 0); - let extractc: f64 = simd_extract(c, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); fmadd = vfmadd132sd(fmadd, extractb, extractc, _MM_FROUND_CUR_DIRECTION); } - simd_insert(a, 0, fmadd) + simd_insert!(a, 0, fmadd) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. @@ -35975,12 +35975,12 @@ pub unsafe fn _mm_mask_fmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) pub unsafe fn _mm_maskz_fmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { let mut fmadd: f64 = 0.; if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract(a, 0); - let extractb: f64 = simd_extract(b, 0); - let extractc: f64 = simd_extract(c, 0); + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); fmadd = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION); } - simd_insert(a, 0, fmadd) + simd_insert!(a, 0, fmadd) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst. 
@@ -35991,13 +35991,13 @@ pub unsafe fn _mm_maskz_fmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd213sd))] pub unsafe fn _mm_mask3_fmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { - let mut fmadd: f64 = simd_extract(c, 0); + let mut fmadd: f64 = simd_extract!(c, 0); if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract(a, 0); - let extractb: f64 = simd_extract(b, 0); + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); fmadd = vfmadd132sd(extracta, extractb, fmadd, _MM_FROUND_CUR_DIRECTION); } - simd_insert(c, 0, fmadd) + simd_insert!(c, 0, fmadd) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. @@ -36008,14 +36008,14 @@ pub unsafe fn _mm_mask3_fmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub213ss))] pub unsafe fn _mm_mask_fmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { - let mut fmsub: f32 = simd_extract(a, 0); + let mut fmsub: f32 = simd_extract!(a, 0); if (k & 0b00000001) != 0 { - let extractb: f32 = simd_extract(b, 0); - let extractc: f32 = simd_extract(c, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); let extractc = -extractc; fmsub = vfmadd132ss(fmsub, extractb, extractc, _MM_FROUND_CUR_DIRECTION); } - simd_insert(a, 0, fmsub) + simd_insert!(a, 0, fmsub) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. @@ -36028,13 +36028,13 @@ pub unsafe fn _mm_mask_fmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> pub unsafe fn _mm_maskz_fmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { let mut fmsub: f32 = 0.; if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract(a, 0); - let extractb: f32 = simd_extract(b, 0); - let extractc: f32 = simd_extract(c, 0); + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); let extractc = -extractc; fmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION); } - simd_insert(a, 0, fmsub) + simd_insert!(a, 0, fmsub) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst. 
@@ -36045,14 +36045,14 @@ pub unsafe fn _mm_maskz_fmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub213ss))] pub unsafe fn _mm_mask3_fmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { - let mut fmsub: f32 = simd_extract(c, 0); + let mut fmsub: f32 = simd_extract!(c, 0); if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract(a, 0); - let extractb: f32 = simd_extract(b, 0); + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); let extractc = -fmsub; fmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION); } - simd_insert(c, 0, fmsub) + simd_insert!(c, 0, fmsub) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. @@ -36063,14 +36063,14 @@ pub unsafe fn _mm_mask3_fmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub213sd))] pub unsafe fn _mm_mask_fmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { - let mut fmsub: f64 = simd_extract(a, 0); + let mut fmsub: f64 = simd_extract!(a, 0); if (k & 0b00000001) != 0 { - let extractb: f64 = simd_extract(b, 0); - let extractc: f64 = simd_extract(c, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); let extractc = -extractc; fmsub = vfmadd132sd(fmsub, extractb, extractc, _MM_FROUND_CUR_DIRECTION); } - simd_insert(a, 0, fmsub) + simd_insert!(a, 0, fmsub) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. @@ -36083,13 +36083,13 @@ pub unsafe fn _mm_mask_fmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) pub unsafe fn _mm_maskz_fmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { let mut fmsub: f64 = 0.; if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract(a, 0); - let extractb: f64 = simd_extract(b, 0); - let extractc: f64 = simd_extract(c, 0); + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); let extractc = -extractc; fmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION); } - simd_insert(a, 0, fmsub) + simd_insert!(a, 0, fmsub) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst. 
@@ -36100,14 +36100,14 @@ pub unsafe fn _mm_maskz_fmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub213sd))] pub unsafe fn _mm_mask3_fmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { - let mut fmsub: f64 = simd_extract(c, 0); + let mut fmsub: f64 = simd_extract!(c, 0); if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract(a, 0); - let extractb: f64 = simd_extract(b, 0); + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); let extractc = -fmsub; fmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION); } - simd_insert(c, 0, fmsub) + simd_insert!(c, 0, fmsub) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. @@ -36118,14 +36118,14 @@ pub unsafe fn _mm_mask3_fmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd213ss))] pub unsafe fn _mm_mask_fnmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { - let mut fnmadd: f32 = simd_extract(a, 0); + let mut fnmadd: f32 = simd_extract!(a, 0); if (k & 0b00000001) != 0 { let extracta = -fnmadd; - let extractb: f32 = simd_extract(b, 0); - let extractc: f32 = simd_extract(c, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); fnmadd = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION); } - simd_insert(a, 0, fnmadd) + simd_insert!(a, 0, fnmadd) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. @@ -36138,13 +36138,13 @@ pub unsafe fn _mm_mask_fnmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) - pub unsafe fn _mm_maskz_fnmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { let mut fnmadd: f32 = 0.; if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract(a, 0); + let extracta: f32 = simd_extract!(a, 0); let extracta = -extracta; - let extractb: f32 = simd_extract(b, 0); - let extractc: f32 = simd_extract(c, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); fnmadd = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION); } - simd_insert(a, 0, fnmadd) + simd_insert!(a, 0, fnmadd) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst. 
@@ -36155,14 +36155,14 @@ pub unsafe fn _mm_maskz_fnmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd213ss))] pub unsafe fn _mm_mask3_fnmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { - let mut fnmadd: f32 = simd_extract(c, 0); + let mut fnmadd: f32 = simd_extract!(c, 0); if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract(a, 0); + let extracta: f32 = simd_extract!(a, 0); let extracta = -extracta; - let extractb: f32 = simd_extract(b, 0); + let extractb: f32 = simd_extract!(b, 0); fnmadd = vfmadd132ss(extracta, extractb, fnmadd, _MM_FROUND_CUR_DIRECTION); } - simd_insert(c, 0, fnmadd) + simd_insert!(c, 0, fnmadd) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. @@ -36173,14 +36173,14 @@ pub unsafe fn _mm_mask3_fnmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd213sd))] pub unsafe fn _mm_mask_fnmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { - let mut fnmadd: f64 = simd_extract(a, 0); + let mut fnmadd: f64 = simd_extract!(a, 0); if (k & 0b00000001) != 0 { let extracta = -fnmadd; - let extractb: f64 = simd_extract(b, 0); - let extractc: f64 = simd_extract(c, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); fnmadd = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION); } - simd_insert(a, 0, fnmadd) + simd_insert!(a, 0, fnmadd) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. @@ -36193,13 +36193,13 @@ pub unsafe fn _mm_mask_fnmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d pub unsafe fn _mm_maskz_fnmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { let mut fnmadd: f64 = 0.; if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract(a, 0); + let extracta: f64 = simd_extract!(a, 0); let extracta = -extracta; - let extractb: f64 = simd_extract(b, 0); - let extractc: f64 = simd_extract(c, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); fnmadd = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION); } - simd_insert(a, 0, fnmadd) + simd_insert!(a, 0, fnmadd) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst. 
@@ -36210,14 +36210,14 @@ pub unsafe fn _mm_maskz_fnmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd213sd))] pub unsafe fn _mm_mask3_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { - let mut fnmadd: f64 = simd_extract(c, 0); + let mut fnmadd: f64 = simd_extract!(c, 0); if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract(a, 0); + let extracta: f64 = simd_extract!(a, 0); let extracta = -extracta; - let extractb: f64 = simd_extract(b, 0); + let extractb: f64 = simd_extract!(b, 0); fnmadd = vfmadd132sd(extracta, extractb, fnmadd, _MM_FROUND_CUR_DIRECTION); } - simd_insert(c, 0, fnmadd) + simd_insert!(c, 0, fnmadd) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. @@ -36228,15 +36228,15 @@ pub unsafe fn _mm_mask3_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub213ss))] pub unsafe fn _mm_mask_fnmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { - let mut fnmsub: f32 = simd_extract(a, 0); + let mut fnmsub: f32 = simd_extract!(a, 0); if (k & 0b00000001) != 0 { let extracta = -fnmsub; - let extractb: f32 = simd_extract(b, 0); - let extractc: f32 = simd_extract(c, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); let extractc = -extractc; fnmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION); } - simd_insert(a, 0, fnmsub) + simd_insert!(a, 0, fnmsub) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. @@ -36249,14 +36249,14 @@ pub unsafe fn _mm_mask_fnmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) - pub unsafe fn _mm_maskz_fnmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { let mut fnmsub: f32 = 0.; if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract(a, 0); + let extracta: f32 = simd_extract!(a, 0); let extracta = -extracta; - let extractb: f32 = simd_extract(b, 0); - let extractc: f32 = simd_extract(c, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); let extractc = -extractc; fnmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION); } - simd_insert(a, 0, fnmsub) + simd_insert!(a, 0, fnmsub) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst. 
@@ -36267,15 +36267,15 @@ pub unsafe fn _mm_maskz_fnmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub213ss))] pub unsafe fn _mm_mask3_fnmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { - let mut fnmsub: f32 = simd_extract(c, 0); + let mut fnmsub: f32 = simd_extract!(c, 0); if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract(a, 0); + let extracta: f32 = simd_extract!(a, 0); let extracta = -extracta; - let extractb: f32 = simd_extract(b, 0); + let extractb: f32 = simd_extract!(b, 0); let extractc = -fnmsub; fnmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION); } - simd_insert(c, 0, fnmsub) + simd_insert!(c, 0, fnmsub) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. @@ -36286,15 +36286,15 @@ pub unsafe fn _mm_mask3_fnmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub213sd))] pub unsafe fn _mm_mask_fnmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { - let mut fnmsub: f64 = simd_extract(a, 0); + let mut fnmsub: f64 = simd_extract!(a, 0); if (k & 0b00000001) != 0 { let extracta = -fnmsub; - let extractb: f64 = simd_extract(b, 0); - let extractc: f64 = simd_extract(c, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); let extractc = -extractc; fnmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION); } - simd_insert(a, 0, fnmsub) + simd_insert!(a, 0, fnmsub) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. @@ -36307,14 +36307,14 @@ pub unsafe fn _mm_mask_fnmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d pub unsafe fn _mm_maskz_fnmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { let mut fnmsub: f64 = 0.; if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract(a, 0); + let extracta: f64 = simd_extract!(a, 0); let extracta = -extracta; - let extractb: f64 = simd_extract(b, 0); - let extractc: f64 = simd_extract(c, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); let extractc = -extractc; fnmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION); } - simd_insert(a, 0, fnmsub) + simd_insert!(a, 0, fnmsub) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst. 
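The fmsub/fnmadd/fnmsub hunks above express every variant through vfmadd132ss / vfmadd132sd by negating operands before the call. The identities behind that, checked here with a plain (non-fused) multiply-add as a stand-in, ignoring rounding-mode details:

// fmadd(a, b, c)  =  a*b + c            (the only primitive actually invoked)
// fmsub(a, b, c)  =  a*b - c          = fmadd(a, b, -c)
// fnmadd(a, b, c) = -(a*b) + c        = fmadd(-a, b, c)
// fnmsub(a, b, c) = -(a*b) - c        = fmadd(-a, b, -c)
fn fmadd(a: f64, b: f64, c: f64) -> f64 {
    a * b + c
}

fn main() {
    let (a, b, c) = (2.0, 3.0, 5.0);
    assert_eq!(fmadd(a, b, -c), a * b - c);     // fmsub
    assert_eq!(fmadd(-a, b, c), -(a * b) + c);  // fnmadd
    assert_eq!(fmadd(-a, b, -c), -(a * b) - c); // fnmsub
}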
@@ -36325,15 +36325,15 @@ pub unsafe fn _mm_maskz_fnmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub213sd))] pub unsafe fn _mm_mask3_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { - let mut fnmsub: f64 = simd_extract(c, 0); + let mut fnmsub: f64 = simd_extract!(c, 0); if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract(a, 0); + let extracta: f64 = simd_extract!(a, 0); let extracta = -extracta; - let extractb: f64 = simd_extract(b, 0); + let extractb: f64 = simd_extract!(b, 0); let extractc = -fnmsub; fnmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION); } - simd_insert(c, 0, fnmsub) + simd_insert!(c, 0, fnmsub) } /// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -38090,11 +38090,11 @@ pub unsafe fn _mm_maskz_scalef_round_sd( #[rustc_legacy_const_generics(3)] pub unsafe fn _mm_fmadd_round_ss(a: __m128, b: __m128, c: __m128) -> __m128 { static_assert_rounding!(ROUNDING); - let extracta: f32 = simd_extract(a, 0); - let extractb: f32 = simd_extract(b, 0); - let extractc: f32 = simd_extract(c, 0); + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); let r = vfmadd132ss(extracta, extractb, extractc, ROUNDING); - simd_insert(a, 0, r) + simd_insert!(a, 0, r) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -38119,13 +38119,13 @@ pub unsafe fn _mm_mask_fmadd_round_ss( c: __m128, ) -> __m128 { static_assert_rounding!(ROUNDING); - let mut fmadd: f32 = simd_extract(a, 0); + let mut fmadd: f32 = simd_extract!(a, 0); if (k & 0b00000001) != 0 { - let extractb: f32 = simd_extract(b, 0); - let extractc: f32 = simd_extract(c, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); fmadd = vfmadd132ss(fmadd, extractb, extractc, ROUNDING); } - simd_insert(a, 0, fmadd) + simd_insert!(a, 0, fmadd) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -38152,12 +38152,12 @@ pub unsafe fn _mm_maskz_fmadd_round_ss( static_assert_rounding!(ROUNDING); let mut fmadd: f32 = 0.; if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract(a, 0); - let extractb: f32 = simd_extract(b, 0); - let extractc: f32 = simd_extract(c, 0); + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); fmadd = vfmadd132ss(extracta, extractb, extractc, ROUNDING); } - simd_insert(a, 0, fmadd) + simd_insert!(a, 0, fmadd) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. 
Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\ @@ -38182,13 +38182,13 @@ pub unsafe fn _mm_mask3_fmadd_round_ss( k: __mmask8, ) -> __m128 { static_assert_rounding!(ROUNDING); - let mut fmadd: f32 = simd_extract(c, 0); + let mut fmadd: f32 = simd_extract!(c, 0); if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract(a, 0); - let extractb: f32 = simd_extract(b, 0); + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); fmadd = vfmadd132ss(extracta, extractb, fmadd, ROUNDING); } - simd_insert(c, 0, fmadd) + simd_insert!(c, 0, fmadd) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ @@ -38212,11 +38212,11 @@ pub unsafe fn _mm_fmadd_round_sd( c: __m128d, ) -> __m128d { static_assert_rounding!(ROUNDING); - let extracta: f64 = simd_extract(a, 0); - let extractb: f64 = simd_extract(b, 0); - let extractc: f64 = simd_extract(c, 0); + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); let fmadd = vfmadd132sd(extracta, extractb, extractc, ROUNDING); - simd_insert(a, 0, fmadd) + simd_insert!(a, 0, fmadd) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -38241,13 +38241,13 @@ pub unsafe fn _mm_mask_fmadd_round_sd( c: __m128d, ) -> __m128d { static_assert_rounding!(ROUNDING); - let mut fmadd: f64 = simd_extract(a, 0); + let mut fmadd: f64 = simd_extract!(a, 0); if (k & 0b00000001) != 0 { - let extractb: f64 = simd_extract(b, 0); - let extractc: f64 = simd_extract(c, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); fmadd = vfmadd132sd(fmadd, extractb, extractc, ROUNDING); } - simd_insert(a, 0, fmadd) + simd_insert!(a, 0, fmadd) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -38274,12 +38274,12 @@ pub unsafe fn _mm_maskz_fmadd_round_sd( static_assert_rounding!(ROUNDING); let mut fmadd: f64 = 0.; if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract(a, 0); - let extractb: f64 = simd_extract(b, 0); - let extractc: f64 = simd_extract(c, 0); + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); fmadd = vfmadd132sd(extracta, extractb, extractc, ROUNDING); } - simd_insert(a, 0, fmadd) + simd_insert!(a, 0, fmadd) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. 
Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\ @@ -38304,13 +38304,13 @@ pub unsafe fn _mm_mask3_fmadd_round_sd( k: __mmask8, ) -> __m128d { static_assert_rounding!(ROUNDING); - let mut fmadd: f64 = simd_extract(c, 0); + let mut fmadd: f64 = simd_extract!(c, 0); if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract(a, 0); - let extractb: f64 = simd_extract(b, 0); + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); fmadd = vfmadd132sd(extracta, extractb, fmadd, ROUNDING); } - simd_insert(c, 0, fmadd) + simd_insert!(c, 0, fmadd) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -38330,12 +38330,12 @@ pub unsafe fn _mm_mask3_fmadd_round_sd( #[rustc_legacy_const_generics(3)] pub unsafe fn _mm_fmsub_round_ss(a: __m128, b: __m128, c: __m128) -> __m128 { static_assert_rounding!(ROUNDING); - let extracta: f32 = simd_extract(a, 0); - let extractb: f32 = simd_extract(b, 0); - let extractc: f32 = simd_extract(c, 0); + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); let extractc = -extractc; let fmsub = vfmadd132ss(extracta, extractb, extractc, ROUNDING); - simd_insert(a, 0, fmsub) + simd_insert!(a, 0, fmsub) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -38360,14 +38360,14 @@ pub unsafe fn _mm_mask_fmsub_round_ss( c: __m128, ) -> __m128 { static_assert_rounding!(ROUNDING); - let mut fmsub: f32 = simd_extract(a, 0); + let mut fmsub: f32 = simd_extract!(a, 0); if (k & 0b00000001) != 0 { - let extractb: f32 = simd_extract(b, 0); - let extractc: f32 = simd_extract(c, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); let extractc = -extractc; fmsub = vfmadd132ss(fmsub, extractb, extractc, ROUNDING); } - simd_insert(a, 0, fmsub) + simd_insert!(a, 0, fmsub) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -38394,13 +38394,13 @@ pub unsafe fn _mm_maskz_fmsub_round_ss( static_assert_rounding!(ROUNDING); let mut fmsub: f32 = 0.; if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract(a, 0); - let extractb: f32 = simd_extract(b, 0); - let extractc: f32 = simd_extract(c, 0); + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); let extractc = -extractc; fmsub = vfmadd132ss(extracta, extractb, extractc, ROUNDING); } - simd_insert(a, 0, fmsub) + simd_insert!(a, 0, fmsub) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. 
Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\ @@ -38425,14 +38425,14 @@ pub unsafe fn _mm_mask3_fmsub_round_ss( k: __mmask8, ) -> __m128 { static_assert_rounding!(ROUNDING); - let mut fmsub: f32 = simd_extract(c, 0); + let mut fmsub: f32 = simd_extract!(c, 0); if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract(a, 0); - let extractb: f32 = simd_extract(b, 0); + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); let extractc = -fmsub; fmsub = vfmadd132ss(extracta, extractb, extractc, ROUNDING); } - simd_insert(c, 0, fmsub) + simd_insert!(c, 0, fmsub) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ @@ -38456,12 +38456,12 @@ pub unsafe fn _mm_fmsub_round_sd( c: __m128d, ) -> __m128d { static_assert_rounding!(ROUNDING); - let extracta: f64 = simd_extract(a, 0); - let extractb: f64 = simd_extract(b, 0); - let extractc: f64 = simd_extract(c, 0); + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); let extractc = -extractc; let fmsub = vfmadd132sd(extracta, extractb, extractc, ROUNDING); - simd_insert(a, 0, fmsub) + simd_insert!(a, 0, fmsub) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -38486,14 +38486,14 @@ pub unsafe fn _mm_mask_fmsub_round_sd( c: __m128d, ) -> __m128d { static_assert_rounding!(ROUNDING); - let mut fmsub: f64 = simd_extract(a, 0); + let mut fmsub: f64 = simd_extract!(a, 0); if (k & 0b00000001) != 0 { - let extractb: f64 = simd_extract(b, 0); - let extractc: f64 = simd_extract(c, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); let extractc = -extractc; fmsub = vfmadd132sd(fmsub, extractb, extractc, ROUNDING); } - simd_insert(a, 0, fmsub) + simd_insert!(a, 0, fmsub) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -38520,13 +38520,13 @@ pub unsafe fn _mm_maskz_fmsub_round_sd( static_assert_rounding!(ROUNDING); let mut fmsub: f64 = 0.; if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract(a, 0); - let extractb: f64 = simd_extract(b, 0); - let extractc: f64 = simd_extract(c, 0); + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); let extractc = -extractc; fmsub = vfmadd132sd(extracta, extractb, extractc, ROUNDING); } - simd_insert(a, 0, fmsub) + simd_insert!(a, 0, fmsub) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. 
Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\ @@ -38551,14 +38551,14 @@ pub unsafe fn _mm_mask3_fmsub_round_sd( k: __mmask8, ) -> __m128d { static_assert_rounding!(ROUNDING); - let mut fmsub: f64 = simd_extract(c, 0); + let mut fmsub: f64 = simd_extract!(c, 0); if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract(a, 0); - let extractb: f64 = simd_extract(b, 0); + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); let extractc = -fmsub; fmsub = vfmadd132sd(extracta, extractb, extractc, ROUNDING); } - simd_insert(c, 0, fmsub) + simd_insert!(c, 0, fmsub) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -38578,12 +38578,12 @@ pub unsafe fn _mm_mask3_fmsub_round_sd( #[rustc_legacy_const_generics(3)] pub unsafe fn _mm_fnmadd_round_ss(a: __m128, b: __m128, c: __m128) -> __m128 { static_assert_rounding!(ROUNDING); - let extracta: f32 = simd_extract(a, 0); + let extracta: f32 = simd_extract!(a, 0); let extracta = -extracta; - let extractb: f32 = simd_extract(b, 0); - let extractc: f32 = simd_extract(c, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); let fnmadd = vfmadd132ss(extracta, extractb, extractc, ROUNDING); - simd_insert(a, 0, fnmadd) + simd_insert!(a, 0, fnmadd) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -38608,14 +38608,14 @@ pub unsafe fn _mm_mask_fnmadd_round_ss( c: __m128, ) -> __m128 { static_assert_rounding!(ROUNDING); - let mut fnmadd: f32 = simd_extract(a, 0); + let mut fnmadd: f32 = simd_extract!(a, 0); if (k & 0b00000001) != 0 { let extracta = -fnmadd; - let extractb: f32 = simd_extract(b, 0); - let extractc: f32 = simd_extract(c, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); fnmadd = vfmadd132ss(extracta, extractb, extractc, ROUNDING); } - simd_insert(a, 0, fnmadd) + simd_insert!(a, 0, fnmadd) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. 
Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -38642,13 +38642,13 @@ pub unsafe fn _mm_maskz_fnmadd_round_ss( static_assert_rounding!(ROUNDING); let mut fnmadd: f32 = 0.; if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract(a, 0); + let extracta: f32 = simd_extract!(a, 0); let extracta = -extracta; - let extractb: f32 = simd_extract(b, 0); - let extractc: f32 = simd_extract(c, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); fnmadd = vfmadd132ss(extracta, extractb, extractc, ROUNDING); } - simd_insert(a, 0, fnmadd) + simd_insert!(a, 0, fnmadd) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\ @@ -38673,14 +38673,14 @@ pub unsafe fn _mm_mask3_fnmadd_round_ss( k: __mmask8, ) -> __m128 { static_assert_rounding!(ROUNDING); - let mut fnmadd: f32 = simd_extract(c, 0); + let mut fnmadd: f32 = simd_extract!(c, 0); if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract(a, 0); + let extracta: f32 = simd_extract!(a, 0); let extracta = -extracta; - let extractb: f32 = simd_extract(b, 0); + let extractb: f32 = simd_extract!(b, 0); fnmadd = vfmadd132ss(extracta, extractb, fnmadd, ROUNDING); } - simd_insert(c, 0, fnmadd) + simd_insert!(c, 0, fnmadd) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ @@ -38704,12 +38704,12 @@ pub unsafe fn _mm_fnmadd_round_sd( c: __m128d, ) -> __m128d { static_assert_rounding!(ROUNDING); - let extracta: f64 = simd_extract(a, 0); + let extracta: f64 = simd_extract!(a, 0); let extracta = -extracta; - let extractb: f64 = simd_extract(b, 0); - let extractc: f64 = simd_extract(c, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); let fnmadd = vfmadd132sd(extracta, extractb, extractc, ROUNDING); - simd_insert(a, 0, fnmadd) + simd_insert!(a, 0, fnmadd) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -38734,14 +38734,14 @@ pub unsafe fn _mm_mask_fnmadd_round_sd( c: __m128d, ) -> __m128d { static_assert_rounding!(ROUNDING); - let mut fnmadd: f64 = simd_extract(a, 0); + let mut fnmadd: f64 = simd_extract!(a, 0); if (k & 0b00000001) != 0 { let extracta = -fnmadd; - let extractb: f64 = simd_extract(b, 0); - let extractc: f64 = simd_extract(c, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); fnmadd = vfmadd132sd(extracta, extractb, extractc, ROUNDING); } - simd_insert(a, 0, fnmadd) + simd_insert!(a, 0, fnmadd) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. 
Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -38768,13 +38768,13 @@ pub unsafe fn _mm_maskz_fnmadd_round_sd( static_assert_rounding!(ROUNDING); let mut fnmadd: f64 = 0.; if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract(a, 0); + let extracta: f64 = simd_extract!(a, 0); let extracta = -extracta; - let extractb: f64 = simd_extract(b, 0); - let extractc: f64 = simd_extract(c, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); fnmadd = vfmadd132sd(extracta, extractb, extractc, ROUNDING); } - simd_insert(a, 0, fnmadd) + simd_insert!(a, 0, fnmadd) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\ @@ -38799,14 +38799,14 @@ pub unsafe fn _mm_mask3_fnmadd_round_sd( k: __mmask8, ) -> __m128d { static_assert_rounding!(ROUNDING); - let mut fnmadd: f64 = simd_extract(c, 0); + let mut fnmadd: f64 = simd_extract!(c, 0); if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract(a, 0); + let extracta: f64 = simd_extract!(a, 0); let extracta = -extracta; - let extractb: f64 = simd_extract(b, 0); + let extractb: f64 = simd_extract!(b, 0); fnmadd = vfmadd132sd(extracta, extractb, fnmadd, ROUNDING); } - simd_insert(c, 0, fnmadd) + simd_insert!(c, 0, fnmadd) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, subtract the lower element in c from the negated intermediate result, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -38826,13 +38826,13 @@ pub unsafe fn _mm_mask3_fnmadd_round_sd( #[rustc_legacy_const_generics(3)] pub unsafe fn _mm_fnmsub_round_ss(a: __m128, b: __m128, c: __m128) -> __m128 { static_assert_rounding!(ROUNDING); - let extracta: f32 = simd_extract(a, 0); + let extracta: f32 = simd_extract!(a, 0); let extracta = -extracta; - let extractb: f32 = simd_extract(b, 0); - let extractc: f32 = simd_extract(c, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); let extractc = -extractc; let fnmsub = vfmadd132ss(extracta, extractb, extractc, ROUNDING); - simd_insert(a, 0, fnmsub) + simd_insert!(a, 0, fnmsub) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. 
Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -38857,15 +38857,15 @@ pub unsafe fn _mm_mask_fnmsub_round_ss( c: __m128, ) -> __m128 { static_assert_rounding!(ROUNDING); - let mut fnmsub: f32 = simd_extract(a, 0); + let mut fnmsub: f32 = simd_extract!(a, 0); if (k & 0b00000001) != 0 { let extracta = -fnmsub; - let extractb: f32 = simd_extract(b, 0); - let extractc: f32 = simd_extract(c, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); let extractc = -extractc; fnmsub = vfmadd132ss(extracta, extractb, extractc, ROUNDING); } - simd_insert(a, 0, fnmsub) + simd_insert!(a, 0, fnmsub) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -38892,14 +38892,14 @@ pub unsafe fn _mm_maskz_fnmsub_round_ss( static_assert_rounding!(ROUNDING); let mut fnmsub: f32 = 0.; if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract(a, 0); + let extracta: f32 = simd_extract!(a, 0); let extracta = -extracta; - let extractb: f32 = simd_extract(b, 0); - let extractc: f32 = simd_extract(c, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); let extractc = -extractc; fnmsub = vfmadd132ss(extracta, extractb, extractc, ROUNDING); } - simd_insert(a, 0, fnmsub) + simd_insert!(a, 0, fnmsub) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\ @@ -38924,15 +38924,15 @@ pub unsafe fn _mm_mask3_fnmsub_round_ss( k: __mmask8, ) -> __m128 { static_assert_rounding!(ROUNDING); - let mut fnmsub: f32 = simd_extract(c, 0); + let mut fnmsub: f32 = simd_extract!(c, 0); if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract(a, 0); + let extracta: f32 = simd_extract!(a, 0); let extracta = -extracta; - let extractb: f32 = simd_extract(b, 0); + let extractb: f32 = simd_extract!(b, 0); let extractc = -fnmsub; fnmsub = vfmadd132ss(extracta, extractb, extractc, ROUNDING); } - simd_insert(c, 0, fnmsub) + simd_insert!(c, 0, fnmsub) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. 
Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ @@ -38956,13 +38956,13 @@ pub unsafe fn _mm_fnmsub_round_sd( c: __m128d, ) -> __m128d { static_assert_rounding!(ROUNDING); - let extracta: f64 = simd_extract(a, 0); + let extracta: f64 = simd_extract!(a, 0); let extracta = -extracta; - let extractb: f64 = simd_extract(b, 0); - let extractc: f64 = simd_extract(c, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); let extractc = -extractc; let fnmsub = vfmadd132sd(extracta, extractb, extractc, ROUNDING); - simd_insert(a, 0, fnmsub) + simd_insert!(a, 0, fnmsub) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -38987,15 +38987,15 @@ pub unsafe fn _mm_mask_fnmsub_round_sd( c: __m128d, ) -> __m128d { static_assert_rounding!(ROUNDING); - let mut fnmsub: f64 = simd_extract(a, 0); + let mut fnmsub: f64 = simd_extract!(a, 0); if (k & 0b00000001) != 0 { let extracta = -fnmsub; - let extractb: f64 = simd_extract(b, 0); - let extractc: f64 = simd_extract(c, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); let extractc = -extractc; fnmsub = vfmadd132sd(extracta, extractb, extractc, ROUNDING); } - simd_insert(a, 0, fnmsub) + simd_insert!(a, 0, fnmsub) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -39022,14 +39022,14 @@ pub unsafe fn _mm_maskz_fnmsub_round_sd( static_assert_rounding!(ROUNDING); let mut fnmsub: f64 = 0.; if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract(a, 0); + let extracta: f64 = simd_extract!(a, 0); let extracta = -extracta; - let extractb: f64 = simd_extract(b, 0); - let extractc: f64 = simd_extract(c, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); let extractc = -extractc; fnmsub = vfmadd132sd(extracta, extractb, extractc, ROUNDING); } - simd_insert(a, 0, fnmsub) + simd_insert!(a, 0, fnmsub) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. 
Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\ @@ -39054,15 +39054,15 @@ pub unsafe fn _mm_mask3_fnmsub_round_sd( k: __mmask8, ) -> __m128d { static_assert_rounding!(ROUNDING); - let mut fnmsub: f64 = simd_extract(c, 0); + let mut fnmsub: f64 = simd_extract!(c, 0); if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract(a, 0); + let extracta: f64 = simd_extract!(a, 0); let extracta = -extracta; - let extractb: f64 = simd_extract(b, 0); + let extractb: f64 = simd_extract!(b, 0); let extractc = -fnmsub; fnmsub = vfmadd132sd(extracta, extractb, extractc, ROUNDING); } - simd_insert(c, 0, fnmsub) + simd_insert!(c, 0, fnmsub) } /// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting. @@ -39079,8 +39079,8 @@ pub unsafe fn _mm_fixupimm_ss(a: __m128, b: __m128, c: __m128i) let b = b.as_f32x4(); let c = c.as_i32x4(); let r = vfixupimmss(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION); - let fixupimm: f32 = simd_extract(r, 0); - let r = simd_insert(a, 0, fixupimm); + let fixupimm: f32 = simd_extract!(r, 0); + let r = simd_insert!(a, 0, fixupimm); transmute(r) } @@ -39103,8 +39103,8 @@ pub unsafe fn _mm_mask_fixupimm_ss( let b = b.as_f32x4(); let c = c.as_i32x4(); let fixupimm = vfixupimmss(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION); - let fixupimm: f32 = simd_extract(fixupimm, 0); - let r = simd_insert(a, 0, fixupimm); + let fixupimm: f32 = simd_extract!(fixupimm, 0); + let r = simd_insert!(a, 0, fixupimm); transmute(r) } @@ -39127,8 +39127,8 @@ pub unsafe fn _mm_maskz_fixupimm_ss( let b = b.as_f32x4(); let c = c.as_i32x4(); let fixupimm = vfixupimmssz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION); - let fixupimm: f32 = simd_extract(fixupimm, 0); - let r = simd_insert(a, 0, fixupimm); + let fixupimm: f32 = simd_extract!(fixupimm, 0); + let r = simd_insert!(a, 0, fixupimm); transmute(r) } @@ -39146,8 +39146,8 @@ pub unsafe fn _mm_fixupimm_sd(a: __m128d, b: __m128d, c: __m128 let b = b.as_f64x2(); let c = c.as_i64x2(); let fixupimm = vfixupimmsd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION); - let fixupimm: f64 = simd_extract(fixupimm, 0); - let r = simd_insert(a, 0, fixupimm); + let fixupimm: f64 = simd_extract!(fixupimm, 0); + let r = simd_insert!(a, 0, fixupimm); transmute(r) } @@ -39170,8 +39170,8 @@ pub unsafe fn _mm_mask_fixupimm_sd( let b = b.as_f64x2(); let c = c.as_i64x2(); let fixupimm = vfixupimmsd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION); - let fixupimm: f64 = simd_extract(fixupimm, 0); - let r = simd_insert(a, 0, fixupimm); + let fixupimm: f64 = simd_extract!(fixupimm, 0); + let r = simd_insert!(a, 0, fixupimm); transmute(r) } @@ -39194,8 +39194,8 @@ pub unsafe fn _mm_maskz_fixupimm_sd( let b = b.as_f64x2(); let c = c.as_i64x2(); let fixupimm = vfixupimmsdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION); - let fixupimm: f64 = simd_extract(fixupimm, 0); - let r = simd_insert(a, 0, fixupimm); + let fixupimm: f64 = simd_extract!(fixupimm, 0); + let r = simd_insert!(a, 0, fixupimm); transmute(r) } @@ -39219,8 +39219,8 @@ pub unsafe fn _mm_fixupimm_round_ss( let b = b.as_f32x4(); let c = c.as_i32x4(); let r = vfixupimmss(a, b, c, IMM8, 0b11111111, SAE); - let fixupimm: f32 = simd_extract(r, 0); - let r = 
simd_insert(a, 0, fixupimm); + let fixupimm: f32 = simd_extract!(r, 0); + let r = simd_insert!(a, 0, fixupimm); transmute(r) } @@ -39245,8 +39245,8 @@ pub unsafe fn _mm_mask_fixupimm_round_ss( let b = b.as_f32x4(); let c = c.as_i32x4(); let r = vfixupimmss(a, b, c, IMM8, k, SAE); - let fixupimm: f32 = simd_extract(r, 0); - let r = simd_insert(a, 0, fixupimm); + let fixupimm: f32 = simd_extract!(r, 0); + let r = simd_insert!(a, 0, fixupimm); transmute(r) } @@ -39271,8 +39271,8 @@ pub unsafe fn _mm_maskz_fixupimm_round_ss( let b = b.as_f32x4(); let c = c.as_i32x4(); let r = vfixupimmssz(a, b, c, IMM8, k, SAE); - let fixupimm: f32 = simd_extract(r, 0); - let r = simd_insert(a, 0, fixupimm); + let fixupimm: f32 = simd_extract!(r, 0); + let r = simd_insert!(a, 0, fixupimm); transmute(r) } @@ -39296,8 +39296,8 @@ pub unsafe fn _mm_fixupimm_round_sd( let b = b.as_f64x2(); let c = c.as_i64x2(); let r = vfixupimmsd(a, b, c, IMM8, 0b11111111, SAE); - let fixupimm: f64 = simd_extract(r, 0); - let r = simd_insert(a, 0, fixupimm); + let fixupimm: f64 = simd_extract!(r, 0); + let r = simd_insert!(a, 0, fixupimm); transmute(r) } @@ -39322,8 +39322,8 @@ pub unsafe fn _mm_mask_fixupimm_round_sd( let b = b.as_f64x2(); let c = c.as_i64x2(); let r = vfixupimmsd(a, b, c, IMM8, k, SAE); - let fixupimm: f64 = simd_extract(r, 0); - let r = simd_insert(a, 0, fixupimm); + let fixupimm: f64 = simd_extract!(r, 0); + let r = simd_insert!(a, 0, fixupimm); transmute(r) } @@ -39348,8 +39348,8 @@ pub unsafe fn _mm_maskz_fixupimm_round_sd( let b = b.as_f64x2(); let c = c.as_i64x2(); let r = vfixupimmsdz(a, b, c, IMM8, k, SAE); - let fixupimm: f64 = simd_extract(r, 0); - let r = simd_insert(a, 0, fixupimm); + let fixupimm: f64 = simd_extract!(r, 0); + let r = simd_insert!(a, 0, fixupimm); transmute(r) } @@ -39800,7 +39800,7 @@ pub unsafe fn _mm_cvt_roundu32_ss(a: __m128, b: u32) -> __m #[cfg_attr(test, assert_instr(vcvtsi2ss))] pub unsafe fn _mm_cvti32_ss(a: __m128, b: i32) -> __m128 { let b = b as f32; - simd_insert(a, 0, b) + simd_insert!(a, 0, b) } /// Convert the signed 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. @@ -39812,7 +39812,7 @@ pub unsafe fn _mm_cvti32_ss(a: __m128, b: i32) -> __m128 { #[cfg_attr(test, assert_instr(vcvtsi2sd))] pub unsafe fn _mm_cvti32_sd(a: __m128d, b: i32) -> __m128d { let b = b as f64; - simd_insert(a, 0, b) + simd_insert!(a, 0, b) } /// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\ @@ -39958,7 +39958,7 @@ pub unsafe fn _mm_cvttsd_u32(a: __m128d) -> u32 { #[cfg_attr(test, assert_instr(vcvtusi2ss))] pub unsafe fn _mm_cvtu32_ss(a: __m128, b: u32) -> __m128 { let b = b as f32; - simd_insert(a, 0, b) + simd_insert!(a, 0, b) } /// Convert the unsigned 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. 
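Aside for readers of this patch: the scalar fmadd/fmsub/fnmadd/fnmsub variants rewritten above all share the same lane-0 masking pattern (only the sign handling differs). A minimal stand-alone sketch of that selection logic in plain Rust, illustrative only, with `mul_add` standing in for the `vfmadd132ss` intrinsic and the rounding argument omitted:

// How the writemask (`_mm_mask_*`), zeromask (`_mm_maskz_*`) and `_mm_mask3_*`
// scalar variants pick their lane-0 result when mask bit 0 is clear.
fn fma_lane0_variants(k: u8, a: f32, b: f32, c: f32) -> (f32, f32, f32) {
    // Fused a * b + c, standing in for the vfmadd132ss call in the intrinsics above.
    let computed = a.mul_add(b, c);
    let mask = if k & 1 != 0 { computed } else { a };    // writemask: fall back to a
    let maskz = if k & 1 != 0 { computed } else { 0.0 }; // zeromask: fall back to 0.0
    let mask3 = if k & 1 != 0 { computed } else { c };   // mask3: fall back to c
    (mask, maskz, mask3)
}

fn main() {
    assert_eq!(fma_lane0_variants(0, 2.0, 3.0, 4.0), (2.0, 0.0, 4.0));
    assert_eq!(fma_lane0_variants(1, 2.0, 3.0, 4.0), (10.0, 10.0, 10.0));
}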
@@ -39970,7 +39970,7 @@ pub unsafe fn _mm_cvtu32_ss(a: __m128, b: u32) -> __m128 { #[cfg_attr(test, assert_instr(vcvtusi2sd))] pub unsafe fn _mm_cvtu32_sd(a: __m128d, b: u32) -> __m128d { let b = b as f64; - simd_insert(a, 0, b) + simd_insert!(a, 0, b) } /// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\ diff --git a/crates/core_arch/src/x86/sse.rs b/crates/core_arch/src/x86/sse.rs index 17c4c07e94..2ec0ad4c1b 100644 --- a/crates/core_arch/src/x86/sse.rs +++ b/crates/core_arch/src/x86/sse.rs @@ -853,7 +853,7 @@ pub unsafe fn _mm_cvtt_ss2si(a: __m128) -> i32 { // no-op, and on Windows it's just a `mov`. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtss_f32(a: __m128) -> f32 { - simd_extract(a, 0) + simd_extract!(a, 0) } /// Converts a 32 bit integer to a 32 bit float. The result vector is the input @@ -1224,7 +1224,7 @@ pub unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i { #[cfg_attr(test, assert_instr(movss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_store_ss(p: *mut f32, a: __m128) { - *p = simd_extract(a, 0); + *p = simd_extract!(a, 0); } /// Stores the lowest 32 bit float of `a` repeated four times into *aligned* diff --git a/crates/core_arch/src/x86/sse2.rs b/crates/core_arch/src/x86/sse2.rs index afc2aaede1..d1bb92ce6a 100644 --- a/crates/core_arch/src/x86/sse2.rs +++ b/crates/core_arch/src/x86/sse2.rs @@ -955,7 +955,7 @@ pub unsafe fn _mm_cvtepi32_pd(a: __m128i) -> __m128d { #[cfg_attr(test, assert_instr(cvtsi2sd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d { - simd_insert(a, 0, b as f64) + simd_insert!(a, 0, b as f64) } /// Converts packed 32-bit integers in `a` to packed single-precision (32-bit) @@ -1000,7 +1000,7 @@ pub unsafe fn _mm_cvtsi32_si128(a: i32) -> __m128i { #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsi128_si32(a: __m128i) -> i32 { - simd_extract(a.as_i32x4(), 0) + simd_extract!(a.as_i32x4(), 0) } /// Sets packed 64-bit integers with the supplied values, from highest to @@ -1399,7 +1399,7 @@ pub unsafe fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_extract_epi16(a: __m128i) -> i32 { static_assert_uimm_bits!(IMM8, 3); - simd_extract::<_, u16>(a.as_u16x8(), IMM8 as u32) as i32 + simd_extract!(a.as_u16x8(), IMM8 as u32, u16) as i32 } /// Returns a new vector where the `imm8` element of `a` is replaced with `i`. @@ -1412,7 +1412,7 @@ pub unsafe fn _mm_extract_epi16(a: __m128i) -> i32 { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_insert_epi16(a: __m128i, i: i32) -> __m128i { static_assert_uimm_bits!(IMM8, 3); - transmute(simd_insert(a.as_i16x8(), IMM8 as u32, i as i16)) + transmute(simd_insert!(a.as_i16x8(), IMM8 as u32, i as i16)) } /// Returns a mask of the most significant bit of each element in `a`. 
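The call sites in these hunks use a two-argument form, `simd_extract!(v, idx)`, and a three-argument form with an explicit element type, `simd_extract!(v, idx, ty)`. The shape those call sites imply is roughly the sketch below; this is only an assumption about how such wrappers could look (the index wrapped in an inline const block), not a copy of the definitions the patch itself adds, which are not shown in this excerpt.

// Sketch only: plausible wrapper macros matching the call sites in this diff.
// `simd_extract` / `simd_insert` are the compiler intrinsics already in scope
// in these modules; the macros merely force the index into `const { .. }`
// so it is guaranteed to be a compile-time constant.
macro_rules! simd_extract {
    ($x:expr, $idx:expr $(,)?) => {
        simd_extract($x, const { $idx })
    };
    ($x:expr, $idx:expr, $ty:ty $(,)?) => {
        simd_extract::<_, $ty>($x, const { $idx })
    };
}

macro_rules! simd_insert {
    ($x:expr, $idx:expr, $val:expr $(,)?) => {
        simd_insert($x, const { $idx }, $val)
    };
}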
@@ -1623,7 +1623,7 @@ pub unsafe fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(addsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d { - simd_insert(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b)) + simd_insert!(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b)) } /// Adds packed double-precision (64-bit) floating-point elements in `a` and @@ -1647,7 +1647,7 @@ pub unsafe fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d { #[cfg_attr(test, assert_instr(divsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d { - simd_insert(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b)) + simd_insert!(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b)) } /// Divide packed double-precision (64-bit) floating-point elements in `a` by @@ -1719,7 +1719,7 @@ pub unsafe fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d { #[cfg_attr(test, assert_instr(mulsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d { - simd_insert(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b)) + simd_insert!(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b)) } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` @@ -1743,7 +1743,7 @@ pub unsafe fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d { #[cfg_attr(test, assert_instr(sqrtsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d { - simd_insert(a, 0, _mm_cvtsd_f64(sqrtsd(b))) + simd_insert!(a, 0, _mm_cvtsd_f64(sqrtsd(b))) } /// Returns a new vector with the square root of each of the values in `a`. @@ -1766,7 +1766,7 @@ pub unsafe fn _mm_sqrt_pd(a: __m128d) -> __m128d { #[cfg_attr(test, assert_instr(subsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d { - simd_insert(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b)) + simd_insert!(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b)) } /// Subtract packed double-precision (64-bit) floating-point elements in `b` @@ -1879,7 +1879,7 @@ pub unsafe fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d { #[cfg_attr(test, assert_instr(cmpltsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d { - simd_insert(_mm_cmplt_sd(b, a), 1, simd_extract::<_, f64>(a, 1)) + simd_insert!(_mm_cmplt_sd(b, a), 1, simd_extract!(a, 1, f64)) } /// Returns a new vector with the low element of `a` replaced by the @@ -1891,7 +1891,7 @@ pub unsafe fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d { #[cfg_attr(test, assert_instr(cmplesd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d { - simd_insert(_mm_cmple_sd(b, a), 1, simd_extract::<_, f64>(a, 1)) + simd_insert!(_mm_cmple_sd(b, a), 1, simd_extract!(a, 1, f64)) } /// Returns a new vector with the low element of `a` replaced by the result @@ -1966,7 +1966,7 @@ pub unsafe fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d { #[cfg_attr(test, assert_instr(cmpnltsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d { - simd_insert(_mm_cmpnlt_sd(b, a), 1, simd_extract::<_, f64>(a, 1)) + simd_insert!(_mm_cmpnlt_sd(b, a), 1, simd_extract!(a, 1, f64)) } /// Returns a new vector with the low element of `a` replaced by the @@ -1978,7 +1978,7 @@ pub unsafe fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d { 
#[cfg_attr(test, assert_instr(cmpnlesd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d { - simd_insert(_mm_cmpnle_sd(b, a), 1, simd_extract::<_, f64>(a, 1)) + simd_insert!(_mm_cmpnle_sd(b, a), 1, simd_extract!(a, 1, f64)) } /// Compares corresponding elements in `a` and `b` for equality. @@ -2319,7 +2319,7 @@ pub unsafe fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 { #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsd_f64(a: __m128d) -> f64 { - simd_extract(a, 0) + simd_extract!(a, 0) } /// Converts the lower single-precision (32-bit) floating-point element in `b` @@ -2493,7 +2493,7 @@ pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d { #[cfg_attr(test, assert_instr(movhps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d { - _mm_setr_pd(simd_extract(a, 0), *mem_addr) + _mm_setr_pd(simd_extract!(a, 0), *mem_addr) } /// Loads a double-precision value into the low-order bits of a 128-bit @@ -2506,7 +2506,7 @@ pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d { #[cfg_attr(test, assert_instr(movlps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d { - _mm_setr_pd(*mem_addr, simd_extract(a, 1)) + _mm_setr_pd(*mem_addr, simd_extract!(a, 1)) } /// Stores a 128-bit floating point vector of `[2 x double]` to a 128-bit @@ -2533,7 +2533,7 @@ pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) { #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) { - *mem_addr = simd_extract(a, 0) + *mem_addr = simd_extract!(a, 0) } /// Stores 128-bits (composed of 2 packed double-precision (64-bit) @@ -2615,7 +2615,7 @@ pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) { #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movhps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) { - *mem_addr = simd_extract(a, 1); + *mem_addr = simd_extract!(a, 1); } /// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a @@ -2627,7 +2627,7 @@ pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) { #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) { - *mem_addr = simd_extract(a, 0); + *mem_addr = simd_extract!(a, 0); } /// Loads a double-precision (64-bit) floating-point element from memory @@ -2713,7 +2713,7 @@ pub unsafe fn _mm_shuffle_pd(a: __m128d, b: __m128d) -> __m128d #[cfg_attr(test, assert_instr(movsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d { - _mm_setr_pd(simd_extract(b, 0), simd_extract(a, 1)) + _mm_setr_pd(simd_extract!(b, 0), simd_extract!(a, 1)) } /// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit diff --git a/crates/core_arch/src/x86/sse41.rs b/crates/core_arch/src/x86/sse41.rs index af51a53feb..7fc3c79428 100644 --- a/crates/core_arch/src/x86/sse41.rs +++ b/crates/core_arch/src/x86/sse41.rs @@ -201,7 +201,7 @@ pub unsafe fn _mm_blend_ps(a: __m128, b: __m128) -> __m128 { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_extract_ps(a: 
__m128) -> i32 { static_assert_uimm_bits!(IMM8, 2); - simd_extract::<_, f32>(a, IMM8 as u32).to_bits() as i32 + simd_extract!(a, IMM8 as u32, f32).to_bits() as i32 } /// Extracts an 8-bit integer from `a`, selected with `IMM8`. Returns a 32-bit @@ -217,7 +217,7 @@ pub unsafe fn _mm_extract_ps(a: __m128) -> i32 { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_extract_epi8(a: __m128i) -> i32 { static_assert_uimm_bits!(IMM8, 4); - simd_extract::<_, u8>(a.as_u8x16(), IMM8 as u32) as i32 + simd_extract!(a.as_u8x16(), IMM8 as u32, u8) as i32 } /// Extracts an 32-bit integer from `a` selected with `IMM8` @@ -233,7 +233,7 @@ pub unsafe fn _mm_extract_epi8(a: __m128i) -> i32 { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_extract_epi32(a: __m128i) -> i32 { static_assert_uimm_bits!(IMM8, 2); - simd_extract::<_, i32>(a.as_i32x4(), IMM8 as u32) + simd_extract!(a.as_i32x4(), IMM8 as u32, i32) } /// Select a single value in `a` to store at some position in `b`, @@ -281,7 +281,7 @@ pub unsafe fn _mm_insert_ps(a: __m128, b: __m128) -> __m128 { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_insert_epi8(a: __m128i, i: i32) -> __m128i { static_assert_uimm_bits!(IMM8, 4); - transmute(simd_insert(a.as_i8x16(), IMM8 as u32, i as i8)) + transmute(simd_insert!(a.as_i8x16(), IMM8 as u32, i as i8)) } /// Returns a copy of `a` with the 32-bit integer from `i` inserted at a @@ -295,7 +295,7 @@ pub unsafe fn _mm_insert_epi8(a: __m128i, i: i32) -> __m128i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_insert_epi32(a: __m128i, i: i32) -> __m128i { static_assert_uimm_bits!(IMM8, 2); - transmute(simd_insert(a.as_i32x4(), IMM8 as u32, i)) + transmute(simd_insert!(a.as_i32x4(), IMM8 as u32, i)) } /// Compares packed 8-bit integers in `a` and `b` and returns packed maximum diff --git a/crates/core_arch/src/x86_64/avx.rs b/crates/core_arch/src/x86_64/avx.rs index f699f61648..5715097d72 100644 --- a/crates/core_arch/src/x86_64/avx.rs +++ b/crates/core_arch/src/x86_64/avx.rs @@ -29,7 +29,7 @@ use crate::{ #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_insert_epi64(a: __m256i, i: i64) -> __m256i { static_assert_uimm_bits!(INDEX, 2); - transmute(simd_insert(a.as_i64x4(), INDEX as u32, i)) + transmute(simd_insert!(a.as_i64x4(), INDEX as u32, i)) } #[cfg(test)] diff --git a/crates/core_arch/src/x86_64/avx2.rs b/crates/core_arch/src/x86_64/avx2.rs index 3388568eb4..b3b1431e56 100644 --- a/crates/core_arch/src/x86_64/avx2.rs +++ b/crates/core_arch/src/x86_64/avx2.rs @@ -30,7 +30,7 @@ use crate::core_arch::{simd_llvm::*, x86::*}; #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_extract_epi64(a: __m256i) -> i64 { static_assert_uimm_bits!(INDEX, 2); - simd_extract(a.as_i64x4(), INDEX as u32) + simd_extract!(a.as_i64x4(), INDEX as u32) } #[cfg(test)] diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index d31110d758..fa58a443dc 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -59,7 +59,7 @@ pub unsafe fn _mm_cvtsd_u64(a: __m128d) -> u64 { #[cfg_attr(test, assert_instr(vcvtsi2ss))] pub unsafe fn _mm_cvti64_ss(a: __m128, b: i64) -> __m128 { let b = b as f32; - simd_insert(a, 0, b) + simd_insert!(a, 0, b) } /// Convert the signed 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. 
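Usage aside (not part of the patch): the lane index of these extract/insert intrinsics is a const generic, so callers must pass a compile-time constant, which is the same property the const-block wrapping enforces on the underlying `simd_extract`/`simd_insert` calls. A small, hypothetical caller, assuming an x86_64 build target:

use std::arch::x86_64::{__m128i, _mm_extract_epi8, _mm_set1_epi8};

// Hypothetical helper; only sound to call after SSE4.1 support has been verified.
#[target_feature(enable = "sse4.1")]
unsafe fn byte3(v: __m128i) -> i32 {
    // The index 3 (zero-based lane) is passed as the IMM8 const generic,
    // so it must be known at compile time.
    _mm_extract_epi8::<3>(v)
}

fn main() {
    if is_x86_feature_detected!("sse4.1") {
        let v = unsafe { _mm_set1_epi8(7) };
        assert_eq!(unsafe { byte3(v) }, 7);
    }
}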
@@ -71,7 +71,7 @@ pub unsafe fn _mm_cvti64_ss(a: __m128, b: i64) -> __m128 { #[cfg_attr(test, assert_instr(vcvtsi2sd))] pub unsafe fn _mm_cvti64_sd(a: __m128d, b: i64) -> __m128d { let b = b as f64; - simd_insert(a, 0, b) + simd_insert!(a, 0, b) } /// Convert the unsigned 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. @@ -83,7 +83,7 @@ pub unsafe fn _mm_cvti64_sd(a: __m128d, b: i64) -> __m128d { #[cfg_attr(test, assert_instr(vcvtusi2ss))] pub unsafe fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 { let b = b as f32; - simd_insert(a, 0, b) + simd_insert!(a, 0, b) } /// Convert the unsigned 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. @@ -95,7 +95,7 @@ pub unsafe fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 { #[cfg_attr(test, assert_instr(vcvtusi2sd))] pub unsafe fn _mm_cvtu64_sd(a: __m128d, b: u64) -> __m128d { let b = b as f64; - simd_insert(a, 0, b) + simd_insert!(a, 0, b) } /// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst. diff --git a/crates/core_arch/src/x86_64/sse2.rs b/crates/core_arch/src/x86_64/sse2.rs index 9619cb7480..f0c7623ac0 100644 --- a/crates/core_arch/src/x86_64/sse2.rs +++ b/crates/core_arch/src/x86_64/sse2.rs @@ -107,7 +107,7 @@ pub unsafe fn _mm_cvtsi64x_si128(a: i64) -> __m128i { #[cfg_attr(all(test, not(windows)), assert_instr(movq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsi128_si64(a: __m128i) -> i64 { - simd_extract(a.as_i64x2(), 0) + simd_extract!(a.as_i64x2(), 0) } /// Returns the lowest element of `a`. 
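For orientation, the scalar conversion hunks here all follow one pattern: convert `b`, write it into lane 0, and pass the remaining lane(s) through from `a`. A plain-Rust model of that behaviour, with arrays standing in for the SIMD vector types (illustrative only):

fn cvti64_sd_model(a: [f64; 2], b: i64) -> [f64; 2] {
    // Lane 0 receives the converted integer; lane 1 is copied from `a`.
    [b as f64, a[1]]
}

fn main() {
    assert_eq!(cvti64_sd_model([1.5, 2.5], 3), [3.0, 2.5]);
}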
@@ -130,7 +130,7 @@ pub unsafe fn _mm_cvtsi128_si64x(a: __m128i) -> i64 { #[cfg_attr(test, assert_instr(cvtsi2sd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsi64_sd(a: __m128d, b: i64) -> __m128d { - simd_insert(a, 0, b as f64) + simd_insert!(a, 0, b as f64) } /// Returns `a` with its lower element replaced by `b` after converting it to diff --git a/crates/core_arch/src/x86_64/sse41.rs b/crates/core_arch/src/x86_64/sse41.rs index d815a69a7e..49c6d95943 100644 --- a/crates/core_arch/src/x86_64/sse41.rs +++ b/crates/core_arch/src/x86_64/sse41.rs @@ -18,7 +18,7 @@ use stdarch_test::assert_instr; #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_extract_epi64(a: __m128i) -> i64 { static_assert_uimm_bits!(IMM1, 1); - simd_extract(a.as_i64x2(), IMM1 as u32) + simd_extract!(a.as_i64x2(), IMM1 as u32) } /// Returns a copy of `a` with the 64-bit integer from `i` inserted at a @@ -32,7 +32,7 @@ pub unsafe fn _mm_extract_epi64(a: __m128i) -> i64 { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_insert_epi64(a: __m128i, i: i64) -> __m128i { static_assert_uimm_bits!(IMM1, 1); - transmute(simd_insert(a.as_i64x2(), IMM1 as u32, i)) + transmute(simd_insert!(a.as_i64x2(), IMM1 as u32, i)) } #[cfg(test)] diff --git a/crates/stdarch-gen/neon.spec b/crates/stdarch-gen/neon.spec index 559b8f4473..d6a2212cf2 100644 --- a/crates/stdarch-gen/neon.spec +++ b/crates/stdarch-gen/neon.spec @@ -156,7 +156,7 @@ generate float*_t /// Floating-point absolute difference name = vabd -multi_fn = simd_extract, {vabd-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0 +multi_fn = simd_extract!, {vabd-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0 a = 1.0 b = 9.0 validate 8.0 @@ -341,7 +341,7 @@ generate i64:u64, u64 /// Floating-point compare equal name = vceq -multi_fn = simd_extract, {vceq-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0 +multi_fn = simd_extract!, {vceq-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0 a = 1. b = 2. validate 0 @@ -390,7 +390,7 @@ generate i64:u64, u64 /// Floating-point compare bitwise equal to zero name = vceqz -multi_fn = simd_extract, {vceqz-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0 +multi_fn = simd_extract!, {vceqz-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0 a = 1. validate 0 @@ -453,7 +453,7 @@ generate i32:u32:i32, i64:u64:i64 /// Signed saturating accumulate of unsigned value name = vuqadd out-suffix -multi_fn = simd_extract, {vuqadd-out_ntt-noext, {vdup_n-out_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0 +multi_fn = simd_extract!, {vuqadd-out_ntt-noext, {vdup_n-out_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0 a = 1 b = 2 validate 3 @@ -530,7 +530,7 @@ generate i64:u64, u64 /// Floating-point compare greater than name = vcgt -multi_fn = simd_extract, {vcgt-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0 +multi_fn = simd_extract!, {vcgt-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0 a = 1. b = 2. validate 0 @@ -592,7 +592,7 @@ generate i64:u64, u64 /// Floating-point compare less than name = vclt -multi_fn = simd_extract, {vclt-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0 +multi_fn = simd_extract!, {vclt-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0 a = 2. b = 1. 
validate 0 @@ -629,7 +629,7 @@ generate i64:u64, u64 /// Floating-point compare greater than or equal name = vcge -multi_fn = simd_extract, {vcge-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0 +multi_fn = simd_extract!, {vcge-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0 a = 1. b = 2. validate 0 @@ -674,7 +674,7 @@ generate i64:u64, u64 /// Floating-point compare less than or equal name = vcle -multi_fn = simd_extract, {vcle-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0 +multi_fn = simd_extract!, {vcle-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0 a = 2. b = 1. validate 0 @@ -756,7 +756,7 @@ generate i64:u64 /// Floating-point compare greater than or equal to zero name = vcgez -multi_fn = simd_extract, {vcgez-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0 +multi_fn = simd_extract!, {vcgez-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0 a = -1. validate 0 @@ -794,7 +794,7 @@ generate i64:u64 /// Floating-point compare greater than zero name = vcgtz -multi_fn = simd_extract, {vcgtz-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0 +multi_fn = simd_extract!, {vcgtz-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0 a = -1. validate 0 @@ -832,7 +832,7 @@ generate i64:u64 /// Floating-point compare less than or equal to zero name = vclez -multi_fn = simd_extract, {vclez-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0 +multi_fn = simd_extract!, {vclez-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0 a = 2. validate 0 @@ -870,7 +870,7 @@ generate i64:u64 /// Floating-point compare less than zero name = vcltz -multi_fn = simd_extract, {vcltz-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0 +multi_fn = simd_extract!, {vcltz-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0 a = 2. validate 0 @@ -1190,7 +1190,7 @@ generate float64x2_t:float32x2_t /// Floating-point convert to lower precision narrow, rounding to odd name = vcvtx double-suffixes -multi_fn = simd_extract, {vcvtx-_f32_f64-noext, {vdupq_n-in_ntt-noext, a}}, 0 +multi_fn = simd_extract!, {vcvtx-_f32_f64-noext, {vdupq_n-in_ntt-noext, a}}, 0 a = -1.0 validate -1.0 @@ -1513,7 +1513,7 @@ name = vdup lane-suffixes constn = N multi_fn = static_assert_imm-in_exp_len-N -multi_fn = transmute--, {simd_extract, a, N as u32} +multi_fn = transmute--, {simd_extract!, a, N as u32} a = 0, 1 n = HFLEN validate 1 @@ -1529,7 +1529,7 @@ name = vdup lane-suffixes constn = N multi_fn = static_assert_imm-in_exp_len-N -multi_fn = transmute--, {simd_extract, a, N as u32} +multi_fn = transmute--, {simd_extract!, a, N as u32} a = 0., 1. n = HFLEN validate 1. @@ -1542,7 +1542,7 @@ name = vdup lane-suffixes constn = N multi_fn = static_assert_imm-in_exp_len-N -multi_fn = simd_extract, a, N as u32 +multi_fn = simd_extract!, a, N as u32 a = 1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16 n = HFLEN validate 1 @@ -1557,7 +1557,7 @@ name = vdup lane-suffixes constn = N multi_fn = static_assert_imm-in_exp_len-N -multi_fn = simd_extract, a, N as u32 +multi_fn = simd_extract!, a, N as u32 a = 1., 1., 1., 4. n = HFLEN validate 1. 
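The `multi_fn = simd_extract!, ...` lines in this spec file are templates that stdarch-gen expands into intrinsic bodies, which is why the macro rename has to happen here as well as in the generated sources. As an illustration of the shape of the output for the `vcltz` scalar entry above (the function name and omitted attributes are assumptions; the authoritative result is whatever the generator writes into generated.rs):

// Rough shape of one generated scalar intrinsic for the `vcltz` spec entry.
// Attributes (#[inline], assert_instr, stability) are omitted here.
pub unsafe fn vcltzs_f32(a: f32) -> u32 {
    simd_extract!(vcltz_f32(vdup_n_f32(a)), 0)
}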
@@ -2092,7 +2092,7 @@ generate int*_t

 /// Signed saturating negate
 name = vqneg
-multi_fn = simd_extract, {vqneg-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0
+multi_fn = simd_extract!, {vqneg-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0
 a = 1
 validate -1

@@ -2121,7 +2121,7 @@ generate int*_t, int64x*_t
 name = vqsub
 multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
 multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
-multi_fn = simd_extract, {vqsub-in_ntt-noext, a, b}, 0
+multi_fn = simd_extract!, {vqsub-in_ntt-noext, a, b}, 0
 a = 42
 b = 1
 validate 41
@@ -2300,7 +2300,7 @@ generate int*_t, int64x*_t
 name = vqadd
 multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
 multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
-multi_fn = simd_extract, {vqadd-in_ntt-noext, a, b}, 0
+multi_fn = simd_extract!, {vqadd-in_ntt-noext, a, b}, 0
 a = 42
 b = 1
 validate 43
@@ -2984,7 +2984,7 @@ generate *const f32:float32x2x4_t:float32x2x4_t, *const f32:float32x4x4_t:float3
 name = vst1
 in1-lane-nox
 multi_fn = static_assert_imm-in_exp_len-LANE
-multi_fn = *a, {simd_extract, b, LANE as u32}
+multi_fn = *a, {simd_extract!, b, LANE as u32}
 constn = LANE
 a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
 n = 0
@@ -3005,7 +3005,7 @@ generate *mut p64:poly64x1_t:void, *mut p64:poly64x2_t:void
 name = vst1
 in1-lane-nox
 multi_fn = static_assert_imm-in_exp_len-LANE
-multi_fn = *a, {simd_extract, b, LANE as u32}
+multi_fn = *a, {simd_extract!, b, LANE as u32}
 constn = LANE
 a = 0., 1., 2., 3., 4., 5., 6., 7., 8.
 n = 0
@@ -3696,7 +3696,7 @@ name = vmul
 lane-suffixes
 constn = LANE
 multi_fn = static_assert_imm-in_exp_len-LANE
-multi_fn = simd_mul, a, {transmute--, {simd_extract, b, LANE as u32}}
+multi_fn = simd_mul, a, {transmute--, {simd_extract!, b, LANE as u32}}
 a = 1., 2., 3., 4.
 b = 2., 0., 0., 0.
 n = 0
@@ -3726,7 +3726,7 @@ generate float32x2_t, float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x2
 name = vmuls_lane
 constn = LANE
 multi_fn = static_assert_imm-in_exp_len-LANE
-multi_fn = simd_extract, b:f32, b, LANE as u32
+multi_fn = simd_extract!, b:f32, b, LANE as u32
 multi_fn = a * b
 a = 1.
 b = 2., 0., 0., 0.
@@ -3739,7 +3739,7 @@ generate f32:float32x2_t:f32, f32:float32x4_t:f32
 name = vmuld_lane
 constn = LANE
 multi_fn = static_assert_imm-in_exp_len-LANE
-multi_fn = simd_extract, b:f64, b, LANE as u32
+multi_fn = simd_extract!, b:f64, b, LANE as u32
 multi_fn = a * b
 a = 1.
 b = 2., 0.
@@ -3845,7 +3845,7 @@ generate poly8x16_t:poly8x16_t:poly16x8_t
 /// Polynomial multiply long
 name = vmull_high
 no-q
-multi_fn = vmull-noqself-noext, {simd_extract, a, 1}, {simd_extract, b, 1}
+multi_fn = vmull-noqself-noext, {simd_extract!, a, 1}, {simd_extract!, b, 1}
 a = 1, 15
 b = 1, 3
 validate 17
@@ -3931,7 +3931,7 @@ name = vmulx
 lane-suffixes
 constn = LANE
 multi_fn = static_assert_imm-in_exp_len-LANE
-multi_fn = vmulx-in0-noext, a, {transmute--, {simd_extract, b, LANE as u32}}
+multi_fn = vmulx-in0-noext, a, {transmute--, {simd_extract!, b, LANE as u32}}
 a = 1.
 b = 2., 0.
 n = 0
@@ -3970,7 +3970,7 @@ name = vmulx
 lane-suffixes
 constn = LANE
 multi_fn = static_assert_imm-in_exp_len-LANE
-multi_fn = vmulx-out-noext, a, {simd_extract, b, LANE as u32}
+multi_fn = vmulx-out-noext, a, {simd_extract!, b, LANE as u32}
 a = 2.
 b = 3., 0., 0., 0.

@@ -4022,7 +4022,7 @@ name = vfma
 in2-lane-suffixes
 constn = LANE
 multi_fn = static_assert_imm-in2_exp_len-LANE
-multi_fn = vfma-out-noext, a, b, {vdup-nout-noext, {simd_extract, c, LANE as u32}}
+multi_fn = vfma-out-noext, a, b, {vdup-nout-noext, {simd_extract!, c, LANE as u32}}
 a = 2., 3., 4., 5.
 b = 6., 4., 7., 8.
 c = 2., 0., 0., 0.
@@ -4041,7 +4041,7 @@ name = vfma
 in2-lane-suffixes
 constn = LANE
 multi_fn = static_assert_imm-in2_exp_len-LANE
-multi_fn = simd_extract, c:out_t, c, LANE as u32
+multi_fn = simd_extract!, c:out_t, c, LANE as u32
 multi_fn = vfma-in2lane-_, b, c, a
 a = 2.
 b = 6.
@@ -4096,7 +4096,7 @@ name = vfms
 in2-lane-suffixes
 constn = LANE
 multi_fn = static_assert_imm-in2_exp_len-LANE
-multi_fn = vfms-out-noext, a, b, {vdup-nout-noext, {simd_extract, c, LANE as u32}}
+multi_fn = vfms-out-noext, a, b, {vdup-nout-noext, {simd_extract!, c, LANE as u32}}
 a = 14., 11., 18., 21.
 b = 6., 4., 7., 8.
 c = 2., 0., 0., 0.
@@ -4982,8 +4982,8 @@ generate float32x2_t
 /// Floating-point add pairwise
 name = vpadd
 out-suffix
-multi_fn = simd_extract, a1:out_t, a, 0
-multi_fn = simd_extract, a2:out_t, a, 1
+multi_fn = simd_extract!, a1:out_t, a, 0
+multi_fn = simd_extract!, a2:out_t, a, 1
 multi_fn = a1 + a2
 a = 1., 2.
 validate 3.
@@ -5050,7 +5050,7 @@ generate int16x4_t:int16x4_t:int32x4_t, int32x2_t:int32x2_t:int64x2_t
 name = vqdmull
 multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
 multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
-multi_fn = simd_extract, {vqdmull-in_ntt-noext, a, b}, 0
+multi_fn = simd_extract!, {vqdmull-in_ntt-noext, a, b}, 0
 a = 2
 b = 3
 validate 12
@@ -5127,7 +5127,7 @@ generate int16x4_t:int16x4_t:int32x4_t, int32x2_t:int32x2_t:int64x2_t
 name = vqdmullh_lane
 constn = N
 multi_fn = static_assert_imm-in_exp_len-N
-multi_fn = simd_extract, b:in_t0, b, N as u32
+multi_fn = simd_extract!, b:in_t0, b, N as u32
 multi_fn = vqdmullh-noqself-noext, a, b
 a = 2
 b = 0, 2, 2, 0, 2, 0, 0, 0
@@ -5141,7 +5141,7 @@ generate i16:int16x4_t:i32, i16:int16x8_t:i32
 name = vqdmulls_lane
 constn = N
 multi_fn = static_assert_imm-in_exp_len-N
-multi_fn = simd_extract, b:in_t0, b, N as u32
+multi_fn = simd_extract!, b:in_t0, b, N as u32
 multi_fn = vqdmulls-noqself-noext, a, b
 a = 2
 b = 0, 2, 2, 0, 2, 0, 0, 0
@@ -5266,7 +5266,7 @@ generate int32x4_t:int16x8_t:int16x4_t:int32x4_t, int32x4_t:int16x8_t:int16x8_t:
 /// Signed saturating doubling multiply-add long
 name = vqdmlal
 multi_fn = vqdmull-in_ntt-noext, x:out_long_ntt, {vdup_n-in_ntt-noext, b}, {vdup_n-in_ntt-noext, c}
-multi_fn = vqadd-out-noext, a, {simd_extract, x, 0}
+multi_fn = vqadd-out-noext, a, {simd_extract!, x, 0}
 a = 1
 b = 1
 c = 2
@@ -5292,7 +5292,7 @@ name = vqdmlalh_lane
 in2-suffix
 constn = LANE
 multi_fn = static_assert_imm-in2_exp_len-LANE
-multi_fn = vqdmlal-self-noext, a, b, {simd_extract, c, LANE as u32}
+multi_fn = vqdmlal-self-noext, a, b, {simd_extract!, c, LANE as u32}
 a = 1
 b = 1
 c = 2, 1, 1, 1, 1, 1, 1, 1
@@ -5390,7 +5390,7 @@ generate int32x4_t:int16x8_t:int16x4_t:int32x4_t, int32x4_t:int16x8_t:int16x8_t:
 /// Signed saturating doubling multiply-subtract long
 name = vqdmlsl
 multi_fn = vqdmull-in_ntt-noext, x:out_long_ntt, {vdup_n-in_ntt-noext, b}, {vdup_n-in_ntt-noext, c}
-multi_fn = vqsub-out-noext, a, {simd_extract, x, 0}
+multi_fn = vqsub-out-noext, a, {simd_extract!, x, 0}
 a = 10
 b = 1
 c = 2
@@ -5416,7 +5416,7 @@ name = vqdmlslh_lane
 in2-suffix
 constn = LANE
 multi_fn = static_assert_imm-in2_exp_len-LANE
-multi_fn = vqdmlsl-self-noext, a, b, {simd_extract, c, LANE as u32}
+multi_fn = vqdmlsl-self-noext, a, b, {simd_extract!, c, LANE as u32}
 a = 10
 b = 1
 c = 2, 1, 1, 1, 1, 1, 1, 1
@@ -5445,7 +5445,7 @@ generate int16x4_t, int16x8_t, int32x2_t, int32x4_t
 name = vqdmulh
 multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
 multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
-multi_fn = simd_extract, {vqdmulh-in_ntt-noext, a, b}, 0
+multi_fn = simd_extract!, {vqdmulh-in_ntt-noext, a, b}, 0
 a = 1
 b = 2
 validate 0
@@ -5483,7 +5483,7 @@ generate int16x8_t:i16:int16x8_t, int32x4_t:i32:int32x4_t
 name = vqdmulhh_lane
 constn = N
 multi_fn = static_assert_imm-in_exp_len-N
-multi_fn = simd_extract, b:in_t0, b, N as u32
+multi_fn = simd_extract!, b:in_t0, b, N as u32
 multi_fn = vqdmulhh-out_ntt-noext, a, b
 a = 2
 b = 0, 0, MAX, 0, 0, 0, 0, 0
@@ -5497,7 +5497,7 @@ generate i16:int16x4_t:i16, i16:int16x8_t:i16
 name = vqdmulhs_lane
 constn = N
 multi_fn = static_assert_imm-in_exp_len-N
-multi_fn = simd_extract, b:in_t0, b, N as u32
+multi_fn = simd_extract!, b:in_t0, b, N as u32
 multi_fn = vqdmulhs-out_ntt-noext, a, b
 a = 2
 b = 0, MAX, 0, 0
@@ -5512,7 +5512,7 @@ name = vqdmulh
 lane-suffixes
 constn = LANE
 multi_fn = static_assert_imm-in2_exp_len-LANE
-multi_fn = vqdmulh-out-noext, a, {vdup-nout-noext, {simd_extract, b, LANE as u32}}
+multi_fn = vqdmulh-out-noext, a, {vdup-nout-noext, {simd_extract!, b, LANE as u32}}
 a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
 b = 2, 1, 1, 1, 1, 1, 1, 1
 n = 0
@@ -5551,7 +5551,7 @@ generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t

 /// Saturating extract narrow
 name = vqmovn
-multi_fn = simd_extract, {vqmovn-in_ntt-noext, {vdupq_n-in_ntt-noext, a}}, 0
+multi_fn = simd_extract!, {vqmovn-in_ntt-noext, {vdupq_n-in_ntt-noext, a}}, 0
 a = 1
 validate 1

@@ -5600,7 +5600,7 @@ generate int16x8_t:uint8x8_t, int32x4_t:uint16x4_t, int64x2_t:uint32x2_t

 /// Signed saturating extract unsigned narrow
 name = vqmovun
-multi_fn = simd_extract, {vqmovun-in_ntt-noext, {vdupq_n-in_ntt-noext, a}}, 0
+multi_fn = simd_extract!, {vqmovun-in_ntt-noext, {vdupq_n-in_ntt-noext, a}}, 0
 a = 1
 validate 1

@@ -5632,7 +5632,7 @@ generate int16x4_t, int16x8_t, int32x2_t, int32x4_t

 /// Signed saturating rounding doubling multiply returning high half
 name = vqrdmulh
-multi_fn = simd_extract, {vqrdmulh-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
+multi_fn = simd_extract!, {vqrdmulh-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
 a = 1
 b = 2
 validate 0
@@ -5674,7 +5674,7 @@ name = vqrdmulh
 lane-suffixes
 constn = LANE
 multi_fn = static_assert_imm-in_exp_len-LANE
-multi_fn = vqrdmulh-out-noext, a, {simd_extract, b, LANE as u32}
+multi_fn = vqrdmulh-out-noext, a, {simd_extract!, b, LANE as u32}
 a = 1
 b = 0, 2, 0, 0, 0, 0, 0, 0,
 n = 1
@@ -5700,7 +5700,7 @@ name = vqrdmlah
 multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
 multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
 multi_fn = vdup_n-in_ntt-noext, c:in_ntt, c
-multi_fn = simd_extract, {vqrdmlah-in_ntt-noext, a, b, c}, 0
+multi_fn = simd_extract!, {vqrdmlah-in_ntt-noext, a, b, c}, 0
 a = 1
 b = 1
 c = 2
@@ -5733,7 +5733,7 @@ name = vqrdmlah
 in2-lane-suffixes
 constn = LANE
 multi_fn = static_assert_imm-in2_exp_len-LANE
-multi_fn = vqrdmlah-self-noext, a, b, {simd_extract, c, LANE as u32}
+multi_fn = vqrdmlah-self-noext, a, b, {simd_extract!, c, LANE as u32}
 a = 1
 b = 1
 c = 0, 2, 0, 0, 0, 0, 0, 0
@@ -5761,7 +5761,7 @@ name = vqrdmlsh
 multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
 multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
 multi_fn = vdup_n-in_ntt-noext, c:in_ntt, c
-multi_fn = simd_extract, {vqrdmlsh-in_ntt-noext, a, b, c}, 0
+multi_fn = simd_extract!, {vqrdmlsh-in_ntt-noext, a, b, c}, 0
 a = 1
 b = 1
 c = 2
@@ -5794,7 +5794,7 @@ name = vqrdmlsh
 in2-lane-suffixes
 constn = LANE
 multi_fn = static_assert_imm-in2_exp_len-LANE
-multi_fn = vqrdmlsh-self-noext, a, b, {simd_extract, c, LANE as u32}
+multi_fn = vqrdmlsh-self-noext, a, b, {simd_extract!, c, LANE as u32}
 a = 1
 b = 1
 c = 0, 2, 0, 0, 0, 0, 0, 0
@@ -5823,7 +5823,7 @@ generate int*_t, int64x*_t
 name = vqrshl
 multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
 multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
-multi_fn = simd_extract, {vqrshl-in_ntt-noext, a, b}, 0
+multi_fn = simd_extract!, {vqrshl-in_ntt-noext, a, b}, 0
 a = 1
 b = 2
 validate 4
@@ -5852,7 +5852,7 @@ name = vqrshl
 out-suffix
 multi_fn = vdup_n-out_ntt-noext, a:out_ntt, a
 multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
-multi_fn = simd_extract, {vqrshl-out_ntt-noext, a, b}, 0
+multi_fn = simd_extract!, {vqrshl-out_ntt-noext, a, b}, 0
 a = 1
 b = 2
 validate 4
@@ -5885,7 +5885,7 @@ noq-n-suffix
 constn = N
 multi_fn = static_assert-N-1-halfbits
 multi_fn = vdupq_n-in_ntt-noext, a:in_long_ntt, a
-multi_fn = simd_extract, {vqrshrn_n-in_ntt-::, a}, 0
+multi_fn = simd_extract!, {vqrshrn_n-in_ntt-::, a}, 0
 a = 4
 n = 2
 validate 1
@@ -5932,7 +5932,7 @@ noq-n-suffix
 constn = N
 multi_fn = static_assert-N-1-halfbits
 multi_fn = vdupq_n-in_ntt-noext, a:in_long_ntt, a
-multi_fn = simd_extract, {vqrshrn_n-in_ntt-::, a}, 0
+multi_fn = simd_extract!, {vqrshrn_n-in_ntt-::, a}, 0
 a = 4
 n = 2
 validate 1
@@ -5979,7 +5979,7 @@ noq-n-suffix
 constn = N
 multi_fn = static_assert-N-1-halfbits
 multi_fn = vdupq_n-in_ntt-noext, a:in_long_ntt, a
-multi_fn = simd_extract, {vqrshrun_n-in_ntt-::, a}, 0
+multi_fn = simd_extract!, {vqrshrun_n-in_ntt-::, a}, 0
 a = 4
 n = 2
 validate 1
@@ -6018,7 +6018,7 @@ generate int*_t, int64x*_t
 /// Signed saturating shift left
 name = vqshl
 multi_fn = vqshl-in_ntt-noext, c:in_ntt, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}
-multi_fn = simd_extract, c, 0
+multi_fn = simd_extract!, c, 0
 a = 1
 b = 2
 validate 4
@@ -6046,7 +6046,7 @@ generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint6
 name = vqshl
 out-suffix
 multi_fn = vqshl-out_ntt-noext, c:out_ntt, {vdup_n-out_ntt-noext, a}, {vdup_n-in_ntt-noext, b}
-multi_fn = simd_extract, c, 0
+multi_fn = simd_extract!, c, 0
 a = 1
 b = 2
 validate 4
@@ -6073,7 +6073,7 @@ name = vqshl
 n-suffix
 constn = N
 multi_fn = static_assert_imm-out_bits_exp_len-N
-multi_fn = simd_extract, {vqshl_n-in_ntt-::, {vdup_n-in_ntt-noext, a}}, 0
+multi_fn = simd_extract!, {vqshl_n-in_ntt-::, {vdup_n-in_ntt-noext, a}}, 0
 a = 1
 n = 2
 validate 4
@@ -6100,7 +6100,7 @@ name = vqshl
 n-suffix
 constn = N
 multi_fn = static_assert_imm-out_bits_exp_len-N
-multi_fn = simd_extract, {vqshl_n-in_ntt-::, {vdup_n-in_ntt-noext, a}}, 0
+multi_fn = simd_extract!, {vqshl_n-in_ntt-::, {vdup_n-in_ntt-noext, a}}, 0
 a = 1
 n = 2
 validate 4
@@ -6132,7 +6132,7 @@ name = vqshlu
 n-suffix
 constn = N
 multi_fn = static_assert_imm-out_bits_exp_len-N
-multi_fn = simd_extract, {vqshlu_n-in_ntt-::, {vdup_n-in_ntt-noext, a}}, 0
+multi_fn = simd_extract!, {vqshlu_n-in_ntt-::, {vdup_n-in_ntt-noext, a}}, 0
 a = 1
 n = 2
 validate 4
@@ -6165,7 +6165,7 @@ name = vqshrn
 noq-n-suffix
 constn = N
 multi_fn = static_assert-N-1-halfbits
-multi_fn = simd_extract, {vqshrn_n-in_ntt-::, {vdupq_n-in_ntt-noext, a}}, 0
+multi_fn = simd_extract!, {vqshrn_n-in_ntt-::, {vdupq_n-in_ntt-noext, a}}, 0
 a = 4
 n = 2
 validate 1
@@ -6212,7 +6212,7 @@ name = vqshrn
 noq-n-suffix
 constn = N
 multi_fn = static_assert-N-1-halfbits
-multi_fn = simd_extract, {vqshrn_n-in_ntt-::, {vdupq_n-in_ntt-noext, a}}, 0
+multi_fn = simd_extract!, {vqshrn_n-in_ntt-::, {vdupq_n-in_ntt-noext, a}}, 0
 a = 4
 n = 2
 validate 1
@@ -6258,7 +6258,7 @@ name = vqshrun
 noq-n-suffix
 constn = N
 multi_fn = static_assert-N-1-halfbits
-multi_fn = simd_extract, {vqshrun_n-in_ntt-::, {vdupq_n-in_ntt-noext, a}}, 0
+multi_fn = simd_extract!, {vqshrun_n-in_ntt-::, {vdupq_n-in_ntt-noext, a}}, 0
 a = 4
 n = 2
 validate 1
@@ -6283,7 +6283,7 @@ generate uint8x8_t:int16x8_t:uint8x16_t, uint16x4_t:int32x4_t:uint16x8_t, uint32
 /// Unsigned saturating accumulate of signed value
 name = vsqadd
 out-suffix
-multi_fn = simd_extract, {vsqadd-out_ntt-noext, {vdup_n-out_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
+multi_fn = simd_extract!, {vsqadd-out_ntt-noext, {vdup_n-out_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
 a = 2
 b = 2
 validate 4
@@ -6845,7 +6845,7 @@ generate uint8x8_t:uint16x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint3
 name = vset_lane
 constn = LANE
 multi_fn = static_assert_imm-in_exp_len-LANE
-multi_fn = simd_insert, b, LANE as u32, a
+multi_fn = simd_insert!, b, LANE as u32, a
 a = 1
 b = 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
 n = 0
@@ -6867,7 +6867,7 @@ name = vsetq_lane
 no-q
 constn = LANE
 multi_fn = static_assert_imm-in_exp_len-LANE
-multi_fn = simd_insert, b, LANE as u32, a
+multi_fn = simd_insert!, b, LANE as u32, a
 a = 1
 b = 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
 n = 0
@@ -6888,7 +6888,7 @@ generate p64:poly64x2_t:poly64x2_t
 name = vset_lane
 constn = LANE
 multi_fn = static_assert_imm-in_exp_len-LANE
-multi_fn = simd_insert, b, LANE as u32, a
+multi_fn = simd_insert!, b, LANE as u32, a
 a = 1.
 b = 0., 2., 3., 4.
 n = 0
@@ -6905,7 +6905,7 @@ name = vsetq_lane
 no-q
 constn = LANE
 multi_fn = static_assert_imm-in_exp_len-LANE
-multi_fn = simd_insert, b, LANE as u32, a
+multi_fn = simd_insert!, b, LANE as u32, a
 a = 1.
 b = 0., 2., 3., 4.
 n = 0
@@ -7241,7 +7241,7 @@ validate -2147483648.0
 a = -2147483648.500000477
 validate -2147483648.0

-multi_fn = transmute, {self-out-_, {simd_extract, a, 0}}
+multi_fn = transmute, {self-out-_, {simd_extract!, a, 0}}
 link-aarch64 = llvm.aarch64.frint32x.f64:f64:::f64
 generate float64x1_t

@@ -7282,7 +7282,7 @@ validate -2147483648.0
 a = -2147483649.0
 validate -2147483648.0

-multi_fn = transmute, {self-out-_, {simd_extract, a, 0}}
+multi_fn = transmute, {self-out-_, {simd_extract!, a, 0}}
 link-aarch64 = llvm.aarch64.frint32z.f64:f64:::f64
 generate float64x1_t

@@ -7324,7 +7324,7 @@ validate -9223372036854775808.0
 a = -9223372036854777856.0
 validate -9223372036854775808.0

-multi_fn = transmute, {self-out-_, {simd_extract, a, 0}}
+multi_fn = transmute, {self-out-_, {simd_extract!, a, 0}}
 link-aarch64 = llvm.aarch64.frint64x.f64:f64:::f64
 generate float64x1_t

@@ -7365,7 +7365,7 @@ validate -9223372036854775808.0
 a = -9223372036854777856.0
 validate -9223372036854775808.0

-multi_fn = transmute, {self-out-_, {simd_extract, a, 0}}
+multi_fn = transmute, {self-out-_, {simd_extract!, a, 0}}
 link-aarch64 = llvm.aarch64.frint64z.f64:f64:::f64
 generate float64x1_t

@@ -7799,7 +7799,7 @@ generate int64x*_t

 /// Signed saturating absolute value
 name = vqabs
-multi_fn = simd_extract, {vqabs-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0
+multi_fn = simd_extract!, {vqabs-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0
 a = -7
 validate 7
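[Editor's note: the simd_insert!/simd_extract! macros that neon.spec now emits are defined in crates/core_arch/src/macros.rs (listed in the diffstat of patch 1; that hunk is not reproduced here). As a hedged sketch only -- not the verbatim upstream macros -- they are expected to be thin wrappers that force the lane index into an inline const block, so the intrinsic always receives a compile-time constant:

    // Illustrative only: wrap the index in `const { .. }` before calling the
    // underlying platform intrinsic, so a non-constant index fails to compile.
    macro_rules! simd_extract {
        ($x:expr, $idx:expr $(,)?) => {
            simd_extract($x, const { $idx })
        };
    }
    macro_rules! simd_insert {
        ($x:expr, $idx:expr, $val:expr $(,)?) => {
            simd_insert($x, const { $idx }, $val)
        };
    }

This is also why patch 2 below must stop calling the simd_extract intrinsic inside SimdTy::extract: that test helper receives its index at run time.]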
From b754cf27ab44cf012f9480e2bec996d949c16752 Mon Sep 17 00:00:00 2001
From: Ralf Jung
Date: Sat, 17 Feb 2024 13:28:37 +0100
Subject: [PATCH 2/2] avoid using simd_extract in SimdTy::extract (since the index is not a constant there)

---
 crates/core_arch/src/simd.rs | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/crates/core_arch/src/simd.rs b/crates/core_arch/src/simd.rs
index 281fefba42..b386b17848 100644
--- a/crates/core_arch/src/simd.rs
+++ b/crates/core_arch/src/simd.rs
@@ -18,17 +18,28 @@ macro_rules! simd_ty {
             #[inline(always)]
             pub(crate) const fn splat(value: $ety) -> Self {
                 $id($({
+                    // We want this to be repeated for each element.
+                    // So we need to use `elem_name` in a `$(...)`.
+                    // But we don't actually need that name for anything so we use a dummy struct.
                     #[allow(non_camel_case_types, dead_code)]
                     struct $elem_name;
                     value
                 }),*)
             }

+            /// Extract the element at position `index`.
+            /// `index` is not a constant so this is not efficient!
+            /// Use for testing only.
             // FIXME: Workaround rust@60637
             #[inline(always)]
             pub(crate) fn extract(self, index: usize) -> $ety {
+                // Here we assume that there is no padding.
+                let len = crate::mem::size_of::<Self>() / crate::mem::size_of::<$ety>();
+                assert!(index < len);
+                // Now that we know this is in-bounds, use pointer arithmetic to access the right element.
+                let self_ptr = &self as *const Self as *const $ety;
                 unsafe {
-                    crate::core_arch::simd_llvm::simd_extract(self, index as u32)
+                    self_ptr.add(index).read()
                 }
             }
         }
@@ -62,15 +73,6 @@ macro_rules! simd_m_ty {
                     Self::bool_to_internal(value)
                 }),*)
             }
-
-            // FIXME: Workaround rust@60637
-            #[inline(always)]
-            pub(crate) fn extract(self, index: usize) -> bool {
-                let r: $ety = unsafe {
-                    crate::core_arch::simd_llvm::simd_extract(self, index as u32)
-                };
-                r != 0
-            }
         }
     }
 }
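[Editor's note: the reworked extract above reads the requested element through a raw pointer instead of calling the simd_extract intrinsic, because the intrinsic's index must now be a compile-time constant while this test-only helper gets it at run time. Below is a minimal standalone sketch of the same pointer-arithmetic technique, using a made-up U32x4 type and std::mem rather than crate::mem; names are illustrative, not stdarch code:

    #[derive(Copy, Clone)]
    #[repr(C)]
    struct U32x4(u32, u32, u32, u32);

    impl U32x4 {
        fn extract(self, index: usize) -> u32 {
            // Assumes no padding, so element i sits at byte offset i * size_of::<u32>().
            let len = std::mem::size_of::<Self>() / std::mem::size_of::<u32>();
            assert!(index < len);
            let ptr = &self as *const Self as *const u32;
            // In bounds by the assert above; read the i-th element directly.
            unsafe { ptr.add(index).read() }
        }
    }

    fn main() {
        let v = U32x4(10, 20, 30, 40);
        assert_eq!(v.extract(2), 30);
    }
]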