From 374bc27b882b6857f169b5cd9cff7ad5bc4f49ee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20BRANSTETT?=
Date: Tue, 1 Feb 2022 19:19:01 +0100
Subject: [PATCH 01/12] Simplify the code of fixup by making its code flow
 more natural

---
 compiler/rustc_middle/src/ty/layout.rs | 30 +++++++++++++++-----------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/compiler/rustc_middle/src/ty/layout.rs b/compiler/rustc_middle/src/ty/layout.rs
index 3b05e42a53ead..fa4e0e85a4e84 100644
--- a/compiler/rustc_middle/src/ty/layout.rs
+++ b/compiler/rustc_middle/src/ty/layout.rs
@@ -3360,7 +3360,22 @@ impl<'tcx> LayoutCx<'tcx, TyCtxt<'tcx>> {
         }
 
         match arg.layout.abi {
-            Abi::Aggregate { .. } => {}
+            Abi::Aggregate { .. } => {
+                // Pass and return structures up to 2 pointers in size by value,
+                // matching `ScalarPair`. LLVM will usually pass these in 2 registers
+                // which is more efficient than by-ref.
+                let max_by_val_size = Pointer.size(self) * 2;
+                let size = arg.layout.size;
+
+                if arg.layout.is_unsized() || size > max_by_val_size {
+                    arg.make_indirect();
+                } else {
+                    // We want to pass small aggregates as immediates, but using
+                    // a LLVM aggregate type for this leads to bad optimizations,
+                    // so we pick an appropriately sized integer type instead.
+                    arg.cast_to(Reg { kind: RegKind::Integer, size });
+                }
+            }
 
             // This is a fun case! The gist of what this is doing is
             // that we want callers and callees to always agree on the
@@ -3386,20 +3401,9 @@ impl<'tcx> LayoutCx<'tcx, TyCtxt<'tcx>> {
                 && self.tcx.sess.target.simd_types_indirect =>
             {
                 arg.make_indirect();
-                return;
             }
-            _ => return,
-        }
-
-        let size = arg.layout.size;
-        if arg.layout.is_unsized() || size > Pointer.size(self) {
-            arg.make_indirect();
-        } else {
-            // We want to pass small aggregates as immediates, but using
-            // a LLVM aggregate type for this leads to bad optimizations,
-            // so we pick an appropriately sized integer type instead.
-            arg.cast_to(Reg { kind: RegKind::Integer, size });
+            _ => {},
         }
     };
     fixup(&mut fn_abi.ret);

From bf97e79437100cb76003dd16b4626d045201154c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20BRANSTETT?=
Date: Tue, 1 Feb 2022 22:10:47 +0100
Subject: [PATCH 02/12] Don't aggregate homogeneous floats in the Rust ABI

---
 compiler/rustc_middle/src/ty/layout.rs        | 22 ++++-
 src/test/assembly/x86-64-homogenous-floats.rs | 45 ++++++++++++++++++
 src/test/codegen/homogeneous-floats.rs        | 32 +++++++++++++
 src/test/ui/abi/homogenous-floats.rs          | 46 +++++++++++++++++++
 4 files changed, 143 insertions(+), 2 deletions(-)
 create mode 100644 src/test/assembly/x86-64-homogenous-floats.rs
 create mode 100644 src/test/codegen/homogeneous-floats.rs
 create mode 100644 src/test/ui/abi/homogenous-floats.rs

diff --git a/compiler/rustc_middle/src/ty/layout.rs b/compiler/rustc_middle/src/ty/layout.rs
index fa4e0e85a4e84..847a0e3e58e34 100644
--- a/compiler/rustc_middle/src/ty/layout.rs
+++ b/compiler/rustc_middle/src/ty/layout.rs
@@ -14,7 +14,8 @@ use rustc_session::{config::OptLevel, DataTypeKind, FieldInfo, SizeKind, Variant
 use rustc_span::symbol::Symbol;
 use rustc_span::{Span, DUMMY_SP};
 use rustc_target::abi::call::{
-    ArgAbi, ArgAttribute, ArgAttributes, ArgExtension, Conv, FnAbi, PassMode, Reg, RegKind,
+    ArgAbi, ArgAttribute, ArgAttributes, ArgExtension, Conv, FnAbi, HomogeneousAggregate, PassMode,
+    Reg, RegKind,
 };
 use rustc_target::abi::*;
 use rustc_target::spec::{abi::Abi as SpecAbi, HasTargetSpec, PanicStrategy, Target};
@@ -3369,10 +3370,27 @@ impl<'tcx> LayoutCx<'tcx, TyCtxt<'tcx>> {
                 if arg.layout.is_unsized() || size > max_by_val_size {
                     arg.make_indirect();
+                } else if let Ok(HomogeneousAggregate::Homogeneous(Reg {
+                    kind: RegKind::Float,
+                    ..
+                })) = arg.layout.homogeneous_aggregate(self)
+                {
+                    // We don't want to aggregate floats as an aggregates of Integer
+                    // because this will hurt the generated assembly (#93490)
+                    //
+                    // As an optimization we want to pass homogeneous aggregate of floats
+                    // greater than pointer size as indirect
+                    if size > Pointer.size(self) {
+                        arg.make_indirect();
+                    }
                 } else {
                     // We want to pass small aggregates as immediates, but using
                     // a LLVM aggregate type for this leads to bad optimizations,
                     // so we pick an appropriately sized integer type instead.
+                    //
+                    // NOTE: This is sub-optimal because in the case of (f32, f32, u32, u32)
+                    // we could do ([f32; 2], u64) which is better but this is the best we
+                    // can do right now.
                     arg.cast_to(Reg { kind: RegKind::Integer, size });
                 }
             }
@@ -3403,7 +3421,7 @@ impl<'tcx> LayoutCx<'tcx, TyCtxt<'tcx>> {
                 arg.make_indirect();
             }
 
-            _ => {},
+            _ => {}
         }
     };
     fixup(&mut fn_abi.ret);
diff --git a/src/test/assembly/x86-64-homogenous-floats.rs b/src/test/assembly/x86-64-homogenous-floats.rs
new file mode 100644
index 0000000000000..5b725bab07790
--- /dev/null
+++ b/src/test/assembly/x86-64-homogenous-floats.rs
@@ -0,0 +1,45 @@
+// assembly-output: emit-asm
+// needs-llvm-components: x86
+// compile-flags: --target x86_64-unknown-linux-gnu
+// compile-flags: -C llvm-args=--x86-asm-syntax=intel
+// compile-flags: -C opt-level=3
+
+#![crate_type = "rlib"]
+#![no_std]
+
+// CHECK-LABEL: sum_f32:
+// CHECK: addss xmm0, xmm1
+// CHECK-NEXT: ret
+#[no_mangle]
+pub fn sum_f32(a: f32, b: f32) -> f32 {
+    a + b
+}
+
+// CHECK-LABEL: sum_f32x2:
+// CHECK: addss xmm{{[0-9]}}, xmm{{[0-9]}}
+// CHECK-NEXT: addss xmm{{[0-9]}}, xmm{{[0-9]}}
+// CHECK-NEXT: ret
+#[no_mangle]
+pub fn sum_f32x2(a: [f32; 2], b: [f32; 2]) -> [f32; 2] {
+    [
+        a[0] + b[0],
+        a[1] + b[1],
+    ]
+}
+
+// CHECK-LABEL: sum_f32x4:
+// CHECK: mov rax, [[PTR_IN:.*]]
+// CHECK-NEXT: movups [[XMMA:xmm[0-9]]], xmmword ptr [rsi]
+// CHECK-NEXT: movups [[XMMB:xmm[0-9]]], xmmword ptr [rdx]
+// CHECK-NEXT: addps [[XMMB]], [[XMMA]]
+// CHECK-NEXT: movups xmmword ptr {{\[}}[[PTR_IN]]{{\]}}, [[XMMB]]
+// CHECK-NEXT: ret
+#[no_mangle]
+pub fn sum_f32x4(a: [f32; 4], b: [f32; 4]) -> [f32; 4] {
+    [
+        a[0] + b[0],
+        a[1] + b[1],
+        a[2] + b[2],
+        a[3] + b[3],
+    ]
+}
diff --git a/src/test/codegen/homogeneous-floats.rs b/src/test/codegen/homogeneous-floats.rs
new file mode 100644
index 0000000000000..0b729156d2842
--- /dev/null
+++ b/src/test/codegen/homogeneous-floats.rs
@@ -0,0 +1,32 @@
+//! Check that small (less than 128 bits on x86_64) homogeneous floats are passed either as an
+//! array or by a pointer
+
+// compile-flags: -C no-prepopulate-passes -O
+// only-x86_64
+
+#![crate_type = "lib"]
+
+pub struct Foo {
+    bar1: f32,
+    bar2: f32,
+    bar3: f32,
+    bar4: f32,
+}
+
+// CHECK: define [2 x float] @array_f32x2([2 x float] %0, [2 x float] %1)
+#[no_mangle]
+pub fn array_f32x2(a: [f32; 2], b: [f32; 2]) -> [f32; 2] {
+    todo!()
+}
+
+// CHECK: define void @array_f32x4([4 x float]* {{.*}} sret([4 x float]) {{.*}} %0, [4 x float]* {{.*}} %a, [4 x float]* {{.*}} %b)
+#[no_mangle]
+pub fn array_f32x4(a: [f32; 4], b: [f32; 4]) -> [f32; 4] {
+    todo!()
+}
+
+// CHECK: define void @array_f32x4_nested(%Foo* {{.*}} sret(%Foo) {{.*}} %0, %Foo* {{.*}} %a, %Foo* {{.*}} %b)
+#[no_mangle]
+pub fn array_f32x4_nested(a: Foo, b: Foo) -> Foo {
+    todo!()
+}
diff --git a/src/test/ui/abi/homogenous-floats.rs b/src/test/ui/abi/homogenous-floats.rs
new file mode 100644
index 0000000000000..cbbcd2a47e82c
--- /dev/null
+++ b/src/test/ui/abi/homogenous-floats.rs
@@ -0,0 +1,46 @@
+// This tests that, no matter the optimization level or the target features enabled, the
+// non-aggregation of homogeneous floats in the ABI is sound and still produces the right answer.
+
+// revisions: opt-0 opt-0-native opt-1 opt-1-native opt-2 opt-2-native opt-3 opt-3-native
+// [opt-0]: compile-flags: -C opt-level=0
+// [opt-1]: compile-flags: -C opt-level=1
+// [opt-2]: compile-flags: -C opt-level=2
+// [opt-3]: compile-flags: -C opt-level=3
+// [opt-0-native]: compile-flags: -C target-cpu=native
+// [opt-1-native]: compile-flags: -C target-cpu=native
+// [opt-2-native]: compile-flags: -C target-cpu=native
+// [opt-3-native]: compile-flags: -C target-cpu=native
+// run-pass
+
+#![feature(core_intrinsics)]
+
+use std::intrinsics::black_box;
+
+pub fn sum_f32(a: f32, b: f32) -> f32 {
+    a + b
+}
+
+pub fn sum_f32x2(a: [f32; 2], b: [f32; 2]) -> [f32; 2] {
+    [a[0] + b[0], a[1] + b[1]]
+}
+
+pub fn sum_f32x3(a: [f32; 3], b: [f32; 3]) -> [f32; 3] {
+    [a[0] + b[0], a[1] + b[1], a[2] + b[2]]
+}
+
+pub fn sum_f32x4(a: [f32; 4], b: [f32; 4]) -> [f32; 4] {
+    [a[0] + b[0], a[1] + b[1], a[2] + b[2], a[3] + b[3]]
+}
+
+fn main() {
+    assert_eq!(1., black_box(sum_f32(black_box(0.), black_box(1.))));
+    assert_eq!([2., 2.], black_box(sum_f32x2(black_box([2., 0.]), black_box([0., 2.]))));
+    assert_eq!(
+        [3., 3., 3.],
+        black_box(sum_f32x3(black_box([1., 2., 3.]), black_box([2., 1., 0.])))
+    );
+    assert_eq!(
+        [4., 4., 4., 4.],
+        black_box(sum_f32x4(black_box([1., 2., 3., 4.]), black_box([3., 2., 1., 0.])))
+    );
+}

From dcc75bfc664160fde43b636ac49508ed98cbb247 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20BRANSTETT?=
Date: Mon, 21 Feb 2022 01:45:03 +0100
Subject: [PATCH 03/12] Test that target feature mix-up with homogeneous
 floats is sound

This is basically a rip-off of src/test/ui/simd/target-feature-mixup.rs but
for floats and without #[repr(simd)]
---
 .../homogenous-floats-target-feature-mixup.rs | 184 ++++++++++++++++++
 1 file changed, 184 insertions(+)
 create mode 100644 src/test/ui/abi/homogenous-floats-target-feature-mixup.rs

diff --git a/src/test/ui/abi/homogenous-floats-target-feature-mixup.rs b/src/test/ui/abi/homogenous-floats-target-feature-mixup.rs
new file mode 100644
index 0000000000000..536ad4522e742
--- /dev/null
+++ b/src/test/ui/abi/homogenous-floats-target-feature-mixup.rs
@@ -0,0 +1,184 @@
+// This test checks that even if we mix up the target features of functions with homogeneous
+// floats, the ABI is sound and still produces the right answer.
+//
+// This is basically the same test as src/test/ui/simd/target-feature-mixup.rs but for floats and
+// without #[repr(simd)]
+
+// run-pass
+// ignore-emscripten
+// ignore-sgx no processes
+
+#![feature(target_feature, cfg_target_feature)]
+#![feature(avx512_target_feature)]
+
+#![allow(overflowing_literals)]
+#![allow(unused_variables)]
+#![allow(stable_features)]
+
+use std::process::{Command, ExitStatus};
+use std::env;
+
+fn main() {
+    if let Some(level) = env::args().nth(1) {
+        return test::main(&level)
+    }
+
+    let me = env::current_exe().unwrap();
+    for level in ["sse", "avx", "avx512"].iter() {
+        let status = Command::new(&me).arg(level).status().unwrap();
+        if status.success() {
+            println!("success with {}", level);
+            continue
+        }
+
+        // We don't actually know if our computer has the requisite target features
+        // for the test below. Testing for that will get added to libstd later, so
+        // for now just assume sigill means this is a machine that can't run this test.
+        if is_sigill(status) {
+            println!("sigill with {}, assuming spurious", level);
+            continue
+        }
+        panic!("invalid status at {}: {}", level, status);
+    }
+}
+
+#[cfg(unix)]
+fn is_sigill(status: ExitStatus) -> bool {
+    use std::os::unix::prelude::*;
+    status.signal() == Some(4)
+}
+
+#[cfg(windows)]
+fn is_sigill(status: ExitStatus) -> bool {
+    status.code() == Some(0xc000001d)
+}
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+#[allow(nonstandard_style)]
+mod test {
+    #[derive(PartialEq, Debug, Clone, Copy)]
+    struct f32x2(f32, f32);
+
+    #[derive(PartialEq, Debug, Clone, Copy)]
+    struct f32x4(f32, f32, f32, f32);
+
+    #[derive(PartialEq, Debug, Clone, Copy)]
+    struct f32x8(f32, f32, f32, f32, f32, f32, f32, f32);
+
+    pub fn main(level: &str) {
+        unsafe {
+            main_normal(level);
+            main_sse(level);
+            if level == "sse" {
+                return
+            }
+            main_avx(level);
+            if level == "avx" {
+                return
+            }
+            main_avx512(level);
+        }
+    }
+
+    macro_rules! mains {
+        ($(
+            $(#[$attr:meta])*
+            unsafe fn $main:ident(level: &str) {
+                ...
+            }
+        )*) => ($(
+            $(#[$attr])*
+            unsafe fn $main(level: &str) {
+                let m128 = f32x2(1., 2.);
+                let m256 = f32x4(3., 4., 5., 6.);
+                let m512 = f32x8(7., 8., 9., 10., 11., 12., 13., 14.);
+                assert_eq!(id_sse_128(m128), m128);
+                assert_eq!(id_sse_256(m256), m256);
+                assert_eq!(id_sse_512(m512), m512);
+
+                if level == "sse" {
+                    return
+                }
+                assert_eq!(id_avx_128(m128), m128);
+                assert_eq!(id_avx_256(m256), m256);
+                assert_eq!(id_avx_512(m512), m512);
+
+                if level == "avx" {
+                    return
+                }
+                assert_eq!(id_avx512_128(m128), m128);
+                assert_eq!(id_avx512_256(m256), m256);
+                assert_eq!(id_avx512_512(m512), m512);
+            }
+        )*)
+    }
+
+    mains! {
+        unsafe fn main_normal(level: &str) { ... }
+        #[target_feature(enable = "sse2")]
+        unsafe fn main_sse(level: &str) { ... }
+        #[target_feature(enable = "avx")]
+        unsafe fn main_avx(level: &str) { ... }
+        #[target_feature(enable = "avx512bw")]
+        unsafe fn main_avx512(level: &str) { ... }
+    }
+
+    #[target_feature(enable = "sse2")]
+    unsafe fn id_sse_128(a: f32x2) -> f32x2 {
+        assert_eq!(a, f32x2(1., 2.));
+        a.clone()
+    }
+
+    #[target_feature(enable = "sse2")]
+    unsafe fn id_sse_256(a: f32x4) -> f32x4 {
+        assert_eq!(a, f32x4(3., 4., 5., 6.));
+        a.clone()
+    }
+
+    #[target_feature(enable = "sse2")]
+    unsafe fn id_sse_512(a: f32x8) -> f32x8 {
+        assert_eq!(a, f32x8(7., 8., 9., 10., 11., 12., 13., 14.));
+        a.clone()
+    }
+
+    #[target_feature(enable = "avx")]
+    unsafe fn id_avx_128(a: f32x2) -> f32x2 {
+        assert_eq!(a, f32x2(1., 2.));
+        a.clone()
+    }
+
+    #[target_feature(enable = "avx")]
+    unsafe fn id_avx_256(a: f32x4) -> f32x4 {
+        assert_eq!(a, f32x4(3., 4., 5., 6.));
+        a.clone()
+    }
+
+    #[target_feature(enable = "avx")]
+    unsafe fn id_avx_512(a: f32x8) -> f32x8 {
+        assert_eq!(a, f32x8(7., 8., 9., 10., 11., 12., 13., 14.));
+        a.clone()
+    }
+
+    #[target_feature(enable = "avx512bw")]
+    unsafe fn id_avx512_128(a: f32x2) -> f32x2 {
+        assert_eq!(a, f32x2(1., 2.));
+        a.clone()
+    }
+
+    #[target_feature(enable = "avx512bw")]
+    unsafe fn id_avx512_256(a: f32x4) -> f32x4 {
+        assert_eq!(a, f32x4(3., 4., 5., 6.));
+        a.clone()
+    }
+
+    #[target_feature(enable = "avx512bw")]
+    unsafe fn id_avx512_512(a: f32x8) -> f32x8 {
+        assert_eq!(a, f32x8(7., 8., 9., 10., 11., 12., 13., 14.));
+        a.clone()
+    }
+}
+
+#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
+mod test {
+    pub fn main(level: &str) {}
+}

From ec16a323e8b90f892dd42031657fafe850705edd Mon Sep 17 00:00:00 2001
From: Urgau
Date: Mon, 30 May 2022 20:37:10 +0200
Subject: [PATCH 04/12] Fix some codegen tests

---
 src/test/codegen/array-equality.rs | 3 +--
 src/test/codegen/union-abi.rs      | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/test/codegen/array-equality.rs b/src/test/codegen/array-equality.rs
index cd5e82a9205c1..b509bc20775b0 100644
--- a/src/test/codegen/array-equality.rs
+++ b/src/test/codegen/array-equality.rs
@@ -56,8 +56,7 @@ pub fn array_eq_zero_short(x: [u16; 3]) -> bool {
 #[no_mangle]
 pub fn array_eq_zero_mid(x: [u16; 8]) -> bool {
     // CHECK-NEXT: start:
-    // CHECK: %[[LOAD:.+]] = load i128,
-    // CHECK-NEXT: %[[EQ:.+]] = icmp eq i128 %[[LOAD]], 0
+    // CHECK-NEXT: %[[EQ:.+]] = icmp eq i128 %0, 0
     // CHECK-NEXT: ret i1 %[[EQ]]
     x == [0; 8]
 }
diff --git a/src/test/codegen/union-abi.rs b/src/test/codegen/union-abi.rs
index c18f2a49fc369..6e3866778a391 100644
--- a/src/test/codegen/union-abi.rs
+++ b/src/test/codegen/union-abi.rs
@@ -65,7 +65,7 @@ pub fn test_UnionU128(_: UnionU128) -> UnionU128 { loop {} }
 #[repr(C)]
 pub union CUnionU128{a:u128}
-// CHECK: define void @test_CUnionU128({{%CUnionU128\*|ptr}} {{.*}} %_1)
+// CHECK: define void @test_CUnionU128(i128 %0)
 #[no_mangle]
 pub fn test_CUnionU128(_: CUnionU128) { loop {} }

From 9ed05ed87f0b1627836ab555ba86af55eb38168e Mon Sep 17 00:00:00 2001
From: Urgau
Date: Mon, 30 May 2022 21:12:22 +0200
Subject: [PATCH 05/12] Use simpler heuristic for determining if a layout
 contains only floats

---
 compiler/rustc_middle/src/ty/layout.rs | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/compiler/rustc_middle/src/ty/layout.rs b/compiler/rustc_middle/src/ty/layout.rs
index 847a0e3e58e34..f638063c4408c 100644
--- a/compiler/rustc_middle/src/ty/layout.rs
+++ b/compiler/rustc_middle/src/ty/layout.rs
@@ -14,8 +14,7 @@ use rustc_session::{config::OptLevel, DataTypeKind, FieldInfo, SizeKind, Variant
 use rustc_span::symbol::Symbol;
 use rustc_span::{Span, DUMMY_SP};
 use rustc_target::abi::call::{
-    ArgAbi, ArgAttribute, ArgAttributes, ArgExtension, Conv, FnAbi, HomogeneousAggregate, PassMode,
-    Reg, RegKind,
+    ArgAbi, ArgAttribute, ArgAttributes, ArgExtension, Conv, FnAbi, PassMode, Reg, RegKind,
 };
 use rustc_target::abi::*;
 use rustc_target::spec::{abi::Abi as SpecAbi, HasTargetSpec, PanicStrategy, Target};
@@ -3341,6 +3340,17 @@ impl<'tcx> LayoutCx<'tcx, TyCtxt<'tcx>> {
         Ok(self.tcx.arena.alloc(fn_abi))
     }
 
+    /// Small heuristic for determining if layout has any float primitive
+    fn has_all_float(&self, layout: &'_ TyAndLayout<'tcx>) -> bool {
+        match layout.abi {
+            Abi::Uninhabited | Abi::Vector { .. } => false,
+            Abi::Scalar(scalar) => matches!(scalar.primitive(), Primitive::F32 | Primitive::F64),
+            Abi::ScalarPair(..) | Abi::Aggregate { .. } => {
+                (0..layout.fields.count()).all(|i| self.has_all_float(&layout.field(self, i)))
+            }
+        }
+    }
+
     fn fn_abi_adjust_for_abi(
         &self,
         fn_abi: &mut FnAbi<'tcx, Ty<'tcx>>,
@@ -3370,11 +3380,7 @@ impl<'tcx> LayoutCx<'tcx, TyCtxt<'tcx>> {
                 if arg.layout.is_unsized() || size > max_by_val_size {
                     arg.make_indirect();
-                } else if let Ok(HomogeneousAggregate::Homogeneous(Reg {
-                    kind: RegKind::Float,
-                    ..
-                })) = arg.layout.homogeneous_aggregate(self)
-                {
+                } else if unlikely!(self.has_all_float(&arg.layout)) {
                     // We don't want to aggregate floats as an aggregates of Integer
                     // because this will hurt the generated assembly (#93490)
                     //

From f1c72be1ebb734ff036390626178f04a4df71799 Mon Sep 17 00:00:00 2001
From: Urgau
Date: Tue, 31 May 2022 10:27:48 +0200
Subject: [PATCH 06/12] Use nbdd0121 suggestion for reducing the perf impact

---
 compiler/rustc_middle/src/ty/layout.rs        | 16 +++++++---------
 src/test/assembly/x86-64-homogenous-floats.rs | 11 +++++++----
 src/test/codegen/homogeneous-floats.rs        |  2 +-
 3 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/compiler/rustc_middle/src/ty/layout.rs b/compiler/rustc_middle/src/ty/layout.rs
index f638063c4408c..a9385595bfbe0 100644
--- a/compiler/rustc_middle/src/ty/layout.rs
+++ b/compiler/rustc_middle/src/ty/layout.rs
@@ -3375,20 +3375,18 @@ impl<'tcx> LayoutCx<'tcx, TyCtxt<'tcx>> {
                 // Pass and return structures up to 2 pointers in size by value,
                 // matching `ScalarPair`. LLVM will usually pass these in 2 registers
                 // which is more efficient than by-ref.
-                let max_by_val_size = Pointer.size(self) * 2;
+                let ptr_size = Pointer.size(self);
+                let max_by_val_size = ptr_size * 2;
                 let size = arg.layout.size;
 
                 if arg.layout.is_unsized() || size > max_by_val_size {
                     arg.make_indirect();
-                } else if unlikely!(self.has_all_float(&arg.layout)) {
+                } else if size > ptr_size && unlikely!(self.has_all_float(&arg.layout)) {
                     // We don't want to aggregate floats as an aggregates of Integer
-                    // because this will hurt the generated assembly (#93490)
-                    //
-                    // As an optimization we want to pass homogeneous aggregate of floats
-                    // greater than pointer size as indirect
-                    if size > Pointer.size(self) {
-                        arg.make_indirect();
-                    }
+                    // because this will hurt the generated assembly (#93490) but as an
+                    // optimization we want to pass homogeneous aggregate of floats
+                    // greater than pointer size as indirect.
+                    arg.make_indirect();
                 } else {
                     // We want to pass small aggregates as immediates, but using
                     // a LLVM aggregate type for this leads to bad optimizations,
diff --git a/src/test/assembly/x86-64-homogenous-floats.rs b/src/test/assembly/x86-64-homogenous-floats.rs
index 5b725bab07790..00b434a688d67 100644
--- a/src/test/assembly/x86-64-homogenous-floats.rs
+++ b/src/test/assembly/x86-64-homogenous-floats.rs
@@ -15,12 +15,15 @@ pub fn sum_f32(a: f32, b: f32) -> f32 {
     a + b
 }
 
-// CHECK-LABEL: sum_f32x2:
-// CHECK: addss xmm{{[0-9]}}, xmm{{[0-9]}}
-// CHECK-NEXT: addss xmm{{[0-9]}}, xmm{{[0-9]}}
+// CHECK-LABEL: sum_f64x2:
+// CHECK: mov rax, [[PTR_IN:.*]]
+// CHECK-NEXT: movupd [[XMMA:xmm[0-9]]], xmmword ptr [rsi]
+// CHECK-NEXT: movupd [[XMMB:xmm[0-9]]], xmmword ptr [rdx]
+// CHECK-NEXT: addpd [[XMMB]], [[XMMA]]
+// CHECK-NEXT: movupd xmmword ptr {{\[}}[[PTR_IN]]{{\]}}, [[XMMB]]
 // CHECK-NEXT: ret
 #[no_mangle]
-pub fn sum_f32x2(a: [f32; 2], b: [f32; 2]) -> [f32; 2] {
+pub fn sum_f64x2(a: [f64; 2], b: [f64; 2]) -> [f64; 2] {
     [
         a[0] + b[0],
         a[1] + b[1],
diff --git a/src/test/codegen/homogeneous-floats.rs b/src/test/codegen/homogeneous-floats.rs
index 0b729156d2842..cf8a62e488ee1 100644
--- a/src/test/codegen/homogeneous-floats.rs
+++ b/src/test/codegen/homogeneous-floats.rs
@@ -13,7 +13,7 @@ pub struct Foo {
     bar4: f32,
 }
 
-// CHECK: define [2 x float] @array_f32x2([2 x float] %0, [2 x float] %1)
+// CHECK: define i64 @array_f32x2(i64 %0, i64 %1)
 #[no_mangle]
 pub fn array_f32x2(a: [f32; 2], b: [f32; 2]) -> [f32; 2] {
     todo!()
 }

From 1be1d4a5402f8378df8537e0d103b1e18b4b6429 Mon Sep 17 00:00:00 2001
From: Urgau
Date: Fri, 1 Jul 2022 10:14:28 +0200
Subject: [PATCH 07/12] Remove undefined unlikely! macro

---
 compiler/rustc_middle/src/ty/layout.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler/rustc_middle/src/ty/layout.rs b/compiler/rustc_middle/src/ty/layout.rs
index a9385595bfbe0..6696da5062256 100644
--- a/compiler/rustc_middle/src/ty/layout.rs
+++ b/compiler/rustc_middle/src/ty/layout.rs
@@ -3381,7 +3381,7 @@ impl<'tcx> LayoutCx<'tcx, TyCtxt<'tcx>> {
                 if arg.layout.is_unsized() || size > max_by_val_size {
                     arg.make_indirect();
-                } else if size > ptr_size && unlikely!(self.has_all_float(&arg.layout)) {
+                } else if size > ptr_size && self.has_all_float(&arg.layout) {
                     // We don't want to aggregate floats as an aggregates of Integer
                     // because this will hurt the generated assembly (#93490) but as an
                     // optimization we want to pass homogeneous aggregate of floats

From b2fba9a67f32bc2b8ba609eefd0b21061ca0f27f Mon Sep 17 00:00:00 2001
From: Urgau
Date: Fri, 1 Jul 2022 10:15:17 +0200
Subject: [PATCH 08/12] Revert "Use nbdd0121 suggestion for reducing the perf
 impact"

This reverts commit 2e0cf271285089316db55b995312712638126245.
---
 compiler/rustc_middle/src/ty/layout.rs        | 16 +++++++++-------
 src/test/assembly/x86-64-homogenous-floats.rs | 11 ++++-------
 src/test/codegen/homogeneous-floats.rs        |  2 +-
 3 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/compiler/rustc_middle/src/ty/layout.rs b/compiler/rustc_middle/src/ty/layout.rs
index 6696da5062256..f5fe117c403b6 100644
--- a/compiler/rustc_middle/src/ty/layout.rs
+++ b/compiler/rustc_middle/src/ty/layout.rs
@@ -3375,18 +3375,20 @@ impl<'tcx> LayoutCx<'tcx, TyCtxt<'tcx>> {
                 // Pass and return structures up to 2 pointers in size by value,
                 // matching `ScalarPair`. LLVM will usually pass these in 2 registers
                 // which is more efficient than by-ref.
-                let ptr_size = Pointer.size(self);
-                let max_by_val_size = ptr_size * 2;
+                let max_by_val_size = Pointer.size(self) * 2;
                 let size = arg.layout.size;
 
                 if arg.layout.is_unsized() || size > max_by_val_size {
                     arg.make_indirect();
-                } else if size > ptr_size && self.has_all_float(&arg.layout) {
+                } else if self.has_all_float(&arg.layout) {
                     // We don't want to aggregate floats as an aggregates of Integer
-                    // because this will hurt the generated assembly (#93490) but as an
-                    // optimization we want to pass homogeneous aggregate of floats
-                    // greater than pointer size as indirect.
-                    arg.make_indirect();
+                    // because this will hurt the generated assembly (#93490)
+                    //
+                    // As an optimization we want to pass homogeneous aggregate of floats
+                    // greater than pointer size as indirect
+                    if size > Pointer.size(self) {
+                        arg.make_indirect();
+                    }
                 } else {
                     // We want to pass small aggregates as immediates, but using
                     // a LLVM aggregate type for this leads to bad optimizations,
diff --git a/src/test/assembly/x86-64-homogenous-floats.rs b/src/test/assembly/x86-64-homogenous-floats.rs
index 00b434a688d67..5b725bab07790 100644
--- a/src/test/assembly/x86-64-homogenous-floats.rs
+++ b/src/test/assembly/x86-64-homogenous-floats.rs
@@ -15,12 +15,15 @@ pub fn sum_f32(a: f32, b: f32) -> f32 {
     a + b
 }
 
-// CHECK-LABEL: sum_f64x2:
-// CHECK: mov rax, [[PTR_IN:.*]]
-// CHECK-NEXT: movupd [[XMMA:xmm[0-9]]], xmmword ptr [rsi]
-// CHECK-NEXT: movupd [[XMMB:xmm[0-9]]], xmmword ptr [rdx]
-// CHECK-NEXT: addpd [[XMMB]], [[XMMA]]
-// CHECK-NEXT: movupd xmmword ptr {{\[}}[[PTR_IN]]{{\]}}, [[XMMB]]
+// CHECK-LABEL: sum_f32x2:
+// CHECK: addss xmm{{[0-9]}}, xmm{{[0-9]}}
+// CHECK-NEXT: addss xmm{{[0-9]}}, xmm{{[0-9]}}
 // CHECK-NEXT: ret
 #[no_mangle]
-pub fn sum_f64x2(a: [f64; 2], b: [f64; 2]) -> [f64; 2] {
+pub fn sum_f32x2(a: [f32; 2], b: [f32; 2]) -> [f32; 2] {
     [
         a[0] + b[0],
         a[1] + b[1],
diff --git a/src/test/codegen/homogeneous-floats.rs b/src/test/codegen/homogeneous-floats.rs
index cf8a62e488ee1..0b729156d2842 100644
--- a/src/test/codegen/homogeneous-floats.rs
+++ b/src/test/codegen/homogeneous-floats.rs
@@ -13,7 +13,7 @@ pub struct Foo {
     bar4: f32,
 }
 
-// CHECK: define i64 @array_f32x2(i64 %0, i64 %1)
+// CHECK: define [2 x float] @array_f32x2([2 x float] %0, [2 x float] %1)
 #[no_mangle]
 pub fn array_f32x2(a: [f32; 2], b: [f32; 2]) -> [f32; 2] {
     todo!()
 }

From a84f4c9259aafa9fbd1aaa4ba212e45f7f1fac2a Mon Sep 17 00:00:00 2001
From: Urgau
Date: Sat, 2 Jul 2022 18:12:41 +0200
Subject: [PATCH 09/12] Let LLVM also handle small aggregates

---
 compiler/rustc_middle/src/ty/layout.rs | 58 +++++++++++++-------------
 1 file changed, 29 insertions(+), 29 deletions(-)

diff --git a/compiler/rustc_middle/src/ty/layout.rs b/compiler/rustc_middle/src/ty/layout.rs
index f5fe117c403b6..36c4cfdc38028 100644
--- a/compiler/rustc_middle/src/ty/layout.rs
+++ b/compiler/rustc_middle/src/ty/layout.rs
@@ -14,7 +14,7 @@ use rustc_session::{config::OptLevel, DataTypeKind, FieldInfo, SizeKind, Variant
 use rustc_span::symbol::Symbol;
 use rustc_span::{Span, DUMMY_SP};
 use rustc_target::abi::call::{
-    ArgAbi, ArgAttribute, ArgAttributes, ArgExtension, Conv, FnAbi, PassMode, Reg, RegKind,
+    ArgAbi, ArgAttribute, ArgAttributes, ArgExtension, Conv, FnAbi, PassMode, /* Reg, RegKind, */
 };
 use rustc_target::abi::*;
 use rustc_target::spec::{abi::Abi as SpecAbi, HasTargetSpec, PanicStrategy, Target};
@@ -3340,16 +3340,16 @@ impl<'tcx> LayoutCx<'tcx, TyCtxt<'tcx>> {
         Ok(self.tcx.arena.alloc(fn_abi))
     }
 
-    /// Small heuristic for determining if layout has any float primitive
-    fn has_all_float(&self, layout: &'_ TyAndLayout<'tcx>) -> bool {
-        match layout.abi {
-            Abi::Uninhabited | Abi::Vector { .. } => false,
-            Abi::Scalar(scalar) => matches!(scalar.primitive(), Primitive::F32 | Primitive::F64),
-            Abi::ScalarPair(..) | Abi::Aggregate { .. } => {
-                (0..layout.fields.count()).all(|i| self.has_all_float(&layout.field(self, i)))
-            }
-        }
-    }
+    // /// Small heuristic for determining if layout has any float primitive
+    // fn has_all_float(&self, layout: &'_ TyAndLayout<'tcx>) -> bool {
+    //     match layout.abi {
+    //         Abi::Uninhabited | Abi::Vector { .. } => false,
+    //         Abi::Scalar(scalar) => matches!(scalar.primitive(), Primitive::F32 | Primitive::F64),
+    //         Abi::ScalarPair(..) | Abi::Aggregate { .. } => {
+    //             (0..layout.fields.count()).all(|i| self.has_all_float(&layout.field(self, i)))
+    //         }
+    //     }
+    // }
 
     fn fn_abi_adjust_for_abi(
         &self,
@@ -3380,24 +3380,24 @@ impl<'tcx> LayoutCx<'tcx, TyCtxt<'tcx>> {
                 if arg.layout.is_unsized() || size > max_by_val_size {
                     arg.make_indirect();
-                } else if self.has_all_float(&arg.layout) {
-                    // We don't want to aggregate floats as an aggregates of Integer
-                    // because this will hurt the generated assembly (#93490)
-                    //
-                    // As an optimization we want to pass homogeneous aggregate of floats
-                    // greater than pointer size as indirect
-                    if size > Pointer.size(self) {
-                        arg.make_indirect();
-                    }
-                } else {
-                    // We want to pass small aggregates as immediates, but using
-                    // a LLVM aggregate type for this leads to bad optimizations,
-                    // so we pick an appropriately sized integer type instead.
-                    //
-                    // NOTE: This is sub-optimal because in the case of (f32, f32, u32, u32)
-                    // we could do ([f32; 2], u64) which is better but this is the best we
-                    // can do right now.
-                    arg.cast_to(Reg { kind: RegKind::Integer, size });
+                // } else if self.has_all_float(&arg.layout) {
+                //     // We don't want to aggregate floats as an aggregates of Integer
+                //     // because this will hurt the generated assembly (#93490)
+                //     //
+                //     // As an optimization we want to pass homogeneous aggregate of floats
+                //     // greater than pointer size as indirect
+                //     if size > Pointer.size(self) {
+                //         arg.make_indirect();
+                //     }
+                // } else {
+                //     // We want to pass small aggregates as immediates, but using
+                //     // a LLVM aggregate type for this leads to bad optimizations,
+                //     // so we pick an appropriately sized integer type instead.
+                //     //
+                //     // NOTE: This is sub-optimal because in the case of (f32, f32, u32, u32)
+                //     // we could do ([f32; 2], u64) which is better but this is the best we
+                //     // can do right now.
+                //     arg.cast_to(Reg { kind: RegKind::Integer, size });
                 }
             }

From 0c1451cd57c0312989c0a26f33722799667fe21f Mon Sep 17 00:00:00 2001
From: Urgau
Date: Sat, 2 Jul 2022 21:36:28 +0200
Subject: [PATCH 10/12] Revert "Revert "Use nbdd0121 suggestion for reducing
 the perf impact""

This reverts commit e136c3a9348200c261b9b3c1c50a2f6f6a68b4bd.
---
 compiler/rustc_middle/src/ty/layout.rs        | 58 +++++++++----------
 src/test/assembly/x86-64-homogenous-floats.rs | 11 ++--
 src/test/codegen/homogeneous-floats.rs        |  2 +-
 3 files changed, 36 insertions(+), 35 deletions(-)

diff --git a/compiler/rustc_middle/src/ty/layout.rs b/compiler/rustc_middle/src/ty/layout.rs
index 36c4cfdc38028..6696da5062256 100644
--- a/compiler/rustc_middle/src/ty/layout.rs
+++ b/compiler/rustc_middle/src/ty/layout.rs
@@ -14,7 +14,7 @@ use rustc_session::{config::OptLevel, DataTypeKind, FieldInfo, SizeKind, Variant
 use rustc_span::symbol::Symbol;
 use rustc_span::{Span, DUMMY_SP};
 use rustc_target::abi::call::{
-    ArgAbi, ArgAttribute, ArgAttributes, ArgExtension, Conv, FnAbi, PassMode, /* Reg, RegKind, */
+    ArgAbi, ArgAttribute, ArgAttributes, ArgExtension, Conv, FnAbi, PassMode, Reg, RegKind,
 };
 use rustc_target::abi::*;
 use rustc_target::spec::{abi::Abi as SpecAbi, HasTargetSpec, PanicStrategy, Target};
@@ -3340,16 +3340,16 @@ impl<'tcx> LayoutCx<'tcx, TyCtxt<'tcx>> {
         Ok(self.tcx.arena.alloc(fn_abi))
     }
 
-    // /// Small heuristic for determining if layout has any float primitive
-    // fn has_all_float(&self, layout: &'_ TyAndLayout<'tcx>) -> bool {
-    //     match layout.abi {
-    //         Abi::Uninhabited | Abi::Vector { .. } => false,
-    //         Abi::Scalar(scalar) => matches!(scalar.primitive(), Primitive::F32 | Primitive::F64),
-    //         Abi::ScalarPair(..) | Abi::Aggregate { .. } => {
-    //             (0..layout.fields.count()).all(|i| self.has_all_float(&layout.field(self, i)))
-    //         }
-    //     }
-    // }
+    /// Small heuristic for determining if layout has any float primitive
+    fn has_all_float(&self, layout: &'_ TyAndLayout<'tcx>) -> bool {
+        match layout.abi {
+            Abi::Uninhabited | Abi::Vector { .. } => false,
+            Abi::Scalar(scalar) => matches!(scalar.primitive(), Primitive::F32 | Primitive::F64),
+            Abi::ScalarPair(..) | Abi::Aggregate { .. } => {
+                (0..layout.fields.count()).all(|i| self.has_all_float(&layout.field(self, i)))
+            }
+        }
+    }
 
     fn fn_abi_adjust_for_abi(
         &self,
@@ -3375,29 +3375,27 @@ impl<'tcx> LayoutCx<'tcx, TyCtxt<'tcx>> {
                 // Pass and return structures up to 2 pointers in size by value,
                 // matching `ScalarPair`. LLVM will usually pass these in 2 registers
                 // which is more efficient than by-ref.
-                let max_by_val_size = Pointer.size(self) * 2;
+                let ptr_size = Pointer.size(self);
+                let max_by_val_size = ptr_size * 2;
                 let size = arg.layout.size;
 
                 if arg.layout.is_unsized() || size > max_by_val_size {
                     arg.make_indirect();
-                // } else if self.has_all_float(&arg.layout) {
-                //     // We don't want to aggregate floats as an aggregates of Integer
-                //     // because this will hurt the generated assembly (#93490)
-                //     //
-                //     // As an optimization we want to pass homogeneous aggregate of floats
-                //     // greater than pointer size as indirect
-                //     if size > Pointer.size(self) {
-                //         arg.make_indirect();
-                //     }
-                // } else {
-                //     // We want to pass small aggregates as immediates, but using
-                //     // a LLVM aggregate type for this leads to bad optimizations,
-                //     // so we pick an appropriately sized integer type instead.
-                //     //
-                //     // NOTE: This is sub-optimal because in the case of (f32, f32, u32, u32)
-                //     // we could do ([f32; 2], u64) which is better but this is the best we
-                //     // can do right now.
-                //     arg.cast_to(Reg { kind: RegKind::Integer, size });
+                } else if size > ptr_size && self.has_all_float(&arg.layout) {
+                    // We don't want to aggregate floats as an aggregates of Integer
+                    // because this will hurt the generated assembly (#93490) but as an
+                    // optimization we want to pass homogeneous aggregate of floats
+                    // greater than pointer size as indirect.
+                    arg.make_indirect();
+                } else {
+                    // We want to pass small aggregates as immediates, but using
+                    // a LLVM aggregate type for this leads to bad optimizations,
+                    // so we pick an appropriately sized integer type instead.
+                    //
+                    // NOTE: This is sub-optimal because in the case of (f32, f32, u32, u32)
+                    // we could do ([f32; 2], u64) which is better but this is the best we
+                    // can do right now.
+                    arg.cast_to(Reg { kind: RegKind::Integer, size });
                 }
             }
diff --git a/src/test/assembly/x86-64-homogenous-floats.rs b/src/test/assembly/x86-64-homogenous-floats.rs
index 5b725bab07790..00b434a688d67 100644
--- a/src/test/assembly/x86-64-homogenous-floats.rs
+++ b/src/test/assembly/x86-64-homogenous-floats.rs
@@ -15,12 +15,15 @@ pub fn sum_f32(a: f32, b: f32) -> f32 {
     a + b
 }
 
-// CHECK-LABEL: sum_f32x2:
-// CHECK: addss xmm{{[0-9]}}, xmm{{[0-9]}}
-// CHECK-NEXT: addss xmm{{[0-9]}}, xmm{{[0-9]}}
+// CHECK-LABEL: sum_f64x2:
+// CHECK: mov rax, [[PTR_IN:.*]]
+// CHECK-NEXT: movupd [[XMMA:xmm[0-9]]], xmmword ptr [rsi]
+// CHECK-NEXT: movupd [[XMMB:xmm[0-9]]], xmmword ptr [rdx]
+// CHECK-NEXT: addpd [[XMMB]], [[XMMA]]
+// CHECK-NEXT: movupd xmmword ptr {{\[}}[[PTR_IN]]{{\]}}, [[XMMB]]
 // CHECK-NEXT: ret
 #[no_mangle]
-pub fn sum_f32x2(a: [f32; 2], b: [f32; 2]) -> [f32; 2] {
+pub fn sum_f64x2(a: [f64; 2], b: [f64; 2]) -> [f64; 2] {
     [
         a[0] + b[0],
         a[1] + b[1],
diff --git a/src/test/codegen/homogeneous-floats.rs b/src/test/codegen/homogeneous-floats.rs
index 0b729156d2842..cf8a62e488ee1 100644
--- a/src/test/codegen/homogeneous-floats.rs
+++ b/src/test/codegen/homogeneous-floats.rs
@@ -13,7 +13,7 @@ pub struct Foo {
     bar4: f32,
 }
 
-// CHECK: define [2 x float] @array_f32x2([2 x float] %0, [2 x float] %1)
+// CHECK: define i64 @array_f32x2(i64 %0, i64 %1)
 #[no_mangle]
 pub fn array_f32x2(a: [f32; 2], b: [f32; 2]) -> [f32; 2] {
     todo!()
 }

From c7e8880f484f939a042ddbab964c57aa0237691e Mon Sep 17 00:00:00 2001
From: Urgau
Date: Sun, 3 Jul 2022 14:08:06 +0200
Subject: [PATCH 11/12] Retry with the homogeneous aggregate concept

---
 compiler/rustc_middle/src/ty/layout.rs | 31 ++++++++++++--------------
 src/test/codegen/homogeneous-floats.rs |  2 +-
 2 files changed, 15 insertions(+), 18 deletions(-)

diff --git a/compiler/rustc_middle/src/ty/layout.rs b/compiler/rustc_middle/src/ty/layout.rs
index 6696da5062256..db385b0e1d2d3 100644
--- a/compiler/rustc_middle/src/ty/layout.rs
+++ b/compiler/rustc_middle/src/ty/layout.rs
@@ -14,7 +14,8 @@ use rustc_session::{config::OptLevel, DataTypeKind, FieldInfo, SizeKind, Variant
 use rustc_span::symbol::Symbol;
 use rustc_span::{Span, DUMMY_SP};
 use rustc_target::abi::call::{
-    ArgAbi, ArgAttribute, ArgAttributes, ArgExtension, Conv, FnAbi, PassMode, Reg, RegKind,
+    ArgAbi, ArgAttribute, ArgAttributes, ArgExtension, Conv, FnAbi, HomogeneousAggregate, PassMode,
+    Reg, RegKind,
 };
 use rustc_target::abi::*;
 use rustc_target::spec::{abi::Abi as SpecAbi, HasTargetSpec, PanicStrategy, Target};
@@ -3340,17 +3341,6 @@ impl<'tcx> LayoutCx<'tcx, TyCtxt<'tcx>> {
         Ok(self.tcx.arena.alloc(fn_abi))
     }
 
-    /// Small heuristic for determining if layout has any float primitive
-    fn has_all_float(&self, layout: &'_ TyAndLayout<'tcx>) -> bool {
-        match layout.abi {
-            Abi::Uninhabited | Abi::Vector { .. } => false,
-            Abi::Scalar(scalar) => matches!(scalar.primitive(), Primitive::F32 | Primitive::F64),
-            Abi::ScalarPair(..) | Abi::Aggregate { .. } => {
-                (0..layout.fields.count()).all(|i| self.has_all_float(&layout.field(self, i)))
-            }
-        }
-    }
-
     fn fn_abi_adjust_for_abi(
         &self,
         fn_abi: &mut FnAbi<'tcx, Ty<'tcx>>,
@@ -3381,12 +3371,19 @@ impl<'tcx> LayoutCx<'tcx, TyCtxt<'tcx>> {
                 if arg.layout.is_unsized() || size > max_by_val_size {
                     arg.make_indirect();
-                } else if size > ptr_size && self.has_all_float(&arg.layout) {
+                } else if let Ok(HomogeneousAggregate::Homogeneous(Reg {
+                    kind: RegKind::Float,
+                    ..
+                })) = arg.layout.homogeneous_aggregate(self)
+                {
                     // We don't want to aggregate floats as an aggregates of Integer
-                    // because this will hurt the generated assembly (#93490) but as an
-                    // optimization we want to pass homogeneous aggregate of floats
-                    // greater than pointer size as indirect.
-                    arg.make_indirect();
+                    // because this will hurt the generated assembly (#93490)
+                    //
+                    // As an optimization we want to pass homogeneous aggregate of floats
+                    // greater than pointer size as indirect
+                    if size > ptr_size {
+                        arg.make_indirect();
+                    }
                 } else {
                     // We want to pass small aggregates as immediates, but using
                     // a LLVM aggregate type for this leads to bad optimizations,
diff --git a/src/test/codegen/homogeneous-floats.rs b/src/test/codegen/homogeneous-floats.rs
index cf8a62e488ee1..0b729156d2842 100644
--- a/src/test/codegen/homogeneous-floats.rs
+++ b/src/test/codegen/homogeneous-floats.rs
@@ -13,7 +13,7 @@ pub struct Foo {
     bar4: f32,
 }
 
-// CHECK: define i64 @array_f32x2(i64 %0, i64 %1)
+// CHECK: define [2 x float] @array_f32x2([2 x float] %0, [2 x float] %1)
 #[no_mangle]
 pub fn array_f32x2(a: [f32; 2], b: [f32; 2]) -> [f32; 2] {
     todo!()
 }

From 683e13f3476ea488e77b2a9e7babcab051957aba Mon Sep 17 00:00:00 2001
From: Urgau
Date: Tue, 5 Jul 2022 11:43:42 +0200
Subject: [PATCH 12/12] Revert max_by_val_size * 2

---
 compiler/rustc_middle/src/ty/layout.rs |  6 ++----
 src/test/codegen/array-equality.rs     |  3 ++-
 src/test/codegen/issue-37945.rs        |  4 ++--
 src/test/codegen/union-abi.rs          |  2 +-
 4 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/compiler/rustc_middle/src/ty/layout.rs b/compiler/rustc_middle/src/ty/layout.rs
index db385b0e1d2d3..498cbf0a462ba 100644
--- a/compiler/rustc_middle/src/ty/layout.rs
+++ b/compiler/rustc_middle/src/ty/layout.rs
@@ -3362,11 +3362,9 @@ impl<'tcx> LayoutCx<'tcx, TyCtxt<'tcx>> {
         match arg.layout.abi {
             Abi::Aggregate { .. } => {
-                // Pass and return structures up to 2 pointers in size by value,
-                // matching `ScalarPair`. LLVM will usually pass these in 2 registers
-                // which is more efficient than by-ref.
+                // Pass and return structures up to 1 pointer in size by value.
                 let ptr_size = Pointer.size(self);
-                let max_by_val_size = ptr_size * 2;
+                let max_by_val_size = ptr_size;
                 let size = arg.layout.size;
 
                 if arg.layout.is_unsized() || size > max_by_val_size {
diff --git a/src/test/codegen/array-equality.rs b/src/test/codegen/array-equality.rs
index b509bc20775b0..cd5e82a9205c1 100644
--- a/src/test/codegen/array-equality.rs
+++ b/src/test/codegen/array-equality.rs
@@ -56,7 +56,8 @@ pub fn array_eq_zero_short(x: [u16; 3]) -> bool {
 #[no_mangle]
 pub fn array_eq_zero_mid(x: [u16; 8]) -> bool {
     // CHECK-NEXT: start:
-    // CHECK-NEXT: %[[EQ:.+]] = icmp eq i128 %0, 0
+    // CHECK: %[[LOAD:.+]] = load i128,
+    // CHECK-NEXT: %[[EQ:.+]] = icmp eq i128 %[[LOAD]], 0
     // CHECK-NEXT: ret i1 %[[EQ]]
     x == [0; 8]
 }
diff --git a/src/test/codegen/issue-37945.rs b/src/test/codegen/issue-37945.rs
index 4234c26b5e89b..24d3dfb237e70 100644
--- a/src/test/codegen/issue-37945.rs
+++ b/src/test/codegen/issue-37945.rs
@@ -17,7 +17,7 @@ pub fn is_empty_1(xs: Iter) -> bool {
 // CHECK-NEXT: start:
 // CHECK-NEXT: [[A:%.*]] = icmp ne {{i32\*|ptr}} %xs.1, null
 // CHECK-NEXT: tail call void @llvm.assume(i1 [[A]])
-// CHECK-NEXT: [[B:%.*]] = icmp eq {{i32\*|ptr}} %xs.1, %xs.0
+// CHECK-NEXT: [[B:%.*]] = icmp eq {{i32\*|ptr}} {{%xs.1, %xs.0|%xs.0, %xs.1}}
 // CHECK-NEXT: ret i1 [[B:%.*]]
     {xs}.next().is_none()
 }
@@ -28,7 +28,7 @@ pub fn is_empty_2(xs: Iter) -> bool {
 // CHECK-NEXT: start:
 // CHECK-NEXT: [[C:%.*]] = icmp ne {{i32\*|ptr}} %xs.1, null
 // CHECK-NEXT: tail call void @llvm.assume(i1 [[C]])
-// CHECK-NEXT: [[D:%.*]] = icmp eq {{i32\*|ptr}} %xs.1, %xs.0
+// CHECK-NEXT: [[D:%.*]] = icmp eq {{i32\*|ptr}} {{%xs.1, %xs.0|%xs.0, %xs.1}}
 // CHECK-NEXT: ret i1 [[D:%.*]]
     xs.map(|&x| x).next().is_none()
 }
diff --git a/src/test/codegen/union-abi.rs b/src/test/codegen/union-abi.rs
index 6e3866778a391..c18f2a49fc369 100644
--- a/src/test/codegen/union-abi.rs
+++ b/src/test/codegen/union-abi.rs
@@ -65,7 +65,7 @@ pub fn test_UnionU128(_: UnionU128) -> UnionU128 { loop {} }
 #[repr(C)]
 pub union CUnionU128{a:u128}
-// CHECK: define void @test_CUnionU128(i128 %0)
+// CHECK: define void @test_CUnionU128({{%CUnionU128\*|ptr}} {{.*}} %_1)
 #[no_mangle]
 pub fn test_CUnionU128(_: CUnionU128) { loop {} }
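
Taken together, patches 11 and 12 leave the Rust ABI fixup with a three-way decision for aggregates: unsized or larger-than-pointer values go indirect, homogeneous float aggregates are left alone so LLVM can keep them in SSE registers, and the remaining small aggregates are cast to a same-sized integer. The free-standing sketch below restates that control flow only; PassStyle, fixup_aggregate, and the all_float flag are illustrative stand-ins invented for this note (the real code works on rustc's TyAndLayout, ArgAbi, and homogeneous_aggregate), so treat it as a model of the heuristic rather than the actual rustc_middle implementation.

// Hypothetical model of the final fixup heuristic; none of these
// types or functions exist in rustc under these names.
#[derive(Debug, PartialEq)]
enum PassStyle {
    /// Leave the aggregate as-is (e.g. an all-float [f32; 2], which
    /// LLVM can keep in SSE registers instead of GPRs, per #93490).
    Direct,
    /// Cast to an integer of the same total size (e.g. i64).
    CastToInt { bytes: u64 },
    /// Pass behind a pointer.
    Indirect,
}

/// `all_float` stands in for the "homogeneous aggregate of Float
/// registers" check; `ptr_size` is the pointer width in bytes.
fn fixup_aggregate(size: u64, is_unsized: bool, all_float: bool, ptr_size: u64) -> PassStyle {
    // Patch 12 reverts the earlier `* 2`, so by-value passing now
    // stops at one pointer in size.
    let max_by_val_size = ptr_size;
    if is_unsized || size > max_by_val_size {
        // Unsized or too large: pass indirectly.
        PassStyle::Indirect
    } else if all_float {
        // With max_by_val_size == ptr_size, the size check inside the
        // float branch of the real code can never fire here, so this
        // branch collapses to "leave it direct".
        PassStyle::Direct
    } else {
        // Other small aggregates become a same-sized integer, because
        // an LLVM aggregate type leads to worse optimizations.
        PassStyle::CastToInt { bytes: size }
    }
}

fn main() {
    let ptr = 8; // pointer size in bytes on x86_64
    // [f32; 2]: 8 bytes, all floats -> stays a float aggregate.
    assert_eq!(fixup_aggregate(8, false, true, ptr), PassStyle::Direct);
    // [f32; 4]: 16 bytes -> indirect (the sret in the codegen test).
    assert_eq!(fixup_aggregate(16, false, true, ptr), PassStyle::Indirect);
    // A mixed 8-byte struct such as (f32, u32) -> cast to i64.
    assert_eq!(fixup_aggregate(8, false, false, ptr), PassStyle::CastToInt { bytes: 8 });
}

On x86_64 this model reproduces the expectations in the final codegen test above: array_f32x2 keeps its [2 x float] signature, array_f32x4 is returned through an sret pointer, and small mixed aggregates are still passed as integers.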