diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 4cf0e5fba5378..8032154a7365b 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -65,7 +65,7 @@ jobs:
     defaults:
       run:
         shell: ${{ contains(matrix.os, 'windows') && 'msys2 {0}' || 'bash' }}
-    timeout-minutes: 600
+    timeout-minutes: 240
     env:
       CI_JOB_NAME: ${{ matrix.image }}
       CARGO_REGISTRIES_CRATES_IO_PROTOCOL: sparse
diff --git a/compiler/rustc_codegen_ssa/src/target_features.rs b/compiler/rustc_codegen_ssa/src/target_features.rs
index bcddfe9fb9cb0..22006c0b4712a 100644
--- a/compiler/rustc_codegen_ssa/src/target_features.rs
+++ b/compiler/rustc_codegen_ssa/src/target_features.rs
@@ -80,6 +80,7 @@ pub fn from_target_feature(
                 Some(sym::loongarch_target_feature) => rust_features.loongarch_target_feature,
                 Some(sym::lahfsahf_target_feature) => rust_features.lahfsahf_target_feature,
                 Some(sym::prfchw_target_feature) => rust_features.prfchw_target_feature,
+                Some(sym::x86_amx_intrinsics) => rust_features.x86_amx_intrinsics,
                 Some(name) => bug!("unknown target feature gate {}", name),
                 None => true,
             };
diff --git a/compiler/rustc_feature/src/unstable.rs b/compiler/rustc_feature/src/unstable.rs
index 948499fb38fbf..dfbe270822c0a 100644
--- a/compiler/rustc_feature/src/unstable.rs
+++ b/compiler/rustc_feature/src/unstable.rs
@@ -640,6 +640,8 @@ declare_features! (
     (unstable, unsized_tuple_coercion, "1.20.0", Some(42877)),
     /// Allows using the `#[used(linker)]` (or `#[used(compiler)]`) attribute.
     (unstable, used_with_arg, "1.60.0", Some(93798)),
+    /// Allows use of x86 `AMX` target-feature attributes and intrinsics
+    (unstable, x86_amx_intrinsics, "CURRENT_RUSTC_VERSION", Some(126622)),
     /// Allows `do yeet` expressions
     (unstable, yeet_expr, "1.62.0", Some(96373)),
     // !!!!    !!!!    !!!!    !!!!   !!!!    !!!!    !!!!    !!!!    !!!!    !!!!    !!!!
diff --git a/compiler/rustc_middle/src/traits/solve.rs b/compiler/rustc_middle/src/traits/solve.rs
index 7bc4c60f10272..f659bf8125a0e 100644
--- a/compiler/rustc_middle/src/traits/solve.rs
+++ b/compiler/rustc_middle/src/traits/solve.rs
@@ -8,10 +8,6 @@ use crate::ty::{
     self, FallibleTypeFolder, TyCtxt, TypeFoldable, TypeFolder, TypeVisitable, TypeVisitor,
 };
 
-mod cache;
-
-pub use cache::EvaluationCache;
-
 pub type Goal<'tcx, P> = ir::solve::Goal<TyCtxt<'tcx>, P>;
 pub type QueryInput<'tcx, P> = ir::solve::QueryInput<TyCtxt<'tcx>, P>;
 pub type QueryResult<'tcx> = ir::solve::QueryResult<TyCtxt<'tcx>>;
diff --git a/compiler/rustc_middle/src/traits/solve/cache.rs b/compiler/rustc_middle/src/traits/solve/cache.rs
deleted file mode 100644
index 72a8d4eb4050c..0000000000000
--- a/compiler/rustc_middle/src/traits/solve/cache.rs
+++ /dev/null
@@ -1,121 +0,0 @@
-use super::{inspect, CanonicalInput, QueryResult};
-use crate::ty::TyCtxt;
-use rustc_data_structures::fx::{FxHashMap, FxHashSet};
-use rustc_data_structures::sync::Lock;
-use rustc_query_system::cache::WithDepNode;
-use rustc_query_system::dep_graph::DepNodeIndex;
-use rustc_session::Limit;
-use rustc_type_ir::solve::CacheData;
-
-/// The trait solver cache used by `-Znext-solver`.
-///
-/// FIXME(@lcnr): link to some official documentation of how
-/// this works.
-#[derive(Default)]
-pub struct EvaluationCache<'tcx> {
-    map: Lock<FxHashMap<CanonicalInput<'tcx>, CacheEntry<'tcx>>>,
-}
-
-impl<'tcx> rustc_type_ir::inherent::EvaluationCache<TyCtxt<'tcx>> for &'tcx EvaluationCache<'tcx> {
-    /// Insert a final result into the global cache.
-    fn insert(
-        &self,
-        tcx: TyCtxt<'tcx>,
-        key: CanonicalInput<'tcx>,
-        proof_tree: Option<&'tcx inspect::CanonicalGoalEvaluationStep<TyCtxt<'tcx>>>,
-        additional_depth: usize,
-        encountered_overflow: bool,
-        cycle_participants: FxHashSet<CanonicalInput<'tcx>>,
-        dep_node: DepNodeIndex,
-        result: QueryResult<'tcx>,
-    ) {
-        let mut map = self.map.borrow_mut();
-        let entry = map.entry(key).or_default();
-        let data = WithDepNode::new(dep_node, QueryData { result, proof_tree });
-        entry.cycle_participants.extend(cycle_participants);
-        if encountered_overflow {
-            entry.with_overflow.insert(additional_depth, data);
-        } else {
-            entry.success = Some(Success { data, additional_depth });
-        }
-
-        if cfg!(debug_assertions) {
-            drop(map);
-            let expected = CacheData { result, proof_tree, additional_depth, encountered_overflow };
-            let actual = self.get(tcx, key, [], additional_depth);
-            if !actual.as_ref().is_some_and(|actual| expected == *actual) {
-                bug!("failed to lookup inserted element for {key:?}: {expected:?} != {actual:?}");
-            }
-        }
-    }
-
-    /// Try to fetch a cached result, checking the recursion limit
-    /// and handling root goals of coinductive cycles.
-    ///
-    /// If this returns `Some` the cache result can be used.
-    fn get(
-        &self,
-        tcx: TyCtxt<'tcx>,
-        key: CanonicalInput<'tcx>,
-        stack_entries: impl IntoIterator<Item = CanonicalInput<'tcx>>,
-        available_depth: usize,
-    ) -> Option<CacheData<TyCtxt<'tcx>>> {
-        let map = self.map.borrow();
-        let entry = map.get(&key)?;
-
-        for stack_entry in stack_entries {
-            if entry.cycle_participants.contains(&stack_entry) {
-                return None;
-            }
-        }
-
-        if let Some(ref success) = entry.success {
-            if Limit(available_depth).value_within_limit(success.additional_depth) {
-                let QueryData { result, proof_tree } = success.data.get(tcx);
-                return Some(CacheData {
-                    result,
-                    proof_tree,
-                    additional_depth: success.additional_depth,
-                    encountered_overflow: false,
-                });
-            }
-        }
-
-        entry.with_overflow.get(&available_depth).map(|e| {
-            let QueryData { result, proof_tree } = e.get(tcx);
-            CacheData {
-                result,
-                proof_tree,
-                additional_depth: available_depth,
-                encountered_overflow: true,
-            }
-        })
-    }
-}
-
-struct Success<'tcx> {
-    data: WithDepNode<QueryData<'tcx>>,
-    additional_depth: usize,
-}
-
-#[derive(Clone, Copy)]
-pub struct QueryData<'tcx> {
-    pub result: QueryResult<'tcx>,
-    pub proof_tree: Option<&'tcx inspect::CanonicalGoalEvaluationStep<TyCtxt<'tcx>>>,
-}
-
-/// The cache entry for a goal `CanonicalInput`.
-///
-/// This contains results whose computation never hit the
-/// recursion limit in `success`, and all results which hit
-/// the recursion limit in `with_overflow`.
-#[derive(Default)]
-struct CacheEntry<'tcx> {
-    success: Option<Success<'tcx>>,
-    /// We have to be careful when caching roots of cycles.
-    ///
-    /// See the doc comment of `StackEntry::cycle_participants` for more
-    /// details.
-    cycle_participants: FxHashSet<CanonicalInput<'tcx>>,
-    with_overflow: FxHashMap<usize, WithDepNode<QueryData<'tcx>>>,
-}
diff --git a/compiler/rustc_middle/src/ty/context.rs b/compiler/rustc_middle/src/ty/context.rs
index aee42bfe3aaca..9e24ea485b26e 100644
--- a/compiler/rustc_middle/src/ty/context.rs
+++ b/compiler/rustc_middle/src/ty/context.rs
@@ -59,6 +59,7 @@ use rustc_hir::lang_items::LangItem;
 use rustc_hir::{HirId, Node, TraitCandidate};
 use rustc_index::IndexVec;
 use rustc_macros::{HashStable, TyDecodable, TyEncodable};
+use rustc_query_system::cache::WithDepNode;
 use rustc_query_system::dep_graph::DepNodeIndex;
 use rustc_query_system::ich::StableHashingContext;
 use rustc_serialize::opaque::{FileEncodeResult, FileEncoder};
@@ -75,7 +76,7 @@ use rustc_type_ir::fold::TypeFoldable;
 use rustc_type_ir::lang_items::TraitSolverLangItem;
 use rustc_type_ir::solve::SolverMode;
 use rustc_type_ir::TyKind::*;
-use rustc_type_ir::{CollectAndApply, Interner, TypeFlags, WithCachedTypeInfo};
+use rustc_type_ir::{search_graph, CollectAndApply, Interner, TypeFlags, WithCachedTypeInfo};
 use tracing::{debug, instrument};
 
 use std::assert_matches::assert_matches;
@@ -164,12 +165,34 @@ impl<'tcx> Interner for TyCtxt<'tcx> {
     type Clause = Clause<'tcx>;
     type Clauses = ty::Clauses<'tcx>;
 
-    type EvaluationCache = &'tcx solve::EvaluationCache<'tcx>;
+    /// A value stored together with the dep node index it was created with.
+    type Tracked<T: fmt::Debug + Clone> = WithDepNode<T>;
+    /// Wraps `data` and `dep_node` in a `WithDepNode`.
+    fn mk_tracked<T: fmt::Debug + Clone>(
+        self,
+        data: T,
+        dep_node: DepNodeIndex,
+    ) -> Self::Tracked<T> {
+        WithDepNode::new(dep_node, data)
+    }
+    /// Returns the value held by `tracked`, as produced by `WithDepNode::get`.
+    // NOTE(review): `get` presumably also registers a dep-graph read — confirm.
+    fn get_tracked<T: fmt::Debug + Clone>(self, tracked: &Self::Tracked<T>) -> T {
+        tracked.get(self)
+    }
 
-    fn evaluation_cache(self, mode: SolverMode) -> &'tcx solve::EvaluationCache<'tcx> {
+    /// Runs `f` with mutable access to the new-solver global cache selected by
+    /// `mode` and returns whatever `f` returns.
+    ///
+    /// The lock on the selected cache is held for as long as `f` runs.
+    fn with_global_cache<R>(
+        self,
+        mode: SolverMode,
+        f: impl FnOnce(&mut search_graph::GlobalCache<Self>) -> R,
+    ) -> R {
         match mode {
-            SolverMode::Normal => &self.new_solver_evaluation_cache,
-            SolverMode::Coherence => &self.new_solver_coherence_evaluation_cache,
+            SolverMode::Normal => f(&mut *self.new_solver_evaluation_cache.lock()),
+            SolverMode::Coherence => f(&mut *self.new_solver_coherence_evaluation_cache.lock()),
         }
     }
 
@@ -1283,8 +1306,8 @@ pub struct GlobalCtxt<'tcx> {
     pub evaluation_cache: traits::EvaluationCache<'tcx>,
 
     /// Caches the results of goal evaluation in the new solver.
-    pub new_solver_evaluation_cache: solve::EvaluationCache<'tcx>,
-    pub new_solver_coherence_evaluation_cache: solve::EvaluationCache<'tcx>,
+    pub new_solver_evaluation_cache: Lock<search_graph::GlobalCache<TyCtxt<'tcx>>>,
+    pub new_solver_coherence_evaluation_cache: Lock<search_graph::GlobalCache<TyCtxt<'tcx>>>,
 
     pub canonical_param_env_cache: CanonicalParamEnvCache<'tcx>,
 
diff --git a/compiler/rustc_next_trait_solver/src/solve/eval_ctxt/mod.rs b/compiler/rustc_next_trait_solver/src/solve/eval_ctxt/mod.rs
index c90f8e761633b..c23bc8f09ad16 100644
--- a/compiler/rustc_next_trait_solver/src/solve/eval_ctxt/mod.rs
+++ b/compiler/rustc_next_trait_solver/src/solve/eval_ctxt/mod.rs
@@ -16,9 +16,9 @@ use crate::delegate::SolverDelegate;
 use crate::solve::inspect::{self, ProofTreeBuilder};
 use crate::solve::search_graph::SearchGraph;
 use crate::solve::{
-    search_graph, CanonicalInput, CanonicalResponse, Certainty, Goal, GoalEvaluationKind,
-    GoalSource, MaybeCause, NestedNormalizationGoals, NoSolution, PredefinedOpaquesData,
-    QueryResult, SolverMode, FIXPOINT_STEP_LIMIT,
+    CanonicalInput, CanonicalResponse, Certainty, Goal, GoalEvaluationKind, GoalSource, MaybeCause,
+    NestedNormalizationGoals, NoSolution, PredefinedOpaquesData, QueryResult, SolverMode,
+    FIXPOINT_STEP_LIMIT,
 };
 
 pub(super) mod canonical;
@@ -72,7 +72,7 @@ where
     /// new placeholders to the caller.
     pub(super) max_input_universe: ty::UniverseIndex,
 
-    pub(super) search_graph: &'a mut SearchGraph<I>,
+    pub(super) search_graph: &'a mut SearchGraph<D>,
 
     nested_goals: NestedGoals<I>,
 
@@ -200,7 +200,7 @@ where
         generate_proof_tree: GenerateProofTree,
         f: impl FnOnce(&mut EvalCtxt<'_, D>) -> R,
     ) -> (R, Option<inspect::GoalEvaluation<I>>) {
-        let mut search_graph = search_graph::SearchGraph::new(delegate.solver_mode());
+        let mut search_graph = SearchGraph::new(delegate.solver_mode());
 
         let mut ecx = EvalCtxt {
             delegate,
@@ -241,7 +241,7 @@ where
     /// and registering opaques from the canonicalized input.
     fn enter_canonical<R>(
         cx: I,
-        search_graph: &'a mut search_graph::SearchGraph<I>,
+        search_graph: &'a mut SearchGraph<D>,
         canonical_input: CanonicalInput<I>,
         canonical_goal_evaluation: &mut ProofTreeBuilder<D>,
         f: impl FnOnce(&mut EvalCtxt<'_, D>, Goal<I, I::Predicate>) -> R,
@@ -296,7 +296,7 @@ where
     #[instrument(level = "debug", skip(cx, search_graph, goal_evaluation), ret)]
     fn evaluate_canonical_goal(
         cx: I,
-        search_graph: &'a mut search_graph::SearchGraph<I>,
+        search_graph: &'a mut SearchGraph<D>,
         canonical_input: CanonicalInput<I>,
         goal_evaluation: &mut ProofTreeBuilder<D>,
     ) -> QueryResult<I> {
diff --git a/compiler/rustc_next_trait_solver/src/solve/inspect/build.rs b/compiler/rustc_next_trait_solver/src/solve/inspect/build.rs
index b50676e8d5327..3e266ddac71fd 100644
--- a/compiler/rustc_next_trait_solver/src/solve/inspect/build.rs
+++ b/compiler/rustc_next_trait_solver/src/solve/inspect/build.rs
@@ -8,7 +8,7 @@ use std::marker::PhantomData;
 use std::mem;
 
 use rustc_type_ir::inherent::*;
-use rustc_type_ir::{self as ty, Interner};
+use rustc_type_ir::{self as ty, search_graph, Interner};
 
 use crate::delegate::SolverDelegate;
 use crate::solve::eval_ctxt::canonical;
@@ -38,7 +38,7 @@ use crate::solve::{
 /// trees. At the end of trait solving `ProofTreeBuilder::finalize`
 /// is called to recursively convert the whole structure to a
 /// finished proof tree.
-pub(in crate::solve) struct ProofTreeBuilder<D, I = <D as SolverDelegate>::Interner>
+pub(crate) struct ProofTreeBuilder<D, I = <D as SolverDelegate>::Interner>
 where
     D: SolverDelegate<Interner = I>,
     I: Interner,
@@ -321,23 +321,6 @@ impl<D: SolverDelegate<Interner = I>, I: Interner> ProofTreeBuilder<D> {
         })
     }
 
-    pub fn finalize_canonical_goal_evaluation(
-        &mut self,
-        cx: I,
-    ) -> Option<I::CanonicalGoalEvaluationStepRef> {
-        self.as_mut().map(|this| match this {
-            DebugSolver::CanonicalGoalEvaluation(evaluation) => {
-                let final_revision = mem::take(&mut evaluation.final_revision).unwrap();
-                let final_revision =
-                    cx.intern_canonical_goal_evaluation_step(final_revision.finalize());
-                let kind = WipCanonicalGoalEvaluationKind::Interned { final_revision };
-                assert_eq!(evaluation.kind.replace(kind), None);
-                final_revision
-            }
-            _ => unreachable!(),
-        })
-    }
-
     pub fn canonical_goal_evaluation(&mut self, canonical_goal_evaluation: ProofTreeBuilder<D>) {
         if let Some(this) = self.as_mut() {
             match (this, *canonical_goal_evaluation.state.unwrap()) {
@@ -571,3 +554,65 @@ impl<D: SolverDelegate<Interner = I>, I: Interner> ProofTreeBuilder<D> {
         }
     }
 }
+
+/// Implements the `rustc_type_ir` search graph's `ProofTreeBuilder` interface
+/// for the solver's own proof tree builder.
+impl<D, I> search_graph::ProofTreeBuilder<I> for ProofTreeBuilder<D>
+where
+    D: SolverDelegate<Interner = I>,
+    I: Interner,
+{
+    /// Tries to record an already interned `proof_tree` as the kind of the
+    /// canonical goal evaluation being built.
+    ///
+    /// Returns `true` when `is_noop()` holds (nothing is recorded) or when the
+    /// proof tree was recorded, and `false` when the builder is not a no-op but
+    /// `proof_tree` is `None`.
+    fn try_apply_proof_tree(
+        &mut self,
+        proof_tree: Option<I::CanonicalGoalEvaluationStepRef>,
+    ) -> bool {
+        if self.is_noop() {
+            return true;
+        }
+
+        let Some(final_revision) = proof_tree else {
+            return false;
+        };
+        let kind = WipCanonicalGoalEvaluationKind::Interned { final_revision };
+        self.canonical_goal_evaluation_kind(kind);
+        true
+    }
+
+    /// Records `ProvisionalCacheHit` as the kind of the canonical goal evaluation.
+    fn on_provisional_cache_hit(&mut self) {
+        let kind = WipCanonicalGoalEvaluationKind::ProvisionalCacheHit;
+        self.canonical_goal_evaluation_kind(kind);
+    }
+
+    /// Records `CycleInStack` as the kind of the canonical goal evaluation.
+    fn on_cycle_in_stack(&mut self) {
+        let kind = WipCanonicalGoalEvaluationKind::CycleInStack;
+        self.canonical_goal_evaluation_kind(kind);
+    }
+
+    /// Finalizes and interns the final revision of the canonical goal
+    /// evaluation, stores it as that evaluation's `Interned` kind and returns it.
+    ///
+    /// Returns `None` when `as_mut` yields no builder state. Panics if the state
+    /// is not a canonical goal evaluation, if it has no final revision, or if
+    /// its kind was already set.
+    fn finalize_canonical_goal_evaluation(
+        &mut self,
+        cx: I,
+    ) -> Option<I::CanonicalGoalEvaluationStepRef> {
+        let DebugSolver::CanonicalGoalEvaluation(evaluation) = self.as_mut()? else {
+            unreachable!()
+        };
+        let step = mem::take(&mut evaluation.final_revision).unwrap().finalize();
+        let final_revision = cx.intern_canonical_goal_evaluation_step(step);
+        let kind = WipCanonicalGoalEvaluationKind::Interned { final_revision };
+        assert_eq!(evaluation.kind.replace(kind), None);
+        Some(final_revision)
+    }
+}
diff --git a/compiler/rustc_next_trait_solver/src/solve/search_graph.rs b/compiler/rustc_next_trait_solver/src/solve/search_graph.rs
index 69d52dcad7a59..fe053a506e712 100644
--- a/compiler/rustc_next_trait_solver/src/solve/search_graph.rs
+++ b/compiler/rustc_next_trait_solver/src/solve/search_graph.rs
@@ -1,599 +1,90 @@
-use std::mem;
+use std::marker::PhantomData;
 
-use rustc_index::{Idx, IndexVec};
-use rustc_type_ir::data_structures::{HashMap, HashSet};
 use rustc_type_ir::inherent::*;
+use rustc_type_ir::search_graph::{self, CycleKind, UsageKind};
+use rustc_type_ir::solve::{CanonicalInput, Certainty, QueryResult};
 use rustc_type_ir::Interner;
-use tracing::debug;
 
+use super::inspect::{self, ProofTreeBuilder};
+use super::FIXPOINT_STEP_LIMIT;
 use crate::delegate::SolverDelegate;
-use crate::solve::inspect::{self, ProofTreeBuilder};
-use crate::solve::{
-    CacheData, CanonicalInput, Certainty, QueryResult, SolverMode, FIXPOINT_STEP_LIMIT,
-};
 
-#[derive(Copy, Clone, PartialEq, Eq, Debug)]
-pub struct SolverLimit(usize);
-
-rustc_index::newtype_index! {
-    #[orderable]
-    #[gate_rustc_only]
-    pub struct StackDepth {}
-}
-
-bitflags::bitflags! {
-    /// Whether and how this goal has been used as the root of a
-    /// cycle. We track the kind of cycle as we're otherwise forced
-    /// to always rerun at least once.
-    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
-    struct HasBeenUsed: u8 {
-        const INDUCTIVE_CYCLE = 1 << 0;
-        const COINDUCTIVE_CYCLE = 1 << 1;
-    }
-}
-
-#[derive(derivative::Derivative)]
-#[derivative(Debug(bound = ""))]
-struct StackEntry<I: Interner> {
-    input: CanonicalInput<I>,
-
-    available_depth: SolverLimit,
-
-    /// The maximum depth reached by this stack entry, only up-to date
-    /// for the top of the stack and lazily updated for the rest.
-    reached_depth: StackDepth,
-
-    /// Whether this entry is a non-root cycle participant.
-    ///
-    /// We must not move the result of non-root cycle participants to the
-    /// global cache. We store the highest stack depth of a head of a cycle
-    /// this goal is involved in. This necessary to soundly cache its
-    /// provisional result.
-    non_root_cycle_participant: Option<StackDepth>,
-
-    encountered_overflow: bool,
-
-    has_been_used: HasBeenUsed,
-
-    /// We put only the root goal of a coinductive cycle into the global cache.
-    ///
-    /// If we were to use that result when later trying to prove another cycle
-    /// participant, we can end up with unstable query results.
-    ///
-    /// See tests/ui/next-solver/coinduction/incompleteness-unstable-result.rs for
-    /// an example of where this is needed.
-    ///
-    /// There can  be multiple roots on the same stack, so we need to track
-    /// cycle participants per root:
-    /// ```plain
-    /// A :- B
-    /// B :- A, C
-    /// C :- D
-    /// D :- C
-    /// ```
-    nested_goals: HashSet<CanonicalInput<I>>,
-    /// Starts out as `None` and gets set when rerunning this
-    /// goal in case we encounter a cycle.
-    provisional_result: Option<QueryResult<I>>,
-}
-
-/// The provisional result for a goal which is not on the stack.
-#[derive(Debug)]
-struct DetachedEntry<I: Interner> {
-    /// The head of the smallest non-trivial cycle involving this entry.
-    ///
-    /// Given the following rules, when proving `A` the head for
-    /// the provisional entry of `C` would be `B`.
-    /// ```plain
-    /// A :- B
-    /// B :- C
-    /// C :- A + B + C
-    /// ```
-    head: StackDepth,
-    result: QueryResult<I>,
-}
-
-/// Stores the stack depth of a currently evaluated goal *and* already
-/// computed results for goals which depend on other goals still on the stack.
-///
-/// The provisional result may depend on whether the stack above it is inductive
-/// or coinductive. Because of this, we store separate provisional results for
-/// each case. If an provisional entry is not applicable, it may be the case
-/// that we already have provisional result while computing a goal. In this case
-/// we prefer the provisional result to potentially avoid fixpoint iterations.
-/// See tests/ui/traits/next-solver/cycles/mixed-cycles-2.rs for an example.
-///
-/// The provisional cache can theoretically result in changes to the observable behavior,
-/// see tests/ui/traits/next-solver/cycles/provisional-cache-impacts-behavior.rs.
-#[derive(derivative::Derivative)]
-#[derivative(Default(bound = ""))]
-struct ProvisionalCacheEntry<I: Interner> {
-    stack_depth: Option<StackDepth>,
-    with_inductive_stack: Option<DetachedEntry<I>>,
-    with_coinductive_stack: Option<DetachedEntry<I>>,
-}
-
-impl<I: Interner> ProvisionalCacheEntry<I> {
-    fn is_empty(&self) -> bool {
-        self.stack_depth.is_none()
-            && self.with_inductive_stack.is_none()
-            && self.with_coinductive_stack.is_none()
-    }
+/// This type is never constructed. We only use it to implement `search_graph::Delegate`
+/// for all types which impl `SolverDelegate`, as doing so directly fails coherence checking.
+pub(super) struct SearchGraphDelegate<D: SolverDelegate> {
+    _marker: PhantomData<D>,
 }
+pub(super) type SearchGraph<D> = search_graph::SearchGraph<SearchGraphDelegate<D>>;
+impl<D, I> search_graph::Delegate for SearchGraphDelegate<D>
+where
+    D: SolverDelegate<Interner = I>,
+    I: Interner,
+{
+    type Cx = D::Interner;
 
-pub(super) struct SearchGraph<I: Interner> {
-    mode: SolverMode,
-    /// The stack of goals currently being computed.
-    ///
-    /// An element is *deeper* in the stack if its index is *lower*.
-    stack: IndexVec<StackDepth, StackEntry<I>>,
-    provisional_cache: HashMap<CanonicalInput<I>, ProvisionalCacheEntry<I>>,
-}
+    const FIXPOINT_STEP_LIMIT: usize = FIXPOINT_STEP_LIMIT;
 
-impl<I: Interner> SearchGraph<I> {
-    pub(super) fn new(mode: SolverMode) -> SearchGraph<I> {
-        Self { mode, stack: Default::default(), provisional_cache: Default::default() }
-    }
+    type ProofTreeBuilder = ProofTreeBuilder<D>;
 
-    pub(super) fn solver_mode(&self) -> SolverMode {
-        self.mode
+    fn recursion_limit(cx: I) -> usize {
+        cx.recursion_limit()
     }
 
-    fn update_parent_goal(&mut self, reached_depth: StackDepth, encountered_overflow: bool) {
-        if let Some(parent) = self.stack.raw.last_mut() {
-            parent.reached_depth = parent.reached_depth.max(reached_depth);
-            parent.encountered_overflow |= encountered_overflow;
-        }
-    }
-
-    pub(super) fn is_empty(&self) -> bool {
-        self.stack.is_empty()
-    }
-
-    /// Returns the remaining depth allowed for nested goals.
-    ///
-    /// This is generally simply one less than the current depth.
-    /// However, if we encountered overflow, we significantly reduce
-    /// the remaining depth of all nested goals to prevent hangs
-    /// in case there is exponential blowup.
-    fn allowed_depth_for_nested(
+    fn initial_provisional_result(
         cx: I,
-        stack: &IndexVec<StackDepth, StackEntry<I>>,
-    ) -> Option<SolverLimit> {
-        if let Some(last) = stack.raw.last() {
-            if last.available_depth.0 == 0 {
-                return None;
-            }
-
-            Some(if last.encountered_overflow {
-                SolverLimit(last.available_depth.0 / 4)
-            } else {
-                SolverLimit(last.available_depth.0 - 1)
-            })
-        } else {
-            Some(SolverLimit(cx.recursion_limit()))
-        }
-    }
-
-    fn stack_coinductive_from(
-        cx: I,
-        stack: &IndexVec<StackDepth, StackEntry<I>>,
-        head: StackDepth,
-    ) -> bool {
-        stack.raw[head.index()..]
-            .iter()
-            .all(|entry| entry.input.value.goal.predicate.is_coinductive(cx))
-    }
-
-    // When encountering a solver cycle, the result of the current goal
-    // depends on goals lower on the stack.
-    //
-    // We have to therefore be careful when caching goals. Only the final result
-    // of the cycle root, i.e. the lowest goal on the stack involved in this cycle,
-    // is moved to the global cache while all others are stored in a provisional cache.
-    //
-    // We update both the head of this cycle to rerun its evaluation until
-    // we reach a fixpoint and all other cycle participants to make sure that
-    // their result does not get moved to the global cache.
-    fn tag_cycle_participants(
-        stack: &mut IndexVec<StackDepth, StackEntry<I>>,
-        usage_kind: HasBeenUsed,
-        head: StackDepth,
-    ) {
-        stack[head].has_been_used |= usage_kind;
-        debug_assert!(!stack[head].has_been_used.is_empty());
-
-        // The current root of these cycles. Note that this may not be the final
-        // root in case a later goal depends on a goal higher up the stack.
-        let mut current_root = head;
-        while let Some(parent) = stack[current_root].non_root_cycle_participant {
-            current_root = parent;
-            debug_assert!(!stack[current_root].has_been_used.is_empty());
-        }
-
-        let (stack, cycle_participants) = stack.raw.split_at_mut(head.index() + 1);
-        let current_cycle_root = &mut stack[current_root.as_usize()];
-        for entry in cycle_participants {
-            entry.non_root_cycle_participant = entry.non_root_cycle_participant.max(Some(head));
-            current_cycle_root.nested_goals.insert(entry.input);
-            current_cycle_root.nested_goals.extend(mem::take(&mut entry.nested_goals));
+        kind: CycleKind,
+        input: CanonicalInput<I>,
+    ) -> QueryResult<I> {
+        match kind {
+            CycleKind::Coinductive => response_no_constraints(cx, input, Certainty::Yes),
+            CycleKind::Inductive => response_no_constraints(cx, input, Certainty::overflow(false)),
         }
     }
 
-    fn clear_dependent_provisional_results(
-        provisional_cache: &mut HashMap<CanonicalInput<I>, ProvisionalCacheEntry<I>>,
-        head: StackDepth,
-    ) {
-        #[allow(rustc::potential_query_instability)]
-        provisional_cache.retain(|_, entry| {
-            if entry.with_coinductive_stack.as_ref().is_some_and(|p| p.head == head) {
-                entry.with_coinductive_stack.take();
-            }
-            if entry.with_inductive_stack.as_ref().is_some_and(|p| p.head == head) {
-                entry.with_inductive_stack.take();
-            }
-            !entry.is_empty()
-        });
-    }
-
-    /// The trait solver behavior is different for coherence
-    /// so we use a separate cache. Alternatively we could use
-    /// a single cache and share it between coherence and ordinary
-    /// trait solving.
-    pub(super) fn global_cache(&self, cx: I) -> I::EvaluationCache {
-        cx.evaluation_cache(self.mode)
-    }
-
-    /// Probably the most involved method of the whole solver.
-    ///
-    /// Given some goal which is proven via the `prove_goal` closure, this
-    /// handles caching, overflow, and coinductive cycles.
-    pub(super) fn with_new_goal<D: SolverDelegate<Interner = I>>(
-        &mut self,
+    fn reached_fixpoint(
         cx: I,
+        kind: UsageKind,
         input: CanonicalInput<I>,
-        inspect: &mut ProofTreeBuilder<D>,
-        mut prove_goal: impl FnMut(&mut Self, &mut ProofTreeBuilder<D>) -> QueryResult<I>,
-    ) -> QueryResult<I> {
-        self.check_invariants();
-        // Check for overflow.
-        let Some(available_depth) = Self::allowed_depth_for_nested(cx, &self.stack) else {
-            if let Some(last) = self.stack.raw.last_mut() {
-                last.encountered_overflow = true;
-            }
-
-            inspect
-                .canonical_goal_evaluation_kind(inspect::WipCanonicalGoalEvaluationKind::Overflow);
-            return Self::response_no_constraints(cx, input, Certainty::overflow(true));
-        };
-
-        if let Some(result) = self.lookup_global_cache(cx, input, available_depth, inspect) {
-            debug!("global cache hit");
-            return result;
-        }
-
-        // Check whether the goal is in the provisional cache.
-        // The provisional result may rely on the path to its cycle roots,
-        // so we have to check the path of the current goal matches that of
-        // the cache entry.
-        let cache_entry = self.provisional_cache.entry(input).or_default();
-        if let Some(entry) = cache_entry
-            .with_coinductive_stack
-            .as_ref()
-            .filter(|p| Self::stack_coinductive_from(cx, &self.stack, p.head))
-            .or_else(|| {
-                cache_entry
-                    .with_inductive_stack
-                    .as_ref()
-                    .filter(|p| !Self::stack_coinductive_from(cx, &self.stack, p.head))
-            })
-        {
-            debug!("provisional cache hit");
-            // We have a nested goal which is already in the provisional cache, use
-            // its result. We do not provide any usage kind as that should have been
-            // already set correctly while computing the cache entry.
-            inspect.canonical_goal_evaluation_kind(
-                inspect::WipCanonicalGoalEvaluationKind::ProvisionalCacheHit,
-            );
-            Self::tag_cycle_participants(&mut self.stack, HasBeenUsed::empty(), entry.head);
-            return entry.result;
-        } else if let Some(stack_depth) = cache_entry.stack_depth {
-            debug!("encountered cycle with depth {stack_depth:?}");
-            // We have a nested goal which directly relies on a goal deeper in the stack.
-            //
-            // We start by tagging all cycle participants, as that's necessary for caching.
-            //
-            // Finally we can return either the provisional response or the initial response
-            // in case we're in the first fixpoint iteration for this goal.
-            inspect.canonical_goal_evaluation_kind(
-                inspect::WipCanonicalGoalEvaluationKind::CycleInStack,
-            );
-            let is_coinductive_cycle = Self::stack_coinductive_from(cx, &self.stack, stack_depth);
-            let usage_kind = if is_coinductive_cycle {
-                HasBeenUsed::COINDUCTIVE_CYCLE
-            } else {
-                HasBeenUsed::INDUCTIVE_CYCLE
-            };
-            Self::tag_cycle_participants(&mut self.stack, usage_kind, stack_depth);
-
-            // Return the provisional result or, if we're in the first iteration,
-            // start with no constraints.
-            return if let Some(result) = self.stack[stack_depth].provisional_result {
-                result
-            } else if is_coinductive_cycle {
-                Self::response_no_constraints(cx, input, Certainty::Yes)
-            } else {
-                Self::response_no_constraints(cx, input, Certainty::overflow(false))
-            };
+        provisional_result: Option<QueryResult<I>>,
+        result: QueryResult<I>,
+    ) -> bool {
+        if let Some(r) = provisional_result {
+            r == result
         } else {
-            // No entry, we push this goal on the stack and try to prove it.
-            let depth = self.stack.next_index();
-            let entry = StackEntry {
-                input,
-                available_depth,
-                reached_depth: depth,
-                non_root_cycle_participant: None,
-                encountered_overflow: false,
-                has_been_used: HasBeenUsed::empty(),
-                nested_goals: Default::default(),
-                provisional_result: None,
-            };
-            assert_eq!(self.stack.push(entry), depth);
-            cache_entry.stack_depth = Some(depth);
-        }
-
-        // This is for global caching, so we properly track query dependencies.
-        // Everything that affects the `result` should be performed within this
-        // `with_anon_task` closure. If computing this goal depends on something
-        // not tracked by the cache key and from outside of this anon task, it
-        // must not be added to the global cache. Notably, this is the case for
-        // trait solver cycles participants.
-        let ((final_entry, result), dep_node) = cx.with_cached_task(|| {
-            for _ in 0..FIXPOINT_STEP_LIMIT {
-                match self.fixpoint_step_in_task(cx, input, inspect, &mut prove_goal) {
-                    StepResult::Done(final_entry, result) => return (final_entry, result),
-                    StepResult::HasChanged => debug!("fixpoint changed provisional results"),
+            match kind {
+                UsageKind::Single(CycleKind::Coinductive) => {
+                    response_no_constraints(cx, input, Certainty::Yes) == result
                 }
+                UsageKind::Single(CycleKind::Inductive) => {
+                    response_no_constraints(cx, input, Certainty::overflow(false)) == result
+                }
+                UsageKind::Mixed => false,
             }
-
-            debug!("canonical cycle overflow");
-            let current_entry = self.stack.pop().unwrap();
-            debug_assert!(current_entry.has_been_used.is_empty());
-            let result = Self::response_no_constraints(cx, input, Certainty::overflow(false));
-            (current_entry, result)
-        });
-
-        let proof_tree = inspect.finalize_canonical_goal_evaluation(cx);
-
-        self.update_parent_goal(final_entry.reached_depth, final_entry.encountered_overflow);
-
-        // We're now done with this goal. In case this goal is involved in a larger cycle
-        // do not remove it from the provisional cache and update its provisional result.
-        // We only add the root of cycles to the global cache.
-        if let Some(head) = final_entry.non_root_cycle_participant {
-            let coinductive_stack = Self::stack_coinductive_from(cx, &self.stack, head);
-
-            let entry = self.provisional_cache.get_mut(&input).unwrap();
-            entry.stack_depth = None;
-            if coinductive_stack {
-                entry.with_coinductive_stack = Some(DetachedEntry { head, result });
-            } else {
-                entry.with_inductive_stack = Some(DetachedEntry { head, result });
-            }
-        } else {
-            self.provisional_cache.remove(&input);
-            let reached_depth = final_entry.reached_depth.as_usize() - self.stack.len();
-            // When encountering a cycle, both inductive and coinductive, we only
-            // move the root into the global cache. We also store all other cycle
-            // participants involved.
-            //
-            // We must not use the global cache entry of a root goal if a cycle
-            // participant is on the stack. This is necessary to prevent unstable
-            // results. See the comment of `StackEntry::nested_goals` for
-            // more details.
-            self.global_cache(cx).insert(
-                cx,
-                input,
-                proof_tree,
-                reached_depth,
-                final_entry.encountered_overflow,
-                final_entry.nested_goals,
-                dep_node,
-                result,
-            )
         }
-
-        self.check_invariants();
-
-        result
     }
 
-    /// Try to fetch a previously computed result from the global cache,
-    /// making sure to only do so if it would match the result of reevaluating
-    /// this goal.
-    fn lookup_global_cache<D: SolverDelegate<Interner = I>>(
-        &mut self,
+    fn on_stack_overflow(
         cx: I,
-        input: CanonicalInput<I>,
-        available_depth: SolverLimit,
         inspect: &mut ProofTreeBuilder<D>,
-    ) -> Option<QueryResult<I>> {
-        let CacheData { result, proof_tree, additional_depth, encountered_overflow } = self
-            .global_cache(cx)
-            // FIXME: Awkward `Limit -> usize -> Limit`.
-            .get(cx, input, self.stack.iter().map(|e| e.input), available_depth.0)?;
-
-        // If we're building a proof tree and the current cache entry does not
-        // contain a proof tree, we do not use the entry but instead recompute
-        // the goal. We simply overwrite the existing entry once we're done,
-        // caching the proof tree.
-        if !inspect.is_noop() {
-            if let Some(final_revision) = proof_tree {
-                let kind = inspect::WipCanonicalGoalEvaluationKind::Interned { final_revision };
-                inspect.canonical_goal_evaluation_kind(kind);
-            } else {
-                return None;
-            }
-        }
-
-        // Adjust the parent goal as if we actually computed this goal.
-        let reached_depth = self.stack.next_index().plus(additional_depth);
-        self.update_parent_goal(reached_depth, encountered_overflow);
-
-        Some(result)
-    }
-}
-
-enum StepResult<I: Interner> {
-    Done(StackEntry<I>, QueryResult<I>),
-    HasChanged,
-}
-
-impl<I: Interner> SearchGraph<I> {
-    /// When we encounter a coinductive cycle, we have to fetch the
-    /// result of that cycle while we are still computing it. Because
-    /// of this we continuously recompute the cycle until the result
-    /// of the previous iteration is equal to the final result, at which
-    /// point we are done.
-    fn fixpoint_step_in_task<D, F>(
-        &mut self,
-        cx: I,
         input: CanonicalInput<I>,
-        inspect: &mut ProofTreeBuilder<D>,
-        prove_goal: &mut F,
-    ) -> StepResult<I>
-    where
-        D: SolverDelegate<Interner = I>,
-        F: FnMut(&mut Self, &mut ProofTreeBuilder<D>) -> QueryResult<I>,
-    {
-        let result = prove_goal(self, inspect);
-        let stack_entry = self.stack.pop().unwrap();
-        debug_assert_eq!(stack_entry.input, input);
-
-        // If the current goal is not the root of a cycle, we are done.
-        if stack_entry.has_been_used.is_empty() {
-            return StepResult::Done(stack_entry, result);
-        }
-
-        // If it is a cycle head, we have to keep trying to prove it until
-        // we reach a fixpoint. We need to do so for all cycle heads,
-        // not only for the root.
-        //
-        // See tests/ui/traits/next-solver/cycles/fixpoint-rerun-all-cycle-heads.rs
-        // for an example.
-
-        // Start by clearing all provisional cache entries which depend on this
-        // the current goal.
-        Self::clear_dependent_provisional_results(
-            &mut self.provisional_cache,
-            self.stack.next_index(),
-        );
-
-        // Check whether we reached a fixpoint, either because the final result
-        // is equal to the provisional result of the previous iteration, or because
-        // this was only the root of either coinductive or inductive cycles, and the
-        // final result is equal to the initial response for that case.
-        let reached_fixpoint = if let Some(r) = stack_entry.provisional_result {
-            r == result
-        } else if stack_entry.has_been_used == HasBeenUsed::COINDUCTIVE_CYCLE {
-            Self::response_no_constraints(cx, input, Certainty::Yes) == result
-        } else if stack_entry.has_been_used == HasBeenUsed::INDUCTIVE_CYCLE {
-            Self::response_no_constraints(cx, input, Certainty::overflow(false)) == result
-        } else {
-            false
-        };
-
-        // If we did not reach a fixpoint, update the provisional result and reevaluate.
-        if reached_fixpoint {
-            StepResult::Done(stack_entry, result)
-        } else {
-            let depth = self.stack.push(StackEntry {
-                has_been_used: HasBeenUsed::empty(),
-                provisional_result: Some(result),
-                ..stack_entry
-            });
-            debug_assert_eq!(self.provisional_cache[&input].stack_depth, Some(depth));
-            StepResult::HasChanged
-        }
-    }
-
-    fn response_no_constraints(
-        cx: I,
-        goal: CanonicalInput<I>,
-        certainty: Certainty,
     ) -> QueryResult<I> {
-        Ok(super::response_no_constraints_raw(cx, goal.max_universe, goal.variables, certainty))
+        inspect.canonical_goal_evaluation_kind(inspect::WipCanonicalGoalEvaluationKind::Overflow);
+        response_no_constraints(cx, input, Certainty::overflow(true))
     }
 
-    #[allow(rustc::potential_query_instability)]
-    fn check_invariants(&self) {
-        if !cfg!(debug_assertions) {
-            return;
-        }
-
-        let SearchGraph { mode: _, stack, provisional_cache } = self;
-        if stack.is_empty() {
-            assert!(provisional_cache.is_empty());
-        }
-
-        for (depth, entry) in stack.iter_enumerated() {
-            let StackEntry {
-                input,
-                available_depth: _,
-                reached_depth: _,
-                non_root_cycle_participant,
-                encountered_overflow: _,
-                has_been_used,
-                ref nested_goals,
-                provisional_result,
-            } = *entry;
-            let cache_entry = provisional_cache.get(&entry.input).unwrap();
-            assert_eq!(cache_entry.stack_depth, Some(depth));
-            if let Some(head) = non_root_cycle_participant {
-                assert!(head < depth);
-                assert!(nested_goals.is_empty());
-                assert_ne!(stack[head].has_been_used, HasBeenUsed::empty());
-
-                let mut current_root = head;
-                while let Some(parent) = stack[current_root].non_root_cycle_participant {
-                    current_root = parent;
-                }
-                assert!(stack[current_root].nested_goals.contains(&input));
-            }
-
-            if !nested_goals.is_empty() {
-                assert!(provisional_result.is_some() || !has_been_used.is_empty());
-                for entry in stack.iter().take(depth.as_usize()) {
-                    assert_eq!(nested_goals.get(&entry.input), None);
-                }
-            }
-        }
-
-        for (&input, entry) in &self.provisional_cache {
-            let ProvisionalCacheEntry { stack_depth, with_coinductive_stack, with_inductive_stack } =
-                entry;
-            assert!(
-                stack_depth.is_some()
-                    || with_coinductive_stack.is_some()
-                    || with_inductive_stack.is_some()
-            );
-
-            if let &Some(stack_depth) = stack_depth {
-                assert_eq!(stack[stack_depth].input, input);
-            }
-
-            let check_detached = |detached_entry: &DetachedEntry<I>| {
-                let DetachedEntry { head, result: _ } = *detached_entry;
-                assert_ne!(stack[head].has_been_used, HasBeenUsed::empty());
-            };
-
-            if let Some(with_coinductive_stack) = with_coinductive_stack {
-                check_detached(with_coinductive_stack);
-            }
+    fn on_fixpoint_overflow(cx: I, input: CanonicalInput<I>) -> QueryResult<I> {
+        response_no_constraints(cx, input, Certainty::overflow(false))
+    }
 
-            if let Some(with_inductive_stack) = with_inductive_stack {
-                check_detached(with_inductive_stack);
-            }
-        }
+    fn step_is_coinductive(cx: I, input: CanonicalInput<I>) -> bool {
+        input.value.goal.predicate.is_coinductive(cx)
     }
 }
+
+fn response_no_constraints<I: Interner>(
+    cx: I,
+    goal: CanonicalInput<I>,
+    certainty: Certainty,
+) -> QueryResult<I> {
+    Ok(super::response_no_constraints_raw(cx, goal.max_universe, goal.variables, certainty))
+}
diff --git a/compiler/rustc_query_system/src/cache.rs b/compiler/rustc_query_system/src/cache.rs
index 6e862db0b2547..d8a5bdba7b8a7 100644
--- a/compiler/rustc_query_system/src/cache.rs
+++ b/compiler/rustc_query_system/src/cache.rs
@@ -40,7 +40,7 @@ impl<Key: Eq + Hash, Value: Clone> Cache<Key, Value> {
     }
 }
 
-#[derive(Clone, Eq, PartialEq)]
+#[derive(Debug, Clone, Eq, PartialEq)]
 pub struct WithDepNode<T> {
     dep_node: DepNodeIndex,
     cached_value: T,
diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs
index af56f4e51413d..827b9062d83ab 100644
--- a/compiler/rustc_span/src/symbol.rs
+++ b/compiler/rustc_span/src/symbol.rs
@@ -2072,6 +2072,7 @@ symbols! {
         write_str,
         write_via_move,
         writeln_macro,
+        x86_amx_intrinsics,
         x87_reg,
         xer,
         xmm_reg,
diff --git a/compiler/rustc_target/src/target_features.rs b/compiler/rustc_target/src/target_features.rs
index 017fd3072fdb7..aec2828181b91 100644
--- a/compiler/rustc_target/src/target_features.rs
+++ b/compiler/rustc_target/src/target_features.rs
@@ -192,6 +192,11 @@ const X86_ALLOWED_FEATURES: &[(&str, Stability)] = &[
     // tidy-alphabetical-start
     ("adx", Stable),
     ("aes", Stable),
+    ("amx-bf16", Unstable(sym::x86_amx_intrinsics)),
+    ("amx-complex", Unstable(sym::x86_amx_intrinsics)),
+    ("amx-fp16", Unstable(sym::x86_amx_intrinsics)),
+    ("amx-int8", Unstable(sym::x86_amx_intrinsics)),
+    ("amx-tile", Unstable(sym::x86_amx_intrinsics)),
     ("avx", Stable),
     ("avx2", Stable),
     ("avx512bf16", Unstable(sym::avx512_target_feature)),
diff --git a/compiler/rustc_type_ir/src/inherent.rs b/compiler/rustc_type_ir/src/inherent.rs
index de86a8536f7af..f05d626b47032 100644
--- a/compiler/rustc_type_ir/src/inherent.rs
+++ b/compiler/rustc_type_ir/src/inherent.rs
@@ -8,11 +8,10 @@ use std::hash::Hash;
 
 use rustc_ast_ir::Mutability;
 
-use crate::data_structures::HashSet;
 use crate::elaborate::Elaboratable;
 use crate::fold::{TypeFoldable, TypeSuperFoldable};
 use crate::relate::Relate;
-use crate::solve::{CacheData, CanonicalInput, QueryResult, Reveal};
+use crate::solve::Reveal;
 use crate::visit::{Flags, TypeSuperVisitable, TypeVisitable};
 use crate::{self as ty, CollectAndApply, Interner, UpcastFrom};
 
@@ -539,33 +538,6 @@ pub trait Features<I: Interner>: Copy {
     fn associated_const_equality(self) -> bool;
 }
 
-pub trait EvaluationCache<I: Interner> {
-    /// Insert a final result into the global cache.
-    fn insert(
-        &self,
-        tcx: I,
-        key: CanonicalInput<I>,
-        proof_tree: Option<I::CanonicalGoalEvaluationStepRef>,
-        additional_depth: usize,
-        encountered_overflow: bool,
-        cycle_participants: HashSet<CanonicalInput<I>>,
-        dep_node: I::DepNodeIndex,
-        result: QueryResult<I>,
-    );
-
-    /// Try to fetch a cached result, checking the recursion limit
-    /// and handling root goals of coinductive cycles.
-    ///
-    /// If this returns `Some` the cache result can be used.
-    fn get(
-        &self,
-        tcx: I,
-        key: CanonicalInput<I>,
-        stack_entries: impl IntoIterator<Item = CanonicalInput<I>>,
-        available_depth: usize,
-    ) -> Option<CacheData<I>>;
-}
-
 pub trait DefId<I: Interner>: Copy + Debug + Hash + Eq + TypeFoldable<I> {
     fn is_local(self) -> bool;
 
diff --git a/compiler/rustc_type_ir/src/interner.rs b/compiler/rustc_type_ir/src/interner.rs
index fdd1553d389d2..14ebbb12fe2f0 100644
--- a/compiler/rustc_type_ir/src/interner.rs
+++ b/compiler/rustc_type_ir/src/interner.rs
@@ -10,8 +10,11 @@ use crate::inherent::*;
 use crate::ir_print::IrPrint;
 use crate::lang_items::TraitSolverLangItem;
 use crate::relate::Relate;
+use crate::search_graph;
 use crate::solve::inspect::CanonicalGoalEvaluationStep;
-use crate::solve::{ExternalConstraintsData, PredefinedOpaquesData, SolverMode};
+use crate::solve::{
+    CanonicalInput, ExternalConstraintsData, PredefinedOpaquesData, QueryResult, SolverMode,
+};
 use crate::visit::{Flags, TypeSuperVisitable, TypeVisitable};
 use crate::{self as ty};
 
@@ -86,6 +89,13 @@ pub trait Interner:
     ) -> Self::ExternalConstraints;
 
     type DepNodeIndex;
+    type Tracked<T: Debug + Clone>: Debug;
+    fn mk_tracked<T: Debug + Clone>(
+        self,
+        data: T,
+        dep_node: Self::DepNodeIndex,
+    ) -> Self::Tracked<T>;
+    fn get_tracked<T: Debug + Clone>(self, tracked: &Self::Tracked<T>) -> T;
     fn with_cached_task<T>(self, task: impl FnOnce() -> T) -> (T, Self::DepNodeIndex);
 
     // Kinds of tys
@@ -125,8 +135,11 @@ pub trait Interner:
     type Clause: Clause<Self>;
     type Clauses: Copy + Debug + Hash + Eq + TypeSuperVisitable<Self> + Flags;
 
-    type EvaluationCache: EvaluationCache<Self>;
-    fn evaluation_cache(self, mode: SolverMode) -> Self::EvaluationCache;
+    fn with_global_cache<R>(
+        self,
+        mode: SolverMode,
+        f: impl FnOnce(&mut search_graph::GlobalCache<Self>) -> R,
+    ) -> R;
 
     fn expand_abstract_consts<T: TypeFoldable<Self>>(self, t: T) -> T;
 
@@ -373,3 +386,32 @@ impl<T, R, E> CollectAndApply<T, R> for Result<T, E> {
         })
     }
 }
+
+impl<I: Interner> search_graph::Cx for I {
+    type ProofTree = Option<I::CanonicalGoalEvaluationStepRef>;
+    type Input = CanonicalInput<I>;
+    type Result = QueryResult<I>;
+
+    type DepNodeIndex = I::DepNodeIndex;
+    type Tracked<T: Debug + Clone> = I::Tracked<T>;
+    fn mk_tracked<T: Debug + Clone>(
+        self,
+        data: T,
+        dep_node_index: I::DepNodeIndex,
+    ) -> I::Tracked<T> {
+        I::mk_tracked(self, data, dep_node_index)
+    }
+    fn get_tracked<T: Debug + Clone>(self, tracked: &I::Tracked<T>) -> T {
+        I::get_tracked(self, tracked)
+    }
+    fn with_cached_task<T>(self, task: impl FnOnce() -> T) -> (T, I::DepNodeIndex) {
+        I::with_cached_task(self, task)
+    }
+    fn with_global_cache<R>(
+        self,
+        mode: SolverMode,
+        f: impl FnOnce(&mut search_graph::GlobalCache<Self>) -> R,
+    ) -> R {
+        I::with_global_cache(self, mode, f)
+    }
+}
diff --git a/compiler/rustc_type_ir/src/lib.rs b/compiler/rustc_type_ir/src/lib.rs
index b14a65fc77958..37ee66fa222ae 100644
--- a/compiler/rustc_type_ir/src/lib.rs
+++ b/compiler/rustc_type_ir/src/lib.rs
@@ -30,6 +30,7 @@ pub mod lang_items;
 pub mod lift;
 pub mod outlives;
 pub mod relate;
+pub mod search_graph;
 pub mod solve;
 
 // These modules are not `pub` since they are glob-imported.
diff --git a/compiler/rustc_type_ir/src/search_graph/global_cache.rs b/compiler/rustc_type_ir/src/search_graph/global_cache.rs
new file mode 100644
index 0000000000000..5ccda931f9c5f
--- /dev/null
+++ b/compiler/rustc_type_ir/src/search_graph/global_cache.rs
@@ -0,0 +1,118 @@
+use rustc_index::IndexVec;
+
+use super::{AvailableDepth, Cx, StackDepth, StackEntry};
+use crate::data_structures::{HashMap, HashSet};
+
+#[derive(derivative::Derivative)]
+#[derivative(Debug(bound = ""), Clone(bound = ""), Copy(bound = ""))]
+struct QueryData<X: Cx> {
+    result: X::Result,
+    proof_tree: X::ProofTree,
+}
+
+struct Success<X: Cx> {
+    data: X::Tracked<QueryData<X>>,
+    additional_depth: usize,
+}
+
+/// The cache entry for a given input.
+///
+/// This contains results whose computation never hit the
+/// recursion limit in `success`, and all results which hit
+/// the recursion limit in `with_overflow`.
+#[derive(derivative::Derivative)]
+#[derivative(Default(bound = ""))]
+struct CacheEntry<X: Cx> {
+    success: Option<Success<X>>,
+    /// We have to be careful when caching roots of cycles.
+    ///
+    /// See the doc comment of `StackEntry::nested_goals` for more
+    /// details.
+    nested_goals: HashSet<X::Input>,
+    with_overflow: HashMap<usize, X::Tracked<QueryData<X>>>,
+}
+
+#[derive(derivative::Derivative)]
+#[derivative(Debug(bound = ""))]
+pub(super) struct CacheData<'a, X: Cx> {
+    pub(super) result: X::Result,
+    pub(super) proof_tree: X::ProofTree,
+    pub(super) additional_depth: usize,
+    pub(super) encountered_overflow: bool,
+    // FIXME: This is currently unused, but impacts the design
+    // by requiring a closure for `Cx::with_global_cache`.
+    pub(super) nested_goals: &'a HashSet<X::Input>,
+}
+
+#[derive(derivative::Derivative)]
+#[derivative(Default(bound = ""))]
+pub struct GlobalCache<X: Cx> {
+    map: HashMap<X::Input, CacheEntry<X>>,
+}
+
+impl<X: Cx> GlobalCache<X> {
+    /// Insert a final result into the global cache.
+    pub(super) fn insert(
+        &mut self,
+        cx: X,
+        input: X::Input,
+
+        result: X::Result,
+        proof_tree: X::ProofTree,
+        dep_node: X::DepNodeIndex,
+
+        additional_depth: usize,
+        encountered_overflow: bool,
+        nested_goals: &HashSet<X::Input>,
+    ) {
+        let data = cx.mk_tracked(QueryData { result, proof_tree }, dep_node);
+        let entry = self.map.entry(input).or_default();
+        entry.nested_goals.extend(nested_goals);
+        if encountered_overflow {
+            entry.with_overflow.insert(additional_depth, data);
+        } else {
+            entry.success = Some(Success { data, additional_depth });
+        }
+    }
+
+    /// Try to fetch a cached result, checking the recursion limit
+    /// and handling root goals of coinductive cycles.
+    ///
+    /// If this returns `Some` the cache result can be used.
+    pub(super) fn get<'a>(
+        &'a self,
+        cx: X,
+        input: X::Input,
+        stack: &IndexVec<StackDepth, StackEntry<X>>,
+        available_depth: AvailableDepth,
+    ) -> Option<CacheData<'a, X>> {
+        let entry = self.map.get(&input)?;
+        if stack.iter().any(|e| entry.nested_goals.contains(&e.input)) {
+            return None;
+        }
+
+        if let Some(ref success) = entry.success {
+            if available_depth.cache_entry_is_applicable(success.additional_depth) {
+                let QueryData { result, proof_tree } = cx.get_tracked(&success.data);
+                return Some(CacheData {
+                    result,
+                    proof_tree,
+                    additional_depth: success.additional_depth,
+                    encountered_overflow: false,
+                    nested_goals: &entry.nested_goals,
+                });
+            }
+        }
+
+        entry.with_overflow.get(&available_depth.0).map(|e| {
+            let QueryData { result, proof_tree } = cx.get_tracked(e);
+            CacheData {
+                result,
+                proof_tree,
+                additional_depth: available_depth.0,
+                encountered_overflow: true,
+                nested_goals: &entry.nested_goals,
+            }
+        })
+    }
+}
diff --git a/compiler/rustc_type_ir/src/search_graph/mod.rs b/compiler/rustc_type_ir/src/search_graph/mod.rs
new file mode 100644
index 0000000000000..c2204becdfd71
--- /dev/null
+++ b/compiler/rustc_type_ir/src/search_graph/mod.rs
@@ -0,0 +1,605 @@
+use std::fmt::Debug;
+use std::hash::Hash;
+use std::marker::PhantomData;
+use std::mem;
+
+use rustc_index::{Idx, IndexVec};
+use tracing::debug;
+
+use crate::data_structures::{HashMap, HashSet};
+use crate::solve::SolverMode;
+
+mod global_cache;
+use global_cache::CacheData;
+pub use global_cache::GlobalCache;
+mod validate;
+
+/// The search graph does not simply use `Interner` directly
+/// to enable its fuzzing without having to stub the rest of
+/// the interner. We don't make this a super trait of `Interner`
+/// as users of the shared type library shouldn't have to care
+/// about `Input` and `Result` as they are implementation details
+/// of the search graph.
+pub trait Cx: Copy {
+    type ProofTree: Debug + Copy;
+    type Input: Debug + Eq + Hash + Copy;
+    type Result: Debug + Eq + Hash + Copy;
+
+    type DepNodeIndex;
+    type Tracked<T: Debug + Clone>: Debug;
+    fn mk_tracked<T: Debug + Clone>(
+        self,
+        data: T,
+        dep_node_index: Self::DepNodeIndex,
+    ) -> Self::Tracked<T>;
+    fn get_tracked<T: Debug + Clone>(self, tracked: &Self::Tracked<T>) -> T;
+    fn with_cached_task<T>(self, task: impl FnOnce() -> T) -> (T, Self::DepNodeIndex);
+
+    fn with_global_cache<R>(
+        self,
+        mode: SolverMode,
+        f: impl FnOnce(&mut GlobalCache<Self>) -> R,
+    ) -> R;
+}
+
+pub trait ProofTreeBuilder<X: Cx> {
+    fn try_apply_proof_tree(&mut self, proof_tree: X::ProofTree) -> bool;
+    fn on_provisional_cache_hit(&mut self);
+    fn on_cycle_in_stack(&mut self);
+    fn finalize_canonical_goal_evaluation(&mut self, cx: X) -> X::ProofTree;
+}
+
+pub trait Delegate {
+    type Cx: Cx;
+    const FIXPOINT_STEP_LIMIT: usize;
+    type ProofTreeBuilder: ProofTreeBuilder<Self::Cx>;
+
+    fn recursion_limit(cx: Self::Cx) -> usize;
+
+    fn initial_provisional_result(
+        cx: Self::Cx,
+        kind: CycleKind,
+        input: <Self::Cx as Cx>::Input,
+    ) -> <Self::Cx as Cx>::Result;
+    fn reached_fixpoint(
+        cx: Self::Cx,
+        kind: UsageKind,
+        input: <Self::Cx as Cx>::Input,
+        provisional_result: Option<<Self::Cx as Cx>::Result>,
+        result: <Self::Cx as Cx>::Result,
+    ) -> bool;
+    fn on_stack_overflow(
+        cx: Self::Cx,
+        inspect: &mut Self::ProofTreeBuilder,
+        input: <Self::Cx as Cx>::Input,
+    ) -> <Self::Cx as Cx>::Result;
+    fn on_fixpoint_overflow(
+        cx: Self::Cx,
+        input: <Self::Cx as Cx>::Input,
+    ) -> <Self::Cx as Cx>::Result;
+
+    fn step_is_coinductive(cx: Self::Cx, input: <Self::Cx as Cx>::Input) -> bool;
+}
+
+/// In the initial iteration of a cycle, we do not yet have a provisional
+/// result. In that case we return an initial provisional result depending
+/// on the kind of cycle.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum CycleKind {
+    Coinductive,
+    Inductive,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum UsageKind {
+    Single(CycleKind),
+    Mixed,
+}
+impl UsageKind {
+    fn merge(self, other: Self) -> Self {
+        match (self, other) {
+            (UsageKind::Single(lhs), UsageKind::Single(rhs)) => {
+                if lhs == rhs {
+                    UsageKind::Single(lhs)
+                } else {
+                    UsageKind::Mixed
+                }
+            }
+            (UsageKind::Mixed, UsageKind::Mixed)
+            | (UsageKind::Mixed, UsageKind::Single(_))
+            | (UsageKind::Single(_), UsageKind::Mixed) => UsageKind::Mixed,
+        }
+    }
+}
+
+#[derive(Debug, Clone, Copy)]
+struct AvailableDepth(usize);
+impl AvailableDepth {
+    /// Returns the remaining depth allowed for nested goals.
+    ///
+    /// This is generally simply one less than the current depth.
+    /// However, if we encountered overflow, we significantly reduce
+    /// the remaining depth of all nested goals to prevent hangs
+    /// in case there is exponential blowup.
+    fn allowed_depth_for_nested<D: Delegate>(
+        cx: D::Cx,
+        stack: &IndexVec<StackDepth, StackEntry<D::Cx>>,
+    ) -> Option<AvailableDepth> {
+        if let Some(last) = stack.raw.last() {
+            if last.available_depth.0 == 0 {
+                return None;
+            }
+
+            Some(if last.encountered_overflow {
+                AvailableDepth(last.available_depth.0 / 2)
+            } else {
+                AvailableDepth(last.available_depth.0 - 1)
+            })
+        } else {
+            Some(AvailableDepth(D::recursion_limit(cx)))
+        }
+    }
+
+    /// Whether we're allowed to use a global cache entry which required
+    /// the given depth.
+    fn cache_entry_is_applicable(self, additional_depth: usize) -> bool {
+        self.0 >= additional_depth
+    }
+}
+
+rustc_index::newtype_index! {
+    #[orderable]
+    #[gate_rustc_only]
+    pub struct StackDepth {}
+}
+
+#[derive(derivative::Derivative)]
+#[derivative(Debug(bound = ""))]
+struct StackEntry<X: Cx> {
+    input: X::Input,
+
+    available_depth: AvailableDepth,
+
+    /// The maximum depth reached by this stack entry, only up-to-date
+    /// for the top of the stack and lazily updated for the rest.
+    reached_depth: StackDepth,
+
+    /// Whether this entry is a non-root cycle participant.
+    ///
+    /// We must not move the result of non-root cycle participants to the
+    /// global cache. We store the highest stack depth of a head of a cycle
+    /// this goal is involved in. This is necessary to soundly cache its
+    /// provisional result.
+    non_root_cycle_participant: Option<StackDepth>,
+
+    encountered_overflow: bool,
+
+    has_been_used: Option<UsageKind>,
+
+    /// We put only the root goal of a coinductive cycle into the global cache.
+    ///
+    /// If we were to use that result when later trying to prove another cycle
+    /// participant, we can end up with unstable query results.
+    ///
+    /// See tests/ui/next-solver/coinduction/incompleteness-unstable-result.rs for
+    /// an example of where this is needed.
+    ///
+    /// There can be multiple roots on the same stack, so we need to track
+    /// cycle participants per root:
+    /// ```plain
+    /// A :- B
+    /// B :- A, C
+    /// C :- D
+    /// D :- C
+    /// ```
+    nested_goals: HashSet<X::Input>,
+    /// Starts out as `None` and gets set when rerunning this
+    /// goal in case we encounter a cycle.
+    provisional_result: Option<X::Result>,
+}
+
+/// The provisional result for a goal which is not on the stack.
+#[derive(Debug)]
+struct DetachedEntry<X: Cx> {
+    /// The head of the smallest non-trivial cycle involving this entry.
+    ///
+    /// Given the following rules, when proving `A` the head for
+    /// the provisional entry of `C` would be `B`.
+    /// ```plain
+    /// A :- B
+    /// B :- C
+    /// C :- A + B + C
+    /// ```
+    head: StackDepth,
+    result: X::Result,
+}
+
+/// Stores the stack depth of a currently evaluated goal *and* already
+/// computed results for goals which depend on other goals still on the stack.
+///
+/// The provisional result may depend on whether the stack above it is inductive
+/// or coinductive. Because of this, we store separate provisional results for
+/// each case. If a provisional entry is not applicable, it may be the case
+/// that we already have a provisional result while computing a goal. In this case
+/// we prefer the provisional result to potentially avoid fixpoint iterations.
+/// See tests/ui/traits/next-solver/cycles/mixed-cycles-2.rs for an example.
+///
+/// The provisional cache can theoretically result in changes to the observable behavior,
+/// see tests/ui/traits/next-solver/cycles/provisional-cache-impacts-behavior.rs.
+#[derive(derivative::Derivative)]
+#[derivative(Default(bound = ""))]
+struct ProvisionalCacheEntry<X: Cx> {
+    stack_depth: Option<StackDepth>,
+    with_inductive_stack: Option<DetachedEntry<X>>,
+    with_coinductive_stack: Option<DetachedEntry<X>>,
+}
+
+impl<X: Cx> ProvisionalCacheEntry<X> {
+    fn is_empty(&self) -> bool {
+        self.stack_depth.is_none()
+            && self.with_inductive_stack.is_none()
+            && self.with_coinductive_stack.is_none()
+    }
+}
+
+pub struct SearchGraph<D: Delegate<Cx = X>, X: Cx = <D as Delegate>::Cx> {
+    mode: SolverMode,
+    /// The stack of goals currently being computed.
+    ///
+    /// An element is *deeper* in the stack if its index is *lower*.
+    stack: IndexVec<StackDepth, StackEntry<X>>,
+    provisional_cache: HashMap<X::Input, ProvisionalCacheEntry<X>>,
+
+    _marker: PhantomData<D>,
+}
+
+impl<D: Delegate<Cx = X>, X: Cx> SearchGraph<D> {
+    pub fn new(mode: SolverMode) -> SearchGraph<D> {
+        Self {
+            mode,
+            stack: Default::default(),
+            provisional_cache: Default::default(),
+            _marker: PhantomData,
+        }
+    }
+
+    pub fn solver_mode(&self) -> SolverMode {
+        self.mode
+    }
+
+    fn update_parent_goal(&mut self, reached_depth: StackDepth, encountered_overflow: bool) {
+        if let Some(parent) = self.stack.raw.last_mut() {
+            parent.reached_depth = parent.reached_depth.max(reached_depth);
+            parent.encountered_overflow |= encountered_overflow;
+        }
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.stack.is_empty()
+    }
+
+    fn stack_coinductive_from(
+        cx: X,
+        stack: &IndexVec<StackDepth, StackEntry<X>>,
+        head: StackDepth,
+    ) -> bool {
+        stack.raw[head.index()..].iter().all(|entry| D::step_is_coinductive(cx, entry.input))
+    }
+
+    // When encountering a solver cycle, the result of the current goal
+    // depends on goals lower on the stack.
+    //
+    // We therefore have to be careful when caching goals. Only the final result
+    // of the cycle root, i.e. the lowest goal on the stack involved in this cycle,
+    // is moved to the global cache while all others are stored in a provisional cache.
+    //
+    // We update both the head of this cycle to rerun its evaluation until
+    // we reach a fixpoint and all other cycle participants to make sure that
+    // their result does not get moved to the global cache.
+    fn tag_cycle_participants(
+        stack: &mut IndexVec<StackDepth, StackEntry<X>>,
+        usage_kind: Option<UsageKind>,
+        head: StackDepth,
+    ) {
+        if let Some(usage_kind) = usage_kind {
+            stack[head].has_been_used =
+                Some(stack[head].has_been_used.map_or(usage_kind, |prev| prev.merge(usage_kind)));
+        }
+        debug_assert!(stack[head].has_been_used.is_some());
+
+        // The current root of these cycles. Note that this may not be the final
+        // root in case a later goal depends on a goal higher up the stack.
+        let mut current_root = head;
+        while let Some(parent) = stack[current_root].non_root_cycle_participant {
+            current_root = parent;
+            debug_assert!(stack[current_root].has_been_used.is_some());
+        }
+
+        let (stack, cycle_participants) = stack.raw.split_at_mut(head.index() + 1);
+        let current_cycle_root = &mut stack[current_root.as_usize()];
+        for entry in cycle_participants {
+            entry.non_root_cycle_participant = entry.non_root_cycle_participant.max(Some(head));
+            current_cycle_root.nested_goals.insert(entry.input);
+            current_cycle_root.nested_goals.extend(mem::take(&mut entry.nested_goals));
+        }
+    }
+
+    fn clear_dependent_provisional_results(
+        provisional_cache: &mut HashMap<X::Input, ProvisionalCacheEntry<X>>,
+        head: StackDepth,
+    ) {
+        #[allow(rustc::potential_query_instability)]
+        provisional_cache.retain(|_, entry| {
+            if entry.with_coinductive_stack.as_ref().is_some_and(|p| p.head == head) {
+                entry.with_coinductive_stack.take();
+            }
+            if entry.with_inductive_stack.as_ref().is_some_and(|p| p.head == head) {
+                entry.with_inductive_stack.take();
+            }
+            !entry.is_empty()
+        });
+    }
+
+    /// Probably the most involved method of the whole solver.
+    ///
+    /// Given some goal which is proven via the `prove_goal` closure, this
+    /// handles caching, overflow, and coinductive cycles.
+    pub fn with_new_goal(
+        &mut self,
+        cx: X,
+        input: X::Input,
+        inspect: &mut D::ProofTreeBuilder,
+        mut prove_goal: impl FnMut(&mut Self, &mut D::ProofTreeBuilder) -> X::Result,
+    ) -> X::Result {
+        self.check_invariants();
+        // Check for overflow.
+        let Some(available_depth) = AvailableDepth::allowed_depth_for_nested::<D>(cx, &self.stack)
+        else {
+            if let Some(last) = self.stack.raw.last_mut() {
+                last.encountered_overflow = true;
+            }
+
+            debug!("encountered stack overflow");
+            return D::on_stack_overflow(cx, inspect, input);
+        };
+
+        if let Some(result) = self.lookup_global_cache(cx, input, available_depth, inspect) {
+            return result;
+        }
+
+        // Check whether the goal is in the provisional cache.
+        // The provisional result may rely on the path to its cycle roots,
+        // so we have to check the path of the current goal matches that of
+        // the cache entry.
+        let cache_entry = self.provisional_cache.entry(input).or_default();
+        if let Some(entry) = cache_entry
+            .with_coinductive_stack
+            .as_ref()
+            .filter(|p| Self::stack_coinductive_from(cx, &self.stack, p.head))
+            .or_else(|| {
+                cache_entry
+                    .with_inductive_stack
+                    .as_ref()
+                    .filter(|p| !Self::stack_coinductive_from(cx, &self.stack, p.head))
+            })
+        {
+            debug!("provisional cache hit");
+            // We have a nested goal which is already in the provisional cache, use
+            // its result. We do not provide any usage kind as that should have been
+            // already set correctly while computing the cache entry.
+            inspect.on_provisional_cache_hit();
+            Self::tag_cycle_participants(&mut self.stack, None, entry.head);
+            return entry.result;
+        } else if let Some(stack_depth) = cache_entry.stack_depth {
+            debug!("encountered cycle with depth {stack_depth:?}");
+            // We have a nested goal which directly relies on a goal deeper in the stack.
+            //
+            // We start by tagging all cycle participants, as that's necessary for caching.
+            //
+            // Finally we can return either the provisional response or the initial response
+            // in case we're in the first fixpoint iteration for this goal.
+            inspect.on_cycle_in_stack();
+
+            let is_coinductive_cycle = Self::stack_coinductive_from(cx, &self.stack, stack_depth);
+            let cycle_kind =
+                if is_coinductive_cycle { CycleKind::Coinductive } else { CycleKind::Inductive };
+            Self::tag_cycle_participants(
+                &mut self.stack,
+                Some(UsageKind::Single(cycle_kind)),
+                stack_depth,
+            );
+
+            // Return the provisional result or, if we're in the first iteration,
+            // start with no constraints.
+            return if let Some(result) = self.stack[stack_depth].provisional_result {
+                result
+            } else {
+                D::initial_provisional_result(cx, cycle_kind, input)
+            };
+        } else {
+            // No entry, we push this goal on the stack and try to prove it.
+            let depth = self.stack.next_index();
+            let entry = StackEntry {
+                input,
+                available_depth,
+                reached_depth: depth,
+                non_root_cycle_participant: None,
+                encountered_overflow: false,
+                has_been_used: None,
+                nested_goals: Default::default(),
+                provisional_result: None,
+            };
+            assert_eq!(self.stack.push(entry), depth);
+            cache_entry.stack_depth = Some(depth);
+        };
+
+        // This is for global caching, so we properly track query dependencies.
+        // Everything that affects the `result` should be performed within this
+        // `with_cached_task` closure. If computing this goal depends on something
+        // not tracked by the cache key and from outside of this task, it
+        // must not be added to the global cache. Notably, this is the case for
+        // trait solver cycle participants.
+        let ((final_entry, result), dep_node) = cx.with_cached_task(|| {
+            for _ in 0..D::FIXPOINT_STEP_LIMIT {
+                match self.fixpoint_step_in_task(cx, input, inspect, &mut prove_goal) {
+                    StepResult::Done(final_entry, result) => return (final_entry, result),
+                    StepResult::HasChanged => debug!("fixpoint changed provisional results"),
+                }
+            }
+
+            debug!("canonical cycle overflow");
+            let current_entry = self.stack.pop().unwrap();
+            debug_assert!(current_entry.has_been_used.is_none());
+            let result = D::on_fixpoint_overflow(cx, input);
+            (current_entry, result)
+        });
+
+        let proof_tree = inspect.finalize_canonical_goal_evaluation(cx);
+
+        self.update_parent_goal(final_entry.reached_depth, final_entry.encountered_overflow);
+
+        // We're now done with this goal. In case this goal is involved in a larger cycle
+        // do not remove it from the provisional cache and update its provisional result.
+        // We only add the root of cycles to the global cache.
+        if let Some(head) = final_entry.non_root_cycle_participant {
+            let coinductive_stack = Self::stack_coinductive_from(cx, &self.stack, head);
+
+            let entry = self.provisional_cache.get_mut(&input).unwrap();
+            entry.stack_depth = None;
+            if coinductive_stack {
+                entry.with_coinductive_stack = Some(DetachedEntry { head, result });
+            } else {
+                entry.with_inductive_stack = Some(DetachedEntry { head, result });
+            }
+        } else {
+            // When encountering a cycle, both inductive and coinductive, we only
+            // move the root into the global cache. We also store all other cycle
+            // participants involved.
+            //
+            // We must not use the global cache entry of a root goal if a cycle
+            // participant is on the stack. This is necessary to prevent unstable
+            // results. See the comment of `StackEntry::nested_goals` for
+            // more details.
+            self.provisional_cache.remove(&input);
+            let additional_depth = final_entry.reached_depth.as_usize() - self.stack.len();
+            cx.with_global_cache(self.mode, |cache| {
+                cache.insert(
+                    cx,
+                    input,
+                    result,
+                    proof_tree,
+                    dep_node,
+                    additional_depth,
+                    final_entry.encountered_overflow,
+                    &final_entry.nested_goals,
+                )
+            })
+        }
+
+        self.check_invariants();
+
+        result
+    }
+
+    /// Try to fetch a previously computed result from the global cache,
+    /// making sure to only do so if it would match the result of reevaluating
+    /// this goal.
+    fn lookup_global_cache(
+        &mut self,
+        cx: X,
+        input: X::Input,
+        available_depth: AvailableDepth,
+        inspect: &mut D::ProofTreeBuilder,
+    ) -> Option<X::Result> {
+        cx.with_global_cache(self.mode, |cache| {
+            let CacheData {
+                result,
+                proof_tree,
+                additional_depth,
+                encountered_overflow,
+                nested_goals: _, // FIXME: consider nested goals here.
+            } = cache.get(cx, input, &self.stack, available_depth)?;
+
+            // If we're building a proof tree and the current cache entry does not
+            // contain a proof tree, we do not use the entry but instead recompute
+            // the goal. We simply overwrite the existing entry once we're done,
+            // caching the proof tree.
+            if !inspect.try_apply_proof_tree(proof_tree) {
+                return None;
+            }
+
+            // Update the reached depth of the current goal to make sure
+            // its state is the same regardless of whether we've used the
+            // global cache or not.
+            let reached_depth = self.stack.next_index().plus(additional_depth);
+            self.update_parent_goal(reached_depth, encountered_overflow);
+
+            debug!("global cache hit");
+            Some(result)
+        })
+    }
+}
+
+enum StepResult<X: Cx> {
+    Done(StackEntry<X>, X::Result),
+    HasChanged,
+}
+
+impl<D: Delegate<Cx = X>, X: Cx> SearchGraph<D> {
+    /// When we encounter a coinductive cycle, we have to fetch the
+    /// result of that cycle while we are still computing it. Because
+    /// of this we continuously recompute the cycle until the result
+    /// of the previous iteration is equal to the final result, at which
+    /// point we are done.
+    fn fixpoint_step_in_task<F>(
+        &mut self,
+        cx: X,
+        input: X::Input,
+        inspect: &mut D::ProofTreeBuilder,
+        prove_goal: &mut F,
+    ) -> StepResult<X>
+    where
+        F: FnMut(&mut Self, &mut D::ProofTreeBuilder) -> X::Result,
+    {
+        let result = prove_goal(self, inspect);
+        let stack_entry = self.stack.pop().unwrap();
+        debug_assert_eq!(stack_entry.input, input);
+
+        // If the current goal is not the root of a cycle, we are done.
+        let Some(usage_kind) = stack_entry.has_been_used else {
+            return StepResult::Done(stack_entry, result);
+        };
+
+        // If it is a cycle head, we have to keep trying to prove it until
+        // we reach a fixpoint. We need to do so for all cycle heads,
+        // not only for the root.
+        //
+        // See tests/ui/traits/next-solver/cycles/fixpoint-rerun-all-cycle-heads.rs
+        // for an example.
+
+        // Start by clearing all provisional cache entries which depend on
+        // the current goal.
+        Self::clear_dependent_provisional_results(
+            &mut self.provisional_cache,
+            self.stack.next_index(),
+        );
+
+        // Check whether we reached a fixpoint, either because the final result
+        // is equal to the provisional result of the previous iteration, or because
+        // this was only the root of either coinductive or inductive cycles, and the
+        // final result is equal to the initial response for that case.
+        //
+        // If we did not reach a fixpoint, update the provisional result and reevaluate.
+        if D::reached_fixpoint(cx, usage_kind, input, stack_entry.provisional_result, result) {
+            StepResult::Done(stack_entry, result)
+        } else {
+            let depth = self.stack.push(StackEntry {
+                has_been_used: None,
+                provisional_result: Some(result),
+                ..stack_entry
+            });
+            debug_assert_eq!(self.provisional_cache[&input].stack_depth, Some(depth));
+            StepResult::HasChanged
+        }
+    }
+}
diff --git a/compiler/rustc_type_ir/src/search_graph/validate.rs b/compiler/rustc_type_ir/src/search_graph/validate.rs
new file mode 100644
index 0000000000000..1ae806834ba7d
--- /dev/null
+++ b/compiler/rustc_type_ir/src/search_graph/validate.rs
@@ -0,0 +1,75 @@
+use super::*;
+
+impl<D: Delegate<Cx = X>, X: Cx> SearchGraph<D> {
+    #[allow(rustc::potential_query_instability)]
+    pub(super) fn check_invariants(&self) {
+        if !cfg!(debug_assertions) {
+            return;
+        }
+
+        let SearchGraph { mode: _, stack, provisional_cache, _marker } = self;
+        if stack.is_empty() {
+            assert!(provisional_cache.is_empty());
+        }
+
+        for (depth, entry) in stack.iter_enumerated() {
+            let StackEntry {
+                input,
+                available_depth: _,
+                reached_depth: _,
+                non_root_cycle_participant,
+                encountered_overflow: _,
+                has_been_used,
+                ref nested_goals,
+                provisional_result,
+            } = *entry;
+            let cache_entry = provisional_cache.get(&entry.input).unwrap();
+            assert_eq!(cache_entry.stack_depth, Some(depth));
+            if let Some(head) = non_root_cycle_participant {
+                assert!(head < depth);
+                assert!(nested_goals.is_empty());
+                assert_ne!(stack[head].has_been_used, None);
+
+                let mut current_root = head;
+                while let Some(parent) = stack[current_root].non_root_cycle_participant {
+                    current_root = parent;
+                }
+                assert!(stack[current_root].nested_goals.contains(&input));
+            }
+
+            if !nested_goals.is_empty() {
+                assert!(provisional_result.is_some() || has_been_used.is_some());
+                for entry in stack.iter().take(depth.as_usize()) {
+                    assert_eq!(nested_goals.get(&entry.input), None);
+                }
+            }
+        }
+
+        for (&input, entry) in &self.provisional_cache {
+            let ProvisionalCacheEntry { stack_depth, with_coinductive_stack, with_inductive_stack } =
+                entry;
+            assert!(
+                stack_depth.is_some()
+                    || with_coinductive_stack.is_some()
+                    || with_inductive_stack.is_some()
+            );
+
+            if let &Some(stack_depth) = stack_depth {
+                assert_eq!(stack[stack_depth].input, input);
+            }
+
+            let check_detached = |detached_entry: &DetachedEntry<X>| {
+                let DetachedEntry { head, result: _ } = *detached_entry;
+                assert_ne!(stack[head].has_been_used, None);
+            };
+
+            if let Some(with_coinductive_stack) = with_coinductive_stack {
+                check_detached(with_coinductive_stack);
+            }
+
+            if let Some(with_inductive_stack) = with_inductive_stack {
+                check_detached(with_inductive_stack);
+            }
+        }
+    }
+}
diff --git a/compiler/rustc_type_ir/src/solve.rs b/compiler/rustc_type_ir/src/solve/mod.rs
similarity index 100%
rename from compiler/rustc_type_ir/src/solve.rs
rename to compiler/rustc_type_ir/src/solve/mod.rs
diff --git a/library/alloc/src/rc.rs b/library/alloc/src/rc.rs
index 3745ecb48c18e..aa8dd9f79c381 100644
--- a/library/alloc/src/rc.rs
+++ b/library/alloc/src/rc.rs
@@ -665,16 +665,6 @@ impl<T> Rc<T> {
 }
 
 impl<T, A: Allocator> Rc<T, A> {
-    /// Returns a reference to the underlying allocator.
-    ///
-    /// Note: this is an associated function, which means that you have
-    /// to call it as `Rc::allocator(&r)` instead of `r.allocator()`. This
-    /// is so that there is no conflict with a method on the inner type.
-    #[inline]
-    #[unstable(feature = "allocator_api", issue = "32838")]
-    pub fn allocator(this: &Self) -> &A {
-        &this.alloc
-    }
     /// Constructs a new `Rc` in the provided allocator.
     ///
     /// # Examples
@@ -1331,6 +1321,17 @@ impl<T: ?Sized> Rc<T> {
 }
 
 impl<T: ?Sized, A: Allocator> Rc<T, A> {
+    /// Returns a reference to the underlying allocator.
+    ///
+    /// Note: this is an associated function, which means that you have
+    /// to call it as `Rc::allocator(&r)` instead of `r.allocator()`. This
+    /// is so that there is no conflict with a method on the inner type.
+    #[inline]
+    #[unstable(feature = "allocator_api", issue = "32838")]
+    pub fn allocator(this: &Self) -> &A {
+        &this.alloc
+    }
+
     /// Consumes the `Rc`, returning the wrapped pointer.
     ///
     /// To avoid a memory leak the pointer must be converted back to an `Rc` using
@@ -2994,6 +2995,13 @@ impl<T: ?Sized> Weak<T> {
 }
 
 impl<T: ?Sized, A: Allocator> Weak<T, A> {
+    /// Returns a reference to the underlying allocator.
+    #[inline]
+    #[unstable(feature = "allocator_api", issue = "32838")]
+    pub fn allocator(&self) -> &A {
+        &self.alloc
+    }
+
     /// Returns a raw pointer to the object `T` pointed to by this `Weak<T>`.
     ///
     /// The pointer is valid only if there are some strong references. The pointer may be dangling,
diff --git a/library/alloc/src/sync.rs b/library/alloc/src/sync.rs
index 1983ea8281aa0..57ac20ba323aa 100644
--- a/library/alloc/src/sync.rs
+++ b/library/alloc/src/sync.rs
@@ -677,16 +677,6 @@ impl<T> Arc<T> {
 }
 
 impl<T, A: Allocator> Arc<T, A> {
-    /// Returns a reference to the underlying allocator.
-    ///
-    /// Note: this is an associated function, which means that you have
-    /// to call it as `Arc::allocator(&a)` instead of `a.allocator()`. This
-    /// is so that there is no conflict with a method on the inner type.
-    #[inline]
-    #[unstable(feature = "allocator_api", issue = "32838")]
-    pub fn allocator(this: &Self) -> &A {
-        &this.alloc
-    }
     /// Constructs a new `Arc<T>` in the provided allocator.
     ///
     /// # Examples
@@ -1470,6 +1460,17 @@ impl<T: ?Sized> Arc<T> {
 }
 
 impl<T: ?Sized, A: Allocator> Arc<T, A> {
+    /// Returns a reference to the underlying allocator.
+    ///
+    /// Note: this is an associated function, which means that you have
+    /// to call it as `Arc::allocator(&a)` instead of `a.allocator()`. This
+    /// is so that there is no conflict with a method on the inner type.
+    #[inline]
+    #[unstable(feature = "allocator_api", issue = "32838")]
+    pub fn allocator(this: &Self) -> &A {
+        &this.alloc
+    }
+
     /// Consumes the `Arc`, returning the wrapped pointer.
     ///
     /// To avoid a memory leak the pointer must be converted back to an `Arc` using
@@ -2715,6 +2716,13 @@ impl<T: ?Sized> Weak<T> {
 }
 
 impl<T: ?Sized, A: Allocator> Weak<T, A> {
+    /// Returns a reference to the underlying allocator.
+    #[inline]
+    #[unstable(feature = "allocator_api", issue = "32838")]
+    pub fn allocator(&self) -> &A {
+        &self.alloc
+    }
+
     /// Returns a raw pointer to the object `T` pointed to by this `Weak<T>`.
     ///
     /// The pointer is valid only if there are some strong references. The pointer may be dangling,
diff --git a/library/core/src/ffi/c_str.rs b/library/core/src/ffi/c_str.rs
index d2a408485d162..dc2a5803a1b20 100644
--- a/library/core/src/ffi/c_str.rs
+++ b/library/core/src/ffi/c_str.rs
@@ -263,8 +263,6 @@ impl CStr {
     /// ```
     ///
     /// ```
-    /// #![feature(const_cstr_from_ptr)]
-    ///
     /// use std::ffi::{c_char, CStr};
     ///
     /// const HELLO_PTR: *const c_char = {
@@ -280,7 +278,7 @@ impl CStr {
     #[inline] // inline is necessary for codegen to see strlen.
     #[must_use]
     #[stable(feature = "rust1", since = "1.0.0")]
-    #[rustc_const_unstable(feature = "const_cstr_from_ptr", issue = "113219")]
+    #[rustc_const_stable(feature = "const_cstr_from_ptr", since = "CURRENT_RUSTC_VERSION")]
     pub const unsafe fn from_ptr<'a>(ptr: *const c_char) -> &'a CStr {
         // SAFETY: The caller has provided a pointer that points to a valid C
         // string with a NUL terminator less than `isize::MAX` from `ptr`.
@@ -542,7 +540,7 @@ impl CStr {
     #[must_use]
     #[doc(alias("len", "strlen"))]
     #[stable(feature = "cstr_count_bytes", since = "1.79.0")]
-    #[rustc_const_unstable(feature = "const_cstr_from_ptr", issue = "113219")]
+    #[rustc_const_stable(feature = "const_cstr_from_ptr", since = "CURRENT_RUSTC_VERSION")]
     pub const fn count_bytes(&self) -> usize {
         self.inner.len() - 1
     }
@@ -742,6 +740,9 @@ impl AsRef<CStr> for CStr {
 /// The pointer must point to a valid buffer that contains a NUL terminator. The NUL must be
 /// located within `isize::MAX` from `ptr`.
 #[inline]
+#[unstable(feature = "cstr_internals", issue = "none")]
+#[rustc_const_stable(feature = "const_cstr_from_ptr", since = "CURRENT_RUSTC_VERSION")]
+#[rustc_allow_const_fn_unstable(const_eval_select)]
 const unsafe fn const_strlen(ptr: *const c_char) -> usize {
     const fn strlen_ct(s: *const c_char) -> usize {
         let mut len = 0;
diff --git a/library/std/src/sys/pal/unix/linux/pidfd/tests.rs b/library/std/src/sys/pal/unix/linux/pidfd/tests.rs
index 6d9532f2ef1ff..fb928c76fbd04 100644
--- a/library/std/src/sys/pal/unix/linux/pidfd/tests.rs
+++ b/library/std/src/sys/pal/unix/linux/pidfd/tests.rs
@@ -1,7 +1,7 @@
 use crate::assert_matches::assert_matches;
 use crate::os::fd::{AsRawFd, RawFd};
-use crate::os::linux::process::{ChildExt, CommandExt};
-use crate::os::unix::process::ExitStatusExt;
+use crate::os::linux::process::{ChildExt, CommandExt as _};
+use crate::os::unix::process::{CommandExt as _, ExitStatusExt};
 use crate::process::Command;
 
 #[test]
@@ -21,6 +21,7 @@ fn test_command_pidfd() {
         let flags = super::cvt(unsafe { libc::fcntl(pidfd.as_raw_fd(), libc::F_GETFD) }).unwrap();
         assert!(flags & libc::FD_CLOEXEC != 0);
     }
+    assert!(child.id() > 0 && child.id() < -1i32 as u32);
     let status = child.wait().expect("error waiting on pidfd");
     assert_eq!(status.code(), Some(1));
 
@@ -42,6 +43,17 @@ fn test_command_pidfd() {
         .unwrap()
         .pidfd()
         .expect_err("pidfd should not have been created");
+
+    // exercise the fork/exec path since the earlier attempts may have used pidfd_spawnp()
+    let mut child =
+        unsafe { Command::new("false").pre_exec(|| Ok(())) }.create_pidfd(true).spawn().unwrap();
+
+    assert!(child.id() > 0 && child.id() < -1i32 as u32);
+
+    if pidfd_open_available {
+        assert!(child.pidfd().is_ok())
+    }
+    child.wait().expect("error waiting on child");
 }
 
 #[test]
diff --git a/library/std/src/sys/pal/unix/mod.rs b/library/std/src/sys/pal/unix/mod.rs
index 16fc2011d7085..262f9c704a882 100644
--- a/library/std/src/sys/pal/unix/mod.rs
+++ b/library/std/src/sys/pal/unix/mod.rs
@@ -305,10 +305,13 @@ macro_rules! impl_is_minus_one {
 
 impl_is_minus_one! { i8 i16 i32 i64 isize }
 
+/// Convert native return values to Result using the *-1 means error is in `errno`* convention.
+/// Non-error values are `Ok`-wrapped.
 pub fn cvt<T: IsMinusOne>(t: T) -> crate::io::Result<T> {
     if t.is_minus_one() { Err(crate::io::Error::last_os_error()) } else { Ok(t) }
 }
 
+/// `-1` → look at `errno` → retry on `EINTR`. Otherwise `Ok()`-wrap the closure return value.
 pub fn cvt_r<T, F>(mut f: F) -> crate::io::Result<T>
 where
     T: IsMinusOne,
@@ -323,6 +326,7 @@ where
 }
 
 #[allow(dead_code)] // Not used on all platforms.
+/// Zero means `Ok()`, all other values are treated as raw OS errors. Does not look at `errno`.
 pub fn cvt_nz(error: libc::c_int) -> crate::io::Result<()> {
     if error == 0 { Ok(()) } else { Err(crate::io::Error::from_raw_os_error(error)) }
 }
diff --git a/library/std/src/sys/pal/unix/process/process_unix.rs b/library/std/src/sys/pal/unix/process/process_unix.rs
index 32382d9a50cf4..abd4a334783e4 100644
--- a/library/std/src/sys/pal/unix/process/process_unix.rs
+++ b/library/std/src/sys/pal/unix/process/process_unix.rs
@@ -449,17 +449,82 @@ impl Command {
         use crate::mem::MaybeUninit;
         use crate::sys::weak::weak;
         use crate::sys::{self, cvt_nz, on_broken_pipe_flag_used};
+        #[cfg(target_os = "linux")]
+        use core::sync::atomic::{AtomicU8, Ordering};
 
         if self.get_gid().is_some()
             || self.get_uid().is_some()
             || (self.env_saw_path() && !self.program_is_path())
             || !self.get_closures().is_empty()
             || self.get_groups().is_some()
-            || self.get_create_pidfd()
         {
             return Ok(None);
         }
 
+        cfg_if::cfg_if! {
+            if #[cfg(target_os = "linux")] {
+                weak! {
+                    fn pidfd_spawnp(
+                        *mut libc::c_int,
+                        *const libc::c_char,
+                        *const libc::posix_spawn_file_actions_t,
+                        *const libc::posix_spawnattr_t,
+                        *const *mut libc::c_char,
+                        *const *mut libc::c_char
+                    ) -> libc::c_int
+                }
+
+                weak! { fn pidfd_getpid(libc::c_int) -> libc::c_int }
+
+                static PIDFD_SUPPORTED: AtomicU8 = AtomicU8::new(0);
+                const UNKNOWN: u8 = 0;
+                const SPAWN: u8 = 1;
+                // Obtaining a pidfd via the fork+exec path might work
+                const FORK_EXEC: u8 = 2;
+                // Neither pidfd_spawn nor fork/exec will get us a pidfd.
+                // Instead we'll just posix_spawn if the other preconditions are met.
+                const NO: u8 = 3;
+
+                if self.get_create_pidfd() {
+                    let mut support = PIDFD_SUPPORTED.load(Ordering::Relaxed);
+                    if support == FORK_EXEC {
+                        return Ok(None);
+                    }
+                    if support == UNKNOWN {
+                        support = NO;
+                        let our_pid = crate::process::id();
+                        let pidfd = cvt(unsafe { libc::syscall(libc::SYS_pidfd_open, our_pid, 0) } as c_int);
+                        match pidfd {
+                            Ok(pidfd) => {
+                                support = FORK_EXEC;
+                                if let Some(Ok(pid)) = pidfd_getpid.get().map(|f| cvt(unsafe { f(pidfd) } as i32)) {
+                                    if pidfd_spawnp.get().is_some() && pid as u32 == our_pid {
+                                        support = SPAWN
+                                    }
+                                }
+                                unsafe { libc::close(pidfd) };
+                            }
+                            Err(e) if e.raw_os_error() == Some(libc::EMFILE) => {
+                                // We're temporarily(?) out of file descriptors.  In this case obtaining a pidfd would also fail.
+                                // Don't update the support flag so we can probe again later.
+                                return Err(e)
+                            }
+                            _ => {}
+                        }
+                        PIDFD_SUPPORTED.store(support, Ordering::Relaxed);
+                        if support == FORK_EXEC {
+                            return Ok(None);
+                        }
+                    }
+                    core::assert_matches::debug_assert_matches!(support, SPAWN | NO);
+                }
+            } else {
+                if self.get_create_pidfd() {
+                    unreachable!("only implemented on linux")
+                }
+            }
+        }
+
         // Only glibc 2.24+ posix_spawn() supports returning ENOENT directly.
         #[cfg(all(target_os = "linux", target_env = "gnu"))]
         {
@@ -543,9 +608,6 @@ impl Command {
 
         let pgroup = self.get_pgroup();
 
-        // Safety: -1 indicates we don't have a pidfd.
-        let mut p = unsafe { Process::new(0, -1) };
-
         struct PosixSpawnFileActions<'a>(&'a mut MaybeUninit<libc::posix_spawn_file_actions_t>);
 
         impl Drop for PosixSpawnFileActions<'_> {
@@ -640,6 +702,47 @@ impl Command {
             #[cfg(target_os = "nto")]
             let spawn_fn = retrying_libc_posix_spawnp;
 
+            #[cfg(target_os = "linux")]
+            if self.get_create_pidfd() && PIDFD_SUPPORTED.load(Ordering::Relaxed) == SPAWN {
+                let mut pidfd: libc::c_int = -1;
+                let spawn_res = pidfd_spawnp.get().unwrap()(
+                    &mut pidfd,
+                    self.get_program_cstr().as_ptr(),
+                    file_actions.0.as_ptr(),
+                    attrs.0.as_ptr(),
+                    self.get_argv().as_ptr() as *const _,
+                    envp as *const _,
+                );
+
+                let spawn_res = cvt_nz(spawn_res);
+                if let Err(ref e) = spawn_res
+                    && e.raw_os_error() == Some(libc::ENOSYS)
+                {
+                    PIDFD_SUPPORTED.store(FORK_EXEC, Ordering::Relaxed);
+                    return Ok(None);
+                }
+                spawn_res?;
+
+                let pid = match cvt(pidfd_getpid.get().unwrap()(pidfd)) {
+                    Ok(pid) => pid,
+                    Err(e) => {
+                        // The child has been spawned and we are holding its pidfd.
+                        // But we cannot obtain its pid even though pidfd_getpid support was verified earlier.
+                        // This might happen if libc can't open procfs because the file descriptor limit has been reached.
+                        libc::close(pidfd);
+                        return Err(Error::new(
+                            e.kind(),
+                            "pidfd_spawnp succeeded but the child's PID could not be obtained",
+                        ));
+                    }
+                };
+
+                return Ok(Some(Process::new(pid, pidfd)));
+            }
+
+            // Safety: -1 indicates we don't have a pidfd.
+            let mut p = Process::new(0, -1);
+
             let spawn_res = spawn_fn(
                 &mut p.pid,
                 self.get_program_cstr().as_ptr(),
@@ -786,6 +889,12 @@ pub struct Process {
 
 impl Process {
     #[cfg(target_os = "linux")]
+    /// # Safety
+    ///
+    /// `pidfd` must either be -1 (representing no file descriptor) or a valid, exclusively owned file
+    /// descriptor (see [I/O Safety]).
+    ///
+    /// [I/O Safety]: crate::io#io-safety
     unsafe fn new(pid: pid_t, pidfd: pid_t) -> Self {
         use crate::os::unix::io::FromRawFd;
         use crate::sys_common::FromInner;
diff --git a/src/bootstrap/src/core/config/config.rs b/src/bootstrap/src/core/config/config.rs
index 2d54a84331ff6..3327df972bf80 100644
--- a/src/bootstrap/src/core/config/config.rs
+++ b/src/bootstrap/src/core/config/config.rs
@@ -2466,14 +2466,6 @@ impl Config {
             }
         };
 
-        // Handle running from a directory other than the top level
-        let top_level = output(
-            &mut helpers::git(Some(&self.src)).args(["rev-parse", "--show-toplevel"]).command,
-        );
-        let top_level = top_level.trim_end();
-        let compiler = format!("{top_level}/compiler/");
-        let library = format!("{top_level}/library/");
-
         // Look for a version to compare to based on the current commit.
         // Only commits merged by bors will have CI artifacts.
         let merge_base = output(
@@ -2494,7 +2486,9 @@ impl Config {
 
         // Warn if there were changes to the compiler or standard library since the ancestor commit.
         let has_changes = !t!(helpers::git(Some(&self.src))
-            .args(["diff-index", "--quiet", commit, "--", &compiler, &library])
+            .args(["diff-index", "--quiet", commit])
+            .arg("--")
+            .args([self.src.join("compiler"), self.src.join("library")])
             .command
             .status())
         .success();
@@ -2566,12 +2560,6 @@ impl Config {
         option_name: &str,
         if_unchanged: bool,
     ) -> Option<String> {
-        // Handle running from a directory other than the top level
-        let top_level = output(
-            &mut helpers::git(Some(&self.src)).args(["rev-parse", "--show-toplevel"]).command,
-        );
-        let top_level = top_level.trim_end();
-
         // Look for a version to compare to based on the current commit.
         // Only commits merged by bors will have CI artifacts.
         let merge_base = output(
@@ -2594,8 +2582,11 @@ impl Config {
         let mut git = helpers::git(Some(&self.src));
         git.args(["diff-index", "--quiet", commit, "--"]);
 
+        // Handle running from a directory other than the top level
+        let top_level = &self.src;
+
         for path in modified_paths {
-            git.arg(format!("{top_level}/{path}"));
+            git.arg(top_level.join(path));
         }
 
         let has_changes = !t!(git.command.status()).success();
diff --git a/src/ci/docker/host-x86_64/dist-riscv64-linux/Dockerfile b/src/ci/docker/host-x86_64/dist-riscv64-linux/Dockerfile
index 426e601f5d34d..4d9334dde8c55 100644
--- a/src/ci/docker/host-x86_64/dist-riscv64-linux/Dockerfile
+++ b/src/ci/docker/host-x86_64/dist-riscv64-linux/Dockerfile
@@ -11,6 +11,7 @@ RUN sh /scripts/rustbuild-setup.sh
 WORKDIR /tmp
 
 COPY scripts/crosstool-ng-build.sh /scripts/
+COPY host-x86_64/dist-riscv64-linux/patches/ /tmp/patches/
 COPY host-x86_64/dist-riscv64-linux/riscv64-unknown-linux-gnu.defconfig /tmp/crosstool.defconfig
 RUN /scripts/crosstool-ng-build.sh
 
diff --git a/src/ci/docker/host-x86_64/dist-riscv64-linux/patches/gcc/8.5.0/0001-divdi3-div-zero.patch b/src/ci/docker/host-x86_64/dist-riscv64-linux/patches/gcc/8.5.0/0001-divdi3-div-zero.patch
new file mode 100644
index 0000000000000..f688eaf8029ec
--- /dev/null
+++ b/src/ci/docker/host-x86_64/dist-riscv64-linux/patches/gcc/8.5.0/0001-divdi3-div-zero.patch
@@ -0,0 +1,37 @@
+From 4013baf99c38f7bca06a51f8301e8fb195ccfa33 Mon Sep 17 00:00:00 2001
+From: Jim Wilson <jimw@sifive.com>
+Date: Tue, 2 Jun 2020 11:19:39 -0700
+Subject: [PATCH] RISC-V: Make __divdi3 handle div by zero same as hardware.
+
+The ISA manual specifies that divide by zero always returns -1 as the result.
+We were failing to do that when the dividend was negative.
+
+Original patch from Virginie Moser.
+
+	libgcc/
+	* config/riscv/div.S (__divdi3): For negative arguments, change bgez
+	to bgtz.
+---
+ libgcc/config/riscv/div.S | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/libgcc/config/riscv/div.S b/libgcc/config/riscv/div.S
+index 151f8e273ac77..17234324c1e41 100644
+--- a/libgcc/config/riscv/div.S
++++ b/libgcc/config/riscv/div.S
+@@ -107,10 +107,12 @@ FUNC_END (__umoddi3)
+   /* Handle negative arguments to __divdi3.  */
+ .L10:
+   neg   a0, a0
+-  bgez  a1, .L12      /* Compute __udivdi3(-a0, a1), then negate the result.  */
++  /* Zero is handled as a negative so that the result will not be inverted.  */
++  bgtz  a1, .L12     /* Compute __udivdi3(-a0, a1), then negate the result.  */
++
+   neg   a1, a1
+-  j     __udivdi3     /* Compute __udivdi3(-a0, -a1).  */
+-.L11:                 /* Compute __udivdi3(a0, -a1), then negate the result.  */
++  j     __udivdi3    /* Compute __udivdi3(-a0, -a1).  */
++.L11:                /* Compute __udivdi3(a0, -a1), then negate the result.  */
+   neg   a1, a1
+ .L12:
+   move  t0, ra
diff --git a/src/ci/docker/host-x86_64/dist-riscv64-linux/patches/gcc/8.5.0/0002-hidden-jump-target.patch b/src/ci/docker/host-x86_64/dist-riscv64-linux/patches/gcc/8.5.0/0002-hidden-jump-target.patch
new file mode 100644
index 0000000000000..7ae4469428b13
--- /dev/null
+++ b/src/ci/docker/host-x86_64/dist-riscv64-linux/patches/gcc/8.5.0/0002-hidden-jump-target.patch
@@ -0,0 +1,117 @@
+From 45116f342057b7facecd3d05c2091ce3a77eda59 Mon Sep 17 00:00:00 2001
+From: Nelson Chu <nelson.chu@sifive.com>
+Date: Mon, 29 Nov 2021 04:48:20 -0800
+Subject: [PATCH] RISC-V: jal cannot refer to a default visibility symbol for
+ shared object.
+
+This is the original binutils bugzilla report,
+https://sourceware.org/bugzilla/show_bug.cgi?id=28509
+
+And this is the first version of the proposed binutils patch,
+https://sourceware.org/pipermail/binutils/2021-November/118398.html
+
+After applying the binutils patch, I get the unexpected error when
+building libgcc,
+
+/scratch/nelsonc/riscv-gnu-toolchain/riscv-gcc/libgcc/config/riscv/div.S:42:
+/scratch/nelsonc/build-upstream/rv64gc-linux/build-install/riscv64-unknown-linux-gnu/bin/ld: relocation R_RISCV_JAL against `__udivdi3' which may bind externally can not be used when making a shared object; recompile with -fPIC
+
+Therefore, this patch adds an extra hidden alias symbol for __udivdi3, and
+then use HIDDEN_JUMPTARGET to target a non-preemptible symbol instead.
+The solution is similar to glibc as follows,
+https://sourceware.org/git/?p=glibc.git;a=commit;h=68389203832ab39dd0dbaabbc4059e7fff51c29b
+
+libgcc/ChangeLog:
+
+	* config/riscv/div.S: Add the hidden alias symbol for __udivdi3, and
+	then use HIDDEN_JUMPTARGET to target it since it is non-preemptible.
+	* config/riscv/riscv-asm.h: Added new macros HIDDEN_JUMPTARGET and
+	HIDDEN_DEF.
+---
+ libgcc/config/riscv/div.S       | 15 ++++++++-------
+ libgcc/config/riscv/riscv-asm.h |  6 ++++++
+ 2 files changed, 14 insertions(+), 7 deletions(-)
+
+diff --git a/libgcc/config/riscv/div.S b/libgcc/config/riscv/div.S
+index c9bd7879c1e36..723c3b82e48c6 100644
+--- a/libgcc/config/riscv/div.S
++++ b/libgcc/config/riscv/div.S
+@@ -40,7 +40,7 @@ FUNC_BEGIN (__udivsi3)
+   sll    a0, a0, 32
+   sll    a1, a1, 32
+   move   t0, ra
+-  jal    __udivdi3
++  jal    HIDDEN_JUMPTARGET(__udivdi3)
+   sext.w a0, a0
+   jr     t0
+ FUNC_END (__udivsi3)
+@@ -52,7 +52,7 @@ FUNC_BEGIN (__umodsi3)
+   srl    a0, a0, 32
+   srl    a1, a1, 32
+   move   t0, ra
+-  jal    __udivdi3
++  jal    HIDDEN_JUMPTARGET(__udivdi3)
+   sext.w a0, a1
+   jr     t0
+ FUNC_END (__umodsi3)
+@@ -95,11 +95,12 @@ FUNC_BEGIN (__udivdi3)
+ .L5:
+   ret
+ FUNC_END (__udivdi3)
++HIDDEN_DEF (__udivdi3)
+ 
+ FUNC_BEGIN (__umoddi3)
+   /* Call __udivdi3(a0, a1), then return the remainder, which is in a1.  */
+   move  t0, ra
+-  jal   __udivdi3
++  jal   HIDDEN_JUMPTARGET(__udivdi3)
+   move  a0, a1
+   jr    t0
+ FUNC_END (__umoddi3)
+@@ -111,12 +112,12 @@ FUNC_END (__umoddi3)
+   bgtz  a1, .L12     /* Compute __udivdi3(-a0, a1), then negate the result.  */
+ 
+   neg   a1, a1
+-  j     __udivdi3    /* Compute __udivdi3(-a0, -a1).  */
++  j     HIDDEN_JUMPTARGET(__udivdi3)     /* Compute __udivdi3(-a0, -a1).  */
+ .L11:                /* Compute __udivdi3(a0, -a1), then negate the result.  */
+   neg   a1, a1
+ .L12:
+   move  t0, ra
+-  jal   __udivdi3
++  jal   HIDDEN_JUMPTARGET(__udivdi3)
+   neg   a0, a0
+   jr    t0
+ FUNC_END (__divdi3)
+@@ -126,7 +127,7 @@ FUNC_BEGIN (__moddi3)
+   bltz   a1, .L31
+   bltz   a0, .L32
+ .L30:
+-  jal    __udivdi3    /* The dividend is not negative.  */
++  jal    HIDDEN_JUMPTARGET(__udivdi3)    /* The dividend is not negative.  */
+   move   a0, a1
+   jr     t0
+ .L31:
+@@ -134,7 +135,7 @@ FUNC_BEGIN (__moddi3)
+   bgez   a0, .L30
+ .L32:
+   neg    a0, a0
+-  jal    __udivdi3    /* The dividend is hella negative.  */
++  jal    HIDDEN_JUMPTARGET(__udivdi3)    /* The dividend is hella negative.  */
+   neg    a0, a1
+   jr     t0
+ FUNC_END (__moddi3)
+diff --git a/libgcc/config/riscv/riscv-asm.h b/libgcc/config/riscv/riscv-asm.h
+index 8550707a4a26a..96dd85b0df2e5 100644
+--- a/libgcc/config/riscv/riscv-asm.h
++++ b/libgcc/config/riscv/riscv-asm.h
+@@ -33,3 +33,9 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+ #define FUNC_ALIAS(X,Y)		\
+ 	.globl X;		\
+ 	X = Y
++
++#define CONCAT1(a, b)		CONCAT2(a, b)
++#define CONCAT2(a, b)		a ## b
++#define HIDDEN_JUMPTARGET(X)	CONCAT1(__hidden_, X)
++#define HIDDEN_DEF(X)		FUNC_ALIAS(HIDDEN_JUMPTARGET(X), X);     \
++				.hidden HIDDEN_JUMPTARGET(X)
diff --git a/src/ci/docker/host-x86_64/dist-riscv64-linux/patches/glibc/2.29/0001-hidden-jump-target.patch b/src/ci/docker/host-x86_64/dist-riscv64-linux/patches/glibc/2.29/0001-hidden-jump-target.patch
new file mode 100644
index 0000000000000..d267b961d3472
--- /dev/null
+++ b/src/ci/docker/host-x86_64/dist-riscv64-linux/patches/glibc/2.29/0001-hidden-jump-target.patch
@@ -0,0 +1,58 @@
+From 68389203832ab39dd0dbaabbc4059e7fff51c29b Mon Sep 17 00:00:00 2001
+From: Fangrui Song <maskray@google.com>
+Date: Thu, 28 Oct 2021 11:39:49 -0700
+Subject: [PATCH] riscv: Fix incorrect jal with HIDDEN_JUMPTARGET
+
+A non-local STV_DEFAULT defined symbol is by default preemptible in a
+shared object. j/jal cannot target a preemptible symbol. On other
+architectures, such a jump instruction either causes PLT [BZ #18822], or
+if short-ranged, sometimes rejected by the linker (but not by GNU ld's
+riscv port [ld PR/28509]).
+
+Use HIDDEN_JUMPTARGET to target a non-preemptible symbol instead.
+
+With this patch, ld.so and libc.so can be linked with LLD if source
+files are compiled/assembled with -mno-relax/-Wa,-mno-relax.
+
+Acked-by: Palmer Dabbelt <palmer@dabbelt.com>
+Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
+---
+ sysdeps/riscv/setjmp.S                     | 2 +-
+ sysdeps/unix/sysv/linux/riscv/setcontext.S | 5 +++--
+ 2 files changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/sysdeps/riscv/setjmp.S b/sysdeps/riscv/setjmp.S
+index 0b92016b311..bec7ff80f49 100644
+--- a/sysdeps/riscv/setjmp.S
++++ b/sysdeps/riscv/setjmp.S
+@@ -21,7 +21,7 @@
+ 
+ ENTRY (_setjmp)
+   li	a1, 0
+-  j	__sigsetjmp
++  j	HIDDEN_JUMPTARGET (__sigsetjmp)
+ END (_setjmp)
+ ENTRY (setjmp)
+   li	a1, 1
+diff --git a/sysdeps/unix/sysv/linux/riscv/setcontext.S b/sysdeps/unix/sysv/linux/riscv/setcontext.S
+index 9510518750a..e44a68aad47 100644
+--- a/sysdeps/unix/sysv/linux/riscv/setcontext.S
++++ b/sysdeps/unix/sysv/linux/riscv/setcontext.S
+@@ -95,6 +95,7 @@ LEAF (__setcontext)
+ 99:	j	__syscall_error
+ 
+ END (__setcontext)
++libc_hidden_def (__setcontext)
+ weak_alias (__setcontext, setcontext)
+ 
+ LEAF (__start_context)
+@@ -108,7 +109,7 @@ LEAF (__start_context)
+ 	/* Invoke subsequent context if present, else exit(0).  */
+ 	mv	a0, s2
+ 	beqz	s2, 1f
+-	jal	__setcontext
+-1:	j	exit
++	jal	HIDDEN_JUMPTARGET (__setcontext)
++1:	j	HIDDEN_JUMPTARGET (exit)
+ 
+ END (__start_context)
diff --git a/src/ci/docker/host-x86_64/dist-riscv64-linux/riscv64-unknown-linux-gnu.defconfig b/src/ci/docker/host-x86_64/dist-riscv64-linux/riscv64-unknown-linux-gnu.defconfig
index 470cef1a84e18..f7c93a9d5fc88 100644
--- a/src/ci/docker/host-x86_64/dist-riscv64-linux/riscv64-unknown-linux-gnu.defconfig
+++ b/src/ci/docker/host-x86_64/dist-riscv64-linux/riscv64-unknown-linux-gnu.defconfig
@@ -3,6 +3,8 @@ CT_EXPERIMENTAL=y
 CT_PREFIX_DIR="/x-tools/${CT_TARGET}"
 CT_USE_MIRROR=y
 CT_MIRROR_BASE_URL="https://ci-mirrors.rust-lang.org/rustc"
+CT_PATCH_BUNDLED_LOCAL=y
+CT_LOCAL_PATCH_DIR="/tmp/patches"
 CT_ARCH_RISCV=y
 # CT_DEMULTILIB is not set
 CT_ARCH_USE_MMU=y
@@ -10,7 +12,7 @@ CT_ARCH_64=y
 CT_ARCH_ARCH="rv64gc"
 CT_KERNEL_LINUX=y
 CT_LINUX_V_4_20=y
-CT_BINUTILS_V_2_36=y
+CT_BINUTILS_V_2_40=y
 CT_GLIBC_V_2_29=y
 CT_GCC_V_8=y
 CT_CC_LANG_CXX=y
diff --git a/tests/ui/check-cfg/mix.stderr b/tests/ui/check-cfg/mix.stderr
index cc63466585a6a..15b0100d7d23b 100644
--- a/tests/ui/check-cfg/mix.stderr
+++ b/tests/ui/check-cfg/mix.stderr
@@ -251,7 +251,7 @@ warning: unexpected `cfg` condition value: `zebra`
 LL |     cfg!(target_feature = "zebra");
    |          ^^^^^^^^^^^^^^^^^^^^^^^^
    |
-   = note: expected values for `target_feature` are: `10e60`, `2e3`, `3e3r1`, `3e3r2`, `3e3r3`, `3e7`, `7e10`, `a`, `aclass`, `adx`, `aes`, `altivec`, `alu32`, `atomics`, `avx`, `avx2`, `avx512bf16`, `avx512bitalg`, `avx512bw`, `avx512cd`, `avx512dq`, `avx512f`, `avx512fp16`, `avx512ifma`, `avx512vbmi`, `avx512vbmi2`, `avx512vl`, `avx512vnni`, `avx512vp2intersect`, `avx512vpopcntdq`, `avxifma`, `avxneconvert`, `avxvnni`, `avxvnniint16`, and `avxvnniint8` and 191 more
+   = note: expected values for `target_feature` are: `10e60`, `2e3`, `3e3r1`, `3e3r2`, `3e3r3`, `3e7`, `7e10`, `a`, `aclass`, `adx`, `aes`, `altivec`, `alu32`, `amx-bf16`, `amx-complex`, `amx-fp16`, `amx-int8`, `amx-tile`, `atomics`, `avx`, `avx2`, `avx512bf16`, `avx512bitalg`, `avx512bw`, `avx512cd`, `avx512dq`, `avx512f`, `avx512fp16`, `avx512ifma`, `avx512vbmi`, `avx512vbmi2`, `avx512vl`, `avx512vnni`, `avx512vp2intersect`, and `avx512vpopcntdq` and 196 more
    = note: see <https://doc.rust-lang.org/nightly/rustc/check-cfg.html> for more information about checking conditional configuration
 
 warning: 27 warnings emitted
diff --git a/tests/ui/check-cfg/well-known-values.stderr b/tests/ui/check-cfg/well-known-values.stderr
index 8a99ace75d852..c35fb68c839dd 100644
--- a/tests/ui/check-cfg/well-known-values.stderr
+++ b/tests/ui/check-cfg/well-known-values.stderr
@@ -165,7 +165,7 @@ warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
 LL |     target_feature = "_UNEXPECTED_VALUE",
    |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    |
-   = note: expected values for `target_feature` are: `10e60`, `2e3`, `3e3r1`, `3e3r2`, `3e3r3`, `3e7`, `7e10`, `a`, `aclass`, `adx`, `aes`, `altivec`, `alu32`, `atomics`, `avx`, `avx2`, `avx512bf16`, `avx512bitalg`, `avx512bw`, `avx512cd`, `avx512dq`, `avx512f`, `avx512fp16`, `avx512ifma`, `avx512vbmi`, `avx512vbmi2`, `avx512vl`, `avx512vnni`, `avx512vp2intersect`, `avx512vpopcntdq`, `avxifma`, `avxneconvert`, `avxvnni`, `avxvnniint16`, `avxvnniint8`, `bf16`, `bmi1`, `bmi2`, `bti`, `bulk-memory`, `c`, `cache`, `cmpxchg16b`, `crc`, `crt-static`, `d`, `d32`, `dit`, `doloop`, `dotprod`, `dpb`, `dpb2`, `dsp`, `dsp1e2`, `dspe60`, `e`, `e1`, `e2`, `edsp`, `elrw`, `ermsb`, `exception-handling`, `extended-const`, `f`, `f16c`, `f32mm`, `f64mm`, `fcma`, `fdivdu`, `fhm`, `flagm`, `float1e2`, `float1e3`, `float3e4`, `float7e60`, `floate1`, `fma`, `fp-armv8`, `fp16`, `fp64`, `fpuv2_df`, `fpuv2_sf`, `fpuv3_df`, `fpuv3_hf`, `fpuv3_hi`, `fpuv3_sf`, `frecipe`, `frintts`, `fxsr`, `gfni`, `hard-float`, `hard-float-abi`, `hard-tp`, `high-registers`, `hvx`, `hvx-length128b`, `hwdiv`, `i8mm`, `jsconv`, `lahfsahf`, `lasx`, `lbt`, `lor`, `lse`, `lsx`, `lvz`, `lzcnt`, `m`, `mclass`, `movbe`, `mp`, `mp1e2`, `msa`, `mte`, `multivalue`, `mutable-globals`, `neon`, `nontrapping-fptoint`, `nvic`, `paca`, `pacg`, `pan`, `pclmulqdq`, `pmuv3`, `popcnt`, `power10-vector`, `power8-altivec`, `power8-vector`, `power9-altivec`, `power9-vector`, `prfchw`, `rand`, `ras`, `rclass`, `rcpc`, `rcpc2`, `rdm`, `rdrand`, `rdseed`, `reference-types`, `relax`, `relaxed-simd`, `rtm`, `sb`, `sha`, `sha2`, `sha3`, `sign-ext`, `simd128`, `sm4`, `spe`, `ssbs`, `sse`, `sse2`, `sse3`, `sse4.1`, `sse4.2`, `sse4a`, `ssse3`, `sve`, `sve2`, `sve2-aes`, `sve2-bitperm`, `sve2-sha3`, `sve2-sm4`, `tbm`, `thumb-mode`, `thumb2`, `tme`, `trust`, `trustzone`, `ual`, `unaligned-scalar-mem`, `v`, `v5te`, `v6`, `v6k`, `v6t2`, `v7`, `v8`, `v8.1a`, `v8.2a`, `v8.3a`, `v8.4a`, `v8.5a`, `v8.6a`, `v8.7a`, `vaes`, `vdsp2e60f`, `vdspv1`, 
`vdspv2`, `vfp2`, `vfp3`, `vfp4`, `vh`, `virt`, `virtualization`, `vpclmulqdq`, `vsx`, `xsave`, `xsavec`, `xsaveopt`, `xsaves`, `zba`, `zbb`, `zbc`, `zbkb`, `zbkc`, `zbkx`, `zbs`, `zdinx`, `zfh`, `zfhmin`, `zfinx`, `zhinx`, `zhinxmin`, `zk`, `zkn`, `zknd`, `zkne`, `zknh`, `zkr`, `zks`, `zksed`, `zksh`, and `zkt`
+   = note: expected values for `target_feature` are: `10e60`, `2e3`, `3e3r1`, `3e3r2`, `3e3r3`, `3e7`, `7e10`, `a`, `aclass`, `adx`, `aes`, `altivec`, `alu32`, `amx-bf16`, `amx-complex`, `amx-fp16`, `amx-int8`, `amx-tile`, `atomics`, `avx`, `avx2`, `avx512bf16`, `avx512bitalg`, `avx512bw`, `avx512cd`, `avx512dq`, `avx512f`, `avx512fp16`, `avx512ifma`, `avx512vbmi`, `avx512vbmi2`, `avx512vl`, `avx512vnni`, `avx512vp2intersect`, `avx512vpopcntdq`, `avxifma`, `avxneconvert`, `avxvnni`, `avxvnniint16`, `avxvnniint8`, `bf16`, `bmi1`, `bmi2`, `bti`, `bulk-memory`, `c`, `cache`, `cmpxchg16b`, `crc`, `crt-static`, `d`, `d32`, `dit`, `doloop`, `dotprod`, `dpb`, `dpb2`, `dsp`, `dsp1e2`, `dspe60`, `e`, `e1`, `e2`, `edsp`, `elrw`, `ermsb`, `exception-handling`, `extended-const`, `f`, `f16c`, `f32mm`, `f64mm`, `fcma`, `fdivdu`, `fhm`, `flagm`, `float1e2`, `float1e3`, `float3e4`, `float7e60`, `floate1`, `fma`, `fp-armv8`, `fp16`, `fp64`, `fpuv2_df`, `fpuv2_sf`, `fpuv3_df`, `fpuv3_hf`, `fpuv3_hi`, `fpuv3_sf`, `frecipe`, `frintts`, `fxsr`, `gfni`, `hard-float`, `hard-float-abi`, `hard-tp`, `high-registers`, `hvx`, `hvx-length128b`, `hwdiv`, `i8mm`, `jsconv`, `lahfsahf`, `lasx`, `lbt`, `lor`, `lse`, `lsx`, `lvz`, `lzcnt`, `m`, `mclass`, `movbe`, `mp`, `mp1e2`, `msa`, `mte`, `multivalue`, `mutable-globals`, `neon`, `nontrapping-fptoint`, `nvic`, `paca`, `pacg`, `pan`, `pclmulqdq`, `pmuv3`, `popcnt`, `power10-vector`, `power8-altivec`, `power8-vector`, `power9-altivec`, `power9-vector`, `prfchw`, `rand`, `ras`, `rclass`, `rcpc`, `rcpc2`, `rdm`, `rdrand`, `rdseed`, `reference-types`, `relax`, `relaxed-simd`, `rtm`, `sb`, `sha`, `sha2`, `sha3`, `sign-ext`, `simd128`, `sm4`, `spe`, `ssbs`, `sse`, `sse2`, `sse3`, `sse4.1`, `sse4.2`, `sse4a`, `ssse3`, `sve`, `sve2`, `sve2-aes`, `sve2-bitperm`, `sve2-sha3`, `sve2-sm4`, `tbm`, `thumb-mode`, `thumb2`, `tme`, `trust`, `trustzone`, `ual`, `unaligned-scalar-mem`, `v`, `v5te`, `v6`, `v6k`, `v6t2`, `v7`, `v8`, `v8.1a`, `v8.2a`, `v8.3a`, `v8.4a`, 
`v8.5a`, `v8.6a`, `v8.7a`, `vaes`, `vdsp2e60f`, `vdspv1`, `vdspv2`, `vfp2`, `vfp3`, `vfp4`, `vh`, `virt`, `virtualization`, `vpclmulqdq`, `vsx`, `xsave`, `xsavec`, `xsaveopt`, `xsaves`, `zba`, `zbb`, `zbc`, `zbkb`, `zbkc`, `zbkx`, `zbs`, `zdinx`, `zfh`, `zfhmin`, `zfinx`, `zhinx`, `zhinxmin`, `zk`, `zkn`, `zknd`, `zkne`, `zknh`, `zkr`, `zks`, `zksed`, `zksh`, and `zkt`
    = note: see <https://doc.rust-lang.org/nightly/rustc/check-cfg.html> for more information about checking conditional configuration
 
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
diff --git a/tests/ui/feature-gates/feature-gate-x86_amx_intrinsics.rs b/tests/ui/feature-gates/feature-gate-x86_amx_intrinsics.rs
new file mode 100644
index 0000000000000..ecbfc0bce5c56
--- /dev/null
+++ b/tests/ui/feature-gates/feature-gate-x86_amx_intrinsics.rs
@@ -0,0 +1,6 @@
+//@ only-x86_64
+#[target_feature(enable = "amx-tile")]
+//~^ ERROR: currently unstable
+unsafe fn foo() {}
+
+fn main() {}
diff --git a/tests/ui/feature-gates/feature-gate-x86_amx_intrinsics.stderr b/tests/ui/feature-gates/feature-gate-x86_amx_intrinsics.stderr
new file mode 100644
index 0000000000000..58d577a37902c
--- /dev/null
+++ b/tests/ui/feature-gates/feature-gate-x86_amx_intrinsics.stderr
@@ -0,0 +1,13 @@
+error[E0658]: the target feature `amx-tile` is currently unstable
+  --> $DIR/feature-gate-x86_amx_intrinsics.rs:2:18
+   |
+LL | #[target_feature(enable = "amx-tile")]
+   |                  ^^^^^^^^^^^^^^^^^^^
+   |
+   = note: see issue #126622 <https://github.com/rust-lang/rust/issues/126622> for more information
+   = help: add `#![feature(x86_amx_intrinsics)]` to the crate attributes to enable
+   = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date
+
+error: aborting due to 1 previous error
+
+For more information about this error, try `rustc --explain E0658`.