Prevent query cycles in the MIR inliner #68828

Merged 6 commits on Jan 25, 2021
Changes from all commits
21 changes: 21 additions & 0 deletions compiler/rustc_middle/src/query/mod.rs
@@ -782,6 +782,27 @@ rustc_queries! {
}

Other {
/// Check whether the function has any recursion that could cause the inliner to trigger
/// a cycle: returns `true` if `key.0` (transitively) calls `key.1`. The traversal covers
/// all intermediate functions, but not the current function itself.
query mir_callgraph_reachable(key: (ty::Instance<'tcx>, LocalDefId)) -> bool {
fatal_cycle
desc { |tcx|
"computing if `{}` (transitively) calls `{}`",
key.0,
tcx.def_path_str(key.1.to_def_id()),
}
}

/// Obtain all the calls into other local functions.
query mir_inliner_callees(key: ty::InstanceDef<'tcx>) -> &'tcx [(DefId, SubstsRef<'tcx>)] {
fatal_cycle
desc { |tcx|
"computing all local function calls in `{}`",
tcx.def_path_str(key.def_id()),
}
}

/// Evaluates a constant and returns the computed allocation.
///
/// **Do not use this** directly, use the `tcx.eval_static_initializer` wrapper.
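Note: the shape these new queries guard against is mutual recursion between local functions. An illustrative source-level example (not part of the PR):

    // Inlining `pong` into `ping` needs `pong`'s optimized MIR, and inlining
    // `ping` into `pong` needs `ping`'s -- without a reachability check the
    // two `optimized_mir` computations would wait on each other and cycle.
    fn ping(n: u32) -> u32 {
        if n == 0 { 0 } else { pong(n - 1) }
    }

    fn pong(n: u32) -> u32 {
        if n == 0 { 1 } else { ping(n - 1) }
    }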
11 changes: 11 additions & 0 deletions compiler/rustc_middle/src/ty/query/keys.rs
@@ -127,6 +127,17 @@ impl Key for (DefId, DefId) {
}
}

impl Key for (ty::Instance<'tcx>, LocalDefId) {
type CacheSelector = DefaultCacheSelector;

fn query_crate(&self) -> CrateNum {
self.0.query_crate()
}
fn default_span(&self, tcx: TyCtxt<'_>) -> Span {
self.0.default_span(tcx)
}
}

impl Key for (DefId, LocalDefId) {
type CacheSelector = DefaultCacheSelector;

2 changes: 2 additions & 0 deletions compiler/rustc_mir/src/lib.rs
@@ -57,6 +57,8 @@ pub fn provide(providers: &mut Providers) {
providers.eval_to_const_value_raw = const_eval::eval_to_const_value_raw_provider;
providers.eval_to_allocation_raw = const_eval::eval_to_allocation_raw_provider;
providers.const_caller_location = const_eval::const_caller_location;
providers.mir_callgraph_reachable = transform::inline::cycle::mir_callgraph_reachable;
providers.mir_inliner_callees = transform::inline::cycle::mir_inliner_callees;
providers.destructure_const = |tcx, param_env_and_value| {
let (param_env, value) = param_env_and_value.into_parts();
const_eval::destructure_const(tcx, param_env, value)
10 changes: 9 additions & 1 deletion compiler/rustc_mir/src/transform/const_prop.rs
@@ -440,7 +440,15 @@ impl<'mir, 'tcx> ConstPropagator<'mir, 'tcx> {
}

fn lint_root(&self, source_info: SourceInfo) -> Option<HirId> {
match &self.source_scopes[source_info.scope].local_data {
let mut data = &self.source_scopes[source_info.scope];
// FIXME(oli-obk): we should be able to just walk the `inlined_parent_scope`, but it
// does not work as I thought it would. Needs more investigation and documentation.
Comment on lines +444 to +445

wesleywiser (Member, Jan 10, 2021):

Do you think this needs to be fixed before this can be landed? Since the inliner is still gated behind mir-opt-level=2, imo this doesn't have to be resolved right now.

oli-obk (Contributor, Author):

Yea, we should resolve this later, I need some input by eddyb first or just put in enough time to analyze and document all of this stuff.

while data.inlined.is_some() {
trace!(?data);
data = &self.source_scopes[data.parent_scope.unwrap()];
}
trace!(?data);
match &data.local_data {
ClearCrossCrate::Set(data) => Some(data.lint_root),
ClearCrossCrate::Clear => None,
}
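A simplified, self-contained model of the scope walk added above (toy types, not rustc's real `SourceScopeData`):

    struct ScopeData {
        parent_scope: Option<usize>,
        inlined: bool,
        lint_root: Option<u32>,
    }

    // Climb out of scopes created by inlining before reading the lint root,
    // mirroring the `while data.inlined.is_some()` loop in the diff.
    fn lint_root(scopes: &[ScopeData], mut idx: usize) -> Option<u32> {
        while scopes[idx].inlined {
            idx = scopes[idx].parent_scope.expect("inlined scope has a parent");
        }
        scopes[idx].lint_root
    }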
91 changes: 74 additions & 17 deletions compiler/rustc_mir/src/transform/inline.rs
@@ -17,6 +17,8 @@ use crate::transform::MirPass;
use std::iter;
use std::ops::{Range, RangeFrom};

crate mod cycle;

const INSTR_COST: usize = 5;
const CALL_PENALTY: usize = 25;
const LANDINGPAD_PENALTY: usize = 50;
@@ -37,6 +39,9 @@ struct CallSite<'tcx> {

impl<'tcx> MirPass<'tcx> for Inline {
fn run_pass(&self, tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) {
// If you change this optimization level, also change the level in
// `mir_drops_elaborated_and_const_checked` for the call to `mir_inliner_callees`.
// Otherwise you will get an ICE about stolen MIR.
if tcx.sess.opts.debugging_opts.mir_opt_level < 2 {
return;
}
@@ -50,6 +55,8 @@ impl<'tcx> MirPass<'tcx> for Inline {
return;
}

let span = trace_span!("inline", body = %tcx.def_path_str(body.source.def_id()));
let _guard = span.enter();
if inline(tcx, body) {
debug!("running simplify cfg on {:?}", body.source);
CfgSimplifier::new(body).simplify();
@@ -90,8 +97,8 @@ struct Inliner<'tcx> {
codegen_fn_attrs: &'tcx CodegenFnAttrs,
/// Caller HirID.
hir_id: hir::HirId,
/// Stack of inlined instances.
history: Vec<Instance<'tcx>>,
/// Stack of inlined Instances.
history: Vec<ty::Instance<'tcx>>,
/// Indicates that the caller body has been modified.
changed: bool,
}
@@ -103,13 +110,28 @@ impl Inliner<'tcx> {
None => continue,
Some(it) => it,
};
let span = trace_span!("process_blocks", %callsite.callee, ?bb);
let _guard = span.enter();

trace!(
"checking for self recursion ({:?} vs body_source: {:?})",
callsite.callee.def_id(),
caller_body.source.def_id()
);
if callsite.callee.def_id() == caller_body.source.def_id() {
debug!("Not inlining a function into itself");
continue;
}

if !self.is_mir_available(&callsite.callee, caller_body) {
if !self.is_mir_available(callsite.callee, caller_body) {
debug!("MIR unavailable {}", callsite.callee);
continue;
}

let span = trace_span!("instance_mir", %callsite.callee);
let instance_mir_guard = span.enter();
let callee_body = self.tcx.instance_mir(callsite.callee.def);
drop(instance_mir_guard);
if !self.should_inline(callsite, callee_body) {
continue;
}
@@ -137,28 +159,61 @@ impl Inliner<'tcx> {
}
}

fn is_mir_available(&self, callee: &Instance<'tcx>, caller_body: &Body<'tcx>) -> bool {
if let InstanceDef::Item(_) = callee.def {
if !self.tcx.is_mir_available(callee.def_id()) {
return false;
#[instrument(skip(self, caller_body))]
fn is_mir_available(&self, callee: Instance<'tcx>, caller_body: &Body<'tcx>) -> bool {
match callee.def {
InstanceDef::Item(_) => {
// If there is no MIR available (either because it was not in metadata or
// because it has no MIR because it's an extern function), then the inliner
// won't cause cycles on this.
if !self.tcx.is_mir_available(callee.def_id()) {
return false;
}
}
// These have no callable MIR of their own.
InstanceDef::Intrinsic(_) | InstanceDef::Virtual(..) => return false,
// This cannot result in an immediate cycle since the callee MIR is a shim, which does
// not get any optimizations run on it. Any subsequent inlining may cause cycles, but we
// do not need to catch this here, we can wait until the inliner decides to continue
// inlining a second time.
InstanceDef::VtableShim(_)
| InstanceDef::ReifyShim(_)
| InstanceDef::FnPtrShim(..)
| InstanceDef::ClosureOnceShim { .. }
| InstanceDef::DropGlue(..)
| InstanceDef::CloneShim(..) => return true,
}

if self.tcx.is_constructor(callee.def_id()) {
trace!("constructors always have MIR");
// Constructor functions cannot cause a query cycle.
return true;
}

if let Some(callee_def_id) = callee.def_id().as_local() {
let callee_hir_id = self.tcx.hir().local_def_id_to_hir_id(callee_def_id);
// Avoid a cycle here by using `instance_mir` only if we have
// a lower `HirId` than the callee. This ensures that the callee will
// not inline us. This trick only works without incremental compilation.
// So don't do it if that is enabled. Also avoid inlining into generators,
// Avoid inlining into generators,
// since their `optimized_mir` is used for layout computation, which can
// create a cycle, even when no attempt is made to inline the function
// in the other direction.
!self.tcx.dep_graph.is_fully_enabled()
caller_body.generator_kind.is_none()
&& (
// Avoid a cycle here by using `instance_mir` only if we have
// a lower `HirId` than the callee. This ensures that the callee will
// not inline us. This trick only works without incremental compilation.
// So don't do it if that is enabled.
!self.tcx.dep_graph.is_fully_enabled()
&& self.hir_id < callee_hir_id
&& caller_body.generator_kind.is_none()
// If we know for sure that the function we're calling will itself try to
// call us, then we avoid inlining that function.
|| !self.tcx.mir_callgraph_reachable((callee, caller_body.source.def_id().expect_local()))
)
} else {
// This cannot result in a cycle since the callee MIR is from another crate
// and is already optimized.
// This cannot result in an immediate cycle since the callee MIR is from another crate
// and is already optimized. Any subsequent inlining may cause cycles, but we do
// not need to catch this here, we can wait until the inliner decides to continue
// inlining a second time.
trace!("functions from other crates always have MIR");
true
}
}
@@ -203,8 +258,8 @@ impl Inliner<'tcx> {
None
}

#[instrument(skip(self, callee_body))]
fn should_inline(&self, callsite: CallSite<'tcx>, callee_body: &Body<'tcx>) -> bool {
debug!("should_inline({:?})", callsite);
let tcx = self.tcx;

if callsite.fn_sig.c_variadic() {
@@ -333,7 +388,9 @@ impl Inliner<'tcx> {
if let Ok(Some(instance)) =
Instance::resolve(self.tcx, self.param_env, def_id, substs)
{
if callsite.callee == instance || self.history.contains(&instance) {
if callsite.callee.def_id() == instance.def_id()
|| self.history.contains(&instance)
{
debug!("`callee is recursive - not inlining");
return false;
}
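The last hunk switches the recursion check from comparing whole `Instance`s to comparing `DefId`s, so a recursive call that re-instantiates the callee with different substitutions still counts as recursion. A plain-Rust sketch of that decision (toy types and field names assumed, not rustc's API):

    #[derive(Clone, PartialEq, Eq)]
    struct FakeInstance {
        def_id: u32,    // stable per function definition
        substs: String, // varies between instantiations
    }

    // Flag recursion when the resolved callee shares the callsite callee's
    // *definition* (regardless of substs), or already appears in the history
    // of instances inlined so far.
    fn is_recursive(callsite: &FakeInstance, resolved: &FakeInstance, history: &[FakeInstance]) -> bool {
        callsite.def_id == resolved.def_id || history.contains(resolved)
    }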
157 changes: 157 additions & 0 deletions compiler/rustc_mir/src/transform/inline/cycle.rs
@@ -0,0 +1,157 @@
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_data_structures::stack::ensure_sufficient_stack;
use rustc_hir::def_id::{DefId, LocalDefId};
use rustc_middle::mir::TerminatorKind;
use rustc_middle::ty::TypeFoldable;
use rustc_middle::ty::{self, subst::SubstsRef, InstanceDef, TyCtxt};

// FIXME: check whether it is cheaper to precompute the entire call graph instead of invoking
// this query riddiculously often.
Review comment (Member):

nit: Not sure if you're planning to leave this in when this is merged, but if so:

Suggested change:
- // this query riddiculously often.
+ // this query ridiculously often.
#[instrument(skip(tcx, root, target))]
crate fn mir_callgraph_reachable(
tcx: TyCtxt<'tcx>,
(root, target): (ty::Instance<'tcx>, LocalDefId),
) -> bool {
trace!(%root, target = %tcx.def_path_str(target.to_def_id()));
let param_env = tcx.param_env_reveal_all_normalized(target);
assert_ne!(
root.def_id().expect_local(),
target,
"you should not call `mir_callgraph_reachable` on immediate self recursion"
);
assert!(
matches!(root.def, InstanceDef::Item(_)),
"you should not call `mir_callgraph_reachable` on shims"
);
assert!(
!tcx.is_constructor(root.def_id()),
"you should not call `mir_callgraph_reachable` on enum/struct constructor functions"
);
#[instrument(skip(tcx, param_env, target, stack, seen, recursion_limiter, caller))]
fn process(
tcx: TyCtxt<'tcx>,
param_env: ty::ParamEnv<'tcx>,
caller: ty::Instance<'tcx>,
target: LocalDefId,
stack: &mut Vec<ty::Instance<'tcx>>,
seen: &mut FxHashSet<ty::Instance<'tcx>>,
recursion_limiter: &mut FxHashMap<DefId, usize>,
) -> bool {
trace!(%caller);
for &(callee, substs) in tcx.mir_inliner_callees(caller.def) {
let substs = caller.subst_mir_and_normalize_erasing_regions(tcx, param_env, substs);
let callee = match ty::Instance::resolve(tcx, param_env, callee, substs).unwrap() {
Some(callee) => callee,
None => {
trace!(?callee, "cannot resolve, skipping");
continue;
}
};

// Found a path.
if callee.def_id() == target.to_def_id() {
return true;
}

if tcx.is_constructor(callee.def_id()) {
trace!("constructors always have MIR");
// Constructor functions cannot cause a query cycle.
continue;
}

match callee.def {
InstanceDef::Item(_) => {
// If there is no MIR available (either because it was not in metadata or
// because it has no MIR because it's an extern function), then the inliner
// won't cause cycles on this.
if !tcx.is_mir_available(callee.def_id()) {
trace!(?callee, "no mir available, skipping");
continue;
}
}
// These have no callable MIR of their own.
InstanceDef::Intrinsic(_) | InstanceDef::Virtual(..) => continue,
// These have MIR and, if that MIR is inlined, substituted and then inlining is
// run again, a function item can end up getting inlined. Thus we could cause
// a cycle that way.
InstanceDef::VtableShim(_)
| InstanceDef::ReifyShim(_)
| InstanceDef::FnPtrShim(..)
| InstanceDef::ClosureOnceShim { .. }
| InstanceDef::CloneShim(..) => {}
InstanceDef::DropGlue(..) => {
// FIXME: A not fully substituted drop shim can cause ICEs if one attempts to
// have its MIR built. Likely oli-obk just screwed up the `ParamEnv`s, so this
// needs some more analysis.
if callee.needs_subst() {
continue;
}
}
}

if seen.insert(callee) {
let recursion = recursion_limiter.entry(callee.def_id()).or_default();
trace!(?callee, recursion = *recursion);
if tcx.sess.recursion_limit().value_within_limit(*recursion) {
*recursion += 1;
stack.push(callee);
let found_recursion = ensure_sufficient_stack(|| {
process(tcx, param_env, callee, target, stack, seen, recursion_limiter)
});
if found_recursion {
return true;
}
stack.pop();
} else {
// Pessimistically assume that there could be recursion.
return true;
}
}
}
false
}
process(
tcx,
param_env,
root,
target,
&mut Vec::new(),
&mut FxHashSet::default(),
&mut FxHashMap::default(),
)
}
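A self-contained model of the traversal above, with integers standing in for instances (illustrative only; the real query additionally resolves substitutions and filters by `InstanceDef`):

    use std::collections::{HashMap, HashSet};

    // Depth-first search over a call graph. `seen` expands each callee once;
    // the per-definition budget stands in for the recursion limit, and hitting
    // it pessimistically reports "reachable".
    fn reaches(calls: &HashMap<u32, Vec<u32>>, root: u32, target: u32, limit: usize) -> bool {
        fn walk(
            calls: &HashMap<u32, Vec<u32>>,
            caller: u32,
            target: u32,
            seen: &mut HashSet<u32>,
            budget: &mut HashMap<u32, usize>,
            limit: usize,
        ) -> bool {
            for &callee in calls.get(&caller).map(Vec::as_slice).unwrap_or(&[]) {
                if callee == target {
                    return true; // found a path
                }
                if seen.insert(callee) {
                    let depth = budget.entry(callee).or_default();
                    if *depth >= limit {
                        return true; // pessimistically assume recursion
                    }
                    *depth += 1;
                    if walk(calls, callee, target, seen, budget, limit) {
                        return true;
                    }
                }
            }
            false
        }
        walk(calls, root, target, &mut HashSet::new(), &mut HashMap::new(), limit)
    }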

crate fn mir_inliner_callees<'tcx>(
tcx: TyCtxt<'tcx>,
instance: ty::InstanceDef<'tcx>,
) -> &'tcx [(DefId, SubstsRef<'tcx>)] {
let steal;
let guard;
let body = match (instance, instance.def_id().as_local()) {
(InstanceDef::Item(_), Some(def_id)) => {
let def = ty::WithOptConstParam::unknown(def_id);
steal = tcx.mir_promoted(def).0;
guard = steal.borrow();
&*guard
}
// Functions from other crates and MIR shims
_ => tcx.instance_mir(instance),
};
let mut calls = Vec::new();
for bb_data in body.basic_blocks() {
let terminator = bb_data.terminator();
if let TerminatorKind::Call { func, .. } = &terminator.kind {
let ty = func.ty(&body.local_decls, tcx);
let call = match ty.kind() {
ty::FnDef(def_id, substs) => (*def_id, *substs),
_ => continue,
};
// We've seen this before
if calls.contains(&call) {
continue;
}
calls.push(call);
}
}
tcx.arena.alloc_slice(&calls)
}
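A minimal model of the deduplication in `mir_inliner_callees`: keep the first occurrence of each (callee, substitutions) pair. A `Vec` with `contains` suffices because per-body callee lists are short (toy stand-in types):

    type Call = (u32, &'static str); // (def_id, substs) stand-ins

    fn unique_callees(body_calls: &[Call]) -> Vec<Call> {
        let mut calls = Vec::new();
        for &call in body_calls {
            if calls.contains(&call) {
                continue; // we've seen this exact pair before
            }
            calls.push(call);
        }
        calls
    }

    fn main() {
        let body = [(1, "<u32>"), (2, "<i32>"), (1, "<u32>")];
        assert_eq!(unique_callees(&body), vec![(1, "<u32>"), (2, "<i32>")]);
    }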