Skip to content

Commit

Permalink
JIT: Support retbuf optimization for non 'lvIsTemp' locals (#104467)
Browse files Browse the repository at this point in the history
The retbuf optimization allows us to avoid address exposure for retbuf
definitions; instead we consider them to be just defined (and not exposed) by
the calls.

This optimization was previously only enabled for `lvIsTemp` locals, i.e. the
locals created by `lvaGrabTemp(true)`. The reason was that the definition of the
actual retbuf happened when we saw the `LCL_ADDR` node; if there were additional
uses after the `LCL_ADDR` node, then they would think they were referring to the
`LCL_ADDR` definition. The `lvIsTemp` gave us reasonable confidence that there
were no such additional uses.

This PR fixes the root cause of the problem and enables the optimization for
non-`lvIsTemp` locals. To do that it teaches the various liveness phases to
ignore the `LCL_ADDR` nodes when it gets to them and to instead handle the
definition at the point of the parent `CALL` node.
  • Loading branch information
jakobbotsch authored Jul 9, 2024
1 parent 048c8ed commit dd4b757
Show file tree
Hide file tree
Showing 5 changed files with 112 additions and 24 deletions.
4 changes: 3 additions & 1 deletion src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -5516,6 +5516,7 @@ class Compiler

void fgLocalVarLivenessInit();

template <bool lowered>
void fgPerNodeLocalVarLiveness(GenTree* node);
void fgPerBlockLocalVarLiveness();

Expand All @@ -5527,7 +5528,7 @@ class Compiler

void fgLiveVarAnalysis();

void fgComputeLifeCall(VARSET_TP& life, GenTreeCall* call);
void fgComputeLifeCall(VARSET_TP& life, VARSET_VALARG_TP keepAliveVars, GenTreeCall* call);

void fgComputeLifeTrackedLocalUse(VARSET_TP& life, LclVarDsc& varDsc, GenTreeLclVarCommon* node);
bool fgComputeLifeTrackedLocalDef(VARSET_TP& life,
Expand All @@ -5549,6 +5550,7 @@ class Compiler
bool* pStmtInfoDirty DEBUGARG(bool* treeModf));

void fgComputeLifeLIR(VARSET_TP& life, BasicBlock* block, VARSET_VALARG_TP keepAliveVars);
bool fgIsTrackedRetBufferAddress(LIR::Range& range, GenTree* node);

bool fgTryRemoveNonLocal(GenTree* node, LIR::Range* blockRange);

Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/gschecks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,7 @@ void Compiler::gsParamsToShadows()
shadowVarDsc->lvDoNotEnregister = varDsc->lvDoNotEnregister;
#ifdef DEBUG
shadowVarDsc->SetDoNotEnregReason(varDsc->GetDoNotEnregReason());
shadowVarDsc->SetHiddenBufferStructArg(varDsc->IsHiddenBufferStructArg());
#endif

if (varTypeIsStruct(type))
Expand Down Expand Up @@ -503,6 +504,7 @@ void Compiler::gsParamsToShadows()
}
}

compCurBB = fgFirstBB;
// Now insert code to copy the params to their shadow copy.
for (UINT lclNum = 0; lclNum < lvaOldCount; lclNum++)
{
Expand All @@ -522,6 +524,7 @@ void Compiler::gsParamsToShadows()
compCurBB = fgFirstBB; // Needed by some morphing
(void)fgNewStmtAtBeg(fgFirstBB, fgMorphTree(store));
}
compCurBB = nullptr;

// If the method has "Jmp CalleeMethod", then we need to copy shadow params back to original
// params before "jmp" to CalleeMethod.
Expand Down
9 changes: 3 additions & 6 deletions src/coreclr/jit/lclmorph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1250,12 +1250,9 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
if (m_compiler->opts.compJitOptimizeStructHiddenBuffer && (callUser != nullptr) &&
m_compiler->IsValidLclAddr(lclNum, val.Offset()))
{
// We will only attempt this optimization for locals that are:
// a) Not susceptible to liveness bugs (see "lvaSetHiddenBufferStructArg").
// b) Do not later turn into indirections.
//
bool isSuitableLocal =
varTypeIsStruct(varDsc) && varDsc->lvIsTemp && !m_compiler->lvaIsImplicitByRefLocal(lclNum);
// We will only attempt this optimization for locals that do not
// later turn into indirections.
bool isSuitableLocal = varTypeIsStruct(varDsc) && !m_compiler->lvaIsImplicitByRefLocal(lclNum);
#ifdef TARGET_X86
if (m_compiler->lvaIsArgAccessedViaVarArgsCookie(lclNum))
{
Expand Down
6 changes: 0 additions & 6 deletions src/coreclr/jit/lclvars.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3068,12 +3068,6 @@ void Compiler::lvaSetVarAddrExposed(unsigned varNum DEBUGARG(AddressExposedReaso
// *do not* mark the local address-exposed and treat the call much like a local store node throughout
// the compilation.
//
// TODO-ADDR-Bug: currently, we rely on these locals not being present in call argument lists,
// outside of the buffer address argument itself, as liveness - currently - treats the location node
// associated with the address itself as the definition point, and call arguments can be reordered
// rather arbitrarily. We should fix liveness to treat the call as the definition point instead and
// enable this optimization for "!lvIsTemp" locals.
//
void Compiler::lvaSetHiddenBufferStructArg(unsigned varNum)
{
LclVarDsc* varDsc = lvaGetDesc(varNum);
Expand Down
114 changes: 103 additions & 11 deletions src/coreclr/jit/liveness.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,9 +192,14 @@ void Compiler::fgLocalVarLivenessInit()
// Set fgCurMemoryUse and fgCurMemoryDef when memory is read or updated
// Call fgMarkUseDef for any Local variables encountered
//
// Template arguments:
// lowered - Whether or not this is liveness on lowered IR, where LCL_ADDRs
// on tracked locals may appear.
//
// Arguments:
// tree - The current node.
//
template <bool lowered>
void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree)
{
assert(tree != nullptr);
Expand All @@ -209,12 +214,25 @@ void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree)

case GT_LCL_VAR:
case GT_LCL_FLD:
case GT_LCL_ADDR:
case GT_STORE_LCL_VAR:
case GT_STORE_LCL_FLD:
fgMarkUseDef(tree->AsLclVarCommon());
break;

case GT_LCL_ADDR:
if (lowered)
{
// If this is a definition of a retbuf then we process it as
// part of the GT_CALL node.
if (fgIsTrackedRetBufferAddress(LIR::AsRange(compCurBB), tree))
{
break;
}

fgMarkUseDef(tree->AsLclVarCommon());
}
break;

case GT_IND:
case GT_BLK:
// For Volatile indirection, first mutate GcHeap/ByrefExposed
Expand Down Expand Up @@ -303,6 +321,12 @@ void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree)
}
}
}

GenTreeLclVarCommon* definedLcl = gtCallGetDefinedRetBufLclAddr(call);
if (definedLcl != nullptr)
{
fgMarkUseDef(definedLcl);
}
break;
}

Expand Down Expand Up @@ -367,7 +391,7 @@ void Compiler::fgPerBlockLocalVarLiveness()
{
for (GenTree* node : LIR::AsRange(block))
{
fgPerNodeLocalVarLiveness(node);
fgPerNodeLocalVarLiveness<true>(node);
}
}
else if (fgNodeThreading == NodeThreading::AllTrees)
Expand All @@ -377,7 +401,7 @@ void Compiler::fgPerBlockLocalVarLiveness()
compCurStmt = stmt;
for (GenTree* const node : stmt->TreeList())
{
fgPerNodeLocalVarLiveness(node);
fgPerNodeLocalVarLiveness<false>(node);
}
}
}
Expand Down Expand Up @@ -744,10 +768,11 @@ void Compiler::fgLiveVarAnalysis()
// due to a GT_CALL node.
//
// Arguments:
// life - The live set that is being computed.
// call - The call node in question.
// life - The live set that is being computed.
// keepAliveVars - Tracked locals that must be kept alive everywhere in the block
// call - The call node in question.
//
void Compiler::fgComputeLifeCall(VARSET_TP& life, GenTreeCall* call)
void Compiler::fgComputeLifeCall(VARSET_TP& life, VARSET_VALARG_TP keepAliveVars, GenTreeCall* call)
{
assert(call != nullptr);

Expand Down Expand Up @@ -808,6 +833,12 @@ void Compiler::fgComputeLifeCall(VARSET_TP& life, GenTreeCall* call)
}
}
}

GenTreeLclVarCommon* definedLcl = gtCallGetDefinedRetBufLclAddr(call);
if (definedLcl != nullptr)
{
fgComputeLifeLocal(life, keepAliveVars, definedLcl);
}
}

//------------------------------------------------------------------------
Expand Down Expand Up @@ -1140,11 +1171,11 @@ void Compiler::fgComputeLife(VARSET_TP& life,
AGAIN:
assert(tree->OperGet() != GT_QMARK);

if (tree->gtOper == GT_CALL)
if (tree->IsCall())
{
fgComputeLifeCall(life, tree->AsCall());
fgComputeLifeCall(life, keepAliveVars, tree->AsCall());
}
else if (tree->OperIsNonPhiLocal() || tree->OperIs(GT_LCL_ADDR))
else if (tree->OperIsNonPhiLocal())
{
bool isDeadStore = fgComputeLifeLocal(life, keepAliveVars, tree);
if (isDeadStore)
Expand Down Expand Up @@ -1191,6 +1222,15 @@ void Compiler::fgComputeLife(VARSET_TP& life,
}
}

//---------------------------------------------------------------------
// fgComputeLifeLIR - fill out liveness flags in the IR nodes of the block
// provided the live-out set.
//
// Arguments
// life - the set of live-out variables from the block
// block - the block
// keepAliveVars - variables that are globally live (usually due to being live into an EH successor)
//
void Compiler::fgComputeLifeLIR(VARSET_TP& life, BasicBlock* block, VARSET_VALARG_TP keepAliveVars)
{
noway_assert(VarSetOps::IsSubset(this, keepAliveVars, life));
Expand Down Expand Up @@ -1247,7 +1287,7 @@ void Compiler::fgComputeLifeLIR(VARSET_TP& life, BasicBlock* block, VARSET_VALAR
}
else
{
fgComputeLifeCall(life, call);
fgComputeLifeCall(life, keepAliveVars, call);
}
break;
}
Expand Down Expand Up @@ -1295,11 +1335,21 @@ void Compiler::fgComputeLifeLIR(VARSET_TP& life, BasicBlock* block, VARSET_VALAR
}
else
{
// For LCL_ADDRs that are defined by being passed as a
// retbuf we will handle them when we get to the call. We
// cannot consider them to be defined at the point of the
// LCL_ADDR since there may be uses between the LCL_ADDR
// and call.
if (fgIsTrackedRetBufferAddress(blockRange, node))
{
break;
}

isDeadStore = fgComputeLifeLocal(life, keepAliveVars, node);
if (isDeadStore)
{
LIR::Use addrUse;
if (blockRange.TryGetUse(node, &addrUse) && (addrUse.User()->OperIs(GT_STOREIND, GT_STORE_BLK)))
if (blockRange.TryGetUse(node, &addrUse) && addrUse.User()->OperIs(GT_STOREIND, GT_STORE_BLK))
{
GenTreeIndir* const store = addrUse.User()->AsIndir();

Expand Down Expand Up @@ -1465,6 +1515,48 @@ void Compiler::fgComputeLifeLIR(VARSET_TP& life, BasicBlock* block, VARSET_VALAR
}
}

//---------------------------------------------------------------------
// fgIsTrackedRetBufferAddress - given a LCL_ADDR node, check if it is the
// return buffer definition of a call.
//
// Arguments
// range - the block range containing the LCL_ADDR
// node - the LCL_ADDR
//
bool Compiler::fgIsTrackedRetBufferAddress(LIR::Range& range, GenTree* node)
{
assert(node->OperIs(GT_LCL_ADDR));
if ((node->gtFlags & GTF_VAR_DEF) == 0)
{
return false;
}

LclVarDsc* dsc = lvaGetDesc(node->AsLclVarCommon());
if (!dsc->lvTracked)
{
return false;
}

GenTree* curNode = node;
do
{
LIR::Use use;
if (!range.TryGetUse(curNode, &use))
{
return false;
}

curNode = use.User();

if (curNode->IsCall())
{
return gtCallGetDefinedRetBufLclAddr(curNode->AsCall()) == node;
}
} while (curNode->OperIs(GT_FIELD_LIST) || curNode->OperIsPutArg());

return false;
}

//---------------------------------------------------------------------
// fgTryRemoveNonLocal - try to remove a node if it is unused and has no direct
// side effects.
Expand Down

0 comments on commit dd4b757

Please sign in to comment.