Contain memory operands under casts (#72719)
* Add GenTreeCast::IsZeroExtending

* Cast descriptor support

* XARCH support

* ARM/ARM64 support

TODO: consider using a dedicated IND_EXT oper for ARM/ARM64 instead of containment.
This would allow us to cleanly handle all indirections. It would not mean giving up
on cast containment, as we'd still need to handle the "reg optional" case.

IND_EXT would be much like an ordinary IND, but with "source" and "target" types.
The "target" type would always be int/long, while the "source" could be any integral
type.
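
A minimal sketch of that shape (hypothetical: the GenTreeIndExt type, the GT_IND_EXT
oper, and the gtSrcType field below do not exist; they only illustrate the idea):

    // An extending load: loads "gtSrcType" from memory and widens it to the
    // node's own int/long type, independently of GT_CAST.
    struct GenTreeIndExt : public GenTreeIndir
    {
        var_types gtSrcType; // the integral type actually loaded from memory

        GenTreeIndExt(var_types dstType, var_types srcType, GenTree* addr)
            : GenTreeIndir(GT_IND_EXT, dstType, addr, nullptr), gtSrcType(srcType)
        {
            assert(varTypeIsIntegral(srcType));
            assert((dstType == TYP_INT) || (dstType == TYP_LONG));
        }
    };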

This design would be a bit more natural, and nicely separable from casts. However,
the main problem with the current state of things, apart from the fact that codegen
of indirections is tied strongly to "GenTreeIndir", is that changing the type of the
load can invalidate LEA containment. One might think this is solvable with some
tricks, like re-running containment analysis on an indirection after processing the
cast, but ARM64 codegen doesn't support uncontained LEAs in some cases.

A possible solution to that problem is uncontaining the whole address tree. That
would be messy, but ought to work. An additional complication is that these trees
can contain a lot of contained operands as part of ADDEX and BFIZ, so those would
first have to be turned into proper EXOPs.

In any case, this is all future work.
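
For reference, the kind of containment this change enables can be sketched as
follows (illustrative only: this is not the actual lowering change from this PR,
and the TryContainCastOp name is made up; the sketch just strings together existing
lowering utilities):

    // Contain a simple load under a non-overflow integral cast so that codegen
    // can emit a single extending load (movsx/movzx or ldrsb/ldrsh/ldrsw/ldr)
    // straight from memory.
    void Lowering::TryContainCastOp(GenTreeCast* cast) // hypothetical helper
    {
        GenTree* src = cast->CastOp();

        if (!cast->gtOverflow() && varTypeIsIntegral(cast) && varTypeIsIntegral(src) &&
            src->OperIs(GT_IND) && !src->AsIndir()->IsVolatile() &&
            !src->AsIndir()->IsUnaligned() && IsSafeToContainMem(cast, src))
        {
            MakeSrcContained(cast, src);
        }
        else if (src->OperIs(GT_LCL_VAR))
        {
            // The "reg optional" case: the source may end up as a spill temp,
            // which codegen reloads with the same extending load.
            src->SetRegOptional();
        }
    }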
SingleAccretion authored Aug 24, 2022
1 parent 6c56c43 commit 0469020
Showing 10 changed files with 270 additions and 43 deletions.
7 changes: 7 additions & 0 deletions src/coreclr/jit/codegen.h
@@ -993,6 +993,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
ZERO_EXTEND_INT,
SIGN_EXTEND_INT,
#endif
LOAD_ZERO_EXTEND_SMALL_INT,
LOAD_SIGN_EXTEND_SMALL_INT,
#ifdef TARGET_64BIT
LOAD_ZERO_EXTEND_INT,
LOAD_SIGN_EXTEND_INT,
#endif
LOAD_SOURCE
};

private:
74 changes: 64 additions & 10 deletions src/coreclr/jit/codegenarmarch.cpp
@@ -3898,25 +3898,24 @@ void CodeGen::genIntCastOverflowCheck(GenTreeCast* cast, const GenIntCastDesc& d
// cast - The GT_CAST node
//
// Assumptions:
// The cast node is not a contained node and must have an assigned register.
// Neither the source nor target type can be a floating point type.
//
// TODO-ARM64-CQ: Allow castOp to be a contained node without an assigned register.
//
void CodeGen::genIntToIntCast(GenTreeCast* cast)
{
genConsumeRegs(cast->gtGetOp1());
genConsumeRegs(cast->CastOp());

const regNumber srcReg = cast->gtGetOp1()->GetRegNum();
GenTree* const src = cast->CastOp();
const regNumber srcReg = src->isUsedFromReg() ? src->GetRegNum() : REG_NA;
const regNumber dstReg = cast->GetRegNum();
emitter* const emit = GetEmitter();

assert(genIsValidIntReg(srcReg));
assert(genIsValidIntReg(dstReg));

GenIntCastDesc desc(cast);

if (desc.CheckKind() != GenIntCastDesc::CHECK_NONE)
{
assert(genIsValidIntReg(srcReg));
genIntCastOverflowCheck(cast, desc, srcReg);
}

@@ -3944,15 +3943,70 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast)
ins = INS_sxtw;
insSize = 8;
break;
#endif
default:
assert(desc.ExtendKind() == GenIntCastDesc::COPY);
#endif // TARGET_64BIT
case GenIntCastDesc::COPY:
ins = INS_mov;
insSize = desc.ExtendSrcSize();
break;
case GenIntCastDesc::LOAD_ZERO_EXTEND_SMALL_INT:
ins = (desc.ExtendSrcSize() == 1) ? INS_ldrb : INS_ldrh;
insSize = TARGET_POINTER_SIZE;
break;
case GenIntCastDesc::LOAD_SIGN_EXTEND_SMALL_INT:
ins = (desc.ExtendSrcSize() == 1) ? INS_ldrsb : INS_ldrsh;
insSize = TARGET_POINTER_SIZE;
break;
#ifdef TARGET_64BIT
case GenIntCastDesc::LOAD_ZERO_EXTEND_INT:
ins = INS_ldr;
insSize = 4;
break;
case GenIntCastDesc::LOAD_SIGN_EXTEND_INT:
ins = INS_ldrsw;
insSize = 8;
break;
#endif // TARGET_64BIT
case GenIntCastDesc::LOAD_SOURCE:
ins = ins_Load(src->TypeGet());
insSize = genTypeSize(genActualType(src));
break;

default:
unreached();
}

GetEmitter()->emitIns_Mov(ins, EA_ATTR(insSize), dstReg, srcReg, /* canSkip */ false);
if (srcReg != REG_NA)
{
emit->emitIns_Mov(ins, EA_ATTR(insSize), dstReg, srcReg, /* canSkip */ false);
}
else
{
// The "used from memory" case. On ArmArch casts are the only nodes which can have
// contained memory operands, so we have to handle all possible sources "manually".
assert(src->isUsedFromMemory());

if (src->isUsedFromSpillTemp())
{
assert(src->IsRegOptional());

TempDsc* tmpDsc = getSpillTempDsc(src);
unsigned tmpNum = tmpDsc->tdTempNum();
regSet.tmpRlsTemp(tmpDsc);

emit->emitIns_R_S(ins, EA_ATTR(insSize), dstReg, tmpNum, 0);
}
else if (src->OperIsLocal())
{
emit->emitIns_R_S(ins, EA_ATTR(insSize), dstReg, src->AsLclVarCommon()->GetLclNum(),
src->AsLclVarCommon()->GetLclOffs());
}
else
{
assert(src->OperIs(GT_IND) && !src->AsIndir()->IsVolatile() && !src->AsIndir()->IsUnaligned());
emit->emitIns_R_R_I(ins, EA_ATTR(insSize), dstReg, src->AsIndir()->Base()->GetRegNum(),
static_cast<int>(src->AsIndir()->Offset()));
}
}
}

genProduceReg(cast);
49 changes: 47 additions & 2 deletions src/coreclr/jit/codegenlinear.cpp
@@ -2448,7 +2448,8 @@ void CodeGen::genCodeForCast(GenTreeOp* tree)

CodeGen::GenIntCastDesc::GenIntCastDesc(GenTreeCast* cast)
{
const var_types srcType = genActualType(cast->gtGetOp1()->TypeGet());
GenTree* const src = cast->CastOp();
const var_types srcType = genActualType(src);
const bool srcUnsigned = cast->IsUnsigned();
const unsigned srcSize = genTypeSize(srcType);
const var_types castType = cast->gtCastType;
@@ -2457,7 +2458,9 @@ CodeGen::GenIntCastDesc::GenIntCastDesc(GenTreeCast* cast)
const var_types dstType = genActualType(cast->TypeGet());
const unsigned dstSize = genTypeSize(dstType);
const bool overflow = cast->gtOverflow();
const bool castIsLoad = !src->isUsedFromReg();

assert(castIsLoad == src->isUsedFromMemory());
assert((srcSize == 4) || (srcSize == genTypeSize(TYP_I_IMPL)));
assert((dstSize == 4) || (dstSize == genTypeSize(TYP_I_IMPL)));

@@ -2473,7 +2476,7 @@ CodeGen::GenIntCastDesc::GenIntCastDesc(GenTreeCast* cast)
// values of the castType without risk of integer overflow.
const int castNumBits = (castSize * 8) - (castUnsigned ? 0 : 1);
m_checkSmallIntMax = (1 << castNumBits) - 1;
m_checkSmallIntMin = (castUnsigned | srcUnsigned) ? 0 : (-m_checkSmallIntMax - 1);
m_checkSmallIntMin = (castUnsigned || srcUnsigned) ? 0 : (-m_checkSmallIntMax - 1);

m_extendKind = COPY;
m_extendSrcSize = dstSize;
@@ -2568,6 +2571,48 @@ CodeGen::GenIntCastDesc::GenIntCastDesc(GenTreeCast* cast)
m_extendKind = COPY;
m_extendSrcSize = srcSize;
}

if (castIsLoad)
{
const var_types srcLoadType = src->TypeGet();

switch (m_extendKind)
{
case ZERO_EXTEND_SMALL_INT: // small type/int/long -> ubyte/ushort
assert(varTypeIsUnsigned(srcLoadType) || (genTypeSize(srcLoadType) >= genTypeSize(castType)));
m_extendKind = LOAD_ZERO_EXTEND_SMALL_INT;
m_extendSrcSize = min(genTypeSize(srcLoadType), genTypeSize(castType));
break;

case SIGN_EXTEND_SMALL_INT: // small type/int/long -> byte/short
assert(varTypeIsSigned(srcLoadType) || (genTypeSize(srcLoadType) >= genTypeSize(castType)));
m_extendKind = LOAD_SIGN_EXTEND_SMALL_INT;
m_extendSrcSize = min(genTypeSize(srcLoadType), genTypeSize(castType));
break;

#ifdef TARGET_64BIT
case ZERO_EXTEND_INT: // ubyte/ushort/int -> long.
assert(varTypeIsUnsigned(srcLoadType) || (srcLoadType == TYP_INT));
m_extendKind = varTypeIsSmall(srcLoadType) ? LOAD_ZERO_EXTEND_SMALL_INT : LOAD_ZERO_EXTEND_INT;
m_extendSrcSize = genTypeSize(srcLoadType);
break;

case SIGN_EXTEND_INT: // byte/short/int -> long.
assert(varTypeIsSigned(srcLoadType) || (srcLoadType == TYP_INT));
m_extendKind = varTypeIsSmall(srcLoadType) ? LOAD_SIGN_EXTEND_SMALL_INT : LOAD_SIGN_EXTEND_INT;
m_extendSrcSize = genTypeSize(srcLoadType);
break;
#endif // TARGET_64BIT

case COPY: // long -> long, small type/int/long -> int.
m_extendKind = LOAD_SOURCE;
m_extendSrcSize = 0;
break;

default:
unreached();
}
}
}

#if !defined(TARGET_64BIT)
34 changes: 25 additions & 9 deletions src/coreclr/jit/codegenxarch.cpp
@@ -6752,27 +6752,25 @@ void CodeGen::genIntCastOverflowCheck(GenTreeCast* cast, const GenIntCastDesc& d
// cast - The GT_CAST node
//
// Assumptions:
// The cast node is not a contained node and must have an assigned register.
// Neither the source nor target type can be a floating point type.
// On x86 casts to (U)BYTE require that the source be in a byte register.
//
// TODO-XArch-CQ: Allow castOp to be a contained node without an assigned register.
//
void CodeGen::genIntToIntCast(GenTreeCast* cast)
{
genConsumeRegs(cast->gtGetOp1());
genConsumeRegs(cast->CastOp());

const regNumber srcReg = cast->gtGetOp1()->GetRegNum();
GenTree* const src = cast->CastOp();
const regNumber srcReg = src->isUsedFromReg() ? src->GetRegNum() : REG_NA;
const regNumber dstReg = cast->GetRegNum();
emitter* emit = GetEmitter();

assert(genIsValidIntReg(srcReg));
assert(genIsValidIntReg(dstReg));

GenIntCastDesc desc(cast);

if (desc.CheckKind() != GenIntCastDesc::CHECK_NONE)
{
assert(genIsValidIntReg(srcReg));
genIntCastOverflowCheck(cast, desc, srcReg);
}

@@ -6783,33 +6781,51 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast)
switch (desc.ExtendKind())
{
case GenIntCastDesc::ZERO_EXTEND_SMALL_INT:
case GenIntCastDesc::LOAD_ZERO_EXTEND_SMALL_INT:
ins = INS_movzx;
insSize = desc.ExtendSrcSize();
break;
case GenIntCastDesc::SIGN_EXTEND_SMALL_INT:
case GenIntCastDesc::LOAD_SIGN_EXTEND_SMALL_INT:
ins = INS_movsx;
insSize = desc.ExtendSrcSize();
break;
#ifdef TARGET_64BIT
case GenIntCastDesc::ZERO_EXTEND_INT:
case GenIntCastDesc::LOAD_ZERO_EXTEND_INT:
ins = INS_mov;
insSize = 4;
canSkip = compiler->opts.OptimizationEnabled() && emit->AreUpper32BitsZero(srcReg);
break;
case GenIntCastDesc::SIGN_EXTEND_INT:
case GenIntCastDesc::LOAD_SIGN_EXTEND_INT:
ins = INS_movsxd;
insSize = 4;
break;
#endif
default:
assert(desc.ExtendKind() == GenIntCastDesc::COPY);
case GenIntCastDesc::COPY:
ins = INS_mov;
insSize = desc.ExtendSrcSize();
canSkip = true;
break;
case GenIntCastDesc::LOAD_SOURCE:
ins = ins_Load(src->TypeGet());
insSize = genTypeSize(src);
break;

default:
unreached();
}

emit->emitIns_Mov(ins, EA_ATTR(insSize), dstReg, srcReg, canSkip);
if (srcReg != REG_NA)
{
emit->emitIns_Mov(ins, EA_ATTR(insSize), dstReg, srcReg, canSkip);
}
else
{
assert(src->isUsedFromMemory());
inst_RV_TT(ins, EA_ATTR(insSize), dstReg, src);
}

genProduceReg(cast);
}
16 changes: 16 additions & 0 deletions src/coreclr/jit/gentree.h
@@ -3858,6 +3858,22 @@ struct GenTreeCast : public GenTreeOp
{
}
#endif

bool IsZeroExtending()
{
assert(varTypeIsIntegral(CastOp()) && varTypeIsIntegral(CastToType()));

if (varTypeIsSmall(CastToType()))
{
return varTypeIsUnsigned(CastToType());
}
if (TypeIs(TYP_LONG) && genActualTypeIsInt(CastOp()))
{
return IsUnsigned();
}

return false;
}
};

// GT_BOX nodes are place markers for boxed values. The "real" tree
4 changes: 0 additions & 4 deletions src/coreclr/jit/jitconfigvalues.h
@@ -288,11 +288,7 @@ CONFIG_INTEGER(JitNoRangeChks, W("JitNoRngChks"), 0) // If 1, don't generate ran

// AltJitAssertOnNYI should be 0 on targets where JIT is under development or bring up stage, so as to facilitate
// fallback to main JIT on hitting a NYI.
#if defined(TARGET_ARM64) || defined(TARGET_X86)
CONFIG_INTEGER(AltJitAssertOnNYI, W("AltJitAssertOnNYI"), 0) // Controls the AltJit behavior of NYI stuff
#else // !defined(TARGET_ARM64) && !defined(TARGET_X86)
CONFIG_INTEGER(AltJitAssertOnNYI, W("AltJitAssertOnNYI"), 1) // Controls the AltJit behavior of NYI stuff
#endif // defined(TARGET_ARM64) || defined(TARGET_X86)

CONFIG_INTEGER(EnableEHWriteThru, W("EnableEHWriteThru"), 1) // Enable the register allocator to support EH-write thru:
// partial enregistration of vars exposed on EH boundaries
9 changes: 8 additions & 1 deletion src/coreclr/jit/lower.cpp
@@ -2828,6 +2828,11 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp)
op2->ClearContained();
}
}
else
{
castOp->ClearContained();
}

cmp->AsOp()->gtOp1 = castOp;

BlockRange().Remove(cast);
@@ -5352,6 +5357,7 @@ bool Lowering::TryCreateAddrMode(GenTree* addr, bool isContainable, GenTree* par
#ifdef TARGET_ARM64
if ((index != nullptr) && index->OperIs(GT_CAST) && (scale == 1) && (offset == 0) && varTypeIsByte(targetType))
{
index->AsCast()->CastOp()->ClearContained(); // Uncontain any memory operands.
MakeSrcContained(addrMode, index);
}

@@ -6111,6 +6117,7 @@ void Lowering::LowerShift(GenTreeOp* shift)
// The parent was replaced, clear contain and regOpt flag.
shift->gtOp2->ClearContained();
}

ContainCheckShiftRotate(shift);

#ifdef TARGET_ARM64
@@ -6129,13 +6136,13 @@ unsigned dstBits = genTypeSize(cast) * BITS_PER_BYTE;
unsigned dstBits = genTypeSize(cast) * BITS_PER_BYTE;
unsigned srcBits = varTypeIsSmall(cast->CastToType()) ? genTypeSize(cast->CastToType()) * BITS_PER_BYTE
: genTypeSize(cast->CastOp()) * BITS_PER_BYTE;
assert(!cast->CastOp()->isContained());

// It has to be an upcast and CNS must be in [1..srcBits) range
if ((srcBits < dstBits) && (cns->IconValue() > 0) && (cns->IconValue() < srcBits))
{
JITDUMP("Recognized ubfix/sbfix pattern in LSH(CAST, CNS). Changing op to GT_BFIZ");
shift->ChangeOper(GT_BFIZ);
cast->CastOp()->ClearContained(); // Uncontain any memory operands.
MakeSrcContained(shift, cast);
}
}
(Remaining changed files not shown in this view.)
