diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index fb7f2f73351af..a79730d86de43 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4874,6 +4874,7 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl bool doBranchOpt = true; bool doCse = true; bool doAssertionProp = true; + bool doVNBasedIntrinExpansion = true; bool doRangeAnalysis = true; bool doVNBasedDeadStoreRemoval = true; int iterations = 1; @@ -4887,6 +4888,7 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl doBranchOpt = doValueNum && (JitConfig.JitDoRedundantBranchOpts() != 0); doCse = doValueNum; doAssertionProp = doValueNum && (JitConfig.JitDoAssertionProp() != 0); + doVNBasedIntrinExpansion = doValueNum; doRangeAnalysis = doAssertionProp && (JitConfig.JitDoRangeAnalysis() != 0); doVNBasedDeadStoreRemoval = doValueNum && (JitConfig.JitDoVNBasedDeadStoreRemoval() != 0); @@ -4970,6 +4972,13 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl DoPhase(this, PHASE_ASSERTION_PROP_MAIN, &Compiler::optAssertionPropMain); } + if (doVNBasedIntrinExpansion) + { + // Expand some intrinsics based on VN data + // + DoPhase(this, PHASE_VN_BASED_INTRINSIC_EXPAND, &Compiler::fgVNBasedIntrinsicExpansion); + } + if (doRangeAnalysis) { // Bounds check elimination via range analysis diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 8a2d862d1fe2c..6058a4786f100 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5203,6 +5203,8 @@ class Compiler } } + bool GetObjectHandleAndOffset(GenTree* tree, ssize_t* byteOffset, CORINFO_OBJECT_HANDLE* pObj); + // Convert a BYTE which represents the VM's CorInfoGCtype to the JIT's var_types var_types getJitGCType(BYTE gcType); @@ -5346,6 +5348,10 @@ class Compiler PhaseStatus fgExpandStaticInit(); bool fgExpandStaticInitForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call); + 
PhaseStatus fgVNBasedIntrinsicExpansion(); + bool fgVNBasedIntrinsicExpansionForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call); + bool fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call); + PhaseStatus fgInsertGCPolls(); BasicBlock* fgCreateGCPoll(GCPollType pollType, BasicBlock* block); @@ -7074,6 +7080,7 @@ class Compiler #define OMF_HAS_MDARRAYREF 0x00004000 // Method contains multi-dimensional intrinsic array element loads or stores. #define OMF_HAS_STATIC_INIT 0x00008000 // Method has static initializations we might want to partially inline #define OMF_HAS_TLS_FIELD 0x00010000 // Method contains TLS field access +#define OMF_HAS_SPECIAL_INTRINSICS 0x00020000 // Method contains special intrinsics expanded in late phases // clang-format on @@ -7134,6 +7141,16 @@ class Compiler optMethodFlags |= OMF_HAS_TLS_FIELD; } + bool doesMethodHaveSpecialIntrinsics() + { + return (optMethodFlags & OMF_HAS_SPECIAL_INTRINSICS) != 0; + } + + void setMethodHasSpecialIntrinsics() + { + optMethodFlags |= OMF_HAS_SPECIAL_INTRINSICS; + } + void pickGDV(GenTreeCall* call, IL_OFFSET ilOffset, bool isInterface, @@ -8934,7 +8951,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX public: // Similar to roundUpSIMDSize, but for General Purpose Registers (GPR) - unsigned int roundUpGPRSize(unsigned size) + unsigned roundUpGPRSize(unsigned size) { if (size > 4 && (REGSIZE_BYTES == 8)) { @@ -8947,6 +8964,33 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX return size; // 2, 1, 0 } + var_types roundDownMaxType(unsigned size) + { + assert(size > 0); + var_types result = TYP_UNDEF; +#ifdef FEATURE_SIMD + if (IsBaselineSimdIsaSupported() && (roundDownSIMDSize(size) > 0)) + { + return getSIMDTypeForSize(roundDownSIMDSize(size)); + } +#endif + int nearestPow2 = 1 << BitOperations::Log2((unsigned)size); + switch (min(nearestPow2, REGSIZE_BYTES)) + { + case 1: + return 
TYP_UBYTE; + case 2: + return TYP_USHORT; + case 4: + return TYP_INT; + case 8: + assert(REGSIZE_BYTES == 8); + return TYP_LONG; + default: + unreached(); + } + } + enum UnrollKind { Memset, diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h index 460b47f9bca85..0aa63000e6ae0 100644 --- a/src/coreclr/jit/compphases.h +++ b/src/coreclr/jit/compphases.h @@ -85,6 +85,7 @@ CompPhaseNameMacro(PHASE_VALUE_NUMBER, "Do value numbering", CompPhaseNameMacro(PHASE_OPTIMIZE_INDEX_CHECKS, "Optimize index checks", false, -1, false) CompPhaseNameMacro(PHASE_OPTIMIZE_VALNUM_CSES, "Optimize Valnum CSEs", false, -1, false) CompPhaseNameMacro(PHASE_VN_COPY_PROP, "VN based copy prop", false, -1, false) +CompPhaseNameMacro(PHASE_VN_BASED_INTRINSIC_EXPAND, "VN based intrinsic expansion", false, -1, false) CompPhaseNameMacro(PHASE_OPTIMIZE_BRANCHES, "Redundant branch opts", false, -1, false) CompPhaseNameMacro(PHASE_ASSERTION_PROP_MAIN, "Assertion prop", false, -1, false) CompPhaseNameMacro(PHASE_IF_CONVERSION, "If conversion", false, -1, false) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 271030a9e2dd9..e8c237ae1df9c 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -116,9 +116,8 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() bool Compiler::fgExpandRuntimeLookupsForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call) { BasicBlock* block = *pBlock; - assert(call->IsHelperCall()); - if (!call->IsExpRuntimeLookup()) + if (!call->IsHelperCall() || !call->IsExpRuntimeLookup()) { return false; } @@ -472,8 +471,8 @@ PhaseStatus Compiler::fgExpandThreadLocalAccess() bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call) { BasicBlock* block = *pBlock; - assert(call->IsHelperCall()); - if (!call->IsExpTLSFieldAccess()) + + if (!call->IsHelperCall() || !call->IsExpTLSFieldAccess()) { return false; } @@ -874,7 +873,7 @@ bool 
Compiler::fgExpandHelperForBlock(BasicBlock** pBlock) for (GenTree* const tree : stmt->TreeList()) { - if (!tree->IsHelperCall()) + if (!tree->IsCall()) { continue; } @@ -942,7 +941,10 @@ PhaseStatus Compiler::fgExpandStaticInit() bool Compiler::fgExpandStaticInitForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call) { BasicBlock* block = *pBlock; - assert(call->IsHelperCall()); + if (!call->IsHelperCall()) + { + return false; + } bool isGc = false; StaticHelperReturnValue retValKind = {}; @@ -1177,3 +1179,336 @@ bool Compiler::fgExpandStaticInitForCall(BasicBlock** pBlock, Statement* stmt, G call->gtInitClsHnd = NO_CLASS_HANDLE; return true; } + +//------------------------------------------------------------------------------ +// fgVNBasedIntrinsicExpansion: Expand specific calls marked as intrinsics using VN. +// +// Returns: +// PhaseStatus indicating what, if anything, was changed. +// +PhaseStatus Compiler::fgVNBasedIntrinsicExpansion() +{ + PhaseStatus result = PhaseStatus::MODIFIED_NOTHING; + + if (!doesMethodHaveSpecialIntrinsics() || opts.OptimizationDisabled()) + { + return result; + } + + // TODO: Replace with opts.compCodeOpt once it's fixed + const bool preferSize = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_SIZE_OPT); + if (preferSize) + { + // The optimization comes with a codegen size increase + JITDUMP("Optimized for size - bail out.\n") + return result; + } + return fgExpandHelper<&Compiler::fgVNBasedIntrinsicExpansionForCall>(true); +} + +//------------------------------------------------------------------------------ +// fgVNBasedIntrinsicExpansionForCall : Expand specific calls marked as intrinsics using VN. +// +// Arguments: +// block - Block containing the intrinsic call to expand +// stmt - Statement containing the call +// call - The intrinsic call +// +// Returns: +// True if expanded, false otherwise. 
+// +bool Compiler::fgVNBasedIntrinsicExpansionForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call) +{ + if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) == 0) + { + return false; + } + + NamedIntrinsic ni = lookupNamedIntrinsic(call->gtCallMethHnd); + if (ni == NI_System_Text_UTF8Encoding_UTF8EncodingSealed_ReadUtf8) + { + return fgVNBasedIntrinsicExpansionForCall_ReadUtf8(pBlock, stmt, call); + } + + // TODO: Expand IsKnownConstant here + // Also, move various unrollings here + + return false; +} + +//------------------------------------------------------------------------------ +// fgVNBasedIntrinsicExpansionForCall_ReadUtf8 : Expand NI_System_Text_UTF8Encoding_UTF8EncodingSealed_ReadUtf8 +// when src data is a string literal (UTF16) that can be narrowed to ASCII (UTF8), e.g.: +// +// string str = "Hello, world!"; +// int bytesWritten = ReadUtf8(ref str[0], str.Length, buffer, buffer.Length); +// +// becomes: +// +// bytesWritten = -1; // default value +// if (buffer.Length >= 13) +// { +// memcpy(buffer, "Hello, world!"u8, 13); // note the u8 suffix +// bytesWritten = 13; +// } +// +// Arguments: +// pBlock - [in/out] Block containing the intrinsic call to expand; updated when the block is split +// stmt - Statement containing the call +// call - The intrinsic call +// +// Returns: +// True if expanded, false otherwise. +// +bool Compiler::fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call) +{ + BasicBlock* block = *pBlock; + + assert(call->gtArgs.CountUserArgs() == 4); + + GenTree* srcPtr = call->gtArgs.GetUserArgByIndex(0)->GetNode(); + + // We're interested in a case when srcPtr is a string literal and srcLen is a constant. + // srcLen doesn't have to match the srcPtr's length, but it should not exceed it. + ssize_t strObjOffset = 0; + CORINFO_OBJECT_HANDLE strObj = nullptr; + if (!GetObjectHandleAndOffset(srcPtr, &strObjOffset, &strObj) || ((size_t)strObjOffset > INT_MAX)) + { + // We might want to support more cases here, e.g. 
ROS RVA data. + // Also, we check that strObjOffset (which in most cases is expected to be just + // OFFSETOF__CORINFO_String__chars) doesn't exceed INT_MAX since we'll need to cast + // it to int for getObjectContent API. + JITDUMP("ReadUtf8: srcPtr is not an object handle\n") + return false; + } + + // We mostly expect string literal objects here, but let's be more agile just in case + if (!info.compCompHnd->isObjectImmutable(strObj)) + { + JITDUMP("ReadUtf8: srcPtr is not immutable (not a frozen string object?)\n") + return false; + } + + GenTree* srcLen = call->gtArgs.GetUserArgByIndex(1)->GetNode(); + if (!srcLen->gtVNPair.BothEqual() || !vnStore->IsVNInt32Constant(srcLen->gtVNPair.GetLiberal())) + { + JITDUMP("ReadUtf8: srcLen is not constant\n") + return false; + } + + const int MaxPossibleUnrollThreshold = 256; + const unsigned unrollThreshold = min(getUnrollThreshold(UnrollKind::Memcpy), MaxPossibleUnrollThreshold); + const unsigned srcLenCns = (unsigned)vnStore->GetConstantInt32(srcLen->gtVNPair.GetLiberal()); + if ((srcLenCns == 0) || (srcLenCns > unrollThreshold)) + { + // TODO: handle srcLenCns == 0 if it's a common case + JITDUMP("ReadUtf8: srcLenCns is out of unrollable range\n") + return false; + } + + // Read the string literal (UTF16) into a local buffer (UTF8) + assert(strObj != nullptr); + uint16_t bufferU16[MaxPossibleUnrollThreshold]; + uint8_t bufferU8[MaxPossibleUnrollThreshold]; // half the size in bytes because of narrowing + + // Both must be within [0..INT_MAX] range as we're going to cast them to int + assert((unsigned)srcLenCns <= INT_MAX); + assert((unsigned)strObjOffset <= INT_MAX); + + // getObjectContent is expected to validate the offset and length + if (!info.compCompHnd->getObjectContent(strObj, (uint8_t*)bufferU16, (int)srcLenCns * 2, (int)strObjOffset)) + { + JITDUMP("ReadUtf8: getObjectContent returned false.\n") + return false; + } + + for (unsigned charIndex = 0; charIndex < srcLenCns; charIndex++) + { + // Buffer keeps the 
original utf16 chars + uint16_t ch = bufferU16[charIndex]; + if (ch > 127) + { + // Only ASCII is supported. + JITDUMP("ReadUtf8: %dth char is not ASCII.\n", charIndex) + return false; + } + + // Narrow U16 to U8 and store it in the separate UTF8 buffer + bufferU8[charIndex] = (uint8_t)ch; + } + + DebugInfo debugInfo = stmt->GetDebugInfo(); + + // Split block right before the call tree (this is a standard pattern we use in helperexpansion.cpp) + BasicBlock* prevBb = block; + GenTree** callUse = nullptr; + Statement* newFirstStmt = nullptr; + block = fgSplitBlockBeforeTree(block, stmt, call, &newFirstStmt, &callUse); + assert(prevBb != nullptr && block != nullptr); + *pBlock = block; + + // If we suddenly need to use these arguments, we'll have to reload them from the call + // after the split, so let's null them to prevent accidental use. + srcLen = nullptr; + srcPtr = nullptr; + + // Block ops inserted by the split need to be morphed here since we are after morph. + // We cannot morph stmt yet as we may modify it further below, and the morphing + // could invalidate callUse. + while ((newFirstStmt != nullptr) && (newFirstStmt != stmt)) + { + fgMorphStmtBlockOps(block, newFirstStmt); + newFirstStmt = newFirstStmt->GetNextStmt(); + } + + // We don't need this flag anymore. + call->gtCallMoreFlags &= ~GTF_CALL_M_SPECIAL_INTRINSIC; + + // Grab a temp to store the result. + // The result corresponds to the number of bytes written to dstPtr (int32). + assert(call->TypeIs(TYP_INT)); + const unsigned resultLclNum = lvaGrabTemp(true DEBUGARG("local for result")); + lvaTable[resultLclNum].lvType = TYP_INT; + *callUse = gtNewLclvNode(resultLclNum, TYP_INT); + fgMorphStmtBlockOps(block, stmt); + gtUpdateStmtSideEffects(stmt); + + // srcLenCns is the length of the string literal in chars (UTF16) + // but we're going to use the same value as the "bytesWritten" result in the fast path and in the length check. 
+ GenTree* srcLenCnsNode = gtNewIconNode(srcLenCns); + fgValueNumberTreeConst(srcLenCnsNode); + + // We're going to insert the following blocks: + // + // prevBb: + // + // lengthCheckBb: + // bytesWritten = -1; + // if (dstLen < srcLenCns) + // goto block; + // + // fastpathBb: + // unrolled stores of the UTF8 data to dstPtr + // bytesWritten = srcLenCns; + // + // block: + // use(bytesWritten) + // + + // + // Block 1: lengthCheckBb (we check that dstLen < srcLen) + // + BasicBlock* lengthCheckBb = fgNewBBafter(BBJ_COND, prevBb, true); + lengthCheckBb->bbFlags |= BBF_INTERNAL; + + // Set bytesWritten to -1 by default; if the fast path is not taken we'll return it as the result. + GenTree* bytesWrittenDefaultVal = gtNewStoreLclVarNode(resultLclNum, gtNewIconNode(-1)); + fgInsertStmtAtEnd(lengthCheckBb, fgNewStmtFromTree(bytesWrittenDefaultVal, debugInfo)); + + GenTree* dstLen = call->gtArgs.GetUserArgByIndex(3)->GetNode(); + GenTree* lengthCheck = gtNewOperNode(GT_LT, TYP_INT, gtCloneExpr(dstLen), srcLenCnsNode); + lengthCheck->gtFlags |= GTF_RELOP_JMP_USED; + Statement* lengthCheckStmt = fgNewStmtFromTree(gtNewOperNode(GT_JTRUE, TYP_VOID, lengthCheck), debugInfo); + fgInsertStmtAtEnd(lengthCheckBb, lengthCheckStmt); + lengthCheckBb->bbCodeOffs = block->bbCodeOffsEnd; + lengthCheckBb->bbCodeOffsEnd = block->bbCodeOffsEnd; + + // + // Block 2: fastpathBb - unrolled loop that copies the UTF8 const data to the destination + // + // We're going to emit a series of loads and stores to copy the data. + // In theory, we could just emit the const U8 data to the data section and use GT_BLK here + // but that would be a bit less efficient since we would have to load the data from memory. 
+ // + BasicBlock* fastpathBb = fgNewBBafter(BBJ_NONE, lengthCheckBb, true); + fastpathBb->bbFlags |= BBF_INTERNAL; + + // The widest type we can use for loads + const var_types maxLoadType = roundDownMaxType(srcLenCns); + assert(genTypeSize(maxLoadType) > 0); + + // How many iterations we need to copy UTF8 const data to the destination + unsigned iterations = srcLenCns / genTypeSize(maxLoadType); + + // Add one more iteration if we have a remainder + iterations += (srcLenCns % genTypeSize(maxLoadType) == 0) ? 0 : 1; + + GenTree* dstPtr = call->gtArgs.GetUserArgByIndex(2)->GetNode(); + for (unsigned i = 0; i < iterations; i++) + { + ssize_t offset = (ssize_t)i * genTypeSize(maxLoadType); + + // Last iteration: overlap with previous load if needed + if (i == iterations - 1) + { + offset = (ssize_t)srcLenCns - genTypeSize(maxLoadType); + } + + // We're going to emit the following tree (in case of SIMD16 load): + // + // -A-XG------ * STOREIND simd16 (copy) + // -------N--- +--* ADD byref + // ----------- | +--* LCL_VAR byref + // ----------- | \--* CNS_INT int + // ----------- \--* CNS_VEC simd16 + + GenTreeIntCon* offsetNode = gtNewIconNode(offset, TYP_I_IMPL); + fgValueNumberTreeConst(offsetNode); + + // Grab a chunk from srcUtf8cnsData for the given offset and width + GenTree* utf8cnsChunkNode = gtNewGenericCon(maxLoadType, bufferU8 + offset); + fgValueNumberTreeConst(utf8cnsChunkNode); + + GenTree* dstAddOffsetNode = gtNewOperNode(GT_ADD, dstPtr->TypeGet(), gtCloneExpr(dstPtr), offsetNode); + GenTreeOp* storeInd = gtNewStoreIndNode(maxLoadType, dstAddOffsetNode, utf8cnsChunkNode); + fgInsertStmtAtEnd(fastpathBb, fgNewStmtFromTree(storeInd, debugInfo)); + } + + // Finally, store the number of bytes written to the resultLcl local + Statement* finalStmt = fgNewStmtFromTree(gtNewStoreLclVarNode(resultLclNum, gtCloneExpr(srcLenCnsNode)), debugInfo); + fgInsertStmtAtEnd(fastpathBb, finalStmt); + fastpathBb->bbCodeOffs = block->bbCodeOffsEnd; + fastpathBb->bbCodeOffsEnd 
= block->bbCodeOffsEnd; + + // + // Update preds in all new blocks + // + // block is no longer a predecessor of prevBb + fgRemoveRefPred(block, prevBb); + // prevBb flows into lengthCheckBb + fgAddRefPred(lengthCheckBb, prevBb); + // lengthCheckBb has two successors: block and fastpathBb + fgAddRefPred(fastpathBb, lengthCheckBb); + fgAddRefPred(block, lengthCheckBb); + // fastpathBb flows into block + fgAddRefPred(block, fastpathBb); + // lengthCheckBb jumps to block if condition is met + lengthCheckBb->bbJumpDest = block; + + // + // Re-distribute weights + // + lengthCheckBb->inheritWeight(prevBb); + fastpathBb->inheritWeight(lengthCheckBb); + block->inheritWeight(prevBb); + + // + // Update bbNatLoopNum for all new blocks + // + lengthCheckBb->bbNatLoopNum = prevBb->bbNatLoopNum; + fastpathBb->bbNatLoopNum = prevBb->bbNatLoopNum; + + // All blocks are expected to be in the same EH region + assert(BasicBlock::sameEHRegion(prevBb, block)); + assert(BasicBlock::sameEHRegion(prevBb, lengthCheckBb)); + assert(BasicBlock::sameEHRegion(prevBb, fastpathBb)); + + // Extra step: merge prevBb with lengthCheckBb if possible + if (fgCanCompactBlocks(prevBb, lengthCheckBb)) + { + fgCompactBlocks(prevBb, lengthCheckBb); + } + + JITDUMP("ReadUtf8: succesfully expanded!\n") + return true; +} diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index df1df1c881c73..9bb22a8289fe6 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -3768,6 +3768,7 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, break; } + case NI_System_Text_UTF8Encoding_UTF8EncodingSealed_ReadUtf8: case NI_System_SpanHelpers_SequenceEqual: case NI_System_Buffer_Memmove: { @@ -9185,6 +9186,17 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) } } } + else if (strcmp(namespaceName, "Text") == 0) + { + if (strcmp(className, "UTF8EncodingSealed") == 0) + { + if (strcmp(methodName, "ReadUtf8") == 0) + { + 
assert(strcmp(enclosingClassName, "UTF8Encoding") == 0); + result = NI_System_Text_UTF8Encoding_UTF8EncodingSealed_ReadUtf8; + } + } + } else if (strcmp(namespaceName, "Threading") == 0) { if (strcmp(className, "Interlocked") == 0) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 3babf06e54161..4372355ee4084 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -7712,6 +7712,16 @@ GenTree* Compiler::fgMorphCall(GenTreeCall* call) #endif } + if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) != 0) + { + if (lookupNamedIntrinsic(call->AsCall()->gtCallMethHnd) == + NI_System_Text_UTF8Encoding_UTF8EncodingSealed_ReadUtf8) + { + // Expanded in fgVNBasedIntrinsicExpansion + setMethodHasSpecialIntrinsics(); + } + } + if (((call->gtCallMoreFlags & (GTF_CALL_M_SPECIAL_INTRINSIC | GTF_CALL_M_LDVIRTFTN_INTERFACE)) == 0) && (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_VIRTUAL_FUNC_PTR) #ifdef FEATURE_READYTORUN diff --git a/src/coreclr/jit/namedintrinsiclist.h b/src/coreclr/jit/namedintrinsiclist.h index 86c7d445dabba..0973a45571f49 100644 --- a/src/coreclr/jit/namedintrinsiclist.h +++ b/src/coreclr/jit/namedintrinsiclist.h @@ -66,6 +66,9 @@ enum NamedIntrinsic : unsigned short NI_System_Buffers_Binary_BinaryPrimitives_ReverseEndianness, NI_System_GC_KeepAlive, + + NI_System_Text_UTF8Encoding_UTF8EncodingSealed_ReadUtf8, + NI_System_Threading_Thread_get_CurrentThread, NI_System_Threading_Thread_get_ManagedThreadId, NI_System_Threading_Volatile_Read, diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index 0cb86cbd04e75..ccf8588c16b41 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -10665,7 +10665,6 @@ static bool GetStaticFieldSeqAndAddress(ValueNumStore* vnStore, GenTree* tree, s // the given tree. 
// // Arguments: -// vnStore - ValueNumStore object // tree - tree node to inspect // byteOffset - [Out] resulting byte offset // pObj - [Out] constant object handle @@ -10673,10 +10672,7 @@ static bool GetStaticFieldSeqAndAddress(ValueNumStore* vnStore, GenTree* tree, s // Return Value: // true if the given tree is a ObjHandle + CNS // -static bool GetObjectHandleAndOffset(ValueNumStore* vnStore, - GenTree* tree, - ssize_t* byteOffset, - CORINFO_OBJECT_HANDLE* pObj) +bool Compiler::GetObjectHandleAndOffset(GenTree* tree, ssize_t* byteOffset, CORINFO_OBJECT_HANDLE* pObj) { if (!tree->gtVNPair.BothEqual()) { @@ -10763,7 +10759,7 @@ bool Compiler::fgValueNumberConstLoad(GenTreeIndir* tree) } } else if (!tree->TypeIs(TYP_REF, TYP_BYREF, TYP_STRUCT) && - GetObjectHandleAndOffset(vnStore, tree->gtGetOp1(), &byteOffset, &obj)) + GetObjectHandleAndOffset(tree->gtGetOp1(), &byteOffset, &obj)) { // See if we can fold IND(ADD(FrozenObj, CNS)) to a constant assert(obj != nullptr); diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/UTF8Encoding.Sealed.cs b/src/libraries/System.Private.CoreLib/src/System/Text/UTF8Encoding.Sealed.cs index dc5f36deca8de..31f61b16ee10e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/UTF8Encoding.Sealed.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/UTF8Encoding.Sealed.cs @@ -2,6 +2,8 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Diagnostics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; namespace System.Text { @@ -147,12 +149,25 @@ private unsafe string GetStringForSmallInput(byte[] bytes) return new string(new ReadOnlySpan(ref *pDestination, charsWritten)); // this overload of ROS ctor doesn't validate length } - // TODO: Make this [Intrinsic] and handle JIT-time UTF8 encoding of literal `chars`. 
/// - public override unsafe bool TryGetBytes(ReadOnlySpan chars, Span bytes, out int bytesWritten) + public override bool TryGetBytes(ReadOnlySpan chars, Span bytes, out int bytesWritten) { return base.TryGetBytes(chars, bytes, out bytesWritten); } + + [MethodImpl(MethodImplOptions.NoInlining)] + [Intrinsic] // Can be unrolled by JIT + internal static unsafe int ReadUtf8(ref char input, int inputLength, ref byte output, int outputLength) + { + fixed (char* pInput = &input) + fixed (byte* pOutput = &output) + { + return s_default.GetBytesCommon( + pInput, inputLength, + pOutput, outputLength, + throwForDestinationOverflow: false); + } + } } } } diff --git a/src/tests/JIT/opt/Vectorization/ReadUtf8.cs b/src/tests/JIT/opt/Vectorization/ReadUtf8.cs new file mode 100644 index 0000000000000..957f6b5b13c7d --- /dev/null +++ b/src/tests/JIT/opt/Vectorization/ReadUtf8.cs @@ -0,0 +1,245 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Text; +using System.Threading; +using Xunit; + +public class ReadUtf8 +{ + [Fact] + public static int TestEntryPoint() + { + // Warm up for PGO + for (int i=0; i<200; i++) + { + Test_empty(); + Test_hello(); + Test_CJK(); + Test_SIMD(); + Thread.Sleep(10); + } + return 100; + } + + static void Test_empty() + { + byte[] bytes = new byte[100]; + int bytesWritten = 0; + + Span span = bytes.AsSpan(0, 6); + AssertIsTrue(TryGetBytes_5(span, out bytesWritten)); + AssertEquals(0, bytesWritten); + IsEmpty(bytes.AsSpan(span.Length)); // the rest is untouched + + // Reset data + bytesWritten = 0; + bytes.AsSpan().Clear(); + + span = bytes.AsSpan(0, 0); + AssertIsTrue(TryGetBytes_5(span, out bytesWritten)); + AssertEquals(0, bytesWritten); + IsEmpty(bytes.AsSpan(span.Length)); // the rest is untouched + + [MethodImpl(MethodImplOptions.NoInlining)] + static bool TryGetBytes_5(Span buffer, out int bytesWritten) => + Encoding.UTF8.TryGetBytes("", buffer, out bytesWritten); + } + + static void Test_hello() + { + byte[] bytes = new byte[100]; + int bytesWritten = 0; + + Span span = bytes.AsSpan(0, 6); + AssertIsTrue(TryGetBytes_5(span, out bytesWritten)); + AssertEquals(5, bytesWritten); + AssertIsTrue(span.SequenceEqual("hello\0"u8)); + IsEmpty(bytes.AsSpan(span.Length)); // the rest is untouched + + // Reset data + bytesWritten = 0; + bytes.AsSpan().Clear(); + + span = bytes.AsSpan(0, 5); + AssertIsTrue(TryGetBytes_5(span, out bytesWritten)); + AssertEquals(5, bytesWritten); + AssertIsTrue(span.SequenceEqual("hello"u8)); + IsEmpty(bytes.AsSpan(span.Length)); // the rest is untouched + + // Reset data + bytesWritten = 0; + bytes.AsSpan().Clear(); + + span = bytes.AsSpan(0, 1); + AssertIsTrue(!TryGetBytes_5(span, out bytesWritten)); + AssertEquals(0, bytesWritten); + + // Reset data + bytesWritten = 0; + bytes.AsSpan().Clear(); + + span = bytes.AsSpan(0, 0); + 
AssertIsTrue(!TryGetBytes_5(span, out bytesWritten)); + AssertEquals(0, bytesWritten); + AssertIsTrue(span.SequenceEqual(""u8)); + IsEmpty(bytes.AsSpan(span.Length)); // the rest is untouched + + [MethodImpl(MethodImplOptions.NoInlining)] + static bool TryGetBytes_5(Span buffer, out int bytesWritten) => + Encoding.UTF8.TryGetBytes("hello", buffer, out bytesWritten); + } + + static void Test_CJK() + { + byte[] bytes = new byte[100]; + int bytesWritten = 0; + + Span span = bytes.AsSpan(0, 3); + AssertIsTrue(TryGetBytes_5(span, out bytesWritten)); + AssertEquals(3, bytesWritten); + AssertIsTrue(span.SequenceEqual(new byte[] { 0xE9, 0x89, 0x84 })); + IsEmpty(bytes.AsSpan(span.Length)); // the rest is untouched + + [MethodImpl(MethodImplOptions.NoInlining)] + static bool TryGetBytes_5(Span buffer, out int bytesWritten) => + Encoding.UTF8.TryGetBytes("\u9244", buffer, out bytesWritten); + } + + static void Test_SIMD() + { + byte[] bytes = new byte[1024]; + int bytesWritten = 0; + + Span span = bytes.AsSpan(0, 15); + AssertIsTrue(TryGetBytes_15(span, out bytesWritten)); + AssertEquals(15, bytesWritten); + AssertIsTrue(span.SequenceEqual("000011112222333"u8)); + IsEmpty(bytes.AsSpan(span.Length)); // the rest is untouched + + // Reset data + bytesWritten = 0; + bytes.AsSpan().Clear(); + + span = bytes.AsSpan(0, 16); + AssertIsTrue(TryGetBytes_16(span, out bytesWritten)); + AssertEquals(16, bytesWritten); + AssertIsTrue(span.SequenceEqual("0000111122223333"u8)); + IsEmpty(bytes.AsSpan(span.Length)); // the rest is untouched + + // Reset data + bytesWritten = 0; + bytes.AsSpan().Clear(); + + span = bytes.AsSpan(0, 31); + AssertIsTrue(TryGetBytes_31(span, out bytesWritten)); + AssertEquals(31, bytesWritten); + AssertIsTrue(span.SequenceEqual("0000111122223333000011112222333"u8)); + IsEmpty(bytes.AsSpan(span.Length)); // the rest is untouched + + // Reset data + bytesWritten = 0; + bytes.AsSpan().Clear(); + + span = bytes.AsSpan(0, 32); + AssertIsTrue(TryGetBytes_32(span, out 
bytesWritten)); + AssertEquals(32, bytesWritten); + AssertIsTrue(span.SequenceEqual("00001111222233330000111122223333"u8)); + IsEmpty(bytes.AsSpan(span.Length)); // the rest is untouched + + // Reset data + bytesWritten = 0; + bytes.AsSpan().Clear(); + + span = bytes.AsSpan(0, 64); + AssertIsTrue(TryGetBytes_64(span, out bytesWritten)); + AssertEquals(64, bytesWritten); + AssertIsTrue(span.SequenceEqual("0000111122223333000011112222333300001111222233330000111122223333"u8)); + IsEmpty(bytes.AsSpan(span.Length)); // the rest is untouched + + // Reset data + bytesWritten = 0; + bytes.AsSpan().Clear(); + + span = bytes.AsSpan(0, 128); + AssertIsTrue(TryGetBytes_128(span, out bytesWritten)); + AssertEquals(128, bytesWritten); + AssertIsTrue(span.SequenceEqual("00001111222233330000111122223333000011112222333300001111222233330000111122223333000011112222333300001111222233330000111122223333"u8)); + IsEmpty(bytes.AsSpan(span.Length)); // the rest is untouched + + // Reset data + bytesWritten = 0; + bytes.AsSpan().Clear(); + + span = bytes.AsSpan(0, 31); + AssertIsTrue(TryGetBytes_31(span, out bytesWritten)); + AssertEquals(31, bytesWritten); + AssertIsTrue(span.SequenceEqual("0000111122223333000011112222333"u8)); + IsEmpty(bytes.AsSpan(span.Length)); // the rest is untouched + + + span = bytes.AsSpan(); + AssertIsTrue(!TryGetBytes_16(span.Slice(0, 15), out bytesWritten)); + AssertEquals(0, bytesWritten); + AssertIsTrue(!TryGetBytes_31(span.Slice(0, 30), out bytesWritten)); + AssertEquals(0, bytesWritten); + AssertIsTrue(!TryGetBytes_32(span.Slice(0, 31), out bytesWritten)); + AssertEquals(0, bytesWritten); + AssertIsTrue(!TryGetBytes_64(span.Slice(0, 63), out bytesWritten)); + AssertEquals(0, bytesWritten); + AssertIsTrue(!TryGetBytes_128(span.Slice(0, 127), out bytesWritten)); + AssertEquals(0, bytesWritten); + + + [MethodImpl(MethodImplOptions.NoInlining)] + static bool TryGetBytes_15(Span buffer, out int bytesWritten) => + Encoding.UTF8.TryGetBytes("000011112222333", 
buffer, out bytesWritten); + + [MethodImpl(MethodImplOptions.NoInlining)] + static bool TryGetBytes_16(Span buffer, out int bytesWritten) => + Encoding.UTF8.TryGetBytes("0000111122223333", buffer, out bytesWritten); + + [MethodImpl(MethodImplOptions.NoInlining)] + static bool TryGetBytes_31(Span buffer, out int bytesWritten) => + Encoding.UTF8.TryGetBytes("0000111122223333000011112222333", buffer, out bytesWritten); + + [MethodImpl(MethodImplOptions.NoInlining)] + static bool TryGetBytes_32(Span buffer, out int bytesWritten) => + Encoding.UTF8.TryGetBytes("00001111222233330000111122223333", buffer, out bytesWritten); + + [MethodImpl(MethodImplOptions.NoInlining)] + static bool TryGetBytes_64(Span buffer, out int bytesWritten) => + Encoding.UTF8.TryGetBytes("0000111122223333000011112222333300001111222233330000111122223333", buffer, out bytesWritten); + + [MethodImpl(MethodImplOptions.NoInlining)] + static bool TryGetBytes_128(Span buffer, out int bytesWritten) => + Encoding.UTF8.TryGetBytes("00001111222233330000111122223333000011112222333300001111222233330000111122223333000011112222333300001111222233330000111122223333", buffer, out bytesWritten); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static void AssertIsTrue(bool value) + { + if (!value) + throw new Exception(); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static void AssertEquals(int actual, int expected) + { + if (expected != actual) + throw new Exception($"{actual} != {expected}"); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static void IsEmpty(Span span) + { + foreach (byte item in span) + { + if (item != 0) + throw new Exception($"{item} != 0"); + } + } +} diff --git a/src/tests/JIT/opt/Vectorization/ReadUtf8.csproj b/src/tests/JIT/opt/Vectorization/ReadUtf8.csproj new file mode 100644 index 0000000000000..7cabb6617c4c8 --- /dev/null +++ b/src/tests/JIT/opt/Vectorization/ReadUtf8.csproj @@ -0,0 +1,10 @@ + + + True + + + + + + +