Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Perf] Windows/x64: 11 Improvements on 12/28/2022 6:20:56 PM #11284

Closed
performanceautofiler bot opened this issue Jan 3, 2023 · 2 comments
Closed

[Perf] Windows/x64: 11 Improvements on 12/28/2022 6:20:56 PM #11284

performanceautofiler bot opened this issue Jan 3, 2023 · 2 comments

Comments

@performanceautofiler
Copy link

performanceautofiler bot commented Jan 3, 2023

Run Information

Architecture x64
OS Windows 10.0.18362
Baseline 8c58fc2347820ce48e09605d8adddb993df9ebb5
Compare 1d15f2140f7eb30a976c66290491ec89cd628da0
Diff Diff

Improvements in System.Numerics.Tests.Perf_Matrix4x4

| Benchmark | Baseline | Test | Test/Base | Test Quality | Edge Detector | Baseline IR | Compare IR | IR Ratio | Baseline ETL | Compare ETL |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| CreateLookAtBenchmark - Duration of single invocation | 31.56 ns | 29.75 ns | 0.94 | 0.14 | False | 149.18093115744372 | 138.9753481735635 | 0.9315892258836395 | Trace | Trace |
| CreateBillboardBenchmark - Duration of single invocation | 23.04 ns | 20.53 ns | 0.89 | 0.11 | False | 116.78119720330676 | 106.01327812553706 | 0.907794068431893 | Trace | Trace |
| CreateWorldBenchmark - Duration of single invocation | 23.81 ns | 20.35 ns | 0.85 | 0.09 | False | 119.7388710165708 | 110.01858998207872 | 0.9188210064787827 | Trace | Trace |
| CreateConstrainedBillboardBenchmark - Duration of single invocation | 30.75 ns | 29.19 ns | 0.95 | 0.01 | False | | | | Trace | Trace |

graph
graph
graph
graph
Test Report

Repro

git clone https://github.com/dotnet/performance.git
py .\performance\scripts\benchmarks_ci.py -f net8.0 --filter 'System.Numerics.Tests.Perf_Matrix4x4*'

Payloads

Baseline
Compare

Histogram

System.Numerics.Tests.Perf_Matrix4x4.CreateLookAtBenchmark


Description of detection logic

IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsRegressionBase: Marked as not a regression because the compare was not 5% greater than the baseline, or the value was too small.
IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsImprovementWindowed: Marked as improvement because 29.745970644173973 < 30.579009425098956.
IsChangePoint: Marked as a change because one of 12/28/2022 5:13:50 PM, 1/3/2023 6:29:42 AM falls between 12/23/2022 9:40:21 PM and 1/3/2023 6:29:42 AM.
IsImprovementStdDev: Marked as improvement because 8.03377540262315 (T) = (0 -29.786236112966343) / Math.Sqrt((0.6142171521206813 / (299)) + (1.2209179767904108 / (14))) is greater than 1.9676211333067681 = MathNet.Numerics.Distributions.StudentT.InvCDF(0, 1, (299) + (14) - 2, .975) and 0.07457292595103529 = (32.18647578857234 - 29.786236112966343) / 32.18647578857234 is greater than 0.05.
IsChangeEdgeDetector: Marked not as a regression because Edge Detector said so.

### Baseline Jit Disasm

```assembly
; System.Numerics.Tests.Perf_Matrix4x4.CreateLookAtBenchmark()
       push      rsi
       sub       rsp,50
       vzeroupper
       mov       rsi,rdx
       vmovupd   xmm0,[7FFB23F432E0]
       vmovupd   xmm1,[7FFB23F432F0]
       vmovapd   [rsp+40],xmm0
       vxorps    xmm0,xmm0,xmm0
       vmovapd   [rsp+30],xmm0
       vmovapd   [rsp+20],xmm1
       mov       rcx,rsi
       lea       rdx,[rsp+40]
       lea       r8,[rsp+30]
       lea       r9,[rsp+20]
       call      qword ptr [7FFB2468B1F8]; System.Numerics.Matrix4x4.CreateLookAt(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
       mov       rax,rsi
       add       rsp,50
       pop       rsi
       ret
; Total bytes of code 82
; System.Numerics.Matrix4x4.CreateLookAt(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
       push      rdi
       push      rsi
       sub       rsp,0C8
       vzeroupper
       vmovaps   [rsp+0B0],xmm6
       vmovaps   [rsp+0A0],xmm7
       vmovaps   [rsp+90],xmm8
       vmovaps   [rsp+80],xmm9
       vmovaps   [rsp+70],xmm10
       vmovaps   [rsp+60],xmm11
       mov       rdi,rcx
       mov       rsi,rdx
       vmovss    xmm0,dword ptr [rsi+8]
       vmovsd    xmm1,qword ptr [rsi]
       vshufps   xmm1,xmm1,xmm0,44
       vmovss    xmm0,dword ptr [r8+8]
       vmovsd    xmm2,qword ptr [r8]
       vshufps   xmm2,xmm2,xmm0,44
       vsubps    xmm0,xmm1,xmm2
       vdpps     xmm1,xmm0,xmm0,71
       vsqrtss   xmm1,xmm1,xmm1
       vbroadcastss xmm1,xmm1
       vdivps    xmm0,xmm0,xmm1
       vpslldq   xmm0,xmm0,4
       vpsrldq   xmm6,xmm0,4
       vmovss    xmm0,dword ptr [r9+8]
       vmovsd    xmm1,qword ptr [r9]
       vshufps   xmm1,xmm1,xmm0,44
       vmovshdup xmm0,xmm1
       vunpckhps xmm7,xmm6,xmm6
       vmulss    xmm2,xmm0,xmm7
       vunpckhps xmm3,xmm1,xmm1
       vmovshdup xmm8,xmm6
       vmulss    xmm4,xmm3,xmm8
       vsubss    xmm2,xmm2,xmm4
       vmovaps   xmm9,xmm6
       vmulss    xmm3,xmm3,xmm9
       vmulss    xmm4,xmm1,xmm7
       vsubss    xmm3,xmm3,xmm4
       vmulss    xmm1,xmm1,xmm8
       vmulss    xmm0,xmm0,xmm9
       vsubss    xmm0,xmm1,xmm0
       vxorps    xmm1,xmm1,xmm1
       vmovss    xmm1,xmm1,xmm0
       vpslldq   xmm1,xmm1,4
       vmovss    xmm1,xmm1,xmm3
       vpslldq   xmm1,xmm1,4
       vmovss    xmm1,xmm1,xmm2
       vmovaps   xmm0,xmm1
       vdpps     xmm1,xmm0,xmm0,71
       vsqrtss   xmm1,xmm1,xmm1
       vbroadcastss xmm1,xmm1
       vdivps    xmm0,xmm0,xmm1
       vpslldq   xmm0,xmm0,4
       vpsrldq   xmm10,xmm0,4
       vunpckhps xmm0,xmm10,xmm10
       vmulss    xmm0,xmm8,xmm0
       vmovshdup xmm1,xmm10
       vmulss    xmm1,xmm7,xmm1
       vsubss    xmm0,xmm0,xmm1
       vmovaps   xmm1,xmm10
       vmulss    xmm1,xmm7,xmm1
       vunpckhps xmm2,xmm10,xmm10
       vmulss    xmm2,xmm9,xmm2
       vsubss    xmm1,xmm1,xmm2
       vmovshdup xmm2,xmm10
       vmulss    xmm2,xmm9,xmm2
       vmovaps   xmm3,xmm10
       vmulss    xmm3,xmm8,xmm3
       vsubss    xmm2,xmm2,xmm3
       vxorps    xmm3,xmm3,xmm3
       vmovss    xmm3,xmm3,xmm2
       vpslldq   xmm3,xmm3,4
       vmovss    xmm3,xmm3,xmm1
       vpslldq   xmm3,xmm3,4
       vmovss    xmm3,xmm3,xmm0
       vmovaps   xmm11,xmm3
       lea       rcx,[rsp+20]
       call      qword ptr [7FFB2468B030]; System.Numerics.Matrix4x4.get_Identity()
       vmovaps   xmm0,xmm10
       vmovss    dword ptr [rsp+20],xmm0
       vmovaps   xmm0,xmm11
       vmovss    dword ptr [rsp+24],xmm0
       vmovss    dword ptr [rsp+28],xmm9
       vmovshdup xmm0,xmm10
       vmovss    dword ptr [rsp+30],xmm0
       vmovshdup xmm0,xmm11
       vmovss    dword ptr [rsp+34],xmm0
       vmovss    dword ptr [rsp+38],xmm8
       vunpckhps xmm0,xmm10,xmm10
       vmovss    dword ptr [rsp+40],xmm0
       vunpckhps xmm0,xmm11,xmm11
       vmovss    dword ptr [rsp+44],xmm0
       vmovss    dword ptr [rsp+48],xmm7
       vmovss    xmm0,dword ptr [rsi+8]
       vmovsd    xmm1,qword ptr [rsi]
       vshufps   xmm1,xmm1,xmm0,44
       vdpps     xmm0,xmm10,xmm1,71
       vxorps    xmm0,xmm0,[7FFB23F43640]
       vmovss    dword ptr [rsp+50],xmm0
       vmovss    xmm0,dword ptr [rsi+8]
       vmovsd    xmm1,qword ptr [rsi]
       vshufps   xmm1,xmm1,xmm0,44
       vdpps     xmm0,xmm11,xmm1,71
       vxorps    xmm0,xmm0,[7FFB23F43640]
       vmovss    dword ptr [rsp+54],xmm0
       vmovss    xmm0,dword ptr [rsi+8]
       vmovsd    xmm1,qword ptr [rsi]
       vshufps   xmm1,xmm1,xmm0,44
       vdpps     xmm0,xmm6,xmm1,71
       vxorps    xmm0,xmm0,[7FFB23F43640]
       vmovss    dword ptr [rsp+58],xmm0
       vmovdqu   ymm0,ymmword ptr [rsp+20]
       vmovdqu   ymmword ptr [rdi],ymm0
       vmovdqu   ymm0,ymmword ptr [rsp+40]
       vmovdqu   ymmword ptr [rdi+20],ymm0
       mov       rax,rdi
       vmovaps   xmm6,[rsp+0B0]
       vmovaps   xmm7,[rsp+0A0]
       vmovaps   xmm8,[rsp+90]
       vmovaps   xmm9,[rsp+80]
       vmovaps   xmm10,[rsp+70]
       vmovaps   xmm11,[rsp+60]
       add       rsp,0C8
       pop       rsi
       pop       rdi
       ret
; Total bytes of code 639

```

### Compare Jit Disasm

```assembly

; System.Numerics.Tests.Perf_Matrix4x4.CreateLookAtBenchmark()
       push      rsi
       sub       rsp,50
       vzeroupper
       mov       rsi,rdx
       vmovupd   xmm0,[7FFC51CB32A0]
       vmovapd   [rsp+40],xmm0
       vxorps    xmm0,xmm0,xmm0
       vmovapd   [rsp+30],xmm0
       vmovupd   xmm0,[7FFC51CB32B0]
       vmovapd   [rsp+20],xmm0
       mov       rcx,rsi
       lea       rdx,[rsp+40]
       lea       r8,[rsp+30]
       lea       r9,[rsp+20]
       call      qword ptr [7FFC523FB1F8]; System.Numerics.Matrix4x4.CreateLookAt(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
       mov       rax,rsi
       add       rsp,50
       pop       rsi
       ret
; Total bytes of code 82
; System.Numerics.Matrix4x4.CreateLookAt(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
       push      rdi
       push      rsi
       sub       rsp,0C8
       vzeroupper
       vmovaps   [rsp+0B0],xmm6
       vmovaps   [rsp+0A0],xmm7
       vmovaps   [rsp+90],xmm8
       vmovaps   [rsp+80],xmm9
       vmovaps   [rsp+70],xmm10
       vmovaps   [rsp+60],xmm11
       mov       rdi,rcx
       mov       rsi,rdx
       vmovss    xmm0,dword ptr [rsi+8]
       vmovsd    xmm1,qword ptr [rsi]
       vshufps   xmm1,xmm1,xmm0,44
       vmovss    xmm0,dword ptr [r8+8]
       vmovsd    xmm2,qword ptr [r8]
       vshufps   xmm2,xmm2,xmm0,44
       vsubps    xmm0,xmm1,xmm2
       vdpps     xmm1,xmm0,xmm0,71
       vsqrtss   xmm1,xmm1,xmm1
       vbroadcastss xmm1,xmm1
       vdivps    xmm0,xmm0,xmm1
       vpslldq   xmm0,xmm0,4
       vpsrldq   xmm6,xmm0,4
       vmovss    xmm0,dword ptr [r9+8]
       vmovsd    xmm1,qword ptr [r9]
       vshufps   xmm1,xmm1,xmm0,44
       vmovshdup xmm0,xmm1
       vunpckhps xmm7,xmm6,xmm6
       vmulss    xmm2,xmm0,xmm7
       vunpckhps xmm3,xmm1,xmm1
       vmovshdup xmm8,xmm6
       vmulss    xmm4,xmm3,xmm8
       vsubss    xmm2,xmm2,xmm4
       vmovaps   xmm9,xmm6
       vmulss    xmm3,xmm3,xmm9
       vmulss    xmm4,xmm1,xmm7
       vsubss    xmm3,xmm3,xmm4
       vinsertps xmm2,xmm2,xmm3,10
       vmulss    xmm1,xmm1,xmm8
       vmulss    xmm0,xmm0,xmm9
       vsubss    xmm0,xmm1,xmm0
       vinsertps xmm0,xmm2,xmm0,28
       vdpps     xmm1,xmm0,xmm0,71
       vsqrtss   xmm1,xmm1,xmm1
       vbroadcastss xmm1,xmm1
       vdivps    xmm0,xmm0,xmm1
       vpslldq   xmm0,xmm0,4
       vpsrldq   xmm10,xmm0,4
       vunpckhps xmm0,xmm10,xmm10
       vmulss    xmm0,xmm8,xmm0
       vmovshdup xmm1,xmm10
       vmulss    xmm1,xmm7,xmm1
       vsubss    xmm0,xmm0,xmm1
       vmovaps   xmm1,xmm10
       vmulss    xmm1,xmm7,xmm1
       vunpckhps xmm2,xmm10,xmm10
       vmulss    xmm2,xmm9,xmm2
       vsubss    xmm1,xmm1,xmm2
       vinsertps xmm0,xmm0,xmm1,10
       vmovshdup xmm1,xmm10
       vmulss    xmm1,xmm9,xmm1
       vmovaps   xmm2,xmm10
       vmulss    xmm2,xmm8,xmm2
       vsubss    xmm1,xmm1,xmm2
       vinsertps xmm11,xmm0,xmm1,28
       lea       rcx,[rsp+20]
       call      qword ptr [7FFC523FB030]; System.Numerics.Matrix4x4.get_Identity()
       vmovaps   xmm0,xmm10
       vmovss    dword ptr [rsp+20],xmm0
       vmovaps   xmm0,xmm11
       vmovss    dword ptr [rsp+24],xmm0
       vmovss    dword ptr [rsp+28],xmm9
       vmovshdup xmm0,xmm10
       vmovss    dword ptr [rsp+30],xmm0
       vmovshdup xmm0,xmm11
       vmovss    dword ptr [rsp+34],xmm0
       vmovss    dword ptr [rsp+38],xmm8
       vunpckhps xmm0,xmm10,xmm10
       vmovss    dword ptr [rsp+40],xmm0
       vunpckhps xmm0,xmm11,xmm11
       vmovss    dword ptr [rsp+44],xmm0
       vmovss    dword ptr [rsp+48],xmm7
       vmovss    xmm0,dword ptr [rsi+8]
       vmovsd    xmm1,qword ptr [rsi]
       vshufps   xmm1,xmm1,xmm0,44
       vdpps     xmm0,xmm10,xmm1,71
       vxorps    xmm0,xmm0,[7FFC51CB35E0]
       vmovss    dword ptr [rsp+50],xmm0
       vmovss    xmm0,dword ptr [rsi+8]
       vmovsd    xmm1,qword ptr [rsi]
       vshufps   xmm1,xmm1,xmm0,44
       vdpps     xmm0,xmm11,xmm1,71
       vxorps    xmm0,xmm0,[7FFC51CB35E0]
       vmovss    dword ptr [rsp+54],xmm0
       vmovss    xmm0,dword ptr [rsi+8]
       vmovsd    xmm1,qword ptr [rsi]
       vshufps   xmm1,xmm1,xmm0,44
       vdpps     xmm0,xmm6,xmm1,71
       vxorps    xmm0,xmm0,[7FFC51CB35E0]
       vmovss    dword ptr [rsp+58],xmm0
       vmovdqu   ymm0,ymmword ptr [rsp+20]
       vmovdqu   ymmword ptr [rdi],ymm0
       vmovdqu   ymm0,ymmword ptr [rsp+40]
       vmovdqu   ymmword ptr [rdi+20],ymm0
       mov       rax,rdi
       vmovaps   xmm6,[rsp+0B0]
       vmovaps   xmm7,[rsp+0A0]
       vmovaps   xmm8,[rsp+90]
       vmovaps   xmm9,[rsp+80]
       vmovaps   xmm10,[rsp+70]
       vmovaps   xmm11,[rsp+60]
       add       rsp,0C8
       pop       rsi
       pop       rdi
       ret
; Total bytes of code 603
```

System.Numerics.Tests.Perf_Matrix4x4.CreateBillboardBenchmark


Description of detection logic

IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsRegressionBase: Marked as not a regression because the compare was not 5% greater than the baseline, or the value was too small.
IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsImprovementWindowed: Marked as improvement because 20.532278443711558 < 21.753609662882017.
IsChangePoint: Marked as a change because one of 12/28/2022 5:13:50 PM, 1/3/2023 6:29:42 AM falls between 12/23/2022 9:40:21 PM and 1/3/2023 6:29:42 AM.
IsImprovementStdDev: Marked as improvement because 2.1484834712218572 (T) = (0 -21.392544886476852) / Math.Sqrt((0.49130840670645715 / (299)) + (7.969156026645255 / (14))) is greater than 1.9676211333067681 = MathNet.Numerics.Distributions.StudentT.InvCDF(0, 1, (299) + (14) - 2, .975) and 0.07052986970187088 = (23.01584977197186 - 21.392544886476852) / 23.01584977197186 is greater than 0.05.
IsChangeEdgeDetector: Marked not as a regression because Edge Detector said so.

### Baseline Jit Disasm

```assembly
; System.Numerics.Tests.Perf_Matrix4x4.CreateBillboardBenchmark()
       push      rsi
       sub       rsp,70
       vzeroupper
       mov       rsi,rdx
       vmovupd   xmm0,[7FFE0B2C3360]
       vmovupd   xmm1,[7FFE0B2C3370]
       vmovupd   xmm2,[7FFE0B2C3380]
       vxorps    xmm3,xmm3,xmm3
       vmovapd   [rsp+60],xmm3
       vmovapd   [rsp+50],xmm0
       vmovapd   [rsp+40],xmm1
       vmovapd   [rsp+30],xmm2
       mov       rcx,rsi
       lea       rdx,[rsp+60]
       lea       r8,[rsp+50]
       lea       r9,[rsp+40]
       lea       rax,[rsp+30]
       mov       [rsp+20],rax
       call      qword ptr [7FFE0BA0B180]; System.Numerics.Matrix4x4.CreateBillboard(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
       mov       rax,rsi
       add       rsp,70
       pop       rsi
       ret
; Total bytes of code 106
; System.Numerics.Matrix4x4.CreateBillboard(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
       sub       rsp,88
       vzeroupper
       vmovaps   [rsp+70],xmm6
       vmovaps   [rsp+60],xmm7
       vmovaps   [rsp+50],xmm8
       vmovaps   [rsp+40],xmm9
       vmovss    xmm0,dword ptr [rdx+8]
       vmovsd    xmm1,qword ptr [rdx]
       vshufps   xmm1,xmm1,xmm0,44
       vmovss    xmm0,dword ptr [r8+8]
       vmovsd    xmm2,qword ptr [r8]
       vshufps   xmm2,xmm2,xmm0,44
       vsubps    xmm0,xmm1,xmm2
       vdpps     xmm2,xmm0,xmm0,71
       vmovss    xmm3,dword ptr [7FFE0B2C3600]
       vucomiss  xmm3,xmm2
       jbe       short M01_L00
       mov       rax,[rsp+0B0]
       vmovss    xmm0,dword ptr [rax+8]
       vmovsd    xmm2,qword ptr [rax]
       vshufps   xmm2,xmm2,xmm0,44
       vxorps    xmm0,xmm0,xmm0
       vsubps    xmm0,xmm0,xmm2
       jmp       short M01_L01
M01_L00:
       vsqrtss   xmm2,xmm2,xmm2
       vmovss    xmm3,dword ptr [7FFE0B2C3604]
       vdivss    xmm2,xmm3,xmm2
       vbroadcastss xmm2,xmm2
       vmulps    xmm0,xmm0,xmm2
M01_L01:
       vmovss    xmm2,dword ptr [r9+8]
       vmovsd    xmm3,qword ptr [r9]
       vshufps   xmm3,xmm3,xmm2,44
       vmovshdup xmm2,xmm3
       vunpckhps xmm4,xmm0,xmm0
       vmulss    xmm5,xmm2,xmm4
       vunpckhps xmm6,xmm3,xmm3
       vmovshdup xmm7,xmm0
       vmulss    xmm8,xmm6,xmm7
       vsubss    xmm5,xmm5,xmm8
       vmovaps   xmm8,xmm0
       vmulss    xmm6,xmm6,xmm8
       vmulss    xmm9,xmm3,xmm4
       vsubss    xmm6,xmm6,xmm9
       vmulss    xmm3,xmm3,xmm7
       vmulss    xmm2,xmm2,xmm8
       vsubss    xmm2,xmm3,xmm2
       vxorps    xmm3,xmm3,xmm3
       vmovss    xmm3,xmm3,xmm2
       vpslldq   xmm3,xmm3,4
       vmovss    xmm3,xmm3,xmm6
       vpslldq   xmm3,xmm3,4
       vmovss    xmm3,xmm3,xmm5
       vmovaps   xmm2,xmm3
       vdpps     xmm3,xmm2,xmm2,71
       vsqrtss   xmm3,xmm3,xmm3
       vbroadcastss xmm3,xmm3
       vdivps    xmm2,xmm2,xmm3
       vpslldq   xmm2,xmm2,4
       vpsrldq   xmm2,xmm2,4
       vunpckhps xmm3,xmm2,xmm2
       vmulss    xmm5,xmm7,xmm3
       vmovshdup xmm6,xmm2
       vmulss    xmm9,xmm4,xmm6
       vsubss    xmm5,xmm5,xmm9
       vmovaps   xmm9,xmm2
       vmulss    xmm4,xmm4,xmm9
       vmulss    xmm3,xmm8,xmm3
       vsubss    xmm3,xmm4,xmm3
       vmulss    xmm4,xmm8,xmm6
       vmulss    xmm6,xmm7,xmm9
       vsubss    xmm4,xmm4,xmm6
       vxorps    xmm6,xmm6,xmm6
       vmovss    xmm6,xmm6,xmm4
       vpslldq   xmm6,xmm6,4
       vmovss    xmm6,xmm6,xmm3
       vpslldq   xmm6,xmm6,4
       vmovss    xmm6,xmm6,xmm5
       vmovaps   xmm3,xmm6
       vmovsd    qword ptr [rsp],xmm2
       vpshufd   xmm4,xmm2,2
       vmovss    dword ptr [rsp+8],xmm4
       xor       eax,eax
       mov       [rsp+0C],eax
       vmovsd    qword ptr [rsp+10],xmm3
       vpshufd   xmm2,xmm3,2
       vmovss    dword ptr [rsp+18],xmm2
       mov       [rsp+1C],eax
       vmovsd    qword ptr [rsp+20],xmm0
       vpshufd   xmm2,xmm0,2
       vmovss    dword ptr [rsp+28],xmm2
       mov       [rsp+2C],eax
       vmovsd    qword ptr [rsp+30],xmm1
       vpshufd   xmm0,xmm1,2
       vmovss    dword ptr [rsp+38],xmm0
       mov       dword ptr [rsp+3C],3F800000
       vmovdqu   ymm0,ymmword ptr [rsp]
       vmovdqu   ymmword ptr [rcx],ymm0
       vmovdqu   ymm0,ymmword ptr [rsp+20]
       vmovdqu   ymmword ptr [rcx+20],ymm0
       mov       rax,rcx
       vmovaps   xmm6,[rsp+70]
       vmovaps   xmm7,[rsp+60]
       vmovaps   xmm8,[rsp+50]
       vmovaps   xmm9,[rsp+40]
       add       rsp,88
       ret
; Total bytes of code 505

```

### Compare Jit Disasm

```assembly

; System.Numerics.Tests.Perf_Matrix4x4.CreateBillboardBenchmark()
       push      rsi
       sub       rsp,70
       vzeroupper
       mov       rsi,rdx
       vxorps    xmm0,xmm0,xmm0
       vmovapd   [rsp+60],xmm0
       vmovupd   xmm0,[7FFE2B6F3320]
       vmovapd   [rsp+50],xmm0
       vmovupd   xmm0,[7FFE2B6F3330]
       vmovapd   [rsp+40],xmm0
       vmovupd   xmm0,[7FFE2B6F3320]
       vmovapd   [rsp+30],xmm0
       mov       rcx,rsi
       lea       rdx,[rsp+60]
       lea       r8,[rsp+50]
       lea       r9,[rsp+40]
       lea       rax,[rsp+30]
       mov       [rsp+20],rax
       call      qword ptr [7FFE2BE3B180]; System.Numerics.Matrix4x4.CreateBillboard(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
       mov       rax,rsi
       add       rsp,70
       pop       rsi
       ret
; Total bytes of code 106
; System.Numerics.Matrix4x4.CreateBillboard(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
       sub       rsp,88
       vzeroupper
       vmovaps   [rsp+70],xmm6
       vmovaps   [rsp+60],xmm7
       vmovaps   [rsp+50],xmm8
       vmovaps   [rsp+40],xmm9
       vmovss    xmm0,dword ptr [rdx+8]
       vmovsd    xmm1,qword ptr [rdx]
       vshufps   xmm1,xmm1,xmm0,44
       vmovss    xmm0,dword ptr [r8+8]
       vmovsd    xmm2,qword ptr [r8]
       vshufps   xmm2,xmm2,xmm0,44
       vsubps    xmm0,xmm1,xmm2
       vdpps     xmm2,xmm0,xmm0,71
       vmovss    xmm3,dword ptr [7FFE2B6F3588]
       vucomiss  xmm3,xmm2
       jbe       short M01_L00
       mov       rax,[rsp+0B0]
       vmovss    xmm0,dword ptr [rax+8]
       vmovsd    xmm2,qword ptr [rax]
       vshufps   xmm2,xmm2,xmm0,44
       vxorps    xmm0,xmm0,xmm0
       vsubps    xmm0,xmm0,xmm2
       jmp       short M01_L01
M01_L00:
       vsqrtss   xmm2,xmm2,xmm2
       vmovss    xmm3,dword ptr [7FFE2B6F358C]
       vdivss    xmm2,xmm3,xmm2
       vbroadcastss xmm2,xmm2
       vmulps    xmm0,xmm0,xmm2
M01_L01:
       vmovss    xmm2,dword ptr [r9+8]
       vmovsd    xmm3,qword ptr [r9]
       vshufps   xmm3,xmm3,xmm2,44
       vmovshdup xmm2,xmm3
       vunpckhps xmm4,xmm0,xmm0
       vmulss    xmm5,xmm2,xmm4
       vunpckhps xmm6,xmm3,xmm3
       vmovshdup xmm7,xmm0
       vmulss    xmm8,xmm6,xmm7
       vsubss    xmm5,xmm5,xmm8
       vmovaps   xmm8,xmm0
       vmulss    xmm6,xmm6,xmm8
       vmulss    xmm9,xmm3,xmm4
       vsubss    xmm6,xmm6,xmm9
       vinsertps xmm5,xmm5,xmm6,10
       vmulss    xmm3,xmm3,xmm7
       vmulss    xmm2,xmm2,xmm8
       vsubss    xmm2,xmm3,xmm2
       vinsertps xmm2,xmm5,xmm2,28
       vdpps     xmm3,xmm2,xmm2,71
       vsqrtss   xmm3,xmm3,xmm3
       vbroadcastss xmm3,xmm3
       vdivps    xmm2,xmm2,xmm3
       vpslldq   xmm2,xmm2,4
       vpsrldq   xmm2,xmm2,4
       vunpckhps xmm3,xmm2,xmm2
       vmulss    xmm5,xmm7,xmm3
       vmovshdup xmm6,xmm2
       vmulss    xmm9,xmm4,xmm6
       vsubss    xmm5,xmm5,xmm9
       vmovaps   xmm9,xmm2
       vmulss    xmm4,xmm4,xmm9
       vmulss    xmm3,xmm8,xmm3
       vsubss    xmm3,xmm4,xmm3
       vinsertps xmm3,xmm5,xmm3,10
       vmulss    xmm4,xmm8,xmm6
       vmulss    xmm5,xmm7,xmm9
       vsubss    xmm4,xmm4,xmm5
       vinsertps xmm3,xmm3,xmm4,28
       vmovsd    qword ptr [rsp],xmm2
       vpshufd   xmm4,xmm2,2
       vmovss    dword ptr [rsp+8],xmm4
       xor       eax,eax
       mov       [rsp+0C],eax
       vmovsd    qword ptr [rsp+10],xmm3
       vpshufd   xmm2,xmm3,2
       vmovss    dword ptr [rsp+18],xmm2
       mov       [rsp+1C],eax
       vmovsd    qword ptr [rsp+20],xmm0
       vpshufd   xmm2,xmm0,2
       vmovss    dword ptr [rsp+28],xmm2
       mov       [rsp+2C],eax
       vmovsd    qword ptr [rsp+30],xmm1
       vpshufd   xmm0,xmm1,2
       vmovss    dword ptr [rsp+38],xmm0
       mov       dword ptr [rsp+3C],3F800000
       vmovdqu   ymm0,ymmword ptr [rsp]
       vmovdqu   ymmword ptr [rcx],ymm0
       vmovdqu   ymm0,ymmword ptr [rsp+20]
       vmovdqu   ymmword ptr [rcx+20],ymm0
       mov       rax,rcx
       vmovaps   xmm6,[rsp+70]
       vmovaps   xmm7,[rsp+60]
       vmovaps   xmm8,[rsp+50]
       vmovaps   xmm9,[rsp+40]
       add       rsp,88
       ret
; Total bytes of code 469
```

System.Numerics.Tests.Perf_Matrix4x4.CreateWorldBenchmark


Description of detection logic

IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsRegressionBase: Marked as not a regression because the compare was not 5% greater than the baseline, or the value was too small.
IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsImprovementWindowed: Marked as improvement because 20.347633209236008 < 22.398269254881626.
IsChangePoint: Marked as a change because one of 12/28/2022 5:13:50 PM, 1/3/2023 6:29:42 AM falls between 12/23/2022 9:40:21 PM and 1/3/2023 6:29:42 AM.
IsImprovementStdDev: Marked as improvement because 24.12215320316142 (T) = (0 -20.914799837469435) / Math.Sqrt((0.4263786384621453 / (299)) + (0.15510467445082196 / (14))) is greater than 1.9676211333067681 = MathNet.Numerics.Distributions.StudentT.InvCDF(0, 1, (299) + (14) - 2, .975) and 0.11424016351104417 = (23.612269348735833 - 20.914799837469435) / 23.612269348735833 is greater than 0.05.
IsChangeEdgeDetector: Marked not as a regression because Edge Detector said so.

### Baseline Jit Disasm

```assembly
; System.Numerics.Tests.Perf_Matrix4x4.CreateWorldBenchmark()
       push      rsi
       sub       rsp,50
       vzeroupper
       mov       rsi,rdx
       vmovupd   xmm0,[7FFAA94532E0]
       vmovupd   xmm1,[7FFAA94532F0]
       vxorps    xmm2,xmm2,xmm2
       vmovapd   [rsp+40],xmm2
       vmovapd   [rsp+30],xmm0
       vmovapd   [rsp+20],xmm1
       mov       rcx,rsi
       lea       rdx,[rsp+40]
       lea       r8,[rsp+30]
       lea       r9,[rsp+20]
       call      qword ptr [7FFAA9B9B408]; System.Numerics.Matrix4x4.CreateWorld(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
       mov       rax,rsi
       add       rsp,50
       pop       rsi
       ret
; Total bytes of code 82
; System.Numerics.Matrix4x4.CreateWorld(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
       push      rdi
       push      rsi
       sub       rsp,98
       vzeroupper
       vmovaps   [rsp+80],xmm6
       vmovaps   [rsp+70],xmm7
       vmovaps   [rsp+60],xmm8
       mov       rdi,rcx
       mov       rsi,rdx
       vmovss    xmm0,dword ptr [r8+8]
       vmovsd    xmm1,qword ptr [r8]
       vshufps   xmm1,xmm1,xmm0,44
       vxorps    xmm0,xmm0,xmm0
       vsubps    xmm0,xmm0,xmm1
       vdpps     xmm1,xmm0,xmm0,71
       vsqrtss   xmm1,xmm1,xmm1
       vbroadcastss xmm1,xmm1
       vdivps    xmm0,xmm0,xmm1
       vpslldq   xmm0,xmm0,4
       vpsrldq   xmm6,xmm0,4
       vmovss    xmm0,dword ptr [r9+8]
       vmovsd    xmm1,qword ptr [r9]
       vshufps   xmm1,xmm1,xmm0,44
       vmovshdup xmm0,xmm1
       vunpckhps xmm2,xmm6,xmm6
       vmulss    xmm3,xmm0,xmm2
       vunpckhps xmm4,xmm1,xmm1
       vmovshdup xmm5,xmm6
       vmulss    xmm7,xmm4,xmm5
       vsubss    xmm3,xmm3,xmm7
       vmovaps   xmm7,xmm6
       vmulss    xmm4,xmm4,xmm7
       vmulss    xmm8,xmm1,xmm2
       vsubss    xmm4,xmm4,xmm8
       vmulss    xmm1,xmm1,xmm5
       vmulss    xmm0,xmm0,xmm7
       vsubss    xmm0,xmm1,xmm0
       vxorps    xmm1,xmm1,xmm1
       vmovss    xmm1,xmm1,xmm0
       vpslldq   xmm1,xmm1,4
       vmovss    xmm1,xmm1,xmm4
       vpslldq   xmm1,xmm1,4
       vmovss    xmm1,xmm1,xmm3
       vmovaps   xmm0,xmm1
       vdpps     xmm1,xmm0,xmm0,71
       vsqrtss   xmm1,xmm1,xmm1
       vbroadcastss xmm1,xmm1
       vdivps    xmm0,xmm0,xmm1
       vpslldq   xmm0,xmm0,4
       vpsrldq   xmm8,xmm0,4
       vunpckhps xmm0,xmm8,xmm8
       vmulss    xmm1,xmm5,xmm0
       vmovshdup xmm3,xmm8
       vmulss    xmm4,xmm2,xmm3
       vsubss    xmm1,xmm1,xmm4
       vmovaps   xmm4,xmm8
       vmulss    xmm2,xmm2,xmm4
       vmulss    xmm0,xmm7,xmm0
       vsubss    xmm0,xmm2,xmm0
       vmulss    xmm2,xmm7,xmm3
       vmulss    xmm3,xmm5,xmm4
       vsubss    xmm2,xmm2,xmm3
       vxorps    xmm3,xmm3,xmm3
       vmovss    xmm3,xmm3,xmm2
       vpslldq   xmm3,xmm3,4
       vmovss    xmm3,xmm3,xmm0
       vpslldq   xmm3,xmm3,4
       vmovss    xmm3,xmm3,xmm1
       vmovaps   xmm7,xmm3
       lea       rcx,[rsp+20]
       call      qword ptr [7FFAA9B9B030]; System.Numerics.Matrix4x4.get_Identity()
       vmovsd    qword ptr [rsp+20],xmm8
       vpshufd   xmm0,xmm8,2
       vmovss    dword ptr [rsp+28],xmm0
       vmovsd    qword ptr [rsp+30],xmm7
       vpshufd   xmm0,xmm7,2
       vmovss    dword ptr [rsp+38],xmm0
       vmovsd    qword ptr [rsp+40],xmm6
       vpshufd   xmm0,xmm6,2
       vmovss    dword ptr [rsp+48],xmm0
       vmovss    xmm0,dword ptr [rsi+8]
       vmovsd    xmm1,qword ptr [rsi]
       vshufps   xmm1,xmm1,xmm0,44
       vmovsd    qword ptr [rsp+50],xmm1
       vpshufd   xmm0,xmm1,2
       vmovss    dword ptr [rsp+58],xmm0
       vmovdqu   ymm0,ymmword ptr [rsp+20]
       vmovdqu   ymmword ptr [rdi],ymm0
       vmovdqu   ymm0,ymmword ptr [rsp+40]
       vmovdqu   ymmword ptr [rdi+20],ymm0
       mov       rax,rdi
       vmovaps   xmm6,[rsp+80]
       vmovaps   xmm7,[rsp+70]
       vmovaps   xmm8,[rsp+60]
       add       rsp,98
       pop       rsi
       pop       rdi
       ret
; Total bytes of code 454

```

### Compare Jit Disasm

```assembly

; System.Numerics.Tests.Perf_Matrix4x4.CreateWorldBenchmark()
       push      rsi
       sub       rsp,50
       vzeroupper
       mov       rsi,rdx
       vxorps    xmm0,xmm0,xmm0
       vmovapd   [rsp+40],xmm0
       vmovupd   xmm0,[7FF93C0332A0]
       vmovapd   [rsp+30],xmm0
       vmovupd   xmm0,[7FF93C0332B0]
       vmovapd   [rsp+20],xmm0
       mov       rcx,rsi
       lea       rdx,[rsp+40]
       lea       r8,[rsp+30]
       lea       r9,[rsp+20]
       call      qword ptr [7FF93C77B408]; System.Numerics.Matrix4x4.CreateWorld(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
       mov       rax,rsi
       add       rsp,50
       pop       rsi
       ret
; Total bytes of code 82
; System.Numerics.Matrix4x4.CreateWorld(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
       push      rdi
       push      rsi
       sub       rsp,98
       vzeroupper
       vmovaps   [rsp+80],xmm6
       vmovaps   [rsp+70],xmm7
       vmovaps   [rsp+60],xmm8
       mov       rdi,rcx
       mov       rsi,rdx
       vmovss    xmm0,dword ptr [r8+8]
       vmovsd    xmm1,qword ptr [r8]
       vshufps   xmm1,xmm1,xmm0,44
       vxorps    xmm0,xmm0,xmm0
       vsubps    xmm0,xmm0,xmm1
       vdpps     xmm1,xmm0,xmm0,71
       vsqrtss   xmm1,xmm1,xmm1
       vbroadcastss xmm1,xmm1
       vdivps    xmm0,xmm0,xmm1
       vpslldq   xmm0,xmm0,4
       vpsrldq   xmm6,xmm0,4
       vmovss    xmm0,dword ptr [r9+8]
       vmovsd    xmm1,qword ptr [r9]
       vshufps   xmm1,xmm1,xmm0,44
       vmovshdup xmm0,xmm1
       vunpckhps xmm2,xmm6,xmm6
       vmulss    xmm3,xmm0,xmm2
       vunpckhps xmm4,xmm1,xmm1
       vmovshdup xmm5,xmm6
       vmulss    xmm7,xmm4,xmm5
       vsubss    xmm3,xmm3,xmm7
       vmovaps   xmm7,xmm6
       vmulss    xmm4,xmm4,xmm7
       vmulss    xmm8,xmm1,xmm2
       vsubss    xmm4,xmm4,xmm8
       vinsertps xmm3,xmm3,xmm4,10
       vmulss    xmm1,xmm1,xmm5
       vmulss    xmm0,xmm0,xmm7
       vsubss    xmm0,xmm1,xmm0
       vinsertps xmm0,xmm3,xmm0,28
       vdpps     xmm1,xmm0,xmm0,71
       vsqrtss   xmm1,xmm1,xmm1
       vbroadcastss xmm1,xmm1
       vdivps    xmm0,xmm0,xmm1
       vpslldq   xmm0,xmm0,4
       vpsrldq   xmm8,xmm0,4
       vunpckhps xmm0,xmm8,xmm8
       vmulss    xmm1,xmm5,xmm0
       vmovshdup xmm3,xmm8
       vmulss    xmm4,xmm2,xmm3
       vsubss    xmm1,xmm1,xmm4
       vmovaps   xmm4,xmm8
       vmulss    xmm2,xmm2,xmm4
       vmulss    xmm0,xmm7,xmm0
       vsubss    xmm0,xmm2,xmm0
       vinsertps xmm0,xmm1,xmm0,10
       vmulss    xmm1,xmm7,xmm3
       vmulss    xmm2,xmm5,xmm4
       vsubss    xmm1,xmm1,xmm2
       vinsertps xmm7,xmm0,xmm1,28
       lea       rcx,[rsp+20]
       call      qword ptr [7FF93C77B030]; System.Numerics.Matrix4x4.get_Identity()
       vmovsd    qword ptr [rsp+20],xmm8
       vpshufd   xmm0,xmm8,2
       vmovss    dword ptr [rsp+28],xmm0
       vmovsd    qword ptr [rsp+30],xmm7
       vpshufd   xmm0,xmm7,2
       vmovss    dword ptr [rsp+38],xmm0
       vmovsd    qword ptr [rsp+40],xmm6
       vpshufd   xmm0,xmm6,2
       vmovss    dword ptr [rsp+48],xmm0
       vmovss    xmm0,dword ptr [rsi+8]
       vmovsd    xmm1,qword ptr [rsi]
       vshufps   xmm1,xmm1,xmm0,44
       vmovsd    qword ptr [rsp+50],xmm1
       vpshufd   xmm0,xmm1,2
       vmovss    dword ptr [rsp+58],xmm0
       vmovdqu   ymm0,ymmword ptr [rsp+20]
       vmovdqu   ymmword ptr [rdi],ymm0
       vmovdqu   ymm0,ymmword ptr [rsp+40]
       vmovdqu   ymmword ptr [rdi+20],ymm0
       mov       rax,rdi
       vmovaps   xmm6,[rsp+80]
       vmovaps   xmm7,[rsp+70]
       vmovaps   xmm8,[rsp+60]
       add       rsp,98
       pop       rsi
       pop       rdi
       ret
; Total bytes of code 418

System.Numerics.Tests.Perf_Matrix4x4.CreateConstrainedBillboardBenchmark


Description of detection logic

IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsRegressionBase: Marked as not a regression because the compare was not 5% greater than the baseline, or the value was too small.
IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsImprovementWindowed: Marked as improvement because 29.194004052835457 < 29.32247219066154.
IsChangePoint: Marked as a change because one of 10/31/2022 10:23:25 PM, 12/28/2022 5:13:50 PM, 1/3/2023 6:29:42 AM falls between 12/23/2022 9:40:21 PM and 1/3/2023 6:29:42 AM.
IsImprovementStdDev: Marked as improvement because 14.512061347326483 (T) = (0 -29.236423889734663) / Math.Sqrt((3.323298949304289 / (299)) + (0.06780201896632884 / (14))) is greater than 1.9676211333067681 = MathNet.Numerics.Distributions.StudentT.InvCDF(0, 1, (299) + (14) - 2, .975) and 0.05900355398482968 = (31.069643263310823 - 29.236423889734663) / 31.069643263310823 is greater than 0.05.
IsChangeEdgeDetector: Marked not as a regression because Edge Detector said so.

### Baseline Jit Disasm

```assembly
; System.Numerics.Tests.Perf_Matrix4x4.CreateConstrainedBillboardBenchmark()
; Baseline codegen. Numeric operands in this listing are hexadecimal.
; Spills five 16-byte values to the stack and passes their addresses to
; Matrix4x4.CreateConstrainedBillboard, forwarding the incoming rdx as the
; callee's first argument (the callee writes its 64-byte result through it).
       push      rsi
       sub       rsp,80
       vzeroupper
       ; rsi keeps the incoming rdx across the call; it is returned in rax below.
       mov       rsi,rdx
       ; Baseline loads all four 16-byte constants up front into xmm0-xmm3.
       vmovupd   xmm0,[7FFD3CDD33A0]
       vmovupd   xmm1,[7FFD3CDD33B0]
       vmovupd   xmm2,[7FFD3CDD33C0]
       vmovupd   xmm3,[7FFD3CDD33D0]
       ; The first vector argument is all zeros.
       vxorps    xmm4,xmm4,xmm4
       ; Spill the five values to stack slots rsp+70 .. rsp+30.
       vmovapd   [rsp+70],xmm4
       vmovapd   [rsp+60],xmm0
       vmovapd   [rsp+50],xmm1
       vmovapd   [rsp+40],xmm2
       vmovapd   [rsp+30],xmm3
       ; Arguments: rcx = result pointer, rdx/r8/r9 = addresses of the first
       ; three vectors, and the last two addresses go to stack slots
       ; [rsp+20] and [rsp+28].
       mov       rcx,rsi
       lea       rdx,[rsp+70]
       lea       r8,[rsp+60]
       lea       r9,[rsp+50]
       lea       rax,[rsp+40]
       mov       [rsp+20],rax
       lea       rax,[rsp+30]
       mov       [rsp+28],rax
       call      qword ptr [7FFD3D51B198]; System.Numerics.Matrix4x4.CreateConstrainedBillboard(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
       ; Return the saved result pointer.
       mov       rax,rsi
       add       rsp,80
       pop       rsi
       ret
; Total bytes of code 136
; System.Numerics.Matrix4x4.CreateConstrainedBillboard(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
; Baseline codegen. Numeric operands in this listing are hexadecimal.
; Register/stack inputs as used below:
;   rcx         = destination for the 64-byte result (also returned in rax)
;   rdx, r8, r9 = pointers to the first three 3-float vectors
;   [rsp+0B0], [rsp+0B8] = pointers to the fourth and fifth vectors
;                 (the caller's [rsp+20]/[rsp+28] slots, seen after the
;                 return address and this frame's 88-byte adjustment)
; Each 3-float load uses vmovss (third float) + vmovsd (first two floats)
; + vshufps 44, producing (f0, f1, f2, 0) in one xmm register.
; Each 3-float result vector is assembled from scalars with a
; vxorps / vmovss / vpslldq chain; the Compare listing uses vinsertps instead.
       sub       rsp,88
       vzeroupper
       ; xmm6-xmm9 are used as scratch below, so they are saved and restored.
       vmovaps   [rsp+70],xmm6
       vmovaps   [rsp+60],xmm7
       vmovaps   [rsp+50],xmm8
       vmovaps   [rsp+40],xmm9
       ; xmm1 = vector at [rdx]; it stays live until it is stored as row 3.
       vmovss    xmm0,dword ptr [rdx+8]
       vmovsd    xmm1,qword ptr [rdx]
       vshufps   xmm1,xmm1,xmm0,44
       ; xmm2 = vector at [r8]; xmm0 = xmm1 - xmm2.
       vmovss    xmm0,dword ptr [r8+8]
       vmovsd    xmm2,qword ptr [r8]
       vshufps   xmm2,xmm2,xmm0,44
       vsubps    xmm0,xmm1,xmm2
       ; xmm2 = dot(xmm0, xmm0) over lanes 0-2 (squared length), result in lane 0.
       vdpps     xmm2,xmm0,xmm0,71
       ; Compare against a scalar threshold (value not shown in the listing).
       ; Fall through when the squared length is below the threshold.
       vmovss    xmm3,dword ptr [7FFD3CDD3840]
       vucomiss  xmm3,xmm2
       jbe       short M01_L00
       ; Too-short difference vector: use the negated fourth vector instead.
       mov       rax,[rsp+0B0]
       vmovss    xmm0,dword ptr [rax+8]
       vmovsd    xmm2,qword ptr [rax]
       vshufps   xmm2,xmm2,xmm0,44
       vxorps    xmm0,xmm0,xmm0
       vsubps    xmm0,xmm0,xmm2
       jmp       short M01_L01
M01_L00:
       ; Scale xmm0 by (constant / sqrt(squared length)).
       ; NOTE(review): the constant is presumably 1.0f (normalization) - confirm.
       vsqrtss   xmm2,xmm2,xmm2
       vmovss    xmm3,dword ptr [7FFD3CDD3830]
       vdivss    xmm2,xmm3,xmm2
       vbroadcastss xmm2,xmm2
       vmulps    xmm0,xmm0,xmm2
M01_L01:
       ; xmm3 = vector at [r9]; xmm2 keeps a copy that is stored as row 1.
       vmovss    xmm2,dword ptr [r9+8]
       vmovsd    xmm3,qword ptr [r9]
       vshufps   xmm3,xmm3,xmm2,44
       vmovaps   xmm2,xmm3
       ; xmm4 = dot(xmm3, xmm0) ANDed with a mask constant.
       ; NOTE(review): the mask is presumably the float abs mask - confirm.
       vdpps     xmm4,xmm3,xmm0,71
       vandps    xmm4,xmm4,[7FFD3CDD3850]
       ; xmm5 = second scalar threshold (value not shown).
       vmovss    xmm5,dword ptr [7FFD3CDD3860]
       vucomiss  xmm4,xmm5
       jbe       near ptr M01_L04
       ; Masked dot product exceeds the threshold: work from the fifth vector.
       mov       rax,[rsp+0B8]
       vmovss    xmm0,dword ptr [rax+8]
       vmovsd    xmm4,qword ptr [rax]
       vshufps   xmm4,xmm4,xmm0,44
       vdpps     xmm0,xmm3,xmm4,71
       vandps    xmm0,xmm0,[7FFD3CDD3850]
       vucomiss  xmm0,xmm5
       jbe       short M01_L03
       ; The fifth vector also exceeds the threshold against xmm3: replace
       ; xmm4 with one of two constant vectors, chosen by testing the masked
       ; third component of xmm3.
       vunpckhps xmm0,xmm3,xmm3
       vandps    xmm0,xmm0,[7FFD3CDD3850]
       vucomiss  xmm0,xmm5
       ja        short M01_L02
       vmovupd   xmm4,[7FFD3CDD3820]
       jmp       short M01_L03
M01_L02:
       vmovupd   xmm4,[7FFD3CDD3830]
M01_L03:
       ; First cross product: cross(xmm3, xmm4), one scalar per component
       ; (xmm6 = x, xmm5 = y, xmm4 = z).
       vmovshdup xmm0,xmm3
       vunpckhps xmm5,xmm4,xmm4
       vmulss    xmm6,xmm0,xmm5
       vunpckhps xmm7,xmm3,xmm3
       vmovshdup xmm8,xmm4
       vmulss    xmm9,xmm7,xmm8
       vsubss    xmm6,xmm6,xmm9
       vmulss    xmm9,xmm7,xmm4
       vmulss    xmm5,xmm3,xmm5
       vsubss    xmm5,xmm9,xmm5
       vmulss    xmm8,xmm3,xmm8
       vmulss    xmm4,xmm0,xmm4
       vsubss    xmm4,xmm8,xmm4
       ; Pack (x, y, z, 0) into xmm8: insert z, shift up, insert y, shift up, insert x.
       vxorps    xmm8,xmm8,xmm8
       vmovss    xmm8,xmm8,xmm4
       vpslldq   xmm8,xmm8,4
       vmovss    xmm8,xmm8,xmm5
       vpslldq   xmm8,xmm8,4
       vmovss    xmm8,xmm8,xmm6
       vmovaps   xmm4,xmm8
       ; Normalize xmm4: divide by sqrt(dot(xmm4, xmm4)).
       vdpps     xmm5,xmm4,xmm4,71
       vsqrtss   xmm5,xmm5,xmm5
       vbroadcastss xmm5,xmm5
       vdivps    xmm4,xmm4,xmm5
       ; Shift left then right by 4 bytes to zero the top lane.
       vpslldq   xmm4,xmm4,4
       vpsrldq   xmm4,xmm4,4
       ; Second cross product: cross(xmm4, xmm3), packed into xmm3 and
       ; normalized into xmm0.
       vmovshdup xmm5,xmm4
       vmulss    xmm6,xmm5,xmm7
       vunpckhps xmm8,xmm4,xmm4
       vmulss    xmm9,xmm8,xmm0
       vsubss    xmm6,xmm6,xmm9
       vmulss    xmm8,xmm8,xmm3
       vmovaps   xmm9,xmm4
       vmulss    xmm7,xmm9,xmm7
       vsubss    xmm7,xmm8,xmm7
       vmulss    xmm0,xmm9,xmm0
       vmulss    xmm3,xmm5,xmm3
       vsubss    xmm0,xmm0,xmm3
       vxorps    xmm3,xmm3,xmm3
       vmovss    xmm3,xmm3,xmm0
       vpslldq   xmm3,xmm3,4
       vmovss    xmm3,xmm3,xmm7
       vpslldq   xmm3,xmm3,4
       vmovss    xmm3,xmm3,xmm6
       vmovaps   xmm0,xmm3
       vdpps     xmm3,xmm0,xmm0,71
       vsqrtss   xmm3,xmm3,xmm3
       vbroadcastss xmm3,xmm3
       vdivps    xmm0,xmm0,xmm3
       vpslldq   xmm0,xmm0,4
       vpsrldq   xmm0,xmm0,4
       jmp       near ptr M01_L05
M01_L04:
       ; Masked dot product within the threshold: same two-step sequence, but
       ; the first cross product is cross(xmm3, xmm0) using the direction
       ; vector computed above.
       vmovshdup xmm4,xmm3
       vunpckhps xmm5,xmm0,xmm0
       vmulss    xmm6,xmm4,xmm5
       vunpckhps xmm7,xmm3,xmm3
       vmovshdup xmm8,xmm0
       vmulss    xmm9,xmm7,xmm8
       vsubss    xmm6,xmm6,xmm9
       vmulss    xmm9,xmm7,xmm0
       vmulss    xmm5,xmm3,xmm5
       vsubss    xmm5,xmm9,xmm5
       vmulss    xmm8,xmm3,xmm8
       vmulss    xmm0,xmm4,xmm0
       vsubss    xmm0,xmm8,xmm0
       vxorps    xmm8,xmm8,xmm8
       vmovss    xmm8,xmm8,xmm0
       vpslldq   xmm8,xmm8,4
       vmovss    xmm8,xmm8,xmm5
       vpslldq   xmm8,xmm8,4
       vmovss    xmm8,xmm8,xmm6
       vmovaps   xmm0,xmm8
       ; Normalize the first cross product in xmm0.
       vdpps     xmm5,xmm0,xmm0,71
       vsqrtss   xmm5,xmm5,xmm5
       vbroadcastss xmm5,xmm5
       vdivps    xmm0,xmm0,xmm5
       vpslldq   xmm0,xmm0,4
       vpsrldq   xmm0,xmm0,4
       ; Second cross product: cross(xmm0, xmm3), normalized into xmm3.
       vmovshdup xmm5,xmm0
       vmulss    xmm6,xmm5,xmm7
       vunpckhps xmm8,xmm0,xmm0
       vmulss    xmm9,xmm8,xmm4
       vsubss    xmm6,xmm6,xmm9
       vmulss    xmm8,xmm8,xmm3
       vmovaps   xmm9,xmm0
       vmulss    xmm7,xmm9,xmm7
       vsubss    xmm7,xmm8,xmm7
       vmulss    xmm4,xmm9,xmm4
       vmulss    xmm3,xmm5,xmm3
       vsubss    xmm3,xmm4,xmm3
       vxorps    xmm4,xmm4,xmm4
       vmovss    xmm4,xmm4,xmm3
       vpslldq   xmm4,xmm4,4
       vmovss    xmm4,xmm4,xmm7
       vpslldq   xmm4,xmm4,4
       vmovss    xmm4,xmm4,xmm6
       vmovaps   xmm3,xmm4
       vdpps     xmm4,xmm3,xmm3,71
       vsqrtss   xmm4,xmm4,xmm4
       vbroadcastss xmm4,xmm4
       vdivps    xmm3,xmm3,xmm4
       vpslldq   xmm3,xmm3,4
       vpsrldq   xmm3,xmm3,4
       ; Move results into the registers M01_L05 expects:
       ; xmm4 = first cross product, xmm0 = second.
       vmovaps   xmm4,xmm0
       vmovaps   xmm0,xmm3
M01_L05:
       ; Build the four 16-byte rows at rsp .. rsp+3F:
       ;   row 0 = xmm4 (3 floats) + 0
       ;   row 1 = xmm2 (3 floats) + 0
       ;   row 2 = xmm0 (3 floats) + 0
       ;   row 3 = xmm1 (3 floats) + 3F800000 (bit pattern of 1.0f)
       vmovsd    qword ptr [rsp],xmm4
       vpshufd   xmm3,xmm4,2
       vmovss    dword ptr [rsp+8],xmm3
       xor       eax,eax
       mov       [rsp+0C],eax
       vmovsd    qword ptr [rsp+10],xmm2
       vpshufd   xmm3,xmm2,2
       vmovss    dword ptr [rsp+18],xmm3
       mov       [rsp+1C],eax
       vmovsd    qword ptr [rsp+20],xmm0
       vpshufd   xmm2,xmm0,2
       vmovss    dword ptr [rsp+28],xmm2
       mov       [rsp+2C],eax
       vmovsd    qword ptr [rsp+30],xmm1
       vpshufd   xmm0,xmm1,2
       vmovss    dword ptr [rsp+38],xmm0
       mov       dword ptr [rsp+3C],3F800000
       ; Copy the 64-byte result to [rcx] as two 32-byte moves and return rcx.
       vmovdqu   ymm0,ymmword ptr [rsp]
       vmovdqu   ymmword ptr [rcx],ymm0
       vmovdqu   ymm0,ymmword ptr [rsp+20]
       vmovdqu   ymmword ptr [rcx+20],ymm0
       mov       rax,rcx
       vmovaps   xmm6,[rsp+70]
       vmovaps   xmm7,[rsp+60]
       vmovaps   xmm8,[rsp+50]
       vmovaps   xmm9,[rsp+40]
       add       rsp,88
       ret
; Total bytes of code 884

Compare Jit Disasm

; System.Numerics.Tests.Perf_Matrix4x4.CreateConstrainedBillboardBenchmark()
; Compare codegen. Numeric operands in this listing are hexadecimal.
; Same shape as the Baseline listing: five 16-byte values are spilled to
; rsp+70 .. rsp+30 and their addresses are passed to CreateConstrainedBillboard.
; Here each constant is loaded through xmm0 and stored immediately, and the
; slots at rsp+60 and rsp+30 are both filled from the same constant address
; (7FFA676C3360), so only three distinct constants are read.
       push      rsi
       sub       rsp,80
       vzeroupper
       ; rsi keeps the incoming rdx across the call; it is returned in rax below.
       mov       rsi,rdx
       ; Slot rsp+70: all zeros.
       vxorps    xmm0,xmm0,xmm0
       vmovapd   [rsp+70],xmm0
       vmovupd   xmm0,[7FFA676C3360]
       vmovapd   [rsp+60],xmm0
       vmovupd   xmm0,[7FFA676C3370]
       vmovapd   [rsp+50],xmm0
       vmovupd   xmm0,[7FFA676C3380]
       vmovapd   [rsp+40],xmm0
       ; Same constant address as the rsp+60 slot.
       vmovupd   xmm0,[7FFA676C3360]
       vmovapd   [rsp+30],xmm0
       ; Arguments: rcx = result pointer, rdx/r8/r9 = addresses of the first
       ; three vectors, and the last two addresses go to stack slots
       ; [rsp+20] and [rsp+28].
       mov       rcx,rsi
       lea       rdx,[rsp+70]
       lea       r8,[rsp+60]
       lea       r9,[rsp+50]
       lea       rax,[rsp+40]
       mov       [rsp+20],rax
       lea       rax,[rsp+30]
       mov       [rsp+28],rax
       call      qword ptr [7FFA67E0B198]; System.Numerics.Matrix4x4.CreateConstrainedBillboard(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
       ; Return the saved result pointer.
       mov       rax,rsi
       add       rsp,80
       pop       rsi
       ret
; Total bytes of code 136
; System.Numerics.Matrix4x4.CreateConstrainedBillboard(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
; Compare codegen. Numeric operands in this listing are hexadecimal.
; Register/stack inputs as used below:
;   rcx         = destination for the 64-byte result (also returned in rax)
;   rdx, r8, r9 = pointers to the first three 3-float vectors
;   [rsp+0B0], [rsp+0B8] = pointers to the fourth and fifth vectors
; Control flow matches the Baseline listing. The visible difference is how
; each 3-float result vector is assembled from scalars: two vinsertps
; instructions (imm 10 inserts into lane 1; imm 28 inserts into lane 2 and
; zeroes lane 3) replace the Baseline's vxorps / vmovss / vpslldq chain.
       sub       rsp,88
       vzeroupper
       ; xmm6-xmm9 are used as scratch below, so they are saved and restored.
       vmovaps   [rsp+70],xmm6
       vmovaps   [rsp+60],xmm7
       vmovaps   [rsp+50],xmm8
       vmovaps   [rsp+40],xmm9
       ; xmm1 = vector at [rdx] as (f0, f1, f2, 0); stored as row 3 at the end.
       vmovss    xmm0,dword ptr [rdx+8]
       vmovsd    xmm1,qword ptr [rdx]
       vshufps   xmm1,xmm1,xmm0,44
       ; xmm2 = vector at [r8]; xmm0 = xmm1 - xmm2.
       vmovss    xmm0,dword ptr [r8+8]
       vmovsd    xmm2,qword ptr [r8]
       vshufps   xmm2,xmm2,xmm0,44
       vsubps    xmm0,xmm1,xmm2
       ; xmm2 = dot(xmm0, xmm0) over lanes 0-2 (squared length), result in lane 0.
       vdpps     xmm2,xmm0,xmm0,71
       ; Compare against a scalar threshold (value not shown in the listing).
       ; Fall through when the squared length is below the threshold.
       vmovss    xmm3,dword ptr [7FFA676C3780]
       vucomiss  xmm3,xmm2
       jbe       short M01_L00
       ; Too-short difference vector: use the negated fourth vector instead.
       mov       rax,[rsp+0B0]
       vmovss    xmm0,dword ptr [rax+8]
       vmovsd    xmm2,qword ptr [rax]
       vshufps   xmm2,xmm2,xmm0,44
       vxorps    xmm0,xmm0,xmm0
       vsubps    xmm0,xmm0,xmm2
       jmp       short M01_L01
M01_L00:
       ; Scale xmm0 by (constant / sqrt(squared length)).
       ; NOTE(review): the constant is presumably 1.0f (normalization) - confirm.
       vsqrtss   xmm2,xmm2,xmm2
       vmovss    xmm3,dword ptr [7FFA676C3784]
       vdivss    xmm2,xmm3,xmm2
       vbroadcastss xmm2,xmm2
       vmulps    xmm0,xmm0,xmm2
M01_L01:
       ; xmm3 = vector at [r9]; xmm2 keeps a copy that is stored as row 1.
       vmovss    xmm2,dword ptr [r9+8]
       vmovsd    xmm3,qword ptr [r9]
       vshufps   xmm3,xmm3,xmm2,44
       vmovaps   xmm2,xmm3
       ; xmm4 = dot(xmm3, xmm0) ANDed with a mask constant.
       ; NOTE(review): the mask is presumably the float abs mask - confirm.
       vdpps     xmm4,xmm3,xmm0,71
       vandps    xmm4,xmm4,[7FFA676C3790]
       ; xmm5 = second scalar threshold (value not shown).
       vmovss    xmm5,dword ptr [7FFA676C37A0]
       vucomiss  xmm4,xmm5
       jbe       near ptr M01_L04
       ; Masked dot product exceeds the threshold: work from the fifth vector.
       mov       rax,[rsp+0B8]
       vmovss    xmm0,dword ptr [rax+8]
       vmovsd    xmm4,qword ptr [rax]
       vshufps   xmm4,xmm4,xmm0,44
       vdpps     xmm0,xmm3,xmm4,71
       vandps    xmm0,xmm0,[7FFA676C3790]
       vucomiss  xmm0,xmm5
       jbe       short M01_L03
       ; The fifth vector also exceeds the threshold against xmm3: replace
       ; xmm4 with one of two constant vectors, chosen by testing the masked
       ; third component of xmm3.
       vunpckhps xmm0,xmm3,xmm3
       vandps    xmm0,xmm0,[7FFA676C3790]
       vucomiss  xmm0,xmm5
       ja        short M01_L02
       vmovupd   xmm4,[7FFA676C37B0]
       jmp       short M01_L03
M01_L02:
       vmovupd   xmm4,[7FFA676C37C0]
M01_L03:
       ; First cross product: cross(xmm3, xmm4).
       vmovshdup xmm0,xmm3
       vunpckhps xmm5,xmm4,xmm4
       vmulss    xmm6,xmm0,xmm5
       vunpckhps xmm7,xmm3,xmm3
       vmovshdup xmm8,xmm4
       vmulss    xmm9,xmm7,xmm8
       vsubss    xmm6,xmm6,xmm9
       vmulss    xmm9,xmm7,xmm4
       vmulss    xmm5,xmm3,xmm5
       vsubss    xmm5,xmm9,xmm5
       ; xmm5 = (x, y, ...): y inserted into lane 1 next to x.
       vinsertps xmm5,xmm6,xmm5,10
       vmulss    xmm6,xmm3,xmm8
       vmulss    xmm4,xmm0,xmm4
       vsubss    xmm4,xmm6,xmm4
       ; xmm4 = (x, y, z, 0): z inserted into lane 2, lane 3 zeroed.
       vinsertps xmm4,xmm5,xmm4,28
       ; Normalize xmm4: divide by sqrt(dot(xmm4, xmm4)).
       vdpps     xmm5,xmm4,xmm4,71
       vsqrtss   xmm5,xmm5,xmm5
       vbroadcastss xmm5,xmm5
       vdivps    xmm4,xmm4,xmm5
       ; Shift left then right by 4 bytes to zero the top lane.
       vpslldq   xmm4,xmm4,4
       vpsrldq   xmm4,xmm4,4
       ; Second cross product: cross(xmm4, xmm3), normalized into xmm0.
       vmovshdup xmm5,xmm4
       vmulss    xmm6,xmm5,xmm7
       vunpckhps xmm8,xmm4,xmm4
       vmulss    xmm9,xmm8,xmm0
       vsubss    xmm6,xmm6,xmm9
       vmulss    xmm8,xmm8,xmm3
       vmovaps   xmm9,xmm4
       vmulss    xmm7,xmm9,xmm7
       vsubss    xmm7,xmm8,xmm7
       vinsertps xmm6,xmm6,xmm7,10
       vmulss    xmm0,xmm9,xmm0
       vmulss    xmm3,xmm5,xmm3
       vsubss    xmm0,xmm0,xmm3
       vinsertps xmm0,xmm6,xmm0,28
       vdpps     xmm3,xmm0,xmm0,71
       vsqrtss   xmm3,xmm3,xmm3
       vbroadcastss xmm3,xmm3
       vdivps    xmm0,xmm0,xmm3
       vpslldq   xmm0,xmm0,4
       vpsrldq   xmm0,xmm0,4
       jmp       near ptr M01_L05
M01_L04:
       ; Masked dot product within the threshold: same two-step sequence, but
       ; the first cross product is cross(xmm3, xmm0) using the direction
       ; vector computed above.
       vmovshdup xmm4,xmm3
       vunpckhps xmm5,xmm0,xmm0
       vmulss    xmm6,xmm4,xmm5
       vunpckhps xmm7,xmm3,xmm3
       vmovshdup xmm8,xmm0
       vmulss    xmm9,xmm7,xmm8
       vsubss    xmm6,xmm6,xmm9
       vmulss    xmm9,xmm7,xmm0
       vmulss    xmm5,xmm3,xmm5
       vsubss    xmm5,xmm9,xmm5
       vinsertps xmm5,xmm6,xmm5,10
       vmulss    xmm6,xmm3,xmm8
       vmulss    xmm0,xmm4,xmm0
       vsubss    xmm0,xmm6,xmm0
       vinsertps xmm0,xmm5,xmm0,28
       ; Normalize the first cross product in xmm0.
       vdpps     xmm5,xmm0,xmm0,71
       vsqrtss   xmm5,xmm5,xmm5
       vbroadcastss xmm5,xmm5
       vdivps    xmm0,xmm0,xmm5
       vpslldq   xmm0,xmm0,4
       vpsrldq   xmm0,xmm0,4
       ; Second cross product: cross(xmm0, xmm3), normalized into xmm3.
       vmovshdup xmm5,xmm0
       vmulss    xmm6,xmm5,xmm7
       vunpckhps xmm8,xmm0,xmm0
       vmulss    xmm9,xmm8,xmm4
       vsubss    xmm6,xmm6,xmm9
       vmulss    xmm8,xmm8,xmm3
       vmovaps   xmm9,xmm0
       vmulss    xmm7,xmm9,xmm7
       vsubss    xmm7,xmm8,xmm7
       vinsertps xmm6,xmm6,xmm7,10
       vmulss    xmm4,xmm9,xmm4
       vmulss    xmm3,xmm5,xmm3
       vsubss    xmm3,xmm4,xmm3
       vinsertps xmm3,xmm6,xmm3,28
       vdpps     xmm4,xmm3,xmm3,71
       vsqrtss   xmm4,xmm4,xmm4
       vbroadcastss xmm4,xmm4
       vdivps    xmm3,xmm3,xmm4
       vpslldq   xmm3,xmm3,4
       vpsrldq   xmm3,xmm3,4
       ; Move results into the registers M01_L05 expects:
       ; xmm4 = first cross product, xmm0 = second.
       vmovaps   xmm4,xmm0
       vmovaps   xmm0,xmm3
M01_L05:
       ; Build the four 16-byte rows at rsp .. rsp+3F:
       ;   row 0 = xmm4 (3 floats) + 0
       ;   row 1 = xmm2 (3 floats) + 0
       ;   row 2 = xmm0 (3 floats) + 0
       ;   row 3 = xmm1 (3 floats) + 3F800000 (bit pattern of 1.0f)
       vmovsd    qword ptr [rsp],xmm4
       vpshufd   xmm3,xmm4,2
       vmovss    dword ptr [rsp+8],xmm3
       xor       eax,eax
       mov       [rsp+0C],eax
       vmovsd    qword ptr [rsp+10],xmm2
       vpshufd   xmm3,xmm2,2
       vmovss    dword ptr [rsp+18],xmm3
       mov       [rsp+1C],eax
       vmovsd    qword ptr [rsp+20],xmm0
       vpshufd   xmm2,xmm0,2
       vmovss    dword ptr [rsp+28],xmm2
       mov       [rsp+2C],eax
       vmovsd    qword ptr [rsp+30],xmm1
       vpshufd   xmm0,xmm1,2
       vmovss    dword ptr [rsp+38],xmm0
       mov       dword ptr [rsp+3C],3F800000
       ; Copy the 64-byte result to [rcx] as two 32-byte moves and return rcx.
       vmovdqu   ymm0,ymmword ptr [rsp]
       vmovdqu   ymmword ptr [rcx],ymm0
       vmovdqu   ymm0,ymmword ptr [rsp+20]
       vmovdqu   ymmword ptr [rcx+20],ymm0
       mov       rax,rcx
       vmovaps   xmm6,[rsp+70]
       vmovaps   xmm7,[rsp+60]
       vmovaps   xmm8,[rsp+50]
       vmovaps   xmm9,[rsp+40]
       add       rsp,88
       ret
; Total bytes of code 804

Docs

Profiling workflow for dotnet/runtime repository
Benchmarking workflow for dotnet/runtime repository

@performanceautofiler
Copy link
Author

performanceautofiler bot commented Jan 3, 2023

Run Information

Architecture x64
OS Windows 10.0.18362
Baseline 8c58fc2347820ce48e09605d8adddb993df9ebb5
Compare 1d15f2140f7eb30a976c66290491ec89cd628da0
Diff Diff

Improvements in PerfLabTests.CastingPerf

Benchmark Baseline Test Test/Base Test Quality Edge Detector Baseline IR Compare IR IR Ratio Baseline ETL Compare ETL
ScalarValueTypeObj - Duration of single invocation 438.38 μs 385.78 μs 0.88 0.04 False Trace Trace

graph
Test Report

Repro

git clone https://github.com/dotnet/performance.git
py .\performance\scripts\benchmarks_ci.py -f net8.0 --filter 'PerfLabTests.CastingPerf*'

Payloads

Baseline
Compare

Histogram

PerfLabTests.CastingPerf.ScalarValueTypeObj


Description of detection logic

IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsRegressionBase: Marked as not a regression because the compare was not 5% greater than the baseline, or the value was too small.
IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsImprovementWindowed: Marked as improvement because 385.7844543650793 < 416.3832572337962.
IsChangePoint: Marked as a change because one of 11/18/2022 8:48:12 PM, 12/17/2022 10:44:50 PM, 12/28/2022 5:13:50 PM, 1/3/2023 6:29:42 AM falls between 12/23/2022 9:40:21 PM and 1/3/2023 6:29:42 AM.
IsImprovementStdDev: Marked as improvement because 15.787190690987147 (T) = (0 -383826.62622452434) / Math.Sqrt((448854648.7871664 / (299)) + (3719204.32425675 / (14))) is greater than 1.9676211333067681 = MathNet.Numerics.Distributions.StudentT.InvCDF(0, 1, (299) + (14) - 2, .975) and 0.05183835249080365 = (404811.381300678 - 383826.62622452434) / 404811.381300678 is greater than 0.05.
IsChangeEdgeDetector: Marked not as a regression because Edge Detector said so.

### Baseline Jit Disasm

```assembly
; PerfLabTests.CastingPerf.ScalarValueTypeObj()
; Baseline codegen. Numeric operands in this listing are hexadecimal.
; Loop that, per iteration, type-checks one object reference against
; MT_PerfLabTests.FooSVT[] and stores it through CORINFO_HELP_ASSIGN_REF.
; Register roles: esi = iteration counter, edi = iteration limit,
; rbx = source object reference (loaded once before the loop),
; rbp = destination address passed to CORINFO_HELP_ASSIGN_REF.
       push      rdi
       push      rsi
       push      rbp
       push      rbx
       sub       rsp,28
       xor       esi,esi
       ; Load the loop limit; skip the loop entirely when it is <= 0.
       mov       edi,[7FF93A5B4C80]
       test      edi,edi
       jle       short M00_L02
       mov       rcx,28711806298
       mov       rbx,[rcx]
       mov       rbp,28711806288
M00_L00:
       mov       rdx,rbx
       mov       rax,rdx
       ; A null reference needs no check.
       test      rax,rax
       je        short M00_L01
       ; Inline fast path: the first word of the object equals the expected
       ; type constant, so the helper call is skipped.
       mov       rcx,offset MT_PerfLabTests.FooSVT[]
       cmp       [rax],rcx
       je        short M00_L01
       ; Otherwise call the helper with rcx = type constant, rdx = object;
       ; its result in rax is what gets stored.
       call      qword ptr [7FF939DED858]; System.Runtime.CompilerServices.CastHelpers.ChkCastAny(Void*, System.Object)
M00_L01:
       ; Store the reference in rax to the address in rbp via the helper.
       mov       rcx,rbp
       mov       rdx,rax
       call      CORINFO_HELP_ASSIGN_REF
       inc       esi
       cmp       esi,edi
       jl        short M00_L00
M00_L02:
       add       rsp,28
       pop       rbx
       pop       rbp
       pop       rsi
       pop       rdi
       ret
; Total bytes of code 101
; System.Runtime.CompilerServices.CastHelpers.ChkCastAny(Void*, System.Object)
; Baseline codegen. Numeric operands in this listing are hexadecimal.
; Inputs: rcx = first argument (Void*), rdx = second argument (object).
; Neither register is modified before the slow-path call at M01_L04.
; Returns rdx unchanged on the fast paths; otherwise returns whatever
; ChkCastAny_NoCacheLookup returns.
; Shape: hash (rax, rcx) into a table of 24-byte entries, then probe up to
; 8 entries for one whose +8 field equals rax and whose +10 field xor rcx
; equals 1.
       push      rdi
       push      rsi
       sub       rsp,28
       ; Null object: return it as-is.
       test      rdx,rdx
       je        near ptr M01_L03
       ; rax = first word of the object; an exact match with rcx returns it as-is.
       mov       rax,[rdx]
       cmp       rax,rcx
       je        near ptr M01_L03
       ; r8 = table base: pointer loaded from a fixed address, plus 10.
       mov       r8,28711800D08
       mov       r8,[r8]
       add       r8,10
       ; Hash: (rotate-right(rax, 20) xor rcx) * 9E3779B97F4A7C15, shifted
       ; right by the count stored in the dword at [r8].
       rorx      r9,rax,20
       xor       r9,rcx
       mov       r10,9E3779B97F4A7C15
       imul      r9,r10
       mov       r10d,[r8]
       shrx      r9,r9,r10
       ; r10d = probe counter.
       xor       r10d,r10d
M01_L00:
       ; r11 = r8 + (r9 + 1) * 18: address of the entry being probed.
       lea       r11d,[r9+1]
       movsxd    r11,r11d
       lea       r11,[r11+r11*2]
       lea       r11,[r8+r11*8]
       ; esi = entry dword at +0 with bit 0 cleared; rdi = entry field at +8.
       mov       esi,[r11]
       mov       rdi,[r11+8]
       and       esi,0FFFFFFFE
       cmp       rdi,rax
       jne       short M01_L01
       ; +8 matched rax: rdi = (entry field at +10) xor rcx; a value of 0 or 1
       ; means the remaining bits match rcx.
       mov       rdi,[r11+10]
       xor       rdi,rcx
       cmp       rdi,1
       jbe       short M01_L02
M01_L01:
       ; Miss on this entry. A zero masked dword ends the search; otherwise
       ; advance the index by the incremented probe counter, mask it with the
       ; dword at [r8+4], and retry while the counter is below 8.
       test      esi,esi
       je        short M01_L04
       inc       r10d
       add       r9d,r10d
       and       r9d,[r8+4]
       cmp       r10d,8
       jl        short M01_L00
       jmp       short M01_L04
M01_L02:
       ; Re-read the entry's first dword; any difference from the masked value
       ; read earlier sends the call to the slow path.
       cmp       esi,[r11]
       jne       short M01_L04
       ; Only an xor result of exactly 1 takes the fast return.
       mov       eax,edi
       cmp       eax,1
       jne       short M01_L04
M01_L03:
       ; Fast return: hand back the object unchanged.
       mov       rax,rdx
       add       rsp,28
       pop       rsi
       pop       rdi
       ret
M01_L04:
       ; Slow path: rcx/rdx still hold the original arguments.
       call      System.Runtime.CompilerServices.CastHelpers.ChkCastAny_NoCacheLookup(Void*, System.Object)
       nop
       add       rsp,28
       pop       rsi
       pop       rdi
       ret
; Total bytes of code 178

Compare Jit Disasm

; PerfLabTests.CastingPerf.ScalarValueTypeObj()
; Compare codegen. Numeric operands in this listing are hexadecimal.
; Instruction sequence matches the Baseline listing; only the absolute
; addresses differ.
; Register roles: esi = iteration counter, edi = iteration limit,
; rbx = source object reference (loaded once before the loop),
; rbp = destination address passed to CORINFO_HELP_ASSIGN_REF.
       push      rdi
       push      rsi
       push      rbp
       push      rbx
       sub       rsp,28
       xor       esi,esi
       ; Load the loop limit; skip the loop entirely when it is <= 0.
       mov       edi,[7FF963974C80]
       test      edi,edi
       jle       short M00_L02
       mov       rcx,271B9406298
       mov       rbx,[rcx]
       mov       rbp,271B9406288
M00_L00:
       mov       rdx,rbx
       mov       rax,rdx
       ; A null reference needs no check.
       test      rax,rax
       je        short M00_L01
       ; Inline fast path: the first word of the object equals the expected
       ; type constant, so the helper call is skipped.
       mov       rcx,offset MT_PerfLabTests.FooSVT[]
       cmp       [rax],rcx
       je        short M00_L01
       ; Otherwise call the helper with rcx = type constant, rdx = object;
       ; its result in rax is what gets stored.
       call      qword ptr [7FF9631AD858]; System.Runtime.CompilerServices.CastHelpers.ChkCastAny(Void*, System.Object)
M00_L01:
       ; Store the reference in rax to the address in rbp via the helper.
       mov       rcx,rbp
       mov       rdx,rax
       call      CORINFO_HELP_ASSIGN_REF
       inc       esi
       cmp       esi,edi
       jl        short M00_L00
M00_L02:
       add       rsp,28
       pop       rbx
       pop       rbp
       pop       rsi
       pop       rdi
       ret
; Total bytes of code 101
; System.Runtime.CompilerServices.CastHelpers.ChkCastAny(Void*, System.Object)
; Compare codegen. Numeric operands in this listing are hexadecimal.
; Instruction sequence matches the Baseline listing; only the absolute
; address the table pointer is loaded from differs.
; Inputs: rcx = first argument (Void*), rdx = second argument (object).
; Neither register is modified before the slow-path call at M01_L04.
; Returns rdx unchanged on the fast paths; otherwise returns whatever
; ChkCastAny_NoCacheLookup returns.
       push      rdi
       push      rsi
       sub       rsp,28
       ; Null object: return it as-is.
       test      rdx,rdx
       je        near ptr M01_L03
       ; rax = first word of the object; an exact match with rcx returns it as-is.
       mov       rax,[rdx]
       cmp       rax,rcx
       je        near ptr M01_L03
       ; r8 = table base: pointer loaded from a fixed address, plus 10.
       mov       r8,271B9400D10
       mov       r8,[r8]
       add       r8,10
       ; Hash: (rotate-right(rax, 20) xor rcx) * 9E3779B97F4A7C15, shifted
       ; right by the count stored in the dword at [r8].
       rorx      r9,rax,20
       xor       r9,rcx
       mov       r10,9E3779B97F4A7C15
       imul      r9,r10
       mov       r10d,[r8]
       shrx      r9,r9,r10
       ; r10d = probe counter.
       xor       r10d,r10d
M01_L00:
       ; r11 = r8 + (r9 + 1) * 18: address of the entry being probed.
       lea       r11d,[r9+1]
       movsxd    r11,r11d
       lea       r11,[r11+r11*2]
       lea       r11,[r8+r11*8]
       ; esi = entry dword at +0 with bit 0 cleared; rdi = entry field at +8.
       mov       esi,[r11]
       mov       rdi,[r11+8]
       and       esi,0FFFFFFFE
       cmp       rdi,rax
       jne       short M01_L01
       ; +8 matched rax: rdi = (entry field at +10) xor rcx; a value of 0 or 1
       ; means the remaining bits match rcx.
       mov       rdi,[r11+10]
       xor       rdi,rcx
       cmp       rdi,1
       jbe       short M01_L02
M01_L01:
       ; Miss on this entry. A zero masked dword ends the search; otherwise
       ; advance the index by the incremented probe counter, mask it with the
       ; dword at [r8+4], and retry while the counter is below 8.
       test      esi,esi
       je        short M01_L04
       inc       r10d
       add       r9d,r10d
       and       r9d,[r8+4]
       cmp       r10d,8
       jl        short M01_L00
       jmp       short M01_L04
M01_L02:
       ; Re-read the entry's first dword; any difference from the masked value
       ; read earlier sends the call to the slow path.
       cmp       esi,[r11]
       jne       short M01_L04
       ; Only an xor result of exactly 1 takes the fast return.
       mov       eax,edi
       cmp       eax,1
       jne       short M01_L04
M01_L03:
       ; Fast return: hand back the object unchanged.
       mov       rax,rdx
       add       rsp,28
       pop       rsi
       pop       rdi
       ret
M01_L04:
       ; Slow path: rcx/rdx still hold the original arguments.
       call      System.Runtime.CompilerServices.CastHelpers.ChkCastAny_NoCacheLookup(Void*, System.Object)
       nop
       add       rsp,28
       pop       rsi
       pop       rdi
       ret
; Total bytes of code 178

Docs

Profiling workflow for dotnet/runtime repository
Benchmarking workflow for dotnet/runtime repository

### Run Information
Architecture x64
OS Windows 10.0.18362
Baseline 8c58fc2347820ce48e09605d8adddb993df9ebb5
Compare 1d15f2140f7eb30a976c66290491ec89cd628da0
Diff Diff

Improvements in System.Numerics.Tests.Perf_Plane

Benchmark Baseline Test Test/Base Test Quality Edge Detector Baseline IR Compare IR IR Ratio Baseline ETL Compare ETL
TransformByMatrix4x4Benchmark - Duration of single invocation 34.78 ns 32.77 ns 0.94 0.01 False Trace Trace

graph
Test Report

Repro

git clone https://github.com/dotnet/performance.git
py .\performance\scripts\benchmarks_ci.py -f net8.0 --filter 'System.Numerics.Tests.Perf_Plane*'

Payloads

Baseline
Compare

Histogram

System.Numerics.Tests.Perf_Plane.TransformByMatrix4x4Benchmark


Description of detection logic

IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsRegressionBase: Marked as not a regression because the compare was not 5% greater than the baseline, or the value was too small.
IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsImprovementWindowed: Marked as improvement because 32.76973612871624 < 33.030895501781075.
IsChangePoint: Marked as a change because one of 12/28/2022 5:13:50 PM, 1/3/2023 6:29:42 AM falls between 12/23/2022 9:40:21 PM and 1/3/2023 6:29:42 AM.
IsImprovementStdDev: Marked as improvement because 38.481267995544115 (T) = (0 -32.78076698709662) / Math.Sqrt((0.204904688753246 / (299)) + (0.028475640986215304 / (14))) is greater than 1.9676211333067681 = MathNet.Numerics.Distributions.StudentT.InvCDF(0, 1, (299) + (14) - 2, .975) and 0.05768374604111148 = (34.78743664812852 - 32.78076698709662) / 34.78743664812852 is greater than 0.05.
IsChangeEdgeDetector: Marked not as a regression because Edge Detector said so.

### Baseline Jit Disasm

```assembly
; System.Numerics.Tests.Perf_Plane.TransformByMatrix4x4Benchmark()
; Baseline codegen. Numeric operands in this listing are hexadecimal.
; Steps: load four floats from a fixed location, fetch the identity matrix,
; invert a copy of it into [rsp+60], then compute four 4-element dot
; products of the loaded floats with the 16-byte groups at rsp+60, +70, +80
; and +90, and store the four results through the incoming rdx (returned in rax).
       push      rsi
       sub       rsp,100
       vzeroupper
       ; xmm6/xmm7 hold the input floats across both calls, so they are saved first.
       vmovaps   [rsp+0F0],xmm6
       vmovaps   [rsp+0E0],xmm7
       ; rsi keeps the incoming rdx (result destination) across the calls.
       mov       rsi,rdx
       ; rcx = (pointer loaded from a fixed address) + 8: address of the 16 input bytes.
       mov       rcx,23F87C06A50
       mov       rcx,[rcx]
       add       rcx,8
       ; xmm6 = (f0, f1, f2, 0); xmm7 = f3 (the float at +0C).
       ; NOTE(review): presumably a Plane's normal and distance - confirm.
       vmovss    xmm0,dword ptr [rcx+8]
       vmovsd    xmm6,qword ptr [rcx]
       vshufps   xmm6,xmm6,xmm0,44
       vmovss    xmm7,dword ptr [rcx+0C]
       ; get_Identity writes 64 bytes to the buffer at rsp+0A0.
       lea       rcx,[rsp+0A0]
       call      qword ptr [7FFC37A9B030]; System.Numerics.Matrix4x4.get_Identity()
       ; Copy those 64 bytes to rsp+20 to pass as the first argument.
       vmovdqu   ymm0,ymmword ptr [rsp+0A0]
       vmovdqu   ymmword ptr [rsp+20],ymm0
       vmovdqu   ymm0,ymmword ptr [rsp+0C0]
       vmovdqu   ymmword ptr [rsp+40],ymm0
       ; rcx = address of the copy, rdx = address of the output at rsp+60.
       ; The value returned in rax is not inspected afterwards.
       lea       rcx,[rsp+20]
       lea       rdx,[rsp+60]
       call      qword ptr [7FFC37A9B600]; System.Numerics.Matrix4x4.<Invert>g__SseImpl|68_0(System.Numerics.Matrix4x4, System.Numerics.Matrix4x4 ByRef)
       ; Split the input into scalars: xmm0 = f0, xmm1 = f1, xmm2 = f2 (f3 stays in xmm7).
       vmovaps   xmm0,xmm6
       vmovshdup xmm1,xmm6
       vunpckhps xmm2,xmm6,xmm6
       ; xmm3 = dot of (f0..f3) with the four floats at rsp+60.
       vmulss    xmm3,xmm0,dword ptr [rsp+60]
       vmulss    xmm4,xmm1,dword ptr [rsp+64]
       vaddss    xmm3,xmm3,xmm4
       vmulss    xmm4,xmm2,dword ptr [rsp+68]
       vaddss    xmm3,xmm3,xmm4
       vmulss    xmm4,xmm7,dword ptr [rsp+6C]
       vaddss    xmm3,xmm3,xmm4
       ; xmm4 = dot with the four floats at rsp+70.
       vmulss    xmm4,xmm0,dword ptr [rsp+70]
       vmulss    xmm5,xmm1,dword ptr [rsp+74]
       vaddss    xmm4,xmm4,xmm5
       vmulss    xmm5,xmm2,dword ptr [rsp+78]
       vaddss    xmm4,xmm4,xmm5
       vmulss    xmm5,xmm7,dword ptr [rsp+7C]
       vaddss    xmm4,xmm4,xmm5
       ; xmm5 = dot with the four floats at rsp+80 (xmm6 is reused as scratch
       ; now that its lanes were copied out above).
       vmulss    xmm5,xmm0,dword ptr [rsp+80]
       vmulss    xmm6,xmm1,dword ptr [rsp+84]
       vaddss    xmm5,xmm5,xmm6
       vmulss    xmm6,xmm2,dword ptr [rsp+88]
       vaddss    xmm5,xmm5,xmm6
       vmulss    xmm6,xmm7,dword ptr [rsp+8C]
       vaddss    xmm5,xmm5,xmm6
       ; xmm0 = dot with the four floats at rsp+90.
       vmulss    xmm0,xmm0,dword ptr [rsp+90]
       vmulss    xmm1,xmm1,dword ptr [rsp+94]
       vaddss    xmm0,xmm0,xmm1
       vmulss    xmm1,xmm2,dword ptr [rsp+98]
       vaddss    xmm0,xmm0,xmm1
       vmulss    xmm1,xmm7,dword ptr [rsp+9C]
       vaddss    xmm0,xmm0,xmm1
       ; Pack (xmm3, xmm4, xmm5, 0) into xmm1 by insert-and-shift.
       vxorps    xmm1,xmm1,xmm1
       vmovss    xmm1,xmm1,xmm5
       vpslldq   xmm1,xmm1,4
       vmovss    xmm1,xmm1,xmm4
       vpslldq   xmm1,xmm1,4
       vmovss    xmm1,xmm1,xmm3
       ; Store the three packed floats at [rsi] .. [rsi+8] and the fourth
       ; dot product at [rsi+0C].
       vmovsd    qword ptr [rsi],xmm1
       vpshufd   xmm2,xmm1,2
       vmovss    dword ptr [rsi+8],xmm2
       vmovss    dword ptr [rsi+0C],xmm0
       mov       rax,rsi
       vmovaps   xmm6,[rsp+0F0]
       vmovaps   xmm7,[rsp+0E0]
       add       rsp,100
       pop       rsi
       ret
; Total bytes of code 383
; System.Numerics.Matrix4x4.get_Identity()
; Baseline codegen. Numeric operands in this listing are hexadecimal.
; Copies 64 bytes from (pointer loaded from a fixed address) + 8 to the
; buffer in rcx, as two 32-byte moves, and returns rcx in rax.
       vzeroupper
       mov       rax,23F87C00460
       mov       rax,[rax]
       vmovdqu   ymm0,ymmword ptr [rax+8]
       vmovdqu   ymmword ptr [rcx],ymm0
       vmovdqu   ymm0,ymmword ptr [rax+28]
       vmovdqu   ymmword ptr [rcx+20],ymm0
       mov       rax,rcx
       ret
; Total bytes of code 39
; System.Numerics.Matrix4x4.<Invert>g__SseImpl|68_0(System.Numerics.Matrix4x4, System.Numerics.Matrix4x4 ByRef)
       sub       rsp,118
       vzeroupper
       vmovaps   [rsp+100],xmm6
       vmovaps   [rsp+0F0],xmm7
       vmovaps   [rsp+0E0],xmm8
       vmovaps   [rsp+0D0],xmm9
       vmovaps   [rsp+0C0],xmm10
       vmovaps   [rsp+0B0],xmm11
       vmovaps   [rsp+0A0],xmm12
       vmovaps   [rsp+90],xmm13
       vmovaps   [rsp+80],xmm14
       vmovaps   [rsp+70],xmm15
       vmovups   xmm0,[rcx]
       vmovups   xmm1,[rcx+10]
       vmovups   xmm2,[rcx+20]
       vmovups   xmm3,[rcx+30]
       vshufps   xmm4,xmm0,xmm1,44
       vshufps   xmm5,xmm0,xmm1,0EE
       vshufps   xmm1,xmm2,xmm3,44
       vshufps   xmm3,xmm2,xmm3,0EE
       vshufps   xmm0,xmm4,xmm1,88
       vshufps   xmm1,xmm4,xmm1,0DD
       vshufps   xmm2,xmm5,xmm3,88
       vshufps   xmm3,xmm5,xmm3,0DD
       vpermilps xmm4,xmm2,50
       vpermilps xmm5,xmm3,0EE
       vpermilps xmm6,xmm0,50
       vpermilps xmm7,xmm1,0EE
       vshufps   xmm8,xmm2,xmm0,88
       vshufps   xmm9,xmm3,xmm1,0DD
       vmulps    xmm10,xmm4,xmm5
       vmulps    xmm11,xmm6,xmm7
       vmulps    xmm12,xmm8,xmm9
       vpermilps xmm4,xmm2,0EE
       vpermilps xmm5,xmm3,50
       vpermilps xmm6,xmm0,0EE
       vpermilps xmm7,xmm1,50
       vshufps   xmm8,xmm2,xmm0,0DD
       vshufps   xmm9,xmm3,xmm1,88
       vmulps    xmm4,xmm4,xmm5
       vsubps    xmm10,xmm10,xmm4
       vmulps    xmm5,xmm6,xmm7
       vsubps    xmm11,xmm11,xmm5
       vmulps    xmm4,xmm8,xmm9
       vsubps    xmm12,xmm12,xmm4
       vshufps   xmm7,xmm10,xmm12,5D
       vpermilps xmm4,xmm1,49
       vshufps   xmm5,xmm7,xmm10,32
       vpermilps xmm6,xmm0,12
       vshufps   xmm7,xmm7,xmm10,99
       vshufps   xmm13,xmm11,xmm12,0FD
       vpermilps xmm8,xmm3,49
       vshufps   xmm9,xmm13,xmm11,32
       vpermilps xmm14,xmm2,12
       vshufps   xmm13,xmm13,xmm11,99
       vmulps    xmm15,xmm4,xmm5
       vmulps    xmm4,xmm6,xmm7
       vmovapd   [rsp+60],xmm4
       vmulps    xmm5,xmm8,xmm9
       vmovapd   [rsp+50],xmm5
       vmulps    xmm7,xmm14,xmm13
       vmovapd   [rsp+40],xmm7
       vshufps   xmm6,xmm10,xmm12,4
       vpermilps xmm13,xmm1,9E
       vshufps   xmm8,xmm10,xmm6,93
       vpermilps xmm9,xmm0,7B
       vshufps   xmm6,xmm10,xmm6,26
       vshufps   xmm14,xmm11,xmm12,0A4
       vpermilps xmm7,xmm3,9E
       vshufps   xmm5,xmm11,xmm14,93
       vpermilps xmm4,xmm2,7B
       vshufps   xmm14,xmm11,xmm14,26
       vmulps    xmm13,xmm13,xmm8
       vsubps    xmm15,xmm15,xmm13
       vmulps    xmm8,xmm9,xmm6
       vmovapd   xmm13,[rsp+60]
       vsubps    xmm13,xmm13,xmm8
       vmulps    xmm5,xmm7,xmm5
       vmovapd   xmm8,[rsp+50]
       vsubps    xmm8,xmm8,xmm5
       vmulps    xmm4,xmm4,xmm14
       vmovapd   xmm7,[rsp+40]
       vsubps    xmm7,xmm7,xmm4
       vpermilps xmm1,xmm1,33
       vshufps   xmm4,xmm10,xmm12,4A
       vpermilps xmm4,xmm4,2C
       vpermilps xmm9,xmm0,8D
       vshufps   xmm6,xmm10,xmm12,4C
       vpermilps xmm6,xmm6,93
       vpermilps xmm3,xmm3,33
       vshufps   xmm5,xmm11,xmm12,0EA
       vpermilps xmm5,xmm5,2C
       vpermilps xmm2,xmm2,8D
       vshufps   xmm14,xmm11,xmm12,0EC
       vpermilps xmm14,xmm14,93
       vmulps    xmm1,xmm1,xmm4
       vmulps    xmm9,xmm9,xmm6
       vmulps    xmm3,xmm3,xmm5
       vmulps    xmm2,xmm2,xmm14
       vsubps    xmm4,xmm15,xmm1
       vaddps    xmm15,xmm15,xmm1
       vaddps    xmm1,xmm13,xmm9
       vsubps    xmm13,xmm13,xmm9
       vsubps    xmm5,xmm8,xmm3
       vaddps    xmm8,xmm8,xmm3
       vaddps    xmm3,xmm7,xmm2
       vsubps    xmm7,xmm7,xmm2
       vshufps   xmm15,xmm15,xmm4,0D8
       vshufps   xmm13,xmm13,xmm1,0D8
       vshufps   xmm8,xmm8,xmm5,0D8
       vshufps   xmm7,xmm7,xmm3,0D8
       vpermilps xmm15,xmm15,0D8
       vpermilps xmm13,xmm13,0D8
       vpermilps xmm8,xmm8,0D8
       vpermilps xmm7,xmm7,0D8
       vdpps     xmm0,xmm15,xmm0,0F1
       vandps    xmm1,xmm0,[7FFC37173CD0]
       vmovss    xmm2,dword ptr [7FFC37173CE0]
       vucomiss  xmm2,xmm1
       jbe       near ptr M02_L00
       vxorps    ymm0,ymm0,ymm0
       vmovdqu   ymmword ptr [rsp],ymm0
       vmovdqu   ymmword ptr [rsp+20],ymm0
       vmovss    xmm0,dword ptr [7FFC37173CE4]
       vmovss    dword ptr [rsp],xmm0
       vmovss    dword ptr [rsp+4],xmm0
       vmovss    dword ptr [rsp+8],xmm0
       vmovss    dword ptr [rsp+0C],xmm0
       vmovss    dword ptr [rsp+10],xmm0
       vmovss    dword ptr [rsp+14],xmm0
       vmovss    dword ptr [rsp+18],xmm0
       vmovss    dword ptr [rsp+1C],xmm0
       vmovss    dword ptr [rsp+20],xmm0
       vmovss    dword ptr [rsp+24],xmm0
       vmovss    dword ptr [rsp+28],xmm0
       vmovss    dword ptr [rsp+2C],xmm0
       vmovss    dword ptr [rsp+30],xmm0
       vmovss    dword ptr [rsp+34],xmm0
       vmovss    dword ptr [rsp+38],xmm0
       vmovss    dword ptr [rsp+3C],xmm0
       vmovdqu   ymm0,ymmword ptr [rsp]
       vmovdqu   ymmword ptr [rdx],ymm0
       vmovdqu   ymm0,ymmword ptr [rsp+20]
       vmovdqu   ymmword ptr [rdx+20],ymm0
       xor       eax,eax
       vmovaps   xmm6,[rsp+100]
       vmovaps   xmm7,[rsp+0F0]
       vmovaps   xmm8,[rsp+0E0]
       vmovaps   xmm9,[rsp+0D0]
       vmovaps   xmm10,[rsp+0C0]
       vmovaps   xmm11,[rsp+0B0]
       vmovaps   xmm12,[rsp+0A0]
       vmovaps   xmm13,[rsp+90]
       vmovaps   xmm14,[rsp+80]
       vmovaps   xmm15,[rsp+70]
       add       rsp,118
       ret
M02_L00:
       vbroadcastss xmm0,xmm0
       vmovupd   xmm1,[7FFC37173CF0]
       vdivps    xmm0,xmm1,xmm0
       vmulps    xmm1,xmm15,xmm0
       vmulps    xmm2,xmm13,xmm0
       vmulps    xmm3,xmm8,xmm0
       vmulps    xmm0,xmm7,xmm0
       vmovupd   [rdx],xmm1
       vmovupd   [rdx+10],xmm2
       vmovupd   [rdx+20],xmm3
       vmovupd   [rdx+30],xmm0
       mov       eax,1
       vmovaps   xmm6,[rsp+100]
       vmovaps   xmm7,[rsp+0F0]
       vmovaps   xmm8,[rsp+0E0]
       vmovaps   xmm9,[rsp+0D0]
       vmovaps   xmm10,[rsp+0C0]
       vmovaps   xmm11,[rsp+0B0]
       vmovaps   xmm12,[rsp+0A0]
       vmovaps   xmm13,[rsp+90]
       vmovaps   xmm14,[rsp+80]
       vmovaps   xmm15,[rsp+70]
       add       rsp,118
       ret
; Total bytes of code 1056

Compare Jit Disasm

; System.Numerics.Tests.Perf_Plane.TransformByMatrix4x4Benchmark()
; JIT listing (Windows x64). What the instructions below do:
;   1. Load four consecutive floats through a pointer read from a static slot
;      (presumably the benchmark's Plane operand -- confirm against the test source).
;   2. Materialize Matrix4x4.Identity in a stack temp, copy it, and pass the copy to
;      Invert's SseImpl, which writes a 64-byte result at [rsp+60].
;   3. Form four 4-term dot products of the loaded floats with each 16-byte group of
;      that result and store the four scalars to the buffer passed in rdx.
; rdx on entry is kept in rsi, receives the 16-byte result, and is returned in rax.
       push      rsi
       sub       rsp,100
       vzeroupper
       ; xmm6/xmm7 are callee-saved in the Microsoft x64 ABI; they hold the operand
       ; across the two calls below and are restored before returning.
       vmovaps   [rsp+0F0],xmm6
       vmovaps   [rsp+0E0],xmm7
       mov       rsi,rdx
       mov       rcx,2516A406A50
       mov       rcx,[rcx]
       add       rcx,8
       ; xmm6 = (f0, f1, f2, 0): low two floats via vmovsd, third merged in by vshufps 44.
       vmovss    xmm0,dword ptr [rcx+8]
       vmovsd    xmm6,qword ptr [rcx]
       vshufps   xmm6,xmm6,xmm0,44
       ; xmm7 = f3 (scalar).
       vmovss    xmm7,dword ptr [rcx+0C]
       ; rcx = address of the 64-byte temp that get_Identity fills.
       lea       rcx,[rsp+0A0]
       call      qword ptr [7FFE0AABB030]; System.Numerics.Matrix4x4.get_Identity()
       ; Copy the 64-byte matrix from [rsp+0A0] to [rsp+20]; the copy is the by-value argument.
       vmovdqu   ymm0,ymmword ptr [rsp+0A0]
       vmovdqu   ymmword ptr [rsp+20],ymm0
       vmovdqu   ymm0,ymmword ptr [rsp+0C0]
       vmovdqu   ymmword ptr [rsp+40],ymm0
       ; rcx = &matrix copy, rdx = &result ([rsp+60]).
       lea       rcx,[rsp+20]
       lea       rdx,[rsp+60]
       call      qword ptr [7FFE0AABB600]; System.Numerics.Matrix4x4.<Invert>g__SseImpl|68_0(System.Numerics.Matrix4x4, System.Numerics.Matrix4x4 ByRef)
       ; The value SseImpl returns in eax is not examined here.
       ; Split the packed operand into scalars: xmm0[0]=f0, xmm1[0]=f1, xmm2[0]=f2 (f3 stays in xmm7).
       vmovaps   xmm0,xmm6
       vmovshdup xmm1,xmm6
       vunpckhps xmm2,xmm6,xmm6
       ; xmm3 = f0*[rsp+60] + f1*[rsp+64] + f2*[rsp+68] + f3*[rsp+6C]
       vmulss    xmm3,xmm0,dword ptr [rsp+60]
       vmulss    xmm4,xmm1,dword ptr [rsp+64]
       vaddss    xmm3,xmm3,xmm4
       vmulss    xmm4,xmm2,dword ptr [rsp+68]
       vaddss    xmm3,xmm3,xmm4
       vmulss    xmm4,xmm7,dword ptr [rsp+6C]
       vaddss    xmm3,xmm3,xmm4
       ; xmm4 = same dot product against the 16 bytes at [rsp+70].
       vmulss    xmm4,xmm0,dword ptr [rsp+70]
       vmulss    xmm5,xmm1,dword ptr [rsp+74]
       vaddss    xmm4,xmm4,xmm5
       vmulss    xmm5,xmm2,dword ptr [rsp+78]
       vaddss    xmm4,xmm4,xmm5
       vmulss    xmm5,xmm7,dword ptr [rsp+7C]
       vaddss    xmm4,xmm4,xmm5
       ; xmm5 = same dot product against [rsp+80]; xmm6 is reused as scratch from here on.
       vmulss    xmm5,xmm0,dword ptr [rsp+80]
       vmulss    xmm6,xmm1,dword ptr [rsp+84]
       vaddss    xmm5,xmm5,xmm6
       vmulss    xmm6,xmm2,dword ptr [rsp+88]
       vaddss    xmm5,xmm5,xmm6
       vmulss    xmm6,xmm7,dword ptr [rsp+8C]
       vaddss    xmm5,xmm5,xmm6
       ; xmm0 = same dot product against [rsp+90].
       vmulss    xmm0,xmm0,dword ptr [rsp+90]
       vmulss    xmm1,xmm1,dword ptr [rsp+94]
       vaddss    xmm0,xmm0,xmm1
       vmulss    xmm1,xmm2,dword ptr [rsp+98]
       vaddss    xmm0,xmm0,xmm1
       vmulss    xmm1,xmm7,dword ptr [rsp+9C]
       vaddss    xmm0,xmm0,xmm1
       ; Pack the first three results into xmm1 = (r0, r1, r2, 0) ...
       vinsertps xmm1,xmm3,xmm4,10
       vinsertps xmm1,xmm1,xmm5,28
       ; ... and store r0,r1 / r2 / r3 to the destination buffer.
       vmovsd    qword ptr [rsi],xmm1
       vpshufd   xmm2,xmm1,2
       vmovss    dword ptr [rsi+8],xmm2
       vmovss    dword ptr [rsi+0C],xmm0
       mov       rax,rsi
       vmovaps   xmm6,[rsp+0F0]
       vmovaps   xmm7,[rsp+0E0]
       add       rsp,100
       pop       rsi
       ret
; Total bytes of code 369
; System.Numerics.Matrix4x4.get_Identity()
; Copies 64 bytes from a statically addressed object into the caller's buffer.
; In:  rcx = destination buffer (64 bytes)
; Out: rax = rcx
       vzeroupper
       ; rax = pointer read from a fixed static slot; the data starts 8 bytes past it.
       mov       rax,2516A400460
       mov       rax,[rax]
       ; Two 32-byte moves transfer the whole 64-byte value.
       vmovdqu   ymm0,ymmword ptr [rax+8]
       vmovdqu   ymmword ptr [rcx],ymm0
       vmovdqu   ymm0,ymmword ptr [rax+28]
       vmovdqu   ymmword ptr [rcx+20],ymm0
       mov       rax,rcx
       ret
; Total bytes of code 39
; System.Numerics.Matrix4x4.<Invert>g__SseImpl|68_0(System.Numerics.Matrix4x4, System.Numerics.Matrix4x4 ByRef)
; In:  rcx = address of the 64-byte input matrix, rdx = address of the 64-byte result
; Out: eax = 1 on the M02_L00 path (result = four computed vectors scaled by a reciprocal),
;      eax = 0 on the fall-through path (result filled with one repeated scalar constant)
; Uses all of xmm0-xmm15; xmm6-xmm15 are saved/restored because they are callee-saved
; in the Microsoft x64 ABI. Three intermediates are additionally spilled to [rsp+40..6F].
       sub       rsp,118
       vzeroupper
       vmovaps   [rsp+100],xmm6
       vmovaps   [rsp+0F0],xmm7
       vmovaps   [rsp+0E0],xmm8
       vmovaps   [rsp+0D0],xmm9
       vmovaps   [rsp+0C0],xmm10
       vmovaps   [rsp+0B0],xmm11
       vmovaps   [rsp+0A0],xmm12
       vmovaps   [rsp+90],xmm13
       vmovaps   [rsp+80],xmm14
       vmovaps   [rsp+70],xmm15
       ; Load the four 16-byte rows of the input.
       vmovups   xmm0,[rcx]
       vmovups   xmm1,[rcx+10]
       vmovups   xmm2,[rcx+20]
       vmovups   xmm3,[rcx+30]
       ; 4x4 transpose: 44/EE pair up low/high halves of two rows, 88/DD pick even/odd
       ; lanes, leaving xmm0..xmm3 = columns 0..3 of the input.
       vshufps   xmm4,xmm0,xmm1,44
       vshufps   xmm5,xmm0,xmm1,0EE
       vshufps   xmm1,xmm2,xmm3,44
       vshufps   xmm3,xmm2,xmm3,0EE
       vshufps   xmm0,xmm4,xmm1,88
       vshufps   xmm1,xmm4,xmm1,0DD
       vshufps   xmm2,xmm5,xmm3,88
       vshufps   xmm3,xmm5,xmm3,0DD
       ; xmm10, xmm11, xmm12 = products of permuted columns ...
       vpermilps xmm4,xmm2,50
       vpermilps xmm5,xmm3,0EE
       vpermilps xmm6,xmm0,50
       vpermilps xmm7,xmm1,0EE
       vshufps   xmm8,xmm2,xmm0,88
       vshufps   xmm9,xmm3,xmm1,0DD
       vmulps    xmm10,xmm4,xmm5
       vmulps    xmm11,xmm6,xmm7
       vmulps    xmm12,xmm8,xmm9
       ; ... minus the products of the complementary permutations
       ; (differences of products -- presumably 2x2 sub-determinants; confirm against the C# source).
       vpermilps xmm4,xmm2,0EE
       vpermilps xmm5,xmm3,50
       vpermilps xmm6,xmm0,0EE
       vpermilps xmm7,xmm1,50
       vshufps   xmm8,xmm2,xmm0,0DD
       vshufps   xmm9,xmm3,xmm1,88
       vmulps    xmm4,xmm4,xmm5
       vsubps    xmm10,xmm10,xmm4
       vmulps    xmm5,xmm6,xmm7
       vsubps    xmm11,xmm11,xmm5
       vmulps    xmm4,xmm8,xmm9
       vsubps    xmm12,xmm12,xmm4
       ; First group of four products: one stays in xmm15, the other three are
       ; spilled to [rsp+60], [rsp+50], [rsp+40] and reloaded further down.
       vshufps   xmm7,xmm10,xmm12,5D
       vpermilps xmm4,xmm1,49
       vshufps   xmm5,xmm7,xmm10,32
       vpermilps xmm6,xmm0,12
       vshufps   xmm7,xmm7,xmm10,99
       vshufps   xmm13,xmm11,xmm12,0FD
       vpermilps xmm8,xmm3,49
       vshufps   xmm9,xmm13,xmm11,32
       vpermilps xmm14,xmm2,12
       vshufps   xmm13,xmm13,xmm11,99
       vmulps    xmm15,xmm4,xmm5
       vmulps    xmm4,xmm6,xmm7
       vmovapd   [rsp+60],xmm4
       vmulps    xmm5,xmm8,xmm9
       vmovapd   [rsp+50],xmm5
       vmulps    xmm7,xmm14,xmm13
       vmovapd   [rsp+40],xmm7
       ; Second group of four products, each subtracted from the matching first-group
       ; value; the running results end up in xmm15, xmm13, xmm8, xmm7.
       vshufps   xmm6,xmm10,xmm12,4
       vpermilps xmm13,xmm1,9E
       vshufps   xmm8,xmm10,xmm6,93
       vpermilps xmm9,xmm0,7B
       vshufps   xmm6,xmm10,xmm6,26
       vshufps   xmm14,xmm11,xmm12,0A4
       vpermilps xmm7,xmm3,9E
       vshufps   xmm5,xmm11,xmm14,93
       vpermilps xmm4,xmm2,7B
       vshufps   xmm14,xmm11,xmm14,26
       vmulps    xmm13,xmm13,xmm8
       vsubps    xmm15,xmm15,xmm13
       vmulps    xmm8,xmm9,xmm6
       vmovapd   xmm13,[rsp+60]
       vsubps    xmm13,xmm13,xmm8
       vmulps    xmm5,xmm7,xmm5
       vmovapd   xmm8,[rsp+50]
       vsubps    xmm8,xmm8,xmm5
       vmulps    xmm4,xmm4,xmm14
       vmovapd   xmm7,[rsp+40]
       vsubps    xmm7,xmm7,xmm4
       ; Third group of four products (xmm1, xmm9, xmm3, xmm2). xmm1..xmm3 are
       ; overwritten here; xmm0 still holds column 0 from the transpose.
       vpermilps xmm1,xmm1,33
       vshufps   xmm4,xmm10,xmm12,4A
       vpermilps xmm4,xmm4,2C
       vpermilps xmm9,xmm0,8D
       vshufps   xmm6,xmm10,xmm12,4C
       vpermilps xmm6,xmm6,93
       vpermilps xmm3,xmm3,33
       vshufps   xmm5,xmm11,xmm12,0EA
       vpermilps xmm5,xmm5,2C
       vpermilps xmm2,xmm2,8D
       vshufps   xmm14,xmm11,xmm12,0EC
       vpermilps xmm14,xmm14,93
       vmulps    xmm1,xmm1,xmm4
       vmulps    xmm9,xmm9,xmm6
       vmulps    xmm3,xmm3,xmm5
       vmulps    xmm2,xmm2,xmm14
       ; For each running result compute both (acc + product) and (acc - product) ...
       vsubps    xmm4,xmm15,xmm1
       vaddps    xmm15,xmm15,xmm1
       vaddps    xmm1,xmm13,xmm9
       vsubps    xmm13,xmm13,xmm9
       vsubps    xmm5,xmm8,xmm3
       vaddps    xmm8,xmm8,xmm3
       vaddps    xmm3,xmm7,xmm2
       vsubps    xmm7,xmm7,xmm2
       ; ... then interleave them: vshufps 0D8 takes lanes 0,2 of the first operand and
       ; lanes 1,3 of the second; vpermilps 0D8 puts them back in lane order, so even
       ; lanes come from the first operand and odd lanes from the second.
       vshufps   xmm15,xmm15,xmm4,0D8
       vshufps   xmm13,xmm13,xmm1,0D8
       vshufps   xmm8,xmm8,xmm5,0D8
       vshufps   xmm7,xmm7,xmm3,0D8
       vpermilps xmm15,xmm15,0D8
       vpermilps xmm13,xmm13,0D8
       vpermilps xmm8,xmm8,0D8
       vpermilps xmm7,xmm7,0D8
       ; xmm0[0] = 4-lane dot product of xmm15 with column 0 (presumably the determinant).
       vdpps     xmm0,xmm15,xmm0,0F1
       ; xmm1 = xmm0 AND a constant mask (presumably an abs() sign-clear mask -- value not shown);
       ; xmm2 = scalar threshold constant (presumably float epsilon -- value not shown).
       vandps    xmm1,xmm0,[7FFE0A193C80]
       vmovss    xmm2,dword ptr [7FFE0A193C90]
       ; Branch to M02_L00 when threshold <= masked value, or when the compare is unordered.
       vucomiss  xmm2,xmm1
       jbe       near ptr M02_L00
       ; Fall-through path: build a 64-byte temp at [rsp]. The zero fill is fully
       ; overwritten by the sixteen scalar stores that follow.
       vxorps    ymm0,ymm0,ymm0
       vmovdqu   ymmword ptr [rsp],ymm0
       vmovdqu   ymmword ptr [rsp+20],ymm0
       ; All sixteen floats are set to the same constant (presumably NaN -- value not shown).
       vmovss    xmm0,dword ptr [7FFE0A193C94]
       vmovss    dword ptr [rsp],xmm0
       vmovss    dword ptr [rsp+4],xmm0
       vmovss    dword ptr [rsp+8],xmm0
       vmovss    dword ptr [rsp+0C],xmm0
       vmovss    dword ptr [rsp+10],xmm0
       vmovss    dword ptr [rsp+14],xmm0
       vmovss    dword ptr [rsp+18],xmm0
       vmovss    dword ptr [rsp+1C],xmm0
       vmovss    dword ptr [rsp+20],xmm0
       vmovss    dword ptr [rsp+24],xmm0
       vmovss    dword ptr [rsp+28],xmm0
       vmovss    dword ptr [rsp+2C],xmm0
       vmovss    dword ptr [rsp+30],xmm0
       vmovss    dword ptr [rsp+34],xmm0
       vmovss    dword ptr [rsp+38],xmm0
       vmovss    dword ptr [rsp+3C],xmm0
       ; Copy the temp to the caller's result buffer and return 0.
       vmovdqu   ymm0,ymmword ptr [rsp]
       vmovdqu   ymmword ptr [rdx],ymm0
       vmovdqu   ymm0,ymmword ptr [rsp+20]
       vmovdqu   ymmword ptr [rdx+20],ymm0
       xor       eax,eax
       vmovaps   xmm6,[rsp+100]
       vmovaps   xmm7,[rsp+0F0]
       vmovaps   xmm8,[rsp+0E0]
       vmovaps   xmm9,[rsp+0D0]
       vmovaps   xmm10,[rsp+0C0]
       vmovaps   xmm11,[rsp+0B0]
       vmovaps   xmm12,[rsp+0A0]
       vmovaps   xmm13,[rsp+90]
       vmovaps   xmm14,[rsp+80]
       vmovaps   xmm15,[rsp+70]
       add       rsp,118
       ret
M02_L00:
       ; Broadcast the dot-product scalar, divide a vector constant by it
       ; (presumably all ones, giving the reciprocal -- value not shown) ...
       vbroadcastss xmm0,xmm0
       vmovupd   xmm1,[7FFE0A193CA0]
       vdivps    xmm0,xmm1,xmm0
       ; ... and scale the four computed vectors by the quotient.
       vmulps    xmm1,xmm15,xmm0
       vmulps    xmm2,xmm13,xmm0
       vmulps    xmm3,xmm8,xmm0
       vmulps    xmm0,xmm7,xmm0
       ; Store the four 16-byte rows of the result and return 1.
       vmovupd   [rdx],xmm1
       vmovupd   [rdx+10],xmm2
       vmovupd   [rdx+20],xmm3
       vmovupd   [rdx+30],xmm0
       mov       eax,1
       vmovaps   xmm6,[rsp+100]
       vmovaps   xmm7,[rsp+0F0]
       vmovaps   xmm8,[rsp+0E0]
       vmovaps   xmm9,[rsp+0D0]
       vmovaps   xmm10,[rsp+0C0]
       vmovaps   xmm11,[rsp+0B0]
       vmovaps   xmm12,[rsp+0A0]
       vmovaps   xmm13,[rsp+90]
       vmovaps   xmm14,[rsp+80]
       vmovaps   xmm15,[rsp+70]
       add       rsp,118
       ret
; Total bytes of code 1056

Docs

Profiling workflow for dotnet/runtime repository
Benchmarking workflow for dotnet/runtime repository

Run Information

Architecture x64
OS Windows 10.0.18362
Baseline 8c58fc2347820ce48e09605d8adddb993df9ebb5
Compare 1d15f2140f7eb30a976c66290491ec89cd628da0
Diff Diff

Improvements in System.Linq.Tests.Perf_Enumerable

Benchmark Baseline Test Test/Base Test Quality Edge Detector Baseline IR Compare IR IR Ratio Baseline ETL Compare ETL
WhereSingle_LastElementMatches - Duration of single invocation 333.95 ns 313.17 ns 0.94 0.07 False Trace Trace

graph
Test Report

Repro

git clone https://github.com/dotnet/performance.git
py .\performance\scripts\benchmarks_ci.py -f net8.0 --filter 'System.Linq.Tests.Perf_Enumerable*'

Payloads

Baseline
Compare

Histogram

System.Linq.Tests.Perf_Enumerable.WhereSingle_LastElementMatches(input: List)


Description of detection logic

IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsRegressionBase: Marked as not a regression because the compare was not 5% greater than the baseline, or the value was too small.
IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsImprovementWindowed: Marked as improvement because 313.17282115735014 < 319.82714756498723.
IsChangePoint: Marked as a change because one of 11/7/2022 4:10:19 PM, 11/17/2022 7:01:39 PM, 12/28/2022 5:13:50 PM, 1/3/2023 6:29:42 AM falls between 12/23/2022 9:40:21 PM and 1/3/2023 6:29:42 AM.
IsImprovementStdDev: Marked as improvement because 8.922121740389377 (T) = (0 -311.4975207969216) / Math.Sqrt((113.52642790739384 / (299)) + (53.439177061771915 / (14))) is greater than 1.9676211333067681 = MathNet.Numerics.Distributions.StudentT.InvCDF(0, 1, (299) + (14) - 2, .975) and 0.055425233636386476 = (329.77539935362915 - 311.4975207969216) / 329.77539935362915 is greater than 0.05.
IsChangeEdgeDetector: Marked not as a regression because Edge Detector said so.

Baseline Jit Disasm

; System.Linq.Tests.Perf_Enumerable.WhereSingle_LastElementMatches(System.Linq.Tests.LinqTestData)
; Flow: read a reference from the argument, obtain a cached Func<int,bool> delegate
; (creating and publishing it on first use), call Enumerable.Where, then
; Enumerable.TryGetSingle with a "found" byte at [rsp+20]; call a helper that does not
; return if the byte is 0.
       push      rdi
       push      rsi
       sub       rsp,28
       ; rsi = reference stored at offset 8 of the object passed in rdx.
       mov       rsi,[rdx+8]
       ; rdx = delegate cached in a static slot; skip creation when it is non-null.
       mov       rcx,1BF328071F0
       mov       rdx,[rcx]
       test      rdx,rdx
       jne       short M00_L00
       ; Slow path: allocate the Func<int,bool>, fill two of its fields
       ; (an object reference at +8 via the write barrier, a raw pointer at +18;
       ; presumably target and method pointer), then publish it to the static slot.
       mov       rcx,offset MT_System.Func`2[[System.Int32, System.Private.CoreLib],[System.Boolean, System.Private.CoreLib]]
       call      CORINFO_HELP_NEWSFAST
       mov       rdi,rax
       mov       rdx,1BF32807190
       mov       rdx,[rdx]
       lea       rcx,[rdi+8]
       call      CORINFO_HELP_ASSIGN_REF
       mov       rdx,7FF88EE48138
       mov       [rdi+18],rdx
       mov       rcx,1BF328071F0
       mov       rdx,rdi
       call      CORINFO_HELP_ASSIGN_REF
       mov       rdx,rdi
M00_L00:
       ; Where(source = rsi, predicate = rdx)
       mov       rcx,rsi
       call      qword ptr [7FF88EE49318]; System.Linq.Enumerable.Where[[System.Int32, System.Private.CoreLib]](System.Collections.Generic.IEnumerable`1<Int32>, System.Func`2<Int32,Boolean>)
       ; TryGetSingle(source = Where result, found = byte at [rsp+20])
       mov       rcx,rax
       lea       rdx,[rsp+20]
       call      qword ptr [7FF88EE495A0]; System.Linq.Enumerable.TryGetSingle[[System.Int32, System.Private.CoreLib]](System.Collections.Generic.IEnumerable`1<Int32>, Boolean ByRef)
       cmp       byte ptr [rsp+20],0
       je        short M00_L01
       ; Found: eax still holds TryGetSingle's return value.
       add       rsp,28
       pop       rsi
       pop       rdi
       ret
M00_L01:
       ; Not found: call a helper the dump does not name (presumably a throw helper;
       ; the int 3 after it marks the call as non-returning).
       call      qword ptr [7FF88EC277F8]
       int       3
; Total bytes of code 147
; System.Linq.Enumerable.Where[[System.Int32, System.Private.CoreLib]](System.Collections.Generic.IEnumerable`1<Int32>, System.Func`2<Int32,Boolean>)
; In:  rcx = source, rdx = predicate
; Out: rax = iterator object chosen by the runtime type of source
; Type tests are tried in this order: Iterator`1, Int32[], List`1, then the general
; WhereEnumerableIterator`1 fallback. Null source / predicate go to M01_L05 / M01_L06.
       push      rdi
       push      rsi
       push      rbp
       push      rbx
       sub       rsp,28
       mov       rsi,rcx
       mov       rdi,rdx
       test      rsi,rsi
       je        near ptr M01_L05
       test      rdi,rdi
       je        near ptr M01_L06
       ; Case 1: source is an Iterator`1 -> tail-jump to a virtual method reached via the
       ; object's first word, passing (iterator, predicate). The target is not named in the dump.
       mov       rdx,rsi
       mov       rcx,offset MT_System.Linq.Enumerable+Iterator`1[[System.Int32, System.Private.CoreLib]]
       call      qword ptr [7FF88E4CD828]; System.Runtime.CompilerServices.CastHelpers.IsInstanceOfClass(Void*, System.Object)
       test      rax,rax
       je        short M01_L00
       mov       rcx,rax
       mov       rdx,rdi
       mov       rax,[rax]
       mov       rax,[rax+48]
       add       rsp,28
       pop       rbx
       pop       rbp
       pop       rsi
       pop       rdi
       jmp       qword ptr [rax+10]
M01_L00:
       ; Case 2: source is an Int32[].
       mov       rdx,rsi
       mov       rcx,offset MT_System.Int32[]
       call      qword ptr [7FF88E4CD7F8]; System.Runtime.CompilerServices.CastHelpers.IsInstanceOfAny(Void*, System.Object)
       mov       rbx,rax
       test      rbx,rbx
       je        short M01_L02
       ; dword at [rbx+8] (presumably the array length) == 0 -> M01_L01.
       cmp       dword ptr [rbx+8],0
       je        short M01_L01
       ; Non-empty array: new WhereArrayIterator; thread id -> +8, source -> +18, predicate -> +20.
       mov       rcx,offset MT_System.Linq.Enumerable+WhereArrayIterator`1[[System.Int32, System.Private.CoreLib]]
       call      CORINFO_HELP_NEWSFAST
       mov       rbp,rax
       call      CORINFO_HELP_GETCURRENTMANAGEDTHREADID
       mov       [rbp+8],eax
       lea       rcx,[rbp+18]
       mov       rdx,rbx
       call      CORINFO_HELP_ASSIGN_REF
       lea       rcx,[rbp+20]
       mov       rdx,rdi
       call      CORINFO_HELP_ASSIGN_REF
       jmp       near ptr M01_L04
M01_L01:
       ; Zero case: run the class-init helper, then return the reference held in a
       ; static slot (presumably a shared empty enumerable -- not named in the dump).
       mov       rcx,7FF88EB26628
       mov       edx,4
       call      CORINFO_HELP_CLASSINIT_SHARED_DYNAMICCLASS
       mov       rdx,1BF32807CA0
       mov       rbp,[rdx]
       jmp       near ptr M01_L04
M01_L02:
       ; Case 3: source is a List`1 -> new WhereListIterator with the same field layout.
       mov       rdx,rsi
       mov       rcx,offset MT_System.Collections.Generic.List`1[[System.Int32, System.Private.CoreLib]]
       call      qword ptr [7FF88E4CD828]; System.Runtime.CompilerServices.CastHelpers.IsInstanceOfClass(Void*, System.Object)
       mov       rbp,rax
       test      rbp,rbp
       je        short M01_L03
       mov       rcx,offset MT_System.Linq.Enumerable+WhereListIterator`1[[System.Int32, System.Private.CoreLib]]
       call      CORINFO_HELP_NEWSFAST
       mov       rsi,rax
       call      CORINFO_HELP_GETCURRENTMANAGEDTHREADID
       mov       [rsi+8],eax
       lea       rcx,[rsi+18]
       mov       rdx,rbp
       call      CORINFO_HELP_ASSIGN_REF
       lea       rcx,[rsi+20]
       mov       rdx,rdi
       call      CORINFO_HELP_ASSIGN_REF
       mov       rbp,rsi
       jmp       short M01_L04
M01_L03:
       ; Fallback: any other source -> new WhereEnumerableIterator.
       mov       rcx,offset MT_System.Linq.Enumerable+WhereEnumerableIterator`1[[System.Int32, System.Private.CoreLib]]
       call      CORINFO_HELP_NEWSFAST
       mov       rbp,rax
       call      CORINFO_HELP_GETCURRENTMANAGEDTHREADID
       mov       [rbp+8],eax
       lea       rcx,[rbp+18]
       mov       rdx,rsi
       call      CORINFO_HELP_ASSIGN_REF
       lea       rcx,[rbp+20]
       mov       rdx,rdi
       call      CORINFO_HELP_ASSIGN_REF
M01_L04:
       ; Common exit: return the iterator held in rbp.
       mov       rax,rbp
       add       rsp,28
       pop       rbx
       pop       rbp
       pop       rsi
       pop       rdi
       ret
M01_L05:
       ; source == null: helper call with ecx = 10h (presumably an argument-null throw
       ; helper taking an argument id; it does not return).
       mov       ecx,10
       call      qword ptr [7FF88EC27798]
       int       3
M01_L06:
       ; predicate == null: same helper with ecx = 0Ch.
       mov       ecx,0C
       call      qword ptr [7FF88EC27798]
       int       3
; Total bytes of code 375
; System.Linq.Enumerable.TryGetSingle[[System.Int32, System.Private.CoreLib]](System.Collections.Generic.IEnumerable`1<Int32>, Boolean ByRef)
; In:  rcx = source, rdx = address of the "found" byte
; Out: eax = the single element (0 when nothing was found); *found = 1 or 0
; Two strategies: an IList`1 path (two interface calls) and an enumerator path whose
; cleanup lives in the funclet at M02_L07. Interface calls go through indirection
; cells loaded into r11; the dump does not name their targets.
; Frame slots: [rbp-28] = enumerator, [rbp-1C] = pending result, [rbp-30] = saved rsp.
       push      rbp
       push      rdi
       push      rsi
       push      rbx
       sub       rsp,38
       lea       rbp,[rsp+50]
       ; Record rsp in the frame; the funclet at M02_L07 reads it back through rcx.
       mov       [rbp-30],rsp
       mov       rdi,rcx
       mov       rsi,rdx
       test      rdi,rdi
       je        near ptr M02_L05
       ; IList`1 path?
       mov       rdx,rdi
       mov       rcx,offset MT_System.Collections.Generic.IList`1[[System.Int32, System.Private.CoreLib]]
       call      qword ptr [7FF88E4CD810]; System.Runtime.CompilerServices.CastHelpers.IsInstanceOfInterface(Void*, System.Object)
       mov       rbx,rax
       test      rbx,rbx
       je        short M02_L01
       ; First interface call returns an int (presumably Count): 0 -> M02_L00,
       ; 1 -> fetch the element, anything else -> M02_L06.
       mov       rcx,rbx
       mov       r11,7FF88E3205F8
       call      qword ptr [r11]
       test      eax,eax
       je        short M02_L00
       cmp       eax,1
       jne       near ptr M02_L06
       ; Exactly one: found = 1, second interface call with edx = 0 (presumably the
       ; indexer at index 0); its result is returned in eax.
       mov       byte ptr [rsi],1
       mov       rcx,rbx
       mov       r11,7FF88E320600
       xor       edx,edx
       call      qword ptr [r11]
       nop
       add       rsp,38
       pop       rbx
       pop       rsi
       pop       rdi
       pop       rbp
       ret
M02_L00:
       ; Zero case: found = 0, return 0.
       mov       byte ptr [rsi],0
       xor       eax,eax
       add       rsp,38
       pop       rbx
       pop       rsi
       pop       rdi
       pop       rbp
       ret
M02_L01:
       ; Enumerator path: the first call's result (presumably GetEnumerator) is kept in [rbp-28].
       mov       rcx,rdi
       mov       r11,7FF88E3205D0
       call      qword ptr [r11]
       mov       rcx,rax
       mov       [rbp-28],rcx
       ; Presumably MoveNext: a zero result means there are no elements.
       mov       r11,7FF88E3205D8
       call      qword ptr [r11]
       test      eax,eax
       jne       short M02_L02
       mov       byte ptr [rsi],0
       xor       ecx,ecx
       mov       [rbp-1C],ecx
       jmp       short M02_L04
M02_L02:
       ; edi = value from the next call (presumably Current); then the 5E8 cell is
       ; called (presumably MoveNext again) to check for a second element.
       mov       rcx,[rbp-28]
       mov       r11,7FF88E3205E0
       call      qword ptr [r11]
       mov       edi,eax
       mov       rcx,[rbp-28]
       mov       r11,7FF88E3205E8
       call      qword ptr [r11]
       test      eax,eax
       jne       short M02_L03
       ; No second element: found = 1, pending result = edi.
       mov       byte ptr [rsi],1
       mov       [rbp-1C],edi
       jmp       short M02_L04
M02_L03:
       ; More than one element: call the same cell the funclet uses (presumably
       ; Dispose) directly, then take the failure exit.
       mov       rcx,[rbp-28]
       mov       r11,7FF88E3205F0
       call      qword ptr [r11]
       jmp       short M02_L06
M02_L04:
       ; Normal exit from the enumerator path: run the funclet at M02_L07 with
       ; rcx = this frame's rsp, then return the pending result.
       mov       rcx,rsp
       call      M02_L07
       nop
       mov       eax,[rbp-1C]
       add       rsp,38
       pop       rbx
       pop       rsi
       pop       rdi
       pop       rbp
       ret
M02_L05:
       ; source == null: helper call with ecx = 10h (presumably an argument-null throw helper).
       mov       ecx,10
       call      qword ptr [7FF88EC27798]
       int       3
M02_L06:
       ; Failure exit: found = 0, then a helper that does not return (presumably throws).
       mov       byte ptr [rsi],0
       call      qword ptr [7FF88EC277C8]
       int       3
M02_L07:
       ; Funclet: rcx = parent's rsp. [rcx+20] is the slot written by
       ; "mov [rbp-30],rsp" in the prologue; adding 50h recreates the parent's rbp.
       push      rbp
       push      rdi
       push      rsi
       push      rbx
       sub       rsp,28
       mov       rbp,[rcx+20]
       mov       [rsp+20],rbp
       lea       rbp,[rbp+50]
       ; If the enumerator slot is non-null, call the 5F0 cell on it (presumably Dispose).
       mov       rcx,[rbp-28]
       test      rcx,rcx
       je        short M02_L08
       mov       r11,7FF88E3205F0
       call      qword ptr [r11]
M02_L08:
       nop
       add       rsp,28
       pop       rbx
       pop       rsi
       pop       rdi
       pop       rbp
       ret
; Total bytes of code 346

Compare Jit Disasm

; System.Linq.Tests.Perf_Enumerable.WhereSingle_LastElementMatches(System.Linq.Tests.LinqTestData)
; Flow: read a reference from the argument, obtain a cached Func<int,bool> delegate
; (creating and publishing it on first use), call Enumerable.Where, then
; Enumerable.TryGetSingle with a "found" byte at [rsp+20]; call a helper that does not
; return if the byte is 0.
       push      rdi
       push      rsi
       sub       rsp,28
       ; rsi = reference stored at offset 8 of the object passed in rdx.
       mov       rsi,[rdx+8]
       ; rdx = delegate cached in a static slot; skip creation when it is non-null.
       mov       rcx,19F928071F0
       mov       rdx,[rcx]
       test      rdx,rdx
       jne       short M00_L00
       ; Slow path: allocate the Func<int,bool>, fill two of its fields
       ; (an object reference at +8 via the write barrier, a raw pointer at +18;
       ; presumably target and method pointer), then publish it to the static slot.
       mov       rcx,offset MT_System.Func`2[[System.Int32, System.Private.CoreLib],[System.Boolean, System.Private.CoreLib]]
       call      CORINFO_HELP_NEWSFAST
       mov       rdi,rax
       mov       rdx,19F92807190
       mov       rdx,[rdx]
       lea       rcx,[rdi+8]
       call      CORINFO_HELP_ASSIGN_REF
       mov       rdx,7FFCED238138
       mov       [rdi+18],rdx
       mov       rcx,19F928071F0
       mov       rdx,rdi
       call      CORINFO_HELP_ASSIGN_REF
       mov       rdx,rdi
M00_L00:
       ; Where(source = rsi, predicate = rdx)
       mov       rcx,rsi
       call      qword ptr [7FFCED239318]; System.Linq.Enumerable.Where[[System.Int32, System.Private.CoreLib]](System.Collections.Generic.IEnumerable`1<Int32>, System.Func`2<Int32,Boolean>)
       ; TryGetSingle(source = Where result, found = byte at [rsp+20])
       mov       rcx,rax
       lea       rdx,[rsp+20]
       call      qword ptr [7FFCED2395A0]; System.Linq.Enumerable.TryGetSingle[[System.Int32, System.Private.CoreLib]](System.Collections.Generic.IEnumerable`1<Int32>, Boolean ByRef)
       cmp       byte ptr [rsp+20],0
       je        short M00_L01
       ; Found: eax still holds TryGetSingle's return value.
       add       rsp,28
       pop       rsi
       pop       rdi
       ret
M00_L01:
       ; Not found: call a helper the dump does not name (presumably a throw helper;
       ; the int 3 after it marks the call as non-returning).
       call      qword ptr [7FFCED0177F8]
       int       3
; Total bytes of code 147
; System.Linq.Enumerable.Where[[System.Int32, System.Private.CoreLib]](System.Collections.Generic.IEnumerable`1<Int32>, System.Func`2<Int32,Boolean>)
; In:  rcx = source, rdx = predicate
; Out: rax = iterator object chosen by the runtime type of source
; Type tests are tried in this order: Iterator`1, Int32[], List`1, then the general
; WhereEnumerableIterator`1 fallback. Null source / predicate go to M01_L05 / M01_L06.
       push      rdi
       push      rsi
       push      rbp
       push      rbx
       sub       rsp,28
       mov       rsi,rcx
       mov       rdi,rdx
       test      rsi,rsi
       je        near ptr M01_L05
       test      rdi,rdi
       je        near ptr M01_L06
       ; Case 1: source is an Iterator`1 -> tail-jump to a virtual method reached via the
       ; object's first word, passing (iterator, predicate). The target is not named in the dump.
       mov       rdx,rsi
       mov       rcx,offset MT_System.Linq.Enumerable+Iterator`1[[System.Int32, System.Private.CoreLib]]
       call      qword ptr [7FFCEC8BD828]; System.Runtime.CompilerServices.CastHelpers.IsInstanceOfClass(Void*, System.Object)
       test      rax,rax
       je        short M01_L00
       mov       rcx,rax
       mov       rdx,rdi
       mov       rax,[rax]
       mov       rax,[rax+48]
       add       rsp,28
       pop       rbx
       pop       rbp
       pop       rsi
       pop       rdi
       jmp       qword ptr [rax+10]
M01_L00:
       ; Case 2: source is an Int32[].
       mov       rdx,rsi
       mov       rcx,offset MT_System.Int32[]
       call      qword ptr [7FFCEC8BD7F8]; System.Runtime.CompilerServices.CastHelpers.IsInstanceOfAny(Void*, System.Object)
       mov       rbx,rax
       test      rbx,rbx
       je        short M01_L02
       ; dword at [rbx+8] (presumably the array length) == 0 -> M01_L01.
       cmp       dword ptr [rbx+8],0
       je        short M01_L01
       ; Non-empty array: new WhereArrayIterator; thread id -> +8, source -> +18, predicate -> +20.
       mov       rcx,offset MT_System.Linq.Enumerable+WhereArrayIterator`1[[System.Int32, System.Private.CoreLib]]
       call      CORINFO_HELP_NEWSFAST
       mov       rbp,rax
       call      CORINFO_HELP_GETCURRENTMANAGEDTHREADID
       mov       [rbp+8],eax
       lea       rcx,[rbp+18]
       mov       rdx,rbx
       call      CORINFO_HELP_ASSIGN_REF
       lea       rcx,[rbp+20]
       mov       rdx,rdi
       call      CORINFO_HELP_ASSIGN_REF
       jmp       near ptr M01_L04
M01_L01:
       ; Zero case: run the class-init helper, then return the reference held in a
       ; static slot (presumably a shared empty enumerable -- not named in the dump).
       mov       rcx,7FFCECF16620
       mov       edx,4
       call      CORINFO_HELP_CLASSINIT_SHARED_DYNAMICCLASS
       mov       rdx,19F92807CA8
       mov       rbp,[rdx]
       jmp       near ptr M01_L04
M01_L02:
       ; Case 3: source is a List`1 -> new WhereListIterator with the same field layout.
       mov       rdx,rsi
       mov       rcx,offset MT_System.Collections.Generic.List`1[[System.Int32, System.Private.CoreLib]]
       call      qword ptr [7FFCEC8BD828]; System.Runtime.CompilerServices.CastHelpers.IsInstanceOfClass(Void*, System.Object)
       mov       rbp,rax
       test      rbp,rbp
       je        short M01_L03
       mov       rcx,offset MT_System.Linq.Enumerable+WhereListIterator`1[[System.Int32, System.Private.CoreLib]]
       call      CORINFO_HELP_NEWSFAST
       mov       rsi,rax
       call      CORINFO_HELP_GETCURRENTMANAGEDTHREADID
       mov       [rsi+8],eax
       lea       rcx,[rsi+18]
       mov       rdx,rbp
       call      CORINFO_HELP_ASSIGN_REF
       lea       rcx,[rsi+20]
       mov       rdx,rdi
       call      CORINFO_HELP_ASSIGN_REF
       mov       rbp,rsi
       jmp       short M01_L04
M01_L03:
       ; Fallback: any other source -> new WhereEnumerableIterator.
       mov       rcx,offset MT_System.Linq.Enumerable+WhereEnumerableIterator`1[[System.Int32, System.Private.CoreLib]]
       call      CORINFO_HELP_NEWSFAST
       mov       rbp,rax
       call      CORINFO_HELP_GETCURRENTMANAGEDTHREADID
       mov       [rbp+8],eax
       lea       rcx,[rbp+18]
       mov       rdx,rsi
       call      CORINFO_HELP_ASSIGN_REF
       lea       rcx,[rbp+20]
       mov       rdx,rdi
       call      CORINFO_HELP_ASSIGN_REF
M01_L04:
       ; Common exit: return the iterator held in rbp.
       mov       rax,rbp
       add       rsp,28
       pop       rbx
       pop       rbp
       pop       rsi
       pop       rdi
       ret
M01_L05:
       ; source == null: helper call with ecx = 10h (presumably an argument-null throw
       ; helper taking an argument id; it does not return).
       mov       ecx,10
       call      qword ptr [7FFCED017798]
       int       3
M01_L06:
       ; predicate == null: same helper with ecx = 0Ch.
       mov       ecx,0C
       call      qword ptr [7FFCED017798]
       int       3
; Total bytes of code 375
; System.Linq.Enumerable.TryGetSingle[[System.Int32, System.Private.CoreLib]](System.Collections.Generic.IEnumerable`1<Int32>, Boolean ByRef)
; In:  rcx = source, rdx = address of the "found" byte
; Out: eax = the single element (0 when nothing was found); *found = 1 or 0
; Two strategies: an IList`1 path (two interface calls) and an enumerator path whose
; cleanup lives in the funclet at M02_L07. Interface calls go through indirection
; cells loaded into r11; the dump does not name their targets.
; Frame slots: [rbp-28] = enumerator, [rbp-1C] = pending result, [rbp-30] = saved rsp.
       push      rbp
       push      rdi
       push      rsi
       push      rbx
       sub       rsp,38
       lea       rbp,[rsp+50]
       ; Record rsp in the frame; the funclet at M02_L07 reads it back through rcx.
       mov       [rbp-30],rsp
       mov       rdi,rcx
       mov       rsi,rdx
       test      rdi,rdi
       je        near ptr M02_L05
       ; IList`1 path?
       mov       rdx,rdi
       mov       rcx,offset MT_System.Collections.Generic.IList`1[[System.Int32, System.Private.CoreLib]]
       call      qword ptr [7FFCEC8BD810]; System.Runtime.CompilerServices.CastHelpers.IsInstanceOfInterface(Void*, System.Object)
       mov       rbx,rax
       test      rbx,rbx
       je        short M02_L01
       ; First interface call returns an int (presumably Count): 0 -> M02_L00,
       ; 1 -> fetch the element, anything else -> M02_L06.
       mov       rcx,rbx
       mov       r11,7FFCEC7105F8
       call      qword ptr [r11]
       test      eax,eax
       je        short M02_L00
       cmp       eax,1
       jne       near ptr M02_L06
       ; Exactly one: found = 1, second interface call with edx = 0 (presumably the
       ; indexer at index 0); its result is returned in eax.
       mov       byte ptr [rsi],1
       mov       rcx,rbx
       mov       r11,7FFCEC710600
       xor       edx,edx
       call      qword ptr [r11]
       nop
       add       rsp,38
       pop       rbx
       pop       rsi
       pop       rdi
       pop       rbp
       ret
M02_L00:
       ; Zero case: found = 0, return 0.
       mov       byte ptr [rsi],0
       xor       eax,eax
       add       rsp,38
       pop       rbx
       pop       rsi
       pop       rdi
       pop       rbp
       ret
M02_L01:
       ; Enumerator path: the first call's result (presumably GetEnumerator) is kept in [rbp-28].
       mov       rcx,rdi
       mov       r11,7FFCEC7105D0
       call      qword ptr [r11]
       mov       rcx,rax
       mov       [rbp-28],rcx
       ; Presumably MoveNext: a zero result means there are no elements.
       mov       r11,7FFCEC7105D8
       call      qword ptr [r11]
       test      eax,eax
       jne       short M02_L02
       mov       byte ptr [rsi],0
       xor       ecx,ecx
       mov       [rbp-1C],ecx
       jmp       short M02_L04
M02_L02:
       ; edi = value from the next call (presumably Current); then the 5E8 cell is
       ; called (presumably MoveNext again) to check for a second element.
       mov       rcx,[rbp-28]
       mov       r11,7FFCEC7105E0
       call      qword ptr [r11]
       mov       edi,eax
       mov       rcx,[rbp-28]
       mov       r11,7FFCEC7105E8
       call      qword ptr [r11]
       test      eax,eax
       jne       short M02_L03
       ; No second element: found = 1, pending result = edi.
       mov       byte ptr [rsi],1
       mov       [rbp-1C],edi
       jmp       short M02_L04
M02_L03:
       ; More than one element: call the same cell the funclet uses (presumably
       ; Dispose) directly, then take the failure exit.
       mov       rcx,[rbp-28]
       mov       r11,7FFCEC7105F0
       call      qword ptr [r11]
       jmp       short M02_L06
M02_L04:
       ; Normal exit from the enumerator path: run the funclet at M02_L07 with
       ; rcx = this frame's rsp, then return the pending result.
       mov       rcx,rsp
       call      M02_L07
       nop
       mov       eax,[rbp-1C]
       add       rsp,38
       pop       rbx
       pop       rsi
       pop       rdi
       pop       rbp
       ret
M02_L05:
       ; source == null: helper call with ecx = 10h (presumably an argument-null throw helper).
       mov       ecx,10
       call      qword ptr [7FFCED017798]
       int       3
M02_L06:
       ; Failure exit: found = 0, then a helper that does not return (presumably throws).
       mov       byte ptr [rsi],0
       call      qword ptr [7FFCED0177C8]
       int       3
M02_L07:
       ; Funclet: rcx = parent's rsp. [rcx+20] is the slot written by
       ; "mov [rbp-30],rsp" in the prologue; adding 50h recreates the parent's rbp.
       push      rbp
       push      rdi
       push      rsi
       push      rbx
       sub       rsp,28
       mov       rbp,[rcx+20]
       mov       [rsp+20],rbp
       lea       rbp,[rbp+50]
       ; If the enumerator slot is non-null, call the 5F0 cell on it (presumably Dispose).
       mov       rcx,[rbp-28]
       test      rcx,rcx
       je        short M02_L08
       mov       r11,7FFCEC7105F0
       call      qword ptr [r11]
M02_L08:
       nop
       add       rsp,28
       pop       rbx
       pop       rsi
       pop       rdi
       pop       rbp
       ret
; Total bytes of code 346

Docs

Profiling workflow for dotnet/runtime repository
Benchmarking workflow for dotnet/runtime repository

Run Information

Architecture x64
OS Windows 10.0.18362
Baseline 8c58fc2347820ce48e09605d8adddb993df9ebb5
Compare 1d15f2140f7eb30a976c66290491ec89cd628da0
Diff Diff

Improvements in System.Collections.ContainsKeyFalse<Int32, Int32>

Benchmark Baseline Test Test/Base Test Quality Edge Detector Baseline IR Compare IR IR Ratio Baseline ETL Compare ETL
IDictionary - Duration of single invocation 5.67 μs 5.10 μs 0.90 0.19 False

graph
Test Report

Repro

git clone https://github.com/dotnet/performance.git
py .\performance\scripts\benchmarks_ci.py -f net8.0 --filter 'System.Collections.ContainsKeyFalse<Int32, Int32>*'

Payloads

Baseline
Compare

Histogram

System.Collections.ContainsKeyFalse<Int32, Int32>.IDictionary(Size: 512)


Description of detection logic

IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsRegressionBase: Marked as not a regression because the compare was not 5% greater than the baseline, or the value was too small.
IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsImprovementWindowed: Marked as improvement because 5.102370045484081 < 5.414068052867665.
IsChangePoint: Marked as a change because one of 11/18/2022 8:48:12 PM, 12/1/2022 7:29:59 PM, 12/20/2022 11:23:14 PM, 12/28/2022 5:13:50 PM, 1/3/2023 6:29:42 AM falls between 12/23/2022 9:40:21 PM and 1/3/2023 6:29:42 AM.
IsImprovementStdDev: Marked as improvement because 4.280896822282533 (T) = (0 -5111.846252605496) / Math.Sqrt((86917.17212956118 / (299)) + (83793.71971483406 / (14))) is greater than 1.9676211333067681 = MathNet.Numerics.Distributions.StudentT.InvCDF(0, 1, (299) + (14) - 2, .975) and 0.06221569698354114 = (5450.982956488854 - 5111.846252605496) / 5450.982956488854 is greater than 0.05.
IsChangeEdgeDetector: Marked not as a regression because Edge Detector said so.

Docs

Profiling workflow for dotnet/runtime repository
Benchmarking workflow for dotnet/runtime repository

@performanceautofiler
Copy link
Author

performanceautofiler bot commented Jan 3, 2023

Run Information

Architecture x64
OS Windows 10.0.18362
Baseline 8c58fc2347820ce48e09605d8adddb993df9ebb5
Compare 1d15f2140f7eb30a976c66290491ec89cd628da0
Diff Diff

Improvements in System.Collections.IterateForEach<String>

Benchmark Baseline Test Test/Base Test Quality Edge Detector Baseline IR Compare IR IR Ratio Baseline ETL Compare ETL
IEnumerable - Duration of single invocation 2.92 μs 2.29 μs 0.78 0.33 False
Dictionary - Duration of single invocation 3.28 μs 2.63 μs 0.80 0.43 False

graph
graph
Test Report

Repro

git clone https://github.com/dotnet/performance.git
py .\performance\scripts\benchmarks_ci.py -f net8.0 --filter 'System.Collections.IterateForEach<String>*'

Payloads

Baseline
Compare

Histogram

System.Collections.IterateForEach<String>.IEnumerable(Size: 512)


Description of detection logic

IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsRegressionBase: Marked as not a regression because the compare was not 5% greater than the baseline, or the value was too small.
IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsImprovementWindowed: Marked as improvement because 2.2892038189360666 < 2.7677489939422046.
IsChangePoint: Marked as a change because one of 12/14/2022 5:20:21 PM, 12/17/2022 10:44:50 PM, 12/20/2022 1:34:12 PM, 12/28/2022 5:13:50 PM, 1/3/2023 6:29:42 AM falls between 12/23/2022 9:40:21 PM and 1/3/2023 6:29:42 AM.
IsImprovementStdDev: Marked as improvement because 25.27546691266064 (T) = (0 -2279.0487839320267) / Math.Sqrt((54558.12757800759 / (299)) + (26.67604998255376 / (14))) is greater than 1.9676211333067681 = MathNet.Numerics.Distributions.StudentT.InvCDF(0, 1, (299) + (14) - 2, .975) and 0.1308805143093541 = (2622.250244592066 - 2279.0487839320267) / 2622.250244592066 is greater than 0.05.
IsChangeEdgeDetector: Marked as not a regression because Edge Detector said so.

System.Collections.IterateForEach<String>.Dictionary(Size: 512)


Description of detection logic

IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsRegressionBase: Marked as not a regression because the compare was not 5% greater than the baseline, or the value was too small.
IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsImprovementWindowed: Marked as improvement because 2.6319215392303845 < 3.1175988358542224.
IsChangePoint: Marked as a change because one of 10/17/2022 3:19:47 PM, 11/2/2022 9:35:40 AM, 12/10/2022 11:25:25 PM, 12/28/2022 5:13:50 PM, 1/3/2023 6:29:42 AM falls between 12/23/2022 9:40:21 PM and 1/3/2023 6:29:42 AM.
IsImprovementStdDev: Marked as improvement because 6.905695209465284 (T) = (0 -2742.8031290407052) / Math.Sqrt((58488.78505961677 / (299)) + (14968.383136179747 / (14))) is greater than 1.9676211333067681 = MathNet.Numerics.Distributions.StudentT.InvCDF(0, 1, (299) + (14) - 2, .975) and 0.08218216828380467 = (2988.395991296044 - 2742.8031290407052) / 2988.395991296044 is greater than 0.05.
IsChangeEdgeDetector: Marked as not a regression because Edge Detector said so.

Docs

Profiling workflow for dotnet/runtime repository
Benchmarking workflow for dotnet/runtime repository

Run Information

Architecture x64
OS Windows 10.0.18362
Baseline 8c58fc2347820ce48e09605d8adddb993df9ebb5
Compare 1d15f2140f7eb30a976c66290491ec89cd628da0
Diff Diff

Improvements in IfStatements.IfStatements

Benchmark Baseline Test Test/Base Test Quality Edge Detector Baseline IR Compare IR IR Ratio Baseline ETL Compare ETL
And - Duration of single invocation 60.28 μs 54.35 μs 0.90 0.02 False Trace Trace

graph
Test Report

Repro

git clone https://github.com/dotnet/performance.git
py .\performance\scripts\benchmarks_ci.py -f net8.0 --filter 'IfStatements.IfStatements*'

Payloads

Baseline
Compare

Histogram

IfStatements.IfStatements.And


Description of detection logic

IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsRegressionBase: Marked as not a regression because the compare was not 5% greater than the baseline, or the value was too small.
IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsImprovementWindowed: Marked as improvement because 54.35260344827586 < 57.294417450142454.
IsChangePoint: Marked as a change because one of 11/18/2022 8:48:12 PM, 12/14/2022 5:20:21 PM, 12/17/2022 10:44:50 PM, 12/20/2022 1:34:12 PM, 12/28/2022 5:13:50 PM, 1/3/2023 6:29:42 AM falls between 12/23/2022 9:40:21 PM and 1/3/2023 6:29:42 AM.
IsImprovementStdDev: Marked as improvement because 35.15506123542728 (T) = (0 -54296.66106635237) / Math.Sqrt((2376376.7253830386 / (299)) + (16160.672881742112 / (14))) is greater than 1.9676211333067681 = MathNet.Numerics.Distributions.StudentT.InvCDF(0, 1, (299) + (14) - 2, .975) and 0.058177388216793004 = (57650.6237873705 - 54296.66106635237) / 57650.6237873705 is greater than 0.05.
IsChangeEdgeDetector: Marked as not a regression because Edge Detector said so.

Baseline Jit Disasm

; IfStatements.IfStatements.And()
       push      rdi
       push      rsi
       sub       rsp,28
       xor       esi,esi
       mov       rdi,1A655006218
M00_L00:
       mov       rcx,[rdi]
       mov       rdx,rcx
       mov       eax,[rdx+8]
       cmp       esi,eax
       jae       short M00_L01
       mov       r8d,esi
       mov       edx,[rdx+r8*4+10]
       inc       esi
       cmp       esi,eax
       jae       short M00_L01
       mov       eax,esi
       mov       eax,[rcx+rax*4+10]
       mov       ecx,edx
       mov       edx,eax
       call      qword ptr [7FF952BF3708]; IfStatements.IfStatements.AndInner(Int32, Int32)
       cmp       esi,2710
       jl        short M00_L00
       add       rsp,28
       pop       rsi
       pop       rdi
       ret
M00_L01:
       call      CORINFO_HELP_RNGCHKFAIL
       int       3
; Total bytes of code 82
; IfStatements.IfStatements.AndInner(Int32, Int32)
       sub       rsp,28
       mov       r8d,ecx
       and       r8d,1
       mov       r9d,edx
       and       r9d,1
       or        r8d,r9d
       jne       short M01_L00
       mov       ecx,5
M01_L00:
       xor       r8d,r8d
       xor       r9d,r9d
       call      qword ptr [7FF952BF36A8]; IfStatements.IfStatements.Consume(Int32, Int32, Int32, Int32)
       nop
       add       rsp,28
       ret
; Total bytes of code 46

Compare Jit Disasm

; IfStatements.IfStatements.And()
       push      rdi
       push      rsi
       sub       rsp,28
       xor       esi,esi
       mov       rdi,28046406218
M00_L00:
       mov       rcx,[rdi]
       mov       rdx,rcx
       mov       eax,[rdx+8]
       cmp       esi,eax
       jae       short M00_L01
       mov       r8d,esi
       mov       edx,[rdx+r8*4+10]
       inc       esi
       cmp       esi,eax
       jae       short M00_L01
       mov       eax,esi
       mov       eax,[rcx+rax*4+10]
       mov       ecx,edx
       mov       edx,eax
       call      qword ptr [7FFC29203708]; IfStatements.IfStatements.AndInner(Int32, Int32)
       cmp       esi,2710
       jl        short M00_L00
       add       rsp,28
       pop       rsi
       pop       rdi
       ret
M00_L01:
       call      CORINFO_HELP_RNGCHKFAIL
       int       3
; Total bytes of code 82
; IfStatements.IfStatements.AndInner(Int32, Int32)
       sub       rsp,28
       mov       r8d,ecx
       and       r8d,1
       mov       r9d,edx
       and       r9d,1
       or        r8d,r9d
       jne       short M01_L00
       mov       ecx,5
M01_L00:
       xor       r8d,r8d
       xor       r9d,r9d
       call      qword ptr [7FFC292036A8]; IfStatements.IfStatements.Consume(Int32, Int32, Int32, Int32)
       nop
       add       rsp,28
       ret
; Total bytes of code 46

Docs

Profiling workflow for dotnet/runtime repository
Benchmarking workflow for dotnet/runtime repository

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

1 participant