-
Notifications
You must be signed in to change notification settings - Fork 4
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Perf] Windows/x64: 11 Improvements on 12/28/2022 6:20:56 PM #11284
Comments
Run Information
Improvements in PerfLabTests.CastingPerf
Repro
git clone https://github.com/dotnet/performance.git
py .\performance\scripts\benchmarks_ci.py -f net8.0 --filter 'PerfLabTests.CastingPerf*'
Payloads
Histogram
PerfLabTests.CastingPerf.ScalarValueTypeObj
Description of detection logic
; System.Runtime.CompilerServices.CastHelpers.ChkCastAny(Void*, System.Object)
;
; JIT-emitted x64 code (Intel operand order; immediates and displacements are hex).
; In:   rcx = the Void* argument (the value the object's first qword is matched against)
;       rdx = the object reference being cast (may be null)
; Out:  rax = rdx when the fast path accepts the cast, otherwise whatever
;       ChkCastAny_NoCacheLookup returns.
; rcx and rdx are never written here, so the slow-path call sees the original arguments.
push rdi                ; rdi/rsi are scratch below; both are callee-saved in the Microsoft x64 ABI
push rsi
sub rsp,28              ; 0x28 bytes: 32-byte shadow space for the call + 8 to keep rsp 16-aligned
test rdx,rdx
je near ptr M01_L03     ; null reference: return it unchanged
mov rax,[rdx]           ; rax = first qword of the object (presumably its MethodTable pointer)
cmp rax,rcx
je near ptr M01_L03     ; exact match with the requested value: done
; Locate the lookup table (presumably the runtime's cast cache - confirm against CastHelpers source).
mov r8,28711800D08
mov r8,[r8]             ; load the table reference from a fixed address
add r8,10               ; r8 = table base, 0x10 bytes past the loaded reference
; Hash the (source, target) pair: rotate source by 32 bits, xor with target, multiply by a constant.
rorx r9,rax,20
xor r9,rcx
mov r10,9E3779B97F4A7C15
imul r9,r10
mov r10d,[r8]           ; shift count comes from the first dword at the table base
shrx r9,r9,r10          ; r9 = initial slot index
xor r10d,r10d           ; r10d = probe counter
M01_L00:
; Entry address = base + (index + 1) * 24, so the first 24 bytes (which hold the words read
; through [r8] and [r8+4]) are never treated as an entry.
lea r11d,[r9+1]
movsxd r11,r11d
lea r11,[r11+r11*2]
lea r11,[r8+r11*8]
mov esi,[r11]           ; entry dword at +0 (presumably a version/sequence word - TODO confirm)
mov rdi,[r11+8]         ; entry qword at +8, compared with the source value below
and esi,0FFFFFFFE       ; drop the low bit of the dword
cmp rdi,rax
jne short M01_L01       ; different source: try the next slot
mov rdi,[r11+10]        ; entry qword at +0x10
xor rdi,rcx
cmp rdi,1
jbe short M01_L02       ; equals the target in every bit except possibly bit 0
M01_L01:
test esi,esi
je short M01_L04        ; masked entry dword is zero: stop probing and take the slow path
inc r10d
add r9d,r10d            ; advance the index by the probe count
and r9d,[r8+4]          ; wrap using the mask stored in the second dword at the table base
cmp r10d,8
jl short M01_L00        ; at most 8 probes
jmp short M01_L04
M01_L02:
cmp esi,[r11]           ; re-read the entry dword; it must still equal the masked value read earlier
jne short M01_L04
mov eax,edi
cmp eax,1
jne short M01_L04       ; only an xor result of exactly 1 is accepted on the fast path
M01_L03:
mov rax,rdx             ; success: return the object reference
add rsp,28
pop rsi
pop rdi
ret
M01_L04:
; Slow path: defer to the non-cached helper and return its result in rax.
call System.Runtime.CompilerServices.CastHelpers.ChkCastAny_NoCacheLookup(Void*, System.Object)
nop
add rsp,28
pop rsi
pop rdi
ret
; Total bytes of code 178

Compare Jit Disasm

; PerfLabTests.CastingPerf.ScalarValueTypeObj()
; Disassembly of PerfLabTests.CastingPerf.ScalarValueTypeObj().
; Loop: cast one object reference to PerfLabTests.FooSVT[] and store the result to a fixed
; location, repeated while the counter is below a limit loaded from memory.
; Register roles: esi = loop counter, edi = iteration limit, rbx = object being cast
; (loaded once before the loop), rbp = destination address for the store helper.
push rdi
push rsi
push rbp
push rbx
sub rsp,28              ; 0x28 bytes: 32-byte shadow space + 8 to keep rsp 16-aligned at calls
xor esi,esi             ; counter = 0
mov edi,[7FF963974C80]  ; iteration limit
test edi,edi
jle short M00_L02       ; nothing to do when the limit is <= 0
mov rcx,271B9406298
mov rbx,[rcx]           ; object reference, hoisted out of the loop
mov rbp,271B9406288     ; address that receives the cast result
M00_L00:
mov rdx,rbx
mov rax,rdx
test rax,rax
je short M00_L01        ; null needs no type check
mov rcx,offset MT_PerfLabTests.FooSVT[]
cmp [rax],rcx
je short M00_L01        ; first qword of the object equals the target MT: cast succeeds inline
; Otherwise call the helper with rcx = target MT, rdx = object; result comes back in rax.
call qword ptr [7FF9631AD858]; System.Runtime.CompilerServices.CastHelpers.ChkCastAny(Void*, System.Object)
M00_L01:
mov rcx,rbp
mov rdx,rax
call CORINFO_HELP_ASSIGN_REF    ; store the reference in rdx to [rcx] via the runtime helper
inc esi
cmp esi,edi
jl short M00_L00
M00_L02:
add rsp,28
pop rbx
pop rbp
pop rsi
pop rdi
ret
; Total bytes of code 101

; System.Runtime.CompilerServices.CastHelpers.ChkCastAny(Void*, System.Object)
; Disassembly of System.Runtime.CompilerServices.CastHelpers.ChkCastAny(Void*, System.Object).
; JIT-emitted x64 code (Intel operand order; immediates and displacements are hex).
; In:   rcx = the Void* argument (the value the object's first qword is matched against)
;       rdx = the object reference being cast (may be null)
; Out:  rax = rdx when the fast path accepts the cast, otherwise whatever
;       ChkCastAny_NoCacheLookup returns.
; rcx and rdx are never written here, so the slow-path call sees the original arguments.
push rdi                ; rdi/rsi are scratch below; both are callee-saved in the Microsoft x64 ABI
push rsi
sub rsp,28              ; 0x28 bytes: 32-byte shadow space for the call + 8 to keep rsp 16-aligned
test rdx,rdx
je near ptr M01_L03     ; null reference: return it unchanged
mov rax,[rdx]           ; rax = first qword of the object (presumably its MethodTable pointer)
cmp rax,rcx
je near ptr M01_L03     ; exact match with the requested value: done
; Locate the lookup table (presumably the runtime's cast cache - confirm against CastHelpers source).
mov r8,271B9400D10
mov r8,[r8]             ; load the table reference from a fixed address
add r8,10               ; r8 = table base, 0x10 bytes past the loaded reference
; Hash the (source, target) pair: rotate source by 32 bits, xor with target, multiply by a constant.
rorx r9,rax,20
xor r9,rcx
mov r10,9E3779B97F4A7C15
imul r9,r10
mov r10d,[r8]           ; shift count comes from the first dword at the table base
shrx r9,r9,r10          ; r9 = initial slot index
xor r10d,r10d           ; r10d = probe counter
M01_L00:
; Entry address = base + (index + 1) * 24, so the first 24 bytes (which hold the words read
; through [r8] and [r8+4]) are never treated as an entry.
lea r11d,[r9+1]
movsxd r11,r11d
lea r11,[r11+r11*2]
lea r11,[r8+r11*8]
mov esi,[r11]           ; entry dword at +0 (presumably a version/sequence word - TODO confirm)
mov rdi,[r11+8]         ; entry qword at +8, compared with the source value below
and esi,0FFFFFFFE       ; drop the low bit of the dword
cmp rdi,rax
jne short M01_L01       ; different source: try the next slot
mov rdi,[r11+10]        ; entry qword at +0x10
xor rdi,rcx
cmp rdi,1
jbe short M01_L02       ; equals the target in every bit except possibly bit 0
M01_L01:
test esi,esi
je short M01_L04        ; masked entry dword is zero: stop probing and take the slow path
inc r10d
add r9d,r10d            ; advance the index by the probe count
and r9d,[r8+4]          ; wrap using the mask stored in the second dword at the table base
cmp r10d,8
jl short M01_L00        ; at most 8 probes
jmp short M01_L04
M01_L02:
cmp esi,[r11]           ; re-read the entry dword; it must still equal the masked value read earlier
jne short M01_L04
mov eax,edi
cmp eax,1
jne short M01_L04       ; only an xor result of exactly 1 is accepted on the fast path
M01_L03:
mov rax,rdx             ; success: return the object reference
add rsp,28
pop rsi
pop rdi
ret
M01_L04:
; Slow path: defer to the non-cached helper and return its result in rax.
call System.Runtime.CompilerServices.CastHelpers.ChkCastAny_NoCacheLookup(Void*, System.Object)
nop
add rsp,28
pop rsi
pop rdi
ret
; Total bytes of code 178

Docs
Profiling workflow for dotnet/runtime repository
Improvements in System.Numerics.Tests.Perf_Plane
Repro
git clone https://github.com/dotnet/performance.git
py .\performance\scripts\benchmarks_ci.py -f net8.0 --filter 'System.Numerics.Tests.Perf_Plane*'
Payloads
Histogram
System.Numerics.Tests.Perf_Plane.TransformByMatrix4x4Benchmark
Description of detection logic
; System.Numerics.Matrix4x4.get_Identity()
;
; In:   rcx = address of a 64-byte buffer supplied by the caller for the result
; Out:  rax = rcx, after 64 bytes have been copied into the buffer
; Leaf routine: no stack adjustment, no calls.
vzeroupper
mov rax,23F87C00460
mov rax,[rax]                       ; load a reference from a fixed address
vmovdqu ymm0,ymmword ptr [rax+8]    ; copy bytes 0..31, read from 8 bytes past that reference
vmovdqu ymmword ptr [rcx],ymm0
vmovdqu ymm0,ymmword ptr [rax+28]   ; copy bytes 32..63
vmovdqu ymmword ptr [rcx+20],ymm0
mov rax,rcx                         ; return the buffer address
ret
; Total bytes of code 39

; System.Numerics.Matrix4x4.<Invert>g__SseImpl|68_0(System.Numerics.Matrix4x4, System.Numerics.Matrix4x4 ByRef)
; Disassembly of System.Numerics.Matrix4x4.<Invert>g__SseImpl|68_0(Matrix4x4, Matrix4x4 ByRef).
; In:   rcx = address of the input matrix (four 16-byte rows are loaded from it)
;       rdx = address the 64-byte result is written to
; Out:  eax = 1 after writing the scaled result, 0 after writing the constant-filled fallback
; Stack (0x118 bytes): [rsp+0..3F] fallback temporary, [rsp+40..6F] three 16-byte spill slots,
;       [rsp+70..10F] saved xmm6-xmm15 (callee-saved in the Microsoft x64 ABI).
; Leaf routine: makes no calls.
sub rsp,118
vzeroupper
vmovaps [rsp+100],xmm6
vmovaps [rsp+0F0],xmm7
vmovaps [rsp+0E0],xmm8
vmovaps [rsp+0D0],xmm9
vmovaps [rsp+0C0],xmm10
vmovaps [rsp+0B0],xmm11
vmovaps [rsp+0A0],xmm12
vmovaps [rsp+90],xmm13
vmovaps [rsp+80],xmm14
vmovaps [rsp+70],xmm15
; Load the four rows of the input.
vmovups xmm0,[rcx]
vmovups xmm1,[rcx+10]
vmovups xmm2,[rcx+20]
vmovups xmm3,[rcx+30]
; 4x4 transpose: interleave low/high halves (44/EE), then pick even/odd lanes (88/DD).
vshufps xmm4,xmm0,xmm1,44
vshufps xmm5,xmm0,xmm1,0EE
vshufps xmm1,xmm2,xmm3,44
vshufps xmm3,xmm2,xmm3,0EE
vshufps xmm0,xmm4,xmm1,88
vshufps xmm1,xmm4,xmm1,0DD
vshufps xmm2,xmm5,xmm3,88
vshufps xmm3,xmm5,xmm3,0DD
; Build xmm10, xmm11, xmm12 as (product - product) of permuted lanes
; (presumably the 2x2 sub-determinants of the cofactor expansion - confirm against Matrix4x4.Invert).
vpermilps xmm4,xmm2,50
vpermilps xmm5,xmm3,0EE
vpermilps xmm6,xmm0,50
vpermilps xmm7,xmm1,0EE
vshufps xmm8,xmm2,xmm0,88
vshufps xmm9,xmm3,xmm1,0DD
vmulps xmm10,xmm4,xmm5
vmulps xmm11,xmm6,xmm7
vmulps xmm12,xmm8,xmm9
vpermilps xmm4,xmm2,0EE
vpermilps xmm5,xmm3,50
vpermilps xmm6,xmm0,0EE
vpermilps xmm7,xmm1,50
vshufps xmm8,xmm2,xmm0,0DD
vshufps xmm9,xmm3,xmm1,88
vmulps xmm4,xmm4,xmm5
vsubps xmm10,xmm10,xmm4
vmulps xmm5,xmm6,xmm7
vsubps xmm11,xmm11,xmm5
vmulps xmm4,xmm8,xmm9
vsubps xmm12,xmm12,xmm4
; First group of four products; xmm15 stays in a register, the other three are spilled
; because all sixteen xmm registers are in use.
vshufps xmm7,xmm10,xmm12,5D
vpermilps xmm4,xmm1,49
vshufps xmm5,xmm7,xmm10,32
vpermilps xmm6,xmm0,12
vshufps xmm7,xmm7,xmm10,99
vshufps xmm13,xmm11,xmm12,0FD
vpermilps xmm8,xmm3,49
vshufps xmm9,xmm13,xmm11,32
vpermilps xmm14,xmm2,12
vshufps xmm13,xmm13,xmm11,99
vmulps xmm15,xmm4,xmm5
vmulps xmm4,xmm6,xmm7
vmovapd [rsp+60],xmm4
vmulps xmm5,xmm8,xmm9
vmovapd [rsp+50],xmm5
vmulps xmm7,xmm14,xmm13
vmovapd [rsp+40],xmm7
; Second group of four products, each subtracted from the matching first-group value
; (spilled values are reloaded just before their subtraction).
vshufps xmm6,xmm10,xmm12,4
vpermilps xmm13,xmm1,9E
vshufps xmm8,xmm10,xmm6,93
vpermilps xmm9,xmm0,7B
vshufps xmm6,xmm10,xmm6,26
vshufps xmm14,xmm11,xmm12,0A4
vpermilps xmm7,xmm3,9E
vshufps xmm5,xmm11,xmm14,93
vpermilps xmm4,xmm2,7B
vshufps xmm14,xmm11,xmm14,26
vmulps xmm13,xmm13,xmm8
vsubps xmm15,xmm15,xmm13
vmulps xmm8,xmm9,xmm6
vmovapd xmm13,[rsp+60]
vsubps xmm13,xmm13,xmm8
vmulps xmm5,xmm7,xmm5
vmovapd xmm8,[rsp+50]
vsubps xmm8,xmm8,xmm5
vmulps xmm4,xmm4,xmm14
vmovapd xmm7,[rsp+40]
vsubps xmm7,xmm7,xmm4
; Third group of four products (xmm1, xmm9, xmm3, xmm2).
vpermilps xmm1,xmm1,33
vshufps xmm4,xmm10,xmm12,4A
vpermilps xmm4,xmm4,2C
vpermilps xmm9,xmm0,8D
vshufps xmm6,xmm10,xmm12,4C
vpermilps xmm6,xmm6,93
vpermilps xmm3,xmm3,33
vshufps xmm5,xmm11,xmm12,0EA
vpermilps xmm5,xmm5,2C
vpermilps xmm2,xmm2,8D
vshufps xmm14,xmm11,xmm12,0EC
vpermilps xmm14,xmm14,93
vmulps xmm1,xmm1,xmm4
vmulps xmm9,xmm9,xmm6
vmulps xmm3,xmm3,xmm5
vmulps xmm2,xmm2,xmm14
; Form both the sum and the difference with the third group, then blend and reorder lanes
; (0D8 swaps the two middle lanes) to get the four unscaled result rows in
; xmm15, xmm13, xmm8, xmm7.
vsubps xmm4,xmm15,xmm1
vaddps xmm15,xmm15,xmm1
vaddps xmm1,xmm13,xmm9
vsubps xmm13,xmm13,xmm9
vsubps xmm5,xmm8,xmm3
vaddps xmm8,xmm8,xmm3
vaddps xmm3,xmm7,xmm2
vsubps xmm7,xmm7,xmm2
vshufps xmm15,xmm15,xmm4,0D8
vshufps xmm13,xmm13,xmm1,0D8
vshufps xmm8,xmm8,xmm5,0D8
vshufps xmm7,xmm7,xmm3,0D8
vpermilps xmm15,xmm15,0D8
vpermilps xmm13,xmm13,0D8
vpermilps xmm8,xmm8,0D8
vpermilps xmm7,xmm7,0D8
; Four-lane dot product of the first result row with xmm0, result in lane 0
; (presumably the determinant).
vdpps xmm0,xmm15,xmm0,0F1
vandps xmm1,xmm0,[7FFC37173CD0]     ; mask with a constant (presumably the abs-value mask - TODO confirm)
vmovss xmm2,dword ptr [7FFC37173CE0]    ; scalar threshold constant
vucomiss xmm2,xmm1
jbe near ptr M02_L00        ; threshold <= masked value (or unordered): take the divide path
; Fallback: zero a 64-byte temporary, overwrite all 16 floats with one scalar constant
; (value not visible in this listing; presumably NaN), copy it to [rdx] and return 0.
vxorps ymm0,ymm0,ymm0
vmovdqu ymmword ptr [rsp],ymm0
vmovdqu ymmword ptr [rsp+20],ymm0
vmovss xmm0,dword ptr [7FFC37173CE4]
vmovss dword ptr [rsp],xmm0
vmovss dword ptr [rsp+4],xmm0
vmovss dword ptr [rsp+8],xmm0
vmovss dword ptr [rsp+0C],xmm0
vmovss dword ptr [rsp+10],xmm0
vmovss dword ptr [rsp+14],xmm0
vmovss dword ptr [rsp+18],xmm0
vmovss dword ptr [rsp+1C],xmm0
vmovss dword ptr [rsp+20],xmm0
vmovss dword ptr [rsp+24],xmm0
vmovss dword ptr [rsp+28],xmm0
vmovss dword ptr [rsp+2C],xmm0
vmovss dword ptr [rsp+30],xmm0
vmovss dword ptr [rsp+34],xmm0
vmovss dword ptr [rsp+38],xmm0
vmovss dword ptr [rsp+3C],xmm0
vmovdqu ymm0,ymmword ptr [rsp]
vmovdqu ymmword ptr [rdx],ymm0
vmovdqu ymm0,ymmword ptr [rsp+20]
vmovdqu ymmword ptr [rdx+20],ymm0
xor eax,eax                 ; return 0
vmovaps xmm6,[rsp+100]
vmovaps xmm7,[rsp+0F0]
vmovaps xmm8,[rsp+0E0]
vmovaps xmm9,[rsp+0D0]
vmovaps xmm10,[rsp+0C0]
vmovaps xmm11,[rsp+0B0]
vmovaps xmm12,[rsp+0A0]
vmovaps xmm13,[rsp+90]
vmovaps xmm14,[rsp+80]
vmovaps xmm15,[rsp+70]
add rsp,118
ret
M02_L00:
; Divide path: broadcast the dot-product scalar, divide a constant vector by it
; (presumably 1.0 in every lane), scale the four rows and store them to [rdx].
vbroadcastss xmm0,xmm0
vmovupd xmm1,[7FFC37173CF0]
vdivps xmm0,xmm1,xmm0
vmulps xmm1,xmm15,xmm0
vmulps xmm2,xmm13,xmm0
vmulps xmm3,xmm8,xmm0
vmulps xmm0,xmm7,xmm0
vmovupd [rdx],xmm1
vmovupd [rdx+10],xmm2
vmovupd [rdx+20],xmm3
vmovupd [rdx+30],xmm0
mov eax,1                   ; return 1
vmovaps xmm6,[rsp+100]
vmovaps xmm7,[rsp+0F0]
vmovaps xmm8,[rsp+0E0]
vmovaps xmm9,[rsp+0D0]
vmovaps xmm10,[rsp+0C0]
vmovaps xmm11,[rsp+0B0]
vmovaps xmm12,[rsp+0A0]
vmovaps xmm13,[rsp+90]
vmovaps xmm14,[rsp+80]
vmovaps xmm15,[rsp+70]
add rsp,118
ret
; Total bytes of code 1056

Compare Jit Disasm

; System.Numerics.Tests.Perf_Plane.TransformByMatrix4x4Benchmark()
; Disassembly of System.Numerics.Tests.Perf_Plane.TransformByMatrix4x4Benchmark().
; In:   rdx = address of the 16-byte result buffer (kept in rsi, returned in rax)
; Flow: load four floats (x, y, z, w) from a fixed location, fetch Matrix4x4.Identity, run the
;       SseImpl inversion on a copy of it, then write four dot products of (x, y, z, w) with
;       the four 16-byte rows of the inverted matrix. The eax result of SseImpl is not checked.
; Stack (0x100 bytes): [rsp+20..5F] matrix copy passed to SseImpl, [rsp+60..9F] inverted matrix,
;       [rsp+0A0..0DF] Identity return buffer, [rsp+0E0..0FF] saved xmm7/xmm6.
push rsi
sub rsp,100
vzeroupper
vmovaps [rsp+0F0],xmm6      ; xmm6/xmm7 hold values across the calls; callee-saved on Windows x64
vmovaps [rsp+0E0],xmm7
mov rsi,rdx
mov rcx,2516A406A50
mov rcx,[rcx]
add rcx,8                   ; rcx = address of the four source floats
vmovss xmm0,dword ptr [rcx+8]
vmovsd xmm6,qword ptr [rcx]
vshufps xmm6,xmm6,xmm0,44   ; xmm6 = {x, y, z, 0}
vmovss xmm7,dword ptr [rcx+0C]  ; xmm7 = w
lea rcx,[rsp+0A0]
call qword ptr [7FFE0AABB030]; System.Numerics.Matrix4x4.get_Identity()
; Copy the 64-byte identity into the by-value argument slot for SseImpl.
vmovdqu ymm0,ymmword ptr [rsp+0A0]
vmovdqu ymmword ptr [rsp+20],ymm0
vmovdqu ymm0,ymmword ptr [rsp+0C0]
vmovdqu ymmword ptr [rsp+40],ymm0
lea rcx,[rsp+20]
lea rdx,[rsp+60]
call qword ptr [7FFE0AABB600]; System.Numerics.Matrix4x4.<Invert>g__SseImpl|68_0(System.Numerics.Matrix4x4, System.Numerics.Matrix4x4 ByRef)
vmovaps xmm0,xmm6           ; lane 0 = x
vmovshdup xmm1,xmm6         ; lane 0 = y
vunpckhps xmm2,xmm6,xmm6    ; lane 0 = z
; xmm3 = dot((x, y, z, w), row at rsp+60)
vmulss xmm3,xmm0,dword ptr [rsp+60]
vmulss xmm4,xmm1,dword ptr [rsp+64]
vaddss xmm3,xmm3,xmm4
vmulss xmm4,xmm2,dword ptr [rsp+68]
vaddss xmm3,xmm3,xmm4
vmulss xmm4,xmm7,dword ptr [rsp+6C]
vaddss xmm3,xmm3,xmm4
; xmm4 = dot with the row at rsp+70
vmulss xmm4,xmm0,dword ptr [rsp+70]
vmulss xmm5,xmm1,dword ptr [rsp+74]
vaddss xmm4,xmm4,xmm5
vmulss xmm5,xmm2,dword ptr [rsp+78]
vaddss xmm4,xmm4,xmm5
vmulss xmm5,xmm7,dword ptr [rsp+7C]
vaddss xmm4,xmm4,xmm5
; xmm5 = dot with the row at rsp+80 (xmm6 is reused as scratch from here on)
vmulss xmm5,xmm0,dword ptr [rsp+80]
vmulss xmm6,xmm1,dword ptr [rsp+84]
vaddss xmm5,xmm5,xmm6
vmulss xmm6,xmm2,dword ptr [rsp+88]
vaddss xmm5,xmm5,xmm6
vmulss xmm6,xmm7,dword ptr [rsp+8C]
vaddss xmm5,xmm5,xmm6
; xmm0 = dot with the row at rsp+90
vmulss xmm0,xmm0,dword ptr [rsp+90]
vmulss xmm1,xmm1,dword ptr [rsp+94]
vaddss xmm0,xmm0,xmm1
vmulss xmm1,xmm2,dword ptr [rsp+98]
vaddss xmm0,xmm0,xmm1
vmulss xmm1,xmm7,dword ptr [rsp+9C]
vaddss xmm0,xmm0,xmm1
; Pack the first three results into xmm1 and store all four floats to the result buffer.
vinsertps xmm1,xmm3,xmm4,10     ; lane 1 = xmm4[0]
vinsertps xmm1,xmm1,xmm5,28     ; lane 2 = xmm5[0], lane 3 zeroed
vmovsd qword ptr [rsi],xmm1
vpshufd xmm2,xmm1,2             ; bring lane 2 down to lane 0
vmovss dword ptr [rsi+8],xmm2
vmovss dword ptr [rsi+0C],xmm0
mov rax,rsi                     ; return the result buffer address
vmovaps xmm6,[rsp+0F0]
vmovaps xmm7,[rsp+0E0]
add rsp,100
pop rsi
ret
; Total bytes of code 369

; System.Numerics.Matrix4x4.get_Identity()
; Disassembly of System.Numerics.Matrix4x4.get_Identity().
; In:   rcx = address of a 64-byte buffer supplied by the caller for the result
; Out:  rax = rcx, after 64 bytes have been copied into the buffer
; Leaf routine: no stack adjustment, no calls.
vzeroupper
mov rax,2516A400460
mov rax,[rax]                       ; load a reference from a fixed address
vmovdqu ymm0,ymmword ptr [rax+8]    ; copy bytes 0..31, read from 8 bytes past that reference
vmovdqu ymmword ptr [rcx],ymm0
vmovdqu ymm0,ymmword ptr [rax+28]   ; copy bytes 32..63
vmovdqu ymmword ptr [rcx+20],ymm0
mov rax,rcx                         ; return the buffer address
ret
; Total bytes of code 39

; System.Numerics.Matrix4x4.<Invert>g__SseImpl|68_0(System.Numerics.Matrix4x4, System.Numerics.Matrix4x4 ByRef)
; Disassembly of System.Numerics.Matrix4x4.<Invert>g__SseImpl|68_0(Matrix4x4, Matrix4x4 ByRef).
; In:   rcx = address of the input matrix (four 16-byte rows are loaded from it)
;       rdx = address the 64-byte result is written to
; Out:  eax = 1 after writing the scaled result, 0 after writing the constant-filled fallback
; Stack (0x118 bytes): [rsp+0..3F] fallback temporary, [rsp+40..6F] three 16-byte spill slots,
;       [rsp+70..10F] saved xmm6-xmm15 (callee-saved in the Microsoft x64 ABI).
; Leaf routine: makes no calls.
sub rsp,118
vzeroupper
vmovaps [rsp+100],xmm6
vmovaps [rsp+0F0],xmm7
vmovaps [rsp+0E0],xmm8
vmovaps [rsp+0D0],xmm9
vmovaps [rsp+0C0],xmm10
vmovaps [rsp+0B0],xmm11
vmovaps [rsp+0A0],xmm12
vmovaps [rsp+90],xmm13
vmovaps [rsp+80],xmm14
vmovaps [rsp+70],xmm15
; Load the four rows of the input.
vmovups xmm0,[rcx]
vmovups xmm1,[rcx+10]
vmovups xmm2,[rcx+20]
vmovups xmm3,[rcx+30]
; 4x4 transpose: interleave low/high halves (44/EE), then pick even/odd lanes (88/DD).
vshufps xmm4,xmm0,xmm1,44
vshufps xmm5,xmm0,xmm1,0EE
vshufps xmm1,xmm2,xmm3,44
vshufps xmm3,xmm2,xmm3,0EE
vshufps xmm0,xmm4,xmm1,88
vshufps xmm1,xmm4,xmm1,0DD
vshufps xmm2,xmm5,xmm3,88
vshufps xmm3,xmm5,xmm3,0DD
; Build xmm10, xmm11, xmm12 as (product - product) of permuted lanes
; (presumably the 2x2 sub-determinants of the cofactor expansion - confirm against Matrix4x4.Invert).
vpermilps xmm4,xmm2,50
vpermilps xmm5,xmm3,0EE
vpermilps xmm6,xmm0,50
vpermilps xmm7,xmm1,0EE
vshufps xmm8,xmm2,xmm0,88
vshufps xmm9,xmm3,xmm1,0DD
vmulps xmm10,xmm4,xmm5
vmulps xmm11,xmm6,xmm7
vmulps xmm12,xmm8,xmm9
vpermilps xmm4,xmm2,0EE
vpermilps xmm5,xmm3,50
vpermilps xmm6,xmm0,0EE
vpermilps xmm7,xmm1,50
vshufps xmm8,xmm2,xmm0,0DD
vshufps xmm9,xmm3,xmm1,88
vmulps xmm4,xmm4,xmm5
vsubps xmm10,xmm10,xmm4
vmulps xmm5,xmm6,xmm7
vsubps xmm11,xmm11,xmm5
vmulps xmm4,xmm8,xmm9
vsubps xmm12,xmm12,xmm4
; First group of four products; xmm15 stays in a register, the other three are spilled
; because all sixteen xmm registers are in use.
vshufps xmm7,xmm10,xmm12,5D
vpermilps xmm4,xmm1,49
vshufps xmm5,xmm7,xmm10,32
vpermilps xmm6,xmm0,12
vshufps xmm7,xmm7,xmm10,99
vshufps xmm13,xmm11,xmm12,0FD
vpermilps xmm8,xmm3,49
vshufps xmm9,xmm13,xmm11,32
vpermilps xmm14,xmm2,12
vshufps xmm13,xmm13,xmm11,99
vmulps xmm15,xmm4,xmm5
vmulps xmm4,xmm6,xmm7
vmovapd [rsp+60],xmm4
vmulps xmm5,xmm8,xmm9
vmovapd [rsp+50],xmm5
vmulps xmm7,xmm14,xmm13
vmovapd [rsp+40],xmm7
; Second group of four products, each subtracted from the matching first-group value
; (spilled values are reloaded just before their subtraction).
vshufps xmm6,xmm10,xmm12,4
vpermilps xmm13,xmm1,9E
vshufps xmm8,xmm10,xmm6,93
vpermilps xmm9,xmm0,7B
vshufps xmm6,xmm10,xmm6,26
vshufps xmm14,xmm11,xmm12,0A4
vpermilps xmm7,xmm3,9E
vshufps xmm5,xmm11,xmm14,93
vpermilps xmm4,xmm2,7B
vshufps xmm14,xmm11,xmm14,26
vmulps xmm13,xmm13,xmm8
vsubps xmm15,xmm15,xmm13
vmulps xmm8,xmm9,xmm6
vmovapd xmm13,[rsp+60]
vsubps xmm13,xmm13,xmm8
vmulps xmm5,xmm7,xmm5
vmovapd xmm8,[rsp+50]
vsubps xmm8,xmm8,xmm5
vmulps xmm4,xmm4,xmm14
vmovapd xmm7,[rsp+40]
vsubps xmm7,xmm7,xmm4
; Third group of four products (xmm1, xmm9, xmm3, xmm2).
vpermilps xmm1,xmm1,33
vshufps xmm4,xmm10,xmm12,4A
vpermilps xmm4,xmm4,2C
vpermilps xmm9,xmm0,8D
vshufps xmm6,xmm10,xmm12,4C
vpermilps xmm6,xmm6,93
vpermilps xmm3,xmm3,33
vshufps xmm5,xmm11,xmm12,0EA
vpermilps xmm5,xmm5,2C
vpermilps xmm2,xmm2,8D
vshufps xmm14,xmm11,xmm12,0EC
vpermilps xmm14,xmm14,93
vmulps xmm1,xmm1,xmm4
vmulps xmm9,xmm9,xmm6
vmulps xmm3,xmm3,xmm5
vmulps xmm2,xmm2,xmm14
; Form both the sum and the difference with the third group, then blend and reorder lanes
; (0D8 swaps the two middle lanes) to get the four unscaled result rows in
; xmm15, xmm13, xmm8, xmm7.
vsubps xmm4,xmm15,xmm1
vaddps xmm15,xmm15,xmm1
vaddps xmm1,xmm13,xmm9
vsubps xmm13,xmm13,xmm9
vsubps xmm5,xmm8,xmm3
vaddps xmm8,xmm8,xmm3
vaddps xmm3,xmm7,xmm2
vsubps xmm7,xmm7,xmm2
vshufps xmm15,xmm15,xmm4,0D8
vshufps xmm13,xmm13,xmm1,0D8
vshufps xmm8,xmm8,xmm5,0D8
vshufps xmm7,xmm7,xmm3,0D8
vpermilps xmm15,xmm15,0D8
vpermilps xmm13,xmm13,0D8
vpermilps xmm8,xmm8,0D8
vpermilps xmm7,xmm7,0D8
; Four-lane dot product of the first result row with xmm0, result in lane 0
; (presumably the determinant).
vdpps xmm0,xmm15,xmm0,0F1
vandps xmm1,xmm0,[7FFE0A193C80]     ; mask with a constant (presumably the abs-value mask - TODO confirm)
vmovss xmm2,dword ptr [7FFE0A193C90]    ; scalar threshold constant
vucomiss xmm2,xmm1
jbe near ptr M02_L00        ; threshold <= masked value (or unordered): take the divide path
; Fallback: zero a 64-byte temporary, overwrite all 16 floats with one scalar constant
; (value not visible in this listing; presumably NaN), copy it to [rdx] and return 0.
vxorps ymm0,ymm0,ymm0
vmovdqu ymmword ptr [rsp],ymm0
vmovdqu ymmword ptr [rsp+20],ymm0
vmovss xmm0,dword ptr [7FFE0A193C94]
vmovss dword ptr [rsp],xmm0
vmovss dword ptr [rsp+4],xmm0
vmovss dword ptr [rsp+8],xmm0
vmovss dword ptr [rsp+0C],xmm0
vmovss dword ptr [rsp+10],xmm0
vmovss dword ptr [rsp+14],xmm0
vmovss dword ptr [rsp+18],xmm0
vmovss dword ptr [rsp+1C],xmm0
vmovss dword ptr [rsp+20],xmm0
vmovss dword ptr [rsp+24],xmm0
vmovss dword ptr [rsp+28],xmm0
vmovss dword ptr [rsp+2C],xmm0
vmovss dword ptr [rsp+30],xmm0
vmovss dword ptr [rsp+34],xmm0
vmovss dword ptr [rsp+38],xmm0
vmovss dword ptr [rsp+3C],xmm0
vmovdqu ymm0,ymmword ptr [rsp]
vmovdqu ymmword ptr [rdx],ymm0
vmovdqu ymm0,ymmword ptr [rsp+20]
vmovdqu ymmword ptr [rdx+20],ymm0
xor eax,eax                 ; return 0
vmovaps xmm6,[rsp+100]
vmovaps xmm7,[rsp+0F0]
vmovaps xmm8,[rsp+0E0]
vmovaps xmm9,[rsp+0D0]
vmovaps xmm10,[rsp+0C0]
vmovaps xmm11,[rsp+0B0]
vmovaps xmm12,[rsp+0A0]
vmovaps xmm13,[rsp+90]
vmovaps xmm14,[rsp+80]
vmovaps xmm15,[rsp+70]
add rsp,118
ret
M02_L00:
; Divide path: broadcast the dot-product scalar, divide a constant vector by it
; (presumably 1.0 in every lane), scale the four rows and store them to [rdx].
vbroadcastss xmm0,xmm0
vmovupd xmm1,[7FFE0A193CA0]
vdivps xmm0,xmm1,xmm0
vmulps xmm1,xmm15,xmm0
vmulps xmm2,xmm13,xmm0
vmulps xmm3,xmm8,xmm0
vmulps xmm0,xmm7,xmm0
vmovupd [rdx],xmm1
vmovupd [rdx+10],xmm2
vmovupd [rdx+20],xmm3
vmovupd [rdx+30],xmm0
mov eax,1                   ; return 1
vmovaps xmm6,[rsp+100]
vmovaps xmm7,[rsp+0F0]
vmovaps xmm8,[rsp+0E0]
vmovaps xmm9,[rsp+0D0]
vmovaps xmm10,[rsp+0C0]
vmovaps xmm11,[rsp+0B0]
vmovaps xmm12,[rsp+0A0]
vmovaps xmm13,[rsp+90]
vmovaps xmm14,[rsp+80]
vmovaps xmm15,[rsp+70]
add rsp,118
ret
; Total bytes of code 1056

Docs
Profiling workflow for dotnet/runtime repository

Run Information
Improvements in System.Linq.Tests.Perf_Enumerable
Repro
git clone https://github.com/dotnet/performance.git
py .\performance\scripts\benchmarks_ci.py -f net8.0 --filter 'System.Linq.Tests.Perf_Enumerable*'
Payloads
Histogram
System.Linq.Tests.Perf_Enumerable.WhereSingle_LastElementMatches(input: List)
Description of detection logic
; System.Linq.Enumerable.Where[[System.Int32, System.Private.CoreLib]](System.Collections.Generic.IEnumerable`1<Int32>, System.Func`2<Int32,Boolean>)
;
; In:   rcx = source (kept in rsi), rdx = predicate (kept in rdi)
; Out:  rax = the iterator object chosen by the type tests below
; Dispatch order: Iterator<int> -> Int32[] (empty / non-empty) -> List<int> -> generic fallback.
; Each freshly allocated iterator gets the current managed thread id at +8, the source at +18
; and the predicate at +20.
push rdi
push rsi
push rbp
push rbx
sub rsp,28              ; 0x28 bytes: 32-byte shadow space + 8 to keep rsp 16-aligned at calls
mov rsi,rcx
mov rdi,rdx
test rsi,rsi
je near ptr M01_L05     ; null source
test rdi,rdi
je near ptr M01_L06     ; null predicate
mov rdx,rsi
mov rcx,offset MT_System.Linq.Enumerable+Iterator`1[[System.Int32, System.Private.CoreLib]]
call qword ptr [7FF88E4CD828]; System.Runtime.CompilerServices.CastHelpers.IsInstanceOfClass(Void*, System.Object)
test rax,rax
je short M01_L00
; Source is already an Iterator<int>: tail-call a virtual method on it with
; rcx = iterator, rdx = predicate (presumably Iterator<int>.Where - TODO confirm).
mov rcx,rax
mov rdx,rdi
mov rax,[rax]
mov rax,[rax+48]
add rsp,28
pop rbx
pop rbp
pop rsi
pop rdi
jmp qword ptr [rax+10]
M01_L00:
mov rdx,rsi
mov rcx,offset MT_System.Int32[]
call qword ptr [7FF88E4CD7F8]; System.Runtime.CompilerServices.CastHelpers.IsInstanceOfAny(Void*, System.Object)
mov rbx,rax
test rbx,rbx
je short M01_L02        ; not an int[]
cmp dword ptr [rbx+8],0
je short M01_L01        ; dword at +8 of the array is zero (presumably Length == 0)
mov rcx,offset MT_System.Linq.Enumerable+WhereArrayIterator`1[[System.Int32, System.Private.CoreLib]]
call CORINFO_HELP_NEWSFAST
mov rbp,rax
call CORINFO_HELP_GETCURRENTMANAGEDTHREADID
mov [rbp+8],eax
lea rcx,[rbp+18]
mov rdx,rbx
call CORINFO_HELP_ASSIGN_REF
lea rcx,[rbp+20]
mov rdx,rdi
call CORINFO_HELP_ASSIGN_REF
jmp near ptr M01_L04
M01_L01:
; Empty array: run the class-init helper, then return a reference loaded from a fixed
; address (presumably a cached empty enumerable - TODO confirm).
mov rcx,7FF88EB26628
mov edx,4
call CORINFO_HELP_CLASSINIT_SHARED_DYNAMICCLASS
mov rdx,1BF32807CA0
mov rbp,[rdx]
jmp near ptr M01_L04
M01_L02:
mov rdx,rsi
mov rcx,offset MT_System.Collections.Generic.List`1[[System.Int32, System.Private.CoreLib]]
call qword ptr [7FF88E4CD828]; System.Runtime.CompilerServices.CastHelpers.IsInstanceOfClass(Void*, System.Object)
mov rbp,rax
test rbp,rbp
je short M01_L03        ; not a List<int>
mov rcx,offset MT_System.Linq.Enumerable+WhereListIterator`1[[System.Int32, System.Private.CoreLib]]
call CORINFO_HELP_NEWSFAST
mov rsi,rax
call CORINFO_HELP_GETCURRENTMANAGEDTHREADID
mov [rsi+8],eax
lea rcx,[rsi+18]
mov rdx,rbp
call CORINFO_HELP_ASSIGN_REF
lea rcx,[rsi+20]
mov rdx,rdi
call CORINFO_HELP_ASSIGN_REF
mov rbp,rsi
jmp short M01_L04
M01_L03:
; Fallback for any other source type.
mov rcx,offset MT_System.Linq.Enumerable+WhereEnumerableIterator`1[[System.Int32, System.Private.CoreLib]]
call CORINFO_HELP_NEWSFAST
mov rbp,rax
call CORINFO_HELP_GETCURRENTMANAGEDTHREADID
mov [rbp+8],eax
lea rcx,[rbp+18]
mov rdx,rsi
call CORINFO_HELP_ASSIGN_REF
lea rcx,[rbp+20]
mov rdx,rdi
call CORINFO_HELP_ASSIGN_REF
M01_L04:
mov rax,rbp             ; common exit: return the selected iterator
add rsp,28
pop rbx
pop rbp
pop rsi
pop rdi
ret
M01_L05:
; Null source: call a helper with ecx = 0x10 (presumably a throw helper taking an argument id);
; the int 3 after it indicates the call is not expected to return.
mov ecx,10
call qword ptr [7FF88EC27798]
int 3
M01_L06:
; Null predicate: same helper with ecx = 0x0C.
mov ecx,0C
call qword ptr [7FF88EC27798]
int 3
; Total bytes of code 375

; System.Linq.Enumerable.TryGetSingle[[System.Int32, System.Private.CoreLib]](System.Collections.Generic.IEnumerable`1<Int32>, Boolean ByRef)
; Disassembly of System.Linq.Enumerable.TryGetSingle<int>(IEnumerable<int>, Boolean ByRef).
; In:   rcx = source (kept in rdi), rdx = address of the bool out value (kept in rsi)
; Out:  eax = the single element (0 when the byte at [rsi] is set to 0)
; Frame: rbp = rsp+50; [rbp-30] holds the frame's rsp for the funclet at M02_L07,
;        [rbp-28] holds the enumerator, [rbp-1C] holds the pending return value.
; Interface calls go through indirection cells loaded into r11; the method names given in the
; comments below are inferred from call order and are not visible in the listing.
push rbp
push rdi
push rsi
push rbx
sub rsp,38
lea rbp,[rsp+50]
mov [rbp-30],rsp        ; saved so the funclet can rebuild rbp from it
mov rdi,rcx
mov rsi,rdx
test rdi,rdi
je near ptr M02_L05     ; null source
mov rdx,rdi
mov rcx,offset MT_System.Collections.Generic.IList`1[[System.Int32, System.Private.CoreLib]]
call qword ptr [7FF88E4CD810]; System.Runtime.CompilerServices.CastHelpers.IsInstanceOfInterface(Void*, System.Object)
mov rbx,rax
test rbx,rbx
je short M02_L01        ; not an IList<int>: enumerate instead
; IList<int> path: branch on the integer returned by the first interface call (presumably Count).
mov rcx,rbx
mov r11,7FF88E3205F8
call qword ptr [r11]
test eax,eax
je short M02_L00        ; 0 -> not found
cmp eax,1
jne near ptr M02_L06    ; more than one -> throw path
mov byte ptr [rsi],1    ; exactly one: found = true
mov rcx,rbx
mov r11,7FF88E320600
xor edx,edx             ; second argument 0 (presumably the index for get_Item)
call qword ptr [r11]
nop
add rsp,38
pop rbx
pop rsi
pop rdi
pop rbp
ret
M02_L00:
mov byte ptr [rsi],0    ; found = false, return 0
xor eax,eax
add rsp,38
pop rbx
pop rsi
pop rdi
pop rbp
ret
M02_L01:
; Enumerator path: obtain an object from the source (presumably GetEnumerator) and keep it
; in [rbp-28] so the funclet can reach it.
mov rcx,rdi
mov r11,7FF88E3205D0
call qword ptr [r11]
mov rcx,rax
mov [rbp-28],rcx
mov r11,7FF88E3205D8
call qword ptr [r11]    ; presumably MoveNext
test eax,eax
jne short M02_L02
mov byte ptr [rsi],0    ; no elements: found = false, result 0
xor ecx,ecx
mov [rbp-1C],ecx
jmp short M02_L04
M02_L02:
mov rcx,[rbp-28]
mov r11,7FF88E3205E0
call qword ptr [r11]    ; presumably Current
mov edi,eax
mov rcx,[rbp-28]
mov r11,7FF88E3205E8
call qword ptr [r11]    ; presumably a second MoveNext
test eax,eax
jne short M02_L03
mov byte ptr [rsi],1    ; exactly one element: found = true, result = that element
mov [rbp-1C],edi
jmp short M02_L04
M02_L03:
; More than one element: call the same cell the funclet uses (presumably Dispose), then throw.
mov rcx,[rbp-28]
mov r11,7FF88E3205F0
call qword ptr [r11]
jmp short M02_L06
M02_L04:
; Normal exit: run the funclet at M02_L07 with rcx = this frame's rsp, then return [rbp-1C].
mov rcx,rsp
call M02_L07
nop
mov eax,[rbp-1C]
add rsp,38
pop rbx
pop rsi
pop rdi
pop rbp
ret
M02_L05:
; Null source: helper call with ecx = 0x10; int 3 indicates it is not expected to return.
mov ecx,10
call qword ptr [7FF88EC27798]
int 3
M02_L06:
; More than one element: clear the out value and call a helper that is not expected to return.
mov byte ptr [rsi],0
call qword ptr [7FF88EC277C8]
int 3
M02_L07:
; Funclet (presumably the finally block): rebuilds the parent's rbp from the rsp value stored
; at [rcx+20] (the slot written as [rbp-30] in the prologue) and calls the cleanup cell on the
; enumerator if it is non-null.
push rbp
push rdi
push rsi
push rbx
sub rsp,28
mov rbp,[rcx+20]
mov [rsp+20],rbp
lea rbp,[rbp+50]
mov rcx,[rbp-28]
test rcx,rcx
je short M02_L08
mov r11,7FF88E3205F0
call qword ptr [r11]
M02_L08:
nop
add rsp,28
pop rbx
pop rsi
pop rdi
pop rbp
ret
; Total bytes of code 346

Compare Jit Disasm

; System.Linq.Tests.Perf_Enumerable.WhereSingle_LastElementMatches(System.Linq.Tests.LinqTestData)
; Disassembly of System.Linq.Tests.Perf_Enumerable.WhereSingle_LastElementMatches(LinqTestData).
; In:   rdx = the LinqTestData argument; its field at +8 is used as the sequence
; Flow: fetch (or lazily create and cache) a Func<int,bool> delegate, call
;       Enumerable.Where(sequence, delegate), pass the result to TryGetSingle, and call a
;       helper that is not expected to return when TryGetSingle reports "not found".
; Out:  eax is left as returned by TryGetSingle.
push rdi
push rsi
sub rsp,28              ; shadow space + alignment; [rsp+20] doubles as the bool out slot
mov rsi,[rdx+8]
mov rcx,19F928071F0
mov rdx,[rcx]           ; cached delegate, if any
test rdx,rdx
jne short M00_L00
; First use: allocate the delegate, fill in the object at +8 and the code pointer at +18,
; then publish it to the cache slot.
mov rcx,offset MT_System.Func`2[[System.Int32, System.Private.CoreLib],[System.Boolean, System.Private.CoreLib]]
call CORINFO_HELP_NEWSFAST
mov rdi,rax
mov rdx,19F92807190
mov rdx,[rdx]
lea rcx,[rdi+8]
call CORINFO_HELP_ASSIGN_REF
mov rdx,7FFCED238138
mov [rdi+18],rdx
mov rcx,19F928071F0
mov rdx,rdi
call CORINFO_HELP_ASSIGN_REF
mov rdx,rdi
M00_L00:
mov rcx,rsi
call qword ptr [7FFCED239318]; System.Linq.Enumerable.Where[[System.Int32, System.Private.CoreLib]](System.Collections.Generic.IEnumerable`1<Int32>, System.Func`2<Int32,Boolean>)
mov rcx,rax
lea rdx,[rsp+20]
call qword ptr [7FFCED2395A0]; System.Linq.Enumerable.TryGetSingle[[System.Int32, System.Private.CoreLib]](System.Collections.Generic.IEnumerable`1<Int32>, Boolean ByRef)
cmp byte ptr [rsp+20],0
je short M00_L01        ; found == false
add rsp,28
pop rsi
pop rdi
ret
M00_L01:
; Not found: presumably a throw helper; int 3 indicates the call is not expected to return.
call qword ptr [7FFCED0177F8]
int 3
; Total bytes of code 147

; System.Linq.Enumerable.Where[[System.Int32, System.Private.CoreLib]](System.Collections.Generic.IEnumerable`1<Int32>, System.Func`2<Int32,Boolean>)
; Disassembly of System.Linq.Enumerable.Where<int>(IEnumerable<int>, Func<int,bool>).
; In:   rcx = source (kept in rsi), rdx = predicate (kept in rdi)
; Out:  rax = the iterator object chosen by the type tests below
; Dispatch order: Iterator<int> -> Int32[] (empty / non-empty) -> List<int> -> generic fallback.
; Each freshly allocated iterator gets the current managed thread id at +8, the source at +18
; and the predicate at +20.
push rdi
push rsi
push rbp
push rbx
sub rsp,28              ; 0x28 bytes: 32-byte shadow space + 8 to keep rsp 16-aligned at calls
mov rsi,rcx
mov rdi,rdx
test rsi,rsi
je near ptr M01_L05     ; null source
test rdi,rdi
je near ptr M01_L06     ; null predicate
mov rdx,rsi
mov rcx,offset MT_System.Linq.Enumerable+Iterator`1[[System.Int32, System.Private.CoreLib]]
call qword ptr [7FFCEC8BD828]; System.Runtime.CompilerServices.CastHelpers.IsInstanceOfClass(Void*, System.Object)
test rax,rax
je short M01_L00
; Source is already an Iterator<int>: tail-call a virtual method on it with
; rcx = iterator, rdx = predicate (presumably Iterator<int>.Where - TODO confirm).
mov rcx,rax
mov rdx,rdi
mov rax,[rax]
mov rax,[rax+48]
add rsp,28
pop rbx
pop rbp
pop rsi
pop rdi
jmp qword ptr [rax+10]
M01_L00:
mov rdx,rsi
mov rcx,offset MT_System.Int32[]
call qword ptr [7FFCEC8BD7F8]; System.Runtime.CompilerServices.CastHelpers.IsInstanceOfAny(Void*, System.Object)
mov rbx,rax
test rbx,rbx
je short M01_L02        ; not an int[]
cmp dword ptr [rbx+8],0
je short M01_L01        ; dword at +8 of the array is zero (presumably Length == 0)
mov rcx,offset MT_System.Linq.Enumerable+WhereArrayIterator`1[[System.Int32, System.Private.CoreLib]]
call CORINFO_HELP_NEWSFAST
mov rbp,rax
call CORINFO_HELP_GETCURRENTMANAGEDTHREADID
mov [rbp+8],eax
lea rcx,[rbp+18]
mov rdx,rbx
call CORINFO_HELP_ASSIGN_REF
lea rcx,[rbp+20]
mov rdx,rdi
call CORINFO_HELP_ASSIGN_REF
jmp near ptr M01_L04
M01_L01:
; Empty array: run the class-init helper, then return a reference loaded from a fixed
; address (presumably a cached empty enumerable - TODO confirm).
mov rcx,7FFCECF16620
mov edx,4
call CORINFO_HELP_CLASSINIT_SHARED_DYNAMICCLASS
mov rdx,19F92807CA8
mov rbp,[rdx]
jmp near ptr M01_L04
M01_L02:
mov rdx,rsi
mov rcx,offset MT_System.Collections.Generic.List`1[[System.Int32, System.Private.CoreLib]]
call qword ptr [7FFCEC8BD828]; System.Runtime.CompilerServices.CastHelpers.IsInstanceOfClass(Void*, System.Object)
mov rbp,rax
test rbp,rbp
je short M01_L03        ; not a List<int>
mov rcx,offset MT_System.Linq.Enumerable+WhereListIterator`1[[System.Int32, System.Private.CoreLib]]
call CORINFO_HELP_NEWSFAST
mov rsi,rax
call CORINFO_HELP_GETCURRENTMANAGEDTHREADID
mov [rsi+8],eax
lea rcx,[rsi+18]
mov rdx,rbp
call CORINFO_HELP_ASSIGN_REF
lea rcx,[rsi+20]
mov rdx,rdi
call CORINFO_HELP_ASSIGN_REF
mov rbp,rsi
jmp short M01_L04
M01_L03:
; Fallback for any other source type.
mov rcx,offset MT_System.Linq.Enumerable+WhereEnumerableIterator`1[[System.Int32, System.Private.CoreLib]]
call CORINFO_HELP_NEWSFAST
mov rbp,rax
call CORINFO_HELP_GETCURRENTMANAGEDTHREADID
mov [rbp+8],eax
lea rcx,[rbp+18]
mov rdx,rsi
call CORINFO_HELP_ASSIGN_REF
lea rcx,[rbp+20]
mov rdx,rdi
call CORINFO_HELP_ASSIGN_REF
M01_L04:
mov rax,rbp             ; common exit: return the selected iterator
add rsp,28
pop rbx
pop rbp
pop rsi
pop rdi
ret
M01_L05:
; Null source: call a helper with ecx = 0x10 (presumably a throw helper taking an argument id);
; the int 3 after it indicates the call is not expected to return.
mov ecx,10
call qword ptr [7FFCED017798]
int 3
M01_L06:
; Null predicate: same helper with ecx = 0x0C.
mov ecx,0C
call qword ptr [7FFCED017798]
int 3
; Total bytes of code 375

; System.Linq.Enumerable.TryGetSingle[[System.Int32, System.Private.CoreLib]](System.Collections.Generic.IEnumerable`1<Int32>, Boolean ByRef)
; Disassembly of System.Linq.Enumerable.TryGetSingle<int>(IEnumerable<int>, Boolean ByRef).
; In:   rcx = source (kept in rdi), rdx = address of the bool out value (kept in rsi)
; Out:  eax = the single element (0 when the byte at [rsi] is set to 0)
; Frame: rbp = rsp+50; [rbp-30] holds the frame's rsp for the funclet at M02_L07,
;        [rbp-28] holds the enumerator, [rbp-1C] holds the pending return value.
; Interface calls go through indirection cells loaded into r11; the method names given in the
; comments below are inferred from call order and are not visible in the listing.
push rbp
push rdi
push rsi
push rbx
sub rsp,38
lea rbp,[rsp+50]
mov [rbp-30],rsp        ; saved so the funclet can rebuild rbp from it
mov rdi,rcx
mov rsi,rdx
test rdi,rdi
je near ptr M02_L05     ; null source
mov rdx,rdi
mov rcx,offset MT_System.Collections.Generic.IList`1[[System.Int32, System.Private.CoreLib]]
call qword ptr [7FFCEC8BD810]; System.Runtime.CompilerServices.CastHelpers.IsInstanceOfInterface(Void*, System.Object)
mov rbx,rax
test rbx,rbx
je short M02_L01        ; not an IList<int>: enumerate instead
; IList<int> path: branch on the integer returned by the first interface call (presumably Count).
mov rcx,rbx
mov r11,7FFCEC7105F8
call qword ptr [r11]
test eax,eax
je short M02_L00        ; 0 -> not found
cmp eax,1
jne near ptr M02_L06    ; more than one -> throw path
mov byte ptr [rsi],1    ; exactly one: found = true
mov rcx,rbx
mov r11,7FFCEC710600
xor edx,edx             ; second argument 0 (presumably the index for get_Item)
call qword ptr [r11]
nop
add rsp,38
pop rbx
pop rsi
pop rdi
pop rbp
ret
M02_L00:
mov byte ptr [rsi],0    ; found = false, return 0
xor eax,eax
add rsp,38
pop rbx
pop rsi
pop rdi
pop rbp
ret
M02_L01:
; Enumerator path: obtain an object from the source (presumably GetEnumerator) and keep it
; in [rbp-28] so the funclet can reach it.
mov rcx,rdi
mov r11,7FFCEC7105D0
call qword ptr [r11]
mov rcx,rax
mov [rbp-28],rcx
mov r11,7FFCEC7105D8
call qword ptr [r11]    ; presumably MoveNext
test eax,eax
jne short M02_L02
mov byte ptr [rsi],0    ; no elements: found = false, result 0
xor ecx,ecx
mov [rbp-1C],ecx
jmp short M02_L04
M02_L02:
mov rcx,[rbp-28]
mov r11,7FFCEC7105E0
call qword ptr [r11]    ; presumably Current
mov edi,eax
mov rcx,[rbp-28]
mov r11,7FFCEC7105E8
call qword ptr [r11]    ; presumably a second MoveNext
test eax,eax
jne short M02_L03
mov byte ptr [rsi],1    ; exactly one element: found = true, result = that element
mov [rbp-1C],edi
jmp short M02_L04
M02_L03:
; More than one element: call the same cell the funclet uses (presumably Dispose), then throw.
mov rcx,[rbp-28]
mov r11,7FFCEC7105F0
call qword ptr [r11]
jmp short M02_L06
M02_L04:
; Normal exit: run the funclet at M02_L07 with rcx = this frame's rsp, then return [rbp-1C].
mov rcx,rsp
call M02_L07
nop
mov eax,[rbp-1C]
add rsp,38
pop rbx
pop rsi
pop rdi
pop rbp
ret
M02_L05:
; Null source: helper call with ecx = 0x10; int 3 indicates it is not expected to return.
mov ecx,10
call qword ptr [7FFCED017798]
int 3
M02_L06:
; More than one element: clear the out value and call a helper that is not expected to return.
mov byte ptr [rsi],0
call qword ptr [7FFCED0177C8]
int 3
M02_L07:
; Funclet (presumably the finally block): rebuilds the parent's rbp from the rsp value stored
; at [rcx+20] (the slot written as [rbp-30] in the prologue) and calls the cleanup cell on the
; enumerator if it is non-null.
push rbp
push rdi
push rsi
push rbx
sub rsp,28
mov rbp,[rcx+20]
mov [rsp+20],rbp
lea rbp,[rbp+50]
mov rcx,[rbp-28]
test rcx,rcx
je short M02_L08
mov r11,7FFCEC7105F0
call qword ptr [r11]
M02_L08:
nop
add rsp,28
pop rbx
pop rsi
pop rdi
pop rbp
ret
; Total bytes of code 346

Docs
Profiling workflow for dotnet/runtime repository

Run Information
Improvements in System.Collections.ContainsKeyFalse<Int32, Int32>
Repro
git clone https://github.com/dotnet/performance.git
py .\performance\scripts\benchmarks_ci.py -f net8.0 --filter 'System.Collections.ContainsKeyFalse<Int32, Int32>*'
Payloads
Histogram
System.Collections.ContainsKeyFalse<Int32, Int32>.IDictionary(Size: 512)
Description of detection logic
Docs
Profiling workflow for dotnet/runtime repository
Run Information
Improvements in System.Collections.IterateForEach<String>
Repro
git clone https://github.com/dotnet/performance.git
py .\performance\scripts\benchmarks_ci.py -f net8.0 --filter 'System.Collections.IterateForEach<String>*'
Payloads
Histogram
System.Collections.IterateForEach<String>.IEnumerable(Size: 512)
Description of detection logic
Description of detection logic
Docs
Profiling workflow for dotnet/runtime repository
Improvements in IfStatements.IfStatements
Repro
git clone https://github.com/dotnet/performance.git
py .\performance\scripts\benchmarks_ci.py -f net8.0 --filter 'IfStatements.IfStatements*'
Payloads
Histogram
IfStatements.IfStatements.And
Description of detection logic
; IfStatements.IfStatements.AndInner(Int32, Int32)
;
; In:   ecx = first argument, edx = second argument
; Behaviour: when bit 0 of both arguments is clear, the first argument is replaced by 5;
;            then Consume(ecx, edx, 0, 0) is called. The two low-bit tests are merged into a
;            single OR + one conditional branch.
sub rsp,28              ; 0x28 bytes: 32-byte shadow space + 8 to keep rsp 16-aligned at the call
mov r8d,ecx
and r8d,1               ; low bit of the first argument
mov r9d,edx
and r9d,1               ; low bit of the second argument
or r8d,r9d
jne short M01_L00       ; at least one low bit set: keep ecx as is
mov ecx,5
M01_L00:
xor r8d,r8d             ; third and fourth arguments are zero
xor r9d,r9d
call qword ptr [7FF952BF36A8]; IfStatements.IfStatements.Consume(Int32, Int32, Int32, Int32)
nop
add rsp,28
ret
; Total bytes of code 46

Compare Jit Disasm

; IfStatements.IfStatements.And()
; Disassembly of IfStatements.IfStatements.And().
; Loop: for each index i, load elements i and i+1 of an int array (reference re-read from a
; fixed location every iteration) and call AndInner(a[i], a[i+1]); continue while the
; incremented index is below 0x2710 (10000).
; Register roles: esi = index, rdi = address holding the array reference.
; Array layout as used here: dword at +8 is the bound for the range check, elements start at +10.
push rdi
push rsi
sub rsp,28              ; 0x28 bytes: 32-byte shadow space + 8 to keep rsp 16-aligned at calls
xor esi,esi
mov rdi,28046406218
M00_L00:
mov rcx,[rdi]
mov rdx,rcx
mov eax,[rdx+8]
cmp esi,eax
jae short M00_L01       ; unsigned compare: index out of range
mov r8d,esi
mov edx,[rdx+r8*4+10]   ; edx = a[i]
inc esi
cmp esi,eax
jae short M00_L01       ; i+1 out of range
mov eax,esi
mov eax,[rcx+rax*4+10]  ; eax = a[i+1]
mov ecx,edx
mov edx,eax
call qword ptr [7FFC29203708]; IfStatements.IfStatements.AndInner(Int32, Int32)
cmp esi,2710
jl short M00_L00
add rsp,28
pop rsi
pop rdi
ret
M00_L01:
; Range-check failure: runtime helper; int 3 indicates the call is not expected to return.
call CORINFO_HELP_RNGCHKFAIL
int 3
; Total bytes of code 82

; IfStatements.IfStatements.AndInner(Int32, Int32)
; Disassembly of IfStatements.IfStatements.AndInner(Int32, Int32).
; In:   ecx = first argument, edx = second argument
; Behaviour: when bit 0 of both arguments is clear, the first argument is replaced by 5;
;            then Consume(ecx, edx, 0, 0) is called. The two low-bit tests are merged into a
;            single OR + one conditional branch.
sub rsp,28              ; 0x28 bytes: 32-byte shadow space + 8 to keep rsp 16-aligned at the call
mov r8d,ecx
and r8d,1               ; low bit of the first argument
mov r9d,edx
and r9d,1               ; low bit of the second argument
or r8d,r9d
jne short M01_L00       ; at least one low bit set: keep ecx as is
mov ecx,5
M01_L00:
xor r8d,r8d             ; third and fourth arguments are zero
xor r9d,r9d
call qword ptr [7FFC292036A8]; IfStatements.IfStatements.Consume(Int32, Int32, Int32, Int32)
nop
add rsp,28
ret
; Total bytes of code 46

Docs
Profiling workflow for dotnet/runtime repository
Run Information
Improvements in System.Numerics.Tests.Perf_Matrix4x4
Test Report
Repro
Payloads
Baseline
Compare
Histogram
System.Numerics.Tests.Perf_Matrix4x4.CreateLookAtBenchmark
Description of detection logic
Compare Jit Disasm
System.Numerics.Tests.Perf_Matrix4x4.CreateBillboardBenchmark
Description of detection logic
Compare Jit Disasm
System.Numerics.Tests.Perf_Matrix4x4.CreateWorldBenchmark
Description of detection logic
Compare Jit Disasm
System.Numerics.Tests.Perf_Matrix4x4.CreateConstrainedBillboardBenchmark
Description of detection logic
Compare Jit Disasm
Docs
Profiling workflow for dotnet/runtime repository
Benchmarking workflow for dotnet/runtime repository
The text was updated successfully, but these errors were encountered: