Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(win/video): don't offload chroma subsampling math to texture sampler when downscaling #3014

Merged
merged 5 commits into from
Sep 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
189 changes: 112 additions & 77 deletions src/platform/windows/display_vram.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,10 @@
blob_t convert_yuv420_packed_uv_type0_ps_linear_hlsl;
blob_t convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl;
blob_t convert_yuv420_packed_uv_type0_vs_hlsl;
blob_t convert_yuv420_packed_uv_type0s_ps_hlsl;
blob_t convert_yuv420_packed_uv_type0s_ps_linear_hlsl;
blob_t convert_yuv420_packed_uv_type0s_ps_perceptual_quantizer_hlsl;
blob_t convert_yuv420_packed_uv_type0s_vs_hlsl;
blob_t convert_yuv420_planar_y_ps_hlsl;
blob_t convert_yuv420_planar_y_ps_linear_hlsl;
blob_t convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl;
Expand Down Expand Up @@ -488,6 +492,110 @@
frame_texture->AddRef();
output_texture.reset(frame_texture);

HRESULT status = S_OK;

Check warning on line 495 in src/platform/windows/display_vram.cpp

View check run for this annotation

Codecov / codecov/patch

src/platform/windows/display_vram.cpp#L495

Added line #L495 was not covered by tests

// Creates a vertex shader from the compiled blob `x` and stores it in `y`.
// Expects `status` (HRESULT) and `device` to be in scope at the expansion site.
// On failure it logs the blob's identifier (stringified via #x) together with
// the HRESULT and returns -1 from the ENCLOSING function.
// NOTE(review): expands to a bare `if` statement, so only use it as a full
// statement inside braces (never directly before an `else`).
#define create_vertex_shader_helper(x, y) \
if (FAILED(status = device->CreateVertexShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \
BOOST_LOG(error) << "Failed to create vertex shader " << #x << ": " << util::log_hex(status); \
return -1; \
}
// Creates a pixel shader from the compiled blob `x` and stores it in `y`.
// Expects `status` (HRESULT) and `device` to be in scope at the expansion site.
// On failure it logs the blob's identifier (stringified via #x) together with
// the HRESULT and returns -1 from the ENCLOSING function.
// NOTE(review): expands to a bare `if` statement, so only use it as a full
// statement inside braces (never directly before an `else`).
#define create_pixel_shader_helper(x, y) \
if (FAILED(status = device->CreatePixelShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \
BOOST_LOG(error) << "Failed to create pixel shader " << #x << ": " << util::log_hex(status); \
return -1; \
}

const bool downscaling = display->width > width || display->height > height;

switch (format) {
case DXGI_FORMAT_NV12:

Check warning on line 511 in src/platform/windows/display_vram.cpp

View check run for this annotation

Codecov / codecov/patch

src/platform/windows/display_vram.cpp#L511

Added line #L511 was not covered by tests
// Semi-planar 8-bit YUV 4:2:0
create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs);
create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps);
create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
if (downscaling) {
create_vertex_shader_helper(convert_yuv420_packed_uv_type0s_vs_hlsl, convert_UV_vs);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_hlsl, convert_UV_ps);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_linear_hlsl, convert_UV_fp16_ps);
}
else {
create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps);
}
break;

case DXGI_FORMAT_P010:

Check warning on line 528 in src/platform/windows/display_vram.cpp

View check run for this annotation

Codecov / codecov/patch

src/platform/windows/display_vram.cpp#L528

Added line #L528 was not covered by tests
// Semi-planar 16-bit YUV 4:2:0, 10 most significant bits store the value
create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs);
create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps);
if (display->is_hdr()) {
create_pixel_shader_helper(convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
}
else {
create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
}
if (downscaling) {
create_vertex_shader_helper(convert_yuv420_packed_uv_type0s_vs_hlsl, convert_UV_vs);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_hlsl, convert_UV_ps);
if (display->is_hdr()) {
create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_perceptual_quantizer_hlsl, convert_UV_fp16_ps);
}
else {
create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_linear_hlsl, convert_UV_fp16_ps);
}
}
else {
create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps);
if (display->is_hdr()) {
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl, convert_UV_fp16_ps);
}
else {
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps);
}
}
break;

case DXGI_FORMAT_R16_UINT:

Check warning on line 560 in src/platform/windows/display_vram.cpp

View check run for this annotation

Codecov / codecov/patch

src/platform/windows/display_vram.cpp#L560

Added line #L560 was not covered by tests
// Planar 16-bit YUV 4:4:4, 10 most significant bits store the value
create_vertex_shader_helper(convert_yuv444_planar_vs_hlsl, convert_Y_or_YUV_vs);
create_pixel_shader_helper(convert_yuv444_planar_ps_hlsl, convert_Y_or_YUV_ps);
if (display->is_hdr()) {
create_pixel_shader_helper(convert_yuv444_planar_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
}
else {
create_pixel_shader_helper(convert_yuv444_planar_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
}
break;

case DXGI_FORMAT_AYUV:

Check warning on line 572 in src/platform/windows/display_vram.cpp

View check run for this annotation

Codecov / codecov/patch

src/platform/windows/display_vram.cpp#L572

Added line #L572 was not covered by tests
// Packed 8-bit YUV 4:4:4
create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs);
create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_hlsl, convert_Y_or_YUV_ps);
create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
break;

case DXGI_FORMAT_Y410:

Check warning on line 579 in src/platform/windows/display_vram.cpp

View check run for this annotation

Codecov / codecov/patch

src/platform/windows/display_vram.cpp#L579

Added line #L579 was not covered by tests
// Packed 10-bit YUV 4:4:4
create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs);
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_hlsl, convert_Y_or_YUV_ps);
if (display->is_hdr()) {
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
}
else {
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
}
break;

default:

Check warning on line 591 in src/platform/windows/display_vram.cpp

View check run for this annotation

Codecov / codecov/patch

src/platform/windows/display_vram.cpp#L591

Added line #L591 was not covered by tests
BOOST_LOG(error) << "Unable to create shaders because of the unrecognized surface format";
return -1;

Check warning on line 593 in src/platform/windows/display_vram.cpp

View check run for this annotation

Codecov / codecov/patch

src/platform/windows/display_vram.cpp#L593

Added line #L593 was not covered by tests
}

#undef create_vertex_shader_helper
#undef create_pixel_shader_helper

auto out_width = width;
auto out_height = height;

Expand Down Expand Up @@ -676,83 +784,6 @@
BOOST_LOG(warning) << "Failed to increase encoding GPU thread priority. Please run application as administrator for optimal performance.";
}

#define create_vertex_shader_helper(x, y) \
if (FAILED(status = device->CreateVertexShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \
BOOST_LOG(error) << "Failed to create vertex shader " << #x << ": " << util::log_hex(status); \
return -1; \
}
#define create_pixel_shader_helper(x, y) \
if (FAILED(status = device->CreatePixelShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \
BOOST_LOG(error) << "Failed to create pixel shader " << #x << ": " << util::log_hex(status); \
return -1; \
}

switch (format) {
case DXGI_FORMAT_NV12:
// Semi-planar 8-bit YUV 4:2:0
create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs);
create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps);
create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps);
break;

case DXGI_FORMAT_P010:
// Semi-planar 16-bit YUV 4:2:0, 10 most significant bits store the value
create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs);
create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps);
create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps);
if (display->is_hdr()) {
create_pixel_shader_helper(convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl, convert_UV_fp16_ps);
}
else {
create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps);
}
break;

case DXGI_FORMAT_R16_UINT:
// Planar 16-bit YUV 4:4:4, 10 most significant bits store the value
create_vertex_shader_helper(convert_yuv444_planar_vs_hlsl, convert_Y_or_YUV_vs);
create_pixel_shader_helper(convert_yuv444_planar_ps_hlsl, convert_Y_or_YUV_ps);
if (display->is_hdr()) {
create_pixel_shader_helper(convert_yuv444_planar_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
}
else {
create_pixel_shader_helper(convert_yuv444_planar_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
}
break;

case DXGI_FORMAT_AYUV:
// Packed 8-bit YUV 4:4:4
create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs);
create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_hlsl, convert_Y_or_YUV_ps);
create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
break;

case DXGI_FORMAT_Y410:
// Packed 10-bit YUV 4:4:4
create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs);
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_hlsl, convert_Y_or_YUV_ps);
if (display->is_hdr()) {
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
}
else {
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
}
break;

default:
BOOST_LOG(error) << "Unable to create shaders because of the unrecognized surface format";
return -1;
}

#undef create_vertex_shader_helper
#undef create_pixel_shader_helper

auto default_color_vectors = ::video::color_vectors_from_colorspace(::video::colorspace_e::rec601, false);
if (!default_color_vectors) {
BOOST_LOG(error) << "Missing color vectors for Rec. 601"sv;
Expand Down Expand Up @@ -1923,6 +1954,10 @@
compile_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear);
compile_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_perceptual_quantizer);
compile_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs);
compile_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps);
compile_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_linear);
compile_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_perceptual_quantizer);
compile_vertex_shader_helper(convert_yuv420_packed_uv_type0s_vs);
compile_pixel_shader_helper(convert_yuv420_planar_y_ps);
compile_pixel_shader_helper(convert_yuv420_planar_y_ps_linear);
compile_pixel_shader_helper(convert_yuv420_planar_y_ps_perceptual_quantizer);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,5 @@ cbuffer rotate_texture_steps_cbuffer : register(b1) {

vertex_t main_vs(uint vertex_id : SV_VertexID)
{
return generate_fullscreen_triangle_vertex(vertex_id, subsample_offset.x, rotate_texture_steps);
return generate_fullscreen_triangle_vertex(vertex_id, subsample_offset, rotate_texture_steps);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Pixel-shader variant assembled purely from includes: convert_base.hlsl plus
// the shared packed-UV pixel-shader base, compiled with LEFT_SUBSAMPLING_SCALE set.
// NOTE(review): presumably this is convert_yuv420_packed_uv_type0s_ps.hlsl — confirm.
#include "include/convert_base.hlsl"

// Must be defined before the base include below so that the include sees it.
#define LEFT_SUBSAMPLING_SCALE

#include "include/convert_yuv420_packed_uv_ps_base.hlsl"
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Pixel-shader variant assembled purely from includes: convert_linear_base.hlsl plus
// the shared packed-UV pixel-shader base, compiled with LEFT_SUBSAMPLING_SCALE set.
// NOTE(review): presumably this is convert_yuv420_packed_uv_type0s_ps_linear.hlsl — confirm.
#include "include/convert_linear_base.hlsl"

// Must be defined before the base include below so that the include sees it.
#define LEFT_SUBSAMPLING_SCALE

#include "include/convert_yuv420_packed_uv_ps_base.hlsl"
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Pixel-shader variant assembled purely from includes: convert_perceptual_quantizer_base.hlsl
// plus the shared packed-UV pixel-shader base, compiled with LEFT_SUBSAMPLING_SCALE set.
// NOTE(review): presumably this is convert_yuv420_packed_uv_type0s_ps_perceptual_quantizer.hlsl — confirm.
#include "include/convert_perceptual_quantizer_base.hlsl"

// Must be defined before the base include below so that the include sees it.
#define LEFT_SUBSAMPLING_SCALE

#include "include/convert_yuv420_packed_uv_ps_base.hlsl"
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Vertex shader compiled with LEFT_SUBSAMPLING_SCALE; it forwards its constant-buffer
// values to the shared fullscreen-triangle generator from include/base_vs.hlsl.
// NOTE(review): presumably this is convert_yuv420_packed_uv_type0s_vs.hlsl — confirm.

// Constant buffer at register b0: two-component subsample offset.
cbuffer subsample_offset_cbuffer : register(b0) {
float2 subsample_offset;
};

// Constant buffer at register b1: rotation step count handed to the generator.
cbuffer rotate_texture_steps_cbuffer : register(b1) {
int rotate_texture_steps;
};

// Must be defined before the include so base_vs.hlsl takes its
// LEFT_SUBSAMPLING_SCALE branch when filling in the vertex outputs.
#define LEFT_SUBSAMPLING_SCALE
#include "include/base_vs.hlsl"

// Entry point: returns the vertex produced by generate_fullscreen_triangle_vertex
// for the given SV_VertexID, passing the full float2 offset and the rotation steps.
vertex_t main_vs(uint vertex_id : SV_VertexID)
{
return generate_fullscreen_triangle_vertex(vertex_id, subsample_offset, rotate_texture_steps);
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ cbuffer rotate_texture_steps_cbuffer : register(b1) {

vertex_t main_vs(uint vertex_id : SV_VertexID)
{
return generate_fullscreen_triangle_vertex(vertex_id, rotate_texture_steps);
return generate_fullscreen_triangle_vertex(vertex_id, float2(0, 0), rotate_texture_steps);
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ cbuffer rotate_texture_steps_cbuffer : register(b1) {

vertex_t main_vs(uint vertex_id : SV_VertexID)
{
return generate_fullscreen_triangle_vertex(vertex_id, rotate_texture_steps);
return generate_fullscreen_triangle_vertex(vertex_id, float2(0, 0), rotate_texture_steps);
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ cbuffer color_matrix_cbuffer : register(b3) {

vertex_t main_vs(uint vertex_id : SV_VertexID)
{
vertex_t output = generate_fullscreen_triangle_vertex(vertex_id % 3, rotate_texture_steps);
vertex_t output = generate_fullscreen_triangle_vertex(vertex_id % 3, float2(0, 0), rotate_texture_steps);

output.viewport = vertex_id / 3;

Expand Down
2 changes: 1 addition & 1 deletion src_assets/windows/assets/shaders/directx/cursor_vs.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ cbuffer rotate_texture_steps_cbuffer : register(b2) {

vertex_t main_vs(uint vertex_id : SV_VertexID)
{
return generate_fullscreen_triangle_vertex(vertex_id, rotate_texture_steps);
return generate_fullscreen_triangle_vertex(vertex_id, float2(0, 0), rotate_texture_steps);
}
23 changes: 15 additions & 8 deletions src_assets/windows/assets/shaders/directx/include/base_vs.hlsl
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@
#include "include/base_vs_types.hlsl"

#if defined(LEFT_SUBSAMPLING)
vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, float subsample_offset, int rotate_texture_steps)
#elif defined(TOPLEFT_SUBSAMPLING)
vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, float2 subsample_offset, int rotate_texture_steps)
#else
vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, int rotate_texture_steps)
#endif
{
vertex_t output;
float2 tex_coord;
Expand All @@ -30,11 +24,24 @@ vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, int rotate_texture_
sin(rotation_radians), cos(rotation_radians) };
float2 rotation_center = { 0.5, 0.5 };
tex_coord = round(rotation_center + mul(rotation_matrix, tex_coord - rotation_center));

if (rotate_texture_steps % 2) {
subsample_offset.xy = subsample_offset.yx;
}
}

#if defined(LEFT_SUBSAMPLING)
output.tex_right_left_center = float3(tex_coord.x, tex_coord.x - subsample_offset, tex_coord.y);
#elif defined (TOPLEFT_SUBSAMPLING)
output.tex_right_left_center = float3(tex_coord.x, tex_coord.x - subsample_offset.x, tex_coord.y);
#elif defined(LEFT_SUBSAMPLING_SCALE)
float2 halfsample_offset = subsample_offset / 2;
float3 right_center_left = float3(tex_coord.x + halfsample_offset.x,
tex_coord.x - halfsample_offset.x,
tex_coord.x - 3 * halfsample_offset.x);
float2 top_bottom = float2(tex_coord.y - halfsample_offset.y,
tex_coord.y + halfsample_offset.y);
output.tex_right_center_left_top = float4(right_center_left, top_bottom.x);
output.tex_right_center_left_bottom = float4(right_center_left, top_bottom.y);
#elif defined(TOPLEFT_SUBSAMPLING)
output.tex_right_left_top = float3(tex_coord.x, tex_coord.x - subsample_offset.x, tex_coord.y - subsample_offset.y);
output.tex_right_left_bottom = float3(tex_coord.x, tex_coord.x - subsample_offset.x, tex_coord.y);
#else
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@ struct vertex_t
float4 viewpoint_pos : SV_Position;
#if defined(LEFT_SUBSAMPLING)
float3 tex_right_left_center : TEXCOORD;
#elif defined (TOPLEFT_SUBSAMPLING)
float3 tex_right_left_top : TEXCOORD;
float3 tex_right_left_bottom : TEXCOORD;
#elif defined(LEFT_SUBSAMPLING_SCALE)
float4 tex_right_center_left_top : TEXCOORD0;
float4 tex_right_center_left_bottom : TEXCOORD1;
#elif defined(TOPLEFT_SUBSAMPLING)
float3 tex_right_left_top : TEXCOORD0;
float3 tex_right_left_bottom : TEXCOORD1;
#else
float2 tex_coord : TEXCOORD;
#endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,15 @@ float2 main_ps(vertex_t input) : SV_Target
float3 rgb_left = image.Sample(def_sampler, input.tex_right_left_center.xz).rgb;
float3 rgb_right = image.Sample(def_sampler, input.tex_right_left_center.yz).rgb;
float3 rgb = CONVERT_FUNCTION((rgb_left + rgb_right) * 0.5);
#elif defined(LEFT_SUBSAMPLING_SCALE)
float3 rgb = image.Sample(def_sampler, input.tex_right_center_left_top.yw).rgb; // top-center
rgb += image.Sample(def_sampler, input.tex_right_center_left_bottom.yw).rgb; // bottom-center
rgb *= 2;
rgb += image.Sample(def_sampler, input.tex_right_center_left_top.xw).rgb; // top-right
rgb += image.Sample(def_sampler, input.tex_right_center_left_top.zw).rgb; // top-left
rgb += image.Sample(def_sampler, input.tex_right_center_left_bottom.xw).rgb; // bottom-right
rgb += image.Sample(def_sampler, input.tex_right_center_left_bottom.zw).rgb; // bottom-left
rgb = CONVERT_FUNCTION(rgb * (1./8));
#elif defined(TOPLEFT_SUBSAMPLING)
float3 rgb_top_left = image.Sample(def_sampler, input.tex_right_left_top.xz).rgb;
float3 rgb_top_right = image.Sample(def_sampler, input.tex_right_left_top.yz).rgb;
Expand Down
Loading