From 2490f8316ad5c01f447dfaf28da375ffbcf78de2 Mon Sep 17 00:00:00 2001 From: ns6089 <61738816+ns6089@users.noreply.github.com> Date: Tue, 13 Aug 2024 20:10:14 +0300 Subject: [PATCH 1/5] Don't use sampler math for chroma if downscaling --- src/platform/windows/display_vram.cpp | 189 +++++++++++------- .../convert_yuv420_packed_uv_type0s_ps.hlsl | 5 + ...ert_yuv420_packed_uv_type0s_ps_linear.hlsl | 5 + ...ked_uv_type0s_ps_perceptual_quantizer.hlsl | 5 + .../convert_yuv420_packed_uv_type0s_vs.hlsl | 15 ++ .../shaders/directx/include/base_vs.hlsl | 12 +- .../directx/include/base_vs_types.hlsl | 9 +- .../convert_yuv420_packed_uv_ps_base.hlsl | 8 + 8 files changed, 167 insertions(+), 81 deletions(-) create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0s_ps.hlsl create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0s_ps_linear.hlsl create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0s_ps_perceptual_quantizer.hlsl create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0s_vs.hlsl diff --git a/src/platform/windows/display_vram.cpp b/src/platform/windows/display_vram.cpp index 96ddff84258..4d40a24b4a3 100644 --- a/src/platform/windows/display_vram.cpp +++ b/src/platform/windows/display_vram.cpp @@ -107,6 +107,10 @@ namespace platf::dxgi { blob_t convert_yuv420_packed_uv_type0_ps_linear_hlsl; blob_t convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl; blob_t convert_yuv420_packed_uv_type0_vs_hlsl; + blob_t convert_yuv420_packed_uv_type0s_ps_hlsl; + blob_t convert_yuv420_packed_uv_type0s_ps_linear_hlsl; + blob_t convert_yuv420_packed_uv_type0s_ps_perceptual_quantizer_hlsl; + blob_t convert_yuv420_packed_uv_type0s_vs_hlsl; blob_t convert_yuv420_planar_y_ps_hlsl; blob_t convert_yuv420_planar_y_ps_linear_hlsl; blob_t convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl; @@ -488,6 +492,110 @@ namespace platf::dxgi { frame_texture->AddRef(); output_texture.reset(frame_texture); + HRESULT status = S_OK; + +#define create_vertex_shader_helper(x, y) \ + if (FAILED(status = device->CreateVertexShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \ + BOOST_LOG(error) << "Failed to create vertex shader " << #x << ": " << util::log_hex(status); \ + return -1; \ + } +#define create_pixel_shader_helper(x, y) \ + if (FAILED(status = device->CreatePixelShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \ + BOOST_LOG(error) << "Failed to create pixel shader " << #x << ": " << util::log_hex(status); \ + return -1; \ + } + + const bool downscaling = display->width != width || display->height != height; + + switch (format) { + case DXGI_FORMAT_NV12: + // Semi-planar 8-bit YUV 4:2:0 + create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs); + create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps); + create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps); + if (downscaling) { + create_vertex_shader_helper(convert_yuv420_packed_uv_type0s_vs_hlsl, convert_UV_vs); + create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_hlsl, convert_UV_ps); + create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_linear_hlsl, convert_UV_fp16_ps); + } + else { + create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs); + create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps); + create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps); + } + break; + + case DXGI_FORMAT_P010: + // Semi-planar 16-bit YUV 4:2:0, 10 most significant bits store the value + create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs); + create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps); + if (display->is_hdr()) { + create_pixel_shader_helper(convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps); + } + else { + create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps); + } + if (downscaling) { + create_vertex_shader_helper(convert_yuv420_packed_uv_type0s_vs_hlsl, convert_UV_vs); + create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_hlsl, convert_UV_ps); + if (display->is_hdr()) { + create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_perceptual_quantizer_hlsl, convert_UV_fp16_ps); + } + else { + create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_linear_hlsl, convert_UV_fp16_ps); + } + } + else { + create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs); + create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps); + if (display->is_hdr()) { + create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl, convert_UV_fp16_ps); + } + else { + create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps); + } + } + break; + + case DXGI_FORMAT_R16_UINT: + // Planar 16-bit YUV 4:4:4, 10 most significant bits store the value + create_vertex_shader_helper(convert_yuv444_planar_vs_hlsl, convert_Y_or_YUV_vs); + create_pixel_shader_helper(convert_yuv444_planar_ps_hlsl, convert_Y_or_YUV_ps); + if (display->is_hdr()) { + create_pixel_shader_helper(convert_yuv444_planar_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps); + } + else { + create_pixel_shader_helper(convert_yuv444_planar_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps); + } + break; + + case DXGI_FORMAT_AYUV: + // Packed 8-bit YUV 4:4:4 + create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs); + create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_hlsl, convert_Y_or_YUV_ps); + create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps); + break; + + case DXGI_FORMAT_Y410: + // Packed 10-bit YUV 4:4:4 + create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs); + create_pixel_shader_helper(convert_yuv444_packed_y410_ps_hlsl, convert_Y_or_YUV_ps); + if (display->is_hdr()) { + create_pixel_shader_helper(convert_yuv444_packed_y410_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps); + } + else { + create_pixel_shader_helper(convert_yuv444_packed_y410_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps); + } + break; + + default: + BOOST_LOG(error) << "Unable to create shaders because of the unrecognized surface format"; + return -1; + } + +#undef create_vertex_shader_helper +#undef create_pixel_shader_helper + auto out_width = width; auto out_height = height; @@ -676,83 +784,6 @@ namespace platf::dxgi { BOOST_LOG(warning) << "Failed to increase encoding GPU thread priority. Please run application as administrator for optimal performance."; } -#define create_vertex_shader_helper(x, y) \ - if (FAILED(status = device->CreateVertexShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \ - BOOST_LOG(error) << "Failed to create vertex shader " << #x << ": " << util::log_hex(status); \ - return -1; \ - } -#define create_pixel_shader_helper(x, y) \ - if (FAILED(status = device->CreatePixelShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \ - BOOST_LOG(error) << "Failed to create pixel shader " << #x << ": " << util::log_hex(status); \ - return -1; \ - } - - switch (format) { - case DXGI_FORMAT_NV12: - // Semi-planar 8-bit YUV 4:2:0 - create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs); - create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps); - create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps); - create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs); - create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps); - create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps); - break; - - case DXGI_FORMAT_P010: - // Semi-planar 16-bit YUV 4:2:0, 10 most significant bits store the value - create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs); - create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps); - create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs); - create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps); - if (display->is_hdr()) { - create_pixel_shader_helper(convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps); - create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl, convert_UV_fp16_ps); - } - else { - create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps); - create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps); - } - break; - - case DXGI_FORMAT_R16_UINT: - // Planar 16-bit YUV 4:4:4, 10 most significant bits store the value - create_vertex_shader_helper(convert_yuv444_planar_vs_hlsl, convert_Y_or_YUV_vs); - create_pixel_shader_helper(convert_yuv444_planar_ps_hlsl, convert_Y_or_YUV_ps); - if (display->is_hdr()) { - create_pixel_shader_helper(convert_yuv444_planar_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps); - } - else { - create_pixel_shader_helper(convert_yuv444_planar_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps); - } - break; - - case DXGI_FORMAT_AYUV: - // Packed 8-bit YUV 4:4:4 - create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs); - create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_hlsl, convert_Y_or_YUV_ps); - create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps); - break; - - case DXGI_FORMAT_Y410: - // Packed 10-bit YUV 4:4:4 - create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs); - create_pixel_shader_helper(convert_yuv444_packed_y410_ps_hlsl, convert_Y_or_YUV_ps); - if (display->is_hdr()) { - create_pixel_shader_helper(convert_yuv444_packed_y410_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps); - } - else { - create_pixel_shader_helper(convert_yuv444_packed_y410_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps); - } - break; - - default: - BOOST_LOG(error) << "Unable to create shaders because of the unrecognized surface format"; - return -1; - } - -#undef create_vertex_shader_helper -#undef create_pixel_shader_helper - auto default_color_vectors = ::video::color_vectors_from_colorspace(::video::colorspace_e::rec601, false); if (!default_color_vectors) { BOOST_LOG(error) << "Missing color vectors for Rec. 601"sv; @@ -1923,6 +1954,10 @@ namespace platf::dxgi { compile_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear); compile_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_perceptual_quantizer); compile_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs); + compile_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps); + compile_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_linear); + compile_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_perceptual_quantizer); + compile_vertex_shader_helper(convert_yuv420_packed_uv_type0s_vs); compile_pixel_shader_helper(convert_yuv420_planar_y_ps); compile_pixel_shader_helper(convert_yuv420_planar_y_ps_linear); compile_pixel_shader_helper(convert_yuv420_planar_y_ps_perceptual_quantizer); diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0s_ps.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0s_ps.hlsl new file mode 100644 index 00000000000..73fd423c415 --- /dev/null +++ b/src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0s_ps.hlsl @@ -0,0 +1,5 @@ +#include "include/convert_base.hlsl" + +#define LEFT_SUBSAMPLING_SCALE + +#include "include/convert_yuv420_packed_uv_ps_base.hlsl" diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0s_ps_linear.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0s_ps_linear.hlsl new file mode 100644 index 00000000000..c451dc19d4a --- /dev/null +++ b/src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0s_ps_linear.hlsl @@ -0,0 +1,5 @@ +#include "include/convert_linear_base.hlsl" + +#define LEFT_SUBSAMPLING_SCALE + +#include "include/convert_yuv420_packed_uv_ps_base.hlsl" diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0s_ps_perceptual_quantizer.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0s_ps_perceptual_quantizer.hlsl new file mode 100644 index 00000000000..9156257f1f1 --- /dev/null +++ b/src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0s_ps_perceptual_quantizer.hlsl @@ -0,0 +1,5 @@ +#include "include/convert_perceptual_quantizer_base.hlsl" + +#define LEFT_SUBSAMPLING_SCALE + +#include "include/convert_yuv420_packed_uv_ps_base.hlsl" diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0s_vs.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0s_vs.hlsl new file mode 100644 index 00000000000..c6df6b49bad --- /dev/null +++ b/src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0s_vs.hlsl @@ -0,0 +1,15 @@ +cbuffer subsample_offset_cbuffer : register(b0) { + float2 subsample_offset; +}; + +cbuffer rotate_texture_steps_cbuffer : register(b1) { + int rotate_texture_steps; +}; + +#define LEFT_SUBSAMPLING_SCALE +#include "include/base_vs.hlsl" + +vertex_t main_vs(uint vertex_id : SV_VertexID) +{ + return generate_fullscreen_triangle_vertex(vertex_id, subsample_offset / 2, rotate_texture_steps); +} diff --git a/src_assets/windows/assets/shaders/directx/include/base_vs.hlsl b/src_assets/windows/assets/shaders/directx/include/base_vs.hlsl index c39e7c6f80b..287d252be83 100644 --- a/src_assets/windows/assets/shaders/directx/include/base_vs.hlsl +++ b/src_assets/windows/assets/shaders/directx/include/base_vs.hlsl @@ -2,6 +2,8 @@ #if defined(LEFT_SUBSAMPLING) vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, float subsample_offset, int rotate_texture_steps) +#elif defined(LEFT_SUBSAMPLING_SCALE) +vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, float2 halfsample_offset, int rotate_texture_steps) #elif defined(TOPLEFT_SUBSAMPLING) vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, float2 subsample_offset, int rotate_texture_steps) #else @@ -34,7 +36,15 @@ vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, int rotate_texture_ #if defined(LEFT_SUBSAMPLING) output.tex_right_left_center = float3(tex_coord.x, tex_coord.x - subsample_offset, tex_coord.y); -#elif defined (TOPLEFT_SUBSAMPLING) +#elif defined(LEFT_SUBSAMPLING_SCALE) + float3 right_center_left = float3(tex_coord.x + halfsample_offset.x, + tex_coord.x - halfsample_offset.x, + tex_coord.x - 3 * halfsample_offset.x); + float2 top_bottom = float2(tex_coord.y - halfsample_offset.y, + tex_coord.y + halfsample_offset.y); + output.tex_right_center_left_top = float4(right_center_left, top_bottom.x); + output.tex_right_center_left_bottom = float4(right_center_left, top_bottom.y); +#elif defined(TOPLEFT_SUBSAMPLING) output.tex_right_left_top = float3(tex_coord.x, tex_coord.x - subsample_offset.x, tex_coord.y - subsample_offset.y); output.tex_right_left_bottom = float3(tex_coord.x, tex_coord.x - subsample_offset.x, tex_coord.y); #else diff --git a/src_assets/windows/assets/shaders/directx/include/base_vs_types.hlsl b/src_assets/windows/assets/shaders/directx/include/base_vs_types.hlsl index cf755c5a0ff..fabc52bb777 100644 --- a/src_assets/windows/assets/shaders/directx/include/base_vs_types.hlsl +++ b/src_assets/windows/assets/shaders/directx/include/base_vs_types.hlsl @@ -3,9 +3,12 @@ struct vertex_t float4 viewpoint_pos : SV_Position; #if defined(LEFT_SUBSAMPLING) float3 tex_right_left_center : TEXCOORD; -#elif defined (TOPLEFT_SUBSAMPLING) - float3 tex_right_left_top : TEXCOORD; - float3 tex_right_left_bottom : TEXCOORD; +#elif defined(LEFT_SUBSAMPLING_SCALE) + float4 tex_right_center_left_top : TEXCOORD0; + float4 tex_right_center_left_bottom : TEXCOORD1; +#elif defined(TOPLEFT_SUBSAMPLING) + float3 tex_right_left_top : TEXCOORD0; + float3 tex_right_left_bottom : TEXCOORD1; #else float2 tex_coord : TEXCOORD; #endif diff --git a/src_assets/windows/assets/shaders/directx/include/convert_yuv420_packed_uv_ps_base.hlsl b/src_assets/windows/assets/shaders/directx/include/convert_yuv420_packed_uv_ps_base.hlsl index c21dccd7ed2..ad69c2ac0ef 100644 --- a/src_assets/windows/assets/shaders/directx/include/convert_yuv420_packed_uv_ps_base.hlsl +++ b/src_assets/windows/assets/shaders/directx/include/convert_yuv420_packed_uv_ps_base.hlsl @@ -17,6 +17,14 @@ float2 main_ps(vertex_t input) : SV_Target float3 rgb_left = image.Sample(def_sampler, input.tex_right_left_center.xz).rgb; float3 rgb_right = image.Sample(def_sampler, input.tex_right_left_center.yz).rgb; float3 rgb = CONVERT_FUNCTION((rgb_left + rgb_right) * 0.5); +#elif defined(LEFT_SUBSAMPLING_SCALE) + float3 rgb = image.Sample(def_sampler, input.tex_right_center_left_top.xw).rgb; // top-right + rgb += image.Sample(def_sampler, input.tex_right_center_left_top.yw).rgb; // top-center + rgb += image.Sample(def_sampler, input.tex_right_center_left_top.zw).rgb; // top-left + rgb += image.Sample(def_sampler, input.tex_right_center_left_bottom.xw).rgb; // bottom-right + rgb += image.Sample(def_sampler, input.tex_right_center_left_bottom.yw).rgb; // bottom-center + rgb += image.Sample(def_sampler, input.tex_right_center_left_bottom.zw).rgb; // bottom-left + rgb = CONVERT_FUNCTION(rgb * (1./6)); #elif defined(TOPLEFT_SUBSAMPLING) float3 rgb_top_left = image.Sample(def_sampler, input.tex_right_left_top.xz).rgb; float3 rgb_top_right = image.Sample(def_sampler, input.tex_right_left_top.yz).rgb; From f690ced1c6e860af425f28ac4ff206fc3671e663 Mon Sep 17 00:00:00 2001 From: ns6089 <61738816+ns6089@users.noreply.github.com> Date: Sat, 31 Aug 2024 13:16:39 +0300 Subject: [PATCH 2/5] Update src/platform/windows/display_vram.cpp --- src/platform/windows/display_vram.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/platform/windows/display_vram.cpp b/src/platform/windows/display_vram.cpp index 4d40a24b4a3..ba2b0685187 100644 --- a/src/platform/windows/display_vram.cpp +++ b/src/platform/windows/display_vram.cpp @@ -505,7 +505,7 @@ namespace platf::dxgi { return -1; \ } - const bool downscaling = display->width != width || display->height != height; + const bool downscaling = display->width > width || display->height > height; switch (format) { case DXGI_FORMAT_NV12: From d617df8adcb846d8486964aa0226ad5ff214b665 Mon Sep 17 00:00:00 2001 From: ns6089 <61738816+ns6089@users.noreply.github.com> Date: Sat, 31 Aug 2024 13:16:45 +0300 Subject: [PATCH 3/5] Update src_assets/windows/assets/shaders/directx/include/convert_yuv420_packed_uv_ps_base.hlsl --- .../include/convert_yuv420_packed_uv_ps_base.hlsl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src_assets/windows/assets/shaders/directx/include/convert_yuv420_packed_uv_ps_base.hlsl b/src_assets/windows/assets/shaders/directx/include/convert_yuv420_packed_uv_ps_base.hlsl index ad69c2ac0ef..17924fc8f00 100644 --- a/src_assets/windows/assets/shaders/directx/include/convert_yuv420_packed_uv_ps_base.hlsl +++ b/src_assets/windows/assets/shaders/directx/include/convert_yuv420_packed_uv_ps_base.hlsl @@ -18,13 +18,14 @@ float2 main_ps(vertex_t input) : SV_Target float3 rgb_right = image.Sample(def_sampler, input.tex_right_left_center.yz).rgb; float3 rgb = CONVERT_FUNCTION((rgb_left + rgb_right) * 0.5); #elif defined(LEFT_SUBSAMPLING_SCALE) - float3 rgb = image.Sample(def_sampler, input.tex_right_center_left_top.xw).rgb; // top-right - rgb += image.Sample(def_sampler, input.tex_right_center_left_top.yw).rgb; // top-center + float3 rgb = image.Sample(def_sampler, input.tex_right_center_left_top.yw).rgb; // top-center + rgb += image.Sample(def_sampler, input.tex_right_center_left_bottom.yw).rgb; // bottom-center + rgb *= 2; + rgb += image.Sample(def_sampler, input.tex_right_center_left_top.xw).rgb; // top-right rgb += image.Sample(def_sampler, input.tex_right_center_left_top.zw).rgb; // top-left rgb += image.Sample(def_sampler, input.tex_right_center_left_bottom.xw).rgb; // bottom-right - rgb += image.Sample(def_sampler, input.tex_right_center_left_bottom.yw).rgb; // bottom-center rgb += image.Sample(def_sampler, input.tex_right_center_left_bottom.zw).rgb; // bottom-left - rgb = CONVERT_FUNCTION(rgb * (1./6)); + rgb = CONVERT_FUNCTION(rgb * (1./12)); #elif defined(TOPLEFT_SUBSAMPLING) float3 rgb_top_left = image.Sample(def_sampler, input.tex_right_left_top.xz).rgb; float3 rgb_top_right = image.Sample(def_sampler, input.tex_right_left_top.yz).rgb; From 77c1dc6fda105434858244eb7969a7874e3fb8ea Mon Sep 17 00:00:00 2001 From: ns6089 <61738816+ns6089@users.noreply.github.com> Date: Sat, 31 Aug 2024 14:09:50 +0300 Subject: [PATCH 4/5] Update src_assets/windows/assets/shaders/directx/include/convert_yuv420_packed_uv_ps_base.hlsl --- .../directx/include/convert_yuv420_packed_uv_ps_base.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src_assets/windows/assets/shaders/directx/include/convert_yuv420_packed_uv_ps_base.hlsl b/src_assets/windows/assets/shaders/directx/include/convert_yuv420_packed_uv_ps_base.hlsl index 17924fc8f00..190c7a0ea14 100644 --- a/src_assets/windows/assets/shaders/directx/include/convert_yuv420_packed_uv_ps_base.hlsl +++ b/src_assets/windows/assets/shaders/directx/include/convert_yuv420_packed_uv_ps_base.hlsl @@ -25,7 +25,7 @@ float2 main_ps(vertex_t input) : SV_Target rgb += image.Sample(def_sampler, input.tex_right_center_left_top.zw).rgb; // top-left rgb += image.Sample(def_sampler, input.tex_right_center_left_bottom.xw).rgb; // bottom-right rgb += image.Sample(def_sampler, input.tex_right_center_left_bottom.zw).rgb; // bottom-left - rgb = CONVERT_FUNCTION(rgb * (1./12)); + rgb = CONVERT_FUNCTION(rgb * (1./8)); #elif defined(TOPLEFT_SUBSAMPLING) float3 rgb_top_left = image.Sample(def_sampler, input.tex_right_left_top.xz).rgb; float3 rgb_top_right = image.Sample(def_sampler, input.tex_right_left_top.yz).rgb; From bc2bd06d51bc6e0bbc42d756ee899997cae35e04 Mon Sep 17 00:00:00 2001 From: ns6089 <61738816+ns6089@users.noreply.github.com> Date: Sat, 31 Aug 2024 15:10:46 +0300 Subject: [PATCH 5/5] Correct portrait rotation offsets --- .../convert_yuv420_packed_uv_type0_vs.hlsl | 2 +- .../convert_yuv420_packed_uv_type0s_vs.hlsl | 2 +- .../directx/convert_yuv420_planar_y_vs.hlsl | 2 +- .../shaders/directx/convert_yuv444_packed_vs.hlsl | 2 +- .../shaders/directx/convert_yuv444_planar_vs.hlsl | 2 +- .../windows/assets/shaders/directx/cursor_vs.hlsl | 2 +- .../assets/shaders/directx/include/base_vs.hlsl | 15 ++++++--------- 7 files changed, 12 insertions(+), 15 deletions(-) diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0_vs.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0_vs.hlsl index eb9068b4187..fcbe2774475 100644 --- a/src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0_vs.hlsl +++ b/src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0_vs.hlsl @@ -11,5 +11,5 @@ cbuffer rotate_texture_steps_cbuffer : register(b1) { vertex_t main_vs(uint vertex_id : SV_VertexID) { - return generate_fullscreen_triangle_vertex(vertex_id, subsample_offset.x, rotate_texture_steps); + return generate_fullscreen_triangle_vertex(vertex_id, subsample_offset, rotate_texture_steps); } diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0s_vs.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0s_vs.hlsl index c6df6b49bad..fec99ca34aa 100644 --- a/src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0s_vs.hlsl +++ b/src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0s_vs.hlsl @@ -11,5 +11,5 @@ cbuffer rotate_texture_steps_cbuffer : register(b1) { vertex_t main_vs(uint vertex_id : SV_VertexID) { - return generate_fullscreen_triangle_vertex(vertex_id, subsample_offset / 2, rotate_texture_steps); + return generate_fullscreen_triangle_vertex(vertex_id, subsample_offset, rotate_texture_steps); } diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv420_planar_y_vs.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv420_planar_y_vs.hlsl index 33e481453ed..f9c774eb5cf 100644 --- a/src_assets/windows/assets/shaders/directx/convert_yuv420_planar_y_vs.hlsl +++ b/src_assets/windows/assets/shaders/directx/convert_yuv420_planar_y_vs.hlsl @@ -6,5 +6,5 @@ cbuffer rotate_texture_steps_cbuffer : register(b1) { vertex_t main_vs(uint vertex_id : SV_VertexID) { - return generate_fullscreen_triangle_vertex(vertex_id, rotate_texture_steps); + return generate_fullscreen_triangle_vertex(vertex_id, float2(0, 0), rotate_texture_steps); } diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_vs.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_vs.hlsl index 33e481453ed..f9c774eb5cf 100644 --- a/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_vs.hlsl +++ b/src_assets/windows/assets/shaders/directx/convert_yuv444_packed_vs.hlsl @@ -6,5 +6,5 @@ cbuffer rotate_texture_steps_cbuffer : register(b1) { vertex_t main_vs(uint vertex_id : SV_VertexID) { - return generate_fullscreen_triangle_vertex(vertex_id, rotate_texture_steps); + return generate_fullscreen_triangle_vertex(vertex_id, float2(0, 0), rotate_texture_steps); } diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv444_planar_vs.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv444_planar_vs.hlsl index 566da5d8cf8..e8fa1370167 100644 --- a/src_assets/windows/assets/shaders/directx/convert_yuv444_planar_vs.hlsl +++ b/src_assets/windows/assets/shaders/directx/convert_yuv444_planar_vs.hlsl @@ -15,7 +15,7 @@ cbuffer color_matrix_cbuffer : register(b3) { vertex_t main_vs(uint vertex_id : SV_VertexID) { - vertex_t output = generate_fullscreen_triangle_vertex(vertex_id % 3, rotate_texture_steps); + vertex_t output = generate_fullscreen_triangle_vertex(vertex_id % 3, float2(0, 0), rotate_texture_steps); output.viewport = vertex_id / 3; diff --git a/src_assets/windows/assets/shaders/directx/cursor_vs.hlsl b/src_assets/windows/assets/shaders/directx/cursor_vs.hlsl index cf737ede7e4..2d62120ccf8 100644 --- a/src_assets/windows/assets/shaders/directx/cursor_vs.hlsl +++ b/src_assets/windows/assets/shaders/directx/cursor_vs.hlsl @@ -6,5 +6,5 @@ cbuffer rotate_texture_steps_cbuffer : register(b2) { vertex_t main_vs(uint vertex_id : SV_VertexID) { - return generate_fullscreen_triangle_vertex(vertex_id, rotate_texture_steps); + return generate_fullscreen_triangle_vertex(vertex_id, float2(0, 0), rotate_texture_steps); } diff --git a/src_assets/windows/assets/shaders/directx/include/base_vs.hlsl b/src_assets/windows/assets/shaders/directx/include/base_vs.hlsl index 287d252be83..8e6f2d92431 100644 --- a/src_assets/windows/assets/shaders/directx/include/base_vs.hlsl +++ b/src_assets/windows/assets/shaders/directx/include/base_vs.hlsl @@ -1,14 +1,6 @@ #include "include/base_vs_types.hlsl" -#if defined(LEFT_SUBSAMPLING) -vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, float subsample_offset, int rotate_texture_steps) -#elif defined(LEFT_SUBSAMPLING_SCALE) -vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, float2 halfsample_offset, int rotate_texture_steps) -#elif defined(TOPLEFT_SUBSAMPLING) vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, float2 subsample_offset, int rotate_texture_steps) -#else -vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, int rotate_texture_steps) -#endif { vertex_t output; float2 tex_coord; @@ -32,11 +24,16 @@ vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, int rotate_texture_ sin(rotation_radians), cos(rotation_radians) }; float2 rotation_center = { 0.5, 0.5 }; tex_coord = round(rotation_center + mul(rotation_matrix, tex_coord - rotation_center)); + + if (rotate_texture_steps % 2) { + subsample_offset.xy = subsample_offset.yx; + } } #if defined(LEFT_SUBSAMPLING) - output.tex_right_left_center = float3(tex_coord.x, tex_coord.x - subsample_offset, tex_coord.y); + output.tex_right_left_center = float3(tex_coord.x, tex_coord.x - subsample_offset.x, tex_coord.y); #elif defined(LEFT_SUBSAMPLING_SCALE) + float2 halfsample_offset = subsample_offset / 2; float3 right_center_left = float3(tex_coord.x + halfsample_offset.x, tex_coord.x - halfsample_offset.x, tex_coord.x - 3 * halfsample_offset.x);