diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 3df49136f..075279848 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -208,10 +208,11 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id ext_inst_glsl_std_450); // Converts the depth value externally clamped to the representable [0, 2) // range to 20e4 floating point, with zeros in bits 24:31, rounding to the - // nearest even. If remap_from_0_to_0_5 is true, it's assumed that 0...1 is - // pre-remapped to 0...0.5 in the input. + // nearest even or towards zero. If remap_from_0_to_0_5 is true, it's assumed + // that 0...1 is pre-remapped to 0...0.5 in the input. static spv::Id PreClampedDepthTo20e4(spv::Builder& builder, spv::Id f32_scalar, + bool round_to_nearest_even, bool remap_from_0_to_0_5, spv::Id ext_inst_glsl_std_450); // Converts the 20e4 number in bits [f24_shift, f24_shift + 10) to a 32-bit diff --git a/src/xenia/gpu/spirv_shader_translator_rb.cc b/src/xenia/gpu/spirv_shader_translator_rb.cc index 4cb260bdd..8282016b5 100644 --- a/src/xenia/gpu/spirv_shader_translator_rb.cc +++ b/src/xenia/gpu/spirv_shader_translator_rb.cc @@ -230,8 +230,8 @@ spv::Id SpirvShaderTranslator::Float7e3To32(spv::Builder& builder, } spv::Id SpirvShaderTranslator::PreClampedDepthTo20e4( - spv::Builder& builder, spv::Id f32_scalar, bool remap_from_0_to_0_5, - spv::Id ext_inst_glsl_std_450) { + spv::Builder& builder, spv::Id f32_scalar, bool round_to_nearest_even, + bool remap_from_0_to_0_5, spv::Id ext_inst_glsl_std_450) { // CFloat24 from d3dref9.dll + // https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp // Assuming the value is already clamped to [0, 2) (in all places, the depth @@ -305,18 +305,20 @@ spv::Id SpirvShaderTranslator::PreClampedDepthTo20e4( builder.makeUintConstant(0x38800000 - (remap_bias << 23))), denormal_biased_f32, normal_biased_f32); - // Build the 20e4 number rounding to the nearest even. - // ((biased_f32 + 3 + ((biased_f32 >> 3) & 1)) >> 3) & 0xFFFFFF - return builder.createTriOp( - spv::OpBitFieldUExtract, type_uint, - builder.createBinOp( - spv::OpIAdd, type_uint, - builder.createBinOp(spv::OpIAdd, type_uint, biased_f32, - builder.makeUintConstant(3)), - builder.createTriOp(spv::OpBitFieldUExtract, type_uint, biased_f32, - builder.makeUintConstant(3), - builder.makeUintConstant(1))), - builder.makeUintConstant(3), builder.makeUintConstant(24)); + // Build the 20e4 number rounding to the nearest even or towards zero. + if (round_to_nearest_even) { + // biased_f32 += 3 + ((biased_f32 >> 3) & 1) + biased_f32 = builder.createBinOp( + spv::OpIAdd, type_uint, + builder.createBinOp(spv::OpIAdd, type_uint, biased_f32, + builder.makeUintConstant(3)), + builder.createTriOp(spv::OpBitFieldUExtract, type_uint, biased_f32, + builder.makeUintConstant(3), + builder.makeUintConstant(1))); + } + return builder.createTriOp(spv::OpBitFieldUExtract, type_uint, biased_f32, + builder.makeUintConstant(3), + builder.makeUintConstant(24)); } spv::Id SpirvShaderTranslator::Depth20e4To32(spv::Builder& builder, diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc index 4d021ca7a..4d8545fd0 100644 --- a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc @@ -416,6 +416,8 @@ bool VulkanRenderTargetCache::Initialize() { // TODO(Triang3l): All paths (FSI). + depth_float24_round_ = cvars::depth_float24_round; + // TODO(Triang3l): Handle sampledImageIntegerSampleCounts 4 not supported in // transfers. if (cvars::native_2x_msaa) { @@ -3037,7 +3039,8 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader( } break; case xenos::DepthRenderTargetFormat::kD24FS8: { depth24 = SpirvShaderTranslator::PreClampedDepthTo20e4( - builder, source_depth_float[i], true, ext_inst_glsl_std_450); + builder, source_depth_float[i], depth_float24_round(), true, + ext_inst_glsl_std_450); } break; } // Merge depth and stencil. @@ -3353,7 +3356,8 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader( } break; case xenos::DepthRenderTargetFormat::kD24FS8: { packed = SpirvShaderTranslator::PreClampedDepthTo20e4( - builder, source_depth_float[0], true, ext_inst_glsl_std_450); + builder, source_depth_float[0], depth_float24_round(), true, + ext_inst_glsl_std_450); } break; } if (mode.output == TransferOutput::kDepth) { @@ -3855,7 +3859,8 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader( } break; case xenos::DepthRenderTargetFormat::kD24FS8: { host_depth24 = SpirvShaderTranslator::PreClampedDepthTo20e4( - builder, host_depth32, true, ext_inst_glsl_std_450); + builder, host_depth32, depth_float24_round(), true, + ext_inst_glsl_std_450); } break; } assert_true(host_depth24 != spv::NoResult); @@ -5548,7 +5553,8 @@ VkPipeline VulkanRenderTargetCache::GetDumpPipeline(DumpPipelineKey key) { } break; case xenos::DepthRenderTargetFormat::kD24FS8: { packed[0] = SpirvShaderTranslator::PreClampedDepthTo20e4( - builder, source_depth32, true, ext_inst_glsl_std_450); + builder, source_depth32, depth_float24_round(), true, + ext_inst_glsl_std_450); } break; } id_vector_temp.clear(); diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.h b/src/xenia/gpu/vulkan/vulkan_render_target_cache.h index 10b2c1aed..2857fde1f 100644 --- a/src/xenia/gpu/vulkan/vulkan_render_target_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.h @@ -128,6 +128,8 @@ class VulkanRenderTargetCache final : public RenderTargetCache { return last_update_framebuffer_; } + bool depth_float24_round() const { return depth_float24_round_; } + bool msaa_2x_attachments_supported() const { return msaa_2x_attachments_supported_; } @@ -824,6 +826,8 @@ class VulkanRenderTargetCache final : public RenderTargetCache { bool gamma_render_target_as_srgb_ = false; + bool depth_float24_round_ = false; + bool msaa_2x_attachments_supported_ = false; bool msaa_2x_no_attachments_supported_ = false;