From 5a809c5f724514baa377a03089a50f917f73669b Mon Sep 17 00:00:00 2001 From: kd-11 Date: Wed, 17 Dec 2025 09:29:44 +0300 Subject: [PATCH] rsx/fp: Stop using m_ctrl and use the correct object from m_prog --- rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp | 20 +++++++-------- .../RSX/Program/FragmentProgramDecompiler.cpp | 5 ++-- .../RSX/Program/FragmentProgramDecompiler.h | 1 - rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp | 25 ++++++++++--------- 4 files changed, 25 insertions(+), 26 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp index 66f062c323..6ef26db1e0 100644 --- a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp @@ -93,13 +93,13 @@ void GLFragmentDecompilerThread::insertOutputs(std::stringstream & OS) { const std::pair table[] = { - { "ocol0", m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r0" : "h0" }, - { "ocol1", m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r2" : "h4" }, - { "ocol2", m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r3" : "h6" }, - { "ocol3", m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r4" : "h8" }, + { "ocol0", m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r0" : "h0" }, + { "ocol1", m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r2" : "h4" }, + { "ocol2", m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r3" : "h6" }, + { "ocol3", m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r4" : "h8" }, }; - const bool float_type = (m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) || !device_props.has_native_half_support; + const bool float_type = (m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) || !device_props.has_native_half_support; const auto reg_type = float_type ? "vec4" : getHalfTypeName(4); for (uint i = 0; i < std::size(table); ++i) { @@ -244,7 +244,7 @@ void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS) { std::set output_registers; - if (m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) + if (m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) { output_registers = { "r0", "r2", "r3", "r4" }; } @@ -253,7 +253,7 @@ void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS) output_registers = { "h0", "h4", "h6", "h8" }; } - if (m_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) + if (m_prog.ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) { output_registers.insert("r1"); } @@ -335,8 +335,8 @@ void GLFragmentDecompilerThread::insertMainEnd(std::stringstream & OS) OS << "\n" << " fs_main();\n\n"; - if ((m_ctrl & RSX_SHADER_CONTROL_DISABLE_EARLY_Z) && - !(m_ctrl & (CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT | RSX_SHADER_CONTROL_META_USES_DISCARD)) && + if ((m_prog.ctrl & RSX_SHADER_CONTROL_DISABLE_EARLY_Z) && + !(m_prog.ctrl & (CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT | RSX_SHADER_CONTROL_META_USES_DISCARD)) && g_cfg.video.shader_precision != gpu_preset_level::low) { // This is effectively unreachable code, but good enough to trick the GPU to skip early Z @@ -348,7 +348,7 @@ void GLFragmentDecompilerThread::insertMainEnd(std::stringstream & OS) glsl::insert_rop(OS, m_shader_props); - if (m_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) + if (m_prog.ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) { if (m_parr.HasParam(PF_PARAM_NONE, "vec4", "r1")) { diff --git a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp index 1ba30e45b3..8c5c163f06 100644 --- a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp @@ -84,7 +84,6 @@ std::vector get_fragment_program_output_set(u32 ctrl, u32 mrt_count FragmentProgramDecompiler::FragmentProgramDecompiler(const RSXFragmentProgram &prog, u32& size) : m_size(size) , m_prog(prog) - , m_ctrl(prog.ctrl) { m_size = 0; } @@ -805,7 +804,7 @@ std::string FragmentProgramDecompiler::BuildCode() // Shader validation // Shader must at least write to one output for the body to be considered valid - const bool fp16_out = !(m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS); + const bool fp16_out = !(m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS); const std::string float4_type = (fp16_out && device_props.has_native_half_support)? getHalfTypeName(4) : getFloatTypeName(4); const std::string init_value = float4_type + "(0.)"; std::array output_register_names; @@ -814,7 +813,7 @@ std::string FragmentProgramDecompiler::BuildCode() std::stringstream main_epilogue; // Check depth export - if (m_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) + if (m_prog.ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) { // Hw tests show that the depth export register is default-initialized to 0 and not wpos.z!! m_parr.AddParam(PF_PARAM_NONE, getFloatTypeName(4), "r1", init_value); diff --git a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h index 09a02804c3..8e22a21a93 100644 --- a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h +++ b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h @@ -99,7 +99,6 @@ class FragmentProgramDecompiler protected: const RSXFragmentProgram &m_prog; - u32 m_ctrl = 0; /** returns the type name of float vectors. */ diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp index 0e8e318464..8ecc427f73 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -144,15 +144,15 @@ void VKFragmentDecompilerThread::insertOutputs(std::stringstream & OS) { const std::pair table[] = { - { "ocol0", m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r0" : "h0" }, - { "ocol1", m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r2" : "h4" }, - { "ocol2", m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r3" : "h6" }, - { "ocol3", m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r4" : "h8" }, + { "ocol0", m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r0" : "h0" }, + { "ocol1", m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r2" : "h4" }, + { "ocol2", m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r3" : "h6" }, + { "ocol3", m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r4" : "h8" }, }; // NOTE: We do not skip outputs, the only possible combinations are a(0), b(0), ab(0,1), abc(0,1,2), abcd(0,1,2,3) u8 output_index = 0; - const bool float_type = (m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) || !device_props.has_native_half_support; + const bool float_type = (m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) || !device_props.has_native_half_support; const auto reg_type = float_type ? "vec4" : getHalfTypeName(4); for (uint i = 0; i < std::size(table); ++i) { @@ -363,7 +363,7 @@ void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS) { std::set output_registers; - if (m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) + if (m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) { output_registers = { "r0", "r2", "r3", "r4" }; } @@ -372,7 +372,7 @@ void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS) output_registers = { "h0", "h4", "h6", "h8" }; } - if (m_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) + if (m_prog.ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) { output_registers.insert("r1"); } @@ -471,20 +471,21 @@ void VKFragmentDecompilerThread::insertMainEnd(std::stringstream & OS) OS << "\n" << " fs_main();\n\n"; - if ((m_ctrl & RSX_SHADER_CONTROL_DISABLE_EARLY_Z) && - !(m_ctrl & (CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT | RSX_SHADER_CONTROL_META_USES_DISCARD)) && + if ((m_prog.ctrl & RSX_SHADER_CONTROL_DISABLE_EARLY_Z) && + !(m_prog.ctrl & (CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT | RSX_SHADER_CONTROL_META_USES_DISCARD)) && g_cfg.video.shader_precision != gpu_preset_level::low) { // This is effectively unreachable code, but good enough to trick the GPU to skip early Z OS << "// Insert NOP sequence to disable early-Z\n" - "if (isnan(gl_FragCoord.z))\n" - " discard;\n\n"; + " const uint rop_control = fs_contexts[_fs_context_offset].rop_control;\n" + " if (_test_bit(rop_control, 0))\n" + " discard;\n\n"; } glsl::insert_rop(OS, m_shader_props); - if (m_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) + if (m_prog.ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) { if (m_parr.HasParam(PF_PARAM_NONE, "vec4", "r1")) {