From e0e0d1ee0bfd27810495c6f28fbb2b2432d9e834 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 27 Jul 2025 02:36:58 +0300 Subject: [PATCH] rsx/vk: Convert fragment constants from static struct to array of vec4 - Also allows vulkan to use full-range binding model --- rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp | 82 +++++++------------ rpcs3/Emu/RSX/GL/GLFragmentProgram.h | 2 +- .../RSX/Program/FragmentProgramDecompiler.cpp | 26 +++--- .../RSX/Program/FragmentProgramDecompiler.h | 3 + rpcs3/Emu/RSX/Program/ProgramStateCache.cpp | 8 +- rpcs3/Emu/RSX/Program/ProgramStateCache.h | 6 +- rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp | 72 +++++++--------- rpcs3/Emu/RSX/VK/VKFragmentProgram.h | 2 +- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 21 +++-- rpcs3/Emu/RSX/VK/VKGSRender.h | 1 + 10 files changed, 91 insertions(+), 132 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp index 3c95d571d3..d251b425a9 100644 --- a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp @@ -158,50 +158,38 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS) OS << "\n"; - std::string constants_block; - for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) + if (!properties.constant_offsets.empty()) { - if (PT.type == "sampler1D" || - PT.type == "sampler2D" || - PT.type == "sampler3D" || - PT.type == "samplerCube") - continue; - - for (const ParamItem& PI : PT.items) - { - constants_block += " " + PT.type + " " + PI.name + ";\n"; - } + OS << + "layout(std140, binding = " << GL_FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT << ") uniform FragmentConstantsBuffer\n" + "{\n" + " vec4 fc[" << properties.constant_offsets.size() << "];\n" + "};\n" + "#define _fetch_constant(x) fc[x]\n\n"; } - if (!constants_block.empty()) - { - OS << "layout(std140, binding = " << GL_FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT << ") uniform FragmentConstantsBuffer\n"; - OS << "{\n"; - OS << constants_block; - OS << "};\n\n"; - } + OS << + 
"layout(std140, binding = " << GL_FRAGMENT_STATE_BIND_SLOT << ") uniform FragmentStateBuffer\n" + "{\n" + " float fog_param0;\n" + " float fog_param1;\n" + " uint rop_control;\n" + " float alpha_ref;\n" + " uint reserved;\n" + " uint fog_mode;\n" + " float wpos_scale;\n" + " float wpos_bias;\n" + "};\n\n" - OS << "layout(std140, binding = " << GL_FRAGMENT_STATE_BIND_SLOT << ") uniform FragmentStateBuffer\n"; - OS << "{\n"; - OS << " float fog_param0;\n"; - OS << " float fog_param1;\n"; - OS << " uint rop_control;\n"; - OS << " float alpha_ref;\n"; - OS << " uint reserved;\n"; - OS << " uint fog_mode;\n"; - OS << " float wpos_scale;\n"; - OS << " float wpos_bias;\n"; - OS << "};\n\n"; + "layout(std140, binding = " << GL_FRAGMENT_TEXTURE_PARAMS_BIND_SLOT << ") uniform TextureParametersBuffer\n" + "{\n" + " sampler_info texture_parameters[16];\n" + "};\n\n" - OS << "layout(std140, binding = " << GL_FRAGMENT_TEXTURE_PARAMS_BIND_SLOT << ") uniform TextureParametersBuffer\n"; - OS << "{\n"; - OS << " sampler_info texture_parameters[16];\n"; - OS << "};\n\n"; - - OS << "layout(std140, binding = " << GL_RASTERIZER_STATE_BIND_SLOT << ") uniform RasterizerHeap\n"; - OS << "{\n"; - OS << " uvec4 stipple_pattern[8];\n"; - OS << "};\n\n"; + "layout(std140, binding = " << GL_RASTERIZER_STATE_BIND_SLOT << ") uniform RasterizerHeap\n" + "{\n" + " uvec4 stipple_pattern[8];\n" + "};\n\n"; } void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) @@ -373,21 +361,7 @@ void GLFragmentProgram::Decompile(const RSXFragmentProgram& prog) decompiler.Task(); - for (const ParamType& PT : decompiler.m_parr.params[PF_PARAM_UNIFORM]) - { - for (const ParamItem& PI : PT.items) - { - if (PT.type == "sampler1D" || - PT.type == "sampler2D" || - PT.type == "sampler3D" || - PT.type == "samplerCube") - continue; - - usz offset = atoi(PI.name.c_str() + 2); - FragmentConstantOffsetCache.push_back(offset); - } - } - + constant_offsets = 
std::move(decompiler.properties.constant_offsets); shader.create(::glsl::program_domain::glsl_fragment_program, source); id = shader.id(); } diff --git a/rpcs3/Emu/RSX/GL/GLFragmentProgram.h b/rpcs3/Emu/RSX/GL/GLFragmentProgram.h index c6b5244389..e8fc2ed6f1 100644 --- a/rpcs3/Emu/RSX/GL/GLFragmentProgram.h +++ b/rpcs3/Emu/RSX/GL/GLFragmentProgram.h @@ -58,7 +58,7 @@ public: ParamArray parr; u32 id; gl::glsl::shader shader; - std::vector FragmentConstantOffsetCache; + std::vector constant_offsets; /** * Decompile a fragment shader located in the PS3's Memory. This function operates synchronously. diff --git a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp index 43e4d866a5..f7c2d57f76 100644 --- a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp @@ -232,23 +232,13 @@ std::string FragmentProgramDecompiler::AddCond() std::string FragmentProgramDecompiler::AddConst() { - const std::string name = std::string("fc") + std::to_string(m_size + 4 * 4); - const std::string type = getFloatTypeName(4); - - if (m_parr.HasParam(PF_PARAM_UNIFORM, type, name)) - { - return name; - } - - auto data = reinterpret_cast*>(reinterpret_cast(m_prog.get_data()) + m_size + 4 * sizeof(u32)); + // Skip next instruction, it's just a literal m_offset = 2 * 4 * sizeof(u32); - u32 x = GetData(data[0]); - u32 y = GetData(data[1]); - u32 z = GetData(data[2]); - u32 w = GetData(data[3]); - const auto var = fmt::format("%s(%f, %f, %f, %f)", type, std::bit_cast(x), std::bit_cast(y), std::bit_cast(z), std::bit_cast(w)); - return m_parr.AddParam(PF_PARAM_UNIFORM, type, name, var); + // Return the next offset index + const u32 index = ::size32(properties.constant_offsets); + properties.constant_offsets.push_back(m_size + 4 * 4); + return "_fetch_constant(" + std::to_string(index) + ")"; } std::string FragmentProgramDecompiler::AddTex() @@ -847,6 +837,12 @@ std::string 
FragmentProgramDecompiler::BuildCode() } } + if (!properties.constant_offsets.empty()) + { + const std::string var_name = fmt::format("fc[%llu]", properties.constant_offsets.size()); + m_parr.AddParam(PF_PARAM_CONST, getFloatTypeName(4), var_name); + } + std::stringstream OS; if (!m_is_valid_ucode) diff --git a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h index dab539f9da..c89eb9db22 100644 --- a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h +++ b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h @@ -189,6 +189,9 @@ public: bool has_tex2D = false; bool has_tex3D = false; bool has_texShadowProj = false; + + // Literal offsets + std::vector constant_offsets; } properties; diff --git a/rpcs3/Emu/RSX/Program/ProgramStateCache.cpp b/rpcs3/Emu/RSX/Program/ProgramStateCache.cpp index 67cd710a2c..45b2da4af6 100644 --- a/rpcs3/Emu/RSX/Program/ProgramStateCache.cpp +++ b/rpcs3/Emu/RSX/Program/ProgramStateCache.cpp @@ -782,10 +782,10 @@ bool fragment_program_compare::compare_properties(const RSXFragmentProgram& bina namespace rsx { #if defined(ARCH_X64) || defined(ARCH_ARM64) - static inline void write_fragment_constants_to_buffer_sse2(const std::span& buffer, const RSXFragmentProgram& rsx_prog, const std::vector& offsets_cache, bool sanitize) + static inline void write_fragment_constants_to_buffer_sse2(const std::span& buffer, const RSXFragmentProgram& rsx_prog, const std::vector& offsets_cache, bool sanitize) { f32* dst = buffer.data(); - for (usz offset_in_fragment_program : offsets_cache) + for (u32 offset_in_fragment_program : offsets_cache) { const char* data = static_cast(rsx_prog.get_data()) + offset_in_fragment_program; @@ -809,7 +809,7 @@ namespace rsx } } #else - static inline void write_fragment_constants_to_buffer_fallback(const std::span& buffer, const RSXFragmentProgram& rsx_prog, const std::vector& offsets_cache, bool sanitize) + static inline void write_fragment_constants_to_buffer_fallback(const 
std::span& buffer, const RSXFragmentProgram& rsx_prog, const std::vector& offsets_cache, bool sanitize) { f32* dst = buffer.data(); @@ -837,7 +837,7 @@ namespace rsx } #endif - void write_fragment_constants_to_buffer(const std::span& buffer, const RSXFragmentProgram& rsx_prog, const std::vector& offsets_cache, bool sanitize) + void write_fragment_constants_to_buffer(const std::span& buffer, const RSXFragmentProgram& rsx_prog, const std::vector& offsets_cache, bool sanitize) { #if defined(ARCH_X64) || defined(ARCH_ARM64) write_fragment_constants_to_buffer_sse2(buffer, rsx_prog, offsets_cache, sanitize); diff --git a/rpcs3/Emu/RSX/Program/ProgramStateCache.h b/rpcs3/Emu/RSX/Program/ProgramStateCache.h index 6007ac7f2d..efd5dd326a 100644 --- a/rpcs3/Emu/RSX/Program/ProgramStateCache.h +++ b/rpcs3/Emu/RSX/Program/ProgramStateCache.h @@ -137,7 +137,7 @@ namespace rsx RSXVertexProgram m_cached_vp_properties; }; - void write_fragment_constants_to_buffer(const std::span& buffer, const RSXFragmentProgram& rsx_prog, const std::vector& offsets_cache, bool sanitize = true); + void write_fragment_constants_to_buffer(const std::span& buffer, const RSXFragmentProgram& rsx_prog, const std::vector& offsets_cache, bool sanitize = true); } @@ -447,14 +447,14 @@ public: void fill_fragment_constants_buffer(std::span dst_buffer, const fragment_program_type& fragment_program, const RSXFragmentProgram& rsx_prog, bool sanitize = false) const { - if (dst_buffer.size_bytes() < (fragment_program.FragmentConstantOffsetCache.size() * 16)) + if (dst_buffer.size_bytes() < (fragment_program.constant_offsets.size() * 16)) { // This can happen if CELL alters the shader after it has been loaded by RSX. rsx_log.error("Insufficient constants buffer size passed to fragment program! 
Corrupt shader?"); return; } - rsx::write_fragment_constants_to_buffer(dst_buffer, rsx_prog, fragment_program.FragmentConstantOffsetCache, sanitize); + rsx::write_fragment_constants_to_buffer(dst_buffer, rsx_prog, fragment_program.constant_offsets, sanitize); } void clear() diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp index 040d528218..dc92101a2d 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -29,27 +29,19 @@ std::string VKFragmentDecompilerThread::compareFunction(COMPARE f, const std::st void VKFragmentDecompilerThread::prepareBindingTable() { // First check if we have constants and textures as those need extra work - bool has_constants = false, has_textures = false; + bool has_textures = false; for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) { - if (has_constants && has_textures) - { - break; - } - if (PT.type.starts_with("sampler")) { has_textures = true; - continue; + break; } - - ensure(PT.type.starts_with("vec")); - has_constants = true; } unsigned location = 0; // All bindings must be set from this var vk_prog->binding_table.context_buffer_location = location++; - if (has_constants) + if (!properties.constant_offsets.empty()) { vk_prog->binding_table.cbuf_location = location++; } @@ -233,26 +225,13 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) } } - std::string constants_block; - for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) + if (!properties.constant_offsets.empty()) { - if (PT.type.starts_with("sampler")) - { - continue; - } - - for (const ParamItem& PI : PT.items) - { - constants_block += " " + PT.type + " " + PI.name + ";\n"; - } - } - - if (!constants_block.empty()) - { - OS << "layout(std140, set=1, binding=" << vk_prog->binding_table.cbuf_location << ") uniform FragmentConstantsBuffer\n"; + OS << "layout(std140, set=1, binding=" << vk_prog->binding_table.cbuf_location << ") readonly buffer 
FragmentConstantsBuffer\n"; OS << "{\n"; - OS << constants_block; - OS << "};\n\n"; + OS << " vec4 fc[];\n"; + OS << "};\n"; + OS << "#define _fetch_constant(x) fc[x + fs_constants_offset]\n\n"; } OS << "layout(std140, set=1, binding=" << vk_prog->binding_table.context_buffer_location << ") uniform FragmentStateBuffer\n"; @@ -280,19 +259,20 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) vk::glsl::program_input in { .domain = glsl::glsl_fragment_program, - .type = vk::glsl::input_type_uniform_buffer, .set = vk::glsl::binding_set_index_fragment }; - if (!constants_block.empty()) + if (!properties.constant_offsets.empty()) { in.location = vk_prog->binding_table.cbuf_location; in.name = "FragmentConstantsBuffer"; + in.type = vk::glsl::input_type_storage_buffer, inputs.push_back(in); } in.location = vk_prog->binding_table.context_buffer_location; in.name = "FragmentStateBuffer"; + in.type = vk::glsl::input_type_uniform_buffer; inputs.push_back(in); in.location = vk_prog->binding_table.tex_param_location; @@ -302,6 +282,23 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) in.location = vk_prog->binding_table.polygon_stipple_params_location; in.name = "RasterizerHeap"; inputs.push_back(in); + + OS << + "layout(push_constant) uniform push_constants_block\n" + "{\n" + " uint fs_constants_offset;\n" + "};\n\n"; + + const vk::glsl::program_input push_constants + { + .domain = glsl::glsl_fragment_program, + .type = vk::glsl::input_type_push_constant, + .bound_data = vk::glsl::push_constant_ref{.offset = 12, .size = 4 }, + .set = vk::glsl::binding_set_index_vertex, + .location = umax, + .name = "fs_push_constants_block" + }; + inputs.push_back(push_constants); } void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) @@ -478,19 +475,8 @@ void VKFragmentProgram::Decompile(const RSXFragmentProgram& prog) decompiler.device_props.has_low_precision_rounding = vk::is_NVIDIA(vk::get_driver_vendor()); 
decompiler.Task(); + constant_offsets = std::move(decompiler.properties.constant_offsets); shader.create(::glsl::program_domain::glsl_fragment_program, source); - - for (const ParamType& PT : decompiler.m_parr.params[PF_PARAM_UNIFORM]) - { - for (const ParamItem& PI : PT.items) - { - if (PT.type.starts_with("sampler")) - continue; - - usz offset = atoi(PI.name.c_str() + 2); - FragmentConstantOffsetCache.push_back(offset); - } - } } void VKFragmentProgram::Compile() diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.h b/rpcs3/Emu/RSX/VK/VKFragmentProgram.h index c51b81b8fc..50469aeca4 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.h +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.h @@ -62,7 +62,7 @@ public: VkShaderModule handle = nullptr; u32 id; vk::glsl::shader shader; - std::vector FragmentConstantOffsetCache; + std::vector constant_offsets; std::array output_color_masks{ {} }; std::vector uniforms; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index c74dbff549..63c3d578e2 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -514,7 +514,7 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar) m_vertex_env_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex env buffer"); m_fragment_texture_params_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment texture params buffer"); m_vertex_layout_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex layout buffer", 0x10000, VK_TRUE); - m_fragment_constants_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment constants buffer"); + m_fragment_constants_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment constants buffer"); m_transform_constants_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 
VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M * 0x100000, "transform constants buffer"); m_index_buffer_ring_info.create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, "index buffer"); m_texture_upload_buffer_ring_info.create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, "texture upload buffer", 32 * 0x100000); @@ -552,11 +552,12 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar) // Initialize optional allocation information with placeholders m_vertex_env_buffer_info = { m_vertex_env_ring_info.heap->value, 0, VK_WHOLE_SIZE }; - m_vertex_constants_buffer_info = { m_transform_constants_ring_info.heap->value, 0, 16 }; + m_vertex_constants_buffer_info = { m_transform_constants_ring_info.heap->value, 0, VK_WHOLE_SIZE }; m_fragment_env_buffer_info = { m_fragment_env_ring_info.heap->value, 0, 16 }; m_fragment_texture_params_buffer_info = { m_fragment_texture_params_ring_info.heap->value, 0, 16 }; m_raster_env_buffer_info = { m_raster_env_ring_info.heap->value, 0, 128 }; m_vertex_layout_stream_info = { m_vertex_layout_ring_info.heap->value, 0, VK_WHOLE_SIZE }; + m_fragment_constants_buffer_info = { m_fragment_constants_ring_info.heap->value, 0, VK_WHOLE_SIZE }; const auto& limits = m_device->gpu().get_limits(); m_texbuffer_view_size = std::min(limits.maxTexelBufferElements, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000u); @@ -2007,18 +2008,13 @@ void VKGSRender::load_program_env() // Fragment constants if (fragment_constants_size) { - auto mem = m_fragment_constants_ring_info.alloc<256>(fragment_constants_size); - auto buf = m_fragment_constants_ring_info.map(mem, fragment_constants_size); + m_fragment_constants_dynamic_offset = m_fragment_constants_ring_info.alloc<16>(fragment_constants_size); + auto buf = m_fragment_constants_ring_info.map(m_fragment_constants_dynamic_offset, fragment_constants_size); m_prog_buffer->fill_fragment_constants_buffer({ reinterpret_cast(buf), fragment_constants_size }, 
*ensure(m_fragment_prog), current_fragment_program, true); m_fragment_constants_ring_info.unmap(); - m_fragment_constants_buffer_info = { m_fragment_constants_ring_info.heap->value, mem, fragment_constants_size }; - } - else - { - m_fragment_constants_buffer_info = { m_fragment_constants_ring_info.heap->value, 0, 32 }; } } @@ -2194,6 +2190,7 @@ void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_ u32 xform_constants_offset; u32 vs_context_offset; u32 vs_attrib_layout_offset; + u32 fs_constants_offset; }; struct rsx_prog_vertex_layout_entry_t @@ -2206,15 +2203,17 @@ void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_ }; // Actual allocation must have been done previously - const u32 constant_id_offset = static_cast(m_xform_constants_dynamic_offset) / 16u; + const u32 vs_constant_id_offset = static_cast(m_xform_constants_dynamic_offset) / 16u; const u32 vertex_context_offset = static_cast(m_vertex_env_dynamic_offset) / 128u; const u32 vertex_layout_offset = static_cast(m_vertex_layout_dynamic_offset) / 144u; + const u32 fs_constant_id_offset = static_cast(m_fragment_constants_dynamic_offset) / 16u; // Pack rsx_prog_push_constants_block_t push_constants; - push_constants.xform_constants_offset = constant_id_offset; + push_constants.xform_constants_offset = vs_constant_id_offset; push_constants.vs_context_offset = vertex_context_offset; push_constants.vs_attrib_layout_offset = vertex_layout_offset + id; + push_constants.fs_constants_offset = fs_constant_id_offset; vkCmdPushConstants( *m_current_command_buffer, diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 83519acfbd..77f3e1e838 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -153,6 +153,7 @@ private: u64 m_xform_constants_dynamic_offset = 0; // We manage transform_constants dynamic offset manually to alleviate performance penalty of doing a hot-patch of constants. 
u64 m_vertex_env_dynamic_offset = 0; u64 m_vertex_layout_dynamic_offset = 0; + u64 m_fragment_constants_dynamic_offset = 0; std::array frame_context_storage; //Temp frame context to use if the real frame queue is overburdened. Only used for storage