From 5839ad9c7057dcf3f026221426e99ded520ffc40 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Mon, 27 Apr 2026 13:00:48 +0300 Subject: [PATCH] vk: Use UBOs for critical per-fragment data --- rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp | 18 +++++++---- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 18 ++++++++--- rpcs3/Emu/RSX/VK/vkutils/device.cpp | 45 ++++++++++++++------------ 3 files changed, 49 insertions(+), 32 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp index 02709d9b5c..853f9f8683 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -97,7 +97,11 @@ void VKFragmentDecompilerThread::insertHeader(std::stringstream & OS) { prepareBindingTable(); - std::vector required_extensions; + std::vector required_extensions = + { + "GL_EXT_scalar_block_layout", + "GL_EXT_uniform_buffer_unsized_array" + }; if (device_props.has_native_half_support) { @@ -251,7 +255,7 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) if (!properties.constant_offsets.empty()) { - OS << "layout(std430, set=1, binding=" << vk_prog->binding_table.cbuf_location << ") readonly buffer FragmentConstantsBuffer\n"; + OS << "layout(std430, set=1, binding=" << vk_prog->binding_table.cbuf_location << ") uniform FragmentConstantsBuffer\n"; OS << "{\n"; OS << " vec4 fc[];\n"; OS << "};\n"; @@ -259,12 +263,12 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) } OS << - "layout(std430, set=1, binding=" << vk_prog->binding_table.context_buffer_location << ") readonly buffer FragmentStateBuffer\n" + "layout(std430, set=1, binding=" << vk_prog->binding_table.context_buffer_location << ") uniform FragmentStateBuffer\n" "{\n" " fragment_context_t fs_contexts[];\n" "};\n\n"; - OS << "layout(std430, set=1, binding=" << vk_prog->binding_table.tex_param_location << ") readonly buffer TextureParametersBuffer\n"; + OS << "layout(std430, set=1, binding=" << 
vk_prog->binding_table.tex_param_location << ") uniform TextureParametersBuffer\n"; OS << "{\n"; OS << " sampler_info texture_parameters[];\n"; OS << "};\n\n"; @@ -284,18 +288,18 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) { in.location = vk_prog->binding_table.cbuf_location; in.name = "FragmentConstantsBuffer"; - in.type = vk::glsl::input_type_storage_buffer, + in.type = vk::glsl::input_type_uniform_buffer, inputs.push_back(in); } in.location = vk_prog->binding_table.context_buffer_location; in.name = "FragmentStateBuffer"; - in.type = vk::glsl::input_type_storage_buffer; + in.type = vk::glsl::input_type_uniform_buffer; inputs.push_back(in); in.location = vk_prog->binding_table.tex_param_location; in.name = "TextureParametersBuffer"; - in.type = vk::glsl::input_type_storage_buffer; + in.type = vk::glsl::input_type_uniform_buffer; inputs.push_back(in); in.location = vk_prog->binding_table.polygon_stipple_params_location; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 885bc7cea2..1fd277effb 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -508,11 +508,11 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar) // VRAM allocation // This first set is bound persistently, so grow notifications are enabled. 
m_attrib_ring_info.create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, vk::heap_pool_default, "attrib buffer", 0x400000, VK_TRUE); - m_fragment_env_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, vk::heap_pool_low_latency, "fragment env buffer", 0x10000, VK_TRUE); + m_fragment_env_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, vk::heap_pool_low_latency, "fragment env buffer", 0x10000, VK_TRUE); m_vertex_env_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, vk::heap_pool_low_latency, "vertex env buffer", 0x10000, VK_TRUE); - m_fragment_texture_params_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, vk::heap_pool_low_latency, "fragment texture params buffer", 0x10000, VK_TRUE); + m_fragment_texture_params_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, vk::heap_pool_low_latency, "fragment texture params buffer", 0x10000, VK_TRUE); m_vertex_layout_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, vk::heap_pool_low_latency, "vertex layout buffer", 0x10000, VK_TRUE); - m_fragment_constants_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, vk::heap_pool_low_latency, "fragment constants buffer", 0x10000, VK_TRUE); + m_fragment_constants_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, vk::heap_pool_low_latency, "fragment constants buffer", 0x10000, VK_TRUE); m_transform_constants_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M * 0x100000, vk::heap_pool_default, "transform constants buffer", 0x10000, VK_TRUE); m_raster_env_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, vk::heap_pool_low_latency, "raster env buffer", 
0x10000, VK_TRUE); // Below here, we do not bind these persistently. Each draw call specifies the range manually so we do not need heap_grow notifications. @@ -1934,6 +1934,7 @@ void VKGSRender::load_program_env() } const auto& ctx = REGS(m_ctx); + const auto& gpu_limits = m_device->gpu().get_limits(); const u32 fragment_constants_size = current_fp_metadata.program_constants_buffer_length; const bool is_interpreter = m_shader_interpreter.is_interpreter(m_program); @@ -2022,6 +2023,9 @@ void VKGSRender::load_program_env() *ensure(m_fragment_prog), current_fragment_program, true); m_fragment_constants_ring_info.unmap(); + + m_fragment_constants_buffer_info = m_fragment_constants_ring_info.window<16>(m_fragment_constants_dynamic_offset, fragment_constants_size, gpu_limits.maxUniformBufferRange); + m_fragment_constants_dynamic_offset -= m_fragment_constants_buffer_info.offset; } } @@ -2032,15 +2036,21 @@ void VKGSRender::load_program_env() m_draw_processor.fill_fragment_state_buffer(buf, current_fragment_program); m_fragment_env_ring_info.unmap(); + + m_fragment_env_buffer_info = m_fragment_env_ring_info.window<32>(m_fragment_env_dynamic_offset, 32, gpu_limits.maxUniformBufferRange); + m_fragment_env_dynamic_offset -= m_fragment_env_buffer_info.offset; } if (update_fragment_texture_env) { - m_texture_parameters_dynamic_offset = m_fragment_texture_params_ring_info.static_alloc<16, 768>(); + m_texture_parameters_dynamic_offset = m_fragment_texture_params_ring_info.static_alloc<256, 768>(); auto buf = m_fragment_texture_params_ring_info.map(m_texture_parameters_dynamic_offset, 768); current_fragment_program.texture_params.write_to(buf, current_fp_metadata.referenced_textures_mask); m_fragment_texture_params_ring_info.unmap(); + + m_fragment_texture_params_buffer_info = m_fragment_texture_params_ring_info.window<768>(m_texture_parameters_dynamic_offset, 768, gpu_limits.maxUniformBufferRange); + m_texture_parameters_dynamic_offset -= 
m_fragment_texture_params_buffer_info.offset; } if (update_raster_env) diff --git a/rpcs3/Emu/RSX/VK/vkutils/device.cpp b/rpcs3/Emu/RSX/VK/vkutils/device.cpp index f7b7ffb19c..2c37298440 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/device.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/device.cpp @@ -703,26 +703,15 @@ namespace vk device.ppEnabledExtensionNames = requested_extensions.data(); device.pEnabledFeatures = &enabled_features; - VkPhysicalDeviceFloat16Int8FeaturesKHR shader_support_info{}; - if (pgpu->shader_types_support.allow_float16) - { - // Allow use of f16 type in shaders if possible - shader_support_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR; - shader_support_info.shaderFloat16 = VK_TRUE; - shader_support_info.pNext = const_cast<void*>(device.pNext); - device.pNext = &shader_support_info; + VkPhysicalDeviceVulkan12Features vulkan12_features{ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES }; + vulkan12_features.runtimeDescriptorArray = VK_TRUE; + vulkan12_features.uniformBufferStandardLayout = VK_TRUE; + vulkan12_features.pNext = const_cast<void*>(device.pNext); + device.pNext = &vulkan12_features; - rsx_log.notice("GPU/driver supports float16 data types natively. Using native float16_t variables if possible."); - } - else - { - rsx_log.notice("GPU/driver lacks support for float16 data types.
All float16_t arithmetic will be emulated with float32_t."); - } - - VkPhysicalDeviceDescriptorIndexingFeatures indexing_features{}; if (pgpu->descriptor_indexing_support) { -#define SET_DESCRIPTOR_BITFLAG(field, bit) if (pgpu->descriptor_indexing_support.update_after_bind_mask & (1ull << bit)) indexing_features.field = VK_TRUE +#define SET_DESCRIPTOR_BITFLAG(field, bit) if (pgpu->descriptor_indexing_support.update_after_bind_mask & (1ull << bit)) vulkan12_features.field = VK_TRUE SET_DESCRIPTOR_BITFLAG(descriptorBindingUniformBufferUpdateAfterBind, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER); SET_DESCRIPTOR_BITFLAG(descriptorBindingSampledImageUpdateAfterBind, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER); SET_DESCRIPTOR_BITFLAG(descriptorBindingSampledImageUpdateAfterBind, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE); @@ -731,12 +720,26 @@ namespace vk SET_DESCRIPTOR_BITFLAG(descriptorBindingUniformTexelBufferUpdateAfterBind, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER); SET_DESCRIPTOR_BITFLAG(descriptorBindingStorageTexelBufferUpdateAfterBind, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER); #undef SET_DESCRIPTOR_BITFLAG - - indexing_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT; - indexing_features.pNext = const_cast<void*>(device.pNext); - device.pNext = &indexing_features; } + if (pgpu->shader_types_support.allow_float16) + { + // Allow use of f16 type in shaders if possible + vulkan12_features.shaderFloat16 = VK_TRUE; + rsx_log.notice("GPU/driver supports float16 data types natively. Using native float16_t variables if possible."); + } + else + { + rsx_log.notice("GPU/driver lacks support for float16 data types. All float16_t arithmetic will be emulated with float32_t."); + } + + // FIXME: Fall back to something.
It is unclear how a fallback would even work, though; this really is a hard requirement + VkPhysicalDeviceShaderUniformBufferUnsizedArrayFeaturesEXT ubo_unsized_array_feature{}; + ubo_unsized_array_feature.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_UNIFORM_BUFFER_UNSIZED_ARRAY_FEATURES_EXT; + ubo_unsized_array_feature.shaderUniformBufferUnsizedArray = VK_TRUE; + ubo_unsized_array_feature.pNext = const_cast<void*>(device.pNext); + device.pNext = &ubo_unsized_array_feature; + VkPhysicalDeviceCustomBorderColorFeaturesEXT custom_border_color_features{}; if (pgpu->custom_border_color_support) {