vk: Use UBOs for critical per-fragment data

This commit is contained in:
kd-11 2026-04-27 13:00:48 +03:00 committed by kd-11
parent 9c725935dc
commit 5839ad9c70
3 changed files with 49 additions and 32 deletions

View file

@ -97,7 +97,11 @@ void VKFragmentDecompilerThread::insertHeader(std::stringstream & OS)
{
prepareBindingTable();
std::vector<const char*> required_extensions;
std::vector<const char*> required_extensions =
{
"GL_EXT_scalar_block_layout",
"GL_EXT_uniform_buffer_unsized_array"
};
if (device_props.has_native_half_support)
{
@ -251,7 +255,7 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS)
if (!properties.constant_offsets.empty())
{
OS << "layout(std430, set=1, binding=" << vk_prog->binding_table.cbuf_location << ") readonly buffer FragmentConstantsBuffer\n";
OS << "layout(std430, set=1, binding=" << vk_prog->binding_table.cbuf_location << ") uniform FragmentConstantsBuffer\n";
OS << "{\n";
OS << " vec4 fc[];\n";
OS << "};\n";
@ -259,12 +263,12 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS)
}
OS <<
"layout(std430, set=1, binding=" << vk_prog->binding_table.context_buffer_location << ") readonly buffer FragmentStateBuffer\n"
"layout(std430, set=1, binding=" << vk_prog->binding_table.context_buffer_location << ") uniform FragmentStateBuffer\n"
"{\n"
" fragment_context_t fs_contexts[];\n"
"};\n\n";
OS << "layout(std430, set=1, binding=" << vk_prog->binding_table.tex_param_location << ") readonly buffer TextureParametersBuffer\n";
OS << "layout(std430, set=1, binding=" << vk_prog->binding_table.tex_param_location << ") uniform TextureParametersBuffer\n";
OS << "{\n";
OS << " sampler_info texture_parameters[];\n";
OS << "};\n\n";
@ -284,18 +288,18 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS)
{
in.location = vk_prog->binding_table.cbuf_location;
in.name = "FragmentConstantsBuffer";
in.type = vk::glsl::input_type_storage_buffer,
in.type = vk::glsl::input_type_uniform_buffer,
inputs.push_back(in);
}
in.location = vk_prog->binding_table.context_buffer_location;
in.name = "FragmentStateBuffer";
in.type = vk::glsl::input_type_storage_buffer;
in.type = vk::glsl::input_type_uniform_buffer;
inputs.push_back(in);
in.location = vk_prog->binding_table.tex_param_location;
in.name = "TextureParametersBuffer";
in.type = vk::glsl::input_type_storage_buffer;
in.type = vk::glsl::input_type_uniform_buffer;
inputs.push_back(in);
in.location = vk_prog->binding_table.polygon_stipple_params_location;

View file

@ -508,11 +508,11 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar)
// VRAM allocation
// This first set is bound persistently, so grow notifications are enabled.
m_attrib_ring_info.create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, vk::heap_pool_default, "attrib buffer", 0x400000, VK_TRUE);
m_fragment_env_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, vk::heap_pool_low_latency, "fragment env buffer", 0x10000, VK_TRUE);
m_fragment_env_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, vk::heap_pool_low_latency, "fragment env buffer", 0x10000, VK_TRUE);
m_vertex_env_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, vk::heap_pool_low_latency, "vertex env buffer", 0x10000, VK_TRUE);
m_fragment_texture_params_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, vk::heap_pool_low_latency, "fragment texture params buffer", 0x10000, VK_TRUE);
m_fragment_texture_params_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, vk::heap_pool_low_latency, "fragment texture params buffer", 0x10000, VK_TRUE);
m_vertex_layout_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, vk::heap_pool_low_latency, "vertex layout buffer", 0x10000, VK_TRUE);
m_fragment_constants_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, vk::heap_pool_low_latency, "fragment constants buffer", 0x10000, VK_TRUE);
m_fragment_constants_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, vk::heap_pool_low_latency, "fragment constants buffer", 0x10000, VK_TRUE);
m_transform_constants_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M * 0x100000, vk::heap_pool_default, "transform constants buffer", 0x10000, VK_TRUE);
m_raster_env_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, vk::heap_pool_low_latency, "raster env buffer", 0x10000, VK_TRUE);
// Below here, we do not bind these persistently. Each draw call specifies the range manually so we do not need heap_grow notifications.
@ -1934,6 +1934,7 @@ void VKGSRender::load_program_env()
}
const auto& ctx = REGS(m_ctx);
const auto& gpu_limits = m_device->gpu().get_limits();
const u32 fragment_constants_size = current_fp_metadata.program_constants_buffer_length;
const bool is_interpreter = m_shader_interpreter.is_interpreter(m_program);
@ -2022,6 +2023,9 @@ void VKGSRender::load_program_env()
*ensure(m_fragment_prog), current_fragment_program, true);
m_fragment_constants_ring_info.unmap();
m_fragment_constants_buffer_info = m_fragment_constants_ring_info.window<16>(m_fragment_constants_dynamic_offset, fragment_constants_size, gpu_limits.maxUniformBufferRange);
m_fragment_constants_dynamic_offset -= m_fragment_constants_buffer_info.offset;
}
}
@ -2032,15 +2036,21 @@ void VKGSRender::load_program_env()
m_draw_processor.fill_fragment_state_buffer(buf, current_fragment_program);
m_fragment_env_ring_info.unmap();
m_fragment_env_buffer_info = m_fragment_env_ring_info.window<32>(m_fragment_env_dynamic_offset, 32, gpu_limits.maxUniformBufferRange);
m_fragment_env_dynamic_offset -= m_fragment_env_buffer_info.offset;
}
if (update_fragment_texture_env)
{
m_texture_parameters_dynamic_offset = m_fragment_texture_params_ring_info.static_alloc<16, 768>();
m_texture_parameters_dynamic_offset = m_fragment_texture_params_ring_info.static_alloc<256, 768>();
auto buf = m_fragment_texture_params_ring_info.map(m_texture_parameters_dynamic_offset, 768);
current_fragment_program.texture_params.write_to(buf, current_fp_metadata.referenced_textures_mask);
m_fragment_texture_params_ring_info.unmap();
m_fragment_texture_params_buffer_info = m_fragment_texture_params_ring_info.window<768>(m_texture_parameters_dynamic_offset, 768, gpu_limits.maxUniformBufferRange);
m_texture_parameters_dynamic_offset -= m_fragment_texture_params_buffer_info.offset;
}
if (update_raster_env)

View file

@ -703,26 +703,15 @@ namespace vk
device.ppEnabledExtensionNames = requested_extensions.data();
device.pEnabledFeatures = &enabled_features;
VkPhysicalDeviceFloat16Int8FeaturesKHR shader_support_info{};
if (pgpu->shader_types_support.allow_float16)
{
// Allow use of f16 type in shaders if possible
shader_support_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR;
shader_support_info.shaderFloat16 = VK_TRUE;
shader_support_info.pNext = const_cast<void*>(device.pNext);
device.pNext = &shader_support_info;
VkPhysicalDeviceVulkan12Features vulkan12_features{ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES };
vulkan12_features.runtimeDescriptorArray = VK_TRUE;
vulkan12_features.uniformBufferStandardLayout = VK_TRUE;
vulkan12_features.pNext = const_cast<void*>(device.pNext);
device.pNext = &vulkan12_features;
rsx_log.notice("GPU/driver supports float16 data types natively. Using native float16_t variables if possible.");
}
else
{
rsx_log.notice("GPU/driver lacks support for float16 data types. All float16_t arithmetic will be emulated with float32_t.");
}
VkPhysicalDeviceDescriptorIndexingFeatures indexing_features{};
if (pgpu->descriptor_indexing_support)
{
#define SET_DESCRIPTOR_BITFLAG(field, bit) if (pgpu->descriptor_indexing_support.update_after_bind_mask & (1ull << bit)) indexing_features.field = VK_TRUE
#define SET_DESCRIPTOR_BITFLAG(field, bit) if (pgpu->descriptor_indexing_support.update_after_bind_mask & (1ull << bit)) vulkan12_features.field = VK_TRUE
SET_DESCRIPTOR_BITFLAG(descriptorBindingUniformBufferUpdateAfterBind, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
SET_DESCRIPTOR_BITFLAG(descriptorBindingSampledImageUpdateAfterBind, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
SET_DESCRIPTOR_BITFLAG(descriptorBindingSampledImageUpdateAfterBind, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE);
@ -731,12 +720,26 @@ namespace vk
SET_DESCRIPTOR_BITFLAG(descriptorBindingUniformTexelBufferUpdateAfterBind, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER);
SET_DESCRIPTOR_BITFLAG(descriptorBindingStorageTexelBufferUpdateAfterBind, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
#undef SET_DESCRIPTOR_BITFLAG
indexing_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT;
indexing_features.pNext = const_cast<void*>(device.pNext);
device.pNext = &indexing_features;
}
if (pgpu->shader_types_support.allow_float16)
{
// Allow use of f16 type in shaders if possible
vulkan12_features.shaderFloat16 = VK_TRUE;
rsx_log.notice("GPU/driver supports float16 data types natively. Using native float16_t variables if possible.");
}
else
{
rsx_log.notice("GPU/driver lacks support for float16 data types. All float16_t arithmetic will be emulated with float32_t.");
}
// FIXME: Fall back to something if this feature is unavailable. It is unclear how a fallback would even work, though; this really is a hard requirement.
VkPhysicalDeviceShaderUniformBufferUnsizedArrayFeaturesEXT ubo_unsized_array_feature{};
ubo_unsized_array_feature.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_UNIFORM_BUFFER_UNSIZED_ARRAY_FEATURES_EXT;
ubo_unsized_array_feature.shaderUniformBufferUnsizedArray = VK_TRUE;
ubo_unsized_array_feature.pNext = const_cast<void*>(device.pNext);
device.pNext = &ubo_unsized_array_feature;
VkPhysicalDeviceCustomBorderColorFeaturesEXT custom_border_color_features{};
if (pgpu->custom_border_color_support)
{