From ecc0fe46782d5f944b5ad9ae361a53cceb1bae47 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 20 Jul 2025 17:27:47 +0300 Subject: [PATCH] vk: Move draw-time constants to vertex layout stream and make it SSBO --- .../GLSLSnippets/RSXProg/RSXDefines2.glsl | 11 +++- .../GLSLSnippets/RSXProg/RSXVertexFetch.glsl | 7 ++- rpcs3/Emu/RSX/VK/VKDraw.cpp | 17 +------ rpcs3/Emu/RSX/VK/VKGSRender.cpp | 50 ++++++++----------- rpcs3/Emu/RSX/VK/VKGSRender.h | 2 +- rpcs3/Emu/RSX/VK/VKVertexProgram.cpp | 28 ++++++++--- 6 files changed, 61 insertions(+), 54 deletions(-) diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXDefines2.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXDefines2.glsl index 930ec553c7..a8900d7dc3 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXDefines2.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXDefines2.glsl @@ -19,7 +19,16 @@ struct vertex_context_t float point_size; float z_near; float z_far; - // float reserved[3]; + float reserved[3]; +}; + +struct vertex_layout_t +{ + uint vertex_base_index; + uint vertex_index_offset; + uint draw_id; + uint reserved; + uvec2 attrib_data[16]; }; )" diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXVertexFetch.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXVertexFetch.glsl index 66c4a6072b..152fb01c43 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXVertexFetch.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXVertexFetch.glsl @@ -156,7 +156,7 @@ attribute_desc fetch_desc(const in int location) #ifdef VULKAN // Fetch parameters streamed separately from draw parameters - uvec2 attrib = texelFetch(vertex_layout_stream, location + int(layout_ptr_offset)).xy; + uvec2 attrib = vertex_layouts[vs_attrib_layout_offset].attrib_data[location]; #else // Data is packed into a ubo const int block = (location >> 1); @@ -178,6 +178,11 @@ attribute_desc fetch_desc(const in int location) return result; } +#ifdef VULKAN +#define vertex_index_offset vertex_layouts[vs_attrib_layout_offset].vertex_index_offset +#define vertex_base_index vertex_layouts[vs_attrib_layout_offset].vertex_base_index +#endif + vec4 read_location(const in int location) { int vertex_id; diff --git a/rpcs3/Emu/RSX/VK/VKDraw.cpp b/rpcs3/Emu/RSX/VK/VKDraw.cpp index 2a6d195bef..ee7448bc22 100644 --- a/rpcs3/Emu/RSX/VK/VKDraw.cpp +++ b/rpcs3/Emu/RSX/VK/VKDraw.cpp @@ -820,30 +820,17 @@ void VKGSRender::emit_geometry(u32 sub_index) update_descriptors = true; // Allocate stream layout memory for this batch - m_vertex_layout_stream_info.range = rsx::method_registers.current_draw_clause.pass_count() * 128; - m_vertex_layout_stream_info.offset = m_vertex_layout_ring_info.alloc<256>(m_vertex_layout_stream_info.range); - - if (vk::test_status_interrupt(vk::heap_changed)) - { - if (m_vertex_layout_storage && - m_vertex_layout_storage->info.buffer != m_vertex_layout_ring_info.heap->value) - { - vk::get_resource_manager()->dispose(m_vertex_layout_storage); - } - - vk::clear_status_interrupt(vk::heap_changed); - } + const u64 alloc_size = rsx::method_registers.current_draw_clause.pass_count() * 144; + m_vertex_layout_dynamic_offset = m_vertex_layout_ring_info.alloc<16>(alloc_size); } // Update vertex fetch parameters update_vertex_env(sub_index, upload_info); - ensure(m_vertex_layout_storage); if (update_descriptors) { m_program->bind_uniform(persistent_buffer, vk::glsl::binding_set_index_vertex, m_vs_binding_table->vertex_buffers_location); m_program->bind_uniform(volatile_buffer, vk::glsl::binding_set_index_vertex, m_vs_binding_table->vertex_buffers_location + 1); - m_program->bind_uniform(m_vertex_layout_storage->value, vk::glsl::binding_set_index_vertex, m_vs_binding_table->vertex_buffers_location + 2); } bool reload_state = (!m_current_draw.subdraw_id++); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 3a14c6e7ec..c74dbff549 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -513,7 +513,7 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar) m_fragment_env_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment env buffer"); m_vertex_env_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex env buffer"); m_fragment_texture_params_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment texture params buffer"); - m_vertex_layout_ring_info.create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex layout buffer", 0x10000, VK_TRUE); + m_vertex_layout_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex layout buffer", 0x10000, VK_TRUE); m_fragment_constants_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment constants buffer"); m_transform_constants_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M * 0x100000, "transform constants buffer"); m_index_buffer_ring_info.create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, "index buffer"); @@ -556,6 +556,7 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar) m_fragment_env_buffer_info = { m_fragment_env_ring_info.heap->value, 0, 16 }; m_fragment_texture_params_buffer_info = { m_fragment_texture_params_ring_info.heap->value, 0, 16 }; m_raster_env_buffer_info = { m_raster_env_ring_info.heap->value, 0, 128 }; + m_vertex_layout_stream_info = { m_vertex_layout_ring_info.heap->value, 0, VK_WHOLE_SIZE }; const auto& limits = m_device->gpu().get_limits(); m_texbuffer_view_size = std::min(limits.maxTexelBufferElements, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000u); @@ -812,7 +813,6 @@ VKGSRender::~VKGSRender() m_persistent_attribute_storage.reset(); m_volatile_attribute_storage.reset(); - m_vertex_layout_storage.reset(); // Upscaler (references some global resources) m_upscaler.reset(); @@ -2095,6 +2095,7 @@ void VKGSRender::load_program_env() } m_program->bind_uniform(m_vertex_env_buffer_info, vk::glsl::binding_set_index_vertex, m_vs_binding_table->context_buffer_location); + m_program->bind_uniform(m_vertex_layout_stream_info, vk::glsl::binding_set_index_vertex, m_vs_binding_table->vertex_buffers_location + 2); m_program->bind_uniform(m_fragment_env_buffer_info, vk::glsl::binding_set_index_fragment, m_fs_binding_table->context_buffer_location); m_program->bind_uniform(m_fragment_texture_params_buffer_info, vk::glsl::binding_set_index_fragment, m_fs_binding_table->tex_param_location); m_program->bind_uniform(m_raster_env_buffer_info, vk::glsl::binding_set_index_fragment, m_fs_binding_table->polygon_stipple_params_location); @@ -2189,43 +2190,31 @@ void VKGSRender::upload_transform_constants(const rsx::io_buffer& buffer) void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_info) { struct rsx_prog_push_constants_block_t + { + u32 xform_constants_offset; + u32 vs_context_offset; + u32 vs_attrib_layout_offset; + }; + + struct rsx_prog_vertex_layout_entry_t { u32 vertex_base_index; u32 vertex_index_offset; u32 draw_id; - u32 layout_ptr_offset; - u32 xform_constants_offset; - u32 vs_context_offset; + u32 reserved; + s32 attrib_data[1]; }; // Actual allocation must have been done previously - u32 base_offset; - const u32 offset32 = static_cast(m_vertex_layout_stream_info.offset); - const u32 range32 = static_cast(m_vertex_layout_stream_info.range); - - if (!m_vertex_layout_storage || !m_vertex_layout_storage->in_range(offset32, range32, base_offset)) - { - ensure(m_texbuffer_view_size >= m_vertex_layout_stream_info.range); - vk::get_resource_manager()->dispose(m_vertex_layout_storage); - - const usz alloc_addr = m_vertex_layout_stream_info.offset; - const usz view_size = (alloc_addr + m_texbuffer_view_size) > m_vertex_layout_ring_info.size() ? m_vertex_layout_ring_info.size() - alloc_addr : m_texbuffer_view_size; - m_vertex_layout_storage = std::make_unique(*m_device, m_vertex_layout_ring_info.heap->value, VK_FORMAT_R32G32_UINT, alloc_addr, view_size); - base_offset = 0; - } - - const u32 vertex_layout_offset = (id * 16) + (base_offset / 8); const u32 constant_id_offset = static_cast(m_xform_constants_dynamic_offset) / 16u; const u32 vertex_context_offset = static_cast(m_vertex_env_dynamic_offset) / 128u; + const u32 vertex_layout_offset = static_cast(m_vertex_layout_dynamic_offset) / 144u; // Pack rsx_prog_push_constants_block_t push_constants; - push_constants.vertex_base_index = vertex_info.vertex_index_base; - push_constants.vertex_index_offset = vertex_info.vertex_index_offset; - push_constants.draw_id = id; - push_constants.layout_ptr_offset = vertex_layout_offset; push_constants.xform_constants_offset = constant_id_offset; push_constants.vs_context_offset = vertex_context_offset; + push_constants.vs_attrib_layout_offset = vertex_layout_offset + id; vkCmdPushConstants( *m_current_command_buffer, @@ -2235,15 +2224,20 @@ void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_ sizeof(push_constants), &push_constants); - const usz data_offset = (id * 128) + m_vertex_layout_stream_info.offset; - auto dst = m_vertex_layout_ring_info.map(data_offset, 128); + // Now actually fill in the data + auto buf = m_vertex_layout_ring_info.map(m_vertex_layout_dynamic_offset + (144u * id), 144); + auto dst = reinterpret_cast(buf); + dst->vertex_base_index = vertex_info.vertex_index_base; + dst->vertex_index_offset = vertex_info.vertex_index_offset; + dst->draw_id = id; + dst->reserved = 0; m_draw_processor.fill_vertex_layout_state( m_vertex_layout, current_vp_metadata, vertex_info.first_vertex, vertex_info.allocated_vertex_count, - static_cast(dst), + dst->attrib_data, vertex_info.persistent_window_offset, vertex_info.volatile_window_offset); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 979b9cd4f2..83519acfbd 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -79,7 +79,6 @@ private: std::unique_ptr m_persistent_attribute_storage; std::unique_ptr m_volatile_attribute_storage; - std::unique_ptr m_vertex_layout_storage; VkDependencyInfoKHR m_async_compute_dependency_info {}; VkMemoryBarrier2KHR m_async_compute_memory_barrier {}; @@ -153,6 +152,7 @@ private: rsx::simple_array m_multidraw_parameters_buffer; u64 m_xform_constants_dynamic_offset = 0; // We manage transform_constants dynamic offset manually to alleviate performance penalty of doing a hot-patch of constants. u64 m_vertex_env_dynamic_offset = 0; + u64 m_vertex_layout_dynamic_offset = 0; std::array frame_context_storage; //Temp frame context to use if the real frame queue is overburdened. Only used for storage diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp index 17ef1f2bb9..fd6e73b69e 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp @@ -118,21 +118,34 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS) } OS << - "layout(push_constant) uniform VertexLayoutBuffer\n" + "layout(std430, set=0, binding=" << vk_prog->binding_table.vertex_buffers_location + 2 << ") readonly buffer VertexLayoutBuffer\n" + "{\n" + " vertex_layout_t vertex_layouts[];\n" + "};\n\n"; + + const vk::glsl::program_input layouts_input + { + .domain = glsl::glsl_vertex_program, + .type = vk::glsl::input_type_storage_buffer, + .set = vk::glsl::binding_set_index_vertex, + .location = vk_prog->binding_table.vertex_buffers_location + 2, + .name = "VertexLayoutBuffer" + }; + inputs.push_back(layouts_input); + + OS << + "layout(push_constant) uniform push_constants_block\n" "{\n" - " uint vertex_base_index;\n" - " uint vertex_index_offset;\n" - " uint draw_id;\n" - " uint layout_ptr_offset;\n" " uint xform_constants_offset;\n" " uint vs_context_offset;\n" + " uint vs_attrib_layout_offset;\n" "};\n\n"; const vk::glsl::program_input push_constants { .domain = glsl::glsl_vertex_program, .type = vk::glsl::input_type_push_constant, - .bound_data = vk::glsl::push_constant_ref{ .offset = 0, .size = 24 }, + .bound_data = vk::glsl::push_constant_ref{ .offset = 0, .size = 12 }, .set = vk::glsl::binding_set_index_vertex, .location = umax, .name = "push_constants_block" @@ -145,8 +158,7 @@ void VKVertexDecompilerThread::insertInputs(std::stringstream& OS, const std::ve static const char* input_streams[] = { "persistent_input_stream", // Data stream with persistent vertex data (cacheable) - "volatile_input_stream", // Data stream with per-draw data (registers and immediate draw data) - "vertex_layout_stream" // Data stream defining vertex data layout" + "volatile_input_stream" // Data stream with per-draw data (registers and immediate draw data) }; u32 location = vk_prog->binding_table.vertex_buffers_location;