From 3b1abec40593b1f0bceb69b0413cc232af71149a Mon Sep 17 00:00:00 2001
From: kd-11
Date: Wed, 29 Apr 2026 00:04:26 +0300
Subject: [PATCH] rsx/vk: Implement bulk aligned allocator

- Avoids wasting space and allows use of natural arrays in shaders
---
Reviewer notes (this region is ignored when the patch is applied):

* This copy of the patch had been flattened and had every <...> span removed.
  Line structure was rebuilt so that each hunk matches its @@ line counts and
  the diffstat below. Template parameter lists and cast/container type
  arguments were re-inserted from how the bodies use them (Alignment, Size,
  ElementSize, T); their exact kinds, order and defaults are an assumption -
  TODO confirm against the upstream commit before applying.
* The sender address on the From: line was missing in this copy and has not
  been reconstructed.
* bulk_allocator: with batch_size == 0 the first alloc() requests 0 bytes,
  sets m_capacity to 0 and then decrements it. Assuming u32 is unsigned, the
  counter wraps and later calls keep returning offsets that were never
  reserved. Consider clamping batch_size to at least 1.
* bulk_allocator caches m_address/m_capacity and has no reset. init() sets
  m_put_pos back to 0, and the vk heap calls init() again under the comment
  "reset the allocator". Offsets left over from an earlier batch could then be
  handed out a second time by the container - confirm that callers discard
  the bulk_allocator when the heap is re-initialised.
* bulk_allocator uses std::exchange; the includes visible in the hunk are
  StrFmt.h and asm.hpp only - confirm <utility> is reachable.

 rpcs3/Emu/RSX/Common/ring_buffer_helper.h | 329 ++++++++++++----------
 rpcs3/Emu/RSX/VK/vkutils/data_heap.cpp    |   6 +-
 rpcs3/Emu/RSX/VK/vkutils/data_heap.h      |  15 +-
 3 files changed, 195 insertions(+), 155 deletions(-)

diff --git a/rpcs3/Emu/RSX/Common/ring_buffer_helper.h b/rpcs3/Emu/RSX/Common/ring_buffer_helper.h
index 2aebd546c3..94b8da6e8f 100644
--- a/rpcs3/Emu/RSX/Common/ring_buffer_helper.h
+++ b/rpcs3/Emu/RSX/Common/ring_buffer_helper.h
@@ -3,167 +3,200 @@
 #include "Utilities/StrFmt.h"
 #include "util/asm.hpp"
 
-/**
- * Ring buffer memory helper :
- * There are 2 "pointers" (offset inside a memory buffer to be provided by class derivative)
- * PUT pointer "points" to the start of allocatable space.
- * GET pointer "points" to the start of memory in use by the GPU.
- * Space between GET and PUT is used by the GPU ; this structure check that this memory is not overwritten.
- * User has to update the GET pointer when synchronisation happens.
- */
-class data_heap
+namespace rsx
 {
-protected:
 	/**
-	 * Internal implementation of allocation test
-	 * Does alloc cross get position?
-	 */
-	bool can_alloc_impl(usz aligned_put_pos, usz aligned_alloc_size) const
-	{
-		const usz alloc_end = aligned_put_pos + aligned_alloc_size;
-		if (alloc_end < m_size) [[ likely ]]
-		{
-			// Range before get
-			if (alloc_end < m_get_pos)
-				return true;
-
-			// Range after get
-			if (aligned_put_pos > m_get_pos)
-				return true;
-
-			return false;
-		}
-
-		// ..]....[..get..
-		if (aligned_put_pos < m_get_pos)
-			return false;
-
-		// ..get..]...[...
-		// Actually all resources extending beyond heap space starts at 0
-		if (aligned_alloc_size > m_get_pos)
-			return false;
-
-		return true;
-	}
-
-	/**
-	 * Does alloc cross get position?
-	 */
-	template<int Alignment>
-	bool can_alloc(usz size) const
-	{
-		const usz alloc_size = utils::align(size, Alignment);
-		const usz aligned_put_pos = utils::align(m_put_pos, Alignment);
-		return can_alloc_impl(aligned_put_pos, alloc_size);
-	}
-
-	// Grow the buffer to hold at least size bytes
-	virtual bool grow(usz /*size*/)
-	{
-		// Stub
-		return false;
-	}
-
-	usz m_size;
-	usz m_put_pos; // Start of free space
-	usz m_get_pos; // End of free space
-	usz m_min_guard_size; // If an allocation touches the guard region, reset the heap to avoid going over budget
-
-	char* m_name;
-public:
-	data_heap() = default;
-	~data_heap() = default;
-	data_heap(const data_heap&) = delete;
-	data_heap(data_heap&&) = delete;
-
-	void init(usz heap_size, const char* buffer_name = nullptr, usz min_guard_size=0x10000)
-	{
-		m_name = const_cast<char*>(buffer_name ? buffer_name : "");
-
-		m_size = heap_size;
-		m_put_pos = 0;
-		m_get_pos = heap_size - 1;
-
-		// Allocation stats
-		m_min_guard_size = min_guard_size;
-	}
-
-	template<int Alignment>
-	usz alloc(usz size)
-	{
-		const usz alloc_size = utils::align(size, Alignment);
-		const usz aligned_put_pos = utils::align(m_put_pos, Alignment);
-
-		if (!can_alloc<Alignment>(size) && !grow(alloc_size))
-		{
-			fmt::throw_exception("[%s] Working buffer not big enough, buffer_length=%d requested=%d guard=%d",
-				m_name, m_size, size, m_min_guard_size);
-		}
-
-		const usz alloc_end = aligned_put_pos + alloc_size;
-		if (alloc_end < m_size)
-		{
-			m_put_pos = alloc_end;
-			return aligned_put_pos;
-		}
-
-		m_put_pos = alloc_size;
-		return 0;
-	}
-
-	/*
-	 * For use in cases where we take a fixed amount each time
+	 * Ring buffer memory helper :
+	 * There are 2 "pointers" (offset inside a memory buffer to be provided by class derivative)
+	 * PUT pointer "points" to the start of allocatable space.
+	 * GET pointer "points" to the start of memory in use by the GPU.
+	 * Space between GET and PUT is used by the GPU ; this structure check that this memory is not overwritten.
+	 * User has to update the GET pointer when synchronisation happens.
 	 */
-	template <usz Alignment, usz Size = Alignment>
-	usz static_alloc()
+	class data_heap
 	{
-		static_assert((Size & (Alignment - 1)) == 0);
-		ensure((m_put_pos & (Alignment - 1)) == 0);
-
-		if (!can_alloc_impl(m_put_pos, Size) && !grow(Size))
+	protected:
+		/**
+		 * Internal implementation of allocation test
+		 * Does alloc cross get position?
+		 */
+		bool can_alloc_impl(usz aligned_put_pos, usz aligned_alloc_size) const
 		{
-			fmt::throw_exception("[%s] Working buffer not big enough, buffer_length=%d requested=%d guard=%d",
+			const usz alloc_end = aligned_put_pos + aligned_alloc_size;
+			if (alloc_end < m_size) [[ likely ]]
+			{
+				// Range before get
+				if (alloc_end < m_get_pos)
+					return true;
+
+				// Range after get
+				if (aligned_put_pos > m_get_pos)
+					return true;
+
+				return false;
+			}
+
+			// ..]....[..get..
+			if (aligned_put_pos < m_get_pos)
+				return false;
+
+			// ..get..]...[...
+			// Actually all resources extending beyond heap space starts at 0
+			if (aligned_alloc_size > m_get_pos)
+				return false;
+
+			return true;
+		}
+
+		/**
+		 * Does alloc cross get position?
+		 */
+		template<int Alignment>
+		bool can_alloc(usz size) const
+		{
+			const usz alloc_size = utils::align(size, Alignment);
+			const usz aligned_put_pos = utils::align(m_put_pos, Alignment);
+			return can_alloc_impl(aligned_put_pos, alloc_size);
+		}
+
+		// Grow the buffer to hold at least size bytes
+		virtual bool grow(usz /*size*/)
+		{
+			// Stub
+			return false;
+		}
+
+		usz m_size;
+		usz m_put_pos; // Start of free space
+		usz m_get_pos; // End of free space
+		usz m_min_guard_size; // If an allocation touches the guard region, reset the heap to avoid going over budget
+
+		char* m_name;
+	public:
+		data_heap() = default;
+		~data_heap() = default;
+		data_heap(const data_heap&) = delete;
+		data_heap(data_heap&&) = delete;
+
+		void init(usz heap_size, const char* buffer_name = nullptr, usz min_guard_size = 0x10000)
+		{
+			m_name = const_cast<char*>(buffer_name ? buffer_name : "");
+
+			m_size = heap_size;
+			m_put_pos = 0;
+			m_get_pos = heap_size - 1;
+
+			// Allocation stats
+			m_min_guard_size = min_guard_size;
+		}
+
+		template<int Alignment>
+		usz alloc(usz size)
+		{
+			const usz alloc_size = utils::align(size, Alignment);
+			const usz aligned_put_pos = utils::align(m_put_pos, Alignment);
+
+			if (!can_alloc<Alignment>(size) && !grow(alloc_size))
+			{
+				fmt::throw_exception("[%s] Working buffer not big enough, buffer_length=%d requested=%d guard=%d",
+					m_name, m_size, size, m_min_guard_size);
+			}
+
+			const usz alloc_end = aligned_put_pos + alloc_size;
+			if (alloc_end < m_size)
+			{
+				m_put_pos = alloc_end;
+				return aligned_put_pos;
+			}
+
+			m_put_pos = alloc_size;
+			return 0;
+		}
+
+		/*
+		 * For use in cases where we take a fixed amount each time
+		 */
+		template <usz Alignment, usz Size = Alignment>
+		usz static_alloc()
+		{
+			static_assert((Size & (Alignment - 1)) == 0);
+			ensure((m_put_pos & (Alignment - 1)) == 0);
+
+			if (!can_alloc_impl(m_put_pos, Size) && !grow(Size))
+			{
+				fmt::throw_exception("[%s] Working buffer not big enough, buffer_length=%d requested=%d guard=%d",
 				m_name, m_size, Size, m_min_guard_size);
+			}
+
+			const usz alloc_end = m_put_pos + Size;
+			if (alloc_end < m_size)
+			{
+				const auto ret_pos = m_put_pos;
+				m_put_pos = alloc_end;
+				return ret_pos;
+			}
+
+			m_put_pos = Size;
+			return 0;
 		}
 
-		const usz alloc_end = m_put_pos + Size;
-		if (alloc_end < m_size)
+		/**
+		 * return current putpos - 1
+		 */
+		usz get_current_put_pos_minus_one() const
 		{
-			const auto ret_pos = m_put_pos;
-			m_put_pos = alloc_end;
-			return ret_pos;
+			return (m_put_pos > 0) ? m_put_pos - 1 : m_size - 1;
 		}
 
-		m_put_pos = Size;
-		return 0;
-	}
+		inline void set_get_pos(usz value)
+		{
+			m_get_pos = value;
+		}
 
-	/**
-	 * return current putpos - 1
-	 */
-	usz get_current_put_pos_minus_one() const
-	{
-		return (m_put_pos > 0) ? m_put_pos - 1 : m_size - 1;
-	}
+		void reset_allocation_stats()
+		{
+			m_get_pos = get_current_put_pos_minus_one();
+		}
 
-	inline void set_get_pos(usz value)
-	{
-		m_get_pos = value;
-	}
+		// Updates the current_allocated_size metrics
+		inline void notify()
+		{
+			// @unused
+		}
 
-	void reset_allocation_stats()
-	{
-		m_get_pos = get_current_put_pos_minus_one();
-	}
+		usz size() const
+		{
+			return m_size;
+		}
 
-	// Updates the current_allocated_size metrics
-	inline void notify()
-	{
-		// @unused
-	}
+		// Bulk static allocator. Allows to allocate one large block and subdivide
+		// [ 0, 1, 2, 3 ] [ 4, 5, 6, 7 ] ...
+		template <usz Alignment, usz ElementSize>
+		struct bulk_allocator
+		{
+			bulk_allocator(data_heap& container, u32 batch_size = 1)
+				: m_container(container)
+				, m_batch_size(batch_size)
+			{}
 
-	usz size() const
-	{
-		return m_size;
-	}
-};
+			usz alloc()
+			{
+				if (!m_capacity)
+				{
+					m_address = m_container.alloc<Alignment>(ElementSize * m_batch_size);
+					m_capacity = m_batch_size;
+				}
+
+				m_capacity--;
+				return std::exchange(m_address, m_address + ElementSize);
+			}
+
+		private:
+			data_heap& m_container;
+			usz m_address = 0;
+
+			u32 m_capacity = 0;
+			u32 m_batch_size = 1;
+		};
+	};
+}
diff --git a/rpcs3/Emu/RSX/VK/vkutils/data_heap.cpp b/rpcs3/Emu/RSX/VK/vkutils/data_heap.cpp
index 1ba47abf8b..9534b5f57c 100644
--- a/rpcs3/Emu/RSX/VK/vkutils/data_heap.cpp
+++ b/rpcs3/Emu/RSX/VK/vkutils/data_heap.cpp
@@ -15,7 +15,7 @@ namespace vk
 
 	void data_heap::create(VkBufferUsageFlags usage, usz size, rsx::flags32_t flags, const char* name, usz guard, VkBool32 notify)
 	{
-		::data_heap::init(size, name, guard);
+		rsx::data_heap::init(size, name, guard);
 
 		const auto& memory_map = g_render_device->get_memory_mapping();
 
@@ -135,7 +135,7 @@ namespace vk
 		auto memory_index = m_prefer_writethrough ? memory_map.device_bar : memory_map.host_visible_coherent;
 
 		// Update heap information and reset the allocator
-		::data_heap::init(aligned_new_size, m_name, m_min_guard_size);
+		rsx::data_heap::init(aligned_new_size, m_name, m_min_guard_size);
 
 		// Discard old heap and create a new one. Old heap will be garbage collected when no longer needed
 		auto gc = get_resource_manager();
@@ -188,7 +188,7 @@ namespace vk
 		return after_usage < limit;
 	}
 
-	void* data_heap::map(usz offset, usz size)
+	void* data_heap::map_impl(usz offset, usz size)
 	{
 		if (!_ptr)
 		{
diff --git a/rpcs3/Emu/RSX/VK/vkutils/data_heap.h b/rpcs3/Emu/RSX/VK/vkutils/data_heap.h
index a8aa8e5141..0e219714be 100644
--- a/rpcs3/Emu/RSX/VK/vkutils/data_heap.h
+++ b/rpcs3/Emu/RSX/VK/vkutils/data_heap.h
@@ -20,7 +20,7 @@ namespace vk
 		heap_pool_force_vram_shadow = (1 << 2),
 	};
 
-	class data_heap : public ::data_heap
+	class data_heap : public rsx::data_heap
 	{
 	private:
 		usz initial_size = 0;
@@ -41,6 +41,8 @@ namespace vk
 		bool grow(usz size) override;
 		bool can_allocate_heap(const vk::memory_type_info& target_heap, usz size, int max_usage_percent);
 
+		void* map_impl(usz offset, usz size);
+
 	public:
 		std::unique_ptr<buffer> heap;
 
@@ -51,9 +53,16 @@ namespace vk
 		void create(VkBufferUsageFlags usage, usz size, rsx::flags32_t flags, const char* name, usz guard = 0x10000, VkBool32 notify = VK_FALSE);
 		void destroy();
 
-		void* map(usz offset, usz size);
+		template <typename T = void>
+		T* map(usz offset, usz size)
+		{
+			return reinterpret_cast<T*>(map_impl(offset, size));
+		}
+
 		void unmap(bool force = false);
 
+		void sync(const vk::command_buffer& cmd);
+
 		template<int Alignment, typename T = char>
 			requires std::is_trivially_destructible_v<T>
 		std::pair<usz, T*> alloc_and_map(usz count)
@@ -63,8 +72,6 @@ namespace vk
 			return { addr, reinterpret_cast<T*>(map(addr, size_bytes)) };
 		}
 
-		void sync(const vk::command_buffer& cmd);
-
 		template<int Alignment>
 		VkDescriptorBufferInfoEx window(usz offset, usz range, u64 window_size) const
 		{