rsx/vk: Implement bulk aligned allocator

- Avoids wasting space and allows use of natural arrays in shaders
kd-11 authored 2026-04-29 00:04:26 +03:00 · committed by kd-11
parent 8b02f46e67
commit 3b1abec405
3 changed files with 195 additions and 155 deletions
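
A rough sense of the space saving claimed in the commit message (all numbers below are hypothetical, chosen only for illustration): with a 256-byte offset-alignment requirement and 16-byte elements, allocating one element at a time rounds every request up to the alignment, while one bulk allocation pays the alignment cost once and keeps the elements at their natural stride — which is what lets a shader index them as a plain array.

#include <cstddef>

// Hypothetical numbers for illustration only (not taken from the commit).
constexpr std::size_t alignment = 256;   // e.g. a minStorageBufferOffsetAlignment-style limit
constexpr std::size_t element_size = 16;
constexpr std::size_t batch = 4;

constexpr std::size_t align_up(std::size_t x, std::size_t a)
{
	return (x + a - 1) & ~(a - 1);
}

// One ring-buffer allocation per element: each element starts on its own
// 256-byte boundary, so the shader must also use a 256-byte stride.
constexpr std::size_t per_element = batch * align_up(element_size, alignment); // 1024 bytes

// One bulk allocation, subdivided: only the block start is aligned and the
// elements keep their natural 16-byte stride ("natural arrays in shaders").
constexpr std::size_t bulk = align_up(batch * element_size, alignment); // 256 bytes

static_assert(per_element == 1024 && bulk == 256);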

@@ -3,167 +3,200 @@
 #include "Utilities/StrFmt.h"
 #include "util/asm.hpp"
 
-/**
- * Ring buffer memory helper :
- * There are 2 "pointers" (offset inside a memory buffer to be provided by class derivative)
- * PUT pointer "points" to the start of allocatable space.
- * GET pointer "points" to the start of memory in use by the GPU.
- * Space between GET and PUT is used by the GPU ; this structure check that this memory is not overwritten.
- * User has to update the GET pointer when synchronisation happens.
- */
-class data_heap
+namespace rsx
 {
-protected:
-	/**
-	 * Internal implementation of allocation test
-	 * Does alloc cross get position?
-	 */
-	bool can_alloc_impl(usz aligned_put_pos, usz aligned_alloc_size) const
-	{
-		const usz alloc_end = aligned_put_pos + aligned_alloc_size;
-		if (alloc_end < m_size) [[ likely ]]
-		{
-			// Range before get
-			if (alloc_end < m_get_pos)
-				return true;
-
-			// Range after get
-			if (aligned_put_pos > m_get_pos)
-				return true;
-
-			return false;
-		}
-
-		// ..]....[..get..
-		if (aligned_put_pos < m_get_pos)
-			return false;
-
-		// ..get..]...[...
-		// Actually all resources extending beyond heap space starts at 0
-		if (aligned_alloc_size > m_get_pos)
-			return false;
-
-		return true;
-	}
-
-	/**
-	 * Does alloc cross get position?
-	 */
-	template<int Alignment>
-	bool can_alloc(usz size) const
-	{
-		const usz alloc_size = utils::align(size, Alignment);
-		const usz aligned_put_pos = utils::align(m_put_pos, Alignment);
-		return can_alloc_impl(aligned_put_pos, alloc_size);
-	}
-
-	// Grow the buffer to hold at least size bytes
-	virtual bool grow(usz /*size*/)
-	{
-		// Stub
-		return false;
-	}
-
-	usz m_size;
-	usz m_put_pos; // Start of free space
-	usz m_get_pos; // End of free space
-	usz m_min_guard_size; // If an allocation touches the guard region, reset the heap to avoid going over budget
-
-	char* m_name;
-
-public:
-	data_heap() = default;
-	~data_heap() = default;
-	data_heap(const data_heap&) = delete;
-	data_heap(data_heap&&) = delete;
-
-	void init(usz heap_size, const char* buffer_name = nullptr, usz min_guard_size = 0x10000)
-	{
-		m_name = const_cast<char*>(buffer_name ? buffer_name : "<unnamed>");
-		m_size = heap_size;
-		m_put_pos = 0;
-		m_get_pos = heap_size - 1;
-
-		// Allocation stats
-		m_min_guard_size = min_guard_size;
-	}
-
-	template<int Alignment>
-	usz alloc(usz size)
-	{
-		const usz alloc_size = utils::align(size, Alignment);
-		const usz aligned_put_pos = utils::align(m_put_pos, Alignment);
-
-		if (!can_alloc<Alignment>(size) && !grow(alloc_size))
-		{
-			fmt::throw_exception("[%s] Working buffer not big enough, buffer_length=%d requested=%d guard=%d",
-				m_name, m_size, size, m_min_guard_size);
-		}
-
-		const usz alloc_end = aligned_put_pos + alloc_size;
-		if (alloc_end < m_size)
-		{
-			m_put_pos = alloc_end;
-			return aligned_put_pos;
-		}
-
-		m_put_pos = alloc_size;
-		return 0;
-	}
-
-	/*
-	 * For use in cases where we take a fixed amount each time
-	 */
-	template<int Alignment, usz Size = Alignment>
-	usz static_alloc()
-	{
-		static_assert((Size & (Alignment - 1)) == 0);
-		ensure((m_put_pos & (Alignment - 1)) == 0);
-
-		if (!can_alloc_impl(m_put_pos, Size) && !grow(Size))
-		{
-			fmt::throw_exception("[%s] Working buffer not big enough, buffer_length=%d requested=%d guard=%d",
-				m_name, m_size, Size, m_min_guard_size);
-		}
-
-		const usz alloc_end = m_put_pos + Size;
-		if (alloc_end < m_size)
-		{
-			const auto ret_pos = m_put_pos;
-			m_put_pos = alloc_end;
-			return ret_pos;
-		}
-
-		m_put_pos = Size;
-		return 0;
-	}
-
-	/**
-	 * return current putpos - 1
-	 */
-	usz get_current_put_pos_minus_one() const
-	{
-		return (m_put_pos > 0) ? m_put_pos - 1 : m_size - 1;
-	}
-
-	inline void set_get_pos(usz value)
-	{
-		m_get_pos = value;
-	}
-
-	void reset_allocation_stats()
-	{
-		m_get_pos = get_current_put_pos_minus_one();
-	}
-
-	// Updates the current_allocated_size metrics
-	inline void notify()
-	{
-		// @unused
-	}
-
-	usz size() const
-	{
-		return m_size;
-	}
-};
+	/**
+	 * Ring buffer memory helper :
+	 * There are 2 "pointers" (offset inside a memory buffer to be provided by class derivative)
+	 * PUT pointer "points" to the start of allocatable space.
+	 * GET pointer "points" to the start of memory in use by the GPU.
+	 * Space between GET and PUT is used by the GPU ; this structure check that this memory is not overwritten.
+	 * User has to update the GET pointer when synchronisation happens.
+	 */
+	class data_heap
+	{
+	protected:
+		/**
+		 * Internal implementation of allocation test
+		 * Does alloc cross get position?
+		 */
+		bool can_alloc_impl(usz aligned_put_pos, usz aligned_alloc_size) const
+		{
+			const usz alloc_end = aligned_put_pos + aligned_alloc_size;
+			if (alloc_end < m_size) [[ likely ]]
+			{
+				// Range before get
+				if (alloc_end < m_get_pos)
+					return true;
+
+				// Range after get
+				if (aligned_put_pos > m_get_pos)
+					return true;
+
+				return false;
+			}
+
+			// ..]....[..get..
+			if (aligned_put_pos < m_get_pos)
+				return false;
+
+			// ..get..]...[...
+			// Actually all resources extending beyond heap space starts at 0
+			if (aligned_alloc_size > m_get_pos)
+				return false;
+
+			return true;
+		}
+
+		/**
+		 * Does alloc cross get position?
+		 */
+		template<int Alignment>
+		bool can_alloc(usz size) const
+		{
+			const usz alloc_size = utils::align(size, Alignment);
+			const usz aligned_put_pos = utils::align(m_put_pos, Alignment);
+			return can_alloc_impl(aligned_put_pos, alloc_size);
+		}
+
+		// Grow the buffer to hold at least size bytes
+		virtual bool grow(usz /*size*/)
+		{
+			// Stub
+			return false;
+		}
+
+		usz m_size;
+		usz m_put_pos; // Start of free space
+		usz m_get_pos; // End of free space
+		usz m_min_guard_size; // If an allocation touches the guard region, reset the heap to avoid going over budget
+
+		char* m_name;
+
+	public:
+		data_heap() = default;
+		~data_heap() = default;
+		data_heap(const data_heap&) = delete;
+		data_heap(data_heap&&) = delete;
+
+		void init(usz heap_size, const char* buffer_name = nullptr, usz min_guard_size = 0x10000)
+		{
+			m_name = const_cast<char*>(buffer_name ? buffer_name : "<unnamed>");
+			m_size = heap_size;
+			m_put_pos = 0;
+			m_get_pos = heap_size - 1;
+
+			// Allocation stats
+			m_min_guard_size = min_guard_size;
+		}
+
+		template<int Alignment>
+		usz alloc(usz size)
+		{
+			const usz alloc_size = utils::align(size, Alignment);
+			const usz aligned_put_pos = utils::align(m_put_pos, Alignment);
+
+			if (!can_alloc<Alignment>(size) && !grow(alloc_size))
+			{
+				fmt::throw_exception("[%s] Working buffer not big enough, buffer_length=%d requested=%d guard=%d",
+					m_name, m_size, size, m_min_guard_size);
+			}
+
+			const usz alloc_end = aligned_put_pos + alloc_size;
+			if (alloc_end < m_size)
+			{
+				m_put_pos = alloc_end;
+				return aligned_put_pos;
+			}
+
+			m_put_pos = alloc_size;
+			return 0;
+		}
+
+		/*
+		 * For use in cases where we take a fixed amount each time
+		 */
+		template<int Alignment, usz Size = Alignment>
+		usz static_alloc()
+		{
+			static_assert((Size & (Alignment - 1)) == 0);
+			ensure((m_put_pos & (Alignment - 1)) == 0);
+
+			if (!can_alloc_impl(m_put_pos, Size) && !grow(Size))
+			{
+				fmt::throw_exception("[%s] Working buffer not big enough, buffer_length=%d requested=%d guard=%d",
+					m_name, m_size, Size, m_min_guard_size);
+			}
+
+			const usz alloc_end = m_put_pos + Size;
+			if (alloc_end < m_size)
+			{
+				const auto ret_pos = m_put_pos;
+				m_put_pos = alloc_end;
+				return ret_pos;
+			}
+
+			m_put_pos = Size;
+			return 0;
+		}
+
+		/**
+		 * return current putpos - 1
+		 */
+		usz get_current_put_pos_minus_one() const
+		{
+			return (m_put_pos > 0) ? m_put_pos - 1 : m_size - 1;
+		}
+
+		inline void set_get_pos(usz value)
+		{
+			m_get_pos = value;
+		}
+
+		void reset_allocation_stats()
+		{
+			m_get_pos = get_current_put_pos_minus_one();
+		}
+
+		// Updates the current_allocated_size metrics
+		inline void notify()
+		{
+			// @unused
+		}
+
+		usz size() const
+		{
+			return m_size;
+		}
+	};
+
+	// Bulk static allocator. Allows to allocate one large block and subdivide
+	// [ 0, 1, 2, 3 ] <pad> [ 4, 5, 6, 7 ] ...
+	template <usz Alignment, usz ElementSize = Alignment>
+	struct bulk_allocator
+	{
+		bulk_allocator(data_heap& container, u32 batch_size = 1)
+			: m_container(container)
+			, m_batch_size(batch_size)
+		{}
+
+		usz alloc()
+		{
+			if (!m_capacity)
+			{
+				m_address = m_container.alloc<Alignment>(ElementSize * m_batch_size);
+				m_capacity = m_batch_size;
+			}
+
+			m_capacity--;
+			return std::exchange(m_address, m_address + ElementSize);
+		}
+
+	private:
+		data_heap& m_container;
+		usz m_address = 0;
+		u32 m_capacity = 0;
+		u32 m_batch_size = 1;
+	};
+}
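
A minimal usage sketch of the pieces added above (the element type, alignment, and batch size are made up for illustration; the real callers live in the VK backend):

// Assumes the rsx::data_heap / rsx::bulk_allocator definitions above are visible.
struct instance_entry { float data[4]; }; // made-up 16-byte element type

void write_instances(rsx::data_heap& heap) // heap previously set up via heap.init(...)
{
	// Carve one 256-byte-aligned block into 16 slots of 16 bytes each.
	// Slots inside a batch are contiguous, so the GPU side can read them
	// as a natural instance_entry[] array with a 16-byte stride.
	rsx::bulk_allocator<256, sizeof(instance_entry)> allocator(heap, 16);

	const usz first = allocator.alloc();  // batch empty: performs heap.alloc<256>(16 * 16)
	const usz second = allocator.alloc(); // first + 16, no new heap traffic
	// ... write instance_entry data at offsets `first`, `second`, ... and bind the block at `first` ...

	// Once the GPU is known to have consumed everything in flight:
	heap.reset_allocation_stats(); // pulls GET back behind PUT
}

The batch size trades heap round-trips against padding: each refill consumes utils::align(ElementSize * batch_size, Alignment) bytes of the ring buffer, so larger batches amortize the alignment padding over more elements.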

@@ -15,7 +15,7 @@ namespace vk
 	void data_heap::create(VkBufferUsageFlags usage, usz size, rsx::flags32_t flags, const char* name, usz guard, VkBool32 notify)
 	{
-		::data_heap::init(size, name, guard);
+		rsx::data_heap::init(size, name, guard);
 
 		const auto& memory_map = g_render_device->get_memory_mapping();

@@ -135,7 +135,7 @@ namespace vk
 		auto memory_index = m_prefer_writethrough ? memory_map.device_bar : memory_map.host_visible_coherent;
 
 		// Update heap information and reset the allocator
-		::data_heap::init(aligned_new_size, m_name, m_min_guard_size);
+		rsx::data_heap::init(aligned_new_size, m_name, m_min_guard_size);
 
 		// Discard old heap and create a new one. Old heap will be garbage collected when no longer needed
 		auto gc = get_resource_manager();

@@ -188,7 +188,7 @@ namespace vk
 		return after_usage < limit;
 	}
 
-	void* data_heap::map(usz offset, usz size)
+	void* data_heap::map_impl(usz offset, usz size)
 	{
 		if (!_ptr)
 		{

@@ -20,7 +20,7 @@ namespace vk
 		heap_pool_force_vram_shadow = (1 << 2),
 	};
 
-	class data_heap : public ::data_heap
+	class data_heap : public rsx::data_heap
 	{
 	private:
 		usz initial_size = 0;

@@ -41,6 +41,8 @@ namespace vk
 		bool grow(usz size) override;
 		bool can_allocate_heap(const vk::memory_type_info& target_heap, usz size, int max_usage_percent);
 
+		void* map_impl(usz offset, usz size);
+
 	public:
 		std::unique_ptr<buffer> heap;

@@ -51,9 +53,16 @@ namespace vk
 		void create(VkBufferUsageFlags usage, usz size, rsx::flags32_t flags, const char* name, usz guard = 0x10000, VkBool32 notify = VK_FALSE);
 		void destroy();
 
-		void* map(usz offset, usz size);
+		template <typename T = void>
+		T* map(usz offset, usz size)
+		{
+			return reinterpret_cast<T*>(map_impl(offset, size));
+		}
+
 		void unmap(bool force = false);
 
+		void sync(const vk::command_buffer& cmd);
+
 		template<int Alignment, typename T = char>
 		requires std::is_trivially_destructible_v<T>
 		std::pair<usz, T*> alloc_and_map(usz count)

@@ -63,8 +72,6 @@ namespace vk
 			return { addr, reinterpret_cast<T*>(map(addr, size_bytes)) };
 		}
 
-		void sync(const vk::command_buffer& cmd);
-
 		template <usz Alignment>
 		VkDescriptorBufferInfoEx window(usz offset, usz range, u64 window_size) const
 		{