rsx/vk: Implement bulk aligned allocator

- Avoids wasting space and allows use of natural arrays in shaders
kd-11 authored 2026-04-29 00:04:26 +03:00 · committed by kd-11
parent 8b02f46e67
commit 3b1abec405
3 changed files with 195 additions and 155 deletions
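
A rough sense of the space saving claimed in the commit message (all numbers below are hypothetical, chosen only for illustration): with a 256-byte offset-alignment requirement and 16-byte elements, allocating one element at a time rounds every request up to the alignment, while one bulk allocation pays the alignment cost once and keeps the elements at their natural stride — which is what lets a shader index them as a plain array.

#include <cstddef>

// Hypothetical numbers for illustration only (not taken from the commit).
constexpr std::size_t alignment = 256;   // e.g. a minStorageBufferOffsetAlignment-style limit
constexpr std::size_t element_size = 16;
constexpr std::size_t batch = 4;

constexpr std::size_t align_up(std::size_t x, std::size_t a)
{
	return (x + a - 1) & ~(a - 1);
}

// One ring-buffer allocation per element: each element starts on its own
// 256-byte boundary, so the shader must also use a 256-byte stride.
constexpr std::size_t per_element = batch * align_up(element_size, alignment); // 1024 bytes

// One bulk allocation, subdivided: only the block start is aligned and the
// elements keep their natural 16-byte stride ("natural arrays in shaders").
constexpr std::size_t bulk = align_up(batch * element_size, alignment); // 256 bytes

static_assert(per_element == 1024 && bulk == 256);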

@@ -3,167 +3,200 @@
 #include "Utilities/StrFmt.h"
 #include "util/asm.hpp"
 
-/**
- * Ring buffer memory helper :
- * There are 2 "pointers" (offset inside a memory buffer to be provided by class derivative)
- * PUT pointer "points" to the start of allocatable space.
- * GET pointer "points" to the start of memory in use by the GPU.
- * Space between GET and PUT is used by the GPU ; this structure check that this memory is not overwritten.
- * User has to update the GET pointer when synchronisation happens.
- */
-class data_heap
+namespace rsx
 {
-protected:
-	/**
-	 * Internal implementation of allocation test
-	 * Does alloc cross get position?
-	 */
-	bool can_alloc_impl(usz aligned_put_pos, usz aligned_alloc_size) const
-	{
-		const usz alloc_end = aligned_put_pos + aligned_alloc_size;
-		if (alloc_end < m_size) [[ likely ]]
-		{
-			// Range before get
-			if (alloc_end < m_get_pos)
-				return true;
-
-			// Range after get
-			if (aligned_put_pos > m_get_pos)
-				return true;
-
-			return false;
-		}
-
-		// ..]....[..get..
-		if (aligned_put_pos < m_get_pos)
-			return false;
-
-		// ..get..]...[...
-		// Actually all resources extending beyond heap space starts at 0
-		if (aligned_alloc_size > m_get_pos)
-			return false;
-
-		return true;
-	}
-
-	/**
-	 * Does alloc cross get position?
-	 */
-	template<int Alignment>
-	bool can_alloc(usz size) const
-	{
-		const usz alloc_size = utils::align(size, Alignment);
-		const usz aligned_put_pos = utils::align(m_put_pos, Alignment);
-		return can_alloc_impl(aligned_put_pos, alloc_size);
-	}
-
-	// Grow the buffer to hold at least size bytes
-	virtual bool grow(usz /*size*/)
-	{
-		// Stub
-		return false;
-	}
-
-	usz m_size;
-	usz m_put_pos; // Start of free space
-	usz m_get_pos; // End of free space
-	usz m_min_guard_size; // If an allocation touches the guard region, reset the heap to avoid going over budget
-
-	char* m_name;
-
-public:
-	data_heap() = default;
-	~data_heap() = default;
-	data_heap(const data_heap&) = delete;
-	data_heap(data_heap&&) = delete;
-
-	void init(usz heap_size, const char* buffer_name = nullptr, usz min_guard_size = 0x10000)
-	{
-		m_name = const_cast<char*>(buffer_name ? buffer_name : "<unnamed>");
-		m_size = heap_size;
-		m_put_pos = 0;
-		m_get_pos = heap_size - 1;
-
-		// Allocation stats
-		m_min_guard_size = min_guard_size;
-	}
-
-	template<int Alignment>
-	usz alloc(usz size)
-	{
-		const usz alloc_size = utils::align(size, Alignment);
-		const usz aligned_put_pos = utils::align(m_put_pos, Alignment);
-
-		if (!can_alloc<Alignment>(size) && !grow(alloc_size))
-		{
-			fmt::throw_exception("[%s] Working buffer not big enough, buffer_length=%d requested=%d guard=%d",
-				m_name, m_size, size, m_min_guard_size);
-		}
-
-		const usz alloc_end = aligned_put_pos + alloc_size;
-		if (alloc_end < m_size)
-		{
-			m_put_pos = alloc_end;
-			return aligned_put_pos;
-		}
-
-		m_put_pos = alloc_size;
-		return 0;
-	}
-
-	/*
-	 * For use in cases where we take a fixed amount each time
-	 */
-	template<int Alignment, usz Size = Alignment>
-	usz static_alloc()
-	{
-		static_assert((Size & (Alignment - 1)) == 0);
-		ensure((m_put_pos & (Alignment - 1)) == 0);
-
-		if (!can_alloc_impl(m_put_pos, Size) && !grow(Size))
-		{
-			fmt::throw_exception("[%s] Working buffer not big enough, buffer_length=%d requested=%d guard=%d",
-				m_name, m_size, Size, m_min_guard_size);
-		}
-
-		const usz alloc_end = m_put_pos + Size;
-		if (alloc_end < m_size)
-		{
-			const auto ret_pos = m_put_pos;
-			m_put_pos = alloc_end;
-			return ret_pos;
-		}
-
-		m_put_pos = Size;
-		return 0;
-	}
-
-	/**
-	 * return current putpos - 1
-	 */
-	usz get_current_put_pos_minus_one() const
-	{
-		return (m_put_pos > 0) ? m_put_pos - 1 : m_size - 1;
-	}
-
-	inline void set_get_pos(usz value)
-	{
-		m_get_pos = value;
-	}
-
-	void reset_allocation_stats()
-	{
-		m_get_pos = get_current_put_pos_minus_one();
-	}
-
-	// Updates the current_allocated_size metrics
-	inline void notify()
-	{
-		// @unused
-	}
-
-	usz size() const
-	{
-		return m_size;
-	}
-};
+	/**
+	 * Ring buffer memory helper :
+	 * There are 2 "pointers" (offset inside a memory buffer to be provided by class derivative)
+	 * PUT pointer "points" to the start of allocatable space.
+	 * GET pointer "points" to the start of memory in use by the GPU.
+	 * Space between GET and PUT is used by the GPU ; this structure check that this memory is not overwritten.
+	 * User has to update the GET pointer when synchronisation happens.
+	 */
+	class data_heap
+	{
+	protected:
+		/**
+		 * Internal implementation of allocation test
+		 * Does alloc cross get position?
+		 */
+		bool can_alloc_impl(usz aligned_put_pos, usz aligned_alloc_size) const
+		{
+			const usz alloc_end = aligned_put_pos + aligned_alloc_size;
+			if (alloc_end < m_size) [[ likely ]]
+			{
+				// Range before get
+				if (alloc_end < m_get_pos)
+					return true;
+
+				// Range after get
+				if (aligned_put_pos > m_get_pos)
+					return true;
+
+				return false;
+			}
+
+			// ..]....[..get..
+			if (aligned_put_pos < m_get_pos)
+				return false;
+
+			// ..get..]...[...
+			// Actually all resources extending beyond heap space starts at 0
+			if (aligned_alloc_size > m_get_pos)
+				return false;
+
+			return true;
+		}
+
+		/**
+		 * Does alloc cross get position?
+		 */
+		template<int Alignment>
+		bool can_alloc(usz size) const
+		{
+			const usz alloc_size = utils::align(size, Alignment);
+			const usz aligned_put_pos = utils::align(m_put_pos, Alignment);
+			return can_alloc_impl(aligned_put_pos, alloc_size);
+		}
+
+		// Grow the buffer to hold at least size bytes
+		virtual bool grow(usz /*size*/)
+		{
+			// Stub
+			return false;
+		}
+
+		usz m_size;
+		usz m_put_pos; // Start of free space
+		usz m_get_pos; // End of free space
+		usz m_min_guard_size; // If an allocation touches the guard region, reset the heap to avoid going over budget
+
+		char* m_name;
+
+	public:
+		data_heap() = default;
+		~data_heap() = default;
+		data_heap(const data_heap&) = delete;
+		data_heap(data_heap&&) = delete;
+
+		void init(usz heap_size, const char* buffer_name = nullptr, usz min_guard_size = 0x10000)
+		{
+			m_name = const_cast<char*>(buffer_name ? buffer_name : "<unnamed>");
+			m_size = heap_size;
+			m_put_pos = 0;
+			m_get_pos = heap_size - 1;
+
+			// Allocation stats
+			m_min_guard_size = min_guard_size;
+		}
+
+		template<int Alignment>
+		usz alloc(usz size)
+		{
+			const usz alloc_size = utils::align(size, Alignment);
+			const usz aligned_put_pos = utils::align(m_put_pos, Alignment);
+
+			if (!can_alloc<Alignment>(size) && !grow(alloc_size))
+			{
+				fmt::throw_exception("[%s] Working buffer not big enough, buffer_length=%d requested=%d guard=%d",
+					m_name, m_size, size, m_min_guard_size);
+			}
+
+			const usz alloc_end = aligned_put_pos + alloc_size;
+			if (alloc_end < m_size)
+			{
+				m_put_pos = alloc_end;
+				return aligned_put_pos;
+			}
+
+			m_put_pos = alloc_size;
+			return 0;
+		}
+
+		/*
+		 * For use in cases where we take a fixed amount each time
+		 */
+		template<int Alignment, usz Size = Alignment>
+		usz static_alloc()
+		{
+			static_assert((Size & (Alignment - 1)) == 0);
+			ensure((m_put_pos & (Alignment - 1)) == 0);
+
+			if (!can_alloc_impl(m_put_pos, Size) && !grow(Size))
+			{
+				fmt::throw_exception("[%s] Working buffer not big enough, buffer_length=%d requested=%d guard=%d",
+					m_name, m_size, Size, m_min_guard_size);
+			}
+
+			const usz alloc_end = m_put_pos + Size;
+			if (alloc_end < m_size)
+			{
+				const auto ret_pos = m_put_pos;
+				m_put_pos = alloc_end;
+				return ret_pos;
+			}
+
+			m_put_pos = Size;
+			return 0;
+		}
+
+		/**
+		 * return current putpos - 1
+		 */
+		usz get_current_put_pos_minus_one() const
+		{
+			return (m_put_pos > 0) ? m_put_pos - 1 : m_size - 1;
+		}
+
+		inline void set_get_pos(usz value)
+		{
+			m_get_pos = value;
+		}
+
+		void reset_allocation_stats()
+		{
+			m_get_pos = get_current_put_pos_minus_one();
+		}
+
+		// Updates the current_allocated_size metrics
+		inline void notify()
+		{
+			// @unused
+		}
+
+		usz size() const
+		{
+			return m_size;
+		}
+	};
+
+	// Bulk static allocator. Allows to allocate one large block and subdivide
+	// [ 0, 1, 2, 3 ] <pad> [ 4, 5, 6, 7 ] ...
+	template <usz Alignment, usz ElementSize = Alignment>
+	struct bulk_allocator
+	{
+		bulk_allocator(data_heap& container, u32 batch_size = 1)
+			: m_container(container)
+			, m_batch_size(batch_size)
+		{}
+
+		usz alloc()
+		{
+			if (!m_capacity)
+			{
+				m_address = m_container.alloc<Alignment>(ElementSize * m_batch_size);
+				m_capacity = m_batch_size;
+			}
+
+			m_capacity--;
+			return std::exchange(m_address, m_address + ElementSize);
+		}
+
+	private:
+		data_heap& m_container;
+		usz m_address = 0;
+		u32 m_capacity = 0;
+		u32 m_batch_size = 1;
+	};
+}
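
A minimal usage sketch of the pieces added above (the element type, alignment, and batch size are made up for illustration; the real callers live in the VK backend):

// Assumes the rsx::data_heap / rsx::bulk_allocator definitions above are visible.
struct instance_entry { float data[4]; }; // made-up 16-byte element type

void write_instances(rsx::data_heap& heap) // heap previously set up via heap.init(...)
{
	// Carve one 256-byte-aligned block into 16 slots of 16 bytes each.
	// Slots inside a batch are contiguous, so the GPU side can read them
	// as a natural instance_entry[] array with a 16-byte stride.
	rsx::bulk_allocator<256, sizeof(instance_entry)> allocator(heap, 16);

	const usz first = allocator.alloc();  // batch empty: performs heap.alloc<256>(16 * 16)
	const usz second = allocator.alloc(); // first + 16, no new heap traffic
	// ... write instance_entry data at offsets `first`, `second`, ... and bind the block at `first` ...

	// Once the GPU is known to have consumed everything in flight:
	heap.reset_allocation_stats(); // pulls GET back behind PUT
}

The batch size trades heap round-trips against padding: each refill consumes utils::align(ElementSize * batch_size, Alignment) bytes of the ring buffer, so larger batches amortize the alignment padding over more elements.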

@@ -15,7 +15,7 @@ namespace vk
 	void data_heap::create(VkBufferUsageFlags usage, usz size, rsx::flags32_t flags, const char* name, usz guard, VkBool32 notify)
 	{
-		::data_heap::init(size, name, guard);
+		rsx::data_heap::init(size, name, guard);
 
 		const auto& memory_map = g_render_device->get_memory_mapping();

@@ -135,7 +135,7 @@ namespace vk
 		auto memory_index = m_prefer_writethrough ? memory_map.device_bar : memory_map.host_visible_coherent;
 
 		// Update heap information and reset the allocator
-		::data_heap::init(aligned_new_size, m_name, m_min_guard_size);
+		rsx::data_heap::init(aligned_new_size, m_name, m_min_guard_size);
 
 		// Discard old heap and create a new one. Old heap will be garbage collected when no longer needed
 		auto gc = get_resource_manager();

@@ -188,7 +188,7 @@ namespace vk
 		return after_usage < limit;
 	}
 
-	void* data_heap::map(usz offset, usz size)
+	void* data_heap::map_impl(usz offset, usz size)
 	{
 		if (!_ptr)
 		{

@@ -20,7 +20,7 @@ namespace vk
 		heap_pool_force_vram_shadow = (1 << 2),
 	};
 
-	class data_heap : public ::data_heap
+	class data_heap : public rsx::data_heap
 	{
 	private:
 		usz initial_size = 0;

@@ -41,6 +41,8 @@ namespace vk
 		bool grow(usz size) override;
 		bool can_allocate_heap(const vk::memory_type_info& target_heap, usz size, int max_usage_percent);
 
+		void* map_impl(usz offset, usz size);
+
 	public:
 		std::unique_ptr<buffer> heap;

@@ -51,9 +53,16 @@ namespace vk
 		void create(VkBufferUsageFlags usage, usz size, rsx::flags32_t flags, const char* name, usz guard = 0x10000, VkBool32 notify = VK_FALSE);
 		void destroy();
 
-		void* map(usz offset, usz size);
+		template <typename T = void>
+		T* map(usz offset, usz size)
+		{
+			return reinterpret_cast<T*>(map_impl(offset, size));
+		}
+
 		void unmap(bool force = false);
 
+		void sync(const vk::command_buffer& cmd);
+
 		template<int Alignment, typename T = char>
 		requires std::is_trivially_destructible_v<T>
 		std::pair<usz, T*> alloc_and_map(usz count)

@@ -63,8 +72,6 @@ namespace vk
 			return { addr, reinterpret_cast<T*>(map(addr, size_bytes)) };
 		}
 
-		void sync(const vk::command_buffer& cmd);
-
 		template <usz Alignment>
 		VkDescriptorBufferInfoEx window(usz offset, usz range, u64 window_size) const
 		{