mirror of
https://github.com/RPCS3/rpcs3.git
synced 2026-05-07 13:37:46 +00:00
rsx/vk: Implement bulk aligned allocator
- Avoids wasting space and allows use of natural arrays in shaders
This commit is contained in:
parent
8b02f46e67
commit
3b1abec405
3 changed files with 195 additions and 155 deletions
|
|
@ -3,167 +3,200 @@
|
|||
#include "Utilities/StrFmt.h"
#include "util/asm.hpp"

namespace rsx
{
	/**
	 * Ring buffer memory helper:
	 * There are 2 "pointers" (offsets inside a memory buffer to be provided by a class derivative).
	 * The PUT pointer "points" to the start of allocatable space.
	 * The GET pointer "points" to the start of memory in use by the GPU.
	 * Space between GET and PUT is used by the GPU; this structure checks that this memory is not overwritten.
	 * The user has to update the GET pointer when synchronisation happens.
	 */
	class data_heap
	{
	protected:
		/**
		 * Internal implementation of the allocation test.
		 * Does the allocation [aligned_put_pos, aligned_put_pos + aligned_alloc_size) cross the GET position?
		 */
		bool can_alloc_impl(usz aligned_put_pos, usz aligned_alloc_size) const
		{
			const usz alloc_end = aligned_put_pos + aligned_alloc_size;
			if (alloc_end < m_size) [[likely]]
			{
				// Range before get
				if (alloc_end < m_get_pos)
					return true;

				// Range after get
				if (aligned_put_pos > m_get_pos)
					return true;

				return false;
			}

			// ..]....[..get..
			if (aligned_put_pos < m_get_pos)
				return false;

			// ..get..]...[...
			// Actually all resources extending beyond heap space start at 0
			if (aligned_alloc_size > m_get_pos)
				return false;

			return true;
		}

		/**
		 * Does the allocation (after aligning size and PUT to Alignment) cross the GET position?
		 */
		template<int Alignment>
		bool can_alloc(usz size) const
		{
			const usz alloc_size = utils::align(size, Alignment);
			const usz aligned_put_pos = utils::align(m_put_pos, Alignment);
			return can_alloc_impl(aligned_put_pos, alloc_size);
		}

		// Grow the buffer to hold at least size bytes. Backends that can reallocate override this.
		virtual bool grow(usz /*size*/)
		{
			// Stub
			return false;
		}

		usz m_size;           // Total heap size in bytes
		usz m_put_pos;        // Start of free space
		usz m_get_pos;        // End of free space
		usz m_min_guard_size; // If an allocation touches the guard region, reset the heap to avoid going over budget

		char* m_name;         // Debug name used in error messages (not owned; points at a string literal or caller-owned buffer)

	public:
		data_heap() = default;
		// NOTE(review): destructor is non-virtual although grow() is virtual; objects must not be
		// deleted through a data_heap* to a derived type — confirm against the deriving backends.
		~data_heap() = default;
		data_heap(const data_heap&) = delete;
		data_heap(data_heap&&) = delete;

		// (Re)initialize heap bookkeeping. GET starts at heap_size - 1, i.e. the entire heap is free.
		void init(usz heap_size, const char* buffer_name = nullptr, usz min_guard_size = 0x10000)
		{
			m_name = const_cast<char*>(buffer_name ? buffer_name : "<unnamed>");

			m_size = heap_size;
			m_put_pos = 0;
			m_get_pos = heap_size - 1;

			// Allocation stats
			m_min_guard_size = min_guard_size;
		}

		// Allocate size bytes aligned to Alignment and return the heap offset of the allocation.
		// If the aligned range would extend past the end of the heap, the allocation wraps to offset 0.
		// Throws if the request cannot fit and grow() fails.
		template<int Alignment>
		usz alloc(usz size)
		{
			const usz alloc_size = utils::align(size, Alignment);
			const usz aligned_put_pos = utils::align(m_put_pos, Alignment);

			if (!can_alloc<Alignment>(size) && !grow(alloc_size))
			{
				fmt::throw_exception("[%s] Working buffer not big enough, buffer_length=%d requested=%d guard=%d",
					m_name, m_size, size, m_min_guard_size);
			}

			const usz alloc_end = aligned_put_pos + alloc_size;
			if (alloc_end < m_size)
			{
				m_put_pos = alloc_end;
				return aligned_put_pos;
			}

			// Wrap around: this allocation starts at offset 0
			m_put_pos = alloc_size;
			return 0;
		}

		/*
		 * For use in cases where we take a fixed amount each time.
		 * PUT is required to already be aligned (checked), so no re-alignment is performed.
		 */
		template<int Alignment, usz Size = Alignment>
		usz static_alloc()
		{
			static_assert((Size & (Alignment - 1)) == 0);
			ensure((m_put_pos & (Alignment - 1)) == 0);

			if (!can_alloc_impl(m_put_pos, Size) && !grow(Size))
			{
				fmt::throw_exception("[%s] Working buffer not big enough, buffer_length=%d requested=%d guard=%d",
					m_name, m_size, Size, m_min_guard_size);
			}

			const usz alloc_end = m_put_pos + Size;
			if (alloc_end < m_size)
			{
				const auto ret_pos = m_put_pos;
				m_put_pos = alloc_end;
				return ret_pos;
			}

			// Wrap around: this allocation starts at offset 0
			m_put_pos = Size;
			return 0;
		}

		/**
		 * Return current PUT position - 1, wrapping to m_size - 1 when PUT is 0.
		 */
		usz get_current_put_pos_minus_one() const
		{
			return (m_put_pos > 0) ? m_put_pos - 1 : m_size - 1;
		}

		inline void set_get_pos(usz value)
		{
			m_get_pos = value;
		}

		// Mark everything up to the current PUT position as reclaimed by moving GET just behind PUT.
		void reset_allocation_stats()
		{
			m_get_pos = get_current_put_pos_minus_one();
		}

		// Updates the current_allocated_size metrics
		inline void notify()
		{
			// @unused
		}

		usz size() const
		{
			return m_size;
		}
	};

	// Bulk static allocator. Allows to allocate one large block and subdivide it:
	// [ 0, 1, 2, 3 ] <pad> [ 4, 5, 6, 7 ] ...
	// Avoids per-element alignment padding so elements form natural arrays in shaders.
	template <usz Alignment, usz ElementSize = Alignment>
	struct bulk_allocator
	{
		bulk_allocator(data_heap& container, u32 batch_size = 1)
			: m_container(container)
			, m_batch_size(batch_size)
		{}

		// Hand out one ElementSize-sized slot; grabs a fresh aligned batch from the heap when empty.
		usz alloc()
		{
			if (!m_capacity)
			{
				m_address = m_container.alloc<Alignment>(ElementSize * m_batch_size);
				m_capacity = m_batch_size;
			}

			m_capacity--;
			return std::exchange(m_address, m_address + ElementSize);
		}

	private:
		data_heap& m_container; // Backing ring-buffer heap
		usz m_address = 0;      // Offset of the next free slot within the current batch

		u32 m_capacity = 0;     // Slots remaining in the current batch
		u32 m_batch_size = 1;   // Slots acquired per bulk allocation
	};
}
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ namespace vk
|
|||
|
||||
void data_heap::create(VkBufferUsageFlags usage, usz size, rsx::flags32_t flags, const char* name, usz guard, VkBool32 notify)
|
||||
{
|
||||
::data_heap::init(size, name, guard);
|
||||
rsx::data_heap::init(size, name, guard);
|
||||
|
||||
const auto& memory_map = g_render_device->get_memory_mapping();
|
||||
|
||||
|
|
@ -135,7 +135,7 @@ namespace vk
|
|||
auto memory_index = m_prefer_writethrough ? memory_map.device_bar : memory_map.host_visible_coherent;
|
||||
|
||||
// Update heap information and reset the allocator
|
||||
::data_heap::init(aligned_new_size, m_name, m_min_guard_size);
|
||||
rsx::data_heap::init(aligned_new_size, m_name, m_min_guard_size);
|
||||
|
||||
// Discard old heap and create a new one. Old heap will be garbage collected when no longer needed
|
||||
auto gc = get_resource_manager();
|
||||
|
|
@ -188,7 +188,7 @@ namespace vk
|
|||
return after_usage < limit;
|
||||
}
|
||||
|
||||
void* data_heap::map(usz offset, usz size)
|
||||
void* data_heap::map_impl(usz offset, usz size)
|
||||
{
|
||||
if (!_ptr)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ namespace vk
|
|||
heap_pool_force_vram_shadow = (1 << 2),
|
||||
};
|
||||
|
||||
class data_heap : public ::data_heap
|
||||
class data_heap : public rsx::data_heap
|
||||
{
|
||||
private:
|
||||
usz initial_size = 0;
|
||||
|
|
@ -41,6 +41,8 @@ namespace vk
|
|||
bool grow(usz size) override;
|
||||
bool can_allocate_heap(const vk::memory_type_info& target_heap, usz size, int max_usage_percent);
|
||||
|
||||
void* map_impl(usz offset, usz size);
|
||||
|
||||
public:
|
||||
std::unique_ptr<buffer> heap;
|
||||
|
||||
|
|
@ -51,9 +53,16 @@ namespace vk
|
|||
void create(VkBufferUsageFlags usage, usz size, rsx::flags32_t flags, const char* name, usz guard = 0x10000, VkBool32 notify = VK_FALSE);
|
||||
void destroy();
|
||||
|
||||
void* map(usz offset, usz size);
|
||||
template <typename T = void>
|
||||
T* map(usz offset, usz size)
|
||||
{
|
||||
return reinterpret_cast<T*>(map_impl(offset, size));
|
||||
}
|
||||
|
||||
void unmap(bool force = false);
|
||||
|
||||
void sync(const vk::command_buffer& cmd);
|
||||
|
||||
template<int Alignment, typename T = char>
|
||||
requires std::is_trivially_destructible_v<T>
|
||||
std::pair<usz, T*> alloc_and_map(usz count)
|
||||
|
|
@ -63,8 +72,6 @@ namespace vk
|
|||
return { addr, reinterpret_cast<T*>(map(addr, size_bytes)) };
|
||||
}
|
||||
|
||||
void sync(const vk::command_buffer& cmd);
|
||||
|
||||
template <usz Alignment>
|
||||
VkDescriptorBufferInfoEx window(usz offset, usz range, u64 window_size) const
|
||||
{
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue