vk: Use BAR memory pool for low-latency resource uploads

This commit is contained in:
kd-11 2025-08-24 20:58:55 +03:00 committed by kd-11
parent 21809731e2
commit 20a7297ac4
4 changed files with 33 additions and 20 deletions

View file

@ -509,17 +509,17 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar)
semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
// VRAM allocation // VRAM allocation
m_attrib_ring_info.create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, "attrib buffer", 0x400000, VK_TRUE); m_attrib_ring_info.create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, vk::heap_pool_default, "attrib buffer", 0x400000, VK_TRUE);
m_fragment_env_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment env buffer"); m_fragment_env_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, vk::heap_pool_low_latency, "fragment env buffer");
m_vertex_env_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex env buffer"); m_vertex_env_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, vk::heap_pool_low_latency, "vertex env buffer");
m_fragment_texture_params_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment texture params buffer"); m_fragment_texture_params_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, vk::heap_pool_low_latency, "fragment texture params buffer");
m_vertex_layout_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex layout buffer", 0x10000, VK_TRUE); m_vertex_layout_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, vk::heap_pool_low_latency, "vertex layout buffer", 0x10000, VK_TRUE);
m_fragment_constants_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment constants buffer"); m_fragment_constants_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, vk::heap_pool_low_latency, "fragment constants buffer");
m_transform_constants_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M * 0x100000, "transform constants buffer"); m_transform_constants_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M * 0x100000, vk::heap_pool_low_latency, "transform constants buffer");
m_index_buffer_ring_info.create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, "index buffer"); m_index_buffer_ring_info.create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, vk::heap_pool_default, "index buffer");
m_texture_upload_buffer_ring_info.create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, "texture upload buffer", 32 * 0x100000); m_texture_upload_buffer_ring_info.create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, vk::heap_pool_default, "texture upload buffer", 32 * 0x100000);
m_raster_env_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "raster env buffer"); m_raster_env_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, vk::heap_pool_low_latency, "raster env buffer");
m_instancing_buffer_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M * 0x100000, "instancing data buffer"); m_instancing_buffer_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M * 0x100000, vk::heap_pool_low_latency, "instancing data buffer");
vk::data_heap_manager::register_ring_buffers vk::data_heap_manager::register_ring_buffers
({ ({
@ -540,8 +540,8 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar)
if (shadermode == shader_mode::async_with_interpreter || shadermode == shader_mode::interpreter_only) if (shadermode == shader_mode::async_with_interpreter || shadermode == shader_mode::interpreter_only)
{ {
m_vertex_instructions_buffer.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 64 * 0x100000, "vertex instructions buffer", 512 * 16); m_vertex_instructions_buffer.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 64 * 0x100000, vk::heap_pool_low_latency, "vertex instructions buffer", 512 * 16);
m_fragment_instructions_buffer.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 64 * 0x100000, "fragment instructions buffer", 2048); m_fragment_instructions_buffer.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 64 * 0x100000, vk::heap_pool_low_latency, "fragment instructions buffer", 2048);
vk::data_heap_manager::register_ring_buffers vk::data_heap_manager::register_ring_buffers
({ ({

View file

@ -42,12 +42,12 @@ namespace vk
{ {
if (!m_vao.heap) if (!m_vao.heap)
{ {
m_vao.create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, 1 * 0x100000, "overlays VAO", 128); m_vao.create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, 1 * 0x100000, vk::heap_pool_default, "overlays VAO", 128);
} }
if (!m_ubo.heap && m_num_uniform_buffers > 0) if (!m_ubo.heap && m_num_uniform_buffers > 0)
{ {
m_ubo.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 8 * 0x100000, "overlays UBO", 128); m_ubo.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 8 * 0x100000, vk::heap_pool_default, "overlays UBO", 128);
} }
} }

View file

@ -13,14 +13,20 @@ namespace vk
{ {
data_heap g_upload_heap; data_heap g_upload_heap;
void data_heap::create(VkBufferUsageFlags usage, usz size, const char* name, usz guard, VkBool32 notify) void data_heap::create(VkBufferUsageFlags usage, usz size, rsx::flags32_t flags, const char* name, usz guard, VkBool32 notify)
{ {
::data_heap::init(size, name, guard); ::data_heap::init(size, name, guard);
const auto& memory_map = g_render_device->get_memory_mapping(); const auto& memory_map = g_render_device->get_memory_mapping();
if (flags & heap_pool_low_latency)
{
// Prefer uploading to BAR if low latency is desired.
m_prefer_writethrough = memory_map.device_bar_total_bytes > (2048ull * 0x100000);
}
VkFlags memory_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; VkFlags memory_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
auto memory_index = memory_map.host_visible_coherent; auto memory_index = m_prefer_writethrough ? memory_map.device_bar : memory_map.host_visible_coherent;
if (!(get_heap_compatible_buffer_types() & usage)) if (!(get_heap_compatible_buffer_types() & usage))
{ {
@ -75,7 +81,7 @@ namespace vk
const auto& memory_map = g_render_device->get_memory_mapping(); const auto& memory_map = g_render_device->get_memory_mapping();
VkFlags memory_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; VkFlags memory_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
auto memory_index = memory_map.host_visible_coherent; auto memory_index = m_prefer_writethrough ? memory_map.device_bar : memory_map.host_visible_coherent;
// Update heap information and reset the allocator // Update heap information and reset the allocator
::data_heap::init(aligned_new_size, m_name, m_min_guard_size); ::data_heap::init(aligned_new_size, m_name, m_min_guard_size);
@ -163,7 +169,7 @@ namespace vk
{ {
if (!g_upload_heap.heap) if (!g_upload_heap.heap)
{ {
g_upload_heap.create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, 64 * 0x100000, "auxilliary upload heap", 0x100000); g_upload_heap.create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, 64 * 0x100000, vk::heap_pool_default, "auxilliary upload heap", 0x100000);
} }
return &g_upload_heap; return &g_upload_heap;

View file

@ -11,6 +11,12 @@
namespace vk namespace vk
{ {
// Bit flags accepted by the `flags` argument of vk::data_heap::create().
// Values are combined and tested with bitwise operators.
enum data_heap_pool_flags
{
	// No special placement requested; create() uses the host-visible coherent memory type.
	heap_pool_default = 0,

	// Request low-latency uploads. create() then prefers the device BAR memory type,
	// but only when the reported BAR size is larger than 2048 MiB.
	heap_pool_low_latency = (1 << 0)
};
class data_heap : public ::data_heap class data_heap : public ::data_heap
{ {
private: private:
@ -19,6 +25,7 @@ namespace vk
void* _ptr = nullptr; void* _ptr = nullptr;
bool notify_on_grow = false; bool notify_on_grow = false;
bool m_prefer_writethrough = false;
std::unique_ptr<buffer> shadow; std::unique_ptr<buffer> shadow;
std::vector<VkBufferCopy> dirty_ranges; std::vector<VkBufferCopy> dirty_ranges;
@ -33,7 +40,7 @@ namespace vk
// Avoid mapping/unmapping to keep these drivers from stalling // Avoid mapping/unmapping to keep these drivers from stalling
// NOTE2: HOST_CACHED flag does not keep the mapped ptr around in the driver either // NOTE2: HOST_CACHED flag does not keep the mapped ptr around in the driver either
void create(VkBufferUsageFlags usage, usz size, const char* name, usz guard = 0x10000, VkBool32 notify = VK_FALSE); void create(VkBufferUsageFlags usage, usz size, rsx::flags32_t flags, const char* name, usz guard = 0x10000, VkBool32 notify = VK_FALSE);
void destroy(); void destroy();
void* map(usz offset, usz size); void* map(usz offset, usz size);