diff --git a/src/xenia/gpu/vulkan/buffer_cache.cc b/src/xenia/gpu/vulkan/buffer_cache.cc
index 4cace24ba..5b9f8a182 100644
--- a/src/xenia/gpu/vulkan/buffer_cache.cc
+++ b/src/xenia/gpu/vulkan/buffer_cache.cc
@@ -22,6 +22,9 @@ namespace vulkan {
 
 using xe::ui::vulkan::CheckResult;
 
+// Space kept between tail and head when wrapping.
+constexpr VkDeviceSize kDeadZone = 4 * 1024;
+
 BufferCache::BufferCache(RegisterFile* register_file,
                          ui::vulkan::VulkanDevice* device, size_t capacity)
     : register_file_(register_file),
@@ -187,6 +190,42 @@ BufferCache::~BufferCache() {
 
 VkDeviceSize BufferCache::UploadConstantRegisters(
     const Shader::ConstantRegisterMap& constant_register_map) {
+  // Fat struct, including all registers:
+  // struct {
+  //   vec4 float[512];
+  //   uint bool[8];
+  //   uint loop[32];
+  // };
+  size_t total_size = xe::round_up(
+      static_cast<size_t>((512 * 4 * 4) + (32 * 4) + (8 * 4)),
+      uniform_buffer_alignment_);
+  auto offset = AllocateTransientData(uniform_buffer_alignment_, total_size);
+  if (offset == VK_WHOLE_SIZE) {
+    // OOM.
+    return VK_WHOLE_SIZE;
+  }
+
+  // Copy over all the registers.
+  const auto& values = register_file_->values;
+  uint8_t* dest_ptr =
+      reinterpret_cast<uint8_t*>(transient_buffer_data_) + offset;
+  std::memcpy(dest_ptr, &values[XE_GPU_REG_SHADER_CONSTANT_000_X].f32,
+              (512 * 4 * 4));
+  dest_ptr += 512 * 4 * 4;
+  std::memcpy(dest_ptr, &values[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32,
+              8 * 4);
+  dest_ptr += 8 * 4;
+  std::memcpy(dest_ptr, &values[XE_GPU_REG_SHADER_CONSTANT_LOOP_00].u32,
+              32 * 4);
+  dest_ptr += 32 * 4;
+
+  return offset;
+
+// Packed upload code.
+// This is not currently supported by the shaders, but would be awesome.
+// We should be able to use this for any shader that does not do dynamic
+// constant indexing.
+#if 0
   // Allocate space in the buffer for our data.
   auto offset = AllocateTransientData(uniform_buffer_alignment_,
                                       constant_register_map.packed_byte_length);
@@ -230,6 +269,7 @@ VkDeviceSize BufferCache::UploadConstantRegisters(
   }
 
   return offset;
+#endif  // 0
 }
 
 std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
@@ -282,17 +322,60 @@ std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
   return {transient_vertex_buffer_, offset};
 }
 
-VkDeviceSize BufferCache::AllocateTransientData(size_t alignment,
-                                                size_t length) {
-  // Try to add to end, wrapping if required.
-
-  // Check to ensure there is space.
-  if (false) {
-    // Consume all fences.
+VkDeviceSize BufferCache::AllocateTransientData(VkDeviceSize alignment,
+                                                VkDeviceSize length) {
+  // Try fast path (if we have space).
+  VkDeviceSize offset = TryAllocateTransientData(alignment, length);
+  if (offset != VK_WHOLE_SIZE) {
+    return offset;
   }
 
-  // Slice off our bit.
+  // Ran out of easy allocations.
+  // Try consuming fences before we panic.
+  assert_always("Reclamation not yet implemented");
 
+  // Try again. It may still fail if we didn't get enough space back.
+  return TryAllocateTransientData(alignment, length);
+}
+
+VkDeviceSize BufferCache::TryAllocateTransientData(VkDeviceSize alignment,
+                                                   VkDeviceSize length) {
+  // Allocations must start on an |alignment| boundary, so skip any padding
+  // needed after the current tail. An alignment of 0 means no requirement.
+  VkDeviceSize misalignment =
+      alignment ? transient_tail_offset_ % alignment : 0;
+  VkDeviceSize aligned_tail =
+      misalignment ? transient_tail_offset_ + (alignment - misalignment)
+                   : transient_tail_offset_;
+
+  if (transient_tail_offset_ >= transient_head_offset_) {
+    // Tail follows head, so things are easy:
+    // |    H----T   |
+    if (aligned_tail + length <= transient_capacity_) {
+      // Allocation fits from tail to end of buffer, so grow.
+      // |    H----**T |
+      transient_tail_offset_ = aligned_tail + length;
+      return aligned_tail;
+    } else if (length + kDeadZone <= transient_head_offset_) {
+      // Can't fit at the end, but can fit if we wrap around.
+      // Offset 0 satisfies every alignment.
+      // |**T H----....|
+      VkDeviceSize offset = 0;
+      transient_tail_offset_ = length;
+      return offset;
+    }
+  } else {
+    // Head follows tail, so we're reversed:
+    // |----T    H---|
+    if (aligned_tail + length + kDeadZone <= transient_head_offset_) {
+      // Fits from tail to head.
+      // |----***T H---|
+      transient_tail_offset_ = aligned_tail + length;
+      return aligned_tail;
+    }
+  }
+
+  // No more space.
   return VK_WHOLE_SIZE;
 }
 
diff --git a/src/xenia/gpu/vulkan/buffer_cache.h b/src/xenia/gpu/vulkan/buffer_cache.h
index 661e30aa7..af42f23d8 100644
--- a/src/xenia/gpu/vulkan/buffer_cache.h
+++ b/src/xenia/gpu/vulkan/buffer_cache.h
@@ -78,8 +78,14 @@ class BufferCache {
 
  private:
   // Allocates a block of memory in the transient buffer.
+  // When memory is not available fences are checked and space is reclaimed.
   // Returns VK_WHOLE_SIZE if requested amount of memory is not available.
-  VkDeviceSize AllocateTransientData(size_t alignment, size_t length);
+  VkDeviceSize AllocateTransientData(VkDeviceSize alignment,
+                                     VkDeviceSize length);
+  // Tries to allocate an aligned block of memory in the transient buffer.
+  // Returns VK_WHOLE_SIZE if requested amount of memory is not available.
+  VkDeviceSize TryAllocateTransientData(VkDeviceSize alignment,
+                                        VkDeviceSize length);
 
   RegisterFile* register_file_ = nullptr;
   VkDevice device_ = nullptr;
@@ -92,8 +98,10 @@ class BufferCache {
   VkBuffer transient_vertex_buffer_ = nullptr;
   VkDeviceMemory transient_buffer_memory_ = nullptr;
   void* transient_buffer_data_ = nullptr;
+  VkDeviceSize transient_head_offset_ = 0;
+  VkDeviceSize transient_tail_offset_ = 0;
 
-  // Required alignemnts for our various types.
+  // Required alignments for our various types.
   // All allocations must start at the appropriate alignment.
   VkDeviceSize uniform_buffer_alignment_ = 0;
   VkDeviceSize index_buffer_alignment_ = 0;