vk: Reimplement compliant async texture streaming

- Use CONCURRENT queue access instead of fighting with queue acquire/release via submit chains.
  The minor benefits of forcing EXCLUSIVE mode are buried under the huge penalty of multiple vkQueueSubmit calls.
  Batching submits does not help alleviate this situation. We simply must avoid interrupting execution.
This commit is contained in:
kd-11 2022-07-24 20:28:57 +03:00 committed by kd-11
parent 0ba0f9d2b9
commit d846142f0c
9 changed files with 95 additions and 61 deletions

View file

@ -7,7 +7,7 @@
namespace vk
{
u64 hash_image_properties(VkFormat format, u16 w, u16 h, u16 d, u16 mipmaps, VkImageType type, VkImageCreateFlags create_flags)
u64 hash_image_properties(VkFormat format, u16 w, u16 h, u16 d, u16 mipmaps, VkImageType type, VkImageCreateFlags create_flags, VkSharingMode sharing_mode)
{
/**
* Key layout:
@ -17,7 +17,8 @@ namespace vk
* 40-48: Depth (Max 255)
* 48-54: Mipmaps (Max 63) <- We have some room here, it is not possible to have more than 12 mip levels on PS3 and 16 on PC is pushing it.
* 54-56: Type (Max 3)
* 56-64: Flags (Max 255) <- We have some room here, we only care about a small subset of create flags.
* 56-57: Sharing (Max 1) <- Boolean. Exclusive = 0, shared = 1
* 57-64: Flags (Max 127) <- We have some room here, we only care about a small subset of create flags.
*/
ensure(static_cast<u32>(format) < 0xFF);
return (static_cast<u64>(format) & 0xFF) |
@ -26,7 +27,8 @@ namespace vk
(static_cast<u64>(d) << 40) |
(static_cast<u64>(mipmaps) << 48) |
(static_cast<u64>(type) << 54) |
(static_cast<u64>(create_flags) << 56);
(static_cast<u64>(sharing_mode) << 56) |
(static_cast<u64>(create_flags) << 57);
}
texture_cache::cached_image_reference_t::cached_image_reference_t(texture_cache* parent, std::unique_ptr<vk::viewable_image>& previous)
@ -44,7 +46,7 @@ namespace vk
data->current_queue_family = VK_QUEUE_FAMILY_IGNORED;
// Move this object to the cached image pool
const auto key = hash_image_properties(data->format(), data->width(), data->height(), data->depth(), data->mipmaps(), data->info.imageType, data->info.flags);
const auto key = hash_image_properties(data->format(), data->width(), data->height(), data->depth(), data->mipmaps(), data->info.imageType, data->info.flags, data->info.sharingMode);
std::lock_guard lock(parent->m_cached_pool_lock);
if (!parent->m_cache_is_exiting)
@ -506,13 +508,13 @@ namespace vk
return result;
}
std::unique_ptr<vk::viewable_image> texture_cache::find_cached_image(VkFormat format, u16 w, u16 h, u16 d, u16 mipmaps, VkImageType type, VkImageCreateFlags create_flags, VkImageUsageFlags usage)
std::unique_ptr<vk::viewable_image> texture_cache::find_cached_image(VkFormat format, u16 w, u16 h, u16 d, u16 mipmaps, VkImageType type, VkImageCreateFlags create_flags, VkImageUsageFlags usage, VkSharingMode sharing)
{
reader_lock lock(m_cached_pool_lock);
if (!m_cached_images.empty())
{
const u64 desired_key = hash_image_properties(format, w, h, d, mipmaps, type, create_flags);
const u64 desired_key = hash_image_properties(format, w, h, d, mipmaps, type, create_flags, sharing);
lock.upgrade();
for (auto it = m_cached_images.begin(); it != m_cached_images.end(); ++it)
@ -538,7 +540,7 @@ namespace vk
const VkFormat dst_format = vk::get_compatible_sampler_format(m_formats_support, gcm_format);
const u16 layers = (view_type == VK_IMAGE_VIEW_TYPE_CUBE) ? 6 : 1;
auto image = find_cached_image(dst_format, w, h, d, mips, image_type, image_flags, usage_flags);
auto image = find_cached_image(dst_format, w, h, d, mips, image_type, image_flags, usage_flags, VK_SHARING_MODE_EXCLUSIVE);
if (!image)
{
@ -546,7 +548,7 @@ namespace vk
image_type,
dst_format,
w, h, d, mips, layers, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, image_flags | VK_IMAGE_CREATE_ALLOW_NULL,
VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, image_flags | VK_IMAGE_CREATE_ALLOW_NULL_RPCS3,
VMM_ALLOCATION_POOL_TEXTURE_CACHE, rsx::classify_format(gcm_format));
if (!image->value)
@ -823,7 +825,18 @@ namespace vk
if (region.exists())
{
image = dynamic_cast<vk::viewable_image*>(region.get_raw_texture());
if ((flags & texture_create_flags::do_not_reuse) || !image || region.get_image_type() != type || image->depth() != depth) // TODO
bool reusable = true;
if (flags & texture_create_flags::do_not_reuse)
{
reusable = false;
}
else if (flags & texture_create_flags::shareable)
{
reusable = (image && image->sharing_mode() == VK_SHARING_MODE_CONCURRENT);
}
if (!reusable || !image || region.get_image_type() != type || image->depth() != depth) // TODO
{
// Incompatible view/type
region.destroy();
@ -860,14 +873,20 @@ namespace vk
{
const bool is_cubemap = type == rsx::texture_dimension_extended::texture_dimension_cubemap;
const VkFormat vk_format = get_compatible_sampler_format(m_formats_support, gcm_format);
const VkImageCreateFlags create_flags = is_cubemap ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0;
VkImageCreateFlags create_flags = is_cubemap ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0;
VkSharingMode sharing_mode = (flags & texture_create_flags::shareable) ? VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE;
if (auto found = find_cached_image(vk_format, width, height, depth, mipmaps, image_type, create_flags, usage_flags))
if (auto found = find_cached_image(vk_format, width, height, depth, mipmaps, image_type, create_flags, usage_flags, sharing_mode))
{
image = found.release();
}
else
{
if (sharing_mode == VK_SHARING_MODE_CONCURRENT)
{
create_flags |= VK_IMAGE_CREATE_SHAREABLE_RPCS3;
}
image = new vk::viewable_image(*m_device,
m_memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
image_type, vk_format,
@ -946,7 +965,9 @@ namespace vk
}
}
const rsx::flags32_t create_flags = g_fxo->get<AsyncTaskScheduler>().is_host_mode() ? texture_create_flags::do_not_reuse : 0;
const rsx::flags32_t create_flags = g_fxo->get<AsyncTaskScheduler>().is_host_mode()
? (texture_create_flags::shareable | texture_create_flags::do_not_reuse)
: 0;
auto section = create_new_texture(cmd, rsx_range, width, height, depth, mipmaps, pitch, gcm_format, context, type, swizzled,
rsx::component_order::default_, create_flags);