diff --git a/rpcs3/Emu/RSX/VK/VKDraw.cpp b/rpcs3/Emu/RSX/VK/VKDraw.cpp
index a5b12473bd..d98b65c908 100644
--- a/rpcs3/Emu/RSX/VK/VKDraw.cpp
+++ b/rpcs3/Emu/RSX/VK/VKDraw.cpp
@@ -478,7 +478,7 @@ void VKGSRender::load_texture_env()
 		// Sync any async scheduler tasks
 		if (auto ev = async_task_scheduler.get_primary_sync_label())
 		{
-			ev->gpu_wait(*m_current_command_buffer);
+			ev->gpu_wait(*m_current_command_buffer, { .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR });
 		}
 	}
 }
diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.cpp b/rpcs3/Emu/RSX/VK/VKTextureCache.cpp
index 746b267bd4..f24b81ae8f 100644
--- a/rpcs3/Emu/RSX/VK/VKTextureCache.cpp
+++ b/rpcs3/Emu/RSX/VK/VKTextureCache.cpp
@@ -191,9 +191,23 @@ namespace vk
 
 		src->pop_layout(cmd);
 
+		VkMemoryBarrier2KHR copy_memory_barrier = {
+			.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2_KHR,
+			.pNext = nullptr,
+			.srcStageMask = VK_PIPELINE_STAGE_2_COPY_BIT_KHR,
+			.srcAccessMask = VK_ACCESS_2_MEMORY_READ_BIT_KHR | VK_ACCESS_2_MEMORY_WRITE_BIT_KHR,
+			.dstStageMask = VK_PIPELINE_STAGE_2_NONE_KHR,
+			.dstAccessMask = 0
+		};
+
 		// Create event object for this transfer and queue signal op
 		dma_fence = std::make_unique<vk::event>(*m_device, sync_domain::any);
-		dma_fence->signal(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
+		dma_fence->signal(cmd,
+			{
+				.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR,
+				.memoryBarrierCount = 1,
+				.pMemoryBarriers = &copy_memory_barrier
+			});
 
 		// Set cb flag for queued dma operations
 		cmd.set_flag(vk::command_buffer::cb_has_dma_transfer);
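Note: the texture-cache hunk above is the signal-side pattern for the new API: describe what the copy wrote in a VkMemoryBarrier2KHR, wrap it in a VkDependencyInfoKHR, and hand it to vk::event::signal(). A minimal stand-alone sketch of the same pattern, assuming a PFN_vkCmdSetEvent2KHR obtained via vkGetDeviceProcAddr; the helper name record_copy_signal is hypothetical and not part of this patch:

#include <vulkan/vulkan.h>

// Hypothetical sketch: signal 'ev' once the preceding copy commands complete.
void record_copy_signal(PFN_vkCmdSetEvent2KHR pfn_cmd_set_event2, VkCommandBuffer cmd, VkEvent ev)
{
	VkMemoryBarrier2KHR barrier{};
	barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2_KHR;
	barrier.srcStageMask = VK_PIPELINE_STAGE_2_COPY_BIT_KHR;     // first scope: the copy stage
	barrier.srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR;  // and the writes it performed
	// dstStageMask/dstAccessMask stay zero here, mirroring the hunk above;
	// the second scope is supplied by the dependency passed at wait time.

	VkDependencyInfoKHR dependency{};
	dependency.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR;
	dependency.memoryBarrierCount = 1;
	dependency.pMemoryBarriers = &barrier;

	pfn_cmd_set_event2(cmd, ev, &dependency);
}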
"vkCmdInsertDebugUtilsLabelEXT")); } + if (pgpu->optional_features_support.synchronization_2) + { + _vkCmdSetEvent2KHR = reinterpret_cast(vkGetDeviceProcAddr(dev, "vkCmdSetEvent2KHR")); + _vkCmdWaitEvents2KHR = reinterpret_cast(vkGetDeviceProcAddr(dev, "vkCmdWaitEvents2KHR")); + } + memory_map = vk::get_memory_mapping(pdev); m_formats_support = vk::get_optimal_tiling_supported_formats(pdev); m_pipeline_binding_table = vk::get_pipeline_binding_table(pdev); diff --git a/rpcs3/Emu/RSX/VK/vkutils/device.h b/rpcs3/Emu/RSX/VK/vkutils/device.h index fcc74356b2..8c98c94dbb 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/device.h +++ b/rpcs3/Emu/RSX/VK/vkutils/device.h @@ -79,6 +79,7 @@ namespace vk bool sampler_mirror_clamped = false; bool shader_stencil_export = false; bool surface_capabilities_2 = false; + bool synchronization_2 = false; bool unrestricted_depth_range = false; } optional_features_support; @@ -135,6 +136,8 @@ namespace vk PFN_vkSetDebugUtilsObjectNameEXT _vkSetDebugUtilsObjectNameEXT = nullptr; PFN_vkQueueInsertDebugUtilsLabelEXT _vkQueueInsertDebugUtilsLabelEXT = nullptr; PFN_vkCmdInsertDebugUtilsLabelEXT _vkCmdInsertDebugUtilsLabelEXT = nullptr; + PFN_vkCmdSetEvent2KHR _vkCmdSetEvent2KHR = nullptr; + PFN_vkCmdWaitEvents2KHR _vkCmdWaitEvents2KHR = nullptr; public: render_device() = default; @@ -168,6 +171,7 @@ namespace vk bool get_framebuffer_loops_support() const { return pgpu->optional_features_support.framebuffer_loops; } bool get_barycoords_support() const { return pgpu->optional_features_support.barycentric_coords; } bool get_custom_border_color_support() const { return pgpu->optional_features_support.custom_border_color; } + bool get_synchronization2_support() const { return pgpu->optional_features_support.synchronization_2; } u64 get_descriptor_update_after_bind_support() const { return pgpu->descriptor_indexing_support.update_after_bind_mask; } u32 get_descriptor_max_draw_calls() const { return pgpu->descriptor_max_draw_calls; } diff --git a/rpcs3/Emu/RSX/VK/vkutils/sync.cpp b/rpcs3/Emu/RSX/VK/vkutils/sync.cpp index e751ab7a26..f0e57d2974 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/sync.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/sync.cpp @@ -15,6 +15,106 @@ namespace vk { + // Util + namespace v1_utils + { + VkPipelineStageFlags gather_src_stages(const VkDependencyInfoKHR& dependency) + { + VkPipelineStageFlags stages = VK_PIPELINE_STAGE_NONE; + for (u32 i = 0; i < dependency.bufferMemoryBarrierCount; ++i) + { + stages |= dependency.pBufferMemoryBarriers[i].srcStageMask; + } + for (u32 i = 0; i < dependency.imageMemoryBarrierCount; ++i) + { + stages |= dependency.pImageMemoryBarriers[i].srcStageMask; + } + for (u32 i = 0; i < dependency.memoryBarrierCount; ++i) + { + stages |= dependency.pMemoryBarriers[i].srcStageMask; + } + return stages; + } + + VkPipelineStageFlags gather_dst_stages(const VkDependencyInfoKHR& dependency) + { + VkPipelineStageFlags stages = VK_PIPELINE_STAGE_NONE; + for (u32 i = 0; i < dependency.bufferMemoryBarrierCount; ++i) + { + stages |= dependency.pBufferMemoryBarriers[i].dstStageMask; + } + for (u32 i = 0; i < dependency.imageMemoryBarrierCount; ++i) + { + stages |= dependency.pImageMemoryBarriers[i].dstStageMask; + } + for (u32 i = 0; i < dependency.memoryBarrierCount; ++i) + { + stages |= dependency.pMemoryBarriers[i].dstStageMask; + } + return stages; + } + + auto get_memory_barriers(const VkDependencyInfoKHR& dependency) + { + std::vector result; + for (u32 i = 0; i < dependency.memoryBarrierCount; ++i) + { + result.emplace_back + ( + 
diff --git a/rpcs3/Emu/RSX/VK/vkutils/sync.cpp b/rpcs3/Emu/RSX/VK/vkutils/sync.cpp
index e751ab7a26..f0e57d2974 100644
--- a/rpcs3/Emu/RSX/VK/vkutils/sync.cpp
+++ b/rpcs3/Emu/RSX/VK/vkutils/sync.cpp
@@ -15,6 +15,123 @@
 
 namespace vk
 {
+	// Util
+	namespace v1_utils
+	{
+		// Collapse the synchronization2 transfer sub-stages (COPY/RESOLVE/BLIT/CLEAR)
+		// into the legacy TRANSFER stage. These bits live above bit 31 and would
+		// otherwise be silently truncated when narrowed to VkPipelineStageFlags.
+		VkPipelineStageFlags demote_stage_mask(VkPipelineStageFlags2KHR stages)
+		{
+			constexpr VkPipelineStageFlags2KHR transfer_class_mask =
+				VK_PIPELINE_STAGE_2_COPY_BIT_KHR | VK_PIPELINE_STAGE_2_RESOLVE_BIT_KHR |
+				VK_PIPELINE_STAGE_2_BLIT_BIT_KHR | VK_PIPELINE_STAGE_2_CLEAR_BIT_KHR;
+
+			if (stages & transfer_class_mask)
+			{
+				stages = (stages & ~transfer_class_mask) | VK_PIPELINE_STAGE_TRANSFER_BIT;
+			}
+
+			return static_cast<VkPipelineStageFlags>(stages);
+		}
+
+		VkPipelineStageFlags gather_src_stages(const VkDependencyInfoKHR& dependency)
+		{
+			VkPipelineStageFlags2KHR stages = VK_PIPELINE_STAGE_2_NONE_KHR;
+			for (u32 i = 0; i < dependency.bufferMemoryBarrierCount; ++i)
+			{
+				stages |= dependency.pBufferMemoryBarriers[i].srcStageMask;
+			}
+			for (u32 i = 0; i < dependency.imageMemoryBarrierCount; ++i)
+			{
+				stages |= dependency.pImageMemoryBarriers[i].srcStageMask;
+			}
+			for (u32 i = 0; i < dependency.memoryBarrierCount; ++i)
+			{
+				stages |= dependency.pMemoryBarriers[i].srcStageMask;
+			}
+			return demote_stage_mask(stages);
+		}
+
+		VkPipelineStageFlags gather_dst_stages(const VkDependencyInfoKHR& dependency)
+		{
+			VkPipelineStageFlags2KHR stages = VK_PIPELINE_STAGE_2_NONE_KHR;
+			for (u32 i = 0; i < dependency.bufferMemoryBarrierCount; ++i)
+			{
+				stages |= dependency.pBufferMemoryBarriers[i].dstStageMask;
+			}
+			for (u32 i = 0; i < dependency.imageMemoryBarrierCount; ++i)
+			{
+				stages |= dependency.pImageMemoryBarriers[i].dstStageMask;
+			}
+			for (u32 i = 0; i < dependency.memoryBarrierCount; ++i)
+			{
+				stages |= dependency.pMemoryBarriers[i].dstStageMask;
+			}
+			return demote_stage_mask(stages);
+		}
+
+		auto get_memory_barriers(const VkDependencyInfoKHR& dependency)
+		{
+			std::vector<VkMemoryBarrier> result;
+			for (u32 i = 0; i < dependency.memoryBarrierCount; ++i)
+			{
+				result.emplace_back
+				(
+					VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+					nullptr,
+					static_cast<VkAccessFlags>(dependency.pMemoryBarriers[i].srcAccessMask),
+					static_cast<VkAccessFlags>(dependency.pMemoryBarriers[i].dstAccessMask)
+				);
+			}
+			return result;
+		}
+
+		auto get_image_memory_barriers(const VkDependencyInfoKHR& dependency)
+		{
+			std::vector<VkImageMemoryBarrier> result;
+			for (u32 i = 0; i < dependency.imageMemoryBarrierCount; ++i)
+			{
+				result.emplace_back
+				(
+					VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+					nullptr,
+					static_cast<VkAccessFlags>(dependency.pImageMemoryBarriers[i].srcAccessMask),
+					static_cast<VkAccessFlags>(dependency.pImageMemoryBarriers[i].dstAccessMask),
+					dependency.pImageMemoryBarriers[i].oldLayout,
+					dependency.pImageMemoryBarriers[i].newLayout,
+					dependency.pImageMemoryBarriers[i].srcQueueFamilyIndex,
+					dependency.pImageMemoryBarriers[i].dstQueueFamilyIndex,
+					dependency.pImageMemoryBarriers[i].image,
+					dependency.pImageMemoryBarriers[i].subresourceRange
+				);
+			}
+			return result;
+		}
+
+		auto get_buffer_memory_barriers(const VkDependencyInfoKHR& dependency)
+		{
+			std::vector<VkBufferMemoryBarrier> result;
+			for (u32 i = 0; i < dependency.bufferMemoryBarrierCount; ++i)
+			{
+				result.emplace_back
+				(
+					VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+					nullptr,
+					static_cast<VkAccessFlags>(dependency.pBufferMemoryBarriers[i].srcAccessMask),
+					static_cast<VkAccessFlags>(dependency.pBufferMemoryBarriers[i].dstAccessMask),
+					dependency.pBufferMemoryBarriers[i].srcQueueFamilyIndex,
+					dependency.pBufferMemoryBarriers[i].dstQueueFamilyIndex,
+					dependency.pBufferMemoryBarriers[i].buffer,
+					dependency.pBufferMemoryBarriers[i].offset,
+					dependency.pBufferMemoryBarriers[i].size
+				);
+			}
+			return result;
+		}
+	}
+
+	// Objects
 	fence::fence(VkDevice dev)
 	{
 		owner = dev;
@@ -75,101 +192,78 @@ namespace vk
 	}
 
 	event::event(const render_device& dev, sync_domain domain)
-		: m_device(dev)
+		: m_device(&dev), v2(dev.get_synchronization2_support())
 	{
-		const auto vendor = dev.gpu().get_driver_vendor();
-		if (domain != sync_domain::gpu &&
-			(vendor == vk::driver_vendor::AMD || vendor == vk::driver_vendor::INTEL))
+		VkEventCreateInfo info
 		{
-			// Work around AMD and INTEL broken event signal synchronization scope
-			// Will be dropped after transitioning to VK1.3
-			m_buffer = std::make_unique<buffer>
-			(
-				dev,
-				4,
-				dev.get_memory_mapping().host_visible_coherent,
-				VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
-				VK_BUFFER_USAGE_TRANSFER_DST_BIT,
-				0,
-				VMM_ALLOCATION_POOL_SYSTEM
-			);
-
-			m_value = reinterpret_cast<volatile u32*>(m_buffer->map(0, 4));
-			*m_value = 0xCAFEBABE;
-		}
-		else
-		{
-			VkEventCreateInfo info
-			{
-				.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO,
-				.pNext = nullptr,
-				.flags = 0
-			};
-			vkCreateEvent(dev, &info, nullptr, &m_vk_event);
-		}
+			.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO,
+			.pNext = nullptr,
+			.flags = 0
+		};
+		CHECK_RESULT(vkCreateEvent(dev, &info, nullptr, &m_vk_event));
 	}
 
 	event::~event()
 	{
 		if (m_vk_event) [[likely]]
 		{
-			vkDestroyEvent(m_device, m_vk_event, nullptr);
-		}
-		else
-		{
-			m_buffer->unmap();
-			m_buffer.reset();
-			m_value = nullptr;
+			vkDestroyEvent(*m_device, m_vk_event, nullptr);
 		}
 	}
 
-	void event::signal(const command_buffer& cmd, VkPipelineStageFlags stages, VkAccessFlags access)
+	void event::signal(const command_buffer& cmd, const VkDependencyInfoKHR& dependency)
 	{
-		if (m_vk_event) [[likely]]
+		if (v2) [[likely]]
 		{
-			vkCmdSetEvent(cmd, m_vk_event, stages);
+			m_device->_vkCmdSetEvent2KHR(cmd, m_vk_event, &dependency);
 		}
 		else
 		{
-			insert_global_memory_barrier(cmd, stages, VK_PIPELINE_STAGE_TRANSFER_BIT, access, VK_ACCESS_TRANSFER_WRITE_BIT);
-			vkCmdFillBuffer(cmd, m_buffer->value, 0, 4, 0xDEADBEEF);
+			// Legacy fallback. In practice this is only hit on in-development drivers that lack synchronization2 support.
+			const auto stages = v1_utils::gather_src_stages(dependency);
+			vkCmdSetEvent(cmd, m_vk_event, stages);
 		}
 	}
 
 	void event::host_signal() const
 	{
 		ensure(m_vk_event);
-		vkSetEvent(m_device, m_vk_event);
+		vkSetEvent(*m_device, m_vk_event);
 	}
 
-	void event::gpu_wait(const command_buffer& cmd) const
+	void event::gpu_wait(const command_buffer& cmd, const VkDependencyInfoKHR& dependency) const
 	{
 		ensure(m_vk_event);
-		vkCmdWaitEvents(cmd, 1, &m_vk_event, 0, 0, 0, nullptr, 0, nullptr, 0, nullptr);
+
+		if (v2) [[likely]]
+		{
+			m_device->_vkCmdWaitEvents2KHR(cmd, 1, &m_vk_event, &dependency);
+		}
+		else
+		{
+			const auto src_stages = v1_utils::gather_src_stages(dependency);
+			const auto dst_stages = v1_utils::gather_dst_stages(dependency);
+			const auto memory_barriers = v1_utils::get_memory_barriers(dependency);
+			const auto image_memory_barriers = v1_utils::get_image_memory_barriers(dependency);
+			const auto buffer_memory_barriers = v1_utils::get_buffer_memory_barriers(dependency);
+
+			vkCmdWaitEvents(cmd,
+				1, &m_vk_event,
+				src_stages, dst_stages,
+				::size32(memory_barriers), memory_barriers.data(),
+				::size32(buffer_memory_barriers), buffer_memory_barriers.data(),
+				::size32(image_memory_barriers), image_memory_barriers.data());
+		}
 	}
 
 	void event::reset() const
 	{
-		if (m_vk_event) [[likely]]
-		{
-			vkResetEvent(m_device, m_vk_event);
-		}
-		else
-		{
-			*m_value = 0xCAFEBABE;
-		}
+		vkResetEvent(*m_device, m_vk_event);
 	}
 
 	VkResult event::status() const
 	{
-		if (m_vk_event) [[likely]]
-		{
-			return vkGetEventStatus(m_device, m_vk_event);
-		}
-		else
-		{
-			return (*m_value == 0xCAFEBABE) ? VK_EVENT_RESET : VK_EVENT_SET;
-		}
+		return vkGetEventStatus(*m_device, m_vk_event);
 	}
 
 	gpu_debug_marker_pool::gpu_debug_marker_pool(const vk::render_device& dev, u32 count)
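Note: with the reworked interface a waiter names its own second scope through the dependency it passes, rather than the fixed transfer scope hard-coded by the old signal(stages, access) overload. A hedged sketch of a fuller wait than the empty dependency used in VKDraw.cpp above, assuming ev points to a vk::event signalled as in the texture-cache hunk and cmd is the current vk::command_buffer; the chosen destination scope is an assumption for illustration:

// Sketch: make a prior copy visible to fragment-shader reads.
VkMemoryBarrier2KHR barrier{};
barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2_KHR;
barrier.srcStageMask = VK_PIPELINE_STAGE_2_COPY_BIT_KHR;
barrier.srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR;
barrier.dstStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT_KHR;
barrier.dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT_KHR;

VkDependencyInfoKHR dependency{};
dependency.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR;
dependency.memoryBarrierCount = 1;
dependency.pMemoryBarriers = &barrier;

ev->gpu_wait(cmd, dependency);

On the non-synchronization2 path the same call demotes to a classic vkCmdWaitEvents: the gather helpers flatten the stage masks (COPY becomes TRANSFER) and get_memory_barriers() rebuilds the equivalent VkMemoryBarrier.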
diff --git a/rpcs3/Emu/RSX/VK/vkutils/sync.h b/rpcs3/Emu/RSX/VK/vkutils/sync.h
index 457491aa14..37c75af826 100644
--- a/rpcs3/Emu/RSX/VK/vkutils/sync.h
+++ b/rpcs3/Emu/RSX/VK/vkutils/sync.h
@@ -9,6 +9,7 @@
 namespace vk
 {
 	class command_buffer;
+	class image;
 
 	enum class sync_domain
 	{
@@ -54,20 +55,18 @@ namespace vk
 
 	class event
 	{
-		VkDevice m_device = VK_NULL_HANDLE;
+		const vk::render_device* m_device = nullptr;
 		VkEvent m_vk_event = VK_NULL_HANDLE;
-
-		std::unique_ptr<buffer> m_buffer;
-		volatile u32* m_value = nullptr;
+		bool v2 = true;
 
 	public:
 		event(const render_device& dev, sync_domain domain);
 		~event();
 		event(const event&) = delete;
 
-		void signal(const command_buffer& cmd, VkPipelineStageFlags stages, VkAccessFlags access);
+		void signal(const command_buffer& cmd, const VkDependencyInfoKHR& dependency);
 		void host_signal() const;
-		void gpu_wait(const command_buffer& cmd) const;
+		void gpu_wait(const command_buffer& cmd, const VkDependencyInfoKHR& dependency) const;
 		VkResult status() const;
 		void reset() const;
 	};
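Note: the host-side query path is unchanged in spirit; status() still maps directly onto vkGetEventStatus. A small sketch of the CPU-side poll this supports; the loop and yield policy are illustrative assumptions, not taken from this patch:

#include <thread>

// Spin until the GPU has executed the signal() recorded earlier.
// 'ev' is a vk::event whose command buffer has already been submitted.
while (ev.status() == VK_EVENT_RESET)
{
	std::this_thread::yield();
}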