diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index acf258cc56..ebff202303 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -1351,7 +1351,7 @@ void GLGSRender::notify_tile_unbound(u32 tile) } } -bool GLGSRender::release_GCM_label(u32 address, u32 args) +bool GLGSRender::release_GCM_label(u32 type, u32 address, u32 args) { if (!backend_config.supports_host_gpu_labels) { @@ -1360,7 +1360,7 @@ bool GLGSRender::release_GCM_label(u32 address, u32 args) auto host_ctx = ensure(m_host_dma_ctrl->host_ctx()); - if (host_ctx->texture_loads_completed()) + if (type == NV4097_TEXTURE_READ_SEMAPHORE_RELEASE && host_ctx->texture_loads_completed()) { // We're about to poll waiting for GPU state, ensure the context is still valid. gl::check_state(); diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index 5627216055..a05eb0bf3d 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -206,7 +206,7 @@ public: void discard_occlusion_query(rsx::reports::occlusion_query_info* query) override; // DMA - bool release_GCM_label(u32 address, u32 data) override; + bool release_GCM_label(u32 type, u32 address, u32 data) override; void enqueue_host_context_write(u32 offset, u32 size, const void* data); void on_guest_texture_read(); diff --git a/rpcs3/Emu/RSX/NV47/HW/nv406e.cpp b/rpcs3/Emu/RSX/NV47/HW/nv406e.cpp index cbb04d140a..4307cc8289 100644 --- a/rpcs3/Emu/RSX/NV47/HW/nv406e.cpp +++ b/rpcs3/Emu/RSX/NV47/HW/nv406e.cpp @@ -86,7 +86,7 @@ namespace rsx RSX(ctx)->performance_counters.idle_time += (get_system_time() - start); } - void semaphore_release(context* ctx, u32 /*reg*/, u32 arg) + void semaphore_release(context* ctx, u32 reg, u32 arg) { const u32 offset = REGS(ctx)->semaphore_offset_406e(); @@ -122,7 +122,7 @@ namespace rsx arg = 1; } - util::write_gcm_label(ctx, addr, arg); + util::write_gcm_label(ctx, reg, addr, arg); } } } diff --git a/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp b/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp index 929925bcb1..17ee040f8c 100644 --- a/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp +++ b/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp @@ -690,7 +690,7 @@ namespace rsx }); } - void texture_read_semaphore_release(context* ctx, u32 /*reg*/, u32 arg) + void texture_read_semaphore_release(context* ctx, u32 reg, u32 arg) { // Pipeline barrier seems to be equivalent to a SHADER_READ stage barrier. // Ideally the GPU only needs to have cached all textures declared up to this point before writing the label. @@ -715,15 +715,15 @@ namespace rsx if (g_cfg.video.strict_rendering_mode) [[ unlikely ]] { - util::write_gcm_label(ctx, addr, arg); + util::write_gcm_label(ctx, reg, addr, arg); } else { - util::write_gcm_label(ctx, addr, arg); + util::write_gcm_label(ctx, reg, addr, arg); } } - void back_end_write_semaphore_release(context* ctx, u32 /*reg*/, u32 arg) + void back_end_write_semaphore_release(context* ctx, u32 reg, u32 arg) { // Full pipeline barrier. GPU must flush pipeline before writing the label @@ -744,7 +744,7 @@ namespace rsx } const u32 val = (arg & 0xff00ff00) | ((arg & 0xff) << 16) | ((arg >> 16) & 0xff); - util::write_gcm_label(ctx, addr, val); + util::write_gcm_label(ctx, reg, addr, val); } void sync(context* ctx, u32, u32) diff --git a/rpcs3/Emu/RSX/NV47/HW/nv47_sync.hpp b/rpcs3/Emu/RSX/NV47/HW/nv47_sync.hpp index 9f39f84c00..fe2522c31c 100644 --- a/rpcs3/Emu/RSX/NV47/HW/nv47_sync.hpp +++ b/rpcs3/Emu/RSX/NV47/HW/nv47_sync.hpp @@ -13,13 +13,13 @@ namespace rsx namespace util { template - static void write_gcm_label(context* ctx, u32 address, u32 data) + static void write_gcm_label(context* ctx, u32 type, u32 address, u32 data) { const bool is_flip_sema = (address == (RSX(ctx)->label_addr + 0x10) || address == (RSX(ctx)->device_addr + 0x30)); if (!is_flip_sema) { // First, queue the GPU work. If it flushes the queue for us, the following routines will be faster. - const bool handled = RSX(ctx)->get_backend_config().supports_host_gpu_labels && RSX(ctx)->release_GCM_label(address, data); + const bool handled = RSX(ctx)->get_backend_config().supports_host_gpu_labels && RSX(ctx)->release_GCM_label(type, address, data); if (vm::_ref(address) == data) { diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 4f965dee80..4b2de0acc4 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -380,7 +380,7 @@ namespace rsx flags32_t read_barrier(u32 memory_address, u32 memory_range, bool unconditional); virtual void write_barrier(u32 /*memory_address*/, u32 /*memory_range*/) {} virtual void sync_hint(FIFO::interrupt_hint hint, reports::sync_hint_payload_t payload); - virtual bool release_GCM_label(u32 /*address*/, u32 /*value*/) { return false; } + virtual bool release_GCM_label(u32 /*type*/, u32 /*address*/, u32 /*value*/) { return false; } protected: diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index cba661a64b..8d90f9a09f 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1541,7 +1541,7 @@ std::pair VKGSRender::map_host_object_data( return { m_host_dma_ctrl->host_ctx(), m_host_object_data->value }; } -bool VKGSRender::release_GCM_label(u32 address, u32 args) +bool VKGSRender::release_GCM_label(u32 type, u32 address, u32 args) { if (!backend_config.supports_host_gpu_labels) { @@ -1550,7 +1550,7 @@ bool VKGSRender::release_GCM_label(u32 address, u32 args) auto host_ctx = ensure(m_host_dma_ctrl->host_ctx()); - if (host_ctx->texture_loads_completed()) + if (type == NV4097_TEXTURE_READ_SEMAPHORE_RELEASE && host_ctx->texture_loads_completed()) { // All texture loads already seen by the host GPU // Wait for all previously submitted labels to be flushed @@ -1572,13 +1572,10 @@ bool VKGSRender::release_GCM_label(u32 address, u32 args) const auto release_event_id = host_ctx->on_label_acquire(); + vk::insert_global_memory_barrier(*m_current_command_buffer); + if (host_ctx->has_unflushed_texture_loads()) { - if (vk::is_renderpass_open(*m_current_command_buffer)) - { - vk::end_renderpass(*m_current_command_buffer); - } - vkCmdUpdateBuffer(*m_current_command_buffer, mapping.second->value, mapping.first, 4, &write_data); flush_command_queue(); } diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 0c93741fcb..f9feedc35a 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -254,7 +254,7 @@ public: // Sync void write_barrier(u32 address, u32 range) override; void sync_hint(rsx::FIFO::interrupt_hint hint, rsx::reports::sync_hint_payload_t payload) override; - bool release_GCM_label(u32 address, u32 data) override; + bool release_GCM_label(u32 type, u32 address, u32 data) override; void begin_occlusion_query(rsx::reports::occlusion_query_info* query) override; void end_occlusion_query(rsx::reports::occlusion_query_info* query) override;