rsx/sync: Do not allow short-circuiting behavior when releasing GCM labels via host queue

2026-02-13 11:14:50 +01:00 · 2026-02-08 14:35:28 +03:00 · 2026-02-08 14:35:28 +03:00 · aeaa62a28c
parent 93dbdead24
commit aeaa62a28c
8 changed files with 18 additions and 21 deletions
--- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp
+++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp
@@ -1351,7 +1351,7 @@ void GLGSRender::notify_tile_unbound(u32 tile)
 	}
 }

-bool GLGSRender::release_GCM_label(u32 address, u32 args)
+bool GLGSRender::release_GCM_label(u32 type, u32 address, u32 args)
 {
 	if (!backend_config.supports_host_gpu_labels)
 	{
@@ -1360,7 +1360,7 @@ bool GLGSRender::release_GCM_label(u32 address, u32 args)

 	auto host_ctx = ensure(m_host_dma_ctrl->host_ctx());

-	if (host_ctx->texture_loads_completed())
+	if (type == NV4097_TEXTURE_READ_SEMAPHORE_RELEASE && host_ctx->texture_loads_completed())
 	{
 		// We're about to poll waiting for GPU state, ensure the context is still valid.
 		gl::check_state();
--- a/rpcs3/Emu/RSX/GL/GLGSRender.h
+++ b/rpcs3/Emu/RSX/GL/GLGSRender.h
@@ -206,7 +206,7 @@ public:
 	void discard_occlusion_query(rsx::reports::occlusion_query_info* query) override;

 	// DMA
-	bool release_GCM_label(u32 address, u32 data) override;
+	bool release_GCM_label(u32 type, u32 address, u32 data) override;
 	void enqueue_host_context_write(u32 offset, u32 size, const void* data);
 	void on_guest_texture_read();

--- a/rpcs3/Emu/RSX/NV47/HW/nv406e.cpp
+++ b/rpcs3/Emu/RSX/NV47/HW/nv406e.cpp
@@ -86,7 +86,7 @@ namespace rsx
 			RSX(ctx)->performance_counters.idle_time += (get_system_time() - start);
 		}

-		void semaphore_release(context* ctx, u32 /*reg*/, u32 arg)
+		void semaphore_release(context* ctx, u32 reg, u32 arg)
 		{
 			const u32 offset = REGS(ctx)->semaphore_offset_406e();

@@ -122,7 +122,7 @@ namespace rsx
 				arg = 1;
 			}

-			util::write_gcm_label<false, true>(ctx, addr, arg);
+			util::write_gcm_label<false, true>(ctx, reg, addr, arg);
 		}
 	}
 }
--- a/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp
+++ b/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp
@@ -690,7 +690,7 @@ namespace rsx
 			});
 		}

-		void texture_read_semaphore_release(context* ctx, u32 /*reg*/, u32 arg)
+		void texture_read_semaphore_release(context* ctx, u32 reg, u32 arg)
 		{
 			// Pipeline barrier seems to be equivalent to a SHADER_READ stage barrier.
 			// Ideally the GPU only needs to have cached all textures declared up to this point before writing the label.
@@ -715,15 +715,15 @@ namespace rsx

 			if (g_cfg.video.strict_rendering_mode) [[ unlikely ]]
 			{
-				util::write_gcm_label<true, true>(ctx, addr, arg);
+				util::write_gcm_label<true, true>(ctx, reg, addr, arg);
 			}
 			else
 			{
-				util::write_gcm_label<true, false>(ctx, addr, arg);
+				util::write_gcm_label<true, false>(ctx, reg, addr, arg);
 			}
 		}

-		void back_end_write_semaphore_release(context* ctx, u32 /*reg*/, u32 arg)
+		void back_end_write_semaphore_release(context* ctx, u32 reg, u32 arg)
 		{
 			// Full pipeline barrier. GPU must flush pipeline before writing the label

@@ -744,7 +744,7 @@ namespace rsx
 			}

 			const u32 val = (arg & 0xff00ff00) | ((arg & 0xff) << 16) | ((arg >> 16) & 0xff);
-			util::write_gcm_label<true, true>(ctx, addr, val);
+			util::write_gcm_label<true, true>(ctx, reg, addr, val);
 		}

 		void sync(context* ctx, u32, u32)
--- a/rpcs3/Emu/RSX/NV47/HW/nv47_sync.hpp
+++ b/rpcs3/Emu/RSX/NV47/HW/nv47_sync.hpp
@@ -13,13 +13,13 @@ namespace rsx
 	namespace util
 	{
 		template <bool FlushDMA, bool FlushPipe>
-		static void write_gcm_label(context* ctx, u32 address, u32 data)
+		static void write_gcm_label(context* ctx, u32 type, u32 address, u32 data)
 		{
 			const bool is_flip_sema = (address == (RSX(ctx)->label_addr + 0x10) || address == (RSX(ctx)->device_addr + 0x30));
 			if (!is_flip_sema)
 			{
 				// First, queue the GPU work. If it flushes the queue for us, the following routines will be faster.
-				const bool handled = RSX(ctx)->get_backend_config().supports_host_gpu_labels && RSX(ctx)->release_GCM_label(address, data);
+				const bool handled = RSX(ctx)->get_backend_config().supports_host_gpu_labels && RSX(ctx)->release_GCM_label(type, address, data);

 				if (vm::_ref<RsxSemaphore>(address) == data)
 				{
--- a/rpcs3/Emu/RSX/RSXThread.h
+++ b/rpcs3/Emu/RSX/RSXThread.h
@@ -380,7 +380,7 @@ namespace rsx
 		flags32_t read_barrier(u32 memory_address, u32 memory_range, bool unconditional);
 		virtual void write_barrier(u32 /*memory_address*/, u32 /*memory_range*/) {}
 		virtual void sync_hint(FIFO::interrupt_hint hint, reports::sync_hint_payload_t payload);
-		virtual bool release_GCM_label(u32 /*address*/, u32 /*value*/) { return false; }
+		virtual bool release_GCM_label(u32 /*type*/, u32 /*address*/, u32 /*value*/) { return false; }

 	protected:

--- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp
+++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp
@@ -1541,7 +1541,7 @@ std::pair<volatile vk::host_data_t*, VkBuffer> VKGSRender::map_host_object_data(
 	return { m_host_dma_ctrl->host_ctx(), m_host_object_data->value };
 }

-bool VKGSRender::release_GCM_label(u32 address, u32 args)
+bool VKGSRender::release_GCM_label(u32 type, u32 address, u32 args)
 {
 	if (!backend_config.supports_host_gpu_labels)
 	{
@@ -1550,7 +1550,7 @@ bool VKGSRender::release_GCM_label(u32 address, u32 args)

 	auto host_ctx = ensure(m_host_dma_ctrl->host_ctx());

-	if (host_ctx->texture_loads_completed())
+	if (type == NV4097_TEXTURE_READ_SEMAPHORE_RELEASE && host_ctx->texture_loads_completed())
 	{
 		// All texture loads already seen by the host GPU
 		// Wait for all previously submitted labels to be flushed
@@ -1572,13 +1572,10 @@ bool VKGSRender::release_GCM_label(u32 address, u32 args)

 	const auto release_event_id = host_ctx->on_label_acquire();

+	vk::insert_global_memory_barrier(*m_current_command_buffer);
+
 	if (host_ctx->has_unflushed_texture_loads())
 	{
-		if (vk::is_renderpass_open(*m_current_command_buffer))
-		{
-			vk::end_renderpass(*m_current_command_buffer);
-		}
-
 		vkCmdUpdateBuffer(*m_current_command_buffer, mapping.second->value, mapping.first, 4, &write_data);
 		flush_command_queue();
 	}
--- a/rpcs3/Emu/RSX/VK/VKGSRender.h
+++ b/rpcs3/Emu/RSX/VK/VKGSRender.h
@@ -254,7 +254,7 @@ public:
 	// Sync
 	void write_barrier(u32 address, u32 range) override;
 	void sync_hint(rsx::FIFO::interrupt_hint hint, rsx::reports::sync_hint_payload_t payload) override;
-	bool release_GCM_label(u32 address, u32 data) override;
+	bool release_GCM_label(u32 type, u32 address, u32 data) override;

 	void begin_occlusion_query(rsx::reports::occlusion_query_info* query) override;
 	void end_occlusion_query(rsx::reports::occlusion_query_info* query) override;