rsx/sync: Do not allow short-circuiting behavior when releasing GCM labels via host queue
Some checks are pending
Generate Translation Template / Generate Translation Template (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (/rpcs3/.ci/build-linux-aarch64.sh, gcc, rpcs3/rpcs3-ci-jammy-aarch64:1.7, ubuntu-24.04-arm) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (/rpcs3/.ci/build-linux.sh, gcc, rpcs3/rpcs3-ci-jammy:1.7, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (a1d35836e8d45bfc6f63c26f0a3e5d46ef622fe1, rpcs3/rpcs3-binaries-linux-arm64, /rpcs3/.ci/build-linux-aarch64.sh, clang, rpcs3/rpcs3-ci-jammy-aarch64:1.7, ubuntu-24.04-arm) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (d812f1254a1157c80fd402f94446310560f54e5f, rpcs3/rpcs3-binaries-linux, /rpcs3/.ci/build-linux.sh, clang, rpcs3/rpcs3-ci-jammy:1.7, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / RPCS3 Mac ${{ matrix.name }} (0, 51ae32f468089a8169aaf1567de355ff4a3e0842, rpcs3/rpcs3-binaries-mac, Intel) (push) Waiting to run
Build RPCS3 / RPCS3 Mac ${{ matrix.name }} (1, 8e21bdbc40711a3fccd18fbf17b742348b0f4281, rpcs3/rpcs3-binaries-mac-arm64, Apple Silicon) (push) Waiting to run
Build RPCS3 / RPCS3 Windows (push) Waiting to run
Build RPCS3 / RPCS3 Windows Clang ${{ matrix.arch }} (aarch64, clang, clangarm64, ARM64, windows-11-arm) (push) Waiting to run
Build RPCS3 / RPCS3 Windows Clang ${{ matrix.arch }} (x86_64, clang, clang64, X64, windows-2025) (push) Waiting to run
Build RPCS3 / RPCS3 FreeBSD (push) Waiting to run

This commit is contained in:
kd-11 2026-02-08 14:35:28 +03:00 committed by kd-11
parent 93dbdead24
commit aeaa62a28c
8 changed files with 18 additions and 21 deletions

View file

@ -1351,7 +1351,7 @@ void GLGSRender::notify_tile_unbound(u32 tile)
}
}
bool GLGSRender::release_GCM_label(u32 address, u32 args)
bool GLGSRender::release_GCM_label(u32 type, u32 address, u32 args)
{
if (!backend_config.supports_host_gpu_labels)
{
@ -1360,7 +1360,7 @@ bool GLGSRender::release_GCM_label(u32 address, u32 args)
auto host_ctx = ensure(m_host_dma_ctrl->host_ctx());
if (host_ctx->texture_loads_completed())
if (type == NV4097_TEXTURE_READ_SEMAPHORE_RELEASE && host_ctx->texture_loads_completed())
{
// We're about to poll waiting for GPU state, ensure the context is still valid.
gl::check_state();

View file

@ -206,7 +206,7 @@ public:
void discard_occlusion_query(rsx::reports::occlusion_query_info* query) override;
// DMA
bool release_GCM_label(u32 address, u32 data) override;
bool release_GCM_label(u32 type, u32 address, u32 data) override;
void enqueue_host_context_write(u32 offset, u32 size, const void* data);
void on_guest_texture_read();

View file

@ -86,7 +86,7 @@ namespace rsx
RSX(ctx)->performance_counters.idle_time += (get_system_time() - start);
}
void semaphore_release(context* ctx, u32 /*reg*/, u32 arg)
void semaphore_release(context* ctx, u32 reg, u32 arg)
{
const u32 offset = REGS(ctx)->semaphore_offset_406e();
@ -122,7 +122,7 @@ namespace rsx
arg = 1;
}
util::write_gcm_label<false, true>(ctx, addr, arg);
util::write_gcm_label<false, true>(ctx, reg, addr, arg);
}
}
}

View file

@ -690,7 +690,7 @@ namespace rsx
});
}
void texture_read_semaphore_release(context* ctx, u32 /*reg*/, u32 arg)
void texture_read_semaphore_release(context* ctx, u32 reg, u32 arg)
{
// Pipeline barrier seems to be equivalent to a SHADER_READ stage barrier.
// Ideally the GPU only needs to have cached all textures declared up to this point before writing the label.
@ -715,15 +715,15 @@ namespace rsx
if (g_cfg.video.strict_rendering_mode) [[ unlikely ]]
{
util::write_gcm_label<true, true>(ctx, addr, arg);
util::write_gcm_label<true, true>(ctx, reg, addr, arg);
}
else
{
util::write_gcm_label<true, false>(ctx, addr, arg);
util::write_gcm_label<true, false>(ctx, reg, addr, arg);
}
}
void back_end_write_semaphore_release(context* ctx, u32 /*reg*/, u32 arg)
void back_end_write_semaphore_release(context* ctx, u32 reg, u32 arg)
{
// Full pipeline barrier. GPU must flush pipeline before writing the label
@ -744,7 +744,7 @@ namespace rsx
}
const u32 val = (arg & 0xff00ff00) | ((arg & 0xff) << 16) | ((arg >> 16) & 0xff);
util::write_gcm_label<true, true>(ctx, addr, val);
util::write_gcm_label<true, true>(ctx, reg, addr, val);
}
void sync(context* ctx, u32, u32)

View file

@ -13,13 +13,13 @@ namespace rsx
namespace util
{
template <bool FlushDMA, bool FlushPipe>
static void write_gcm_label(context* ctx, u32 address, u32 data)
static void write_gcm_label(context* ctx, u32 type, u32 address, u32 data)
{
const bool is_flip_sema = (address == (RSX(ctx)->label_addr + 0x10) || address == (RSX(ctx)->device_addr + 0x30));
if (!is_flip_sema)
{
// First, queue the GPU work. If it flushes the queue for us, the following routines will be faster.
const bool handled = RSX(ctx)->get_backend_config().supports_host_gpu_labels && RSX(ctx)->release_GCM_label(address, data);
const bool handled = RSX(ctx)->get_backend_config().supports_host_gpu_labels && RSX(ctx)->release_GCM_label(type, address, data);
if (vm::_ref<RsxSemaphore>(address) == data)
{

View file

@ -380,7 +380,7 @@ namespace rsx
flags32_t read_barrier(u32 memory_address, u32 memory_range, bool unconditional);
virtual void write_barrier(u32 /*memory_address*/, u32 /*memory_range*/) {}
virtual void sync_hint(FIFO::interrupt_hint hint, reports::sync_hint_payload_t payload);
virtual bool release_GCM_label(u32 /*address*/, u32 /*value*/) { return false; }
virtual bool release_GCM_label(u32 /*type*/, u32 /*address*/, u32 /*value*/) { return false; }
protected:

View file

@ -1541,7 +1541,7 @@ std::pair<volatile vk::host_data_t*, VkBuffer> VKGSRender::map_host_object_data(
return { m_host_dma_ctrl->host_ctx(), m_host_object_data->value };
}
bool VKGSRender::release_GCM_label(u32 address, u32 args)
bool VKGSRender::release_GCM_label(u32 type, u32 address, u32 args)
{
if (!backend_config.supports_host_gpu_labels)
{
@ -1550,7 +1550,7 @@ bool VKGSRender::release_GCM_label(u32 address, u32 args)
auto host_ctx = ensure(m_host_dma_ctrl->host_ctx());
if (host_ctx->texture_loads_completed())
if (type == NV4097_TEXTURE_READ_SEMAPHORE_RELEASE && host_ctx->texture_loads_completed())
{
// All texture loads already seen by the host GPU
// Wait for all previously submitted labels to be flushed
@ -1572,13 +1572,10 @@ bool VKGSRender::release_GCM_label(u32 address, u32 args)
const auto release_event_id = host_ctx->on_label_acquire();
vk::insert_global_memory_barrier(*m_current_command_buffer);
if (host_ctx->has_unflushed_texture_loads())
{
if (vk::is_renderpass_open(*m_current_command_buffer))
{
vk::end_renderpass(*m_current_command_buffer);
}
vkCmdUpdateBuffer(*m_current_command_buffer, mapping.second->value, mapping.first, 4, &write_data);
flush_command_queue();
}

View file

@ -254,7 +254,7 @@ public:
// Sync
void write_barrier(u32 address, u32 range) override;
void sync_hint(rsx::FIFO::interrupt_hint hint, rsx::reports::sync_hint_payload_t payload) override;
bool release_GCM_label(u32 address, u32 data) override;
bool release_GCM_label(u32 type, u32 address, u32 data) override;
void begin_occlusion_query(rsx::reports::occlusion_query_info* query) override;
void end_occlusion_query(rsx::reports::occlusion_query_info* query) override;