From dba48d6387c6b4a2b19b1d7d44c24c3413d381c4 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 26 Oct 2025 01:15:30 +0300 Subject: [PATCH] rsx: Invalidate surface cache slots that are trampled by buffer writes. --- Utilities/deferred_op.hpp | 27 ++++++++ rpcs3/Emu/RSX/GL/GLGSRender.cpp | 6 ++ rpcs3/Emu/RSX/GL/GLGSRender.h | 4 ++ rpcs3/Emu/RSX/NV47/HW/nv0039.cpp | 111 ++++++++++++++++--------------- rpcs3/Emu/RSX/RSXThread.h | 1 + rpcs3/Emu/RSX/VK/VKGSRender.cpp | 6 ++ rpcs3/Emu/RSX/VK/VKGSRender.h | 2 + rpcs3/emucore.vcxproj | 1 + rpcs3/emucore.vcxproj.filters | 3 + 9 files changed, 108 insertions(+), 53 deletions(-) create mode 100644 Utilities/deferred_op.hpp diff --git a/Utilities/deferred_op.hpp b/Utilities/deferred_op.hpp new file mode 100644 index 0000000000..7186cea2d4 --- /dev/null +++ b/Utilities/deferred_op.hpp @@ -0,0 +1,27 @@ +#pragma once + +// Generic deferred routine wrapper +// Use-case is similar to "defer" statement in other languages, just invokes a callback when the object goes out of scope + +#include + +namespace utils +{ + template + requires std::is_invocable_v + class deferred_op + { + public: + deferred_op(F&& callback) + : m_callback(callback) + {} + + ~deferred_op() + { + m_callback(); + } + + private: + F m_callback; + }; +} diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 3441fd438e..acf258cc56 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -1411,6 +1411,12 @@ void GLGSRender::on_guest_texture_read() enqueue_host_context_write(::offset32(&rsx::host_gpu_context_t::texture_load_complete_event), 8, &event_id); } +void GLGSRender::write_barrier(u32 address, u32 range) +{ + ensure(is_current_thread()); + m_rtts.invalidate_range(utils::address_range32::start_length(address, range)); +} + void GLGSRender::begin_occlusion_query(rsx::reports::occlusion_query_info* query) { query->result = 0; diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index 465fa51584..5627216055 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -192,8 +192,12 @@ public: gl::work_item& post_flush_request(u32 address, gl::texture_cache::thrashed_set& flush_data); + // NV3089 bool scaled_image_from_memory(const rsx::blit_src_info& src_info, const rsx::blit_dst_info& dst_info, bool interpolate) override; + // Sync + void write_barrier(u32 address, u32 range) override; + // ZCULL void begin_occlusion_query(rsx::reports::occlusion_query_info* query) override; void end_occlusion_query(rsx::reports::occlusion_query_info* query) override; diff --git a/rpcs3/Emu/RSX/NV47/HW/nv0039.cpp b/rpcs3/Emu/RSX/NV47/HW/nv0039.cpp index a41fae11dc..fcd30c45f8 100644 --- a/rpcs3/Emu/RSX/NV47/HW/nv0039.cpp +++ b/rpcs3/Emu/RSX/NV47/HW/nv0039.cpp @@ -5,12 +5,44 @@ #include "Emu/RSX/Core/RSXReservationLock.hpp" #include "Emu/RSX/Host/MM.h" +#include "Utilities/deferred_op.hpp" + #include "context_accessors.define.h" namespace rsx { namespace nv0039 { + // Transfer with stride + inline void block2d_copy_with_stride(u8* dst, const u8* src, u32 width, u32 height, u32 src_pitch, u32 dst_pitch, u8 src_stride, u8 dst_stride) + { + for (u32 row = 0; row < height; ++row) + { + auto dst_ptr = dst; + auto src_ptr = src; + while (src_ptr < src + width) + { + *dst_ptr = *src_ptr; + + src_ptr += src_stride; + dst_ptr += dst_stride; + } + + dst += dst_pitch; + src += src_pitch; + } + } + + inline void block2d_copy(u8* dst, const u8* src, u32 width, u32 height, u32 src_pitch, u32 dst_pitch) + { + for (u32 i = 0; i < height; ++i) + { + std::memcpy(dst, src, width); + dst += dst_pitch; + src += src_pitch; + } + } + void buffer_notify(context* ctx, u32, u32 arg) { s32 in_pitch = REGS(ctx)->nv0039_input_pitch(); @@ -56,6 +88,13 @@ namespace rsx } } + // Deferred write_barrier on RSX side + utils::deferred_op deferred([&]() + { + RSX(ctx)->write_barrier(write_address, write_length); + // res->release(0); + }); + auto res = ::rsx::reservation_lock(write_address, write_length, read_address, read_length); u8* dst = vm::_ptr(write_address); @@ -81,68 +120,34 @@ namespace rsx // The formats are just input channel strides. You can use this to do cool tricks like gathering channels // Very rare, only seen in use by Destiny // TODO: Hw accel - for (u32 row = 0; row < line_count; ++row) - { - auto dst_ptr = dst; - auto src_ptr = src; - while (src_ptr < src + line_length) - { - *dst_ptr = *src_ptr; - - src_ptr += in_format; - dst_ptr += out_format; - } - - dst += out_pitch; - src += in_pitch; - } + block2d_copy_with_stride(dst, src, line_length, line_count, in_pitch, out_pitch, in_format, out_format); + return; } - else if (is_overlapping) [[ unlikely ]] - { - if (is_block_transfer) - { - std::memmove(dst, src, read_length); - } - else - { - std::vector temp(line_length * line_count); - u8* buf = temp.data(); - for (u32 y = 0; y < line_count; ++y) - { - std::memcpy(buf, src, line_length); - buf += line_length; - src += in_pitch; - } - - buf = temp.data(); - - for (u32 y = 0; y < line_count; ++y) - { - std::memcpy(dst, buf, line_length); - buf += line_length; - dst += out_pitch; - } - } - } - else + if (!is_overlapping) { if (is_block_transfer) { std::memcpy(dst, src, read_length); + return; } - else - { - for (u32 i = 0; i < line_count; ++i) - { - std::memcpy(dst, src, line_length); - dst += out_pitch; - src += in_pitch; - } - } + + block2d_copy(dst, src, line_length, line_count, in_pitch, out_pitch); + return; } - //res->release(0); + if (is_block_transfer) + { + std::memmove(dst, src, read_length); + return; + } + + // Handle overlapping 2D range using double-copy to temp. + std::vector temp(line_length * line_count); + u8* buf = temp.data(); + + block2d_copy(buf, src, line_length, line_count, in_pitch, line_length); + block2d_copy(dst, buf, line_length, line_count, line_length, out_pitch); } } } diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 73aacf360a..6e180b26fd 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -374,6 +374,7 @@ namespace rsx // sync void sync(); flags32_t read_barrier(u32 memory_address, u32 memory_range, bool unconditional); + virtual void write_barrier(u32 memory_address, u32 memory_range) {} virtual void sync_hint(FIFO::interrupt_hint hint, reports::sync_hint_payload_t payload); virtual bool release_GCM_label(u32 /*address*/, u32 /*value*/) { return false; } diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 8a3b63eed1..3bc240b768 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1613,6 +1613,12 @@ void VKGSRender::on_guest_texture_read(const vk::command_buffer& cmd) vkCmdUpdateBuffer(cmd, m_host_object_data->value, ::offset32(&vk::host_data_t::texture_load_complete_event), sizeof(u64), &event_id); } +void VKGSRender::write_barrier(u32 address, u32 range) +{ + ensure(is_current_thread()); + m_rtts.invalidate_range(utils::address_range32::start_length(address, range)); +} + void VKGSRender::sync_hint(rsx::FIFO::interrupt_hint hint, rsx::reports::sync_hint_payload_t payload) { rsx::thread::sync_hint(hint, payload); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 6f52c9c1fc..b0edaa48f6 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -249,6 +249,8 @@ public: void set_scissor(bool clip_viewport); void bind_viewport(); + // Sync + void write_barrier(u32 address, u32 range) override; void sync_hint(rsx::FIFO::interrupt_hint hint, rsx::reports::sync_hint_payload_t payload) override; bool release_GCM_label(u32 address, u32 data) override; diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index eaf4c63193..c5f6673723 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -546,6 +546,7 @@ + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index cf0b6a5a87..9c09dd791a 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -2755,6 +2755,9 @@ Emu\GPU\RSX\Common + + Utilities +