rsx: Invalidate surface cache slots that are trampled by buffer writes.

This commit is contained in:
kd-11 2025-10-26 01:15:30 +03:00 committed by kd-11
parent 0403231a0d
commit dba48d6387
9 changed files with 108 additions and 53 deletions

27
Utilities/deferred_op.hpp Normal file
View file

@ -0,0 +1,27 @@
#pragma once
// Generic deferred routine wrapper
// Use-case is similar to "defer" statement in other languages, just invokes a callback when the object goes out of scope
#include <functional>
#include <type_traits>
#include <utility>
namespace utils
{
	/**
	 * Scope guard: stores a callable and invokes it from the destructor,
	 * i.e. when the guard object goes out of scope.
	 *
	 * F is the callable type and must be invocable with no arguments.
	 * The callback runs inside a destructor, so it must not throw.
	 */
	template <typename F>
		requires std::is_invocable_v<F>
	class deferred_op
	{
	public:
		// Takes the callable by rvalue and forwards it into storage.
		// Forwarding (instead of copying the named parameter) avoids copying
		// the captures and makes move-only callables usable. It also keeps an
		// explicitly specified reference type F = T& working as before.
		explicit deferred_op(F&& callback)
			: m_callback(std::forward<F>(callback))
		{}

		// A copied guard would invoke the callback a second time, so copying
		// is disabled (this also suppresses the implicit move operations).
		deferred_op(const deferred_op&) = delete;
		deferred_op& operator=(const deferred_op&) = delete;

		~deferred_op()
		{
			m_callback();
		}

	private:
		F m_callback;
	};
}

View file

@ -1411,6 +1411,12 @@ void GLGSRender::on_guest_texture_read()
enqueue_host_context_write(::offset32(&rsx::host_gpu_context_t::texture_load_complete_event), 8, &event_id); enqueue_host_context_write(::offset32(&rsx::host_gpu_context_t::texture_load_complete_event), 8, &event_id);
} }
// Sync hook: invalidates the m_rtts entries for the memory span that starts at
// `address` and is `range` bytes long.
void GLGSRender::write_barrier(u32 address, u32 range)
{
	// Must pass the is_current_thread() check before touching m_rtts.
	ensure(is_current_thread());

	const auto written_span = utils::address_range32::start_length(address, range);
	m_rtts.invalidate_range(written_span);
}
void GLGSRender::begin_occlusion_query(rsx::reports::occlusion_query_info* query) void GLGSRender::begin_occlusion_query(rsx::reports::occlusion_query_info* query)
{ {
query->result = 0; query->result = 0;

View file

@ -192,8 +192,12 @@ public:
gl::work_item& post_flush_request(u32 address, gl::texture_cache::thrashed_set& flush_data); gl::work_item& post_flush_request(u32 address, gl::texture_cache::thrashed_set& flush_data);
// NV3089
bool scaled_image_from_memory(const rsx::blit_src_info& src_info, const rsx::blit_dst_info& dst_info, bool interpolate) override; bool scaled_image_from_memory(const rsx::blit_src_info& src_info, const rsx::blit_dst_info& dst_info, bool interpolate) override;
// Sync
void write_barrier(u32 address, u32 range) override;
// ZCULL // ZCULL
void begin_occlusion_query(rsx::reports::occlusion_query_info* query) override; void begin_occlusion_query(rsx::reports::occlusion_query_info* query) override;
void end_occlusion_query(rsx::reports::occlusion_query_info* query) override; void end_occlusion_query(rsx::reports::occlusion_query_info* query) override;

View file

@ -5,12 +5,44 @@
#include "Emu/RSX/Core/RSXReservationLock.hpp" #include "Emu/RSX/Core/RSXReservationLock.hpp"
#include "Emu/RSX/Host/MM.h" #include "Emu/RSX/Host/MM.h"
#include "Utilities/deferred_op.hpp"
#include "context_accessors.define.h" #include "context_accessors.define.h"
namespace rsx namespace rsx
{ {
namespace nv0039 namespace nv0039
{ {
// Strided 2D transfer.
// For each of `height` rows, walks the first `width` source bytes in steps of
// `src_stride`, writing each visited byte to the destination row in steps of
// `dst_stride`. Rows are `src_pitch` / `dst_pitch` bytes apart.
inline void block2d_copy_with_stride(u8* dst, const u8* src, u32 width, u32 height, u32 src_pitch, u32 dst_pitch, u8 src_stride, u8 dst_stride)
{
	u8* out_row = dst;
	const u8* in_row = src;

	for (u32 rows_left = height; rows_left > 0; --rows_left)
	{
		const u8* const in_end = in_row + width;
		u8* out = out_row;

		for (const u8* in = in_row; in < in_end; in += src_stride)
		{
			*out = *in;
			out += dst_stride;
		}

		out_row += dst_pitch;
		in_row += src_pitch;
	}
}
// Plain 2D transfer: copies `height` rows of `width` bytes each with memcpy,
// stepping the source by `src_pitch` and the destination by `dst_pitch` per row.
inline void block2d_copy(u8* dst, const u8* src, u32 width, u32 height, u32 src_pitch, u32 dst_pitch)
{
	const u8* in_row = src;
	u8* out_row = dst;

	u32 rows_left = height;
	while (rows_left-- > 0)
	{
		std::memcpy(out_row, in_row, width);
		in_row += src_pitch;
		out_row += dst_pitch;
	}
}
void buffer_notify(context* ctx, u32, u32 arg) void buffer_notify(context* ctx, u32, u32 arg)
{ {
s32 in_pitch = REGS(ctx)->nv0039_input_pitch(); s32 in_pitch = REGS(ctx)->nv0039_input_pitch();
@ -56,6 +88,13 @@ namespace rsx
} }
} }
// Deferred write_barrier on RSX side
utils::deferred_op deferred([&]()
{
RSX(ctx)->write_barrier(write_address, write_length);
// res->release(0);
});
auto res = ::rsx::reservation_lock<true>(write_address, write_length, read_address, read_length); auto res = ::rsx::reservation_lock<true>(write_address, write_length, read_address, read_length);
u8* dst = vm::_ptr<u8>(write_address); u8* dst = vm::_ptr<u8>(write_address);
@ -81,68 +120,34 @@ namespace rsx
// The formats are just input channel strides. You can use this to do cool tricks like gathering channels // The formats are just input channel strides. You can use this to do cool tricks like gathering channels
// Very rare, only seen in use by Destiny // Very rare, only seen in use by Destiny
// TODO: Hw accel // TODO: Hw accel
for (u32 row = 0; row < line_count; ++row) block2d_copy_with_stride(dst, src, line_length, line_count, in_pitch, out_pitch, in_format, out_format);
{ return;
auto dst_ptr = dst;
auto src_ptr = src;
while (src_ptr < src + line_length)
{
*dst_ptr = *src_ptr;
src_ptr += in_format;
dst_ptr += out_format;
} }
dst += out_pitch; if (!is_overlapping)
src += in_pitch;
}
}
else if (is_overlapping) [[ unlikely ]]
{
if (is_block_transfer)
{
std::memmove(dst, src, read_length);
}
else
{
std::vector<u8> temp(line_length * line_count);
u8* buf = temp.data();
for (u32 y = 0; y < line_count; ++y)
{
std::memcpy(buf, src, line_length);
buf += line_length;
src += in_pitch;
}
buf = temp.data();
for (u32 y = 0; y < line_count; ++y)
{
std::memcpy(dst, buf, line_length);
buf += line_length;
dst += out_pitch;
}
}
}
else
{ {
if (is_block_transfer) if (is_block_transfer)
{ {
std::memcpy(dst, src, read_length); std::memcpy(dst, src, read_length);
} return;
else
{
for (u32 i = 0; i < line_count; ++i)
{
std::memcpy(dst, src, line_length);
dst += out_pitch;
src += in_pitch;
}
}
} }
//res->release(0); block2d_copy(dst, src, line_length, line_count, in_pitch, out_pitch);
return;
}
if (is_block_transfer)
{
std::memmove(dst, src, read_length);
return;
}
// Handle overlapping 2D range using double-copy to temp.
std::vector<u8> temp(line_length * line_count);
u8* buf = temp.data();
block2d_copy(buf, src, line_length, line_count, in_pitch, line_length);
block2d_copy(dst, buf, line_length, line_count, line_length, out_pitch);
} }
} }
} }

View file

@ -374,6 +374,7 @@ namespace rsx
// sync // sync
void sync(); void sync();
flags32_t read_barrier(u32 memory_address, u32 memory_range, bool unconditional); flags32_t read_barrier(u32 memory_address, u32 memory_range, bool unconditional);
// Write-side counterpart to read_barrier, taking the start address and byte length of a memory range.
// The default implementation does nothing; GLGSRender and VKGSRender override it.
// Parameter names are commented out because the default body does not use them.
virtual void write_barrier(u32 /*memory_address*/, u32 /*memory_range*/) {}
virtual void sync_hint(FIFO::interrupt_hint hint, reports::sync_hint_payload_t payload); virtual void sync_hint(FIFO::interrupt_hint hint, reports::sync_hint_payload_t payload);
virtual bool release_GCM_label(u32 /*address*/, u32 /*value*/) { return false; } virtual bool release_GCM_label(u32 /*address*/, u32 /*value*/) { return false; }

View file

@ -1613,6 +1613,12 @@ void VKGSRender::on_guest_texture_read(const vk::command_buffer& cmd)
vkCmdUpdateBuffer(cmd, m_host_object_data->value, ::offset32(&vk::host_data_t::texture_load_complete_event), sizeof(u64), &event_id); vkCmdUpdateBuffer(cmd, m_host_object_data->value, ::offset32(&vk::host_data_t::texture_load_complete_event), sizeof(u64), &event_id);
} }
// Sync hook: invalidates the m_rtts entries for the memory span that starts at
// `address` and is `range` bytes long.
void VKGSRender::write_barrier(u32 address, u32 range)
{
	// Must pass the is_current_thread() check before touching m_rtts.
	ensure(is_current_thread());

	const auto written_span = utils::address_range32::start_length(address, range);
	m_rtts.invalidate_range(written_span);
}
void VKGSRender::sync_hint(rsx::FIFO::interrupt_hint hint, rsx::reports::sync_hint_payload_t payload) void VKGSRender::sync_hint(rsx::FIFO::interrupt_hint hint, rsx::reports::sync_hint_payload_t payload)
{ {
rsx::thread::sync_hint(hint, payload); rsx::thread::sync_hint(hint, payload);

View file

@ -249,6 +249,8 @@ public:
void set_scissor(bool clip_viewport); void set_scissor(bool clip_viewport);
void bind_viewport(); void bind_viewport();
// Sync
void write_barrier(u32 address, u32 range) override;
void sync_hint(rsx::FIFO::interrupt_hint hint, rsx::reports::sync_hint_payload_t payload) override; void sync_hint(rsx::FIFO::interrupt_hint hint, rsx::reports::sync_hint_payload_t payload) override;
bool release_GCM_label(u32 address, u32 data) override; bool release_GCM_label(u32 address, u32 data) override;

View file

@ -546,6 +546,7 @@
<ClInclude Include="..\3rdparty\stblib\stb\stb_image.h" /> <ClInclude Include="..\3rdparty\stblib\stb\stb_image.h" />
<ClInclude Include="..\Utilities\address_range.h" /> <ClInclude Include="..\Utilities\address_range.h" />
<ClInclude Include="..\Utilities\cheat_info.h" /> <ClInclude Include="..\Utilities\cheat_info.h" />
<ClInclude Include="..\Utilities\deferred_op.hpp" />
<ClInclude Include="..\Utilities\simple_ringbuf.h" /> <ClInclude Include="..\Utilities\simple_ringbuf.h" />
<ClInclude Include="..\Utilities\stack_trace.h" /> <ClInclude Include="..\Utilities\stack_trace.h" />
<ClInclude Include="..\Utilities\transactional_storage.h" /> <ClInclude Include="..\Utilities\transactional_storage.h" />

View file

@ -2755,6 +2755,9 @@
<ClInclude Include="Emu\RSX\Common\reverse_ptr.hpp"> <ClInclude Include="Emu\RSX\Common\reverse_ptr.hpp">
<Filter>Emu\GPU\RSX\Common</Filter> <Filter>Emu\GPU\RSX\Common</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="..\Utilities\deferred_op.hpp">
<Filter>Utilities</Filter>
</ClInclude>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<None Include="Emu\RSX\Program\GLSLSnippets\GPUDeswizzle.glsl"> <None Include="Emu\RSX\Program\GLSLSnippets\GPUDeswizzle.glsl">