From 7f6842705ce376587c6062d1471118b7b75ffa76 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 22 Nov 2025 02:47:24 +0300 Subject: [PATCH 01/42] rsx: Rework GPU deswizzle kernel to prevent hangs --- rpcs3/Emu/RSX/GL/GLCompute.h | 8 +-- .../Program/GLSLSnippets/GPUDeswizzle.glsl | 50 ++++++++++--------- rpcs3/Emu/RSX/VK/VKCompute.h | 8 +-- 3 files changed, 34 insertions(+), 32 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLCompute.h b/rpcs3/Emu/RSX/GL/GLCompute.h index 442d8e4a0d..91210497c7 100644 --- a/rpcs3/Emu/RSX/GL/GLCompute.h +++ b/rpcs3/Emu/RSX/GL/GLCompute.h @@ -338,10 +338,10 @@ namespace gl params.logd = rsx::ceil_log2(depth); set_parameters(cmd); - const u32 num_bytes_per_invocation = (sizeof(_BlockType) * optimal_group_size); - const u32 texels_per_dword = std::max(4u / sizeof(_BlockType), 1u); // For block sizes less than 4 bytes wide - const u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation) / texels_per_dword; - compute_task::run(cmd, linear_invocations); + const u32 word_count_per_invocation = std::max(sizeof(_BlockType) / 4u, 1u); + const u32 num_bytes_per_invocation = (word_count_per_invocation * 4u * optimal_group_size); + const u32 workgroup_invocations = utils::aligned_div(data_length, num_bytes_per_invocation); + compute_task::run(cmd, workgroup_invocations); } }; diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl index 708f703983..bdb432d7e5 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl @@ -103,34 +103,48 @@ uint get_z_index(const in uint x_, const in uint y_, const in uint z_) #if USE_16BIT_ADDRESSING -void write16(inout uint accumulator, const in uint subword, const in uint src_id, const in uint dst_id) +void decode_16b(const in uint texel_id, in uint x, const in uint y, const in uint z) { const uint masks[] = { 0x0000FFFF, 0xFFFF0000 }; - accumulator |= 
data_in[src_id / 2] & masks[subword]; + uint accumulator = 0; - if (subword == 1) + const uint subword_count = min(invocation.size.x, 2); + for (uint subword = 0; subword < subword_count; ++subword, ++x) { - data_out[dst_id / 2] = %f(accumulator); + uint src_texel_id = get_z_index(x, y, z); + uint src_id = (src_texel_id + invocation.data_offset); + accumulator |= data_in[src_id / 2] & masks[subword]; } + + data_out[texel_id / 2] = %f(accumulator); } #elif USE_8BIT_ADDRESSING -void write8(inout uint accumulator, const in uint subword, const in uint src_id, const in uint dst_id) +void decode_8b(const in uint texel_id, in uint x, const in uint y, const in uint z) { const uint masks[] = { 0x000000FF, 0x0000FF00, 0x00FF0000, 0xFF000000 }; - accumulator |= data_in[src_id / 4] & masks[subword]; + uint accumulator = 0; - if (subword == 3) + const uint subword_count = min(invocation.size.x, 4); + for (uint subword = 0; subword < subword_count; ++subword, ++x) { - data_out[dst_id / 4] = accumulator; + uint src_texel_id = get_z_index(x, y, z); + uint src_id = (src_texel_id + invocation.data_offset); + accumulator |= data_in[src_id / 4] & masks[subword]; } + + data_out[texel_id / 4] = accumulator; } #else -void write32(const in uint word_count, in uint src_id, in uint dst_id) +void decode_32b(const in uint texel_id, const in uint word_count, const in uint x, const in uint y, const in uint z) { + uint src_texel_id = get_z_index(x, y, z); + uint dst_id = (texel_id * word_count); + uint src_id = (src_texel_id + invocation.data_offset) * word_count; + for (uint i = 0; i < word_count; ++i) { uint value = data_in[src_id++]; @@ -165,23 +179,11 @@ void main() uint x = (slice_offset % row_length); #if USE_8BIT_ADDRESSING - for (uint subword = 0, accumulator = 0; subword < 4; ++subword, ++x) { + decode_8b(texel_id, x, y, z); #elif USE_16BIT_ADDRESSING - for (uint subword = 0, accumulator = 0; subword < 2; ++subword, ++x) { -#endif - - uint src_texel_id = get_z_index(x, y, z); - uint 
dst_id = (texel_id * word_count); - uint src_id = (src_texel_id + invocation.data_offset) * word_count; - -#if USE_8BIT_ADDRESSING - write8(accumulator, subword, src_id, dst_id); - } -#elif USE_16BIT_ADDRESSING - write16(accumulator, subword, src_id, dst_id); - } + decode_16b(texel_id, x, y, z); #else - write32(word_count, src_id, dst_id); + decode_32b(texel_id, word_count, x, y, z); #endif } diff --git a/rpcs3/Emu/RSX/VK/VKCompute.h b/rpcs3/Emu/RSX/VK/VKCompute.h index a62d93ec74..81f8d6a165 100644 --- a/rpcs3/Emu/RSX/VK/VKCompute.h +++ b/rpcs3/Emu/RSX/VK/VKCompute.h @@ -475,10 +475,10 @@ namespace vk params.logh = rsx::ceil_log2(height); params.logd = rsx::ceil_log2(depth); - const u32 num_bytes_per_invocation = (sizeof(_BlockType) * optimal_group_size); - const u32 texels_per_dword = std::max(4u / sizeof(_BlockType), 1u); // For block sizes less than 4 bytes wide - const u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation) / texels_per_dword; - compute_task::run(cmd, linear_invocations); + const u32 word_count_per_invocation = std::max(sizeof(_BlockType) / 4u, 1u); + const u32 num_bytes_per_invocation = (word_count_per_invocation * 4u * optimal_group_size); + const u32 workgroup_invocations = utils::aligned_div(data_length, num_bytes_per_invocation); + compute_task::run(cmd, workgroup_invocations); } }; From 6e11978638c46d911567c49ef7a6db269aa8cb77 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Fri, 21 Nov 2025 02:07:06 +0300 Subject: [PATCH 02/42] rsx: Properly implement data alignment in simple_array --- rpcs3/Emu/RSX/Common/simple_array.hpp | 45 ++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/simple_array.hpp b/rpcs3/Emu/RSX/Common/simple_array.hpp index 090c51d674..50f8874992 100644 --- a/rpcs3/Emu/RSX/Common/simple_array.hpp +++ b/rpcs3/Emu/RSX/Common/simple_array.hpp @@ -3,11 +3,46 @@ #include #include #include +#include #include "reverse_ptr.hpp" namespace rsx { + 
namespace aligned_allocator + { + template + void* malloc(size_t size) + { +#ifdef _MSC_VER + return _aligned_malloc(size, Align); +#else + return std::aligned_alloc(Align, size); +#endif + } + + template + void* realloc(void* prev_ptr, [[maybe_unused]] size_t prev_size, size_t new_size) + { +#ifdef _MSC_VER + return _aligned_realloc(prev_ptr, new_size, Align); +#else + void* ret = std::aligned_alloc(Align, new_size); + std::memcpy(ret, prev_ptr, std::min(prev_size, new_size)); + return ret; +#endif + } + + static inline void free(void* ptr) + { +#ifdef _MSC_VER + _aligned_free(ptr); +#else + std::free(ptr); +#endif + } + } + template concept span_like = requires(C& c) { @@ -15,7 +50,7 @@ namespace rsx { c.size() } -> std::integral; }; - template + template requires std::is_trivially_destructible_v && std::is_trivially_copyable_v struct simple_array { @@ -28,7 +63,7 @@ namespace rsx private: static constexpr u32 _local_capacity = std::max(64u / sizeof(Ty), 1u); - char _local_storage[_local_capacity * sizeof(Ty)]; + alignas(Align) char _local_storage[_local_capacity * sizeof(Ty)]; u32 _capacity = _local_capacity; Ty* _data = _local_capacity ? reinterpret_cast(_local_storage) : nullptr; @@ -128,7 +163,7 @@ namespace rsx { if (!is_local_storage()) { - free(_data); + aligned_allocator::free(_data); } _data = nullptr; @@ -196,13 +231,13 @@ namespace rsx if (is_local_storage()) { // Switch to heap storage - ensure(_data = static_cast(std::malloc(sizeof(Ty) * size))); + ensure(_data = static_cast(aligned_allocator::malloc(sizeof(Ty) * size))); std::memcpy(static_cast(_data), _local_storage, size_bytes()); } else { // Extend heap storage - ensure(_data = static_cast(std::realloc(_data, sizeof(Ty) * size))); // "realloc() failed!" + ensure(_data = static_cast(aligned_allocator::realloc(_data, size_bytes(), sizeof(Ty) * size))); // "realloc() failed!" 
} _capacity = size; From d6a36f4b60d15d447f6e4399a03fdf1315048983 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Fri, 21 Nov 2025 02:32:07 +0300 Subject: [PATCH 03/42] vk: Fix build using new simple_array guard-rails --- rpcs3/Emu/RSX/VK/VKQueryPool.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKQueryPool.h b/rpcs3/Emu/RSX/VK/VKQueryPool.h index 009afca379..ee2480b8fd 100644 --- a/rpcs3/Emu/RSX/VK/VKQueryPool.h +++ b/rpcs3/Emu/RSX/VK/VKQueryPool.h @@ -69,8 +69,9 @@ namespace vk void on_query_pool_released(std::unique_ptr& pool); - template class _List> - void free_queries(vk::command_buffer& cmd, _List& list) + template + requires std::ranges::range && std::same_as, u32> // List of u32 + void free_queries(vk::command_buffer& cmd, T& list) { for (const auto index : list) { From ea7183b6bd07c96d041d5f600c763b88119e0310 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Fri, 21 Nov 2025 02:32:31 +0300 Subject: [PATCH 04/42] test: Add unit tests for simple array alignment --- rpcs3/tests/test_pair.cpp | 77 ++++++----- rpcs3/tests/test_simple_array.cpp | 36 +++++ rpcs3/tests/test_tuple.cpp | 215 +++++++++++++++--------------- 3 files changed, 185 insertions(+), 143 deletions(-) diff --git a/rpcs3/tests/test_pair.cpp b/rpcs3/tests/test_pair.cpp index 086f7102b6..5df152f054 100644 --- a/rpcs3/tests/test_pair.cpp +++ b/rpcs3/tests/test_pair.cpp @@ -3,44 +3,47 @@ #include "util/types.hpp" #include "util/pair.hpp" -struct some_struct +namespace utils { - u64 v {}; - char s[12] = "Hello World"; - - bool operator == (const some_struct& r) const + struct some_struct { - return v == r.v && std::memcmp(s, r.s, sizeof(s)) == 0; + u64 v{}; + char s[12] = "Hello World"; + + bool operator == (const some_struct& r) const + { + return v == r.v && std::memcmp(s, r.s, sizeof(s)) == 0; + } + }; + + TEST(Pair, General) + { + some_struct s{}; + s.v = 1234; + + utils::pair p; + EXPECT_EQ(sizeof(p), 32); + EXPECT_EQ(p.first, 0); + EXPECT_EQ(p.second, 
some_struct{}); + + p = { 666, s }; + EXPECT_EQ(p.first, 666); + EXPECT_EQ(p.second, s); + + const utils::pair p1 = p; + EXPECT_EQ(p.first, 666); + EXPECT_EQ(p.second, s); + EXPECT_EQ(p1.first, 666); + EXPECT_EQ(p1.second, s); + + utils::pair p2 = p1; + EXPECT_EQ(p1.first, 666); + EXPECT_EQ(p1.second, s); + EXPECT_EQ(p2.first, 666); + EXPECT_EQ(p2.second, s); + + utils::pair p3 = std::move(p); + EXPECT_EQ(p3.first, 666); + EXPECT_EQ(p3.second, s); } -}; - -TEST(Utils, Pair) -{ - some_struct s {}; - s.v = 1234; - - utils::pair p; - EXPECT_EQ(sizeof(p), 32); - EXPECT_EQ(p.first, 0); - EXPECT_EQ(p.second, some_struct{}); - - p = { 666, s }; - EXPECT_EQ(p.first, 666); - EXPECT_EQ(p.second, s); - - const utils::pair p1 = p; - EXPECT_EQ(p.first, 666); - EXPECT_EQ(p.second, s); - EXPECT_EQ(p1.first, 666); - EXPECT_EQ(p1.second, s); - - utils::pair p2 = p1; - EXPECT_EQ(p1.first, 666); - EXPECT_EQ(p1.second, s); - EXPECT_EQ(p2.first, 666); - EXPECT_EQ(p2.second, s); - - utils::pair p3 = std::move(p); - EXPECT_EQ(p3.first, 666); - EXPECT_EQ(p3.second, s); } diff --git a/rpcs3/tests/test_simple_array.cpp b/rpcs3/tests/test_simple_array.cpp index c581ab2277..90a0e73575 100644 --- a/rpcs3/tests/test_simple_array.cpp +++ b/rpcs3/tests/test_simple_array.cpp @@ -267,4 +267,40 @@ namespace rsx EXPECT_EQ(std::memcmp(arr[i].second.s, "Hello World", sizeof(arr[i].second.s)), 0); } } + + TEST(SimpleArray, DataAlignment_SmallVector) + { + struct alignas(16) some_struct { + char data[16]; + }; + + rsx::simple_array arr(2); + const auto data_ptr = reinterpret_cast(arr.data()); + + EXPECT_EQ(data_ptr & 15, 0); + } + + TEST(SimpleArray, DataAlignment_HeapAlloc) + { + struct alignas(16) some_struct { + char data[16]; + }; + + rsx::simple_array arr(128); + const auto data_ptr = reinterpret_cast(arr.data()); + + EXPECT_EQ(data_ptr & 15, 0); + } + + TEST(SimpleArray, DataAlignment_Overrides) + { + rsx::simple_array arr(4); + rsx::simple_array arr2(4); + + const auto data_ptr1 = 
reinterpret_cast(arr.data()); + const auto data_ptr2 = reinterpret_cast(arr2.data()); + + EXPECT_EQ(data_ptr1 & 15, 0); + EXPECT_EQ(data_ptr2 & 127, 0); + } } diff --git a/rpcs3/tests/test_tuple.cpp b/rpcs3/tests/test_tuple.cpp index 2a174d85d3..831c0aac51 100644 --- a/rpcs3/tests/test_tuple.cpp +++ b/rpcs3/tests/test_tuple.cpp @@ -2,113 +2,116 @@ #include "util/tuple.hpp" -struct some_struct +namespace utils { - u64 v {}; - char s[12] = "Hello World"; - - bool operator == (const some_struct& r) const + struct some_struct { - return v == r.v && std::memcmp(s, r.s, sizeof(s)) == 0; + u64 v{}; + char s[12] = "Hello World"; + + bool operator == (const some_struct& r) const + { + return v == r.v && std::memcmp(s, r.s, sizeof(s)) == 0; + } + }; + + TEST(Tuple, General) + { + some_struct s{}; + s.v = 1234; + + utils::tuple t0 = {}; + EXPECT_EQ(t0.size(), 0); + + utils::tuple t; + EXPECT_EQ(sizeof(t), sizeof(int)); + EXPECT_TRUE((std::is_same_v()), int&>)); + EXPECT_EQ(t.size(), 1); + EXPECT_EQ(t.get<0>(), 0); + + utils::tuple t1 = 2; + EXPECT_EQ(sizeof(t1), sizeof(int)); + EXPECT_TRUE((std::is_same_v()), int&>)); + EXPECT_EQ(t1.size(), 1); + EXPECT_EQ(t1.get<0>(), 2); + t1 = {}; + EXPECT_EQ(t1.size(), 1); + EXPECT_EQ(t1.get<0>(), 0); + + utils::tuple t2 = { 2, s }; + EXPECT_EQ(sizeof(t2), 32); + EXPECT_EQ(t2.size(), 2); + EXPECT_TRUE((std::is_same_v()), int&>)); + EXPECT_TRUE((std::is_same_v()), some_struct&>)); + EXPECT_EQ(t2.get<0>(), 2); + EXPECT_EQ(t2.get<1>(), s); + t2 = {}; + EXPECT_EQ(t2.size(), 2); + EXPECT_EQ(t2.get<0>(), 0); + EXPECT_EQ(t2.get<1>(), some_struct{}); + + t2.get<0>() = 666; + t2.get<1>() = s; + EXPECT_EQ(t2.get<0>(), 666); + EXPECT_EQ(t2.get<1>(), s); + + utils::tuple t3 = { 2, s, 1234.0 }; + EXPECT_EQ(sizeof(t3), 40); + EXPECT_EQ(t3.size(), 3); + EXPECT_TRUE((std::is_same_v()), int&>)); + EXPECT_TRUE((std::is_same_v()), some_struct&>)); + EXPECT_TRUE((std::is_same_v()), double&>)); + EXPECT_EQ(t3.get<0>(), 2); + EXPECT_EQ(t3.get<1>(), s); + 
EXPECT_EQ(t3.get<2>(), 1234.0); + t3 = {}; + EXPECT_EQ(t3.size(), 3); + EXPECT_EQ(t3.get<0>(), 0); + EXPECT_EQ(t3.get<1>(), some_struct{}); + EXPECT_EQ(t3.get<2>(), 0.0); + + t3.get<0>() = 666; + t3.get<1>() = s; + t3.get<2>() = 7.0; + EXPECT_EQ(t3.get<0>(), 666); + EXPECT_EQ(t3.get<1>(), s); + EXPECT_EQ(t3.get<2>(), 7.0); + + // const + const utils::tuple tc = { 2, s }; + EXPECT_EQ(tc.size(), 2); + EXPECT_TRUE((std::is_same_v()), const int&>)); + EXPECT_TRUE((std::is_same_v()), const some_struct&>)); + EXPECT_EQ(tc.get<0>(), 2); + EXPECT_EQ(tc.get<1>(), s); + + // assignment + const utils::tuple ta1 = { 2, s }; + utils::tuple ta = ta1; + EXPECT_EQ(ta.size(), 2); + EXPECT_TRUE((std::is_same_v()), int&>)); + EXPECT_TRUE((std::is_same_v()), some_struct&>)); + EXPECT_EQ(ta.get<0>(), 2); + EXPECT_EQ(ta.get<1>(), s); + + utils::tuple ta2 = { 2, s }; + ta = ta2; + EXPECT_EQ(ta.size(), 2); + EXPECT_TRUE((std::is_same_v()), int&>)); + EXPECT_TRUE((std::is_same_v()), some_struct&>)); + EXPECT_EQ(ta.get<0>(), 2); + EXPECT_EQ(ta.get<1>(), s); + EXPECT_EQ(ta2.size(), 2); + EXPECT_TRUE((std::is_same_v()), int&>)); + EXPECT_TRUE((std::is_same_v()), some_struct&>)); + EXPECT_EQ(ta2.get<0>(), 2); + EXPECT_EQ(ta2.get<1>(), s); + + ta = std::move(ta2); + EXPECT_EQ(ta.size(), 2); + EXPECT_TRUE((std::is_same_v()), int&>)); + EXPECT_TRUE((std::is_same_v()), some_struct&>)); + EXPECT_EQ(ta.get<0>(), 2); + EXPECT_EQ(ta.get<1>(), s); } -}; - -TEST(Utils, Tuple) -{ - some_struct s {}; - s.v = 1234; - - utils::tuple t0 = {}; - EXPECT_EQ(t0.size(), 0); - - utils::tuple t; - EXPECT_EQ(sizeof(t), sizeof(int)); - EXPECT_TRUE((std::is_same_v()), int&>)); - EXPECT_EQ(t.size(), 1); - EXPECT_EQ(t.get<0>(), 0); - - utils::tuple t1 = 2; - EXPECT_EQ(sizeof(t1), sizeof(int)); - EXPECT_TRUE((std::is_same_v()), int&>)); - EXPECT_EQ(t1.size(), 1); - EXPECT_EQ(t1.get<0>(), 2); - t1 = {}; - EXPECT_EQ(t1.size(), 1); - EXPECT_EQ(t1.get<0>(), 0); - - utils::tuple t2 = { 2, s }; - EXPECT_EQ(sizeof(t2), 32); - 
EXPECT_EQ(t2.size(), 2); - EXPECT_TRUE((std::is_same_v()), int&>)); - EXPECT_TRUE((std::is_same_v()), some_struct&>)); - EXPECT_EQ(t2.get<0>(), 2); - EXPECT_EQ(t2.get<1>(), s); - t2 = {}; - EXPECT_EQ(t2.size(), 2); - EXPECT_EQ(t2.get<0>(), 0); - EXPECT_EQ(t2.get<1>(), some_struct{}); - - t2.get<0>() = 666; - t2.get<1>() = s; - EXPECT_EQ(t2.get<0>(), 666); - EXPECT_EQ(t2.get<1>(), s); - - utils::tuple t3 = { 2, s, 1234.0 }; - EXPECT_EQ(sizeof(t3), 40); - EXPECT_EQ(t3.size(), 3); - EXPECT_TRUE((std::is_same_v()), int&>)); - EXPECT_TRUE((std::is_same_v()), some_struct&>)); - EXPECT_TRUE((std::is_same_v()), double&>)); - EXPECT_EQ(t3.get<0>(), 2); - EXPECT_EQ(t3.get<1>(), s); - EXPECT_EQ(t3.get<2>(), 1234.0); - t3 = {}; - EXPECT_EQ(t3.size(), 3); - EXPECT_EQ(t3.get<0>(), 0); - EXPECT_EQ(t3.get<1>(), some_struct{}); - EXPECT_EQ(t3.get<2>(), 0.0); - - t3.get<0>() = 666; - t3.get<1>() = s; - t3.get<2>() = 7.0; - EXPECT_EQ(t3.get<0>(), 666); - EXPECT_EQ(t3.get<1>(), s); - EXPECT_EQ(t3.get<2>(), 7.0); - - // const - const utils::tuple tc = { 2, s }; - EXPECT_EQ(tc.size(), 2); - EXPECT_TRUE((std::is_same_v()), const int&>)); - EXPECT_TRUE((std::is_same_v()), const some_struct&>)); - EXPECT_EQ(tc.get<0>(), 2); - EXPECT_EQ(tc.get<1>(), s); - - // assignment - const utils::tuple ta1 = { 2, s }; - utils::tuple ta = ta1; - EXPECT_EQ(ta.size(), 2); - EXPECT_TRUE((std::is_same_v()), int&>)); - EXPECT_TRUE((std::is_same_v()), some_struct&>)); - EXPECT_EQ(ta.get<0>(), 2); - EXPECT_EQ(ta.get<1>(), s); - - utils::tuple ta2 = { 2, s }; - ta = ta2; - EXPECT_EQ(ta.size(), 2); - EXPECT_TRUE((std::is_same_v()), int&>)); - EXPECT_TRUE((std::is_same_v()), some_struct&>)); - EXPECT_EQ(ta.get<0>(), 2); - EXPECT_EQ(ta.get<1>(), s); - EXPECT_EQ(ta2.size(), 2); - EXPECT_TRUE((std::is_same_v()), int&>)); - EXPECT_TRUE((std::is_same_v()), some_struct&>)); - EXPECT_EQ(ta2.get<0>(), 2); - EXPECT_EQ(ta2.get<1>(), s); - - ta = std::move(ta2); - EXPECT_EQ(ta.size(), 2); - EXPECT_TRUE((std::is_same_v()), 
int&>)); - EXPECT_TRUE((std::is_same_v()), some_struct&>)); - EXPECT_EQ(ta.get<0>(), 2); - EXPECT_EQ(ta.get<1>(), s); } From e1eb7421fb65df3d02bae44589d705beedebe340 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Fri, 21 Nov 2025 02:32:51 +0300 Subject: [PATCH 05/42] rsx: Enforce natural data alignment for IO buffers span cast --- rpcs3/Emu/RSX/Common/io_buffer.h | 1 + 1 file changed, 1 insertion(+) diff --git a/rpcs3/Emu/RSX/Common/io_buffer.h b/rpcs3/Emu/RSX/Common/io_buffer.h index 29397d1136..64f95a5e61 100644 --- a/rpcs3/Emu/RSX/Common/io_buffer.h +++ b/rpcs3/Emu/RSX/Common/io_buffer.h @@ -80,6 +80,7 @@ namespace rsx std::span as_span() const { auto bytes = data(); + ensure((reinterpret_cast(bytes) & (sizeof(T) - 1)) == 0, "IO buffer span cast requires naturally aligned pointers."); return { utils::bless(bytes), m_size / sizeof(T) }; } From dec6fba68d448814a4eb1aab48ba31e0855c6fe7 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Fri, 21 Nov 2025 02:40:40 +0300 Subject: [PATCH 06/42] gl: Use 16-byte alignment on staging buffers --- rpcs3/Emu/RSX/GL/GLTexture.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLTexture.cpp b/rpcs3/Emu/RSX/GL/GLTexture.cpp index 6fd04020ec..7b43cfc0a7 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.cpp +++ b/rpcs3/Emu/RSX/GL/GLTexture.cpp @@ -590,7 +590,7 @@ namespace gl void fill_texture(gl::command_context& cmd, texture* dst, int format, const std::vector &input_layouts, - bool is_swizzled, GLenum gl_format, GLenum gl_type, rsx::simple_array& staging_buffer) + bool is_swizzled, GLenum gl_format, GLenum gl_type, std::span staging_buffer) { const auto& driver_caps = gl::get_driver_caps(); rsx::texture_uploader_capabilities caps @@ -841,7 +841,7 @@ namespace gl void upload_texture(gl::command_context& cmd, texture* dst, u32 gcm_format, bool is_swizzled, const std::vector& subresources_layout) { // Calculate staging buffer size - rsx::simple_array data_upload_buf; + rsx::simple_array data_upload_buf; 
rsx::texture_uploader_capabilities caps { .supports_dxt = gl::get_driver_caps().EXT_texture_compression_s3tc_supported }; if (rsx::is_compressed_host_format(caps, gcm_format)) From 98ca15699b4a9ddce187a22a2ab23881a729cc1e Mon Sep 17 00:00:00 2001 From: kd-11 Date: Fri, 21 Nov 2025 03:03:42 +0300 Subject: [PATCH 07/42] Fix windows clang builds --- rpcs3/Emu/RSX/Common/simple_array.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/simple_array.hpp b/rpcs3/Emu/RSX/Common/simple_array.hpp index 50f8874992..bd90fa9dcb 100644 --- a/rpcs3/Emu/RSX/Common/simple_array.hpp +++ b/rpcs3/Emu/RSX/Common/simple_array.hpp @@ -14,7 +14,7 @@ namespace rsx template void* malloc(size_t size) { -#ifdef _MSC_VER +#ifdef _WIN32 return _aligned_malloc(size, Align); #else return std::aligned_alloc(Align, size); @@ -24,7 +24,7 @@ namespace rsx template void* realloc(void* prev_ptr, [[maybe_unused]] size_t prev_size, size_t new_size) { -#ifdef _MSC_VER +#ifdef _WIn32 return _aligned_realloc(prev_ptr, new_size, Align); #else void* ret = std::aligned_alloc(Align, new_size); @@ -35,7 +35,7 @@ namespace rsx static inline void free(void* ptr) { -#ifdef _MSC_VER +#ifdef _WIN32 _aligned_free(ptr); #else std::free(ptr); From a3f7c0d67f7b2aee900201952793fabd15d17903 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Fri, 21 Nov 2025 03:27:28 +0300 Subject: [PATCH 08/42] Fix typo --- rpcs3/Emu/RSX/Common/simple_array.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/Common/simple_array.hpp b/rpcs3/Emu/RSX/Common/simple_array.hpp index bd90fa9dcb..4b5ceac877 100644 --- a/rpcs3/Emu/RSX/Common/simple_array.hpp +++ b/rpcs3/Emu/RSX/Common/simple_array.hpp @@ -24,7 +24,7 @@ namespace rsx template void* realloc(void* prev_ptr, [[maybe_unused]] size_t prev_size, size_t new_size) { -#ifdef _WIn32 +#ifdef _WIN32 return _aligned_realloc(prev_ptr, new_size, Align); #else void* ret = std::aligned_alloc(Align, new_size); From 
fcff16b6f711ae7263f883d8efc18a3a3d33e227 Mon Sep 17 00:00:00 2001 From: oltolm Date: Sun, 23 Nov 2025 06:11:36 +0100 Subject: [PATCH 09/42] Fix Clang build and other small fixes (#17736) 1. I fixed the Clang build. 1. I removed what I think is an unnecessary `ptrtoint` in `rpcs3/Emu/Cell/PPUTranslator.cpp`. I am not 100% sure that it's correct, but I tested a small LLVM IR snippet and it didn't make a difference. The ASM code was the same. 1. I also changed the definition of `prefetch_write` from `return __builtin_prefetch(ptr, 1, 0);` to `return __builtin_prefetch(ptr, 1, 3);` because that's how `_m_prefetchw` is defined in GCC. --- Utilities/StrFmt.h | 2 +- rpcs3/Crypto/utils.cpp | 2 +- rpcs3/Emu/Cell/Modules/sceNpTrophy.h | 1 - rpcs3/Emu/Cell/PPUTranslator.cpp | 3 +-- rpcs3/util/asm.hpp | 2 +- 5 files changed, 4 insertions(+), 6 deletions(-) diff --git a/Utilities/StrFmt.h b/Utilities/StrFmt.h index 5eba199e26..6d9ea05c9e 100644 --- a/Utilities/StrFmt.h +++ b/Utilities/StrFmt.h @@ -394,7 +394,7 @@ namespace fmt } #if !defined(_MSC_VER) || defined(__clang__) - [[noreturn]] ~throw_exception(); + [[noreturn]] ~throw_exception() = default; #endif }; diff --git a/rpcs3/Crypto/utils.cpp b/rpcs3/Crypto/utils.cpp index 7432acbf62..8d2fd4e9aa 100644 --- a/rpcs3/Crypto/utils.cpp +++ b/rpcs3/Crypto/utils.cpp @@ -157,7 +157,7 @@ std::array sc_combine_laid_paid(s64 laid, s64 paid) { const std::string paid_laid = fmt::format("%016llx%016llx", laid, paid); std::array out{}; - hex_to_bytes(out.data(), paid_laid.c_str(), PASSPHRASE_KEY_LEN * 2); + hex_to_bytes(out.data(), paid_laid, PASSPHRASE_KEY_LEN * 2); return out; } diff --git a/rpcs3/Emu/Cell/Modules/sceNpTrophy.h b/rpcs3/Emu/Cell/Modules/sceNpTrophy.h index 0c48623fda..6f7d88c148 100644 --- a/rpcs3/Emu/Cell/Modules/sceNpTrophy.h +++ b/rpcs3/Emu/Cell/Modules/sceNpTrophy.h @@ -3,7 +3,6 @@ #include "util/types.hpp" #include "Emu/Memory/vm_ptr.h" #include "Emu/Cell/ErrorCodes.h" -#include #include #include diff --git 
a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index 629aae88ae..e59f14892b 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -416,7 +416,6 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module& info) assert(ptr_inst->getResultElementType() == m_ir->getPtrTy()); const auto faddr = m_ir->CreateLoad(ptr_inst->getResultElementType(), ptr_inst); - const auto faddr_int = m_ir->CreatePtrToInt(faddr, get_type()); const auto pos_32 = m_reloc ? m_ir->CreateAdd(func_pc, m_seg0) : func_pc; const auto pos = m_ir->CreateShl(pos_32, 1); const auto ptr = m_ir->CreatePtrAdd(m_exec, pos); @@ -427,7 +426,7 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module& info) const auto seg_val = m_ir->CreateTrunc(m_ir->CreateLShr(m_seg0, 13), get_type()); // Store to jumptable - m_ir->CreateStore(faddr_int, ptr); + m_ir->CreateStore(faddr, ptr); m_ir->CreateStore(seg_val, seg_ptr); // Increment index and branch back to loop diff --git a/rpcs3/util/asm.hpp b/rpcs3/util/asm.hpp index deca38b413..8942dc2a09 100644 --- a/rpcs3/util/asm.hpp +++ b/rpcs3/util/asm.hpp @@ -60,7 +60,7 @@ namespace utils #if defined(ARCH_X64) return _m_prefetchw(const_cast(ptr)); #else - return __builtin_prefetch(ptr, 1, 0); + return __builtin_prefetch(ptr, 1, 3); #endif } From ecba1d2cb706f155ef700695785d2646abd0c962 Mon Sep 17 00:00:00 2001 From: Elad <18193363+elad335@users.noreply.github.com> Date: Sun, 23 Nov 2025 08:42:32 +0200 Subject: [PATCH 10/42] sys_fs: Remove PPU sleep hacks --- rpcs3/Emu/Cell/lv2/sys_fs.cpp | 42 ++++++++++++----------------------- 1 file changed, 14 insertions(+), 28 deletions(-) diff --git a/rpcs3/Emu/Cell/lv2/sys_fs.cpp b/rpcs3/Emu/Cell/lv2/sys_fs.cpp index 1f76bb7090..5bb74808be 100644 --- a/rpcs3/Emu/Cell/lv2/sys_fs.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_fs.cpp @@ -1036,7 +1036,6 @@ lv2_file::open_result_t lv2_file::open(std::string_view vpath, s32 flags, s32 mo error_code sys_fs_open(ppu_thread& ppu, vm::cptr 
path, s32 flags, vm::ptr fd, s32 mode, vm::cptr arg, u64 size) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_open(path=%s, flags=%#o, fd=*0x%x, mode=%#o, arg=*0x%x, size=0x%llx)", path, flags, fd, mode, arg, size); @@ -1085,7 +1084,6 @@ error_code sys_fs_open(ppu_thread& ppu, vm::cptr path, s32 flags, vm::ptr< error_code sys_fs_read(ppu_thread& ppu, u32 fd, vm::ptr buf, u64 nbytes, vm::ptr nread) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.trace("sys_fs_read(fd=%d, buf=*0x%x, nbytes=0x%llx, nread=*0x%x)", fd, buf, nbytes, nread); @@ -1122,6 +1120,11 @@ error_code sys_fs_read(ppu_thread& ppu, u32 fd, vm::ptr buf, u64 nbytes, v return CELL_OK; } + if (nbytes >= 0x100000 && file->type != lv2_file_type::regular) + { + lv2_obj::sleep(ppu); + } + std::unique_lock lock(file->mp->mutex); if (!file->file) @@ -1154,7 +1157,6 @@ error_code sys_fs_read(ppu_thread& ppu, u32 fd, vm::ptr buf, u64 nbytes, v error_code sys_fs_write(ppu_thread& ppu, u32 fd, vm::cptr buf, u64 nbytes, vm::ptr nwrite) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.trace("sys_fs_write(fd=%d, buf=*0x%x, nbytes=0x%llx, nwrite=*0x%x)", fd, buf, nbytes, nwrite); @@ -1237,7 +1239,6 @@ error_code sys_fs_write(ppu_thread& ppu, u32 fd, vm::cptr buf, u64 nbytes, error_code sys_fs_close(ppu_thread& ppu, u32 fd) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); const auto file = idm::get_unlocked(fd); @@ -1314,7 +1315,6 @@ error_code sys_fs_close(ppu_thread& ppu, u32 fd) error_code sys_fs_opendir(ppu_thread& ppu, vm::cptr path, vm::ptr fd) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_opendir(path=%s, fd=*0x%x)", path, fd); @@ -1491,7 +1491,6 @@ error_code sys_fs_readdir(ppu_thread& ppu, u32 fd, vm::ptr dir, vm error_code sys_fs_closedir(ppu_thread& ppu, u32 fd) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_closedir(fd=%d)", fd); @@ -1506,7 +1505,6 @@ error_code 
sys_fs_closedir(ppu_thread& ppu, u32 fd) error_code sys_fs_stat(ppu_thread& ppu, vm::cptr path, vm::ptr sb) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_stat(path=%s, sb=*0x%x)", path, sb); @@ -1610,7 +1608,6 @@ error_code sys_fs_stat(ppu_thread& ppu, vm::cptr path, vm::ptr error_code sys_fs_fstat(ppu_thread& ppu, u32 fd, vm::ptr sb) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_fstat(fd=%d, sb=*0x%x)", fd, sb); @@ -1666,7 +1663,6 @@ error_code sys_fs_link(ppu_thread&, vm::cptr from, vm::cptr to) error_code sys_fs_mkdir(ppu_thread& ppu, vm::cptr path, s32 mode) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_mkdir(path=%s, mode=%#o)", path, mode); @@ -1728,7 +1724,6 @@ error_code sys_fs_mkdir(ppu_thread& ppu, vm::cptr path, s32 mode) error_code sys_fs_rename(ppu_thread& ppu, vm::cptr from, vm::cptr to) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_rename(from=%s, to=%s)", from, to); @@ -1794,7 +1789,6 @@ error_code sys_fs_rename(ppu_thread& ppu, vm::cptr from, vm::cptr to error_code sys_fs_rmdir(ppu_thread& ppu, vm::cptr path) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_rmdir(path=%s)", path); @@ -1850,7 +1844,6 @@ error_code sys_fs_rmdir(ppu_thread& ppu, vm::cptr path) error_code sys_fs_unlink(ppu_thread& ppu, vm::cptr path) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_unlink(path=%s)", path); @@ -1951,8 +1944,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr _arg, u32 case 0x8000000a: // cellFsReadWithOffset case 0x8000000b: // cellFsWriteWithOffset { - lv2_obj::sleep(ppu); - const auto arg = vm::static_ptr_cast(_arg); if (_size < arg.size()) @@ -1992,6 +1983,11 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr _arg, u32 sys_fs.error("%s type: Writing %u bytes to FD=%d (path=%s)", file->type, arg->size, file->name.data()); } + if (op == 
0x8000000a && file->type != lv2_file_type::regular && arg->size >= 0x100000) + { + lv2_obj::sleep(ppu); + } + std::unique_lock wlock(file->mp->mutex, std::defer_lock); std::shared_lock rlock(file->mp->mutex, std::defer_lock); @@ -2047,8 +2043,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr _arg, u32 case 0x80000009: // cellFsSdataOpenByFd { - lv2_obj::sleep(ppu); - const auto arg = vm::static_ptr_cast(_arg); if (_size < arg.size()) @@ -2102,8 +2096,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr _arg, u32 case 0xc0000002: // cellFsGetFreeSize (TODO) { - lv2_obj::sleep(ppu); - const auto arg = vm::static_ptr_cast(_arg); const auto& mp = g_fxo->get().lookup("/dev_hdd0"); @@ -2418,8 +2410,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr _arg, u32 case 0xe0000012: // cellFsGetDirectoryEntries { - lv2_obj::sleep(ppu); - const auto arg = vm::static_ptr_cast(_arg); if (_size < arg.size()) @@ -2434,8 +2424,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr _arg, u32 return CELL_EBADF; } - ppu.check_state(); - u32 read_count = 0; // NOTE: This function is actually capable of reading only one entry at a time @@ -2593,7 +2581,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr _arg, u32 error_code sys_fs_lseek(ppu_thread& ppu, u32 fd, s64 offset, s32 whence, vm::ptr pos) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.trace("sys_fs_lseek(fd=%d, offset=0x%llx, whence=0x%x, pos=*0x%x)", fd, offset, whence, pos); @@ -2639,7 +2626,6 @@ error_code sys_fs_lseek(ppu_thread& ppu, u32 fd, s64 offset, s32 whence, vm::ptr error_code sys_fs_fdatasync(ppu_thread& ppu, u32 fd) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.trace("sys_fs_fdadasync(fd=%d)", fd); @@ -2650,6 +2636,8 @@ error_code sys_fs_fdatasync(ppu_thread& ppu, u32 fd) return CELL_EBADF; } + lv2_obj::sleep(ppu); + std::lock_guard lock(file->mp->mutex); if (!file->file) @@ -2664,7 +2652,6 @@ 
error_code sys_fs_fdatasync(ppu_thread& ppu, u32 fd) error_code sys_fs_fsync(ppu_thread& ppu, u32 fd) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.trace("sys_fs_fsync(fd=%d)", fd); @@ -2675,6 +2662,8 @@ error_code sys_fs_fsync(ppu_thread& ppu, u32 fd) return CELL_EBADF; } + lv2_obj::sleep(ppu); + std::lock_guard lock(file->mp->mutex); if (!file->file) @@ -2763,7 +2752,6 @@ error_code sys_fs_get_block_size(ppu_thread& ppu, vm::cptr path, vm::ptr path, u64 size) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_truncate(path=%s, size=0x%llx)", path, size); @@ -2815,7 +2803,6 @@ error_code sys_fs_truncate(ppu_thread& ppu, vm::cptr path, u64 size) error_code sys_fs_ftruncate(ppu_thread& ppu, u32 fd, u64 size) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_ftruncate(fd=%d, size=0x%llx)", fd, size); @@ -3021,7 +3008,6 @@ error_code sys_fs_disk_free(ppu_thread& ppu, vm::cptr path, vm::ptr t error_code sys_fs_utime(ppu_thread& ppu, vm::cptr path, vm::cptr timep) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_utime(path=%s, timep=*0x%x)", path, timep); sys_fs.warning("** actime=%u, modtime=%u", timep->actime, timep->modtime); From 7472d95b0c6f4669df0d256970a3d556eee5f8e0 Mon Sep 17 00:00:00 2001 From: Elad <18193363+elad335@users.noreply.github.com> Date: Sun, 23 Nov 2025 17:48:38 +0200 Subject: [PATCH 11/42] SPU Analyzer: Fix jumptable analysis for SPU Block Mega --- rpcs3/Emu/Cell/SPUCommonRecompiler.cpp | 42 ++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp index 11eb124eae..2989f64a56 100644 --- a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp @@ -3153,6 +3153,35 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s } } + for (usz i = 0; i < jt_abs.size(); i++) + { + if (jt_abs[i] == start + jt_abs.size() * 
4) + { + // If jumptable contains absolute address of code start after the jumptable itself + // It is likely an absolute-type jumptable + + bool is_good_conclusion = true; + + // For verification: make sure there is none like this in relative table + + for (u32 target : jt_rel) + { + if (target == start + jt_rel.size() * 4) + { + is_good_conclusion = false; + break; + } + } + + if (is_good_conclusion) + { + jt_rel.clear(); + } + + break; + } + } + // Choose position after the jt as an anchor and compute the average distance for (u32 target : jt_abs) { @@ -7241,6 +7270,19 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s // Blocks starting from 0x0 or invalid instruction won't be compiled, may need special interpreter fallback } + for (u32 i = 0; i < result.data.size(); i++) + { + const be_t ls_val = ls[result.lower_bound / 4 + i]; + + if (result.data[i] && std::bit_cast(ls_val) != result.data[i]) + { + std::string out_dump; + dump(result, out_dump); + spu_log.error("SPU Function Dump:\n%s", out_dump); + fmt::throw_exception("SPU Analyzer failed: Instruction mismatch at 0x%x [read: 0x%x vs LS: 0x%x] (i=0x%x)", result.lower_bound + i * 4, std::bit_cast>(result.data[i]), ls_val, i); + } + } + return result; } From 89a13b75f70815f5018c1168b3f5b80ff49d0005 Mon Sep 17 00:00:00 2001 From: Elad <18193363+elad335@users.noreply.github.com> Date: Sun, 23 Nov 2025 18:14:39 +0200 Subject: [PATCH 12/42] SPU Analyzer: Fix jumptable append --- rpcs3/Emu/Cell/SPUCommonRecompiler.cpp | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp index 2989f64a56..9a192989e0 100644 --- a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp @@ -3122,7 +3122,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s u64 dabs = 0; u64 drel = 0; - for (u32 i = start; i < limit; i += 4) + for (u32 i = 
start, abs_fail = 0, rel_fail = 0; i < limit; i += 4) { const u32 target = ls[i / 4]; @@ -3135,13 +3135,27 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s if (target >= lsa && target < SPU_LS_SIZE) { // Possible jump table entry (absolute) - jt_abs.push_back(target); + if (!abs_fail) + { + jt_abs.push_back(target); + } + } + else + { + abs_fail++; } if (target + start >= lsa && target + start < SPU_LS_SIZE) { // Possible jump table entry (relative) - jt_rel.push_back(target + start); + if (!rel_fail) + { + jt_rel.push_back(target + start); + } + } + else + { + rel_fail++; } if (std::max(jt_abs.size(), jt_rel.size()) * 4 + start <= i) From 683baf46b2c6a33c94e16d642237523f22bcd3ba Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 22 Nov 2025 20:25:47 +0300 Subject: [PATCH 13/42] rsx: Add find_if operation for simple_array --- rpcs3/Emu/RSX/Common/simple_array.hpp | 50 +++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/rpcs3/Emu/RSX/Common/simple_array.hpp b/rpcs3/Emu/RSX/Common/simple_array.hpp index 4b5ceac877..69397291f9 100644 --- a/rpcs3/Emu/RSX/Common/simple_array.hpp +++ b/rpcs3/Emu/RSX/Common/simple_array.hpp @@ -50,6 +50,12 @@ namespace rsx { c.size() } -> std::integral; }; + template + concept is_trivially_comparable_v = + requires (T t1, U t2) { + { t1 == t2 } -> std::same_as; + }; + template requires std::is_trivially_destructible_v && std::is_trivially_copyable_v struct simple_array @@ -492,6 +498,50 @@ namespace rsx return false; } + /** + * Note that find and find_if return pointers to objects and not iterators for simplified usage. + * It is functionally equivalent to retrieve a nullptr meaning empty object stored and nullptr meaning not found for all practical uses of this container. 
+ */ + template + requires is_trivially_comparable_v + Ty* find(const T& value) + { + for (auto it = begin(); it != end(); ++it) + { + if (*it == value) + { + return &(*it); + } + } + return nullptr; + } + + // Remove when we switch to C++23 + template + requires is_trivially_comparable_v + const Ty* find(const T& value) const + { + return const_cast*>(this)->find(value); + } + + Ty* find_if(std::predicate auto predicate) + { + for (auto it = begin(); it != end(); ++it) + { + if (std::invoke(predicate, *it)) + { + return &(*it); + } + } + return nullptr; + } + + // Remove with C++23 + const Ty* find_if(std::predicate auto predicate) const + { + return const_cast*>(this)->find_if(predicate); + } + bool erase_if(std::predicate auto predicate) { if (!_size) From f300832edb4da782cc5ee59d63127822f274995f Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 22 Nov 2025 20:26:14 +0300 Subject: [PATCH 14/42] rsx: Implement FP to CFG analyzer --- rpcs3/CMakeLists.txt | 1 + rpcs3/Emu/CMakeLists.txt | 1 + rpcs3/Emu/RSX/Program/Assembler/CFG.h | 39 ++++ rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp | 158 +++++++++++++ rpcs3/Emu/RSX/Program/Assembler/IR.h | 70 ++++++ .../RSX/Program/FragmentProgramDecompiler.h | 2 + rpcs3/emucore.vcxproj | 3 + rpcs3/emucore.vcxproj.filters | 12 + rpcs3/tests/rpcs3_test.vcxproj | 1 + rpcs3/tests/test_rsx_cfg.cpp | 218 ++++++++++++++++++ rpcs3/tests/test_simple_array.cpp | 20 ++ 11 files changed, 525 insertions(+) create mode 100644 rpcs3/Emu/RSX/Program/Assembler/CFG.h create mode 100644 rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp create mode 100644 rpcs3/Emu/RSX/Program/Assembler/IR.h create mode 100644 rpcs3/tests/test_rsx_cfg.cpp diff --git a/rpcs3/CMakeLists.txt b/rpcs3/CMakeLists.txt index 3217cc38f5..796351e16c 100644 --- a/rpcs3/CMakeLists.txt +++ b/rpcs3/CMakeLists.txt @@ -192,6 +192,7 @@ if(BUILD_RPCS3_TESTS) tests/test_tuple.cpp tests/test_simple_array.cpp tests/test_address_range.cpp + tests/test_rsx_cfg.cpp ) 
target_link_libraries(rpcs3_test diff --git a/rpcs3/Emu/CMakeLists.txt b/rpcs3/Emu/CMakeLists.txt index 139688947d..1a902b46df 100644 --- a/rpcs3/Emu/CMakeLists.txt +++ b/rpcs3/Emu/CMakeLists.txt @@ -516,6 +516,7 @@ target_sources(rpcs3_emu PRIVATE RSX/Overlays/overlay_video.cpp RSX/Overlays/Shaders/shader_loading_dialog.cpp RSX/Overlays/Shaders/shader_loading_dialog_native.cpp + RSX/Program/Assembler/FPToCFG.cpp RSX/Program/CgBinaryProgram.cpp RSX/Program/CgBinaryFragmentProgram.cpp RSX/Program/CgBinaryVertexProgram.cpp diff --git a/rpcs3/Emu/RSX/Program/Assembler/CFG.h b/rpcs3/Emu/RSX/Program/Assembler/CFG.h new file mode 100644 index 0000000000..b2d4ad75a8 --- /dev/null +++ b/rpcs3/Emu/RSX/Program/Assembler/CFG.h @@ -0,0 +1,39 @@ +#pragma once + +#include +#include "IR.h" + +#include + +struct RSXFragmentProgram; + +namespace rsx::assembler +{ + struct FlowGraph + { + std::list blocks; + + BasicBlock* push(BasicBlock* parent = nullptr, u32 pc = 0) + { + if (!parent && !blocks.empty()) + { + parent = &blocks.back(); + } + + blocks.push_back({}); + BasicBlock* new_block = &blocks.back(); + + if (parent) + { + parent->insert_succ(new_block); + new_block->insert_pred(parent); + } + + new_block->id = pc; + return new_block; + } + }; + + FlowGraph deconstruct_fragment_program(const RSXFragmentProgram& prog); +} + diff --git a/rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp b/rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp new file mode 100644 index 0000000000..0f8cd2ea91 --- /dev/null +++ b/rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp @@ -0,0 +1,158 @@ +#include "stdafx.h" + +#pragma optimize("", off) + +#include "CFG.h" + +#include "Emu/RSX/Common/simple_array.hpp" +#include "Emu/RSX/Program/RSXFragmentProgram.h" +#include "Emu/RSX/Program/ProgramStateCache.h" + +#include +#include + +using namespace program_hash_util; + +namespace rsx::assembler +{ + inline v128 decode_instruction(const v128& raw_inst) + { + // Fixup of RSX's weird half-word shuffle for FP instructions + // 
Convert input stream into LE u16 array + __m128i _mask0 = _mm_set1_epi32(0xff00ff00); + __m128i _mask1 = _mm_set1_epi32(0x00ff00ff); + __m128i a = _mm_slli_epi32(static_cast<__m128i>(raw_inst), 8); + __m128i b = _mm_srli_epi32(static_cast<__m128i>(raw_inst), 8); + __m128i ret = _mm_or_epi32( + _mm_and_epi32(_mask0, a), + _mm_and_epi32(_mask1, b) + ); + return v128::loadu(&ret); + } + + FlowGraph deconstruct_fragment_program(const RSXFragmentProgram& prog) + { + // For a flowgraph, we don't care at all about the actual contents, just flow control instructions. + OPDEST dst{}; + SRC0 src0{}; + SRC1 src1{}; + SRC2 src2{}; + + u32 pc = 0; // Program counter + u32 instruction_size = 0; + bool end = false; + + // Flow control data + rsx::simple_array end_blocks; + rsx::simple_array else_blocks; + + // Data block + u32* data = static_cast(prog.get_data()); + + // Output + FlowGraph graph{}; + BasicBlock* bb = graph.push(); + + auto find_block_for_pc = [&](u32 id) -> BasicBlock* + { + auto found = std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == id)); + if (found != graph.blocks.end()) + { + return &(*found); + } + return nullptr; + }; + + auto safe_insert_block = [&](BasicBlock* parent, u32 id) -> BasicBlock* + { + if (auto found = find_block_for_pc(id)) + { + parent->insert_succ(found); + found->insert_pred(parent); + return found; + } + + return graph.push(parent, id); + }; + + while (!end) + { + BasicBlock** found = end_blocks.find_if(FN(x->id == pc)); + + if (!found) + { + found = else_blocks.find_if(FN(x->id == pc)); + } + + if (found) + { + bb = *found; + } + + const v128 raw_inst = v128::loadu(data, pc); + v128 decoded = decode_instruction(raw_inst); + + dst.HEX = decoded._u32[0]; + src0.HEX = decoded._u32[1]; + src1.HEX = decoded._u32[2]; + src2.HEX = decoded._u32[3]; + + const u32 opcode = dst.opcode | (src1.opcode_is_branch << 6); + + if (opcode == RSX_FP_OPCODE_NOP) + { + pc++; + continue; + } + + end = !!dst.end; + + 
bb->instructions.push_back({}); + auto& ir_inst = bb->instructions.back(); + std::memcpy(ir_inst.bytecode, &decoded._u32[0], 16); + + switch (opcode) + { + case RSX_FP_OPCODE_CAL: + // Unimplemented. Also unused by the RSX compiler + fmt::throw_exception("Unimplemented FP CAL instruction."); + break; + case RSX_FP_OPCODE_RET: + // Outside a subroutine, this doesn't mean much. The main block can conditionally return to stop execution early. + // This will not alter flow control. + break; + case RSX_FP_OPCODE_IFE: + { + // Inserts if and else and end blocks + auto parent = bb; + bb = safe_insert_block(parent, pc + 1); + if (src2.end_offset != src1.else_offset) + { + else_blocks.push_back(safe_insert_block(parent, src1.else_offset >> 2)); + } + end_blocks.push_back(safe_insert_block(parent, src2.end_offset >> 2)); + break; + } + case RSX_FP_OPCODE_LOOP: + case RSX_FP_OPCODE_REP: + { + // Inserts for and end blocks + auto parent = bb; + bb = safe_insert_block(parent, pc + 1); + end_blocks.push_back(safe_insert_block(parent, src2.end_offset >> 2)); + break; + } + default: + if (fragment_program_utils::is_any_src_constant(decoded)) + { + pc++; + } + } + + pc++; + } + + graph.blocks.sort(FN(x.id < y.id)); + return graph; + } +} diff --git a/rpcs3/Emu/RSX/Program/Assembler/IR.h b/rpcs3/Emu/RSX/Program/Assembler/IR.h new file mode 100644 index 0000000000..53d6080a11 --- /dev/null +++ b/rpcs3/Emu/RSX/Program/Assembler/IR.h @@ -0,0 +1,70 @@ +#pragma once + +#include + +namespace rsx::assembler +{ + struct BasicBlock; + + struct Register + { + int id = 0; + bool f16 = false; + }; + + struct RegisterRef + { + Register reg{}; + + // Vector information + union + { + u32 mask; + + struct + { + bool x : 1; + bool y : 1; + bool z : 1; + bool w : 1; + }; + }; + }; + + struct Instruction + { + // Raw data. 
Every instruction is max 128 bits + u32 bytecode[4]; + + // Decoded + u32 opcode = 0; + std::vector srcs; + std::vector dsts; + }; + + struct FlowEdge + { + BasicBlock* from = nullptr; + BasicBlock* to = nullptr; + }; + + struct BasicBlock + { + u32 id = 0; + std::vector instructions; + std::vector succ; // [0] = if/loop, [1] = else + std::vector pred; // Back edge. + + void insert_succ(BasicBlock* b) + { + FlowEdge e{ .from = this, .to = b }; + succ.push_back(e); + } + + void insert_pred(BasicBlock* b) + { + FlowEdge e{ .from = this, .to = b }; + pred.push_back(e); + } + }; +} diff --git a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h index 467c6f3ac7..d3ef8643f1 100644 --- a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h +++ b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h @@ -3,6 +3,8 @@ #include "FragmentProgramRegister.h" #include "RSXFragmentProgram.h" +#include "Assembler/CFG.h" + #include #include diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 0bbea41832..1b5716f01b 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -156,6 +156,7 @@ + @@ -699,6 +700,8 @@ + + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index 65cd509f85..23b7ef174d 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -133,6 +133,9 @@ {ce6d6b90-8313-4273-b46c-d92bd450c002} + + {d99df916-8a99-428b-869a-9f14ac0ab411} + @@ -1372,6 +1375,9 @@ Emu\Io + + Emu\GPU\RSX\Program\Assembler + @@ -2764,6 +2770,12 @@ Utilities + + Emu\GPU\RSX\Program\Assembler + + + Emu\GPU\RSX\Program\Assembler + diff --git a/rpcs3/tests/rpcs3_test.vcxproj b/rpcs3/tests/rpcs3_test.vcxproj index 4f0d136a9a..22992e6a07 100644 --- a/rpcs3/tests/rpcs3_test.vcxproj +++ b/rpcs3/tests/rpcs3_test.vcxproj @@ -88,6 +88,7 @@ + diff --git a/rpcs3/tests/test_rsx_cfg.cpp b/rpcs3/tests/test_rsx_cfg.cpp new file mode 100644 index 0000000000..f868965515 --- /dev/null +++ 
b/rpcs3/tests/test_rsx_cfg.cpp @@ -0,0 +1,218 @@ +#pragma optimize("", off) +#include + +#include "Emu/RSX/Common/simple_array.hpp" +#include "Emu/RSX/Program/Assembler/CFG.h" +#include "Emu/RSX/Program/RSXFragmentProgram.h" + +#include + +namespace rsx::assembler +{ + auto swap_bytes16 = [](u32 dword) -> u32 + { + // Lazy encode, but good enough for what we need here. + union v32 + { + u32 HEX; + u8 _v[4]; + }; + + u8* src_bytes = reinterpret_cast(&dword); + v32 dst_bytes; + + dst_bytes._v[0] = src_bytes[1]; + dst_bytes._v[1] = src_bytes[0]; + dst_bytes._v[2] = src_bytes[3]; + dst_bytes._v[3] = src_bytes[2]; + + return dst_bytes.HEX; + }; + + // Instruction mocks because we don't have a working assember (yet) + auto encode_instruction = [](u32 opcode, bool end = false) -> v128 + { + OPDEST dst{}; + dst.opcode = opcode; + + if (end) + { + dst.end = 1; + } + + return v128::from32(swap_bytes16(dst.HEX), 0, 0, 0); + }; + + auto create_if(u32 end, u32 _else = 0) + { + OPDEST dst{}; + dst.opcode = RSX_FP_OPCODE_IFE; + + SRC1 src1{}; + src1.else_offset = (_else ? 
_else : end) << 2; + src1.opcode_is_branch = 1; + + SRC2 src2{}; + src2.end_offset = end << 2; + + return v128::from32(swap_bytes16(dst.HEX), 0, swap_bytes16(src1.HEX), swap_bytes16(src2.HEX)); + }; + + TEST(CFG, FpToCFG_Basic) + { + rsx::simple_array buffer = { + encode_instruction(RSX_FP_OPCODE_ADD), + encode_instruction(RSX_FP_OPCODE_MOV, true) + }; + + RSXFragmentProgram program{}; + program.data = buffer.data(); + + FlowGraph graph = deconstruct_fragment_program(program); + + EXPECT_EQ(graph.blocks.size(), 1); + EXPECT_EQ(graph.blocks.front().instructions.size(), 2); + } + + TEST(CFG, FpToCFG_IF) + { + rsx::simple_array buffer = { + encode_instruction(RSX_FP_OPCODE_ADD), // 0 + encode_instruction(RSX_FP_OPCODE_MOV), // 1 + create_if(4), // 2 (BR, 4) + encode_instruction(RSX_FP_OPCODE_ADD), // 3 + encode_instruction(RSX_FP_OPCODE_MOV, true), // 4 (Merge block) + }; + + const std::pair expected_block_data[3] = { + { 0, 3 }, // Head + { 3, 1 }, // Branch + { 4, 1 }, // Merge + }; + + RSXFragmentProgram program{}; + program.data = buffer.data(); + + FlowGraph graph = deconstruct_fragment_program(program); + + ASSERT_EQ(graph.blocks.size(), 3); + + int i = 0; + for (auto it = graph.blocks.begin(); it != graph.blocks.end(); ++it) + { + const auto& expected = expected_block_data[i++]; + EXPECT_EQ(it->id, expected.first); + EXPECT_EQ(it->instructions.size(), expected.second); + } + } + + TEST(CFG, FpToCFG_NestedIF) + { + rsx::simple_array buffer = { + encode_instruction(RSX_FP_OPCODE_ADD), // 0 + encode_instruction(RSX_FP_OPCODE_MOV), // 1 + create_if(8), // 2 (BR, 8) + encode_instruction(RSX_FP_OPCODE_ADD), // 3 + create_if(6), // 4 (BR, 6) + encode_instruction(RSX_FP_OPCODE_MOV), // 5 + encode_instruction(RSX_FP_OPCODE_MOV), // 6 (merge block 1) + encode_instruction(RSX_FP_OPCODE_ADD), // 7 + encode_instruction(RSX_FP_OPCODE_MOV, true) // 8 (merge block 2 + }; + + const std::pair expected_block_data[5] = { + { 0, 3 }, // Head + { 3, 2 }, // Branch 1 + { 5, 1 }, // 
Branch 2 + { 6, 2 }, // Merge 1 + { 8, 1 }, // Merge 2 + }; + + RSXFragmentProgram program{}; + program.data = buffer.data(); + + FlowGraph graph = deconstruct_fragment_program(program); + + ASSERT_EQ(graph.blocks.size(), 5); + + int i = 0; + for (auto it = graph.blocks.begin(); it != graph.blocks.end(); ++it) + { + const auto& expected = expected_block_data[i++]; + EXPECT_EQ(it->id, expected.first); + EXPECT_EQ(it->instructions.size(), expected.second); + } + } + + TEST(CFG, FpToCFG_NestedIF_MultiplePred) + { + rsx::simple_array buffer = { + encode_instruction(RSX_FP_OPCODE_ADD), // 0 + encode_instruction(RSX_FP_OPCODE_MOV), // 1 + create_if(6), // 2 (BR, 6) + encode_instruction(RSX_FP_OPCODE_ADD), // 3 + create_if(6), // 4 (BR, 6) + encode_instruction(RSX_FP_OPCODE_MOV), // 5 + encode_instruction(RSX_FP_OPCODE_MOV), // 6 (merge block) + encode_instruction(RSX_FP_OPCODE_ADD), // 7 + encode_instruction(RSX_FP_OPCODE_MOV, true) // 8 + }; + + const std::pair expected_block_data[4] = { + { 0, 3 }, // Head + { 3, 2 }, // Branch 1 + { 5, 1 }, // Branch 2 + { 6, 3 }, // Merge + }; + + RSXFragmentProgram program{}; + program.data = buffer.data(); + + FlowGraph graph = deconstruct_fragment_program(program); + + ASSERT_EQ(graph.blocks.size(), 4); + + int i = 0; + for (auto it = graph.blocks.begin(); it != graph.blocks.end(); ++it) + { + const auto& expected = expected_block_data[i++]; + EXPECT_EQ(it->id, expected.first); + EXPECT_EQ(it->instructions.size(), expected.second); + } + } + + TEST(CFG, FpToCFG_IF_ELSE) + { + rsx::simple_array buffer = { + encode_instruction(RSX_FP_OPCODE_ADD), // 0 + encode_instruction(RSX_FP_OPCODE_MOV), // 1 + create_if(6, 4), // 2 (BR, 6) + encode_instruction(RSX_FP_OPCODE_ADD), // 3 + encode_instruction(RSX_FP_OPCODE_MOV), // 4 (Else) + encode_instruction(RSX_FP_OPCODE_ADD), // 5 + encode_instruction(RSX_FP_OPCODE_MOV, true), // 6 (Merge) + }; + + const std::pair expected_block_data[4] = { + { 0, 3 }, // Head + { 3, 1 }, // Branch positive + 
{ 4, 2 }, // Branch negative + { 6, 1 }, // Merge + }; + + RSXFragmentProgram program{}; + program.data = buffer.data(); + + FlowGraph graph = deconstruct_fragment_program(program); + + ASSERT_EQ(graph.blocks.size(), 4); + + int i = 0; + for (auto it = graph.blocks.begin(); it != graph.blocks.end(); ++it) + { + const auto& expected = expected_block_data[i++]; + EXPECT_EQ(it->id, expected.first); + EXPECT_EQ(it->instructions.size(), expected.second); + } + } +} diff --git a/rpcs3/tests/test_simple_array.cpp b/rpcs3/tests/test_simple_array.cpp index 90a0e73575..fcec3568f4 100644 --- a/rpcs3/tests/test_simple_array.cpp +++ b/rpcs3/tests/test_simple_array.cpp @@ -303,4 +303,24 @@ namespace rsx EXPECT_EQ(data_ptr1 & 15, 0); EXPECT_EQ(data_ptr2 & 127, 0); } + + TEST(SimpleArray, Find) + { + const rsx::simple_array arr{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 + }; + + EXPECT_EQ(*arr.find(8), 8); + EXPECT_EQ(arr.find(99), nullptr); + } + + TEST(SimpleArray, FindIf) + { + const rsx::simple_array arr{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 + }; + + EXPECT_EQ(*arr.find_if(FN(x == 8)), 8); + EXPECT_EQ(arr.find_if(FN(x == 99)), nullptr); + } } From 42d9065c11466a218d8de64f60a89bd2f84e83b4 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 23 Nov 2025 19:25:07 +0300 Subject: [PATCH 15/42] rsx: Add flow information to flow edges --- rpcs3/Emu/RSX/Program/Assembler/CFG.h | 6 ++--- rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp | 18 ++++++------- rpcs3/Emu/RSX/Program/Assembler/IR.h | 30 ++++++++++++++++----- rpcs3/tests/test_rsx_cfg.cpp | 5 ++++ 4 files changed, 40 insertions(+), 19 deletions(-) diff --git a/rpcs3/Emu/RSX/Program/Assembler/CFG.h b/rpcs3/Emu/RSX/Program/Assembler/CFG.h index b2d4ad75a8..9bc44a22d1 100644 --- a/rpcs3/Emu/RSX/Program/Assembler/CFG.h +++ b/rpcs3/Emu/RSX/Program/Assembler/CFG.h @@ -13,7 +13,7 @@ namespace rsx::assembler { std::list blocks; - BasicBlock* push(BasicBlock* parent = nullptr, u32 pc = 0) + BasicBlock* push(BasicBlock* parent = nullptr, u32 pc = 0, EdgeType 
edge_type = EdgeType::NONE) { if (!parent && !blocks.empty()) { @@ -25,8 +25,8 @@ namespace rsx::assembler if (parent) { - parent->insert_succ(new_block); - new_block->insert_pred(parent); + parent->insert_succ(new_block, edge_type); + new_block->insert_pred(parent, edge_type); } new_block->id = pc; diff --git a/rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp b/rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp index 0f8cd2ea91..234a0e5450 100644 --- a/rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp +++ b/rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp @@ -63,16 +63,16 @@ namespace rsx::assembler return nullptr; }; - auto safe_insert_block = [&](BasicBlock* parent, u32 id) -> BasicBlock* + auto safe_insert_block = [&](BasicBlock* parent, u32 id, EdgeType edge_type) -> BasicBlock* { if (auto found = find_block_for_pc(id)) { - parent->insert_succ(found); - found->insert_pred(parent); + parent->insert_succ(found, edge_type); + found->insert_pred(parent, edge_type); return found; } - return graph.push(parent, id); + return graph.push(parent, id, edge_type); }; while (!end) @@ -125,12 +125,12 @@ namespace rsx::assembler { // Inserts if and else and end blocks auto parent = bb; - bb = safe_insert_block(parent, pc + 1); + bb = safe_insert_block(parent, pc + 1, EdgeType::IF); if (src2.end_offset != src1.else_offset) { - else_blocks.push_back(safe_insert_block(parent, src1.else_offset >> 2)); + else_blocks.push_back(safe_insert_block(parent, src1.else_offset >> 2, EdgeType::ELSE)); } - end_blocks.push_back(safe_insert_block(parent, src2.end_offset >> 2)); + end_blocks.push_back(safe_insert_block(parent, src2.end_offset >> 2, EdgeType::ENDIF)); break; } case RSX_FP_OPCODE_LOOP: @@ -138,8 +138,8 @@ namespace rsx::assembler { // Inserts for and end blocks auto parent = bb; - bb = safe_insert_block(parent, pc + 1); - end_blocks.push_back(safe_insert_block(parent, src2.end_offset >> 2)); + bb = safe_insert_block(parent, pc + 1, EdgeType::LOOP); + end_blocks.push_back(safe_insert_block(parent, 
src2.end_offset >> 2, EdgeType::ENDLOOP)); break; } default: diff --git a/rpcs3/Emu/RSX/Program/Assembler/IR.h b/rpcs3/Emu/RSX/Program/Assembler/IR.h index 53d6080a11..3ff8eb1a3a 100644 --- a/rpcs3/Emu/RSX/Program/Assembler/IR.h +++ b/rpcs3/Emu/RSX/Program/Assembler/IR.h @@ -42,8 +42,19 @@ namespace rsx::assembler std::vector dsts; }; + enum class EdgeType + { + NONE, + IF, + ELSE, + ENDIF, + LOOP, + ENDLOOP + }; + struct FlowEdge { + EdgeType type = EdgeType::NONE; BasicBlock* from = nullptr; BasicBlock* to = nullptr; }; @@ -51,20 +62,25 @@ namespace rsx::assembler struct BasicBlock { u32 id = 0; - std::vector instructions; - std::vector succ; // [0] = if/loop, [1] = else - std::vector pred; // Back edge. + std::vector instructions; // Program instructions for the RSX processor + std::vector succ; // [0] = if/loop, [1] = else + std::vector pred; // Back edge. - void insert_succ(BasicBlock* b) + std::vector prologue; // Prologue, created by passes + std::vector epilogue; // Epilogue, created by passes + + FlowEdge* insert_succ(BasicBlock* b, EdgeType type = EdgeType::NONE) { - FlowEdge e{ .from = this, .to = b }; + FlowEdge e{ .type = type, .from = this, .to = b }; succ.push_back(e); + return &succ.back(); } - void insert_pred(BasicBlock* b) + FlowEdge* insert_pred(BasicBlock* b, EdgeType type = EdgeType::NONE) { - FlowEdge e{ .from = this, .to = b }; + FlowEdge e{ .type = type, .from = this, .to = b }; pred.push_back(e); + return &pred.back(); } }; } diff --git a/rpcs3/tests/test_rsx_cfg.cpp b/rpcs3/tests/test_rsx_cfg.cpp index f868965515..532721fc95 100644 --- a/rpcs3/tests/test_rsx_cfg.cpp +++ b/rpcs3/tests/test_rsx_cfg.cpp @@ -104,6 +104,11 @@ namespace rsx::assembler EXPECT_EQ(it->id, expected.first); EXPECT_EQ(it->instructions.size(), expected.second); } + + // Check edges + EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 3))->pred[0].type, EdgeType::IF); + EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 
0))->succ[0].type, EdgeType::IF); + EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 4))->pred[0].type, EdgeType::ENDIF); } TEST(CFG, FpToCFG_NestedIF) From 9d92e190eb183c92b36731ecfff6f6fe9c2fa07e Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 23 Nov 2025 21:11:05 +0300 Subject: [PATCH 16/42] rsx/cfg: Add support for multi-slot instructions with literals - Also fix pred edge direction bug --- rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp | 35 ++++++++++++++++++--- rpcs3/Emu/RSX/Program/Assembler/IR.h | 17 +++++++--- rpcs3/tests/test_rsx_cfg.cpp | 10 +++++- 3 files changed, 52 insertions(+), 10 deletions(-) diff --git a/rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp b/rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp index 234a0e5450..4433af29a2 100644 --- a/rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp +++ b/rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp @@ -1,17 +1,14 @@ #include "stdafx.h" -#pragma optimize("", off) - #include "CFG.h" #include "Emu/RSX/Common/simple_array.hpp" #include "Emu/RSX/Program/RSXFragmentProgram.h" -#include "Emu/RSX/Program/ProgramStateCache.h" #include +#include #include -using namespace program_hash_util; namespace rsx::assembler { @@ -75,6 +72,13 @@ namespace rsx::assembler return graph.push(parent, id, edge_type); }; + auto includes_literal_constant = [&]() + { + return src0.reg_type == RSX_FP_REGISTER_TYPE_CONSTANT || + src1.reg_type == RSX_FP_REGISTER_TYPE_CONSTANT || + src2.reg_type == RSX_FP_REGISTER_TYPE_CONSTANT; + }; + while (!end) { BasicBlock** found = end_blocks.find_if(FN(x->id == pc)); @@ -110,13 +114,21 @@ namespace rsx::assembler bb->instructions.push_back({}); auto& ir_inst = bb->instructions.back(); std::memcpy(ir_inst.bytecode, &decoded._u32[0], 16); + ir_inst.length = 4; + ir_inst.addr = pc * 16; switch (opcode) { + case RSX_FP_OPCODE_BRK: + break; case RSX_FP_OPCODE_CAL: // Unimplemented. 
Also unused by the RSX compiler fmt::throw_exception("Unimplemented FP CAL instruction."); break; + case RSX_FP_OPCODE_FENCT: + break; + case RSX_FP_OPCODE_FENCB: + break; case RSX_FP_OPCODE_RET: // Outside a subroutine, this doesn't mean much. The main block can conditionally return to stop execution early. // This will not alter flow control. @@ -143,8 +155,13 @@ namespace rsx::assembler break; } default: - if (fragment_program_utils::is_any_src_constant(decoded)) + if (includes_literal_constant()) { + const v128 constant_literal = v128::loadu(data, pc); + v128 decoded_literal = decode_instruction(constant_literal); + + std::memcpy(ir_inst.bytecode + 4, &decoded_literal._u32[0], 16); + ir_inst.length += 4; pc++; } } @@ -152,6 +169,14 @@ namespace rsx::assembler pc++; } + // Sort edges for each block by distance + for (auto& block : graph.blocks) + { + std::sort(block.pred.begin(), block.pred.end(), FN(x.from->id > y.from->id)); + std::sort(block.succ.begin(), block.succ.end(), FN(x.to->id < y.to->id)); + } + + // Sort block nodes by distance graph.blocks.sort(FN(x.id < y.id)); return graph; } diff --git a/rpcs3/Emu/RSX/Program/Assembler/IR.h b/rpcs3/Emu/RSX/Program/Assembler/IR.h index 3ff8eb1a3a..1f1c1d81d8 100644 --- a/rpcs3/Emu/RSX/Program/Assembler/IR.h +++ b/rpcs3/Emu/RSX/Program/Assembler/IR.h @@ -33,11 +33,20 @@ namespace rsx::assembler struct Instruction { - // Raw data. Every instruction is max 128 bits - u32 bytecode[4]; + // Raw data. Every instruction is max 128 bits. + // Each instruction can also have 128 bits of literal/embedded data. 
+ u32 bytecode[8]{ {} }; + u32 addr = 0; // Decoded u32 opcode = 0; + u8 length = 4; // Length in dwords + + // Padding + u8 reserved0 = 0; + u16 reserved1 = 0; + + // References std::vector srcs; std::vector dsts; }; @@ -49,7 +58,7 @@ namespace rsx::assembler ELSE, ENDIF, LOOP, - ENDLOOP + ENDLOOP, }; struct FlowEdge @@ -78,7 +87,7 @@ namespace rsx::assembler FlowEdge* insert_pred(BasicBlock* b, EdgeType type = EdgeType::NONE) { - FlowEdge e{ .type = type, .from = this, .to = b }; + FlowEdge e{ .type = type, .from = b, .to = this }; pred.push_back(e); return &pred.back(); } diff --git a/rpcs3/tests/test_rsx_cfg.cpp b/rpcs3/tests/test_rsx_cfg.cpp index 532721fc95..b5969081d9 100644 --- a/rpcs3/tests/test_rsx_cfg.cpp +++ b/rpcs3/tests/test_rsx_cfg.cpp @@ -1,4 +1,3 @@ -#pragma optimize("", off) #include #include "Emu/RSX/Common/simple_array.hpp" @@ -72,6 +71,8 @@ namespace rsx::assembler EXPECT_EQ(graph.blocks.size(), 1); EXPECT_EQ(graph.blocks.front().instructions.size(), 2); + EXPECT_EQ(graph.blocks.front().instructions.front().length, 4); + EXPECT_NE(graph.blocks.front().instructions.front().addr, 0); } TEST(CFG, FpToCFG_IF) @@ -184,6 +185,13 @@ namespace rsx::assembler EXPECT_EQ(it->id, expected.first); EXPECT_EQ(it->instructions.size(), expected.second); } + + // Predecessors must be ordered, closest first + ASSERT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 6))->pred.size(), 2); + EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 6))->pred[0].type, EdgeType::ENDIF); + EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 6))->pred[0].from->id, 3); + EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 6))->pred[1].type, EdgeType::ENDIF); + EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 6))->pred[1].from->id, 0); } TEST(CFG, FpToCFG_IF_ELSE) From cb7650240c3f80bcbcb4afac07eefa587f8dc012 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 23 
Nov 2025 19:39:48 +0300 Subject: [PATCH 17/42] rsx/fp: Use CFG for fragment program --- .../RSX/Program/FragmentProgramDecompiler.cpp | 255 +++++++++--------- 1 file changed, 130 insertions(+), 125 deletions(-) diff --git a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp index e5742fffda..ad2ac55075 100644 --- a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp @@ -1297,7 +1297,7 @@ bool FragmentProgramDecompiler::handle_tex_srb(u32 opcode) std::string FragmentProgramDecompiler::Decompile() { - auto data = static_cast*>(m_prog.get_data()); + const auto graph = rsx::assembler::deconstruct_fragment_program(m_prog); m_size = 0; m_location = 0; m_loop_count = 0; @@ -1314,141 +1314,146 @@ std::string FragmentProgramDecompiler::Decompile() int forced_unit = FORCE_NONE; - while (true) + for (const auto &block : graph.blocks) { - for (auto found = std::find(m_end_offsets.begin(), m_end_offsets.end(), m_size); - found != m_end_offsets.end(); - found = std::find(m_end_offsets.begin(), m_end_offsets.end(), m_size)) + // TODO: Handle block prologue if any + + for (const auto& inst : block.instructions) { - m_end_offsets.erase(found); - m_code_level--; - AddCode("}"); - m_loop_count--; - } - - for (auto found = std::find(m_else_offsets.begin(), m_else_offsets.end(), m_size); - found != m_else_offsets.end(); - found = std::find(m_else_offsets.begin(), m_else_offsets.end(), m_size)) - { - m_else_offsets.erase(found); - m_code_level--; - AddCode("}"); - AddCode("else"); - AddCode("{"); - m_code_level++; - } - - dst.HEX = GetData(data[0]); - src0.HEX = GetData(data[1]); - src1.HEX = GetData(data[2]); - src2.HEX = GetData(data[3]); - - m_offset = 4 * sizeof(u32); - opflags = 0; - - const u32 opcode = dst.opcode | (src1.opcode_is_branch << 6); - - auto SIP = [&]() - { - switch (opcode) + for (auto found = std::find(m_end_offsets.begin(), m_end_offsets.end(), m_size); 
+ found != m_end_offsets.end(); + found = std::find(m_end_offsets.begin(), m_end_offsets.end(), m_size)) { - case RSX_FP_OPCODE_BRK: - if (m_loop_count) AddFlowOp("break"); - else rsx_log.error("BRK opcode found outside of a loop"); - break; - case RSX_FP_OPCODE_CAL: - rsx_log.error("Unimplemented SIP instruction: CAL"); - break; - case RSX_FP_OPCODE_FENCT: - AddCode("//FENCT"); - forced_unit = FORCE_SCT; - break; - case RSX_FP_OPCODE_FENCB: - AddCode("//FENCB"); - forced_unit = FORCE_SCB; - break; - case RSX_FP_OPCODE_IFE: - AddCode("if($cond)"); - if (src2.end_offset != src1.else_offset) - m_else_offsets.push_back(src1.else_offset << 2); - m_end_offsets.push_back(src2.end_offset << 2); - AddCode("{"); - m_code_level++; - break; - case RSX_FP_OPCODE_LOOP: - if (!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt) - { - AddCode(fmt::format("//$ifcond for(int i%u = %u; i%u < %u; i%u += %u) {} //-> %u //LOOP", - m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment, src2.end_offset)); - } - else - { - AddCode(fmt::format("$ifcond for(int i%u = %u; i%u < %u; i%u += %u) //LOOP", - m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment)); - m_loop_count++; - m_end_offsets.push_back(src2.end_offset << 2); - AddCode("{"); - m_code_level++; - } - break; - case RSX_FP_OPCODE_REP: - if (!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt) - { - AddCode(fmt::format("//$ifcond for(int i%u = %u; i%u < %u; i%u += %u) {} //-> %u //REP", - m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment, src2.end_offset)); - } - else - { - AddCode(fmt::format("if($cond) for(int i%u = %u; i%u < %u; i%u += %u) //REP", - m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment)); - m_loop_count++; - m_end_offsets.push_back(src2.end_offset << 2); - AddCode("{"); - m_code_level++; - } - break; - case RSX_FP_OPCODE_RET: - 
AddFlowOp("return"); - break; - - default: - return false; + m_end_offsets.erase(found); + m_code_level--; + AddCode("}"); + m_loop_count--; } - return true; - }; + for (auto found = std::find(m_else_offsets.begin(), m_else_offsets.end(), m_size); + found != m_else_offsets.end(); + found = std::find(m_else_offsets.begin(), m_else_offsets.end(), m_size)) + { + m_else_offsets.erase(found); + m_code_level--; + AddCode("}"); + AddCode("else"); + AddCode("{"); + m_code_level++; + } - switch (opcode) - { - case RSX_FP_OPCODE_NOP: - break; - case RSX_FP_OPCODE_KIL: - properties.has_discard_op = true; - AddFlowOp("_kill()"); - break; - default: - int prev_force_unit = forced_unit; + dst.HEX = inst.bytecode[0]; + src0.HEX = inst.bytecode[1]; + src1.HEX = inst.bytecode[2]; + src2.HEX = inst.bytecode[3]; - // Some instructions do not respect forced unit - // Tested with Tales of Vesperia - if (SIP()) break; - if (handle_tex_srb(opcode)) break; + m_offset = 4 * sizeof(u32); + opflags = 0; - // FENCT/FENCB do not actually reject instructions if they dont match the forced unit - // Looks like they are optimization hints and not hard-coded forced paths - if (handle_sct_scb(opcode)) break; - forced_unit = FORCE_NONE; + const u32 opcode = dst.opcode | (src1.opcode_is_branch << 6); - rsx_log.error("Unknown/illegal instruction: 0x%x (forced unit %d)", opcode, prev_force_unit); - break; + auto SIP = [&]() + { + switch (opcode) + { + case RSX_FP_OPCODE_BRK: + if (m_loop_count) AddFlowOp("break"); + else rsx_log.error("BRK opcode found outside of a loop"); + break; + case RSX_FP_OPCODE_CAL: + rsx_log.error("Unimplemented SIP instruction: CAL"); + break; + case RSX_FP_OPCODE_FENCT: + AddCode("//FENCT"); + forced_unit = FORCE_SCT; + break; + case RSX_FP_OPCODE_FENCB: + AddCode("//FENCB"); + forced_unit = FORCE_SCB; + break; + case RSX_FP_OPCODE_IFE: + AddCode("if($cond)"); + if (src2.end_offset != src1.else_offset) + m_else_offsets.push_back(src1.else_offset << 2); + 
m_end_offsets.push_back(src2.end_offset << 2); + AddCode("{"); + m_code_level++; + break; + case RSX_FP_OPCODE_LOOP: + if (!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt) + { + AddCode(fmt::format("//$ifcond for(int i%u = %u; i%u < %u; i%u += %u) {} //-> %u //LOOP", + m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment, src2.end_offset)); + } + else + { + AddCode(fmt::format("$ifcond for(int i%u = %u; i%u < %u; i%u += %u) //LOOP", + m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment)); + m_loop_count++; + m_end_offsets.push_back(src2.end_offset << 2); + AddCode("{"); + m_code_level++; + } + break; + case RSX_FP_OPCODE_REP: + if (!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt) + { + AddCode(fmt::format("//$ifcond for(int i%u = %u; i%u < %u; i%u += %u) {} //-> %u //REP", + m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment, src2.end_offset)); + } + else + { + AddCode(fmt::format("if($cond) for(int i%u = %u; i%u < %u; i%u += %u) //REP", + m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment)); + m_loop_count++; + m_end_offsets.push_back(src2.end_offset << 2); + AddCode("{"); + m_code_level++; + } + break; + case RSX_FP_OPCODE_RET: + AddFlowOp("return"); + break; + + default: + return false; + } + + return true; + }; + + switch (opcode) + { + case RSX_FP_OPCODE_NOP: + break; + case RSX_FP_OPCODE_KIL: + properties.has_discard_op = true; + AddFlowOp("_kill()"); + break; + default: + int prev_force_unit = forced_unit; + + // Some instructions do not respect forced unit + // Tested with Tales of Vesperia + if (SIP()) break; + if (handle_tex_srb(opcode)) break; + + // FENCT/FENCB do not actually reject instructions if they dont match the forced unit + // Looks like they are optimization hints and not hard-coded forced paths + if (handle_sct_scb(opcode)) break; + forced_unit = FORCE_NONE; 
+ + rsx_log.error("Unknown/illegal instruction: 0x%x (forced unit %d)", opcode, prev_force_unit); + break; + } + + m_size += m_offset; + ensure((m_offset & 15) == 0); // Must be aligned to 16 bytes + + if (dst.end) break; } - m_size += m_offset; - - if (dst.end) break; - - ensure(m_offset % sizeof(u32) == 0); - data += m_offset / sizeof(u32); + // TODO: Handle block epilogue if needed } while (m_code_level > 1) From 5c187f5cdae21dc03baf256e2e5b2f5e31f3be24 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 23 Nov 2025 21:12:44 +0300 Subject: [PATCH 18/42] rsx/fp: Use CFG to decompile fragment programs --- .../RSX/Program/FragmentProgramDecompiler.cpp | 102 ++++++++---------- .../RSX/Program/FragmentProgramDecompiler.h | 5 +- 2 files changed, 44 insertions(+), 63 deletions(-) diff --git a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp index ad2ac55075..2ebfd7d8d7 100644 --- a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp @@ -234,7 +234,8 @@ std::string FragmentProgramDecompiler::AddCond() std::string FragmentProgramDecompiler::AddConst() { - const u32 constant_id = m_size + (4 * sizeof(u32)); + ensure(m_instruction->length == 8); + const u32 constant_id = m_instruction->addr + 16; u32 index = umax; if (auto found = m_constant_offsets.find(constant_id); @@ -249,9 +250,6 @@ std::string FragmentProgramDecompiler::AddConst() m_constant_offsets[constant_id] = index; } - // Skip next instruction, its just a literal - m_offset = 2 * 4 * sizeof(u32); - // Return the next offset index return "_fetch_constant(" + std::to_string(index) + ")"; } @@ -1317,37 +1315,52 @@ std::string FragmentProgramDecompiler::Decompile() for (const auto &block : graph.blocks) { // TODO: Handle block prologue if any + if (!block.pred.empty()) + { + // CFG guarantees predecessors are sorted, closest one first + for (const auto& pred : block.pred) + { + switch (pred.type) + { + 
case rsx::assembler::EdgeType::ENDLOOP: + m_loop_count--; + [[ fallthrough ]]; + case rsx::assembler::EdgeType::ENDIF: + m_code_level--; + AddCode("}"); + break; + case rsx::assembler::EdgeType::LOOP: + m_loop_count++; + [[ fallthrough ]]; + case rsx::assembler::EdgeType::IF: + // Instruction will be inserted by the SIP decoder + AddCode("{"); + m_code_level++; + break; + case rsx::assembler::EdgeType::ELSE: + // This one needs more testing + m_code_level--; + AddCode("}"); + AddCode("else"); + AddCode("{"); + m_code_level++; + break; + default: + // Start a new block anyway + fmt::throw_exception("Unexpected block found"); + } + } + } for (const auto& inst : block.instructions) { - for (auto found = std::find(m_end_offsets.begin(), m_end_offsets.end(), m_size); - found != m_end_offsets.end(); - found = std::find(m_end_offsets.begin(), m_end_offsets.end(), m_size)) - { - m_end_offsets.erase(found); - m_code_level--; - AddCode("}"); - m_loop_count--; - } - - for (auto found = std::find(m_else_offsets.begin(), m_else_offsets.end(), m_size); - found != m_else_offsets.end(); - found = std::find(m_else_offsets.begin(), m_else_offsets.end(), m_size)) - { - m_else_offsets.erase(found); - m_code_level--; - AddCode("}"); - AddCode("else"); - AddCode("{"); - m_code_level++; - } + m_instruction = &inst; dst.HEX = inst.bytecode[0]; src0.HEX = inst.bytecode[1]; src1.HEX = inst.bytecode[2]; src2.HEX = inst.bytecode[3]; - m_offset = 4 * sizeof(u32); opflags = 0; const u32 opcode = dst.opcode | (src1.opcode_is_branch << 6); @@ -1373,43 +1386,14 @@ std::string FragmentProgramDecompiler::Decompile() break; case RSX_FP_OPCODE_IFE: AddCode("if($cond)"); - if (src2.end_offset != src1.else_offset) - m_else_offsets.push_back(src1.else_offset << 2); - m_end_offsets.push_back(src2.end_offset << 2); - AddCode("{"); - m_code_level++; break; case RSX_FP_OPCODE_LOOP: - if (!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt) - { - AddCode(fmt::format("//$ifcond for(int i%u = %u; i%u < %u; 
i%u += %u) {} //-> %u //LOOP", - m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment, src2.end_offset)); - } - else - { - AddCode(fmt::format("$ifcond for(int i%u = %u; i%u < %u; i%u += %u) //LOOP", + AddCode(fmt::format("$ifcond for(int i%u = %u; i%u < %u; i%u += %u) //LOOP", m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment)); - m_loop_count++; - m_end_offsets.push_back(src2.end_offset << 2); - AddCode("{"); - m_code_level++; - } break; case RSX_FP_OPCODE_REP: - if (!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt) - { - AddCode(fmt::format("//$ifcond for(int i%u = %u; i%u < %u; i%u += %u) {} //-> %u //REP", - m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment, src2.end_offset)); - } - else - { - AddCode(fmt::format("if($cond) for(int i%u = %u; i%u < %u; i%u += %u) //REP", + AddCode(fmt::format("if($cond) for(int i%u = %u; i%u < %u; i%u += %u) //REP", m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment)); - m_loop_count++; - m_end_offsets.push_back(src2.end_offset << 2); - AddCode("{"); - m_code_level++; - } break; case RSX_FP_OPCODE_RET: AddFlowOp("return"); @@ -1447,9 +1431,7 @@ std::string FragmentProgramDecompiler::Decompile() break; } - m_size += m_offset; - ensure((m_offset & 15) == 0); // Must be aligned to 16 bytes - + m_size += m_instruction->length * 4; if (dst.end) break; } diff --git a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h index d3ef8643f1..b68750bdfc 100644 --- a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h +++ b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h @@ -41,17 +41,16 @@ class FragmentProgramDecompiler SRC2 src2; u32 opflags; + const rsx::assembler::Instruction* m_instruction; + std::string main; u32& m_size; u32 m_const_index = 0; - u32 m_offset; u32 m_location = 0; bool m_is_valid_ucode = true; 
u32 m_loop_count; int m_code_level; - std::vector m_end_offsets; - std::vector m_else_offsets; std::unordered_map m_constant_offsets; std::array temp_registers; From 0fbd0e8cc71e0c11e77a0ab1d3bf2dbc81286130 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 23 Nov 2025 21:51:42 +0300 Subject: [PATCH 19/42] rsx/gtest: Add tests for CFG BB succ edges and fix UT failures --- rpcs3/Emu/RSX/Program/Assembler/IR.h | 4 ++-- rpcs3/tests/test_rsx_cfg.cpp | 10 +++++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/rpcs3/Emu/RSX/Program/Assembler/IR.h b/rpcs3/Emu/RSX/Program/Assembler/IR.h index 1f1c1d81d8..65960f3d99 100644 --- a/rpcs3/Emu/RSX/Program/Assembler/IR.h +++ b/rpcs3/Emu/RSX/Program/Assembler/IR.h @@ -72,8 +72,8 @@ namespace rsx::assembler { u32 id = 0; std::vector instructions; // Program instructions for the RSX processor - std::vector succ; // [0] = if/loop, [1] = else - std::vector pred; // Back edge. + std::vector succ; // Forward edges. Sorted closest first. + std::vector pred; // Back edges. Sorted closest first. 
std::vector prologue; // Prologue, created by passes std::vector epilogue; // Epilogue, created by passes diff --git a/rpcs3/tests/test_rsx_cfg.cpp b/rpcs3/tests/test_rsx_cfg.cpp index b5969081d9..905ac5a049 100644 --- a/rpcs3/tests/test_rsx_cfg.cpp +++ b/rpcs3/tests/test_rsx_cfg.cpp @@ -72,7 +72,8 @@ namespace rsx::assembler EXPECT_EQ(graph.blocks.size(), 1); EXPECT_EQ(graph.blocks.front().instructions.size(), 2); EXPECT_EQ(graph.blocks.front().instructions.front().length, 4); - EXPECT_NE(graph.blocks.front().instructions.front().addr, 0); + EXPECT_EQ(graph.blocks.front().instructions[0].addr, 0); + EXPECT_EQ(graph.blocks.front().instructions[1].addr, 16); } TEST(CFG, FpToCFG_IF) @@ -192,6 +193,13 @@ namespace rsx::assembler EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 6))->pred[0].from->id, 3); EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 6))->pred[1].type, EdgeType::ENDIF); EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 6))->pred[1].from->id, 0); + + // Successors must also be ordered, closest first + ASSERT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 0))->succ.size(), 2); + EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 0))->succ[0].type, EdgeType::IF); + EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 0))->succ[0].to->id, 3); + EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 0))->succ[1].type, EdgeType::ENDIF); + EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 0))->succ[1].to->id, 6); } TEST(CFG, FpToCFG_IF_ELSE) From 2c1d962bdc0bff8a68212b0de055f6106f70b88d Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 23 Nov 2025 23:57:03 +0300 Subject: [PATCH 20/42] rsx/cfg: Replace 'avx512' intrinsics with SSE2-compatible ones - These are just PAND and POR instruction wrappers for SSE2, no idea why they're classified as 'avx512' --- 
rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp b/rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp index 4433af29a2..61026a2d64 100644 --- a/rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp +++ b/rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp @@ -20,9 +20,9 @@ namespace rsx::assembler __m128i _mask1 = _mm_set1_epi32(0x00ff00ff); __m128i a = _mm_slli_epi32(static_cast<__m128i>(raw_inst), 8); __m128i b = _mm_srli_epi32(static_cast<__m128i>(raw_inst), 8); - __m128i ret = _mm_or_epi32( - _mm_and_epi32(_mask0, a), - _mm_and_epi32(_mask1, b) + __m128i ret = _mm_or_si128( + _mm_and_si128(_mask0, a), + _mm_and_si128(_mask1, b) ); return v128::loadu(&ret); } @@ -36,7 +36,6 @@ namespace rsx::assembler SRC2 src2{}; u32 pc = 0; // Program counter - u32 instruction_size = 0; bool end = false; // Flow control data From 8495a138c678de2bf665113956e67e540c7d797b Mon Sep 17 00:00:00 2001 From: kd-11 Date: Mon, 24 Nov 2025 00:33:12 +0300 Subject: [PATCH 21/42] rsx/gtest: Fix unit tests build on GCC --- rpcs3/tests/test_rsx_cfg.cpp | 2 +- rpcs3/tests/test_simple_array.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/rpcs3/tests/test_rsx_cfg.cpp b/rpcs3/tests/test_rsx_cfg.cpp index 905ac5a049..1708774d76 100644 --- a/rpcs3/tests/test_rsx_cfg.cpp +++ b/rpcs3/tests/test_rsx_cfg.cpp @@ -45,7 +45,7 @@ namespace rsx::assembler auto create_if(u32 end, u32 _else = 0) { OPDEST dst{}; - dst.opcode = RSX_FP_OPCODE_IFE; + dst.opcode = RSX_FP_OPCODE_IFE & 0x3Fu; SRC1 src1{}; src1.else_offset = (_else ? 
_else : end) << 2; diff --git a/rpcs3/tests/test_simple_array.cpp b/rpcs3/tests/test_simple_array.cpp index fcec3568f4..0627c1d4d1 100644 --- a/rpcs3/tests/test_simple_array.cpp +++ b/rpcs3/tests/test_simple_array.cpp @@ -306,7 +306,7 @@ namespace rsx TEST(SimpleArray, Find) { - const rsx::simple_array arr{ + const rsx::simple_array arr{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; @@ -316,7 +316,7 @@ namespace rsx TEST(SimpleArray, FindIf) { - const rsx::simple_array arr{ + const rsx::simple_array arr{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; From 6a398f994793d6d74d649ec7989d621627e547e1 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Mon, 24 Nov 2025 00:43:44 +0300 Subject: [PATCH 22/42] rsx/cfg: Fix arm64 build --- rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp b/rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp index 61026a2d64..c9a16b58e2 100644 --- a/rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp +++ b/rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp @@ -9,6 +9,18 @@ #include #include +#if defined(ARCH_ARM64) +#if !defined(_MSC_VER) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstrict-aliasing" +#pragma GCC diagnostic ignored "-Wold-style-cast" +#endif +#undef FORCE_INLINE +#include "Emu/CPU/sse2neon.h" +#if !defined(_MSC_VER) +#pragma GCC diagnostic pop +#endif +#endif namespace rsx::assembler { From 5a9083e4fc0bfb73b09c4c436d8f5e78f8c2702a Mon Sep 17 00:00:00 2001 From: kd-11 Date: Mon, 24 Nov 2025 21:57:12 +0300 Subject: [PATCH 23/42] rsx: Fix saw-tooth artifacts when using GPU deswizzle for small texel widths --- rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl index bdb432d7e5..1e0b66c36c 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl +++ 
b/rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl @@ -105,7 +105,6 @@ uint get_z_index(const in uint x_, const in uint y_, const in uint z_) void decode_16b(const in uint texel_id, in uint x, const in uint y, const in uint z) { - const uint masks[] = { 0x0000FFFF, 0xFFFF0000 }; uint accumulator = 0; const uint subword_count = min(invocation.size.x, 2); @@ -113,7 +112,9 @@ void decode_16b(const in uint texel_id, in uint x, const in uint y, const in uin { uint src_texel_id = get_z_index(x, y, z); uint src_id = (src_texel_id + invocation.data_offset); - accumulator |= data_in[src_id / 2] & masks[subword]; + int src_bit_offset = int(src_id % 2) << 4; + uint src_value = bitfieldExtract(data_in[src_id / 2], src_bit_offset, 16); + accumulator = bitfieldInsert(accumulator, src_value, int(subword << 4), 16); } data_out[texel_id / 2] = %f(accumulator); @@ -123,7 +124,6 @@ void decode_16b(const in uint texel_id, in uint x, const in uint y, const in uin void decode_8b(const in uint texel_id, in uint x, const in uint y, const in uint z) { - const uint masks[] = { 0x000000FF, 0x0000FF00, 0x00FF0000, 0xFF000000 }; uint accumulator = 0; const uint subword_count = min(invocation.size.x, 4); @@ -131,7 +131,9 @@ void decode_8b(const in uint texel_id, in uint x, const in uint y, const in uint { uint src_texel_id = get_z_index(x, y, z); uint src_id = (src_texel_id + invocation.data_offset); - accumulator |= data_in[src_id / 4] & masks[subword]; + int src_bit_offset = int(src_id % 4) << 3; + uint src_value = bitfieldExtract(data_in[src_id / 4], src_bit_offset, 8); + accumulator = bitfieldInsert(accumulator, src_value, int(subword << 3), 8); } data_out[texel_id / 4] = accumulator; From 7986ee58de0286e7b1cf08d1d2a449b896b5a888 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Mon, 24 Nov 2025 23:39:56 +0300 Subject: [PATCH 24/42] vk: Allow buffer creation to fail gracefully if placement is not possible --- rpcs3/Emu/RSX/VK/vkutils/buffer_object.cpp | 27 ++++++++++++++++++---- 
rpcs3/Emu/RSX/VK/vkutils/buffer_object.h | 24 +++++++++++++++++-- 2 files changed, 45 insertions(+), 6 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/vkutils/buffer_object.cpp b/rpcs3/Emu/RSX/VK/vkutils/buffer_object.cpp index 4d7c5237cc..daf60ad03c 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/buffer_object.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/buffer_object.cpp @@ -39,11 +39,20 @@ namespace vk return false; } - buffer::buffer(const vk::render_device& dev, u64 size, const memory_type_info& memory_type, u32 access_flags, VkBufferUsageFlags usage, VkBufferCreateFlags flags, vmm_allocation_pool allocation_pool) + buffer::buffer( + const vk::render_device& dev, + u64 size, + const memory_type_info& memory_type, + u32 access_flags, + VkBufferUsageFlags usage, + VkBufferCreateFlags flags, + vmm_allocation_pool allocation_pool) : m_device(dev) { + const bool nullable = !!(flags & VK_BUFFER_CREATE_ALLOW_NULL_RPCS3); + info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - info.flags = flags; + info.flags = flags & ~VK_BUFFER_CREATE_SPECIAL_FLAGS_RPCS3; info.size = size; info.usage = usage; info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; @@ -60,8 +69,18 @@ namespace vk fmt::throw_exception("No compatible memory type was found!"); } - memory = std::make_unique(m_device, memory_reqs.size, memory_reqs.alignment, allocation_type_info, allocation_pool); - vkBindBufferMemory(dev, value, memory->get_vk_device_memory(), memory->get_vk_device_memory_offset()); + memory = std::make_unique(m_device, memory_reqs.size, memory_reqs.alignment, allocation_type_info, allocation_pool, nullable); + if (auto device_memory = memory->get_vk_device_memory(); + device_memory != VK_NULL_HANDLE) + { + vkBindBufferMemory(dev, value, device_memory, memory->get_vk_device_memory_offset()); + } + else + { + ensure(nullable); + vkDestroyBuffer(m_device, value, nullptr); + value = VK_NULL_HANDLE; + } } buffer::buffer(const vk::render_device& dev, VkBufferUsageFlags usage, void* host_pointer, u64 size) diff --git 
a/rpcs3/Emu/RSX/VK/vkutils/buffer_object.h b/rpcs3/Emu/RSX/VK/vkutils/buffer_object.h index c74cb1aaa5..ba5309749a 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/buffer_object.h +++ b/rpcs3/Emu/RSX/VK/vkutils/buffer_object.h @@ -7,6 +7,13 @@ namespace vk { + enum : u32 + { + VK_BUFFER_CREATE_ALLOW_NULL_RPCS3 = 0x80000000, + + VK_BUFFER_CREATE_SPECIAL_FLAGS_RPCS3 = (VK_BUFFER_CREATE_ALLOW_NULL_RPCS3) + }; + struct buffer_view : public unique_resource { VkBufferView value; @@ -30,8 +37,21 @@ namespace vk VkBufferCreateInfo info = {}; std::unique_ptr memory; - buffer(const vk::render_device& dev, u64 size, const memory_type_info& memory_type, u32 access_flags, VkBufferUsageFlags usage, VkBufferCreateFlags flags, vmm_allocation_pool allocation_pool); - buffer(const vk::render_device& dev, VkBufferUsageFlags usage, void* host_pointer, u64 size); + buffer( + const vk::render_device& dev, + u64 size, + const memory_type_info& memory_type, + u32 access_flags, + VkBufferUsageFlags usage, + VkBufferCreateFlags flags, + vmm_allocation_pool allocation_pool); + + buffer( + const vk::render_device& dev, + VkBufferUsageFlags usage, + void* host_pointer, + u64 size); + ~buffer(); void* map(u64 offset, u64 size); From b451dfe8771b9469146a1a31508c3db885bd8b41 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Mon, 24 Nov 2025 23:40:58 +0300 Subject: [PATCH 25/42] vk: Fall back to host-visible pool if we cannot place resources in Re-BAR pool --- rpcs3/Emu/RSX/VK/vkutils/data_heap.cpp | 42 ++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/vkutils/data_heap.cpp b/rpcs3/Emu/RSX/VK/vkutils/data_heap.cpp index ba1b4e79c1..7fa6a46a81 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/data_heap.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/data_heap.cpp @@ -47,9 +47,28 @@ namespace vk usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; memory_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; memory_index = memory_map.device_local; + m_prefer_writethrough = false; } - heap = 
std::make_unique(*g_render_device, size, memory_index, memory_flags, usage, 0, VMM_ALLOCATION_POOL_SYSTEM); + VkFlags create_flags = 0; + if (m_prefer_writethrough) + { + create_flags |= VK_BUFFER_CREATE_ALLOW_NULL_RPCS3; + } + + heap = std::make_unique(*g_render_device, size, memory_index, memory_flags, usage, create_flags, VMM_ALLOCATION_POOL_SYSTEM); + + if (!heap->value) + { + rsx_log.warning("Could not place heap '%s' into Re-BAR memory. Will attempt to use regular host-visible memory.", m_name); + ensure(m_prefer_writethrough); + + // We failed to place the buffer in rebar memory. Try again in host-visible. + m_prefer_writethrough = false; + auto gc = get_resource_manager(); + gc->dispose(heap); + heap = std::make_unique(*g_render_device, size, memory_map.host_visible_coherent, memory_flags, usage, 0, VMM_ALLOCATION_POOL_SYSTEM); + } initial_size = size; notify_on_grow = bool(notify); @@ -112,6 +131,7 @@ namespace vk auto gc = get_resource_manager(); if (shadow) { + ensure(!m_prefer_writethrough); rsx_log.warning("Buffer usage %u is not heap-compatible using this driver, explicit staging buffer in use", usage); gc->dispose(shadow); @@ -122,7 +142,25 @@ namespace vk } gc->dispose(heap); - heap = std::make_unique(*g_render_device, aligned_new_size, memory_index, memory_flags, usage, 0, VMM_ALLOCATION_POOL_SYSTEM); + + VkFlags create_flags = 0; + if (m_prefer_writethrough) + { + create_flags |= VK_BUFFER_CREATE_ALLOW_NULL_RPCS3; + } + + heap = std::make_unique(*g_render_device, aligned_new_size, memory_index, memory_flags, usage, create_flags, VMM_ALLOCATION_POOL_SYSTEM); + + if (!heap->value) + { + rsx_log.warning("Could not place heap '%s' into Re-BAR memory. Will attempt to use regular host-visible memory.", m_name); + ensure(m_prefer_writethrough); + + // We failed to place the buffer in rebar memory. Try again in host-visible. 
+ m_prefer_writethrough = false; + gc->dispose(heap); + heap = std::make_unique(*g_render_device, aligned_new_size, memory_map.host_visible_coherent, memory_flags, usage, 0, VMM_ALLOCATION_POOL_SYSTEM); + } if (notify_on_grow) { From 914b52cf4af9234784fcc9c1c754234edcbd0c5a Mon Sep 17 00:00:00 2001 From: Elad <18193363+elad335@users.noreply.github.com> Date: Wed, 26 Nov 2025 08:38:58 +0200 Subject: [PATCH 26/42] util/types.hpp: Fix signed/unsigned comparison error --- rpcs3/util/types.hpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/rpcs3/util/types.hpp b/rpcs3/util/types.hpp index 690f51c0e7..80486a6e1a 100644 --- a/rpcs3/util/types.hpp +++ b/rpcs3/util/types.hpp @@ -999,17 +999,18 @@ template requires (std::is_integral_v; constexpr bool is_to_signed = std::is_signed_v; + // For unsigned/signed mismatch, create an "unsigned" compatible mask constexpr auto from_mask = (is_from_signed && !is_to_signed) ? UnFrom{umax} >> 1 : UnFrom{umax}; constexpr auto to_mask = (is_to_signed && !is_from_signed) ? UnTo{umax} >> 1 : UnTo{umax}; - constexpr auto mask = ~(from_mask & to_mask); + constexpr auto mask = static_cast(~(from_mask & to_mask)); - // Signed to unsigned always require test - // Otherwise, this is bit-wise narrowing or conversion between types of different signedness of the same size - if constexpr ((is_from_signed && !is_to_signed) || to_mask < from_mask) + // If destination ("unsigned" compatible) mask is smaller than source ("unsigned" compatible) mask + // It requires narrowing. + if constexpr (!!mask) { // Try to optimize test if both are of the same signedness - if (is_from_signed != is_to_signed ? !!(value & mask) : static_cast(value) != value) [[unlikely]] + if (is_from_signed != is_to_signed ? 
!!(value & mask) : static_cast(value) != static_cast(value)) [[unlikely]] { fmt::raw_verify_error(src_loc, u8"Narrowing error", +value); } From 6ebdb0c0c15e6d2d5bf0aa3a0833b2a641f252b8 Mon Sep 17 00:00:00 2001 From: Antonino Di Guardo <64427768+digant73@users.noreply.github.com> Date: Wed, 26 Nov 2025 12:05:10 +0100 Subject: [PATCH 27/42] =?UTF-8?q?Add=20an=20entry=20on=20Log=20panel's=20c?= =?UTF-8?q?ontextual=20menu=20to=20show=20the=20main=20disk=20usa=E2=80=A6?= =?UTF-8?q?=20(#17715)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added the entry `Show Disk Usage` on `Log` panel's contextual menu to show the main disk usage of the emulator. It reports VFS disk usage (with the exception of useless usb directories) and cache disk usage. I avoided to automatically display the disk usage, e.g. when refreshing the game list, just because the disk usage calculation can require some time (so it will slow down the emulator). So I opted to provide the functionality on demand. This PR is propaedeutic to pr6008. A separated PR to complete pr6008 will follow. It will allow multiple selection of games on game list and a contextual menu to manage the removal of the selected games. 
--- rpcs3/Emu/system_utils.cpp | 72 +++++++++++++++++++++++++++++++++++++ rpcs3/Emu/system_utils.hpp | 9 +++++ rpcs3/rpcs3qt/log_frame.cpp | 51 ++++++++++++++++++++++++++ rpcs3/rpcs3qt/log_frame.h | 4 +++ 4 files changed, 136 insertions(+) diff --git a/rpcs3/Emu/system_utils.cpp b/rpcs3/Emu/system_utils.cpp index ba98a44795..e840887bac 100644 --- a/rpcs3/Emu/system_utils.cpp +++ b/rpcs3/Emu/system_utils.cpp @@ -101,6 +101,48 @@ namespace rpcs3::utils return worker(); } + std::vector> get_vfs_disk_usage() + { + std::vector> disk_usage; + + if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_hdd0_dir(), 1); data_size != umax) + { + disk_usage.push_back({"dev_hdd0", data_size}); + } + + if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_hdd1_dir(), 1); data_size != umax) + { + disk_usage.push_back({"dev_hdd1", data_size}); + } + + if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_flash_dir(), 1); data_size != umax) + { + disk_usage.push_back({"dev_flash", data_size}); + } + + if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_flash2_dir(), 1); data_size != umax) + { + disk_usage.push_back({"dev_flash2", data_size}); + } + + if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_flash3_dir(), 1); data_size != umax) + { + disk_usage.push_back({"dev_flash3", data_size}); + } + + if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_bdvd_dir(), 1); data_size != umax) + { + disk_usage.push_back({"dev_bdvd", data_size}); + } + + if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_games_dir(), 1); data_size != umax) + { + disk_usage.push_back({"games", data_size}); + } + + return disk_usage; + } + std::string get_emu_dir() { const std::string& emu_dir_ = g_cfg_vfs.emulator_dir; @@ -122,6 +164,36 @@ namespace rpcs3::utils return g_cfg_vfs.get(g_cfg_vfs.dev_hdd1, get_emu_dir()); } + std::string get_flash_dir() + { + return g_cfg_vfs.get(g_cfg_vfs.dev_flash, get_emu_dir()); + } + + std::string get_flash2_dir() + { + 
return g_cfg_vfs.get(g_cfg_vfs.dev_flash2, get_emu_dir()); + } + + std::string get_flash3_dir() + { + return g_cfg_vfs.get(g_cfg_vfs.dev_flash3, get_emu_dir()); + } + + std::string get_bdvd_dir() + { + return g_cfg_vfs.get(g_cfg_vfs.dev_bdvd, get_emu_dir()); + } + + u64 get_cache_disk_usage() + { + if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_cache_dir(), 1); data_size != umax) + { + return data_size; + } + + return 0; + } + std::string get_cache_dir() { return fs::get_cache_dir() + "cache/"; diff --git a/rpcs3/Emu/system_utils.hpp b/rpcs3/Emu/system_utils.hpp index 30ccb0add0..b4142dacb9 100644 --- a/rpcs3/Emu/system_utils.hpp +++ b/rpcs3/Emu/system_utils.hpp @@ -23,10 +23,19 @@ namespace rpcs3::utils bool install_pkg(const std::string& path); + // VFS directories and disk usage + std::vector> get_vfs_disk_usage(); std::string get_emu_dir(); std::string get_games_dir(); std::string get_hdd0_dir(); std::string get_hdd1_dir(); + std::string get_flash_dir(); + std::string get_flash2_dir(); + std::string get_flash3_dir(); + std::string get_bdvd_dir(); + + // Cache directories and disk usage + u64 get_cache_disk_usage(); std::string get_cache_dir(); std::string get_cache_dir(std::string_view module_path); diff --git a/rpcs3/rpcs3qt/log_frame.cpp b/rpcs3/rpcs3qt/log_frame.cpp index 4dd664a99e..a155cf215d 100644 --- a/rpcs3/rpcs3qt/log_frame.cpp +++ b/rpcs3/rpcs3qt/log_frame.cpp @@ -4,10 +4,14 @@ #include "hex_validator.h" #include "memory_viewer_panel.h" +#include "Emu/System.h" +#include "Emu/system_utils.hpp" #include "Utilities/lockless.h" #include "util/asm.hpp" +#include #include +#include #include #include #include @@ -17,6 +21,8 @@ #include #include +LOG_CHANNEL(sys_log, "SYS"); + extern fs::file g_tty; extern atomic_t g_tty_size; extern std::array, 16> g_tty_input; @@ -165,6 +171,28 @@ log_frame::log_frame(std::shared_ptr _gui_settings, QWidget* paren connect(m_timer, &QTimer::timeout, this, &log_frame::UpdateUI); } +void 
log_frame::show_disk_usage(const std::vector>& vfs_disk_usage, u64 cache_disk_usage) +{ + QString text; + u64 tot_data_size = 0; + + for (const auto& [dev, data_size] : vfs_disk_usage) + { + text += tr("\n %0: %1").arg(QString::fromStdString(dev)).arg(gui::utils::format_byte_size(data_size)); + tot_data_size += data_size; + } + + if (!text.isEmpty()) + { + text = tr("\n VFS disk usage: %0%1").arg(gui::utils::format_byte_size(tot_data_size)).arg(text); + } + + text += tr("\n Cache disk usage: %0").arg(gui::utils::format_byte_size(cache_disk_usage)); + + sys_log.success("%s", text); + QMessageBox::information(this, tr("Disk usage"), text); +} + void log_frame::SetLogLevel(logs::level lev) const { switch (lev) @@ -245,6 +273,26 @@ void log_frame::CreateAndConnectActions() m_tty->clear(); }); + m_show_disk_usage_act = new QAction(tr("Show Disk Usage"), this); + connect(m_show_disk_usage_act, &QAction::triggered, [this]() + { + if (m_disk_usage_future.isRunning()) + { + return; // Still running the last request + } + + m_disk_usage_future = QtConcurrent::run([this]() + { + const std::vector> vfs_disk_usage = rpcs3::utils::get_vfs_disk_usage(); + const u64 cache_disk_usage = rpcs3::utils::get_cache_disk_usage(); + + Emu.CallFromMainThread([this, vfs_disk_usage, cache_disk_usage]() + { + show_disk_usage(vfs_disk_usage, cache_disk_usage); + }, nullptr, false); + }); + }); + m_perform_goto_on_debugger = new QAction(tr("Go-To On The Debugger"), this); connect(m_perform_goto_on_debugger, &QAction::triggered, [this]() { @@ -369,6 +417,9 @@ void log_frame::CreateAndConnectActions() { QMenu* menu = m_log->createStandardContextMenu(); menu->addAction(m_clear_act); + menu->addSeparator(); + menu->addAction(m_show_disk_usage_act); + menu->addSeparator(); menu->addAction(m_perform_goto_on_debugger); menu->addAction(m_perform_goto_thread_on_debugger); menu->addAction(m_perform_show_in_mem_viewer); diff --git a/rpcs3/rpcs3qt/log_frame.h b/rpcs3/rpcs3qt/log_frame.h index 
0de081863c..159fdd38aa 100644 --- a/rpcs3/rpcs3qt/log_frame.h +++ b/rpcs3/rpcs3qt/log_frame.h @@ -8,6 +8,7 @@ #include +#include #include #include #include @@ -38,6 +39,7 @@ protected: private Q_SLOTS: void UpdateUI(); private: + void show_disk_usage(const std::vector>& vfs_disk_usage, u64 cache_disk_usage); void SetLogLevel(logs::level lev) const; void SetTTYLogging(bool val) const; @@ -48,6 +50,7 @@ private: std::unique_ptr m_find_dialog; QTimer* m_timer = nullptr; + QFuture m_disk_usage_future; std::vector m_color; QColor m_color_stack; @@ -72,6 +75,7 @@ private: QAction* m_clear_act = nullptr; QAction* m_clear_tty_act = nullptr; + QAction* m_show_disk_usage_act = nullptr; QAction* m_perform_goto_on_debugger = nullptr; QAction* m_perform_goto_thread_on_debugger = nullptr; QAction* m_perform_show_in_mem_viewer = nullptr; From 41aaa912e789db481a163b9d6eb8c49e43db97fc Mon Sep 17 00:00:00 2001 From: Elad <18193363+elad335@users.noreply.github.com> Date: Wed, 26 Nov 2025 15:34:38 +0200 Subject: [PATCH 28/42] Fixup ::narrow (#17756) Also fix negative signed to unsigned narrowing when source type is greater than destination. --- rpcs3/util/types.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rpcs3/util/types.hpp b/rpcs3/util/types.hpp index 80486a6e1a..4a2ef5baea 100644 --- a/rpcs3/util/types.hpp +++ b/rpcs3/util/types.hpp @@ -1000,7 +1000,7 @@ template requires (std::is_integral_v; // For unsigned/signed mismatch, create an "unsigned" compatible mask - constexpr auto from_mask = (is_from_signed && !is_to_signed) ? UnFrom{umax} >> 1 : UnFrom{umax}; + constexpr auto from_mask = (is_from_signed && !is_to_signed && sizeof(CommonFrom) <= sizeof(CommonTo)) ? UnFrom{umax} >> 1 : UnFrom{umax}; constexpr auto to_mask = (is_to_signed && !is_from_signed) ? 
UnTo{umax} >> 1 : UnTo{umax}; constexpr auto mask = static_cast(~(from_mask & to_mask)); @@ -1010,7 +1010,7 @@ template requires (std::is_integral_v(value) != static_cast(value)) [[unlikely]] + if (is_from_signed != is_to_signed ? !!(value & mask) : static_cast(static_cast(value)) != value) [[unlikely]] { fmt::raw_verify_error(src_loc, u8"Narrowing error", +value); } From a442cb91a101bf19b1b0912db7fb071d62360369 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 27 Nov 2025 02:25:00 +0300 Subject: [PATCH 29/42] rsx/cfg: Check for end flag before handling any instruction including NOP --- rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp b/rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp index c9a16b58e2..d8de4eda0b 100644 --- a/rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp +++ b/rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp @@ -112,6 +112,7 @@ namespace rsx::assembler src1.HEX = decoded._u32[2]; src2.HEX = decoded._u32[3]; + end = !!dst.end; const u32 opcode = dst.opcode | (src1.opcode_is_branch << 6); if (opcode == RSX_FP_OPCODE_NOP) @@ -120,8 +121,6 @@ namespace rsx::assembler continue; } - end = !!dst.end; - bb->instructions.push_back({}); auto& ir_inst = bb->instructions.back(); std::memcpy(ir_inst.bytecode, &decoded._u32[0], 16); From d9f913016cbbdc32ce43ac02f4e0ef6a8ffd1ae4 Mon Sep 17 00:00:00 2001 From: Elad <18193363+elad335@users.noreply.github.com> Date: Thu, 27 Nov 2025 02:50:45 +0200 Subject: [PATCH 30/42] Fix possible ambiguity of utils::bless (#17757) Make utils::bless at util/bless.hpp argument type be complete, avoiding possible ambiguity with RSX-utility version. A fix for Apple Clang compiler. 
--- rpcs3/util/bless.hpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/rpcs3/util/bless.hpp b/rpcs3/util/bless.hpp index af2f8d32f3..6a097a2c64 100644 --- a/rpcs3/util/bless.hpp +++ b/rpcs3/util/bless.hpp @@ -1,10 +1,12 @@ #pragma once +#include + namespace utils { // Hack. Pointer cast util to workaround UB. Use with extreme care. - template - [[nodiscard]] T* bless(U* ptr) + template requires (std::is_pointer_v>) + [[nodiscard]] inline T* bless(const U& ptr) { #ifdef _MSC_VER return (T*)ptr; @@ -21,3 +23,4 @@ namespace utils #endif } } + From 50c9a919424f6efcfc8bc47a973324997d0db568 Mon Sep 17 00:00:00 2001 From: rcaridade145 Date: Fri, 28 Nov 2025 18:54:45 +0000 Subject: [PATCH 31/42] rsx: Free previous pointer after reallocating memory After realloc in simple_array free the memory referenced by the old pointer. --- rpcs3/Emu/RSX/Common/simple_array.hpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/rpcs3/Emu/RSX/Common/simple_array.hpp b/rpcs3/Emu/RSX/Common/simple_array.hpp index 69397291f9..dda8cb5b3a 100644 --- a/rpcs3/Emu/RSX/Common/simple_array.hpp +++ b/rpcs3/Emu/RSX/Common/simple_array.hpp @@ -24,11 +24,18 @@ namespace rsx template void* realloc(void* prev_ptr, [[maybe_unused]] size_t prev_size, size_t new_size) { + ensure(reinterpret_cast(prev_ptr) % Align == 0, + "Pointer not aligned to Align"); + if (prev_size >= ((new_size + Align - 1) & (0 - Align))) + { + return prev_ptr; + } #ifdef _WIN32 return _aligned_realloc(prev_ptr, new_size, Align); #else void* ret = std::aligned_alloc(Align, new_size); std::memcpy(ret, prev_ptr, std::min(prev_size, new_size)); + std::free(prev_ptr); return ret; #endif } From 7b560e5ffaf3504b590db69309bd8290125920e4 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Fri, 28 Nov 2025 23:04:30 +0300 Subject: [PATCH 32/42] rsx: Clean up aligned realloc implementation --- rpcs3/Emu/RSX/Common/simple_array.hpp | 8 +++---- rpcs3/tests/test_simple_array.cpp | 31 +++++++++++++++++++++++++++ 2 files 
changed, 35 insertions(+), 4 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/simple_array.hpp b/rpcs3/Emu/RSX/Common/simple_array.hpp index dda8cb5b3a..4f8d2a5100 100644 --- a/rpcs3/Emu/RSX/Common/simple_array.hpp +++ b/rpcs3/Emu/RSX/Common/simple_array.hpp @@ -24,12 +24,12 @@ namespace rsx template void* realloc(void* prev_ptr, [[maybe_unused]] size_t prev_size, size_t new_size) { - ensure(reinterpret_cast(prev_ptr) % Align == 0, - "Pointer not aligned to Align"); - if (prev_size >= ((new_size + Align - 1) & (0 - Align))) + if (prev_size >= new_size) { - return prev_ptr; + return prev_ptr; } + + ensure(reinterpret_cast(prev_ptr) % Align == 0, "Pointer not aligned to Align"); #ifdef _WIN32 return _aligned_realloc(prev_ptr, new_size, Align); #else diff --git a/rpcs3/tests/test_simple_array.cpp b/rpcs3/tests/test_simple_array.cpp index 0627c1d4d1..05bba60f4a 100644 --- a/rpcs3/tests/test_simple_array.cpp +++ b/rpcs3/tests/test_simple_array.cpp @@ -323,4 +323,35 @@ namespace rsx EXPECT_EQ(*arr.find_if(FN(x == 8)), 8); EXPECT_EQ(arr.find_if(FN(x == 99)), nullptr); } + + TEST(AlignedAllocator, Alloc) + { + auto ptr = rsx::aligned_allocator::malloc<256>(16); + const auto ptr_value = reinterpret_cast(ptr); + rsx::aligned_allocator::free(ptr); + + EXPECT_NE(ptr_value, 0); + EXPECT_EQ(ptr_value % 256, 0); + } + + TEST(AlignedAllocator, Realloc) + { + auto ptr = rsx::aligned_allocator::malloc<256>(16); + auto ptr2 = rsx::aligned_allocator::realloc<256>(ptr, 16, 32); + const auto ptr_value = reinterpret_cast(ptr2); + rsx::aligned_allocator::free(ptr2); + + EXPECT_NE(ptr, ptr2); + EXPECT_NE(ptr_value, 0); + EXPECT_EQ(ptr_value % 256, 0); + } + + TEST(AlignedAllocator, Realloc_ReturnsPreviousPointerIfFits) + { + auto ptr = rsx::aligned_allocator::malloc<256>(16); + auto ptr2 = rsx::aligned_allocator::realloc<256>(ptr, 16, 8); + rsx::aligned_allocator::free(ptr2); + + EXPECT_EQ(ptr, ptr2); + } } From 727f3dd7a17decd923a03a4848e1d377ec9f57ac Mon Sep 17 00:00:00 2001 From: Elad 
<18193363+elad335@users.noreply.github.com> Date: Sat, 22 Nov 2025 11:21:06 +0200 Subject: [PATCH 33/42] SPU Analyzer: Try to detect no-return in BISL/BRSL --- rpcs3/Emu/Cell/SPUCommonRecompiler.cpp | 76 ++++++++++++++++++++++++-- 1 file changed, 71 insertions(+), 5 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp index 9a192989e0..c6f6595fd9 100644 --- a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp @@ -3069,6 +3069,39 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s values[op.rt] = pos + 4; } + const u32 pos_next = wa; + + bool is_no_return = false; + + if (pos_next >= lsa && pos_next < limit) + { + const u32 data_next = ls[pos_next / 4]; + const auto type_next = g_spu_itype.decode(data_next); + const auto flag_next = g_spu_iflag.decode(data_next); + const auto op_next = spu_opcode_t{data_next}; + + if (!(type_next & spu_itype::zregmod) && !(type_next & spu_itype::branch)) + { + if (auto iflags = g_spu_iflag.decode(data_next)) + { + if (+flag_next & +spu_iflag::use_ra) + { + is_no_return = is_no_return || (op_next.ra >= 4 && op_next.ra < 10); + } + + if (+flag_next & +spu_iflag::use_rb) + { + is_no_return = is_no_return || (op_next.rb >= 4 && op_next.rb < 10); + } + + if (type_next & spu_itype::_quadrop && +iflags & +spu_iflag::use_rc) + { + is_no_return = is_no_return || (op_next.ra >= 4 && op_next.rb < 10); + } + } + } + } + if (af & vf::is_const) { const u32 target = spu_branch_target(av); @@ -3105,7 +3138,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s limit = std::min(limit, target); } - if (sl && g_cfg.core.spu_block_size != spu_block_size_type::safe) + if (!is_no_return && sl && g_cfg.core.spu_block_size != spu_block_size_type::safe) { m_ret_info[pos / 4 + 1] = true; m_entry_info[pos / 4 + 1] = true; @@ -3294,9 +3327,9 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s 
spu_log.notice("[0x%x] At 0x%x: ignoring indirect branch (SYNC)", entry_point, pos); } - if (type == spu_itype::BI || sl) + if (type == spu_itype::BI || sl || is_no_return) { - if (type == spu_itype::BI || g_cfg.core.spu_block_size == spu_block_size_type::safe) + if (type == spu_itype::BI || g_cfg.core.spu_block_size == spu_block_size_type::safe || is_no_return) { m_targets[pos]; } @@ -3333,9 +3366,42 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s break; } + const u32 pos_next = wa; + + bool is_no_return = false; + + if (pos_next >= lsa && pos_next < limit) + { + const u32 data_next = ls[pos_next / 4]; + const auto type_next = g_spu_itype.decode(data_next); + const auto flag_next = g_spu_iflag.decode(data_next); + const auto op_next = spu_opcode_t{data_next}; + + if (!(type_next & spu_itype::zregmod) && !(type_next & spu_itype::branch)) + { + if (auto iflags = g_spu_iflag.decode(data_next)) + { + if (+flag_next & +spu_iflag::use_ra) + { + is_no_return = is_no_return || (op_next.ra >= 4 && op_next.ra < 10); + } + + if (+flag_next & +spu_iflag::use_rb) + { + is_no_return = is_no_return || (op_next.rb >= 4 && op_next.rb < 10); + } + + if (type_next & spu_itype::_quadrop && +iflags & +spu_iflag::use_rc) + { + is_no_return = is_no_return || (op_next.rc >= 4 && op_next.rc < 10); + } + } + } + } + m_targets[pos].push_back(target); - if (g_cfg.core.spu_block_size != spu_block_size_type::safe) + if (!is_no_return && g_cfg.core.spu_block_size != spu_block_size_type::safe) { m_ret_info[pos / 4 + 1] = true; m_entry_info[pos / 4 + 1] = true; @@ -3343,7 +3409,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s add_block(pos + 4); } - if (g_cfg.core.spu_block_size == spu_block_size_type::giga && !sync) + if (!is_no_return && g_cfg.core.spu_block_size == spu_block_size_type::giga && !sync) { m_entry_info[target / 4] = true; add_block(target); From 27c2f2ae4d8dc486b82f1d5772a5d619c3b926b4 Mon Sep 17 00:00:00 2001 From: 
Elad <18193363+elad335@users.noreply.github.com> Date: Sat, 22 Nov 2025 11:21:55 +0200 Subject: [PATCH 34/42] SPU Analyzer: Continue with failed patterns until they are proven to fail --- rpcs3/Emu/Cell/SPUCommonRecompiler.cpp | 47 ++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 7 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp index c6f6595fd9..74a8f0e46b 100644 --- a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp @@ -4970,19 +4970,25 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s bool put_active = false; // PUTLLC happened bool get_rdatomic = false; // True if MFC_RdAtomicStat was read after GETLLAR u32 mem_count = 0; + u32 break_cause = 100; + u32 break_pc = SPU_LS_SIZE; // Return old state for error reporting atomic16_t discard() { const u32 pc = lsa_pc; const u32 last_pc = lsa_last_pc; + const u32 cause = break_cause; + const u32 break_pos = break_pc; const atomic16_t old = *this; *this = atomic16_t{}; // Keep some members - lsa_pc = pc; - lsa_last_pc = last_pc; + this->lsa_pc = pc; + this->lsa_last_pc = last_pc; + this->break_cause = cause; + this->break_pc = break_pos; return old; } @@ -5189,15 +5195,17 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s { if (previous.active && likely_putllc_loop && getllar_starts.contains(previous.lsa_pc)) { - const bool is_first = !std::exchange(getllar_starts[previous.lsa_pc], true); + had_putllc_evaluation = true; - if (!is_first) + if (cause != 24) { + atomic16->break_cause = cause; + atomic16->break_pc = pos; return; } - had_putllc_evaluation = true; - + cause = atomic16->break_cause; + getllar_starts[previous.lsa_pc] = true; g_fxo->get().breaking_reason[cause]++; if (!spu_log.notice) @@ -5205,7 +5213,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s return; } - std::string break_error = fmt::format("PUTLLC pattern breakage [%x
mem=%d lsa_const=%d cause=%u] (lsa_pc=0x%x)", pos, previous.mem_count, u32{!previous.ls_offs.is_const()} * 2 + previous.lsa.is_const(), cause, previous.lsa_pc); + std::string break_error = fmt::format("PUTLLC pattern breakage [%x mem=%d lsa_const=%d cause=%u] (lsa_pc=0x%x)", atomic16->break_pc, previous.mem_count, u32{!previous.ls_offs.is_const()} * 2 + previous.lsa.is_const(), cause, previous.lsa_pc); const auto values = sort_breakig_reasons(g_fxo->get().breaking_reason); @@ -6381,6 +6389,24 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s invalidate = false; } } + else if (atomic16->break_cause != 100 && atomic16->lsa_pc != SPU_LS_SIZE) + { + const auto it = atomic16_all.find(pos); + + if (it == atomic16_all.end()) + { + // Ensure future failure + atomic16_all.emplace(pos, *atomic16); + break_putllc16(24, FN(x.active = true, x)(as_rvalue(*atomic16))); + } + else if (it->second.active && atomic16->break_cause != 100) + { + it->second = *atomic16; + break_putllc16(24, FN(x.active = true, x)(as_rvalue(*atomic16))); + } + + atomic16->break_cause = 100; + } break; } @@ -7350,6 +7376,13 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s // Blocks starting from 0x0 or invalid instruction won't be compiled, may need special interpreter fallback } + if (!m_patterns.empty()) + { + std::string out_dump; + dump(result, out_dump); + spu_log.notice("Dump SPU Function with pattern(s):\n%s", out_dump); + } + for (u32 i = 0; i < result.data.size(); i++) { const be_t ls_val = ls[result.lower_bound / 4 + i]; From e2da6d36ba714c0d3106482211fa7efddc0f4074 Mon Sep 17 00:00:00 2001 From: Elad <18193363+elad335@users.noreply.github.com> Date: Sat, 22 Nov 2025 11:23:05 +0200 Subject: [PATCH 35/42] SPU LLVM: Permit relative-PC in PUTLLC16 with alignment check --- rpcs3/Emu/Cell/SPUCommonRecompiler.cpp | 62 +++++++++++++---------- rpcs3/Emu/Cell/SPULLVMRecompiler.cpp | 69 ++++++++++++++++++++------ rpcs3/Emu/Cell/SPURecompiler.h | 4 
+- 3 files changed, 94 insertions(+), 41 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp index 74a8f0e46b..15fad98e7a 100644 --- a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp @@ -4969,6 +4969,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s bool select_16_or_0_at_runtime = false; bool put_active = false; // PUTLLC happened bool get_rdatomic = false; // True if MFC_RdAtomicStat was read after GETLLAR + u32 required_pc = SPU_LS_SIZE; // Require program to be location specific for this optimization (SPU_LS_SIZE - no requirement) u32 mem_count = 0; u32 break_cause = 100; u32 break_pc = SPU_LS_SIZE; @@ -6375,6 +6376,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s existing.ls_invalid |= atomic16->ls_invalid; existing.ls_access |= atomic16->ls_access; existing.mem_count = std::max(existing.mem_count, atomic16->mem_count); + existing.required_pc = std::min(existing.required_pc, atomic16->required_pc); existing.select_16_or_0_at_runtime |= atomic16->select_16_or_0_at_runtime; } @@ -6477,6 +6479,10 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s // Do not clear lower 16 bytes addressing because the program can move on 4-byte basis const u32 offs = spu_branch_target(pos - result.lower_bound, op.si16); + const u32 true_offs = spu_branch_target(pos, op.si16); + + // Make this optimization depend on the location of the program + atomic16->required_pc = result.lower_bound; if (atomic16->lsa.is_const() && [&]() { @@ -6501,6 +6507,10 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s { // Ignore memory access in this case } + else if (atomic16->lsa.is_const() && !atomic16->lsa.compare_with_mask_indifference(true_offs, SPU_LS_MASK_128)) + { + // Same + } else if (atomic16->ls_invalid && is_store) { break_putllc16(35, atomic16->set_invalid_ls(is_store)); @@ -7254,27 
+7264,33 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s continue; } + union putllc16_or_0_info + { + u64 data; + bf_t required_pc; + bf_t type; + bf_t runtime16_select; + bf_t no_notify; + bf_t reg; + bf_t off18; + bf_t reg2; + } value{}; + auto& stats = g_fxo->get(); had_putllc_evaluation = true; if (!pattern.ls_write) { + if (pattern.required_pc != SPU_LS_SIZE) + { + value.required_pc = pattern.required_pc; + } + spu_log.success("PUTLLC0 Pattern Detected! (put_pc=0x%x, %s) (putllc0=%d, putllc16+0=%d, all=%d)", pattern.put_pc, func_hash, ++stats.nowrite, ++stats.single, +stats.all); - add_pattern(false, inst_attr::putllc0, pattern.put_pc - lsa); + add_pattern(false, inst_attr::putllc0, pattern.put_pc - lsa, value.data); continue; } - union putllc16_info - { - u32 data; - bf_t type; - bf_t runtime16_select; - bf_t no_notify; - bf_t reg; - bf_t off18; - bf_t reg2; - } value{}; - enum : u32 { v_const = 0, @@ -7305,6 +7321,11 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s value.runtime16_select = pattern.select_16_or_0_at_runtime; value.reg = s_reg_max; + if (pattern.required_pc != SPU_LS_SIZE) + { + value.required_pc = pattern.required_pc; + } + if (pattern.ls.is_const()) { ensure(pattern.reg == s_reg_max && pattern.reg2 == s_reg_max && pattern.ls_offs.is_const(), "Unexpected register usage"); @@ -7360,7 +7381,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s if (inst_attr attr = m_inst_attrs[(read_pc - entry_point) / 4]; attr == inst_attr::none) { - add_pattern(false, inst_attr::rchcnt_loop, read_pc - result.entry_point); + add_pattern(false, inst_attr::rchcnt_loop, read_pc - result.entry_point, 0); spu_log.error("Channel Loop Pattern Detected! Report to developers! 
(read_pc=0x%x, branch_pc=0x%x, branch_target=0x%x, 0x%x-%s)", read_pc, pattern.branch_pc, pattern.branch_target, entry_point, func_hash); } @@ -8445,19 +8466,10 @@ std::array& block_reg_info::evaluate_start_state(const s return walkby_state; } -void spu_recompiler_base::add_pattern(bool fill_all, inst_attr attr, u32 start, u32 end) +void spu_recompiler_base::add_pattern(bool fill_all, inst_attr attr, u32 start, u64 info) { - if (end == umax) - { - end = start; - } - - m_patterns[start] = pattern_info{utils::address_range32::start_end(start, end)}; - - for (u32 i = start; i <= (fill_all ? end : start); i += 4) - { - m_inst_attrs[i / 4] = attr; - } + m_patterns[start] = pattern_info{info}; + m_inst_attrs[start / 4] = attr; } extern std::string format_spu_func_info(u32 addr, cpu_thread* spu) diff --git a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp index 8b820ca600..33489d1b74 100644 --- a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp @@ -1080,7 +1080,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator m_ir->SetInsertPoint(_body); } - void putllc16_pattern(const spu_program& /*prog*/, utils::address_range32 range) + void putllc16_pattern(const spu_program& /*prog*/, u64 pattern_info) { // Prevent store elimination m_block->store_context_ctr[s_reg_mfc_eal]++; @@ -1109,16 +1109,17 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator } }; - const union putllc16_info + const union putllc16_or_0_info { - u32 data; - bf_t type; - bf_t runtime16_select; - bf_t no_notify; - bf_t reg; - bf_t off18; - bf_t reg2; - } info = std::bit_cast(range.end); + u64 data; + bf_t required_pc; + bf_t type; + bf_t runtime16_select; + bf_t no_notify; + bf_t reg; + bf_t off18; + bf_t reg2; + } info = std::bit_cast(pattern_info); enum : u32 { @@ -1150,8 +1151,10 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator value_t eal_val; eal_val.value = 
_eal; - auto get_reg32 = [&](u32 reg) + auto get_reg32 = [&](u64 reg_) { + const u32 reg = static_cast(reg_); + if (get_reg_type(reg) != get_type()) { return get_reg_fixed(reg, get_type()); @@ -1170,6 +1173,19 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator } else if (info.type == v_relative) { + if (info.required_pc && info.required_pc != SPU_LS_SIZE) + { + const auto short_op = llvm::BasicBlock::Create(m_context, "__putllc16_short_op", m_function); + const auto heavy_op = llvm::BasicBlock::Create(m_context, "__putllc16_heavy_op", m_function); + + m_ir->CreateCondBr(m_ir->CreateICmpNE(m_ir->getInt32(info.required_pc), m_base_pc), heavy_op, short_op); + m_ir->SetInsertPoint(heavy_op); + update_pc(); + call("spu_exec_mfc_cmd", &exec_mfc_cmd, m_thread); + m_ir->CreateBr(_final); + m_ir->SetInsertPoint(short_op); + } + dest = m_ir->CreateAnd(get_pc(spu_branch_target(info.off18 + m_base)), 0x3fff0); } else if (info.type == v_reg_offs) @@ -1373,7 +1389,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator m_ir->SetInsertPoint(_final); } - void putllc0_pattern(const spu_program& /*prog*/, utils::address_range32 /*range*/) + void putllc0_pattern(const spu_program& /*prog*/, u64 pattern_info) { // Prevent store elimination m_block->store_context_ctr[s_reg_mfc_eal]++; @@ -1401,6 +1417,18 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator } }; + const union putllc16_or_0_info + { + u64 data; + bf_t required_pc; + bf_t type; + bf_t runtime16_select; + bf_t no_notify; + bf_t reg; + bf_t off18; + bf_t reg2; + } info = std::bit_cast(pattern_info); + const auto _next = llvm::BasicBlock::Create(m_context, "", m_function); const auto _next0 = llvm::BasicBlock::Create(m_context, "", m_function); const auto _fail = llvm::BasicBlock::Create(m_context, "", m_function); @@ -1409,6 +1437,19 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator const auto _eal = 
(get_reg_fixed(s_reg_mfc_eal) & -128).eval(m_ir); const auto _raddr = m_ir->CreateLoad(get_type(), spu_ptr(&spu_thread::raddr)); + if (info.required_pc && info.required_pc != SPU_LS_SIZE) + { + const auto short_op = llvm::BasicBlock::Create(m_context, "__putllc0_short_op", m_function); + const auto heavy_op = llvm::BasicBlock::Create(m_context, "__putllc0_heavy_op", m_function); + + m_ir->CreateCondBr(m_ir->CreateICmpNE(m_ir->getInt32(info.required_pc), m_base_pc), heavy_op, short_op); + m_ir->SetInsertPoint(heavy_op); + update_pc(); + call("spu_exec_mfc_cmd", &exec_mfc_cmd, m_thread); + m_ir->CreateBr(_final); + m_ir->SetInsertPoint(short_op); + } + m_ir->CreateCondBr(m_ir->CreateAnd(m_ir->CreateICmpEQ(_eal, _raddr), m_ir->CreateIsNotNull(_raddr)), _next, _fail, m_md_likely); m_ir->SetInsertPoint(_next); @@ -2143,12 +2184,12 @@ public: { case inst_attr::putllc0: { - putllc0_pattern(func, m_patterns.at(m_pos - start).range); + putllc0_pattern(func, m_patterns.at(m_pos - start).info); continue; } case inst_attr::putllc16: { - putllc16_pattern(func, m_patterns.at(m_pos - start).range); + putllc16_pattern(func, m_patterns.at(m_pos - start).info); continue; } case inst_attr::omit: diff --git a/rpcs3/Emu/Cell/SPURecompiler.h b/rpcs3/Emu/Cell/SPURecompiler.h index ddee888b1e..6bddb5a035 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.h +++ b/rpcs3/Emu/Cell/SPURecompiler.h @@ -397,12 +397,12 @@ protected: struct pattern_info { - utils::address_range32 range; + u64 info; }; std::unordered_map m_patterns; - void add_pattern(bool fill_all, inst_attr attr, u32 start, u32 end = -1); + void add_pattern(bool fill_all, inst_attr attr, u32 start, u64 info); private: // For private use From e11f8df06436ba6a80c7ed57b2536e1181c9b455 Mon Sep 17 00:00:00 2001 From: Elad <18193363+elad335@users.noreply.github.com> Date: Sat, 22 Nov 2025 11:28:55 +0200 Subject: [PATCH 36/42] PPU LLVM: Fixup PPU Profiler for PRX --- rpcs3/Emu/Cell/PPUTranslator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index e59f14892b..6d4766a947 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -595,7 +595,7 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect) if (g_cfg.core.ppu_prof) { - m_ir->CreateStore(m_ir->getInt32(target_last), m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast(&m_cia - m_locals))); + m_ir->CreateStore(GetAddr(target_last - m_addr), m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast(&m_cia - m_locals))); } } } From 14bd6b0cc524da4b150d89c8bcc48f0f4675cde9 Mon Sep 17 00:00:00 2001 From: Elad <18193363+elad335@users.noreply.github.com> Date: Sat, 22 Nov 2025 11:35:52 +0200 Subject: [PATCH 37/42] SPU: Enable PUTLLC16 by default Experimental, may be reduced to specific patterns instead of being enabled globally. --- rpcs3/Emu/Cell/SPUCommonRecompiler.cpp | 2 +- rpcs3/Emu/Cell/SPULLVMRecompiler.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp index 15fad98e7a..e3455fcb2d 100644 --- a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp @@ -7357,7 +7357,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s if (g_cfg.core.spu_accurate_reservations) { // Because enabling it is a hack, as it turns out - continue; + // continue; } add_pattern(false, inst_attr::putllc16, pattern.put_pc - result.entry_point, value.data); diff --git a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp index 33489d1b74..1d02b5a62e 100644 --- a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp @@ -1284,7 +1284,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator const auto _new = m_ir->CreateAlignedLoad(get_type(), _ptr(m_lsptr, dest), llvm::MaybeAlign{16}); const auto _rdata = 
m_ir->CreateAlignedLoad(get_type(), _ptr(spu_ptr(&spu_thread::rdata), m_ir->CreateAnd(diff, 0x70)), llvm::MaybeAlign{16}); - const bool is_accurate_op = !!g_cfg.core.spu_accurate_reservations; + const bool is_accurate_op = true || !!g_cfg.core.spu_accurate_reservations; const auto compare_data_change_res = is_accurate_op ? m_ir->getTrue() : m_ir->CreateICmpNE(_new, _rdata); From cae77784dbd3d8cd1055a7bc57e9f4507f4dda0c Mon Sep 17 00:00:00 2001 From: Elad <18193363+elad335@users.noreply.github.com> Date: Sat, 29 Nov 2025 06:09:04 +0200 Subject: [PATCH 38/42] UTs: Fix AlignedAllocator.Realloc test _aligned_realloc may return the same pointer if the internal implementation finds it suitable. Fixes Windows' builds. --- rpcs3/tests/test_simple_array.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/rpcs3/tests/test_simple_array.cpp b/rpcs3/tests/test_simple_array.cpp index 05bba60f4a..8d64599b96 100644 --- a/rpcs3/tests/test_simple_array.cpp +++ b/rpcs3/tests/test_simple_array.cpp @@ -341,7 +341,6 @@ namespace rsx const auto ptr_value = reinterpret_cast(ptr2); rsx::aligned_allocator::free(ptr2); - EXPECT_NE(ptr, ptr2); EXPECT_NE(ptr_value, 0); EXPECT_EQ(ptr_value % 256, 0); } From d625c1d00416815880183c2edd9173a719c0e4c6 Mon Sep 17 00:00:00 2001 From: Elad <18193363+elad335@users.noreply.github.com> Date: Sat, 29 Nov 2025 06:42:46 +0200 Subject: [PATCH 39/42] SPU LLVM: Reduce notifications for PUTLLC16 --- rpcs3/Emu/Cell/SPULLVMRecompiler.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp index 1d02b5a62e..989ba2e84f 100644 --- a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp @@ -1286,15 +1286,16 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator const bool is_accurate_op = true || !!g_cfg.core.spu_accurate_reservations; - const auto compare_data_change_res = is_accurate_op ? 
m_ir->getTrue() : m_ir->CreateICmpNE(_new, _rdata); + const auto compare_data_change_res = m_ir->CreateICmpNE(_new, _rdata); + const auto second_test_for_complete_op = is_accurate_op ? m_ir->getTrue() : compare_data_change_res; if (info.runtime16_select) { - m_ir->CreateCondBr(m_ir->CreateAnd(m_ir->CreateICmpULT(diff, m_ir->getInt64(128)), compare_data_change_res), _begin_op, _inc_res, m_md_likely); + m_ir->CreateCondBr(m_ir->CreateAnd(m_ir->CreateICmpULT(diff, m_ir->getInt64(128)), second_test_for_complete_op), _begin_op, _inc_res, m_md_likely); } else { - m_ir->CreateCondBr(compare_data_change_res, _begin_op, _inc_res, m_md_unlikely); + m_ir->CreateCondBr(second_test_for_complete_op, _begin_op, _inc_res, m_md_unlikely); } m_ir->SetInsertPoint(_begin_op); @@ -1339,7 +1340,14 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator if (!info.no_notify) { + const auto notify_block = llvm::BasicBlock::Create(m_context, "__putllc16_block_notify", m_function); + const auto notify_next = llvm::BasicBlock::Create(m_context, "__putllc16_block_notify_next", m_function); + + m_ir->CreateCondBr(compare_data_change_res, notify_block, notify_next); + m_ir->SetInsertPoint(notify_block); call("atomic_wait_engine::notify_all", static_cast(atomic_wait_engine::notify_all), rptr); + m_ir->CreateBr(notify_next); + m_ir->SetInsertPoint(notify_next); } m_ir->CreateBr(_success); From e09be04df6ec35c2b2bedb0c62bc6d1bc55f5b28 Mon Sep 17 00:00:00 2001 From: Elad <18193363+elad335@users.noreply.github.com> Date: Sat, 29 Nov 2025 09:15:23 +0200 Subject: [PATCH 40/42] Emu/UX: Automatic Cache Precompilation for PKG install --- rpcs3/Emu/Cell/PPUThread.cpp | 10 ++++- rpcs3/Emu/System.cpp | 8 ++-- rpcs3/Emu/System.h | 11 ++++++ rpcs3/rpcs3qt/game_list_frame.cpp | 32 ++++++++++----- rpcs3/rpcs3qt/game_list_frame.h | 7 ++-- rpcs3/rpcs3qt/main_window.cpp | 65 +++++++++++++++++++++---------- rpcs3/rpcs3qt/main_window.h | 2 + 7 files changed, 97 insertions(+), 38 deletions(-)
diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 95536540f5..0982ed79e4 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -3718,7 +3718,7 @@ extern void ppu_finalize(const ppu_module& info, bool force_mem_release #endif } -extern void ppu_precompile(std::vector& dir_queue, std::vector*>* loaded_modules) +extern void ppu_precompile(std::vector& dir_queue, std::vector*>* loaded_modules, bool is_fast_compilation) { if (g_cfg.core.ppu_decoder != ppu_decoder_type::llvm) { @@ -4166,6 +4166,12 @@ extern void ppu_precompile(std::vector& dir_queue, std::vector g_watchdog_hold_ctr{0}; extern bool ppu_load_exec(const ppu_exec_object&, bool virtual_load, const std::string&, utils::serial* = nullptr); extern void spu_load_exec(const spu_exec_object&); extern void spu_load_rel_exec(const spu_rel_object&); -extern void ppu_precompile(std::vector& dir_queue, std::vector*>* loaded_prx); +extern void ppu_precompile(std::vector& dir_queue, std::vector*>* loaded_prx, bool is_fast_compilation); extern bool ppu_initialize(const ppu_module&, bool check_only = false, u64 file_size = 0); extern void ppu_finalize(const ppu_module&); extern void ppu_unload_prx(const lv2_prx&); @@ -1684,7 +1684,7 @@ game_boot_result Emulator::Load(const std::string& title_id, bool is_disc_patch, } } - g_fxo->init("SPRX Loader"sv, [this, dir_queue]() mutable + g_fxo->init("SPRX Loader"sv, [this, dir_queue, is_fast = m_precompilation_option.is_fast]() mutable { std::vector*> mod_list; @@ -1705,7 +1705,7 @@ game_boot_result Emulator::Load(const std::string& title_id, bool is_disc_patch, return; } - ppu_precompile(dir_queue, mod_list.empty() ? nullptr : &mod_list); + ppu_precompile(dir_queue, mod_list.empty() ? 
nullptr : &mod_list, is_fast); if (Emu.IsStopped()) { @@ -3230,6 +3230,7 @@ void Emulator::Kill(bool allow_autoexit, bool savestate, savestate_stage* save_s read_used_savestate_versions(); m_savestate_extension_flags1 = {}; m_emu_state_close_pending = false; + m_precompilation_option = {}; // Enable logging rpcs3::utils::configure_logs(true); @@ -3824,6 +3825,7 @@ void Emulator::Kill(bool allow_autoexit, bool savestate, savestate_stage* save_s read_used_savestate_versions(); m_savestate_extension_flags1 = {}; m_emu_state_close_pending = false; + m_precompilation_option = {}; initialize_timebased_time(0, true); diff --git a/rpcs3/Emu/System.h b/rpcs3/Emu/System.h index 0c26d09a4b..954a041e9e 100644 --- a/rpcs3/Emu/System.h +++ b/rpcs3/Emu/System.h @@ -120,6 +120,11 @@ namespace utils struct serial; }; +struct emu_precompilation_option_t +{ + bool is_fast = false; +}; + class Emulator final { atomic_t m_state{system_state::stopped}; @@ -188,6 +193,7 @@ class Emulator final }; bs_t m_savestate_extension_flags1{}; + emu_precompilation_option_t m_precompilation_option{}; public: static constexpr std::string_view game_id_boot_prefix = "%RPCS3_GAMEID%:"; @@ -245,6 +251,11 @@ public: m_state = system_state::running; } + void SetPrecompileCacheOption(emu_precompilation_option_t option) + { + m_precompilation_option = option; + } + void Init(); std::vector argv; diff --git a/rpcs3/rpcs3qt/game_list_frame.cpp b/rpcs3/rpcs3qt/game_list_frame.cpp index 313e043613..162b8cb0f6 100644 --- a/rpcs3/rpcs3qt/game_list_frame.cpp +++ b/rpcs3/rpcs3qt/game_list_frame.cpp @@ -2011,10 +2011,11 @@ void game_list_frame::ShowContextMenu(const QPoint &pos) menu.exec(global_pos); } -bool game_list_frame::CreateCPUCaches(const std::string& path, const std::string& serial) +bool game_list_frame::CreateCPUCaches(const std::string& path, const std::string& serial, bool is_fast_compilation) { Emu.GracefulShutdown(false); Emu.SetForceBoot(true); + 
Emu.SetPrecompileCacheOption(emu_precompilation_option_t{.is_fast = is_fast_compilation}); if (const auto error = Emu.BootGame(fs::is_file(path) ? fs::get_parent_dir(path) : path, serial, true); error != game_boot_result::no_errors) { @@ -2026,9 +2027,9 @@ bool game_list_frame::CreateCPUCaches(const std::string& path, const std::string return true; } -bool game_list_frame::CreateCPUCaches(const game_info& game) +bool game_list_frame::CreateCPUCaches(const game_info& game, bool is_fast_compilation) { - return game && CreateCPUCaches(game->info.path, game->info.serial); + return game && CreateCPUCaches(game->info.path, game->info.serial, is_fast_compilation); } bool game_list_frame::RemoveCustomConfiguration(const std::string& title_id, const game_info& game, bool is_interactive) @@ -2404,6 +2405,9 @@ void game_list_frame::BatchActionBySerials(progress_dialog* pdlg, const std::set connect(pdlg, &progress_dialog::canceled, this, [pdlg](){ pdlg->deleteLater(); }); QApplication::beep(); + // Signal termination back to the callback + action(""); + if (refresh_on_finish && index) { Refresh(true); @@ -2414,7 +2418,7 @@ void game_list_frame::BatchActionBySerials(progress_dialog* pdlg, const std::set QTimer::singleShot(1, this, *periodic_func); } -void game_list_frame::BatchCreateCPUCaches(const std::vector& game_data) +void game_list_frame::BatchCreateCPUCaches(const std::vector& game_data, bool is_fast_compilation) { std::set serials; @@ -2433,11 +2437,13 @@ void game_list_frame::BatchCreateCPUCaches(const std::vector& game_da if (total == 0) { QMessageBox::information(this, tr("LLVM Cache Batch Creation"), tr("No titles found"), QMessageBox::Ok); + Q_EMIT NotifyBatchedGameActionFinished(); return; } if (!m_gui_settings->GetBootConfirmation(this)) { + Q_EMIT NotifyBatchedGameActionFinished(); return; } @@ -2459,13 +2465,19 @@ void game_list_frame::BatchCreateCPUCaches(const std::vector& game_da BatchActionBySerials(pdlg, serials, tr("%0\nProgress: %1/%2 caches 
compiled").arg(main_label), [&, game_data](const std::string& serial) { + if (serial.empty()) + { + Q_EMIT NotifyBatchedGameActionFinished(); + return false; + } + if (Emu.IsStopped(true)) { const auto it = std::find_if(m_game_data.begin(), m_game_data.end(), FN(x->info.serial == serial)); if (it != m_game_data.end()) { - return CreateCPUCaches((*it)->info.path, serial); + return CreateCPUCaches((*it)->info.path, serial, is_fast_compilation); } } @@ -2512,7 +2524,7 @@ void game_list_frame::BatchRemovePPUCaches() BatchActionBySerials(pdlg, serials, tr("%0/%1 caches cleared"), [this](const std::string& serial) { - return Emu.IsStopped(true) && RemovePPUCache(GetCacheDirBySerial(serial)); + return !serial.empty() &&Emu.IsStopped(true) && RemovePPUCache(GetCacheDirBySerial(serial)); }, [this](u32, u32) { @@ -2551,7 +2563,7 @@ void game_list_frame::BatchRemoveSPUCaches() BatchActionBySerials(pdlg, serials, tr("%0/%1 caches cleared"), [this](const std::string& serial) { - return Emu.IsStopped(true) && RemoveSPUCache(GetCacheDirBySerial(serial)); + return !serial.empty() && Emu.IsStopped(true) && RemoveSPUCache(GetCacheDirBySerial(serial)); }, [this](u32 removed, u32 total) { @@ -2586,7 +2598,7 @@ void game_list_frame::BatchRemoveCustomConfigurations() BatchActionBySerials(pdlg, serials, tr("%0/%1 custom configurations cleared"), [this](const std::string& serial) { - return Emu.IsStopped(true) && RemoveCustomConfiguration(serial); + return !serial.empty() && Emu.IsStopped(true) && RemoveCustomConfiguration(serial); }, [this](u32 removed, u32 total) { @@ -2620,7 +2632,7 @@ void game_list_frame::BatchRemoveCustomPadConfigurations() BatchActionBySerials(pdlg, serials, tr("%0/%1 custom pad configurations cleared"), [this](const std::string& serial) { - return Emu.IsStopped(true) && RemoveCustomPadConfiguration(serial); + return !serial.empty() && Emu.IsStopped(true) && RemoveCustomPadConfiguration(serial); }, [this](u32 removed, u32 total) { @@ -2659,7 +2671,7 @@ void 
game_list_frame::BatchRemoveShaderCaches() BatchActionBySerials(pdlg, serials, tr("%0/%1 shader caches cleared"), [this](const std::string& serial) { - return Emu.IsStopped(true) && RemoveShadersCache(GetCacheDirBySerial(serial)); + return !serial.empty() && Emu.IsStopped(true) && RemoveShadersCache(GetCacheDirBySerial(serial)); }, [this](u32 removed, u32 total) { diff --git a/rpcs3/rpcs3qt/game_list_frame.h b/rpcs3/rpcs3qt/game_list_frame.h index 0252cbf84f..fb366c933c 100644 --- a/rpcs3/rpcs3qt/game_list_frame.h +++ b/rpcs3/rpcs3qt/game_list_frame.h @@ -64,7 +64,7 @@ public: bool IsEntryVisible(const game_info& game, bool search_fallback = false) const; public Q_SLOTS: - void BatchCreateCPUCaches(const std::vector& game_data = {}); + void BatchCreateCPUCaches(const std::vector& game_data = {}, bool is_fast_compilation = false); void BatchRemovePPUCaches(); void BatchRemoveSPUCaches(); void BatchRemoveCustomConfigurations(); @@ -96,6 +96,7 @@ Q_SIGNALS: void FocusToSearchBar(); void Refreshed(); void RequestSaveStateManager(const game_info& game); + void NotifyBatchedGameActionFinished(); public: template @@ -135,8 +136,8 @@ private: bool RemovePPUCache(const std::string& base_dir, bool is_interactive = false); bool RemoveSPUCache(const std::string& base_dir, bool is_interactive = false); void RemoveHDD1Cache(const std::string& base_dir, const std::string& title_id, bool is_interactive = false); - static bool CreateCPUCaches(const std::string& path, const std::string& serial = {}); - static bool CreateCPUCaches(const game_info& game); + static bool CreateCPUCaches(const std::string& path, const std::string& serial = {}, bool is_fast_compilation = false); + static bool CreateCPUCaches(const game_info& game, bool is_fast_compilation = false); static bool RemoveContentPath(const std::string& path, const std::string& desc); static u32 RemoveContentPathList(const std::vector& path_list, const std::string& desc); diff --git a/rpcs3/rpcs3qt/main_window.cpp 
b/rpcs3/rpcs3qt/main_window.cpp index 38767ceabb..34154d846d 100644 --- a/rpcs3/rpcs3qt/main_window.cpp +++ b/rpcs3/rpcs3qt/main_window.cpp @@ -1187,7 +1187,13 @@ bool main_window::HandlePackageInstallation(QStringList file_paths, bool from_bo } } - ShowOptionalGamePreparations(tr("Success!"), tr("Successfully installed software from package(s)!"), std::move(paths)); + // Executes after PrecompileCachesFromInstalledPackages + m_notify_batch_game_action_cb = [this, paths]() mutable + { + ShowOptionalGamePreparations(tr("Success!"), tr("Successfully installed software from package(s)!"), std::move(paths)); + }; + + PrecompileCachesFromInstalledPackages(paths); }); } @@ -2368,8 +2374,7 @@ void main_window::ShowOptionalGamePreparations(const QString& title, const QStri #else QCheckBox* quick_check = new QCheckBox(tr("Add launcher shortcut(s)")); #endif - QCheckBox* precompile_check = new QCheckBox(tr("Precompile caches")); - QLabel* label = new QLabel(tr("%1\nWould you like to install shortcuts to the installed software and precompile caches? (%2 new software detected)\n\n").arg(message).arg(bootable_paths.size()), dlg); + QLabel* label = new QLabel(tr("%1\nWould you like to install shortcuts to the installed software? 
(%2 new software detected)\n\n").arg(message).arg(bootable_paths.size()), dlg); vlayout->addWidget(label); vlayout->addStretch(10); @@ -2377,10 +2382,6 @@ void main_window::ShowOptionalGamePreparations(const QString& title, const QStri vlayout->addStretch(3); vlayout->addWidget(quick_check); vlayout->addStretch(3); - vlayout->addWidget(precompile_check); - vlayout->addStretch(3); - - precompile_check->setToolTip(tr("Spend time building data needed for game boot now instead of at launch.")); QDialogButtonBox* btn_box = new QDialogButtonBox(QDialogButtonBox::Ok); @@ -2391,7 +2392,6 @@ void main_window::ShowOptionalGamePreparations(const QString& title, const QStri { const bool create_desktop_shortcuts = desk_check->isChecked(); const bool create_app_shortcut = quick_check->isChecked(); - const bool create_caches = precompile_check->isChecked(); dlg->hide(); dlg->accept(); @@ -2411,12 +2411,11 @@ void main_window::ShowOptionalGamePreparations(const QString& title, const QStri locations.insert(gui::utils::shortcut_location::applications); } - if (locations.empty() && !create_caches) + if (locations.empty()) { return; } - std::vector game_data; std::vector game_data_shortcuts; for (const auto& [boot_path, title_id] : paths) @@ -2431,11 +2430,6 @@ void main_window::ShowOptionalGamePreparations(const QString& title, const QStri { game_data_shortcuts.push_back(gameinfo); } - - if (create_caches) - { - game_data.push_back(gameinfo); - } } break; @@ -2447,17 +2441,39 @@ void main_window::ShowOptionalGamePreparations(const QString& title, const QStri { m_game_list_frame->CreateShortcuts(game_data_shortcuts, locations); } - - if (!game_data.empty()) - { - m_game_list_frame->BatchCreateCPUCaches(game_data); - } }); dlg->setAttribute(Qt::WA_DeleteOnClose); dlg->open(); } + +void main_window::PrecompileCachesFromInstalledPackages(const std::map& bootable_paths) +{ + std::vector game_data; + + for (const auto& [boot_path, title_id] : bootable_paths) + { + for (const game_info& 
gameinfo : m_game_list_frame->GetGameInfo()) + { + if (gameinfo && gameinfo->info.serial == title_id.toStdString()) + { + if (Emu.IsPathInsideDir(boot_path, gameinfo->info.path)) + { + game_data.push_back(gameinfo); + } + + break; + } + } + } + + if (!game_data.empty()) + { + m_game_list_frame->BatchCreateCPUCaches(game_data, true); + } +} + void main_window::CreateActions() { ui->exitAct->setShortcuts(QKeySequence::Quit); @@ -3401,6 +3417,15 @@ void main_window::CreateConnects() connect(ui->mw_searchbar, &QLineEdit::textChanged, m_game_list_frame, &game_list_frame::SetSearchText); connect(ui->mw_searchbar, &QLineEdit::returnPressed, m_game_list_frame, &game_list_frame::FocusAndSelectFirstEntryIfNoneIs); connect(m_game_list_frame, &game_list_frame::FocusToSearchBar, this, [this]() { ui->mw_searchbar->setFocus(); }); + + connect(m_game_list_frame, &game_list_frame::NotifyBatchedGameActionFinished, this, [this]() mutable + { + if (m_notify_batch_game_action_cb) + { + m_notify_batch_game_action_cb(); + m_notify_batch_game_action_cb = {}; + } + }); } void main_window::CreateDockWindows() diff --git a/rpcs3/rpcs3qt/main_window.h b/rpcs3/rpcs3qt/main_window.h index c712d01fd3..4e5b498587 100644 --- a/rpcs3/rpcs3qt/main_window.h +++ b/rpcs3/rpcs3qt/main_window.h @@ -48,6 +48,7 @@ class main_window : public QMainWindow bool m_save_slider_pos = false; bool m_requested_show_logs_on_exit = false; int m_other_slider_pos = 0; + std::function m_notify_batch_game_action_cb; QIcon m_app_icon; QIcon m_icon_play; @@ -141,6 +142,7 @@ private: void CreateDockWindows(); void EnableMenus(bool enabled) const; void ShowTitleBars(bool show) const; + void PrecompileCachesFromInstalledPackages(const std::map& bootable_paths); void ShowOptionalGamePreparations(const QString& title, const QString& message, std::map game_path); static bool InstallFileInExData(const std::string& extension, const QString& path, const std::string& filename); From c48ae344a8a2b5266eb2a733a53ccd2af4b8f28b Mon Sep 17 
00:00:00 2001 From: Elad <18193363+elad335@users.noreply.github.com> Date: Sat, 29 Nov 2025 07:43:50 +0200 Subject: [PATCH 41/42] SPU LLVM (Mega): Fix quarter of missing jumptable discoveries --- rpcs3/Emu/Cell/SPUCommonRecompiler.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp index e3455fcb2d..2a792f677e 100644 --- a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp @@ -3165,6 +3165,15 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s break; } + if (target >= SPU_LS_SIZE && target <= 0u - SPU_LS_SIZE) + { + if (g_spu_itype.decode(target) != spu_itype::UNK) + { + // End of jumptable: valid instruction + break; + } + } + if (target >= lsa && target < SPU_LS_SIZE) { // Possible jump table entry (absolute) From 4a042ae84ff43d35f631b62c20ab69a7934e862c Mon Sep 17 00:00:00 2001 From: Elad <18193363+elad335@users.noreply.github.com> Date: Sat, 29 Nov 2025 16:38:00 +0200 Subject: [PATCH 42/42] SPU/config: Make SPU LLVM Mega block size default --- rpcs3/Emu/system_config.h | 2 +- rpcs3/rpcs3qt/emu_settings.cpp | 6 +++--- rpcs3/rpcs3qt/emu_settings_type.h | 2 +- rpcs3/rpcs3qt/tooltips.h | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/rpcs3/Emu/system_config.h b/rpcs3/Emu/system_config.h index c7b5e8e0fb..18f11da896 100644 --- a/rpcs3/Emu/system_config.h +++ b/rpcs3/Emu/system_config.h @@ -36,7 +36,7 @@ struct cfg_root : cfg::node cfg::_int<0, 16> spu_delay_penalty{ this, "SPU delay penalty", 3 }; // Number of milliseconds to block a thread if a virtual 'core' isn't free cfg::_bool spu_loop_detection{ this, "SPU loop detection", false }; // Try to detect wait loops and trigger thread yield cfg::_int<1, 6> max_spurs_threads{ this, "Max SPURS Threads", 6, true }; // HACK. If less then 6, max number of running SPURS threads in each thread group. 
- cfg::_enum spu_block_size{ this, "SPU Block Size", spu_block_size_type::safe }; + cfg::_enum spu_block_size{ this, "SPU Analyzer Block Size", spu_block_size_type::mega }; cfg::_bool spu_accurate_dma{ this, "Accurate SPU DMA", false }; cfg::_bool spu_accurate_reservations{ this, "Accurate SPU Reservations", true }; cfg::_bool accurate_cache_line_stores{ this, "Accurate Cache Line Stores", false }; diff --git a/rpcs3/rpcs3qt/emu_settings.cpp b/rpcs3/rpcs3qt/emu_settings.cpp index 0c99cfc119..bba3b0b235 100644 --- a/rpcs3/rpcs3qt/emu_settings.cpp +++ b/rpcs3/rpcs3qt/emu_settings.cpp @@ -975,9 +975,9 @@ QString emu_settings::GetLocalizedSetting(const QString& original, emu_settings_ case emu_settings_type::SPUBlockSize: switch (static_cast(index)) { - case spu_block_size_type::safe: return tr("Safe", "SPU block size"); - case spu_block_size_type::mega: return tr("Mega", "SPU block size"); - case spu_block_size_type::giga: return tr("Giga", "SPU block size"); + case spu_block_size_type::safe: return tr("Safe", "SPU Analyzer Block Size"); + case spu_block_size_type::mega: return tr("Mega", "SPU Analyzer Block Size"); + case spu_block_size_type::giga: return tr("Giga", "SPU Analyzer Block Size"); } break; case emu_settings_type::ThreadSchedulerMode: diff --git a/rpcs3/rpcs3qt/emu_settings_type.h b/rpcs3/rpcs3qt/emu_settings_type.h index c398b07a28..d90aa7c862 100644 --- a/rpcs3/rpcs3qt/emu_settings_type.h +++ b/rpcs3/rpcs3qt/emu_settings_type.h @@ -239,7 +239,7 @@ inline static const std::map settings_location { emu_settings_type::XFloatAccuracy, { "Core", "XFloat Accuracy"}}, { emu_settings_type::MFCCommandsShuffling, { "Core", "MFC Commands Shuffling Limit"}}, { emu_settings_type::SetDAZandFTZ, { "Core", "Set DAZ and FTZ"}}, - { emu_settings_type::SPUBlockSize, { "Core", "SPU Block Size"}}, + { emu_settings_type::SPUBlockSize, { "Core", "SPU Analyzer Block Size"}}, { emu_settings_type::SPUCache, { "Core", "SPU Cache"}}, { emu_settings_type::DebugConsoleMode, { "Core", 
"Debug Console Mode"}}, { emu_settings_type::MaxSPURSThreads, { "Core", "Max SPURS Threads"}}, diff --git a/rpcs3/rpcs3qt/tooltips.h b/rpcs3/rpcs3qt/tooltips.h index f7ec927332..3b84a66270 100644 --- a/rpcs3/rpcs3qt/tooltips.h +++ b/rpcs3/rpcs3qt/tooltips.h @@ -91,7 +91,7 @@ public: const QString xfloat = tr("Control accuracy to SPU float vectors processing.\nFixes bugs in various games at the cost of performance.\nThis setting is only applied when SPU Decoder is set to Dynamic or LLVM."); const QString enable_thread_scheduler = tr("Control how RPCS3 utilizes the threads of your system.\nEach option heavily depends on the game and on your CPU. It's recommended to try each option to find out which performs the best.\nChanging the thread scheduler is not supported on CPUs with less than 12 threads."); const QString spu_loop_detection = tr("Try to detect loop conditions in SPU kernels and use them as scheduling hints.\nImproves performance and reduces CPU usage.\nMay cause severe audio stuttering in rare cases."); - const QString spu_block_size = tr("This option controls the SPU analyser, particularly the size of compiled units. The Mega and Giga modes may improve performance by tying smaller units together, decreasing the number of compiled units but increasing their size.\nUse the Safe mode for maximum compatibility."); + const QString spu_block_size = tr("This option controls the SPU analyser, particularly the size of compiled units. 
The Mega and Giga modes may improve performance by tying smaller units together, decreasing the number of compiled units but increasing their size.\nUse the Safe mode for maximum compatibility at the cost of lower performance."); const QString preferred_spu_threads = tr("Some SPU stages are sensitive to race conditions and allowing a limited number at a time helps alleviate performance stalls.\nSetting this to a smaller value might improve performance and reduce stuttering in some games.\nLeave this on auto if performance is negatively affected when setting a small value."); const QString max_cpu_preempt = tr("Reduces CPU usage and power consumption, improving battery life on mobile devices. (0 means disabled)\nHigher values cause a more pronounced effect, but may cause audio or performance issues. A value of 50 or less is recommended.\nThis option forces an FPS limit because it's active when framerate is stable.\nThe lighter the game is on the hardware, the more power is saved by it. (until the preemption count barrier is reached)");