From 3c5c74c4965f6865b327f9b1e51ab2f6b2deed4c Mon Sep 17 00:00:00 2001 From: Elad <18193363+elad335@users.noreply.github.com> Date: Sun, 30 Nov 2025 08:26:30 +0200 Subject: [PATCH 1/4] SPU Cache debug --- rpcs3/Emu/Cell/SPUCommonRecompiler.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp index c9d784d3ac..d140b4a149 100644 --- a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp @@ -720,9 +720,19 @@ void spu_cache::initialize(bool build_existing_cache) } // SPU cache file (version + block size type) - const std::string loc = ppu_cache + "spu-" + fmt::to_lower(g_cfg.core.spu_block_size.to_string()) + "-v1-tane.dat"; + const std::string filename = "spu-" + fmt::to_lower(g_cfg.core.spu_block_size.to_string()) + "-v1-tane.dat"; + const std::string loc = ppu_cache + filename; + const std::string loc_debug = fs::get_cache_dir() + "DEBUG/" + filename; - spu_cache cache(loc); + bool is_debug = false; + + if (fs::is_file(loc_debug)) + { + spu_log.success("SPU Cache override applied!"); + is_debug = true; + } + + spu_cache cache(is_debug ? loc_debug : loc); if (!cache) { From 70019404837c05b9a8f995814d2b5b79c7c92578 Mon Sep 17 00:00:00 2001 From: Elad <18193363+elad335@users.noreply.github.com> Date: Sun, 30 Nov 2025 08:29:56 +0200 Subject: [PATCH 2/4] SPU Analyzer: Be more strict with loads --- rpcs3/Emu/Cell/SPUCommonRecompiler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp index d140b4a149..1a9b30ed9c 100644 --- a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp @@ -5018,7 +5018,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s ls_invalid = true; ls_write |= write; - if (write) + if (ls_write) { return discard(); } From d822d85ea11081c1e9473dd094344ae25af8a702 Mon Sep 17 00:00:00 2001 From: Elad <18193363+elad335@users.noreply.github.com> Date: Sun, 30 Nov 2025 15:11:16 +0200 Subject: [PATCH 3/4] SPU: Tame PUTLLC16 --- rpcs3/Emu/Cell/SPUCommonRecompiler.cpp | 45 ++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 6 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp index 1a9b30ed9c..4186c613af 100644 --- a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp @@ -4973,6 +4973,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s u32 lsa_last_pc = SPU_LS_SIZE; // PC of first LSA write u32 get_pc = SPU_LS_SIZE; // PC of GETLLAR u32 put_pc = SPU_LS_SIZE; // PC of PUTLLC + u32 rdatomic_pc = SPU_LS_SIZE; // PC of last RdAtomcStat read reg_state_t ls{}; // state of LS load/store address register reg_state_t ls_offs = reg_state_t::from_value(0); // Added value to ls reg_state_t lsa{}; // state of LSA register on GETLLAR @@ -6333,6 +6334,8 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s break; } + atomic16->rdatomic_pc = pos; + const auto it = atomic16_all.find(pos); if (it == atomic16_all.end()) @@ -7273,7 +7276,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s for (const auto& [pc_commited, pattern] : atomic16_all) { - if (!pattern.active) + if (!pattern.active || pattern.lsa_pc >= pattern.rdatomic_pc) { continue; } @@ -7283,6 +7286,17 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s continue; } + std::string pattern_hash; + { + sha1_context ctx; + u8 output[20]{}; + + sha1_starts(&ctx); + sha1_update(&ctx, reinterpret_cast(result.data.data()) + (pattern.lsa_pc - result.lower_bound), pattern.rdatomic_pc - pattern.lsa_pc); + sha1_finish(&ctx, output); + fmt::append(pattern_hash, "%s", fmt::base57(output)); + } + union putllc16_or_0_info { u64 data; @@ -7373,16 +7387,35 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s value.reg2 = pattern.reg2; } + bool allow_pattern = true; + if (g_cfg.core.spu_accurate_reservations) { - // Because enabling it is a hack, as it turns out - // continue; + // The problem with PUTLLC16 optimization, that it is in theory correct at the bounds of the spu function. + // But if the SPU code reuses the cache line data observed, it is not truly atomic. + // So we may enable it only for known cases where SPU atomic data is not used after the function leaves. + + // So the two options are: + + // 1. Atomic compare exchange 16 bytes operation. (rest of data is not read) -> good for RPCS3 to optimize. + // 2. Fetch 128 bytes (read them later), modify only 16 bytes. -> Bad for RPCS3 to optimize. + + // This difference cannot be known at analyzer time but from observing callers. + static constexpr std::initializer_list allowed_patterns = + { + "620oYSe8uQqq9eTkhWfMqoEXX0us"sv, // CellSpurs JobChain acquire pattern + }; + + allow_pattern = std::any_of(allowed_patterns.begin(), allowed_patterns.end(), FN(pattern_hash == x)); } - add_pattern(false, inst_attr::putllc16, pattern.put_pc - result.entry_point, value.data); + if (allow_pattern) + { + add_pattern(false, inst_attr::putllc16, pattern.put_pc - result.entry_point, value.data); + } - spu_log.success("PUTLLC16 Pattern Detected! (mem_count=%d, put_pc=0x%x, pc_rel=%d, offset=0x%x, const=%u, two_regs=%d, reg=%u, runtime=%d, 0x%x-%s) (putllc0=%d, putllc16+0=%d, all=%d)" - , pattern.mem_count, pattern.put_pc, value.type == v_relative, value.off18, value.type == v_const, value.type == v_reg2, value.reg, value.runtime16_select, entry_point, func_hash, +stats.nowrite, ++stats.single, +stats.all); + spu_log.success("PUTLLC16 Pattern Detected! (mem_count=%d, put_pc=0x%x, pc_rel=%d, offset=0x%x, const=%u, two_regs=%d, reg=%u, runtime=%d, 0x%x-%s, pattern-hash=%s) (putllc0=%d, putllc16+0=%d, all=%d)" + , pattern.mem_count, pattern.put_pc, value.type == v_relative, value.off18, value.type == v_const, value.type == v_reg2, value.reg, value.runtime16_select, entry_point, func_hash, pattern_hash, +stats.nowrite, ++stats.single, +stats.all); } for (const auto& [read_pc, pattern] : rchcnt_loop_all) From 4bda2f9b0f81557386ff935c944a596e3c5c1ae1 Mon Sep 17 00:00:00 2001 From: Elad <18193363+elad335@users.noreply.github.com> Date: Sun, 30 Nov 2025 16:43:43 +0200 Subject: [PATCH 4/4] Test: Disable PUTLLC0 --- rpcs3/Emu/Cell/SPUCommonRecompiler.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp index 4186c613af..d23903997f 100644 --- a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp @@ -7319,8 +7319,8 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s value.required_pc = pattern.required_pc; } - spu_log.success("PUTLLC0 Pattern Detected! (put_pc=0x%x, %s) (putllc0=%d, putllc16+0=%d, all=%d)", pattern.put_pc, func_hash, ++stats.nowrite, ++stats.single, +stats.all); - add_pattern(false, inst_attr::putllc0, pattern.put_pc - lsa, value.data); + // spu_log.success("PUTLLC0 Pattern Detected! (put_pc=0x%x, %s) (putllc0=%d, putllc16+0=%d, all=%d)", pattern.put_pc, func_hash, ++stats.nowrite, ++stats.single, +stats.all); + // add_pattern(false, inst_attr::putllc0, pattern.put_pc - lsa, value.data); continue; }