From e2da6d36ba714c0d3106482211fa7efddc0f4074 Mon Sep 17 00:00:00 2001 From: Elad <18193363+elad335@users.noreply.github.com> Date: Sat, 22 Nov 2025 11:23:05 +0200 Subject: [PATCH] SPU LLVM: Permit relative-PC in PUTLLC16 with alignment check --- rpcs3/Emu/Cell/SPUCommonRecompiler.cpp | 62 +++++++++++++---------- rpcs3/Emu/Cell/SPULLVMRecompiler.cpp | 69 ++++++++++++++++++++------ rpcs3/Emu/Cell/SPURecompiler.h | 4 +- 3 files changed, 94 insertions(+), 41 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp index 74a8f0e46b..15fad98e7a 100644 --- a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp @@ -4969,6 +4969,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s bool select_16_or_0_at_runtime = false; bool put_active = false; // PUTLLC happened bool get_rdatomic = false; // True if MFC_RdAtomicStat was read after GETLLAR + u32 required_pc = SPU_LS_SIZE; // Require program to be location specific for this optimization (SPU_LS_SIZE - no requirement) u32 mem_count = 0; u32 break_cause = 100; u32 break_pc = SPU_LS_SIZE; @@ -6375,6 +6376,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s existing.ls_invalid |= atomic16->ls_invalid; existing.ls_access |= atomic16->ls_access; existing.mem_count = std::max(existing.mem_count, atomic16->mem_count); + existing.required_pc = std::min(existing.required_pc, atomic16->required_pc); existing.select_16_or_0_at_runtime |= atomic16->select_16_or_0_at_runtime; } @@ -6477,6 +6479,10 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s // Do not clear lower 16 bytes addressing because the program can move on 4-byte basis const u32 offs = spu_branch_target(pos - result.lower_bound, op.si16); + const u32 true_offs = spu_branch_target(pos, op.si16); + + // Make this optimization depend on the location of the program + atomic16->required_pc = result.lower_bound; if (atomic16->lsa.is_const() && [&]() { @@ -6501,6 +6507,10 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s { // Ignore memory access in this case } + else if (atomic16->lsa.is_const() && !atomic16->lsa.compare_with_mask_indifference(true_offs, SPU_LS_MASK_128)) + { + // Same + } else if (atomic16->ls_invalid && is_store) { break_putllc16(35, atomic16->set_invalid_ls(is_store)); @@ -7254,27 +7264,33 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s continue; } + union putllc16_or_0_info + { + u64 data; + bf_t required_pc; + bf_t type; + bf_t runtime16_select; + bf_t no_notify; + bf_t reg; + bf_t off18; + bf_t reg2; + } value{}; + auto& stats = g_fxo->get(); had_putllc_evaluation = true; if (!pattern.ls_write) { + if (pattern.required_pc != SPU_LS_SIZE) + { + value.required_pc = pattern.required_pc; + } + spu_log.success("PUTLLC0 Pattern Detected! (put_pc=0x%x, %s) (putllc0=%d, putllc16+0=%d, all=%d)", pattern.put_pc, func_hash, ++stats.nowrite, ++stats.single, +stats.all); - add_pattern(false, inst_attr::putllc0, pattern.put_pc - lsa); + add_pattern(false, inst_attr::putllc0, pattern.put_pc - lsa, value.data); continue; } - union putllc16_info - { - u32 data; - bf_t type; - bf_t runtime16_select; - bf_t no_notify; - bf_t reg; - bf_t off18; - bf_t reg2; - } value{}; - enum : u32 { v_const = 0, @@ -7305,6 +7321,11 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s value.runtime16_select = pattern.select_16_or_0_at_runtime; value.reg = s_reg_max; + if (pattern.required_pc != SPU_LS_SIZE) + { + value.required_pc = pattern.required_pc; + } + if (pattern.ls.is_const()) { ensure(pattern.reg == s_reg_max && pattern.reg2 == s_reg_max && pattern.ls_offs.is_const(), "Unexpected register usage"); @@ -7360,7 +7381,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s if (inst_attr attr = m_inst_attrs[(read_pc - entry_point) / 4]; attr == inst_attr::none) { - add_pattern(false, inst_attr::rchcnt_loop, read_pc - result.entry_point); + add_pattern(false, inst_attr::rchcnt_loop, read_pc - result.entry_point, 0); spu_log.error("Channel Loop Pattern Detected! Report to developers! (read_pc=0x%x, branch_pc=0x%x, branch_target=0x%x, 0x%x-%s)", read_pc, pattern.branch_pc, pattern.branch_target, entry_point, func_hash); } @@ -8445,19 +8466,10 @@ std::array& block_reg_info::evaluate_start_state(const s return walkby_state; } -void spu_recompiler_base::add_pattern(bool fill_all, inst_attr attr, u32 start, u32 end) +void spu_recompiler_base::add_pattern(bool fill_all, inst_attr attr, u32 start, u64 info) { - if (end == umax) - { - end = start; - } - - m_patterns[start] = pattern_info{utils::address_range32::start_end(start, end)}; - - for (u32 i = start; i <= (fill_all ? end : start); i += 4) - { - m_inst_attrs[i / 4] = attr; - } + m_patterns[start] = pattern_info{info}; + m_inst_attrs[start / 4] = attr; } extern std::string format_spu_func_info(u32 addr, cpu_thread* spu) diff --git a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp index 8b820ca600..33489d1b74 100644 --- a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp @@ -1080,7 +1080,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator m_ir->SetInsertPoint(_body); } - void putllc16_pattern(const spu_program& /*prog*/, utils::address_range32 range) + void putllc16_pattern(const spu_program& /*prog*/, u64 pattern_info) { // Prevent store elimination m_block->store_context_ctr[s_reg_mfc_eal]++; @@ -1109,16 +1109,17 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator } }; - const union putllc16_info + const union putllc16_or_0_info { - u32 data; - bf_t type; - bf_t runtime16_select; - bf_t no_notify; - bf_t reg; - bf_t off18; - bf_t reg2; - } info = std::bit_cast(range.end); + u64 data; + bf_t required_pc; + bf_t type; + bf_t runtime16_select; + bf_t no_notify; + bf_t reg; + bf_t off18; + bf_t reg2; + } info = std::bit_cast(pattern_info); enum : u32 { @@ -1150,8 +1151,10 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator value_t eal_val; eal_val.value = _eal; - auto get_reg32 = [&](u32 reg) + auto get_reg32 = [&](u64 reg_) { + const u32 reg = static_cast(reg_); + if (get_reg_type(reg) != get_type()) { return get_reg_fixed(reg, get_type()); @@ -1170,6 +1173,19 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator } else if (info.type == v_relative) { + if (info.required_pc && info.required_pc != SPU_LS_SIZE) + { + const auto short_op = llvm::BasicBlock::Create(m_context, "__putllc16_short_op", m_function); + const auto heavy_op = llvm::BasicBlock::Create(m_context, "__putllc16_heavy_op", m_function); + + m_ir->CreateCondBr(m_ir->CreateICmpNE(m_ir->getInt32(info.required_pc), m_base_pc), heavy_op, short_op); + m_ir->SetInsertPoint(heavy_op); + update_pc(); + call("spu_exec_mfc_cmd", &exec_mfc_cmd, m_thread); + m_ir->CreateBr(_final); + m_ir->SetInsertPoint(short_op); + } + dest = m_ir->CreateAnd(get_pc(spu_branch_target(info.off18 + m_base)), 0x3fff0); } else if (info.type == v_reg_offs) @@ -1373,7 +1389,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator m_ir->SetInsertPoint(_final); } - void putllc0_pattern(const spu_program& /*prog*/, utils::address_range32 /*range*/) + void putllc0_pattern(const spu_program& /*prog*/, u64 pattern_info) { // Prevent store elimination m_block->store_context_ctr[s_reg_mfc_eal]++; @@ -1401,6 +1417,18 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator } }; + const union putllc16_or_0_info + { + u64 data; + bf_t required_pc; + bf_t type; + bf_t runtime16_select; + bf_t no_notify; + bf_t reg; + bf_t off18; + bf_t reg2; + } info = std::bit_cast(pattern_info); + const auto _next = llvm::BasicBlock::Create(m_context, "", m_function); const auto _next0 = llvm::BasicBlock::Create(m_context, "", m_function); const auto _fail = llvm::BasicBlock::Create(m_context, "", m_function); @@ -1409,6 +1437,19 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator const auto _eal = (get_reg_fixed(s_reg_mfc_eal) & -128).eval(m_ir); const auto _raddr = m_ir->CreateLoad(get_type(), spu_ptr(&spu_thread::raddr)); + if (info.required_pc && info.required_pc != SPU_LS_SIZE) + { + const auto short_op = llvm::BasicBlock::Create(m_context, "__putllc0_short_op", m_function); + const auto heavy_op = llvm::BasicBlock::Create(m_context, "__putllc0_heavy_op", m_function); + + m_ir->CreateCondBr(m_ir->CreateICmpNE(m_ir->getInt32(info.required_pc), m_base_pc), heavy_op, short_op); + m_ir->SetInsertPoint(heavy_op); + update_pc(); + call("spu_exec_mfc_cmd", &exec_mfc_cmd, m_thread); + m_ir->CreateBr(_final); + m_ir->SetInsertPoint(short_op); + } + m_ir->CreateCondBr(m_ir->CreateAnd(m_ir->CreateICmpEQ(_eal, _raddr), m_ir->CreateIsNotNull(_raddr)), _next, _fail, m_md_likely); m_ir->SetInsertPoint(_next); @@ -2143,12 +2184,12 @@ public: { case inst_attr::putllc0: { - putllc0_pattern(func, m_patterns.at(m_pos - start).range); + putllc0_pattern(func, m_patterns.at(m_pos - start).info); continue; } case inst_attr::putllc16: { - putllc16_pattern(func, m_patterns.at(m_pos - start).range); + putllc16_pattern(func, m_patterns.at(m_pos - start).info); continue; } case inst_attr::omit: diff --git a/rpcs3/Emu/Cell/SPURecompiler.h b/rpcs3/Emu/Cell/SPURecompiler.h index ddee888b1e..6bddb5a035 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.h +++ b/rpcs3/Emu/Cell/SPURecompiler.h @@ -397,12 +397,12 @@ protected: struct pattern_info { - utils::address_range32 range; + u64 info; }; std::unordered_map m_patterns; - void add_pattern(bool fill_all, inst_attr attr, u32 start, u32 end = -1); + void add_pattern(bool fill_all, inst_attr attr, u32 start, u64 info); private: // For private use