mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-12-06 07:12:28 +01:00
SPU LLVM: Permit relative-PC in PUTLLC16 with alignment check
This commit is contained in:
parent
27c2f2ae4d
commit
e2da6d36ba
|
|
@ -4969,6 +4969,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
bool select_16_or_0_at_runtime = false;
|
||||
bool put_active = false; // PUTLLC happened
|
||||
bool get_rdatomic = false; // True if MFC_RdAtomicStat was read after GETLLAR
|
||||
u32 required_pc = SPU_LS_SIZE; // Require program to be location specific for this optimization (SPU_LS_SIZE - no requirement)
|
||||
u32 mem_count = 0;
|
||||
u32 break_cause = 100;
|
||||
u32 break_pc = SPU_LS_SIZE;
|
||||
|
|
@ -6375,6 +6376,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
existing.ls_invalid |= atomic16->ls_invalid;
|
||||
existing.ls_access |= atomic16->ls_access;
|
||||
existing.mem_count = std::max<u32>(existing.mem_count, atomic16->mem_count);
|
||||
existing.required_pc = std::min<u32>(existing.required_pc, atomic16->required_pc);
|
||||
existing.select_16_or_0_at_runtime |= atomic16->select_16_or_0_at_runtime;
|
||||
}
|
||||
|
||||
|
|
@ -6477,6 +6479,10 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
|
||||
// Do not clear lower 16 bytes addressing because the program can move on 4-byte basis
|
||||
const u32 offs = spu_branch_target(pos - result.lower_bound, op.si16);
|
||||
const u32 true_offs = spu_branch_target(pos, op.si16);
|
||||
|
||||
// Make this optimization depend on the location of the program
|
||||
atomic16->required_pc = result.lower_bound;
|
||||
|
||||
if (atomic16->lsa.is_const() && [&]()
|
||||
{
|
||||
|
|
@ -6501,6 +6507,10 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
{
|
||||
// Ignore memory access in this case
|
||||
}
|
||||
else if (atomic16->lsa.is_const() && !atomic16->lsa.compare_with_mask_indifference(true_offs, SPU_LS_MASK_128))
|
||||
{
|
||||
// Same
|
||||
}
|
||||
else if (atomic16->ls_invalid && is_store)
|
||||
{
|
||||
break_putllc16(35, atomic16->set_invalid_ls(is_store));
|
||||
|
|
@ -7254,26 +7264,32 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
continue;
|
||||
}
|
||||
|
||||
union putllc16_or_0_info
|
||||
{
|
||||
u64 data;
|
||||
bf_t<u64, 32, 18> required_pc;
|
||||
bf_t<u64, 30, 2> type;
|
||||
bf_t<u64, 29, 1> runtime16_select;
|
||||
bf_t<u64, 28, 1> no_notify;
|
||||
bf_t<u64, 18, 8> reg;
|
||||
bf_t<u64, 0, 18> off18;
|
||||
bf_t<u64, 0, 8> reg2;
|
||||
} value{};
|
||||
|
||||
auto& stats = g_fxo->get<putllc16_statistics_t>();
|
||||
had_putllc_evaluation = true;
|
||||
|
||||
if (!pattern.ls_write)
|
||||
{
|
||||
spu_log.success("PUTLLC0 Pattern Detected! (put_pc=0x%x, %s) (putllc0=%d, putllc16+0=%d, all=%d)", pattern.put_pc, func_hash, ++stats.nowrite, ++stats.single, +stats.all);
|
||||
add_pattern(false, inst_attr::putllc0, pattern.put_pc - lsa);
|
||||
continue;
|
||||
if (pattern.required_pc != SPU_LS_SIZE)
|
||||
{
|
||||
value.required_pc = pattern.required_pc;
|
||||
}
|
||||
|
||||
union putllc16_info
|
||||
{
|
||||
u32 data;
|
||||
bf_t<u32, 30, 2> type;
|
||||
bf_t<u32, 29, 1> runtime16_select;
|
||||
bf_t<u32, 28, 1> no_notify;
|
||||
bf_t<u32, 18, 8> reg;
|
||||
bf_t<u32, 0, 18> off18;
|
||||
bf_t<u32, 0, 8> reg2;
|
||||
} value{};
|
||||
spu_log.success("PUTLLC0 Pattern Detected! (put_pc=0x%x, %s) (putllc0=%d, putllc16+0=%d, all=%d)", pattern.put_pc, func_hash, ++stats.nowrite, ++stats.single, +stats.all);
|
||||
add_pattern(false, inst_attr::putllc0, pattern.put_pc - lsa, value.data);
|
||||
continue;
|
||||
}
|
||||
|
||||
enum : u32
|
||||
{
|
||||
|
|
@ -7305,6 +7321,11 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
value.runtime16_select = pattern.select_16_or_0_at_runtime;
|
||||
value.reg = s_reg_max;
|
||||
|
||||
if (pattern.required_pc != SPU_LS_SIZE)
|
||||
{
|
||||
value.required_pc = pattern.required_pc;
|
||||
}
|
||||
|
||||
if (pattern.ls.is_const())
|
||||
{
|
||||
ensure(pattern.reg == s_reg_max && pattern.reg2 == s_reg_max && pattern.ls_offs.is_const(), "Unexpected register usage");
|
||||
|
|
@ -7360,7 +7381,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
|
||||
if (inst_attr attr = m_inst_attrs[(read_pc - entry_point) / 4]; attr == inst_attr::none)
|
||||
{
|
||||
add_pattern(false, inst_attr::rchcnt_loop, read_pc - result.entry_point);
|
||||
add_pattern(false, inst_attr::rchcnt_loop, read_pc - result.entry_point, 0);
|
||||
|
||||
spu_log.error("Channel Loop Pattern Detected! Report to developers! (read_pc=0x%x, branch_pc=0x%x, branch_target=0x%x, 0x%x-%s)", read_pc, pattern.branch_pc, pattern.branch_target, entry_point, func_hash);
|
||||
}
|
||||
|
|
@ -8445,19 +8466,10 @@ std::array<reg_state_t, s_reg_max>& block_reg_info::evaluate_start_state(const s
|
|||
return walkby_state;
|
||||
}
|
||||
|
||||
void spu_recompiler_base::add_pattern(bool fill_all, inst_attr attr, u32 start, u32 end)
|
||||
void spu_recompiler_base::add_pattern(bool fill_all, inst_attr attr, u32 start, u64 info)
|
||||
{
|
||||
if (end == umax)
|
||||
{
|
||||
end = start;
|
||||
}
|
||||
|
||||
m_patterns[start] = pattern_info{utils::address_range32::start_end(start, end)};
|
||||
|
||||
for (u32 i = start; i <= (fill_all ? end : start); i += 4)
|
||||
{
|
||||
m_inst_attrs[i / 4] = attr;
|
||||
}
|
||||
m_patterns[start] = pattern_info{info};
|
||||
m_inst_attrs[start / 4] = attr;
|
||||
}
|
||||
|
||||
extern std::string format_spu_func_info(u32 addr, cpu_thread* spu)
|
||||
|
|
|
|||
|
|
@ -1080,7 +1080,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
|||
m_ir->SetInsertPoint(_body);
|
||||
}
|
||||
|
||||
void putllc16_pattern(const spu_program& /*prog*/, utils::address_range32 range)
|
||||
void putllc16_pattern(const spu_program& /*prog*/, u64 pattern_info)
|
||||
{
|
||||
// Prevent store elimination
|
||||
m_block->store_context_ctr[s_reg_mfc_eal]++;
|
||||
|
|
@ -1109,16 +1109,17 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
|||
}
|
||||
};
|
||||
|
||||
const union putllc16_info
|
||||
const union putllc16_or_0_info
|
||||
{
|
||||
u32 data;
|
||||
bf_t<u32, 30, 2> type;
|
||||
bf_t<u32, 29, 1> runtime16_select;
|
||||
bf_t<u32, 28, 1> no_notify;
|
||||
bf_t<u32, 18, 8> reg;
|
||||
bf_t<u32, 0, 18> off18;
|
||||
bf_t<u32, 0, 8> reg2;
|
||||
} info = std::bit_cast<putllc16_info>(range.end);
|
||||
u64 data;
|
||||
bf_t<u64, 32, 18> required_pc;
|
||||
bf_t<u64, 30, 2> type;
|
||||
bf_t<u64, 29, 1> runtime16_select;
|
||||
bf_t<u64, 28, 1> no_notify;
|
||||
bf_t<u64, 18, 8> reg;
|
||||
bf_t<u64, 0, 18> off18;
|
||||
bf_t<u64, 0, 8> reg2;
|
||||
} info = std::bit_cast<putllc16_or_0_info>(pattern_info);
|
||||
|
||||
enum : u32
|
||||
{
|
||||
|
|
@ -1150,8 +1151,10 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
|||
value_t<u32> eal_val;
|
||||
eal_val.value = _eal;
|
||||
|
||||
auto get_reg32 = [&](u32 reg)
|
||||
auto get_reg32 = [&](u64 reg_)
|
||||
{
|
||||
const u32 reg = static_cast<u32>(reg_);
|
||||
|
||||
if (get_reg_type(reg) != get_type<u32[4]>())
|
||||
{
|
||||
return get_reg_fixed(reg, get_type<u32>());
|
||||
|
|
@ -1170,6 +1173,19 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
|||
}
|
||||
else if (info.type == v_relative)
|
||||
{
|
||||
if (info.required_pc && info.required_pc != SPU_LS_SIZE)
|
||||
{
|
||||
const auto short_op = llvm::BasicBlock::Create(m_context, "__putllc16_short_op", m_function);
|
||||
const auto heavy_op = llvm::BasicBlock::Create(m_context, "__putllc16_heavy_op", m_function);
|
||||
|
||||
m_ir->CreateCondBr(m_ir->CreateICmpNE(m_ir->getInt32(info.required_pc), m_base_pc), heavy_op, short_op);
|
||||
m_ir->SetInsertPoint(heavy_op);
|
||||
update_pc();
|
||||
call("spu_exec_mfc_cmd", &exec_mfc_cmd<false>, m_thread);
|
||||
m_ir->CreateBr(_final);
|
||||
m_ir->SetInsertPoint(short_op);
|
||||
}
|
||||
|
||||
dest = m_ir->CreateAnd(get_pc(spu_branch_target(info.off18 + m_base)), 0x3fff0);
|
||||
}
|
||||
else if (info.type == v_reg_offs)
|
||||
|
|
@ -1373,7 +1389,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
|||
m_ir->SetInsertPoint(_final);
|
||||
}
|
||||
|
||||
void putllc0_pattern(const spu_program& /*prog*/, utils::address_range32 /*range*/)
|
||||
void putllc0_pattern(const spu_program& /*prog*/, u64 pattern_info)
|
||||
{
|
||||
// Prevent store elimination
|
||||
m_block->store_context_ctr[s_reg_mfc_eal]++;
|
||||
|
|
@ -1401,6 +1417,18 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
|||
}
|
||||
};
|
||||
|
||||
const union putllc16_or_0_info
|
||||
{
|
||||
u64 data;
|
||||
bf_t<u64, 32, 18> required_pc;
|
||||
bf_t<u64, 30, 2> type;
|
||||
bf_t<u64, 29, 1> runtime16_select;
|
||||
bf_t<u64, 28, 1> no_notify;
|
||||
bf_t<u64, 18, 8> reg;
|
||||
bf_t<u64, 0, 18> off18;
|
||||
bf_t<u64, 0, 8> reg2;
|
||||
} info = std::bit_cast<putllc16_or_0_info>(pattern_info);
|
||||
|
||||
const auto _next = llvm::BasicBlock::Create(m_context, "", m_function);
|
||||
const auto _next0 = llvm::BasicBlock::Create(m_context, "", m_function);
|
||||
const auto _fail = llvm::BasicBlock::Create(m_context, "", m_function);
|
||||
|
|
@ -1409,6 +1437,19 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
|||
const auto _eal = (get_reg_fixed<u32>(s_reg_mfc_eal) & -128).eval(m_ir);
|
||||
const auto _raddr = m_ir->CreateLoad(get_type<u32>(), spu_ptr(&spu_thread::raddr));
|
||||
|
||||
if (info.required_pc && info.required_pc != SPU_LS_SIZE)
|
||||
{
|
||||
const auto short_op = llvm::BasicBlock::Create(m_context, "__putllc0_short_op", m_function);
|
||||
const auto heavy_op = llvm::BasicBlock::Create(m_context, "__putllc0_heavy_op", m_function);
|
||||
|
||||
m_ir->CreateCondBr(m_ir->CreateICmpNE(m_ir->getInt32(info.required_pc), m_base_pc), heavy_op, short_op);
|
||||
m_ir->SetInsertPoint(heavy_op);
|
||||
update_pc();
|
||||
call("spu_exec_mfc_cmd", &exec_mfc_cmd<false>, m_thread);
|
||||
m_ir->CreateBr(_final);
|
||||
m_ir->SetInsertPoint(short_op);
|
||||
}
|
||||
|
||||
m_ir->CreateCondBr(m_ir->CreateAnd(m_ir->CreateICmpEQ(_eal, _raddr), m_ir->CreateIsNotNull(_raddr)), _next, _fail, m_md_likely);
|
||||
m_ir->SetInsertPoint(_next);
|
||||
|
||||
|
|
@ -2143,12 +2184,12 @@ public:
|
|||
{
|
||||
case inst_attr::putllc0:
|
||||
{
|
||||
putllc0_pattern(func, m_patterns.at(m_pos - start).range);
|
||||
putllc0_pattern(func, m_patterns.at(m_pos - start).info);
|
||||
continue;
|
||||
}
|
||||
case inst_attr::putllc16:
|
||||
{
|
||||
putllc16_pattern(func, m_patterns.at(m_pos - start).range);
|
||||
putllc16_pattern(func, m_patterns.at(m_pos - start).info);
|
||||
continue;
|
||||
}
|
||||
case inst_attr::omit:
|
||||
|
|
|
|||
|
|
@ -397,12 +397,12 @@ protected:
|
|||
|
||||
struct pattern_info
|
||||
{
|
||||
utils::address_range32 range;
|
||||
u64 info;
|
||||
};
|
||||
|
||||
std::unordered_map<u32, pattern_info> m_patterns;
|
||||
|
||||
void add_pattern(bool fill_all, inst_attr attr, u32 start, u32 end = -1);
|
||||
void add_pattern(bool fill_all, inst_attr attr, u32 start, u64 info);
|
||||
|
||||
private:
|
||||
// For private use
|
||||
|
|
|
|||
Loading…
Reference in a new issue