Merge branch 'master' into nastys-patch-17

This commit is contained in:
Author: nastys — 2025-11-29 18:54:36 +01:00, committed by GitHub
Commit: 173edea60c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
50 changed files with 1714 additions and 467 deletions

View file

@ -394,7 +394,7 @@ namespace fmt
}
#if !defined(_MSC_VER) || defined(__clang__)
[[noreturn]] ~throw_exception();
[[noreturn]] ~throw_exception() = default;
#endif
};

View file

@ -192,6 +192,7 @@ if(BUILD_RPCS3_TESTS)
tests/test_tuple.cpp
tests/test_simple_array.cpp
tests/test_address_range.cpp
tests/test_rsx_cfg.cpp
)
target_link_libraries(rpcs3_test

View file

@ -157,7 +157,7 @@ std::array<u8, PASSPHRASE_KEY_LEN> sc_combine_laid_paid(s64 laid, s64 paid)
{
const std::string paid_laid = fmt::format("%016llx%016llx", laid, paid);
std::array<u8, PASSPHRASE_KEY_LEN> out{};
hex_to_bytes(out.data(), paid_laid.c_str(), PASSPHRASE_KEY_LEN * 2);
hex_to_bytes(out.data(), paid_laid, PASSPHRASE_KEY_LEN * 2);
return out;
}

View file

@ -516,6 +516,7 @@ target_sources(rpcs3_emu PRIVATE
RSX/Overlays/overlay_video.cpp
RSX/Overlays/Shaders/shader_loading_dialog.cpp
RSX/Overlays/Shaders/shader_loading_dialog_native.cpp
RSX/Program/Assembler/FPToCFG.cpp
RSX/Program/CgBinaryProgram.cpp
RSX/Program/CgBinaryFragmentProgram.cpp
RSX/Program/CgBinaryVertexProgram.cpp

View file

@ -3,7 +3,6 @@
#include "util/types.hpp"
#include "Emu/Memory/vm_ptr.h"
#include "Emu/Cell/ErrorCodes.h"
#include <mutex>
#include <vector>
#include <mutex>

View file

@ -3718,7 +3718,7 @@ extern void ppu_finalize(const ppu_module<lv2_obj>& info, bool force_mem_release
#endif
}
extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_module<lv2_obj>*>* loaded_modules)
extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_module<lv2_obj>*>* loaded_modules, bool is_fast_compilation)
{
if (g_cfg.core.ppu_decoder != ppu_decoder_type::llvm)
{
@ -4166,6 +4166,12 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
break;
}
if (is_fast_compilation)
{
// Skip overlays in fast mode
break;
}
if (!wait_for_memory())
{
// Emulation stopped
@ -4460,7 +4466,7 @@ extern void ppu_initialize()
progress_dialog.reset();
ppu_precompile(dir_queue, &module_list);
ppu_precompile(dir_queue, &module_list, false);
if (Emu.IsStopped())
{

View file

@ -416,7 +416,6 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module<lv2_obj>& info)
assert(ptr_inst->getResultElementType() == m_ir->getPtrTy());
const auto faddr = m_ir->CreateLoad(ptr_inst->getResultElementType(), ptr_inst);
const auto faddr_int = m_ir->CreatePtrToInt(faddr, get_type<uptr>());
const auto pos_32 = m_reloc ? m_ir->CreateAdd(func_pc, m_seg0) : func_pc;
const auto pos = m_ir->CreateShl(pos_32, 1);
const auto ptr = m_ir->CreatePtrAdd(m_exec, pos);
@ -427,7 +426,7 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module<lv2_obj>& info)
const auto seg_val = m_ir->CreateTrunc(m_ir->CreateLShr(m_seg0, 13), get_type<u16>());
// Store to jumptable
m_ir->CreateStore(faddr_int, ptr);
m_ir->CreateStore(faddr, ptr);
m_ir->CreateStore(seg_val, seg_ptr);
// Increment index and branch back to loop
@ -596,7 +595,7 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)
if (g_cfg.core.ppu_prof)
{
m_ir->CreateStore(m_ir->getInt32(target_last), m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(&m_cia - m_locals)));
m_ir->CreateStore(GetAddr(target_last - m_addr), m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(&m_cia - m_locals)));
}
}
}

View file

@ -3069,6 +3069,39 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
values[op.rt] = pos + 4;
}
const u32 pos_next = wa;
bool is_no_return = false;
if (pos_next >= lsa && pos_next < limit)
{
const u32 data_next = ls[pos_next / 4];
const auto type_next = g_spu_itype.decode(data_next);
const auto flag_next = g_spu_iflag.decode(data_next);
const auto op_next = spu_opcode_t{data_next};
if (!(type_next & spu_itype::zregmod) && !(type_next & spu_itype::branch))
{
if (auto iflags = g_spu_iflag.decode(data_next))
{
if (+flag_next & +spu_iflag::use_ra)
{
is_no_return = is_no_return || (op_next.ra >= 4 && op_next.ra < 10);
}
if (+flag_next & +spu_iflag::use_rb)
{
is_no_return = is_no_return || (op_next.rb >= 4 && op_next.rb < 10);
}
if (type_next & spu_itype::_quadrop && +iflags & +spu_iflag::use_rc)
{
is_no_return = is_no_return || (op_next.ra >= 4 && op_next.rb < 10);
}
}
}
}
if (af & vf::is_const)
{
const u32 target = spu_branch_target(av);
@ -3105,7 +3138,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
limit = std::min<u32>(limit, target);
}
if (sl && g_cfg.core.spu_block_size != spu_block_size_type::safe)
if (!is_no_return && sl && g_cfg.core.spu_block_size != spu_block_size_type::safe)
{
m_ret_info[pos / 4 + 1] = true;
m_entry_info[pos / 4 + 1] = true;
@ -3122,7 +3155,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
u64 dabs = 0;
u64 drel = 0;
for (u32 i = start; i < limit; i += 4)
for (u32 i = start, abs_fail = 0, rel_fail = 0; i < limit; i += 4)
{
const u32 target = ls[i / 4];
@ -3132,16 +3165,39 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
break;
}
if (target >= SPU_LS_SIZE && target <= 0u - SPU_LS_SIZE)
{
if (g_spu_itype.decode(target) != spu_itype::UNK)
{
// End of jumptable: valid instruction
break;
}
}
if (target >= lsa && target < SPU_LS_SIZE)
{
// Possible jump table entry (absolute)
jt_abs.push_back(target);
if (!abs_fail)
{
jt_abs.push_back(target);
}
}
else
{
abs_fail++;
}
if (target + start >= lsa && target + start < SPU_LS_SIZE)
{
// Possible jump table entry (relative)
jt_rel.push_back(target + start);
if (!rel_fail)
{
jt_rel.push_back(target + start);
}
}
else
{
rel_fail++;
}
if (std::max(jt_abs.size(), jt_rel.size()) * 4 + start <= i)
@ -3153,6 +3209,35 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
}
}
for (usz i = 0; i < jt_abs.size(); i++)
{
if (jt_abs[i] == start + jt_abs.size() * 4)
{
// If jumptable contains absolute address of code start after the jumptable itself
// It is likely an absolute-type jumptable
bool is_good_conclusion = true;
// For verification: make sure there is none like this in relative table
for (u32 target : jt_rel)
{
if (target == start + jt_rel.size() * 4)
{
is_good_conclusion = false;
break;
}
}
if (is_good_conclusion)
{
jt_rel.clear();
}
break;
}
}
// Choose position after the jt as an anchor and compute the average distance
for (u32 target : jt_abs)
{
@ -3251,9 +3336,9 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
spu_log.notice("[0x%x] At 0x%x: ignoring indirect branch (SYNC)", entry_point, pos);
}
if (type == spu_itype::BI || sl)
if (type == spu_itype::BI || sl || is_no_return)
{
if (type == spu_itype::BI || g_cfg.core.spu_block_size == spu_block_size_type::safe)
if (type == spu_itype::BI || g_cfg.core.spu_block_size == spu_block_size_type::safe || is_no_return)
{
m_targets[pos];
}
@ -3290,9 +3375,42 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
break;
}
const u32 pos_next = wa;
bool is_no_return = false;
if (pos_next >= lsa && pos_next < limit)
{
const u32 data_next = ls[pos_next / 4];
const auto type_next = g_spu_itype.decode(data_next);
const auto flag_next = g_spu_iflag.decode(data_next);
const auto op_next = spu_opcode_t{data_next};
if (!(type_next & spu_itype::zregmod) && !(type_next & spu_itype::branch))
{
if (auto iflags = g_spu_iflag.decode(data_next))
{
if (+flag_next & +spu_iflag::use_ra)
{
is_no_return = is_no_return || (op_next.ra >= 4 && op_next.ra < 10);
}
if (+flag_next & +spu_iflag::use_rb)
{
is_no_return = is_no_return || (op_next.rb >= 4 && op_next.rb < 10);
}
if (type_next & spu_itype::_quadrop && +iflags & +spu_iflag::use_rc)
{
is_no_return = is_no_return || (op_next.rc >= 4 && op_next.rc < 10);
}
}
}
}
m_targets[pos].push_back(target);
if (g_cfg.core.spu_block_size != spu_block_size_type::safe)
if (!is_no_return && g_cfg.core.spu_block_size != spu_block_size_type::safe)
{
m_ret_info[pos / 4 + 1] = true;
m_entry_info[pos / 4 + 1] = true;
@ -3300,7 +3418,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
add_block(pos + 4);
}
if (g_cfg.core.spu_block_size == spu_block_size_type::giga && !sync)
if (!is_no_return && g_cfg.core.spu_block_size == spu_block_size_type::giga && !sync)
{
m_entry_info[target / 4] = true;
add_block(target);
@ -4860,20 +4978,27 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
bool select_16_or_0_at_runtime = false;
bool put_active = false; // PUTLLC happened
bool get_rdatomic = false; // True if MFC_RdAtomicStat was read after GETLLAR
u32 required_pc = SPU_LS_SIZE; // Require program to be location specific for this optimization (SPU_LS_SIZE - no requirement)
u32 mem_count = 0;
u32 break_cause = 100;
u32 break_pc = SPU_LS_SIZE;
// Return old state for error reporting
atomic16_t discard()
{
const u32 pc = lsa_pc;
const u32 last_pc = lsa_last_pc;
const u32 cause = break_cause;
const u32 break_pos = break_pc;
const atomic16_t old = *this;
*this = atomic16_t{};
// Keep some members
lsa_pc = pc;
lsa_last_pc = last_pc;
this->lsa_pc = pc;
this->lsa_last_pc = last_pc;
this->break_cause = cause;
this->break_pc = break_pos;
return old;
}
@ -5080,15 +5205,17 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
{
if (previous.active && likely_putllc_loop && getllar_starts.contains(previous.lsa_pc))
{
const bool is_first = !std::exchange(getllar_starts[previous.lsa_pc], true);
had_putllc_evaluation = true;
if (!is_first)
if (cause != 24)
{
atomic16->break_cause = cause;
atomic16->break_pc = pos;
return;
}
had_putllc_evaluation = true;
cause = atomic16->break_cause;
getllar_starts[previous.lsa_pc] = true;
g_fxo->get<putllc16_statistics_t>().breaking_reason[cause]++;
if (!spu_log.notice)
@ -5096,7 +5223,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
return;
}
std::string break_error = fmt::format("PUTLLC pattern breakage [%x mem=%d lsa_const=%d cause=%u] (lsa_pc=0x%x)", pos, previous.mem_count, u32{!previous.ls_offs.is_const()} * 2 + previous.lsa.is_const(), cause, previous.lsa_pc);
std::string break_error = fmt::format("PUTLLC pattern breakage [%x mem=%d lsa_const=%d cause=%u] (lsa_pc=0x%x)", atomic16->break_pc, previous.mem_count, u32{!previous.ls_offs.is_const()} * 2 + previous.lsa.is_const(), cause, previous.lsa_pc);
const auto values = sort_breakig_reasons(g_fxo->get<putllc16_statistics_t>().breaking_reason);
@ -6258,6 +6385,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
existing.ls_invalid |= atomic16->ls_invalid;
existing.ls_access |= atomic16->ls_access;
existing.mem_count = std::max<u32>(existing.mem_count, atomic16->mem_count);
existing.required_pc = std::min<u32>(existing.required_pc, atomic16->required_pc);
existing.select_16_or_0_at_runtime |= atomic16->select_16_or_0_at_runtime;
}
@ -6272,6 +6400,24 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
invalidate = false;
}
}
else if (atomic16->break_cause != 100 && atomic16->lsa_pc != SPU_LS_SIZE)
{
const auto it = atomic16_all.find(pos);
if (it == atomic16_all.end())
{
// Ensure future failure
atomic16_all.emplace(pos, *atomic16);
break_putllc16(24, FN(x.active = true, x)(as_rvalue(*atomic16)));
}
else if (it->second.active && atomic16->break_cause != 100)
{
it->second = *atomic16;
break_putllc16(24, FN(x.active = true, x)(as_rvalue(*atomic16)));
}
atomic16->break_cause = 100;
}
break;
}
@ -6342,6 +6488,10 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
// Do not clear lower 16 bytes addressing because the program can move on 4-byte basis
const u32 offs = spu_branch_target(pos - result.lower_bound, op.si16);
const u32 true_offs = spu_branch_target(pos, op.si16);
// Make this optimization depend on the location of the program
atomic16->required_pc = result.lower_bound;
if (atomic16->lsa.is_const() && [&]()
{
@ -6366,6 +6516,10 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
{
// Ignore memory access in this case
}
else if (atomic16->lsa.is_const() && !atomic16->lsa.compare_with_mask_indifference(true_offs, SPU_LS_MASK_128))
{
// Same
}
else if (atomic16->ls_invalid && is_store)
{
break_putllc16(35, atomic16->set_invalid_ls(is_store));
@ -7119,27 +7273,33 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
continue;
}
union putllc16_or_0_info
{
u64 data;
bf_t<u64, 32, 18> required_pc;
bf_t<u64, 30, 2> type;
bf_t<u64, 29, 1> runtime16_select;
bf_t<u64, 28, 1> no_notify;
bf_t<u64, 18, 8> reg;
bf_t<u64, 0, 18> off18;
bf_t<u64, 0, 8> reg2;
} value{};
auto& stats = g_fxo->get<putllc16_statistics_t>();
had_putllc_evaluation = true;
if (!pattern.ls_write)
{
if (pattern.required_pc != SPU_LS_SIZE)
{
value.required_pc = pattern.required_pc;
}
spu_log.success("PUTLLC0 Pattern Detected! (put_pc=0x%x, %s) (putllc0=%d, putllc16+0=%d, all=%d)", pattern.put_pc, func_hash, ++stats.nowrite, ++stats.single, +stats.all);
add_pattern(false, inst_attr::putllc0, pattern.put_pc - lsa);
add_pattern(false, inst_attr::putllc0, pattern.put_pc - lsa, value.data);
continue;
}
union putllc16_info
{
u32 data;
bf_t<u32, 30, 2> type;
bf_t<u32, 29, 1> runtime16_select;
bf_t<u32, 28, 1> no_notify;
bf_t<u32, 18, 8> reg;
bf_t<u32, 0, 18> off18;
bf_t<u32, 0, 8> reg2;
} value{};
enum : u32
{
v_const = 0,
@ -7170,6 +7330,11 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
value.runtime16_select = pattern.select_16_or_0_at_runtime;
value.reg = s_reg_max;
if (pattern.required_pc != SPU_LS_SIZE)
{
value.required_pc = pattern.required_pc;
}
if (pattern.ls.is_const())
{
ensure(pattern.reg == s_reg_max && pattern.reg2 == s_reg_max && pattern.ls_offs.is_const(), "Unexpected register usage");
@ -7201,7 +7366,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
if (g_cfg.core.spu_accurate_reservations)
{
// Because enabling it is a hack, as it turns out
continue;
// continue;
}
add_pattern(false, inst_attr::putllc16, pattern.put_pc - result.entry_point, value.data);
@ -7225,7 +7390,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
if (inst_attr attr = m_inst_attrs[(read_pc - entry_point) / 4]; attr == inst_attr::none)
{
add_pattern(false, inst_attr::rchcnt_loop, read_pc - result.entry_point);
add_pattern(false, inst_attr::rchcnt_loop, read_pc - result.entry_point, 0);
spu_log.error("Channel Loop Pattern Detected! Report to developers! (read_pc=0x%x, branch_pc=0x%x, branch_target=0x%x, 0x%x-%s)", read_pc, pattern.branch_pc, pattern.branch_target, entry_point, func_hash);
}
@ -7241,6 +7406,26 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
// Blocks starting from 0x0 or invalid instruction won't be compiled, may need special interpreter fallback
}
if (!m_patterns.empty())
{
std::string out_dump;
dump(result, out_dump);
spu_log.notice("Dump SPU Function with pattern(s):\n%s", out_dump);
}
for (u32 i = 0; i < result.data.size(); i++)
{
const be_t<u32> ls_val = ls[result.lower_bound / 4 + i];
if (result.data[i] && std::bit_cast<u32>(ls_val) != result.data[i])
{
std::string out_dump;
dump(result, out_dump);
spu_log.error("SPU Function Dump:\n%s", out_dump);
fmt::throw_exception("SPU Analyzer failed: Instruction mismatch at 0x%x [read: 0x%x vs LS: 0x%x] (i=0x%x)", result.lower_bound + i * 4, std::bit_cast<be_t<u32>>(result.data[i]), ls_val, i);
}
}
return result;
}
@ -8290,19 +8475,10 @@ std::array<reg_state_t, s_reg_max>& block_reg_info::evaluate_start_state(const s
return walkby_state;
}
void spu_recompiler_base::add_pattern(bool fill_all, inst_attr attr, u32 start, u32 end)
void spu_recompiler_base::add_pattern(bool fill_all, inst_attr attr, u32 start, u64 info)
{
if (end == umax)
{
end = start;
}
m_patterns[start] = pattern_info{utils::address_range32::start_end(start, end)};
for (u32 i = start; i <= (fill_all ? end : start); i += 4)
{
m_inst_attrs[i / 4] = attr;
}
m_patterns[start] = pattern_info{info};
m_inst_attrs[start / 4] = attr;
}
extern std::string format_spu_func_info(u32 addr, cpu_thread* spu)

View file

@ -1080,7 +1080,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
m_ir->SetInsertPoint(_body);
}
void putllc16_pattern(const spu_program& /*prog*/, utils::address_range32 range)
void putllc16_pattern(const spu_program& /*prog*/, u64 pattern_info)
{
// Prevent store elimination
m_block->store_context_ctr[s_reg_mfc_eal]++;
@ -1109,16 +1109,17 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
}
};
const union putllc16_info
const union putllc16_or_0_info
{
u32 data;
bf_t<u32, 30, 2> type;
bf_t<u32, 29, 1> runtime16_select;
bf_t<u32, 28, 1> no_notify;
bf_t<u32, 18, 8> reg;
bf_t<u32, 0, 18> off18;
bf_t<u32, 0, 8> reg2;
} info = std::bit_cast<putllc16_info>(range.end);
u64 data;
bf_t<u64, 32, 18> required_pc;
bf_t<u64, 30, 2> type;
bf_t<u64, 29, 1> runtime16_select;
bf_t<u64, 28, 1> no_notify;
bf_t<u64, 18, 8> reg;
bf_t<u64, 0, 18> off18;
bf_t<u64, 0, 8> reg2;
} info = std::bit_cast<putllc16_or_0_info>(pattern_info);
enum : u32
{
@ -1150,8 +1151,10 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
value_t<u32> eal_val;
eal_val.value = _eal;
auto get_reg32 = [&](u32 reg)
auto get_reg32 = [&](u64 reg_)
{
const u32 reg = static_cast<u32>(reg_);
if (get_reg_type(reg) != get_type<u32[4]>())
{
return get_reg_fixed(reg, get_type<u32>());
@ -1170,6 +1173,19 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
}
else if (info.type == v_relative)
{
if (info.required_pc && info.required_pc != SPU_LS_SIZE)
{
const auto short_op = llvm::BasicBlock::Create(m_context, "__putllc16_short_op", m_function);
const auto heavy_op = llvm::BasicBlock::Create(m_context, "__putllc16_heavy_op", m_function);
m_ir->CreateCondBr(m_ir->CreateICmpNE(m_ir->getInt32(info.required_pc), m_base_pc), heavy_op, short_op);
m_ir->SetInsertPoint(heavy_op);
update_pc();
call("spu_exec_mfc_cmd", &exec_mfc_cmd<false>, m_thread);
m_ir->CreateBr(_final);
m_ir->SetInsertPoint(short_op);
}
dest = m_ir->CreateAnd(get_pc(spu_branch_target(info.off18 + m_base)), 0x3fff0);
}
else if (info.type == v_reg_offs)
@ -1268,17 +1284,18 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
const auto _new = m_ir->CreateAlignedLoad(get_type<u128>(), _ptr(m_lsptr, dest), llvm::MaybeAlign{16});
const auto _rdata = m_ir->CreateAlignedLoad(get_type<u128>(), _ptr(spu_ptr(&spu_thread::rdata), m_ir->CreateAnd(diff, 0x70)), llvm::MaybeAlign{16});
const bool is_accurate_op = !!g_cfg.core.spu_accurate_reservations;
const bool is_accurate_op = true || !!g_cfg.core.spu_accurate_reservations;
const auto compare_data_change_res = is_accurate_op ? m_ir->getTrue() : m_ir->CreateICmpNE(_new, _rdata);
const auto compare_data_change_res = m_ir->CreateICmpNE(_new, _rdata);
const auto second_test_for_complete_op = is_accurate_op ? m_ir->getTrue() : compare_data_change_res;
if (info.runtime16_select)
{
m_ir->CreateCondBr(m_ir->CreateAnd(m_ir->CreateICmpULT(diff, m_ir->getInt64(128)), compare_data_change_res), _begin_op, _inc_res, m_md_likely);
m_ir->CreateCondBr(m_ir->CreateAnd(m_ir->CreateICmpULT(diff, m_ir->getInt64(128)), second_test_for_complete_op), _begin_op, _inc_res, m_md_likely);
}
else
{
m_ir->CreateCondBr(compare_data_change_res, _begin_op, _inc_res, m_md_unlikely);
m_ir->CreateCondBr(second_test_for_complete_op, _begin_op, _inc_res, m_md_unlikely);
}
m_ir->SetInsertPoint(_begin_op);
@ -1323,7 +1340,14 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
if (!info.no_notify)
{
const auto notify_block = llvm::BasicBlock::Create(m_context, "__putllc16_block_notify", m_function);
const auto notify_next = llvm::BasicBlock::Create(m_context, "__putllc16_block_notify_next", m_function);
m_ir->CreateCondBr(compare_data_change_res, notify_block, notify_next);
m_ir->SetInsertPoint(notify_block);
call("atomic_wait_engine::notify_all", static_cast<void(*)(const void*)>(atomic_wait_engine::notify_all), rptr);
m_ir->CreateBr(notify_next);
m_ir->SetInsertPoint(notify_next);
}
m_ir->CreateBr(_success);
@ -1373,7 +1397,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
m_ir->SetInsertPoint(_final);
}
void putllc0_pattern(const spu_program& /*prog*/, utils::address_range32 /*range*/)
void putllc0_pattern(const spu_program& /*prog*/, u64 pattern_info)
{
// Prevent store elimination
m_block->store_context_ctr[s_reg_mfc_eal]++;
@ -1401,6 +1425,18 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
}
};
const union putllc16_or_0_info
{
u64 data;
bf_t<u64, 32, 18> required_pc;
bf_t<u64, 30, 2> type;
bf_t<u64, 29, 1> runtime16_select;
bf_t<u64, 28, 1> no_notify;
bf_t<u64, 18, 8> reg;
bf_t<u64, 0, 18> off18;
bf_t<u64, 0, 8> reg2;
} info = std::bit_cast<putllc16_or_0_info>(pattern_info);
const auto _next = llvm::BasicBlock::Create(m_context, "", m_function);
const auto _next0 = llvm::BasicBlock::Create(m_context, "", m_function);
const auto _fail = llvm::BasicBlock::Create(m_context, "", m_function);
@ -1409,6 +1445,19 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
const auto _eal = (get_reg_fixed<u32>(s_reg_mfc_eal) & -128).eval(m_ir);
const auto _raddr = m_ir->CreateLoad(get_type<u32>(), spu_ptr(&spu_thread::raddr));
if (info.required_pc && info.required_pc != SPU_LS_SIZE)
{
const auto short_op = llvm::BasicBlock::Create(m_context, "__putllc0_short_op", m_function);
const auto heavy_op = llvm::BasicBlock::Create(m_context, "__putllc0_heavy_op", m_function);
m_ir->CreateCondBr(m_ir->CreateICmpNE(m_ir->getInt32(info.required_pc), m_base_pc), heavy_op, short_op);
m_ir->SetInsertPoint(heavy_op);
update_pc();
call("spu_exec_mfc_cmd", &exec_mfc_cmd<false>, m_thread);
m_ir->CreateBr(_final);
m_ir->SetInsertPoint(short_op);
}
m_ir->CreateCondBr(m_ir->CreateAnd(m_ir->CreateICmpEQ(_eal, _raddr), m_ir->CreateIsNotNull(_raddr)), _next, _fail, m_md_likely);
m_ir->SetInsertPoint(_next);
@ -2143,12 +2192,12 @@ public:
{
case inst_attr::putllc0:
{
putllc0_pattern(func, m_patterns.at(m_pos - start).range);
putllc0_pattern(func, m_patterns.at(m_pos - start).info);
continue;
}
case inst_attr::putllc16:
{
putllc16_pattern(func, m_patterns.at(m_pos - start).range);
putllc16_pattern(func, m_patterns.at(m_pos - start).info);
continue;
}
case inst_attr::omit:

View file

@ -397,12 +397,12 @@ protected:
struct pattern_info
{
utils::address_range32 range;
u64 info;
};
std::unordered_map<u32, pattern_info> m_patterns;
void add_pattern(bool fill_all, inst_attr attr, u32 start, u32 end = -1);
void add_pattern(bool fill_all, inst_attr attr, u32 start, u64 info);
private:
// For private use

View file

@ -1036,7 +1036,6 @@ lv2_file::open_result_t lv2_file::open(std::string_view vpath, s32 flags, s32 mo
error_code sys_fs_open(ppu_thread& ppu, vm::cptr<char> path, s32 flags, vm::ptr<u32> fd, s32 mode, vm::cptr<void> arg, u64 size)
{
ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.warning("sys_fs_open(path=%s, flags=%#o, fd=*0x%x, mode=%#o, arg=*0x%x, size=0x%llx)", path, flags, fd, mode, arg, size);
@ -1085,7 +1084,6 @@ error_code sys_fs_open(ppu_thread& ppu, vm::cptr<char> path, s32 flags, vm::ptr<
error_code sys_fs_read(ppu_thread& ppu, u32 fd, vm::ptr<void> buf, u64 nbytes, vm::ptr<u64> nread)
{
ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.trace("sys_fs_read(fd=%d, buf=*0x%x, nbytes=0x%llx, nread=*0x%x)", fd, buf, nbytes, nread);
@ -1122,6 +1120,11 @@ error_code sys_fs_read(ppu_thread& ppu, u32 fd, vm::ptr<void> buf, u64 nbytes, v
return CELL_OK;
}
if (nbytes >= 0x100000 && file->type != lv2_file_type::regular)
{
lv2_obj::sleep(ppu);
}
std::unique_lock lock(file->mp->mutex);
if (!file->file)
@ -1154,7 +1157,6 @@ error_code sys_fs_read(ppu_thread& ppu, u32 fd, vm::ptr<void> buf, u64 nbytes, v
error_code sys_fs_write(ppu_thread& ppu, u32 fd, vm::cptr<void> buf, u64 nbytes, vm::ptr<u64> nwrite)
{
ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.trace("sys_fs_write(fd=%d, buf=*0x%x, nbytes=0x%llx, nwrite=*0x%x)", fd, buf, nbytes, nwrite);
@ -1237,7 +1239,6 @@ error_code sys_fs_write(ppu_thread& ppu, u32 fd, vm::cptr<void> buf, u64 nbytes,
error_code sys_fs_close(ppu_thread& ppu, u32 fd)
{
ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
const auto file = idm::get_unlocked<lv2_fs_object, lv2_file>(fd);
@ -1314,7 +1315,6 @@ error_code sys_fs_close(ppu_thread& ppu, u32 fd)
error_code sys_fs_opendir(ppu_thread& ppu, vm::cptr<char> path, vm::ptr<u32> fd)
{
ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.warning("sys_fs_opendir(path=%s, fd=*0x%x)", path, fd);
@ -1491,7 +1491,6 @@ error_code sys_fs_readdir(ppu_thread& ppu, u32 fd, vm::ptr<CellFsDirent> dir, vm
error_code sys_fs_closedir(ppu_thread& ppu, u32 fd)
{
ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.warning("sys_fs_closedir(fd=%d)", fd);
@ -1506,7 +1505,6 @@ error_code sys_fs_closedir(ppu_thread& ppu, u32 fd)
error_code sys_fs_stat(ppu_thread& ppu, vm::cptr<char> path, vm::ptr<CellFsStat> sb)
{
ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.warning("sys_fs_stat(path=%s, sb=*0x%x)", path, sb);
@ -1610,7 +1608,6 @@ error_code sys_fs_stat(ppu_thread& ppu, vm::cptr<char> path, vm::ptr<CellFsStat>
error_code sys_fs_fstat(ppu_thread& ppu, u32 fd, vm::ptr<CellFsStat> sb)
{
ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.warning("sys_fs_fstat(fd=%d, sb=*0x%x)", fd, sb);
@ -1666,7 +1663,6 @@ error_code sys_fs_link(ppu_thread&, vm::cptr<char> from, vm::cptr<char> to)
error_code sys_fs_mkdir(ppu_thread& ppu, vm::cptr<char> path, s32 mode)
{
ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.warning("sys_fs_mkdir(path=%s, mode=%#o)", path, mode);
@ -1728,7 +1724,6 @@ error_code sys_fs_mkdir(ppu_thread& ppu, vm::cptr<char> path, s32 mode)
error_code sys_fs_rename(ppu_thread& ppu, vm::cptr<char> from, vm::cptr<char> to)
{
ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.warning("sys_fs_rename(from=%s, to=%s)", from, to);
@ -1794,7 +1789,6 @@ error_code sys_fs_rename(ppu_thread& ppu, vm::cptr<char> from, vm::cptr<char> to
error_code sys_fs_rmdir(ppu_thread& ppu, vm::cptr<char> path)
{
ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.warning("sys_fs_rmdir(path=%s)", path);
@ -1850,7 +1844,6 @@ error_code sys_fs_rmdir(ppu_thread& ppu, vm::cptr<char> path)
error_code sys_fs_unlink(ppu_thread& ppu, vm::cptr<char> path)
{
ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.warning("sys_fs_unlink(path=%s)", path);
@ -1951,8 +1944,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr<void> _arg, u32
case 0x8000000a: // cellFsReadWithOffset
case 0x8000000b: // cellFsWriteWithOffset
{
lv2_obj::sleep(ppu);
const auto arg = vm::static_ptr_cast<lv2_file_op_rw>(_arg);
if (_size < arg.size())
@ -1992,6 +1983,11 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr<void> _arg, u32
sys_fs.error("%s type: Writing %u bytes to FD=%d (path=%s)", file->type, arg->size, file->name.data());
}
if (op == 0x8000000a && file->type != lv2_file_type::regular && arg->size >= 0x100000)
{
lv2_obj::sleep(ppu);
}
std::unique_lock wlock(file->mp->mutex, std::defer_lock);
std::shared_lock rlock(file->mp->mutex, std::defer_lock);
@ -2047,8 +2043,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr<void> _arg, u32
case 0x80000009: // cellFsSdataOpenByFd
{
lv2_obj::sleep(ppu);
const auto arg = vm::static_ptr_cast<lv2_file_op_09>(_arg);
if (_size < arg.size())
@ -2102,8 +2096,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr<void> _arg, u32
case 0xc0000002: // cellFsGetFreeSize (TODO)
{
lv2_obj::sleep(ppu);
const auto arg = vm::static_ptr_cast<lv2_file_c0000002>(_arg);
const auto& mp = g_fxo->get<lv2_fs_mount_info_map>().lookup("/dev_hdd0");
@ -2418,8 +2410,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr<void> _arg, u32
case 0xe0000012: // cellFsGetDirectoryEntries
{
lv2_obj::sleep(ppu);
const auto arg = vm::static_ptr_cast<lv2_file_op_dir::dir_info>(_arg);
if (_size < arg.size())
@ -2434,8 +2424,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr<void> _arg, u32
return CELL_EBADF;
}
ppu.check_state();
u32 read_count = 0;
// NOTE: This function is actually capable of reading only one entry at a time
@ -2593,7 +2581,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr<void> _arg, u32
error_code sys_fs_lseek(ppu_thread& ppu, u32 fd, s64 offset, s32 whence, vm::ptr<u64> pos)
{
ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.trace("sys_fs_lseek(fd=%d, offset=0x%llx, whence=0x%x, pos=*0x%x)", fd, offset, whence, pos);
@ -2639,7 +2626,6 @@ error_code sys_fs_lseek(ppu_thread& ppu, u32 fd, s64 offset, s32 whence, vm::ptr
error_code sys_fs_fdatasync(ppu_thread& ppu, u32 fd)
{
ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.trace("sys_fs_fdadasync(fd=%d)", fd);
@ -2650,6 +2636,8 @@ error_code sys_fs_fdatasync(ppu_thread& ppu, u32 fd)
return CELL_EBADF;
}
lv2_obj::sleep(ppu);
std::lock_guard lock(file->mp->mutex);
if (!file->file)
@ -2664,7 +2652,6 @@ error_code sys_fs_fdatasync(ppu_thread& ppu, u32 fd)
error_code sys_fs_fsync(ppu_thread& ppu, u32 fd)
{
ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.trace("sys_fs_fsync(fd=%d)", fd);
@ -2675,6 +2662,8 @@ error_code sys_fs_fsync(ppu_thread& ppu, u32 fd)
return CELL_EBADF;
}
lv2_obj::sleep(ppu);
std::lock_guard lock(file->mp->mutex);
if (!file->file)
@ -2763,7 +2752,6 @@ error_code sys_fs_get_block_size(ppu_thread& ppu, vm::cptr<char> path, vm::ptr<u
error_code sys_fs_truncate(ppu_thread& ppu, vm::cptr<char> path, u64 size)
{
ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.warning("sys_fs_truncate(path=%s, size=0x%llx)", path, size);
@ -2815,7 +2803,6 @@ error_code sys_fs_truncate(ppu_thread& ppu, vm::cptr<char> path, u64 size)
error_code sys_fs_ftruncate(ppu_thread& ppu, u32 fd, u64 size)
{
ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.warning("sys_fs_ftruncate(fd=%d, size=0x%llx)", fd, size);
@ -3021,7 +3008,6 @@ error_code sys_fs_disk_free(ppu_thread& ppu, vm::cptr<char> path, vm::ptr<u64> t
error_code sys_fs_utime(ppu_thread& ppu, vm::cptr<char> path, vm::cptr<CellFsUtimbuf> timep)
{
ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.warning("sys_fs_utime(path=%s, timep=*0x%x)", path, timep);
sys_fs.warning("** actime=%u, modtime=%u", timep->actime, timep->modtime);

View file

@ -80,6 +80,7 @@ namespace rsx
std::span<T> as_span() const
{
auto bytes = data();
ensure((reinterpret_cast<uintptr_t>(bytes) & (sizeof(T) - 1)) == 0, "IO buffer span cast requires naturally aligned pointers.");
return { utils::bless<T>(bytes), m_size / sizeof(T) };
}

View file

@ -3,11 +3,53 @@
#include <util/types.hpp>
#include <functional>
#include <algorithm>
#include <cstdlib>
#include "reverse_ptr.hpp"
namespace rsx
{
namespace aligned_allocator
{
template <size_t Align>
void* malloc(size_t size)
{
#ifdef _WIN32
return _aligned_malloc(size, Align);
#else
return std::aligned_alloc(Align, size);
#endif
}
template <size_t Align>
void* realloc(void* prev_ptr, [[maybe_unused]] size_t prev_size, size_t new_size)
{
if (prev_size >= new_size)
{
return prev_ptr;
}
ensure(reinterpret_cast<usz>(prev_ptr) % Align == 0, "Pointer not aligned to Align");
#ifdef _WIN32
return _aligned_realloc(prev_ptr, new_size, Align);
#else
void* ret = std::aligned_alloc(Align, new_size);
std::memcpy(ret, prev_ptr, std::min(prev_size, new_size));
std::free(prev_ptr);
return ret;
#endif
}
static inline void free(void* ptr)
{
#ifdef _WIN32
_aligned_free(ptr);
#else
std::free(ptr);
#endif
}
}
template <typename C, typename T>
concept span_like =
requires(C& c) {
@ -15,7 +57,13 @@ namespace rsx
{ c.size() } -> std::integral;
};
template <typename Ty>
template <typename T, typename U>
concept is_trivially_comparable_v =
requires (T t1, U t2) {
{ t1 == t2 } -> std::same_as<bool>;
};
template <typename Ty, size_t Align=alignof(Ty)>
requires std::is_trivially_destructible_v<Ty> && std::is_trivially_copyable_v<Ty>
struct simple_array
{
@ -28,7 +76,7 @@ namespace rsx
private:
static constexpr u32 _local_capacity = std::max<u32>(64u / sizeof(Ty), 1u);
char _local_storage[_local_capacity * sizeof(Ty)];
alignas(Align) char _local_storage[_local_capacity * sizeof(Ty)];
u32 _capacity = _local_capacity;
Ty* _data = _local_capacity ? reinterpret_cast<Ty*>(_local_storage) : nullptr;
@ -128,7 +176,7 @@ namespace rsx
{
if (!is_local_storage())
{
free(_data);
aligned_allocator::free(_data);
}
_data = nullptr;
@ -196,13 +244,13 @@ namespace rsx
if (is_local_storage())
{
// Switch to heap storage
ensure(_data = static_cast<Ty*>(std::malloc(sizeof(Ty) * size)));
ensure(_data = static_cast<Ty*>(aligned_allocator::malloc<Align>(sizeof(Ty) * size)));
std::memcpy(static_cast<void*>(_data), _local_storage, size_bytes());
}
else
{
// Extend heap storage
ensure(_data = static_cast<Ty*>(std::realloc(_data, sizeof(Ty) * size))); // "realloc() failed!"
ensure(_data = static_cast<Ty*>(aligned_allocator::realloc<Align>(_data, size_bytes(), sizeof(Ty) * size))); // "realloc() failed!"
}
_capacity = size;
@ -457,6 +505,50 @@ namespace rsx
return false;
}
/**
* Note that find and find_if return pointers to objects and not iterators for simplified usage.
* It is functionally equivalent to retrieve a nullptr meaning empty object stored and nullptr meaning not found for all practical uses of this container.
*/
template <typename T = Ty>
requires is_trivially_comparable_v<Ty, T>
Ty* find(const T& value)
{
for (auto it = begin(); it != end(); ++it)
{
if (*it == value)
{
return &(*it);
}
}
return nullptr;
}
// Remove when we switch to C++23
template <typename T = Ty>
requires is_trivially_comparable_v<Ty, T>
const Ty* find(const T& value) const
{
return const_cast<simple_array<Ty, Align>*>(this)->find(value);
}
Ty* find_if(std::predicate<const Ty&> auto predicate)
{
for (auto it = begin(); it != end(); ++it)
{
if (std::invoke(predicate, *it))
{
return &(*it);
}
}
return nullptr;
}
// Remove with C++23
const Ty* find_if(std::predicate<const Ty&> auto predicate) const
{
return const_cast<simple_array<Ty, Align>*>(this)->find_if(predicate);
}
bool erase_if(std::predicate<const Ty&> auto predicate)
{
if (!_size)

View file

@ -338,10 +338,10 @@ namespace gl
params.logd = rsx::ceil_log2(depth);
set_parameters(cmd);
const u32 num_bytes_per_invocation = (sizeof(_BlockType) * optimal_group_size);
const u32 texels_per_dword = std::max<u32>(4u / sizeof(_BlockType), 1u); // For block sizes less than 4 bytes wide
const u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation) / texels_per_dword;
compute_task::run(cmd, linear_invocations);
const u32 word_count_per_invocation = std::max<u32>(sizeof(_BlockType) / 4u, 1u);
const u32 num_bytes_per_invocation = (word_count_per_invocation * 4u * optimal_group_size);
const u32 workgroup_invocations = utils::aligned_div(data_length, num_bytes_per_invocation);
compute_task::run(cmd, workgroup_invocations);
}
};

View file

@ -590,7 +590,7 @@ namespace gl
void fill_texture(gl::command_context& cmd, texture* dst, int format,
const std::vector<rsx::subresource_layout> &input_layouts,
bool is_swizzled, GLenum gl_format, GLenum gl_type, rsx::simple_array<std::byte>& staging_buffer)
bool is_swizzled, GLenum gl_format, GLenum gl_type, std::span<std::byte> staging_buffer)
{
const auto& driver_caps = gl::get_driver_caps();
rsx::texture_uploader_capabilities caps
@ -841,7 +841,7 @@ namespace gl
void upload_texture(gl::command_context& cmd, texture* dst, u32 gcm_format, bool is_swizzled, const std::vector<rsx::subresource_layout>& subresources_layout)
{
// Calculate staging buffer size
rsx::simple_array<std::byte> data_upload_buf;
rsx::simple_array<std::byte, sizeof(u128)> data_upload_buf;
rsx::texture_uploader_capabilities caps { .supports_dxt = gl::get_driver_caps().EXT_texture_compression_s3tc_supported };
if (rsx::is_compressed_host_format(caps, gcm_format))

View file

@ -0,0 +1,39 @@
#pragma once
#include <util/asm.hpp>
#include "IR.h"
#include <list>
struct RSXFragmentProgram;
namespace rsx::assembler
{
struct FlowGraph
{
	// Basic blocks in creation order; std::list keeps block pointers stable
	// across insertions, so raw BasicBlock* handles remain valid.
	std::list<BasicBlock> blocks;

	// Appends a new block starting at 'pc' and returns a pointer to it.
	// When no explicit parent is given, the most recently created block
	// (if any) becomes the parent; parent and child are then cross-linked
	// with an edge of 'edge_type' in both directions.
	BasicBlock* push(BasicBlock* parent = nullptr, u32 pc = 0, EdgeType edge_type = EdgeType::NONE)
	{
		if (parent == nullptr && !blocks.empty())
		{
			parent = &blocks.back();
		}

		BasicBlock& created = blocks.emplace_back();
		created.id = pc;

		if (parent != nullptr)
		{
			parent->insert_succ(&created, edge_type);
			created.insert_pred(parent, edge_type);
		}

		return &created;
	}
};
FlowGraph deconstruct_fragment_program(const RSXFragmentProgram& prog);
}

View file

@ -0,0 +1,193 @@
#include "stdafx.h"
#include "CFG.h"
#include "Emu/RSX/Common/simple_array.hpp"
#include "Emu/RSX/Program/RSXFragmentProgram.h"
#include <util/asm.hpp>
#include <util/v128.hpp>
#include <span>
#if defined(ARCH_ARM64)
#if !defined(_MSC_VER)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#pragma GCC diagnostic ignored "-Wold-style-cast"
#endif
#undef FORCE_INLINE
#include "Emu/CPU/sse2neon.h"
#if !defined(_MSC_VER)
#pragma GCC diagnostic pop
#endif
#endif
namespace rsx::assembler
{
inline v128 decode_instruction(const v128& raw_inst)
{
	// Fixup of RSX's weird half-word shuffle for FP instructions:
	// swap the two bytes of every 16-bit lane so the input stream
	// becomes a LE u16 array.
	const __m128i hi_byte_mask = _mm_set1_epi32(0xff00ff00);
	const __m128i lo_byte_mask = _mm_set1_epi32(0x00ff00ff);

	const __m128i shifted_up = _mm_slli_epi32(static_cast<__m128i>(raw_inst), 8);
	const __m128i shifted_down = _mm_srli_epi32(static_cast<__m128i>(raw_inst), 8);

	const __m128i swapped = _mm_or_si128(
		_mm_and_si128(hi_byte_mask, shifted_up),
		_mm_and_si128(lo_byte_mask, shifted_down));

	return v128::loadu(&swapped);
}
FlowGraph deconstruct_fragment_program(const RSXFragmentProgram& prog)
{
	// Builds a control flow graph from the raw fragment program bytecode.
	// Only flow-control opcodes (IFE, LOOP, REP, BRK, RET, CAL) shape the
	// graph; every other instruction is copied verbatim into the current
	// basic block. For a flowgraph, we don't care at all about the actual
	// contents, just flow control instructions.
	OPDEST dst{};
	SRC0 src0{};
	SRC1 src1{};
	SRC2 src2{};

	u32 pc = 0;       // Program counter, counted in 16-byte instruction slots
	bool end = false; // Latched from the instruction's "end" bit

	// Flow control data: blocks that begin at the END/ELSE targets of
	// branches already decoded.
	rsx::simple_array<BasicBlock*> end_blocks;
	rsx::simple_array<BasicBlock*> else_blocks;

	// Data block
	u32* data = static_cast<u32*>(prog.get_data());

	// Output. The graph always starts with an entry block at pc = 0.
	FlowGraph graph{};
	BasicBlock* bb = graph.push();

	// Returns the already-created block starting at instruction slot 'id', if any.
	auto find_block_for_pc = [&](u32 id) -> BasicBlock*
	{
		auto found = std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == id));
		if (found != graph.blocks.end())
		{
			return &(*found);
		}
		return nullptr;
	};

	// Links 'parent' to the block starting at 'id', creating the block first if needed.
	auto safe_insert_block = [&](BasicBlock* parent, u32 id, EdgeType edge_type) -> BasicBlock*
	{
		if (auto found = find_block_for_pc(id))
		{
			parent->insert_succ(found, edge_type);
			found->insert_pred(parent, edge_type);
			return found;
		}

		return graph.push(parent, id, edge_type);
	};

	// True when any source operand references an embedded literal constant,
	// which occupies the 16-byte slot immediately following the instruction.
	auto includes_literal_constant = [&]()
	{
		return src0.reg_type == RSX_FP_REGISTER_TYPE_CONSTANT ||
			src1.reg_type == RSX_FP_REGISTER_TYPE_CONSTANT ||
			src2.reg_type == RSX_FP_REGISTER_TYPE_CONSTANT;
	};

	while (!end)
	{
		// If this pc starts a previously announced END/ELSE block, switch to it.
		BasicBlock** found = end_blocks.find_if(FN(x->id == pc));
		if (!found)
		{
			found = else_blocks.find_if(FN(x->id == pc));
		}

		if (found)
		{
			bb = *found;
		}

		const v128 raw_inst = v128::loadu(data, pc);
		v128 decoded = decode_instruction(raw_inst);

		dst.HEX = decoded._u32[0];
		src0.HEX = decoded._u32[1];
		src1.HEX = decoded._u32[2];
		src2.HEX = decoded._u32[3];

		end = !!dst.end;

		const u32 opcode = dst.opcode | (src1.opcode_is_branch << 6);
		if (opcode == RSX_FP_OPCODE_NOP)
		{
			pc++;
			continue;
		}

		bb->instructions.push_back({});
		auto& ir_inst = bb->instructions.back();
		std::memcpy(ir_inst.bytecode, &decoded._u32[0], 16);
		ir_inst.length = 4;
		ir_inst.addr = pc * 16;

		switch (opcode)
		{
		case RSX_FP_OPCODE_BRK:
			break;
		case RSX_FP_OPCODE_CAL:
			// Unimplemented. Also unused by the RSX compiler
			fmt::throw_exception("Unimplemented FP CAL instruction.");
			break;
		case RSX_FP_OPCODE_FENCT:
			break;
		case RSX_FP_OPCODE_FENCB:
			break;
		case RSX_FP_OPCODE_RET:
			// Outside a subroutine, this doesn't mean much. The main block can conditionally return to stop execution early.
			// This will not alter flow control.
			break;
		case RSX_FP_OPCODE_IFE:
		{
			// Inserts if and else and end blocks
			auto parent = bb;
			bb = safe_insert_block(parent, pc + 1, EdgeType::IF);

			if (src2.end_offset != src1.else_offset)
			{
				else_blocks.push_back(safe_insert_block(parent, src1.else_offset >> 2, EdgeType::ELSE));
			}
			end_blocks.push_back(safe_insert_block(parent, src2.end_offset >> 2, EdgeType::ENDIF));
			break;
		}
		case RSX_FP_OPCODE_LOOP:
		case RSX_FP_OPCODE_REP:
		{
			// Inserts for and end blocks
			auto parent = bb;
			bb = safe_insert_block(parent, pc + 1, EdgeType::LOOP);
			end_blocks.push_back(safe_insert_block(parent, src2.end_offset >> 2, EdgeType::ENDLOOP));
			break;
		}
		default:
			if (includes_literal_constant())
			{
				// The embedded literal occupies the *next* 16-byte slot.
				// Fix: read from pc + 1 — reading from pc would duplicate the
				// instruction words into the literal half of the bytecode
				// instead of capturing the constant data.
				const v128 constant_literal = v128::loadu(data, pc + 1);
				v128 decoded_literal = decode_instruction(constant_literal);
				std::memcpy(ir_inst.bytecode + 4, &decoded_literal._u32[0], 16);
				ir_inst.length += 4;
				pc++; // Skip the literal slot
			}
		}

		pc++;
	}

	// Sort edges for each block by distance
	for (auto& block : graph.blocks)
	{
		std::sort(block.pred.begin(), block.pred.end(), FN(x.from->id > y.from->id));
		std::sort(block.succ.begin(), block.succ.end(), FN(x.to->id < y.to->id));
	}

	// Sort block nodes by distance
	graph.blocks.sort(FN(x.id < y.id));
	return graph;
}
}

View file

@ -0,0 +1,95 @@
#pragma once
#include <util/asm.hpp>
namespace rsx::assembler
{
struct BasicBlock;
// Identifies a fragment-program register by index and precision class.
struct Register
{
	int id = 0;       // Register index within its file
	bool f16 = false; // True for the half-precision register file — presumably the RSX "H" registers; verify against the decoder
};
// A register operand reference together with the vector lanes it touches.
struct RegisterRef
{
	Register reg{};

	// Vector information
	// NOTE(review): the xyzw bit-fields are accessed through 'mask' as a type
	// pun via the union; this relies on implementation-defined bit-field
	// layout — TODO confirm all supported compilers pack these into the low bits.
	union
	{
		u32 mask;
		struct
		{
			bool x : 1;
			bool y : 1;
			bool z : 1;
			bool w : 1;
		};
	};
};
// One decoded fragment-program instruction, including any embedded literal.
struct Instruction
{
	// Raw data. Every instruction is max 128 bits.
	// Each instruction can also have 128 bits of literal/embedded data.
	u32 bytecode[8]{}; // Words 0-3: instruction; words 4-7: literal when present (idiomatic aggregate zero-init; the nested "{ {} }" braces were redundant)
	u32 addr = 0;      // Byte offset of this instruction within the program

	// Decoded
	u32 opcode = 0;
	u8 length = 4; // Length in dwords (8 when an embedded literal follows)

	// Padding
	u8 reserved0 = 0;
	u16 reserved1 = 0;

	// References
	std::vector<RegisterRef> srcs; // Source operands read by this instruction
	std::vector<RegisterRef> dsts; // Destination operands written by this instruction
};
// Classifies the control-flow construct that created an edge between blocks.
enum class EdgeType
{
	NONE,    // Unclassified / default edge
	IF,      // Into the taken body of an IFE
	ELSE,    // Into the else body of an IFE
	ENDIF,   // To the merge block after an IFE
	LOOP,    // Into a LOOP/REP body
	ENDLOOP, // To the block following a LOOP/REP
};
// Directed control-flow edge between two basic blocks.
struct FlowEdge
{
	EdgeType type = EdgeType::NONE; // Construct that created this edge
	BasicBlock* from = nullptr;     // Source block (non-owning)
	BasicBlock* to = nullptr;       // Destination block (non-owning)
};
// A straight-line run of instructions with explicit flow edges in and out.
struct BasicBlock
{
	u32 id = 0; // pc (in instruction slots) of the first instruction in this block

	std::vector<Instruction> instructions; // Program instructions for the RSX processor
	std::vector<FlowEdge> succ; // Forward edges. Sorted closest first.
	std::vector<FlowEdge> pred; // Back edges. Sorted closest first.

	std::vector<Instruction> prologue; // Prologue, created by passes
	std::vector<Instruction> epilogue; // Epilogue, created by passes

	// Records a forward edge to 'b'. NOTE: the returned pointer aliases the
	// 'succ' vector and is invalidated by any later insertion.
	FlowEdge* insert_succ(BasicBlock* b, EdgeType type = EdgeType::NONE)
	{
		return &succ.emplace_back(FlowEdge{ .type = type, .from = this, .to = b });
	}

	// Records a back edge from 'b'. Same lifetime caveat as insert_succ.
	FlowEdge* insert_pred(BasicBlock* b, EdgeType type = EdgeType::NONE)
	{
		return &pred.emplace_back(FlowEdge{ .type = type, .from = b, .to = this });
	}
};
}

View file

@ -234,7 +234,8 @@ std::string FragmentProgramDecompiler::AddCond()
std::string FragmentProgramDecompiler::AddConst()
{
const u32 constant_id = m_size + (4 * sizeof(u32));
ensure(m_instruction->length == 8);
const u32 constant_id = m_instruction->addr + 16;
u32 index = umax;
if (auto found = m_constant_offsets.find(constant_id);
@ -249,9 +250,6 @@ std::string FragmentProgramDecompiler::AddConst()
m_constant_offsets[constant_id] = index;
}
// Skip next instruction, its just a literal
m_offset = 2 * 4 * sizeof(u32);
// Return the next offset index
return "_fetch_constant(" + std::to_string(index) + ")";
}
@ -1297,7 +1295,7 @@ bool FragmentProgramDecompiler::handle_tex_srb(u32 opcode)
std::string FragmentProgramDecompiler::Decompile()
{
auto data = static_cast<be_t<u32>*>(m_prog.get_data());
const auto graph = rsx::assembler::deconstruct_fragment_program(m_prog);
m_size = 0;
m_location = 0;
m_loop_count = 0;
@ -1314,141 +1312,130 @@ std::string FragmentProgramDecompiler::Decompile()
int forced_unit = FORCE_NONE;
while (true)
for (const auto &block : graph.blocks)
{
for (auto found = std::find(m_end_offsets.begin(), m_end_offsets.end(), m_size);
found != m_end_offsets.end();
found = std::find(m_end_offsets.begin(), m_end_offsets.end(), m_size))
// TODO: Handle block prologue if any
if (!block.pred.empty())
{
m_end_offsets.erase(found);
m_code_level--;
AddCode("}");
m_loop_count--;
// CFG guarantees predecessors are sorted, closest one first
for (const auto& pred : block.pred)
{
switch (pred.type)
{
case rsx::assembler::EdgeType::ENDLOOP:
m_loop_count--;
[[ fallthrough ]];
case rsx::assembler::EdgeType::ENDIF:
m_code_level--;
AddCode("}");
break;
case rsx::assembler::EdgeType::LOOP:
m_loop_count++;
[[ fallthrough ]];
case rsx::assembler::EdgeType::IF:
// Instruction will be inserted by the SIP decoder
AddCode("{");
m_code_level++;
break;
case rsx::assembler::EdgeType::ELSE:
// This one needs more testing
m_code_level--;
AddCode("}");
AddCode("else");
AddCode("{");
m_code_level++;
break;
default:
// Start a new block anyway
fmt::throw_exception("Unexpected block found");
}
}
}
for (auto found = std::find(m_else_offsets.begin(), m_else_offsets.end(), m_size);
found != m_else_offsets.end();
found = std::find(m_else_offsets.begin(), m_else_offsets.end(), m_size))
for (const auto& inst : block.instructions)
{
m_else_offsets.erase(found);
m_code_level--;
AddCode("}");
AddCode("else");
AddCode("{");
m_code_level++;
}
m_instruction = &inst;
dst.HEX = GetData(data[0]);
src0.HEX = GetData(data[1]);
src1.HEX = GetData(data[2]);
src2.HEX = GetData(data[3]);
dst.HEX = inst.bytecode[0];
src0.HEX = inst.bytecode[1];
src1.HEX = inst.bytecode[2];
src2.HEX = inst.bytecode[3];
m_offset = 4 * sizeof(u32);
opflags = 0;
opflags = 0;
const u32 opcode = dst.opcode | (src1.opcode_is_branch << 6);
const u32 opcode = dst.opcode | (src1.opcode_is_branch << 6);
auto SIP = [&]()
{
switch (opcode)
{
case RSX_FP_OPCODE_BRK:
if (m_loop_count) AddFlowOp("break");
else rsx_log.error("BRK opcode found outside of a loop");
break;
case RSX_FP_OPCODE_CAL:
rsx_log.error("Unimplemented SIP instruction: CAL");
break;
case RSX_FP_OPCODE_FENCT:
AddCode("//FENCT");
forced_unit = FORCE_SCT;
break;
case RSX_FP_OPCODE_FENCB:
AddCode("//FENCB");
forced_unit = FORCE_SCB;
break;
case RSX_FP_OPCODE_IFE:
AddCode("if($cond)");
break;
case RSX_FP_OPCODE_LOOP:
AddCode(fmt::format("$ifcond for(int i%u = %u; i%u < %u; i%u += %u) //LOOP",
m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment));
break;
case RSX_FP_OPCODE_REP:
AddCode(fmt::format("if($cond) for(int i%u = %u; i%u < %u; i%u += %u) //REP",
m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment));
break;
case RSX_FP_OPCODE_RET:
AddFlowOp("return");
break;
default:
return false;
}
return true;
};
auto SIP = [&]()
{
switch (opcode)
{
case RSX_FP_OPCODE_BRK:
if (m_loop_count) AddFlowOp("break");
else rsx_log.error("BRK opcode found outside of a loop");
case RSX_FP_OPCODE_NOP:
break;
case RSX_FP_OPCODE_CAL:
rsx_log.error("Unimplemented SIP instruction: CAL");
case RSX_FP_OPCODE_KIL:
properties.has_discard_op = true;
AddFlowOp("_kill()");
break;
case RSX_FP_OPCODE_FENCT:
AddCode("//FENCT");
forced_unit = FORCE_SCT;
break;
case RSX_FP_OPCODE_FENCB:
AddCode("//FENCB");
forced_unit = FORCE_SCB;
break;
case RSX_FP_OPCODE_IFE:
AddCode("if($cond)");
if (src2.end_offset != src1.else_offset)
m_else_offsets.push_back(src1.else_offset << 2);
m_end_offsets.push_back(src2.end_offset << 2);
AddCode("{");
m_code_level++;
break;
case RSX_FP_OPCODE_LOOP:
if (!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt)
{
AddCode(fmt::format("//$ifcond for(int i%u = %u; i%u < %u; i%u += %u) {} //-> %u //LOOP",
m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment, src2.end_offset));
}
else
{
AddCode(fmt::format("$ifcond for(int i%u = %u; i%u < %u; i%u += %u) //LOOP",
m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment));
m_loop_count++;
m_end_offsets.push_back(src2.end_offset << 2);
AddCode("{");
m_code_level++;
}
break;
case RSX_FP_OPCODE_REP:
if (!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt)
{
AddCode(fmt::format("//$ifcond for(int i%u = %u; i%u < %u; i%u += %u) {} //-> %u //REP",
m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment, src2.end_offset));
}
else
{
AddCode(fmt::format("if($cond) for(int i%u = %u; i%u < %u; i%u += %u) //REP",
m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment));
m_loop_count++;
m_end_offsets.push_back(src2.end_offset << 2);
AddCode("{");
m_code_level++;
}
break;
case RSX_FP_OPCODE_RET:
AddFlowOp("return");
break;
default:
return false;
int prev_force_unit = forced_unit;
// Some instructions do not respect forced unit
// Tested with Tales of Vesperia
if (SIP()) break;
if (handle_tex_srb(opcode)) break;
// FENCT/FENCB do not actually reject instructions if they dont match the forced unit
// Looks like they are optimization hints and not hard-coded forced paths
if (handle_sct_scb(opcode)) break;
forced_unit = FORCE_NONE;
rsx_log.error("Unknown/illegal instruction: 0x%x (forced unit %d)", opcode, prev_force_unit);
break;
}
return true;
};
switch (opcode)
{
case RSX_FP_OPCODE_NOP:
break;
case RSX_FP_OPCODE_KIL:
properties.has_discard_op = true;
AddFlowOp("_kill()");
break;
default:
int prev_force_unit = forced_unit;
// Some instructions do not respect forced unit
// Tested with Tales of Vesperia
if (SIP()) break;
if (handle_tex_srb(opcode)) break;
// FENCT/FENCB do not actually reject instructions if they dont match the forced unit
// Looks like they are optimization hints and not hard-coded forced paths
if (handle_sct_scb(opcode)) break;
forced_unit = FORCE_NONE;
rsx_log.error("Unknown/illegal instruction: 0x%x (forced unit %d)", opcode, prev_force_unit);
break;
m_size += m_instruction->length * 4;
if (dst.end) break;
}
m_size += m_offset;
if (dst.end) break;
ensure(m_offset % sizeof(u32) == 0);
data += m_offset / sizeof(u32);
// TODO: Handle block epilogue if needed
}
while (m_code_level > 1)

View file

@ -3,6 +3,8 @@
#include "FragmentProgramRegister.h"
#include "RSXFragmentProgram.h"
#include "Assembler/CFG.h"
#include <sstream>
#include <unordered_map>
@ -39,17 +41,16 @@ class FragmentProgramDecompiler
SRC2 src2;
u32 opflags;
const rsx::assembler::Instruction* m_instruction;
std::string main;
u32& m_size;
u32 m_const_index = 0;
u32 m_offset;
u32 m_location = 0;
bool m_is_valid_ucode = true;
u32 m_loop_count;
int m_code_level;
std::vector<u32> m_end_offsets;
std::vector<u32> m_else_offsets;
std::unordered_map<u32, u32> m_constant_offsets;
std::array<rsx::MixedPrecisionRegister, 64> temp_registers;

View file

@ -103,34 +103,50 @@ uint get_z_index(const in uint x_, const in uint y_, const in uint z_)
#if USE_16BIT_ADDRESSING
void write16(inout uint accumulator, const in uint subword, const in uint src_id, const in uint dst_id)
void decode_16b(const in uint texel_id, in uint x, const in uint y, const in uint z)
{
const uint masks[] = { 0x0000FFFF, 0xFFFF0000 };
accumulator |= data_in[src_id / 2] & masks[subword];
uint accumulator = 0;
if (subword == 1)
const uint subword_count = min(invocation.size.x, 2);
for (uint subword = 0; subword < subword_count; ++subword, ++x)
{
data_out[dst_id / 2] = %f(accumulator);
uint src_texel_id = get_z_index(x, y, z);
uint src_id = (src_texel_id + invocation.data_offset);
int src_bit_offset = int(src_id % 2) << 4;
uint src_value = bitfieldExtract(data_in[src_id / 2], src_bit_offset, 16);
accumulator = bitfieldInsert(accumulator, src_value, int(subword << 4), 16);
}
data_out[texel_id / 2] = %f(accumulator);
}
#elif USE_8BIT_ADDRESSING
void write8(inout uint accumulator, const in uint subword, const in uint src_id, const in uint dst_id)
void decode_8b(const in uint texel_id, in uint x, const in uint y, const in uint z)
{
const uint masks[] = { 0x000000FF, 0x0000FF00, 0x00FF0000, 0xFF000000 };
accumulator |= data_in[src_id / 4] & masks[subword];
uint accumulator = 0;
if (subword == 3)
const uint subword_count = min(invocation.size.x, 4);
for (uint subword = 0; subword < subword_count; ++subword, ++x)
{
data_out[dst_id / 4] = accumulator;
uint src_texel_id = get_z_index(x, y, z);
uint src_id = (src_texel_id + invocation.data_offset);
int src_bit_offset = int(src_id % 4) << 3;
uint src_value = bitfieldExtract(data_in[src_id / 4], src_bit_offset, 8);
accumulator = bitfieldInsert(accumulator, src_value, int(subword << 3), 8);
}
data_out[texel_id / 4] = accumulator;
}
#else
void write32(const in uint word_count, in uint src_id, in uint dst_id)
void decode_32b(const in uint texel_id, const in uint word_count, const in uint x, const in uint y, const in uint z)
{
uint src_texel_id = get_z_index(x, y, z);
uint dst_id = (texel_id * word_count);
uint src_id = (src_texel_id + invocation.data_offset) * word_count;
for (uint i = 0; i < word_count; ++i)
{
uint value = data_in[src_id++];
@ -165,23 +181,11 @@ void main()
uint x = (slice_offset % row_length);
#if USE_8BIT_ADDRESSING
for (uint subword = 0, accumulator = 0; subword < 4; ++subword, ++x) {
decode_8b(texel_id, x, y, z);
#elif USE_16BIT_ADDRESSING
for (uint subword = 0, accumulator = 0; subword < 2; ++subword, ++x) {
#endif
uint src_texel_id = get_z_index(x, y, z);
uint dst_id = (texel_id * word_count);
uint src_id = (src_texel_id + invocation.data_offset) * word_count;
#if USE_8BIT_ADDRESSING
write8(accumulator, subword, src_id, dst_id);
}
#elif USE_16BIT_ADDRESSING
write16(accumulator, subword, src_id, dst_id);
}
decode_16b(texel_id, x, y, z);
#else
write32(word_count, src_id, dst_id);
decode_32b(texel_id, word_count, x, y, z);
#endif
}

View file

@ -475,10 +475,10 @@ namespace vk
params.logh = rsx::ceil_log2(height);
params.logd = rsx::ceil_log2(depth);
const u32 num_bytes_per_invocation = (sizeof(_BlockType) * optimal_group_size);
const u32 texels_per_dword = std::max<u32>(4u / sizeof(_BlockType), 1u); // For block sizes less than 4 bytes wide
const u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation) / texels_per_dword;
compute_task::run(cmd, linear_invocations);
const u32 word_count_per_invocation = std::max<u32>(sizeof(_BlockType) / 4u, 1u);
const u32 num_bytes_per_invocation = (word_count_per_invocation * 4u * optimal_group_size);
const u32 workgroup_invocations = utils::aligned_div(data_length, num_bytes_per_invocation);
compute_task::run(cmd, workgroup_invocations);
}
};

View file

@ -69,8 +69,9 @@ namespace vk
void on_query_pool_released(std::unique_ptr<vk::query_pool>& pool);
template<template<class> class _List>
void free_queries(vk::command_buffer& cmd, _List<u32>& list)
template<typename T>
requires std::ranges::range<T> && std::same_as<std::ranges::range_value_t<T>, u32> // List of u32
void free_queries(vk::command_buffer& cmd, T& list)
{
for (const auto index : list)
{

View file

@ -39,11 +39,20 @@ namespace vk
return false;
}
buffer::buffer(const vk::render_device& dev, u64 size, const memory_type_info& memory_type, u32 access_flags, VkBufferUsageFlags usage, VkBufferCreateFlags flags, vmm_allocation_pool allocation_pool)
buffer::buffer(
const vk::render_device& dev,
u64 size,
const memory_type_info& memory_type,
u32 access_flags,
VkBufferUsageFlags usage,
VkBufferCreateFlags flags,
vmm_allocation_pool allocation_pool)
: m_device(dev)
{
const bool nullable = !!(flags & VK_BUFFER_CREATE_ALLOW_NULL_RPCS3);
info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
info.flags = flags;
info.flags = flags & ~VK_BUFFER_CREATE_SPECIAL_FLAGS_RPCS3;
info.size = size;
info.usage = usage;
info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
@ -60,8 +69,18 @@ namespace vk
fmt::throw_exception("No compatible memory type was found!");
}
memory = std::make_unique<memory_block>(m_device, memory_reqs.size, memory_reqs.alignment, allocation_type_info, allocation_pool);
vkBindBufferMemory(dev, value, memory->get_vk_device_memory(), memory->get_vk_device_memory_offset());
memory = std::make_unique<memory_block>(m_device, memory_reqs.size, memory_reqs.alignment, allocation_type_info, allocation_pool, nullable);
if (auto device_memory = memory->get_vk_device_memory();
device_memory != VK_NULL_HANDLE)
{
vkBindBufferMemory(dev, value, device_memory, memory->get_vk_device_memory_offset());
}
else
{
ensure(nullable);
vkDestroyBuffer(m_device, value, nullptr);
value = VK_NULL_HANDLE;
}
}
buffer::buffer(const vk::render_device& dev, VkBufferUsageFlags usage, void* host_pointer, u64 size)

View file

@ -7,6 +7,13 @@
namespace vk
{
enum : u32
{
VK_BUFFER_CREATE_ALLOW_NULL_RPCS3 = 0x80000000,
VK_BUFFER_CREATE_SPECIAL_FLAGS_RPCS3 = (VK_BUFFER_CREATE_ALLOW_NULL_RPCS3)
};
struct buffer_view : public unique_resource
{
VkBufferView value;
@ -30,8 +37,21 @@ namespace vk
VkBufferCreateInfo info = {};
std::unique_ptr<vk::memory_block> memory;
buffer(const vk::render_device& dev, u64 size, const memory_type_info& memory_type, u32 access_flags, VkBufferUsageFlags usage, VkBufferCreateFlags flags, vmm_allocation_pool allocation_pool);
buffer(const vk::render_device& dev, VkBufferUsageFlags usage, void* host_pointer, u64 size);
buffer(
const vk::render_device& dev,
u64 size,
const memory_type_info& memory_type,
u32 access_flags,
VkBufferUsageFlags usage,
VkBufferCreateFlags flags,
vmm_allocation_pool allocation_pool);
buffer(
const vk::render_device& dev,
VkBufferUsageFlags usage,
void* host_pointer,
u64 size);
~buffer();
void* map(u64 offset, u64 size);

View file

@ -47,9 +47,28 @@ namespace vk
usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
memory_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
memory_index = memory_map.device_local;
m_prefer_writethrough = false;
}
heap = std::make_unique<buffer>(*g_render_device, size, memory_index, memory_flags, usage, 0, VMM_ALLOCATION_POOL_SYSTEM);
VkFlags create_flags = 0;
if (m_prefer_writethrough)
{
create_flags |= VK_BUFFER_CREATE_ALLOW_NULL_RPCS3;
}
heap = std::make_unique<buffer>(*g_render_device, size, memory_index, memory_flags, usage, create_flags, VMM_ALLOCATION_POOL_SYSTEM);
if (!heap->value)
{
rsx_log.warning("Could not place heap '%s' into Re-BAR memory. Will attempt to use regular host-visible memory.", m_name);
ensure(m_prefer_writethrough);
// We failed to place the buffer in rebar memory. Try again in host-visible.
m_prefer_writethrough = false;
auto gc = get_resource_manager();
gc->dispose(heap);
heap = std::make_unique<buffer>(*g_render_device, size, memory_map.host_visible_coherent, memory_flags, usage, 0, VMM_ALLOCATION_POOL_SYSTEM);
}
initial_size = size;
notify_on_grow = bool(notify);
@ -112,6 +131,7 @@ namespace vk
auto gc = get_resource_manager();
if (shadow)
{
ensure(!m_prefer_writethrough);
rsx_log.warning("Buffer usage %u is not heap-compatible using this driver, explicit staging buffer in use", usage);
gc->dispose(shadow);
@ -122,7 +142,25 @@ namespace vk
}
gc->dispose(heap);
heap = std::make_unique<buffer>(*g_render_device, aligned_new_size, memory_index, memory_flags, usage, 0, VMM_ALLOCATION_POOL_SYSTEM);
VkFlags create_flags = 0;
if (m_prefer_writethrough)
{
create_flags |= VK_BUFFER_CREATE_ALLOW_NULL_RPCS3;
}
heap = std::make_unique<buffer>(*g_render_device, aligned_new_size, memory_index, memory_flags, usage, create_flags, VMM_ALLOCATION_POOL_SYSTEM);
if (!heap->value)
{
rsx_log.warning("Could not place heap '%s' into Re-BAR memory. Will attempt to use regular host-visible memory.", m_name);
ensure(m_prefer_writethrough);
// We failed to place the buffer in rebar memory. Try again in host-visible.
m_prefer_writethrough = false;
gc->dispose(heap);
heap = std::make_unique<buffer>(*g_render_device, aligned_new_size, memory_map.host_visible_coherent, memory_flags, usage, 0, VMM_ALLOCATION_POOL_SYSTEM);
}
if (notify_on_grow)
{

View file

@ -75,7 +75,7 @@ atomic_t<u64> g_watchdog_hold_ctr{0};
extern bool ppu_load_exec(const ppu_exec_object&, bool virtual_load, const std::string&, utils::serial* = nullptr);
extern void spu_load_exec(const spu_exec_object&);
extern void spu_load_rel_exec(const spu_rel_object&);
extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_module<lv2_obj>*>* loaded_prx);
extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_module<lv2_obj>*>* loaded_prx, bool is_fast_compilation);
extern bool ppu_initialize(const ppu_module<lv2_obj>&, bool check_only = false, u64 file_size = 0);
extern void ppu_finalize(const ppu_module<lv2_obj>&);
extern void ppu_unload_prx(const lv2_prx&);
@ -1684,7 +1684,7 @@ game_boot_result Emulator::Load(const std::string& title_id, bool is_disc_patch,
}
}
g_fxo->init<named_thread>("SPRX Loader"sv, [this, dir_queue]() mutable
g_fxo->init<named_thread>("SPRX Loader"sv, [this, dir_queue, is_fast = m_precompilation_option.is_fast]() mutable
{
std::vector<ppu_module<lv2_obj>*> mod_list;
@ -1705,7 +1705,7 @@ game_boot_result Emulator::Load(const std::string& title_id, bool is_disc_patch,
return;
}
ppu_precompile(dir_queue, mod_list.empty() ? nullptr : &mod_list);
ppu_precompile(dir_queue, mod_list.empty() ? nullptr : &mod_list, is_fast);
if (Emu.IsStopped())
{
@ -3230,6 +3230,7 @@ void Emulator::Kill(bool allow_autoexit, bool savestate, savestate_stage* save_s
read_used_savestate_versions();
m_savestate_extension_flags1 = {};
m_emu_state_close_pending = false;
m_precompilation_option = {};
// Enable logging
rpcs3::utils::configure_logs(true);
@ -3824,6 +3825,7 @@ void Emulator::Kill(bool allow_autoexit, bool savestate, savestate_stage* save_s
read_used_savestate_versions();
m_savestate_extension_flags1 = {};
m_emu_state_close_pending = false;
m_precompilation_option = {};
initialize_timebased_time(0, true);

View file

@ -120,6 +120,11 @@ namespace utils
struct serial;
};
struct emu_precompilation_option_t
{
bool is_fast = false;
};
class Emulator final
{
atomic_t<system_state> m_state{system_state::stopped};
@ -188,6 +193,7 @@ class Emulator final
};
bs_t<SaveStateExtentionFlags1> m_savestate_extension_flags1{};
emu_precompilation_option_t m_precompilation_option{};
public:
static constexpr std::string_view game_id_boot_prefix = "%RPCS3_GAMEID%:";
@ -245,6 +251,11 @@ public:
m_state = system_state::running;
}
void SetPrecompileCacheOption(emu_precompilation_option_t option)
{
m_precompilation_option = option;
}
void Init();
std::vector<std::string> argv;

View file

@ -36,7 +36,7 @@ struct cfg_root : cfg::node
cfg::_int<0, 16> spu_delay_penalty{ this, "SPU delay penalty", 3 }; // Number of milliseconds to block a thread if a virtual 'core' isn't free
cfg::_bool spu_loop_detection{ this, "SPU loop detection", false }; // Try to detect wait loops and trigger thread yield
cfg::_int<1, 6> max_spurs_threads{ this, "Max SPURS Threads", 6, true }; // HACK. If less then 6, max number of running SPURS threads in each thread group.
cfg::_enum<spu_block_size_type> spu_block_size{ this, "SPU Block Size", spu_block_size_type::safe };
cfg::_enum<spu_block_size_type> spu_block_size{ this, "SPU Analyzer Block Size", spu_block_size_type::mega };
cfg::_bool spu_accurate_dma{ this, "Accurate SPU DMA", false };
cfg::_bool spu_accurate_reservations{ this, "Accurate SPU Reservations", true };
cfg::_bool accurate_cache_line_stores{ this, "Accurate Cache Line Stores", false };

View file

@ -101,6 +101,48 @@ namespace rpcs3::utils
return worker();
}
std::vector<std::pair<std::string, u64>> get_vfs_disk_usage()
{
std::vector<std::pair<std::string, u64>> disk_usage;
if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_hdd0_dir(), 1); data_size != umax)
{
disk_usage.push_back({"dev_hdd0", data_size});
}
if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_hdd1_dir(), 1); data_size != umax)
{
disk_usage.push_back({"dev_hdd1", data_size});
}
if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_flash_dir(), 1); data_size != umax)
{
disk_usage.push_back({"dev_flash", data_size});
}
if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_flash2_dir(), 1); data_size != umax)
{
disk_usage.push_back({"dev_flash2", data_size});
}
if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_flash3_dir(), 1); data_size != umax)
{
disk_usage.push_back({"dev_flash3", data_size});
}
if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_bdvd_dir(), 1); data_size != umax)
{
disk_usage.push_back({"dev_bdvd", data_size});
}
if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_games_dir(), 1); data_size != umax)
{
disk_usage.push_back({"games", data_size});
}
return disk_usage;
}
std::string get_emu_dir()
{
const std::string& emu_dir_ = g_cfg_vfs.emulator_dir;
@ -122,6 +164,36 @@ namespace rpcs3::utils
return g_cfg_vfs.get(g_cfg_vfs.dev_hdd1, get_emu_dir());
}
// Returns the host path of the dev_flash mount, resolved against the emulator directory.
std::string get_flash_dir()
{
	return g_cfg_vfs.get(g_cfg_vfs.dev_flash, get_emu_dir());
}
// Returns the host path of the dev_flash2 mount, resolved against the emulator directory.
std::string get_flash2_dir()
{
	return g_cfg_vfs.get(g_cfg_vfs.dev_flash2, get_emu_dir());
}
// Returns the host path of the dev_flash3 mount, resolved against the emulator directory.
std::string get_flash3_dir()
{
	return g_cfg_vfs.get(g_cfg_vfs.dev_flash3, get_emu_dir());
}
// Returns the host path of the dev_bdvd mount, resolved against the emulator directory.
std::string get_bdvd_dir()
{
	return g_cfg_vfs.get(g_cfg_vfs.dev_bdvd, get_emu_dir());
}
// Returns the total size in bytes of the cache directory,
// or 0 if the size could not be determined.
u64 get_cache_disk_usage()
{
	const u64 data_size = fs::get_dir_size(rpcs3::utils::get_cache_dir(), 1);
	return data_size != umax ? data_size : 0;
}
std::string get_cache_dir()
{
return fs::get_cache_dir() + "cache/";

View file

@ -23,10 +23,19 @@ namespace rpcs3::utils
bool install_pkg(const std::string& path);
// VFS directories and disk usage
std::vector<std::pair<std::string, u64>> get_vfs_disk_usage();
std::string get_emu_dir();
std::string get_games_dir();
std::string get_hdd0_dir();
std::string get_hdd1_dir();
std::string get_flash_dir();
std::string get_flash2_dir();
std::string get_flash3_dir();
std::string get_bdvd_dir();
// Cache directories and disk usage
u64 get_cache_disk_usage();
std::string get_cache_dir();
std::string get_cache_dir(std::string_view module_path);

View file

@ -156,6 +156,7 @@
<ClCompile Include="Emu\RSX\Overlays\Shaders\shader_loading_dialog.cpp" />
<ClCompile Include="Emu\RSX\Overlays\Shaders\shader_loading_dialog_native.cpp" />
<ClCompile Include="Emu\RSX\Overlays\Trophies\overlay_trophy_list_dialog.cpp" />
<ClCompile Include="Emu\RSX\Program\Assembler\FPToCFG.cpp" />
<ClCompile Include="Emu\RSX\Program\FragmentProgramRegister.cpp" />
<ClCompile Include="Emu\RSX\Program\ProgramStateCache.cpp" />
<ClCompile Include="Emu\RSX\Program\program_util.cpp" />
@ -699,6 +700,8 @@
<ClInclude Include="Emu\RSX\Overlays\overlay_progress_bar.hpp" />
<ClInclude Include="Emu\RSX\Overlays\overlay_video.h" />
<ClInclude Include="Emu\RSX\Overlays\Trophies\overlay_trophy_list_dialog.h" />
<ClInclude Include="Emu\RSX\Program\Assembler\CFG.h" />
<ClInclude Include="Emu\RSX\Program\Assembler\IR.h" />
<ClInclude Include="Emu\RSX\Program\FragmentProgramRegister.h" />
<ClInclude Include="Emu\RSX\Program\GLSLTypes.h" />
<ClInclude Include="Emu\RSX\Program\ProgramStateCache.h" />

View file

@ -133,6 +133,9 @@
<Filter Include="Emu\GPU\RSX\Program\MSAA">
<UniqueIdentifier>{ce6d6b90-8313-4273-b46c-d92bd450c002}</UniqueIdentifier>
</Filter>
<Filter Include="Emu\GPU\RSX\Program\Assembler">
<UniqueIdentifier>{d99df916-8a99-428b-869a-9f14ac0ab411}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<ClCompile Include="Crypto\aes.cpp">
@ -1372,6 +1375,9 @@
<ClCompile Include="Emu\Io\evdev_gun_handler.cpp">
<Filter>Emu\Io</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\Program\Assembler\FPToCFG.cpp">
<Filter>Emu\GPU\RSX\Program\Assembler</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="Crypto\aes.h">
@ -2764,6 +2770,12 @@
<ClInclude Include="util\pair.hpp">
<Filter>Utilities</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Program\Assembler\CFG.h">
<Filter>Emu\GPU\RSX\Program\Assembler</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Program\Assembler\IR.h">
<Filter>Emu\GPU\RSX\Program\Assembler</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="Emu\RSX\Program\GLSLSnippets\GPUDeswizzle.glsl">

View file

@ -975,9 +975,9 @@ QString emu_settings::GetLocalizedSetting(const QString& original, emu_settings_
case emu_settings_type::SPUBlockSize:
switch (static_cast<spu_block_size_type>(index))
{
case spu_block_size_type::safe: return tr("Safe", "SPU block size");
case spu_block_size_type::mega: return tr("Mega", "SPU block size");
case spu_block_size_type::giga: return tr("Giga", "SPU block size");
case spu_block_size_type::safe: return tr("Safe", "SPU Analyzer Block Size");
case spu_block_size_type::mega: return tr("Mega", "SPU Analyzer Block Size");
case spu_block_size_type::giga: return tr("Giga", "SPU Analyzer Block Size");
}
break;
case emu_settings_type::ThreadSchedulerMode:

View file

@ -239,7 +239,7 @@ inline static const std::map<emu_settings_type, cfg_location> settings_location
{ emu_settings_type::XFloatAccuracy, { "Core", "XFloat Accuracy"}},
{ emu_settings_type::MFCCommandsShuffling, { "Core", "MFC Commands Shuffling Limit"}},
{ emu_settings_type::SetDAZandFTZ, { "Core", "Set DAZ and FTZ"}},
{ emu_settings_type::SPUBlockSize, { "Core", "SPU Block Size"}},
{ emu_settings_type::SPUBlockSize, { "Core", "SPU Analyzer Block Size"}},
{ emu_settings_type::SPUCache, { "Core", "SPU Cache"}},
{ emu_settings_type::DebugConsoleMode, { "Core", "Debug Console Mode"}},
{ emu_settings_type::MaxSPURSThreads, { "Core", "Max SPURS Threads"}},

View file

@ -2011,10 +2011,11 @@ void game_list_frame::ShowContextMenu(const QPoint &pos)
menu.exec(global_pos);
}
bool game_list_frame::CreateCPUCaches(const std::string& path, const std::string& serial)
bool game_list_frame::CreateCPUCaches(const std::string& path, const std::string& serial, bool is_fast_compilation)
{
Emu.GracefulShutdown(false);
Emu.SetForceBoot(true);
Emu.SetPrecompileCacheOption(emu_precompilation_option_t{.is_fast = is_fast_compilation});
if (const auto error = Emu.BootGame(fs::is_file(path) ? fs::get_parent_dir(path) : path, serial, true); error != game_boot_result::no_errors)
{
@ -2026,9 +2027,9 @@ bool game_list_frame::CreateCPUCaches(const std::string& path, const std::string
return true;
}
bool game_list_frame::CreateCPUCaches(const game_info& game)
bool game_list_frame::CreateCPUCaches(const game_info& game, bool is_fast_compilation)
{
return game && CreateCPUCaches(game->info.path, game->info.serial);
return game && CreateCPUCaches(game->info.path, game->info.serial, is_fast_compilation);
}
bool game_list_frame::RemoveCustomConfiguration(const std::string& title_id, const game_info& game, bool is_interactive)
@ -2404,6 +2405,9 @@ void game_list_frame::BatchActionBySerials(progress_dialog* pdlg, const std::set
connect(pdlg, &progress_dialog::canceled, this, [pdlg](){ pdlg->deleteLater(); });
QApplication::beep();
// Signal termination back to the callback
action("");
if (refresh_on_finish && index)
{
Refresh(true);
@ -2414,7 +2418,7 @@ void game_list_frame::BatchActionBySerials(progress_dialog* pdlg, const std::set
QTimer::singleShot(1, this, *periodic_func);
}
void game_list_frame::BatchCreateCPUCaches(const std::vector<game_info>& game_data)
void game_list_frame::BatchCreateCPUCaches(const std::vector<game_info>& game_data, bool is_fast_compilation)
{
std::set<std::string> serials;
@ -2433,11 +2437,13 @@ void game_list_frame::BatchCreateCPUCaches(const std::vector<game_info>& game_da
if (total == 0)
{
QMessageBox::information(this, tr("LLVM Cache Batch Creation"), tr("No titles found"), QMessageBox::Ok);
Q_EMIT NotifyBatchedGameActionFinished();
return;
}
if (!m_gui_settings->GetBootConfirmation(this))
{
Q_EMIT NotifyBatchedGameActionFinished();
return;
}
@ -2459,13 +2465,19 @@ void game_list_frame::BatchCreateCPUCaches(const std::vector<game_info>& game_da
BatchActionBySerials(pdlg, serials, tr("%0\nProgress: %1/%2 caches compiled").arg(main_label),
[&, game_data](const std::string& serial)
{
if (serial.empty())
{
Q_EMIT NotifyBatchedGameActionFinished();
return false;
}
if (Emu.IsStopped(true))
{
const auto it = std::find_if(m_game_data.begin(), m_game_data.end(), FN(x->info.serial == serial));
if (it != m_game_data.end())
{
return CreateCPUCaches((*it)->info.path, serial);
return CreateCPUCaches((*it)->info.path, serial, is_fast_compilation);
}
}
@ -2512,7 +2524,7 @@ void game_list_frame::BatchRemovePPUCaches()
BatchActionBySerials(pdlg, serials, tr("%0/%1 caches cleared"),
[this](const std::string& serial)
{
return Emu.IsStopped(true) && RemovePPUCache(GetCacheDirBySerial(serial));
return !serial.empty() &&Emu.IsStopped(true) && RemovePPUCache(GetCacheDirBySerial(serial));
},
[this](u32, u32)
{
@ -2551,7 +2563,7 @@ void game_list_frame::BatchRemoveSPUCaches()
BatchActionBySerials(pdlg, serials, tr("%0/%1 caches cleared"),
[this](const std::string& serial)
{
return Emu.IsStopped(true) && RemoveSPUCache(GetCacheDirBySerial(serial));
return !serial.empty() && Emu.IsStopped(true) && RemoveSPUCache(GetCacheDirBySerial(serial));
},
[this](u32 removed, u32 total)
{
@ -2586,7 +2598,7 @@ void game_list_frame::BatchRemoveCustomConfigurations()
BatchActionBySerials(pdlg, serials, tr("%0/%1 custom configurations cleared"),
[this](const std::string& serial)
{
return Emu.IsStopped(true) && RemoveCustomConfiguration(serial);
return !serial.empty() && Emu.IsStopped(true) && RemoveCustomConfiguration(serial);
},
[this](u32 removed, u32 total)
{
@ -2620,7 +2632,7 @@ void game_list_frame::BatchRemoveCustomPadConfigurations()
BatchActionBySerials(pdlg, serials, tr("%0/%1 custom pad configurations cleared"),
[this](const std::string& serial)
{
return Emu.IsStopped(true) && RemoveCustomPadConfiguration(serial);
return !serial.empty() && Emu.IsStopped(true) && RemoveCustomPadConfiguration(serial);
},
[this](u32 removed, u32 total)
{
@ -2659,7 +2671,7 @@ void game_list_frame::BatchRemoveShaderCaches()
BatchActionBySerials(pdlg, serials, tr("%0/%1 shader caches cleared"),
[this](const std::string& serial)
{
return Emu.IsStopped(true) && RemoveShadersCache(GetCacheDirBySerial(serial));
return !serial.empty() && Emu.IsStopped(true) && RemoveShadersCache(GetCacheDirBySerial(serial));
},
[this](u32 removed, u32 total)
{

View file

@ -64,7 +64,7 @@ public:
bool IsEntryVisible(const game_info& game, bool search_fallback = false) const;
public Q_SLOTS:
void BatchCreateCPUCaches(const std::vector<game_info>& game_data = {});
void BatchCreateCPUCaches(const std::vector<game_info>& game_data = {}, bool is_fast_compilation = false);
void BatchRemovePPUCaches();
void BatchRemoveSPUCaches();
void BatchRemoveCustomConfigurations();
@ -96,6 +96,7 @@ Q_SIGNALS:
void FocusToSearchBar();
void Refreshed();
void RequestSaveStateManager(const game_info& game);
void NotifyBatchedGameActionFinished();
public:
template <typename KeyType>
@ -135,8 +136,8 @@ private:
bool RemovePPUCache(const std::string& base_dir, bool is_interactive = false);
bool RemoveSPUCache(const std::string& base_dir, bool is_interactive = false);
void RemoveHDD1Cache(const std::string& base_dir, const std::string& title_id, bool is_interactive = false);
static bool CreateCPUCaches(const std::string& path, const std::string& serial = {});
static bool CreateCPUCaches(const game_info& game);
static bool CreateCPUCaches(const std::string& path, const std::string& serial = {}, bool is_fast_compilation = false);
static bool CreateCPUCaches(const game_info& game, bool is_fast_compilation = false);
static bool RemoveContentPath(const std::string& path, const std::string& desc);
static u32 RemoveContentPathList(const std::vector<std::string>& path_list, const std::string& desc);

View file

@ -4,10 +4,14 @@
#include "hex_validator.h"
#include "memory_viewer_panel.h"
#include "Emu/System.h"
#include "Emu/system_utils.hpp"
#include "Utilities/lockless.h"
#include "util/asm.hpp"
#include <QtConcurrent>
#include <QMenu>
#include <QMessageBox>
#include <QActionGroup>
#include <QScrollBar>
#include <QVBoxLayout>
@ -17,6 +21,8 @@
#include <deque>
#include <mutex>
LOG_CHANNEL(sys_log, "SYS");
extern fs::file g_tty;
extern atomic_t<s64> g_tty_size;
extern std::array<std::deque<std::string>, 16> g_tty_input;
@ -165,6 +171,28 @@ log_frame::log_frame(std::shared_ptr<gui_settings> _gui_settings, QWidget* paren
connect(m_timer, &QTimer::timeout, this, &log_frame::UpdateUI);
}
// Formats and presents a disk usage summary.
//
// vfs_disk_usage:   list of (device name, size in bytes) pairs, as produced
//                   by rpcs3::utils::get_vfs_disk_usage()
// cache_disk_usage: size in bytes of the cache directory
//
// The summary is written to the SYS log and shown in a message box.
void log_frame::show_disk_usage(const std::vector<std::pair<std::string, u64>>& vfs_disk_usage, u64 cache_disk_usage)
{
	QString text;
	u64 tot_data_size = 0;

	// One line per device, accumulating the VFS total as we go
	for (const auto& [dev, data_size] : vfs_disk_usage)
	{
		text += tr("\n %0: %1").arg(QString::fromStdString(dev)).arg(gui::utils::format_byte_size(data_size));
		tot_data_size += data_size;
	}

	// Prepend the VFS total header only if at least one device was listed
	if (!text.isEmpty())
	{
		text = tr("\n VFS disk usage: %0%1").arg(gui::utils::format_byte_size(tot_data_size)).arg(text);
	}

	text += tr("\n Cache disk usage: %0").arg(gui::utils::format_byte_size(cache_disk_usage));

	sys_log.success("%s", text);
	QMessageBox::information(this, tr("Disk usage"), text);
}
void log_frame::SetLogLevel(logs::level lev) const
{
switch (lev)
@ -245,6 +273,26 @@ void log_frame::CreateAndConnectActions()
m_tty->clear();
});
m_show_disk_usage_act = new QAction(tr("Show Disk Usage"), this);
connect(m_show_disk_usage_act, &QAction::triggered, [this]()
{
if (m_disk_usage_future.isRunning())
{
return; // Still running the last request
}
m_disk_usage_future = QtConcurrent::run([this]()
{
const std::vector<std::pair<std::string, u64>> vfs_disk_usage = rpcs3::utils::get_vfs_disk_usage();
const u64 cache_disk_usage = rpcs3::utils::get_cache_disk_usage();
Emu.CallFromMainThread([this, vfs_disk_usage, cache_disk_usage]()
{
show_disk_usage(vfs_disk_usage, cache_disk_usage);
}, nullptr, false);
});
});
m_perform_goto_on_debugger = new QAction(tr("Go-To On The Debugger"), this);
connect(m_perform_goto_on_debugger, &QAction::triggered, [this]()
{
@ -369,6 +417,9 @@ void log_frame::CreateAndConnectActions()
{
QMenu* menu = m_log->createStandardContextMenu();
menu->addAction(m_clear_act);
menu->addSeparator();
menu->addAction(m_show_disk_usage_act);
menu->addSeparator();
menu->addAction(m_perform_goto_on_debugger);
menu->addAction(m_perform_goto_thread_on_debugger);
menu->addAction(m_perform_show_in_mem_viewer);

View file

@ -8,6 +8,7 @@
#include <memory>
#include <QFuture>
#include <QTabWidget>
#include <QPlainTextEdit>
#include <QActionGroup>
@ -38,6 +39,7 @@ protected:
private Q_SLOTS:
void UpdateUI();
private:
void show_disk_usage(const std::vector<std::pair<std::string, u64>>& vfs_disk_usage, u64 cache_disk_usage);
void SetLogLevel(logs::level lev) const;
void SetTTYLogging(bool val) const;
@ -48,6 +50,7 @@ private:
std::unique_ptr<find_dialog> m_find_dialog;
QTimer* m_timer = nullptr;
QFuture<void> m_disk_usage_future;
std::vector<QColor> m_color;
QColor m_color_stack;
@ -72,6 +75,7 @@ private:
QAction* m_clear_act = nullptr;
QAction* m_clear_tty_act = nullptr;
QAction* m_show_disk_usage_act = nullptr;
QAction* m_perform_goto_on_debugger = nullptr;
QAction* m_perform_goto_thread_on_debugger = nullptr;
QAction* m_perform_show_in_mem_viewer = nullptr;

View file

@ -1187,7 +1187,13 @@ bool main_window::HandlePackageInstallation(QStringList file_paths, bool from_bo
}
}
ShowOptionalGamePreparations(tr("Success!"), tr("Successfully installed software from package(s)!"), std::move(paths));
// Executes after PrecompileCachesFromInstalledPackages
m_notify_batch_game_action_cb = [this, paths]() mutable
{
ShowOptionalGamePreparations(tr("Success!"), tr("Successfully installed software from package(s)!"), std::move(paths));
};
PrecompileCachesFromInstalledPackages(paths);
});
}
@ -2368,8 +2374,7 @@ void main_window::ShowOptionalGamePreparations(const QString& title, const QStri
#else
QCheckBox* quick_check = new QCheckBox(tr("Add launcher shortcut(s)"));
#endif
QCheckBox* precompile_check = new QCheckBox(tr("Precompile caches"));
QLabel* label = new QLabel(tr("%1\nWould you like to install shortcuts to the installed software and precompile caches? (%2 new software detected)\n\n").arg(message).arg(bootable_paths.size()), dlg);
QLabel* label = new QLabel(tr("%1\nWould you like to install shortcuts to the installed software? (%2 new software detected)\n\n").arg(message).arg(bootable_paths.size()), dlg);
vlayout->addWidget(label);
vlayout->addStretch(10);
@ -2377,10 +2382,6 @@ void main_window::ShowOptionalGamePreparations(const QString& title, const QStri
vlayout->addStretch(3);
vlayout->addWidget(quick_check);
vlayout->addStretch(3);
vlayout->addWidget(precompile_check);
vlayout->addStretch(3);
precompile_check->setToolTip(tr("Spend time building data needed for game boot now instead of at launch."));
QDialogButtonBox* btn_box = new QDialogButtonBox(QDialogButtonBox::Ok);
@ -2391,7 +2392,6 @@ void main_window::ShowOptionalGamePreparations(const QString& title, const QStri
{
const bool create_desktop_shortcuts = desk_check->isChecked();
const bool create_app_shortcut = quick_check->isChecked();
const bool create_caches = precompile_check->isChecked();
dlg->hide();
dlg->accept();
@ -2411,12 +2411,11 @@ void main_window::ShowOptionalGamePreparations(const QString& title, const QStri
locations.insert(gui::utils::shortcut_location::applications);
}
if (locations.empty() && !create_caches)
if (locations.empty())
{
return;
}
std::vector<game_info> game_data;
std::vector<game_info> game_data_shortcuts;
for (const auto& [boot_path, title_id] : paths)
@ -2431,11 +2430,6 @@ void main_window::ShowOptionalGamePreparations(const QString& title, const QStri
{
game_data_shortcuts.push_back(gameinfo);
}
if (create_caches)
{
game_data.push_back(gameinfo);
}
}
break;
@ -2447,17 +2441,39 @@ void main_window::ShowOptionalGamePreparations(const QString& title, const QStri
{
m_game_list_frame->CreateShortcuts(game_data_shortcuts, locations);
}
if (!game_data.empty())
{
m_game_list_frame->BatchCreateCPUCaches(game_data);
}
});
dlg->setAttribute(Qt::WA_DeleteOnClose);
dlg->open();
}
// Queues CPU cache precompilation for the titles that were just installed.
//
// bootable_paths: map of boot path -> title id for the installed packages.
//
// For each installed title, the matching entry in the game list is looked up
// by serial; it is only queued if the boot path actually lies inside that
// entry's game directory. Compilation runs in "fast" mode.
void main_window::PrecompileCachesFromInstalledPackages(const std::map<std::string, QString>& bootable_paths)
{
	std::vector<game_info> game_data;

	for (const auto& [boot_path, title_id] : bootable_paths)
	{
		for (const game_info& gameinfo : m_game_list_frame->GetGameInfo())
		{
			if (gameinfo && gameinfo->info.serial == title_id.toStdString())
			{
				if (Emu.IsPathInsideDir(boot_path, gameinfo->info.path))
				{
					game_data.push_back(gameinfo);
				}

				// Serials are unique in the game list: stop at the first match
				// even if the path check failed.
				break;
			}
		}
	}

	if (!game_data.empty())
	{
		// true = fast compilation
		m_game_list_frame->BatchCreateCPUCaches(game_data, true);
	}
}
void main_window::CreateActions()
{
ui->exitAct->setShortcuts(QKeySequence::Quit);
@ -3401,6 +3417,15 @@ void main_window::CreateConnects()
connect(ui->mw_searchbar, &QLineEdit::textChanged, m_game_list_frame, &game_list_frame::SetSearchText);
connect(ui->mw_searchbar, &QLineEdit::returnPressed, m_game_list_frame, &game_list_frame::FocusAndSelectFirstEntryIfNoneIs);
connect(m_game_list_frame, &game_list_frame::FocusToSearchBar, this, [this]() { ui->mw_searchbar->setFocus(); });
connect(m_game_list_frame, &game_list_frame::NotifyBatchedGameActionFinished, this, [this]() mutable
{
if (m_notify_batch_game_action_cb)
{
m_notify_batch_game_action_cb();
m_notify_batch_game_action_cb = {};
}
});
}
void main_window::CreateDockWindows()

View file

@ -48,6 +48,7 @@ class main_window : public QMainWindow
bool m_save_slider_pos = false;
bool m_requested_show_logs_on_exit = false;
int m_other_slider_pos = 0;
std::function<void()> m_notify_batch_game_action_cb;
QIcon m_app_icon;
QIcon m_icon_play;
@ -141,6 +142,7 @@ private:
void CreateDockWindows();
void EnableMenus(bool enabled) const;
void ShowTitleBars(bool show) const;
void PrecompileCachesFromInstalledPackages(const std::map<std::string, QString>& bootable_paths);
void ShowOptionalGamePreparations(const QString& title, const QString& message, std::map<std::string, QString> game_path);
static bool InstallFileInExData(const std::string& extension, const QString& path, const std::string& filename);

View file

@ -91,7 +91,7 @@ public:
const QString xfloat = tr("Control accuracy to SPU float vectors processing.\nFixes bugs in various games at the cost of performance.\nThis setting is only applied when SPU Decoder is set to Dynamic or LLVM.");
const QString enable_thread_scheduler = tr("Control how RPCS3 utilizes the threads of your system.\nEach option heavily depends on the game and on your CPU. It's recommended to try each option to find out which performs the best.\nChanging the thread scheduler is not supported on CPUs with less than 12 threads.");
const QString spu_loop_detection = tr("Try to detect loop conditions in SPU kernels and use them as scheduling hints.\nImproves performance and reduces CPU usage.\nMay cause severe audio stuttering in rare cases.");
const QString spu_block_size = tr("This option controls the SPU analyser, particularly the size of compiled units. The Mega and Giga modes may improve performance by tying smaller units together, decreasing the number of compiled units but increasing their size.\nUse the Safe mode for maximum compatibility.");
const QString spu_block_size = tr("This option controls the SPU analyser, particularly the size of compiled units. The Mega and Giga modes may improve performance by tying smaller units together, decreasing the number of compiled units but increasing their size.\nUse the Safe mode for maximum compatibility at the cost of lower performance.");
const QString preferred_spu_threads = tr("Some SPU stages are sensitive to race conditions and allowing a limited number at a time helps alleviate performance stalls.\nSetting this to a smaller value might improve performance and reduce stuttering in some games.\nLeave this on auto if performance is negatively affected when setting a small value.");
const QString max_cpu_preempt = tr("Reduces CPU usage and power consumption, improving battery life on mobile devices. (0 means disabled)\nHigher values cause a more pronounced effect, but may cause audio or performance issues. A value of 50 or less is recommended.\nThis option forces an FPS limit because it's active when framerate is stable.\nThe lighter the game is on the hardware, the more power is saved by it. (until the preemption count barrier is reached)");

View file

@ -88,6 +88,7 @@
<ItemGroup>
<ClCompile Include="test.cpp" />
<ClCompile Include="test_fmt.cpp" />
<ClCompile Include="test_rsx_cfg.cpp" />
<ClCompile Include="test_simple_array.cpp" />
<ClCompile Include="test_address_range.cpp" />
<ClCompile Include="test_tuple.cpp" />

View file

@ -3,44 +3,47 @@
#include "util/types.hpp"
#include "util/pair.hpp"
struct some_struct
namespace utils
{
u64 v {};
char s[12] = "Hello World";
bool operator == (const some_struct& r) const
struct some_struct
{
return v == r.v && std::memcmp(s, r.s, sizeof(s)) == 0;
u64 v{};
char s[12] = "Hello World";
bool operator == (const some_struct& r) const
{
return v == r.v && std::memcmp(s, r.s, sizeof(s)) == 0;
}
};
TEST(Pair, General)
{
some_struct s{};
s.v = 1234;
utils::pair<int, some_struct> p;
EXPECT_EQ(sizeof(p), 32);
EXPECT_EQ(p.first, 0);
EXPECT_EQ(p.second, some_struct{});
p = { 666, s };
EXPECT_EQ(p.first, 666);
EXPECT_EQ(p.second, s);
const utils::pair<int, some_struct> p1 = p;
EXPECT_EQ(p.first, 666);
EXPECT_EQ(p.second, s);
EXPECT_EQ(p1.first, 666);
EXPECT_EQ(p1.second, s);
utils::pair<int, some_struct> p2 = p1;
EXPECT_EQ(p1.first, 666);
EXPECT_EQ(p1.second, s);
EXPECT_EQ(p2.first, 666);
EXPECT_EQ(p2.second, s);
utils::pair<int, some_struct> p3 = std::move(p);
EXPECT_EQ(p3.first, 666);
EXPECT_EQ(p3.second, s);
}
};
TEST(Utils, Pair)
{
some_struct s {};
s.v = 1234;
utils::pair<int, some_struct> p;
EXPECT_EQ(sizeof(p), 32);
EXPECT_EQ(p.first, 0);
EXPECT_EQ(p.second, some_struct{});
p = { 666, s };
EXPECT_EQ(p.first, 666);
EXPECT_EQ(p.second, s);
const utils::pair<int, some_struct> p1 = p;
EXPECT_EQ(p.first, 666);
EXPECT_EQ(p.second, s);
EXPECT_EQ(p1.first, 666);
EXPECT_EQ(p1.second, s);
utils::pair<int, some_struct> p2 = p1;
EXPECT_EQ(p1.first, 666);
EXPECT_EQ(p1.second, s);
EXPECT_EQ(p2.first, 666);
EXPECT_EQ(p2.second, s);
utils::pair<int, some_struct> p3 = std::move(p);
EXPECT_EQ(p3.first, 666);
EXPECT_EQ(p3.second, s);
}

View file

@ -0,0 +1,239 @@
#include <gtest/gtest.h>
#include "Emu/RSX/Common/simple_array.hpp"
#include "Emu/RSX/Program/Assembler/CFG.h"
#include "Emu/RSX/Program/RSXFragmentProgram.h"
#include <util/v128.hpp>
namespace rsx::assembler
{
// Swaps the bytes within each 16-bit half of a 32-bit word
// (per-halfword endian swap, matching the RSX FP instruction word layout).
//
// Implemented with shifts/masks instead of the previous union-based type
// punning (writing the byte array member, reading the u32 member), which is
// undefined behavior in ISO C++. The bit arithmetic produces the identical
// result on both little- and big-endian hosts.
auto swap_bytes16 = [](u32 dword) -> u32
{
	return ((dword & 0xFF00FF00u) >> 8) | ((dword & 0x00FF00FFu) << 8);
};
// Instruction mocks because we don't have a working assembler (yet)

// Builds a minimal 128-bit FP instruction word carrying only the opcode
// (and optionally the end-of-program flag) in the OPDEST field.
auto encode_instruction = [](u32 opcode, bool end = false) -> v128
{
	OPDEST dst{};
	dst.opcode = opcode;

	if (end)
	{
		dst.end = 1; // Marks the last instruction of the program
	}

	// Only the first dword is meaningful here; the hardware stores it byte-swapped per 16-bit half
	return v128::from32(swap_bytes16(dst.HEX), 0, 0, 0);
};
// Builds a mock IFE (if) instruction word.
//
// end:   instruction index where the conditional construct ends (merge point)
// _else: instruction index of the else branch; 0 means no else, in which case
//        the else offset falls through to the end offset
//
// NOTE(review): offsets appear to be encoded as instruction index * 4 (<< 2) —
// confirm the unit against the RSX FP ISA definition.
auto create_if(u32 end, u32 _else = 0)
{
	OPDEST dst{};
	dst.opcode = RSX_FP_OPCODE_IFE & 0x3Fu;

	SRC1 src1{};
	src1.else_offset = (_else ? _else : end) << 2;
	src1.opcode_is_branch = 1;

	SRC2 src2{};
	src2.end_offset = end << 2;

	return v128::from32(swap_bytes16(dst.HEX), 0, swap_bytes16(src1.HEX), swap_bytes16(src2.HEX));
}
// A straight-line program must decompose into a single basic block whose
// instructions carry the expected lengths and byte addresses
// (16 bytes per v128 instruction word).
TEST(CFG, FpToCFG_Basic)
{
	rsx::simple_array<v128> buffer = {
		encode_instruction(RSX_FP_OPCODE_ADD),
		encode_instruction(RSX_FP_OPCODE_MOV, true)
	};

	RSXFragmentProgram program{};
	program.data = buffer.data();

	FlowGraph graph = deconstruct_fragment_program(program);
	EXPECT_EQ(graph.blocks.size(), 1);
	EXPECT_EQ(graph.blocks.front().instructions.size(), 2);
	EXPECT_EQ(graph.blocks.front().instructions.front().length, 4);
	EXPECT_EQ(graph.blocks.front().instructions[0].addr, 0);
	EXPECT_EQ(graph.blocks.front().instructions[1].addr, 16);
}
// A single IF (no else) must split the program into head, branch body and
// merge blocks, connected by IF/ENDIF edges.
TEST(CFG, FpToCFG_IF)
{
	rsx::simple_array<v128> buffer = {
		encode_instruction(RSX_FP_OPCODE_ADD), // 0
		encode_instruction(RSX_FP_OPCODE_MOV), // 1
		create_if(4), // 2 (BR, 4)
		encode_instruction(RSX_FP_OPCODE_ADD), // 3
		encode_instruction(RSX_FP_OPCODE_MOV, true), // 4 (Merge block)
	};

	// (first instruction index, instruction count) per expected block, in order
	const std::pair<int, size_t> expected_block_data[3] = {
		{ 0, 3 }, // Head
		{ 3, 1 }, // Branch
		{ 4, 1 }, // Merge
	};

	RSXFragmentProgram program{};
	program.data = buffer.data();

	FlowGraph graph = deconstruct_fragment_program(program);
	ASSERT_EQ(graph.blocks.size(), 3);

	int i = 0;
	for (auto it = graph.blocks.begin(); it != graph.blocks.end(); ++it)
	{
		const auto& expected = expected_block_data[i++];
		EXPECT_EQ(it->id, expected.first);
		EXPECT_EQ(it->instructions.size(), expected.second);
	}

	// Check edges
	EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 3))->pred[0].type, EdgeType::IF);
	EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 0))->succ[0].type, EdgeType::IF);
	EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 4))->pred[0].type, EdgeType::ENDIF);
}
// Two nested IFs with distinct merge points must produce five blocks:
// head, two branch bodies and two merge blocks, ordered by address.
TEST(CFG, FpToCFG_NestedIF)
{
	rsx::simple_array<v128> buffer = {
		encode_instruction(RSX_FP_OPCODE_ADD), // 0
		encode_instruction(RSX_FP_OPCODE_MOV), // 1
		create_if(8), // 2 (BR, 8)
		encode_instruction(RSX_FP_OPCODE_ADD), // 3
		create_if(6), // 4 (BR, 6)
		encode_instruction(RSX_FP_OPCODE_MOV), // 5
		encode_instruction(RSX_FP_OPCODE_MOV), // 6 (merge block 1)
		encode_instruction(RSX_FP_OPCODE_ADD), // 7
		encode_instruction(RSX_FP_OPCODE_MOV, true) // 8 (merge block 2)
	};

	// (first instruction index, instruction count) per expected block, in order
	const std::pair<int, size_t> expected_block_data[5] = {
		{ 0, 3 }, // Head
		{ 3, 2 }, // Branch 1
		{ 5, 1 }, // Branch 2
		{ 6, 2 }, // Merge 1
		{ 8, 1 }, // Merge 2
	};

	RSXFragmentProgram program{};
	program.data = buffer.data();

	FlowGraph graph = deconstruct_fragment_program(program);
	ASSERT_EQ(graph.blocks.size(), 5);

	int i = 0;
	for (auto it = graph.blocks.begin(); it != graph.blocks.end(); ++it)
	{
		const auto& expected = expected_block_data[i++];
		EXPECT_EQ(it->id, expected.first);
		EXPECT_EQ(it->instructions.size(), expected.second);
	}
}
// Two nested IFs sharing the same merge point: the merge block must have
// both branch origins as predecessors, and edge lists must be ordered with
// the closest (innermost) edge first.
TEST(CFG, FpToCFG_NestedIF_MultiplePred)
{
	rsx::simple_array<v128> buffer = {
		encode_instruction(RSX_FP_OPCODE_ADD), // 0
		encode_instruction(RSX_FP_OPCODE_MOV), // 1
		create_if(6), // 2 (BR, 6)
		encode_instruction(RSX_FP_OPCODE_ADD), // 3
		create_if(6), // 4 (BR, 6)
		encode_instruction(RSX_FP_OPCODE_MOV), // 5
		encode_instruction(RSX_FP_OPCODE_MOV), // 6 (merge block)
		encode_instruction(RSX_FP_OPCODE_ADD), // 7
		encode_instruction(RSX_FP_OPCODE_MOV, true) // 8
	};

	// (first instruction index, instruction count) per expected block, in order
	const std::pair<int, size_t> expected_block_data[4] = {
		{ 0, 3 }, // Head
		{ 3, 2 }, // Branch 1
		{ 5, 1 }, // Branch 2
		{ 6, 3 }, // Merge
	};

	RSXFragmentProgram program{};
	program.data = buffer.data();

	FlowGraph graph = deconstruct_fragment_program(program);
	ASSERT_EQ(graph.blocks.size(), 4);

	int i = 0;
	for (auto it = graph.blocks.begin(); it != graph.blocks.end(); ++it)
	{
		const auto& expected = expected_block_data[i++];
		EXPECT_EQ(it->id, expected.first);
		EXPECT_EQ(it->instructions.size(), expected.second);
	}

	// Predecessors must be ordered, closest first
	ASSERT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 6))->pred.size(), 2);
	EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 6))->pred[0].type, EdgeType::ENDIF);
	EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 6))->pred[0].from->id, 3);
	EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 6))->pred[1].type, EdgeType::ENDIF);
	EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 6))->pred[1].from->id, 0);

	// Successors must also be ordered, closest first
	ASSERT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 0))->succ.size(), 2);
	EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 0))->succ[0].type, EdgeType::IF);
	EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 0))->succ[0].to->id, 3);
	EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 0))->succ[1].type, EdgeType::ENDIF);
	EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 0))->succ[1].to->id, 6);
}
// An IF with an explicit ELSE branch must produce four blocks:
// head, positive branch, negative (else) branch, and merge.
TEST(CFG, FpToCFG_IF_ELSE)
{
	rsx::simple_array<v128> buffer = {
		encode_instruction(RSX_FP_OPCODE_ADD), // 0
		encode_instruction(RSX_FP_OPCODE_MOV), // 1
		create_if(6, 4), // 2 (BR, 6)
		encode_instruction(RSX_FP_OPCODE_ADD), // 3
		encode_instruction(RSX_FP_OPCODE_MOV), // 4 (Else)
		encode_instruction(RSX_FP_OPCODE_ADD), // 5
		encode_instruction(RSX_FP_OPCODE_MOV, true), // 6 (Merge)
	};

	// (first instruction index, instruction count) per expected block, in order
	const std::pair<int, size_t> expected_block_data[4] = {
		{ 0, 3 }, // Head
		{ 3, 1 }, // Branch positive
		{ 4, 2 }, // Branch negative
		{ 6, 1 }, // Merge
	};

	RSXFragmentProgram program{};
	program.data = buffer.data();

	FlowGraph graph = deconstruct_fragment_program(program);
	ASSERT_EQ(graph.blocks.size(), 4);

	int i = 0;
	for (auto it = graph.blocks.begin(); it != graph.blocks.end(); ++it)
	{
		const auto& expected = expected_block_data[i++];
		EXPECT_EQ(it->id, expected.first);
		EXPECT_EQ(it->instructions.size(), expected.second);
	}
}
}

View file

@ -267,4 +267,90 @@ namespace rsx
EXPECT_EQ(std::memcmp(arr[i].second.s, "Hello World", sizeof(arr[i].second.s)), 0);
}
}
TEST(SimpleArray, DataAlignment_SmallVector)
{
struct alignas(16) some_struct {
char data[16];
};
rsx::simple_array<some_struct> arr(2);
const auto data_ptr = reinterpret_cast<uintptr_t>(arr.data());
EXPECT_EQ(data_ptr & 15, 0);
}
TEST(SimpleArray, DataAlignment_HeapAlloc)
{
	// Same alignment check as the small-vector case, but with 128 elements
	// to force the heap-allocated storage case named by the test.
	struct alignas(16) some_struct {
		char data[16];
	};

	rsx::simple_array<some_struct> arr(128);

	const auto address = reinterpret_cast<uintptr_t>(arr.data());
	EXPECT_EQ(address % 16, 0);
}
TEST(SimpleArray, DataAlignment_Overrides)
{
	// Explicit alignment template arguments (16 and 128) must be reflected
	// in the address returned by data().
	rsx::simple_array<std::byte, 16> aligned16(4);
	rsx::simple_array<std::byte, 128> aligned128(4);

	const auto address16 = reinterpret_cast<uintptr_t>(aligned16.data());
	const auto address128 = reinterpret_cast<uintptr_t>(aligned128.data());

	EXPECT_EQ(address16 % 16, 0);
	EXPECT_EQ(address128 % 128, 0);
}
TEST(SimpleArray, Find)
{
	const rsx::simple_array<int> haystack{
		0, 1, 2, 3, 4, 5, 6, 7, 8, 9
	};

	// A present value yields a pointer to the element ...
	const auto* hit = haystack.find(8);
	EXPECT_EQ(*hit, 8);

	// ... a missing value yields nullptr
	EXPECT_EQ(haystack.find(99), nullptr);
}
TEST(SimpleArray, FindIf)
{
	const rsx::simple_array<int> haystack{
		0, 1, 2, 3, 4, 5, 6, 7, 8, 9
	};

	// A matching predicate yields a pointer to the element ...
	const auto* hit = haystack.find_if(FN(x == 8));
	EXPECT_EQ(*hit, 8);

	// ... a never-matching predicate yields nullptr
	EXPECT_EQ(haystack.find_if(FN(x == 99)), nullptr);
}
TEST(AlignedAllocator, Alloc)
{
	// A 256-byte-aligned allocation must return a non-null pointer on the
	// requested alignment boundary.
	auto block = rsx::aligned_allocator::malloc<256>(16);
	const auto address = reinterpret_cast<uintptr_t>(block);
	rsx::aligned_allocator::free(block);

	EXPECT_NE(address, 0);
	EXPECT_EQ(address % 256, 0);
}
TEST(AlignedAllocator, Realloc)
{
	// Growing a block (16 -> 32 bytes) must preserve the 256-byte alignment
	// guarantee on the returned pointer.
	auto block = rsx::aligned_allocator::malloc<256>(16);
	auto grown = rsx::aligned_allocator::realloc<256>(block, 16, 32);
	const auto address = reinterpret_cast<uintptr_t>(grown);
	rsx::aligned_allocator::free(grown);

	EXPECT_NE(address, 0);
	EXPECT_EQ(address % 256, 0);
}
TEST(AlignedAllocator, Realloc_ReturnsPreviousPointerIfFits)
{
	// Shrinking (16 -> 8 bytes) fits in the existing block, so realloc must
	// hand back the original pointer instead of reallocating.
	auto original = rsx::aligned_allocator::malloc<256>(16);
	auto shrunk = rsx::aligned_allocator::realloc<256>(original, 16, 8);
	rsx::aligned_allocator::free(shrunk);

	EXPECT_EQ(original, shrunk);
}
}

View file

@ -2,113 +2,116 @@
#include "util/tuple.hpp"
struct some_struct
namespace utils
{
u64 v {};
char s[12] = "Hello World";
bool operator == (const some_struct& r) const
struct some_struct
{
return v == r.v && std::memcmp(s, r.s, sizeof(s)) == 0;
u64 v{};
char s[12] = "Hello World";
bool operator == (const some_struct& r) const
{
return v == r.v && std::memcmp(s, r.s, sizeof(s)) == 0;
}
};
// Exercises utils::tuple: size/layout, element access by index, value
// (re)initialization, const access, and copy/move assignment.
TEST(Tuple, General)
{
// Fixture value used as a non-trivial element
some_struct s{};
s.v = 1234;
// Empty tuple
utils::tuple t0 = {};
EXPECT_EQ(t0.size(), 0);
// Single element, default-initialized; no size overhead over the raw element
utils::tuple<int> t;
EXPECT_EQ(sizeof(t), sizeof(int));
EXPECT_TRUE((std::is_same_v<decltype(t.get<0>()), int&>));
EXPECT_EQ(t.size(), 1);
EXPECT_EQ(t.get<0>(), 0);
// Single element, constructed by implicit conversion from the element type
utils::tuple<int> t1 = 2;
EXPECT_EQ(sizeof(t1), sizeof(int));
EXPECT_TRUE((std::is_same_v<decltype(t1.get<0>()), int&>));
EXPECT_EQ(t1.size(), 1);
EXPECT_EQ(t1.get<0>(), 2);
// Assigning empty braces value-initializes the contents
t1 = {};
EXPECT_EQ(t1.size(), 1);
EXPECT_EQ(t1.get<0>(), 0);
// Two heterogeneous elements: size, per-element type and value checks
utils::tuple<int, some_struct> t2 = { 2, s };
EXPECT_EQ(sizeof(t2), 32);
EXPECT_EQ(t2.size(), 2);
EXPECT_TRUE((std::is_same_v<decltype(t2.get<0>()), int&>));
EXPECT_TRUE((std::is_same_v<decltype(t2.get<1>()), some_struct&>));
EXPECT_EQ(t2.get<0>(), 2);
EXPECT_EQ(t2.get<1>(), s);
t2 = {};
EXPECT_EQ(t2.size(), 2);
EXPECT_EQ(t2.get<0>(), 0);
EXPECT_EQ(t2.get<1>(), some_struct{});
// get<N>() returns mutable references
t2.get<0>() = 666;
t2.get<1>() = s;
EXPECT_EQ(t2.get<0>(), 666);
EXPECT_EQ(t2.get<1>(), s);
// Three heterogeneous elements
utils::tuple<int, some_struct, double> t3 = { 2, s, 1234.0 };
EXPECT_EQ(sizeof(t3), 40);
EXPECT_EQ(t3.size(), 3);
EXPECT_TRUE((std::is_same_v<decltype(t3.get<0>()), int&>));
EXPECT_TRUE((std::is_same_v<decltype(t3.get<1>()), some_struct&>));
EXPECT_TRUE((std::is_same_v<decltype(t3.get<2>()), double&>));
EXPECT_EQ(t3.get<0>(), 2);
EXPECT_EQ(t3.get<1>(), s);
EXPECT_EQ(t3.get<2>(), 1234.0);
t3 = {};
EXPECT_EQ(t3.size(), 3);
EXPECT_EQ(t3.get<0>(), 0);
EXPECT_EQ(t3.get<1>(), some_struct{});
EXPECT_EQ(t3.get<2>(), 0.0);
t3.get<0>() = 666;
t3.get<1>() = s;
t3.get<2>() = 7.0;
EXPECT_EQ(t3.get<0>(), 666);
EXPECT_EQ(t3.get<1>(), s);
EXPECT_EQ(t3.get<2>(), 7.0);
// const: get<N>() on a const tuple yields const references
const utils::tuple<int, some_struct> tc = { 2, s };
EXPECT_EQ(tc.size(), 2);
EXPECT_TRUE((std::is_same_v<decltype(tc.get<0>()), const int&>));
EXPECT_TRUE((std::is_same_v<decltype(tc.get<1>()), const some_struct&>));
EXPECT_EQ(tc.get<0>(), 2);
EXPECT_EQ(tc.get<1>(), s);
// assignment: copy-construct from const, copy-assign, then move-assign
const utils::tuple<int, some_struct> ta1 = { 2, s };
utils::tuple<int, some_struct> ta = ta1;
EXPECT_EQ(ta.size(), 2);
EXPECT_TRUE((std::is_same_v<decltype(ta.get<0>()), int&>));
EXPECT_TRUE((std::is_same_v<decltype(ta.get<1>()), some_struct&>));
EXPECT_EQ(ta.get<0>(), 2);
EXPECT_EQ(ta.get<1>(), s);
utils::tuple<int, some_struct> ta2 = { 2, s };
ta = ta2;
EXPECT_EQ(ta.size(), 2);
EXPECT_TRUE((std::is_same_v<decltype(ta.get<0>()), int&>));
EXPECT_TRUE((std::is_same_v<decltype(ta.get<1>()), some_struct&>));
EXPECT_EQ(ta.get<0>(), 2);
EXPECT_EQ(ta.get<1>(), s);
// The source of the copy-assignment is left unchanged
EXPECT_EQ(ta2.size(), 2);
EXPECT_TRUE((std::is_same_v<decltype(ta2.get<0>()), int&>));
EXPECT_TRUE((std::is_same_v<decltype(ta2.get<1>()), some_struct&>));
EXPECT_EQ(ta2.get<0>(), 2);
EXPECT_EQ(ta2.get<1>(), s);
// Move-assignment yields the same observable contents
ta = std::move(ta2);
EXPECT_EQ(ta.size(), 2);
EXPECT_TRUE((std::is_same_v<decltype(ta.get<0>()), int&>));
EXPECT_TRUE((std::is_same_v<decltype(ta.get<1>()), some_struct&>));
EXPECT_EQ(ta.get<0>(), 2);
EXPECT_EQ(ta.get<1>(), s);
}
};
// Exercises utils::tuple: size/layout, element access by index, value
// (re)initialization, const access, and copy/move assignment.
TEST(Utils, Tuple)
{
// Fixture value used as a non-trivial element
some_struct s {};
s.v = 1234;
// Empty tuple
utils::tuple t0 = {};
EXPECT_EQ(t0.size(), 0);
// Single element, default-initialized; no size overhead over the raw element
utils::tuple<int> t;
EXPECT_EQ(sizeof(t), sizeof(int));
EXPECT_TRUE((std::is_same_v<decltype(t.get<0>()), int&>));
EXPECT_EQ(t.size(), 1);
EXPECT_EQ(t.get<0>(), 0);
// Single element, constructed by implicit conversion from the element type
utils::tuple<int> t1 = 2;
EXPECT_EQ(sizeof(t1), sizeof(int));
EXPECT_TRUE((std::is_same_v<decltype(t1.get<0>()), int&>));
EXPECT_EQ(t1.size(), 1);
EXPECT_EQ(t1.get<0>(), 2);
// Assigning empty braces value-initializes the contents
t1 = {};
EXPECT_EQ(t1.size(), 1);
EXPECT_EQ(t1.get<0>(), 0);
// Two heterogeneous elements: size, per-element type and value checks
utils::tuple<int, some_struct> t2 = { 2, s };
EXPECT_EQ(sizeof(t2), 32);
EXPECT_EQ(t2.size(), 2);
EXPECT_TRUE((std::is_same_v<decltype(t2.get<0>()), int&>));
EXPECT_TRUE((std::is_same_v<decltype(t2.get<1>()), some_struct&>));
EXPECT_EQ(t2.get<0>(), 2);
EXPECT_EQ(t2.get<1>(), s);
t2 = {};
EXPECT_EQ(t2.size(), 2);
EXPECT_EQ(t2.get<0>(), 0);
EXPECT_EQ(t2.get<1>(), some_struct{});
// get<N>() returns mutable references
t2.get<0>() = 666;
t2.get<1>() = s;
EXPECT_EQ(t2.get<0>(), 666);
EXPECT_EQ(t2.get<1>(), s);
// Three heterogeneous elements
utils::tuple<int, some_struct, double> t3 = { 2, s, 1234.0 };
EXPECT_EQ(sizeof(t3), 40);
EXPECT_EQ(t3.size(), 3);
EXPECT_TRUE((std::is_same_v<decltype(t3.get<0>()), int&>));
EXPECT_TRUE((std::is_same_v<decltype(t3.get<1>()), some_struct&>));
EXPECT_TRUE((std::is_same_v<decltype(t3.get<2>()), double&>));
EXPECT_EQ(t3.get<0>(), 2);
EXPECT_EQ(t3.get<1>(), s);
EXPECT_EQ(t3.get<2>(), 1234.0);
t3 = {};
EXPECT_EQ(t3.size(), 3);
EXPECT_EQ(t3.get<0>(), 0);
EXPECT_EQ(t3.get<1>(), some_struct{});
EXPECT_EQ(t3.get<2>(), 0.0);
t3.get<0>() = 666;
t3.get<1>() = s;
t3.get<2>() = 7.0;
EXPECT_EQ(t3.get<0>(), 666);
EXPECT_EQ(t3.get<1>(), s);
EXPECT_EQ(t3.get<2>(), 7.0);
// const: get<N>() on a const tuple yields const references
const utils::tuple<int, some_struct> tc = { 2, s };
EXPECT_EQ(tc.size(), 2);
EXPECT_TRUE((std::is_same_v<decltype(tc.get<0>()), const int&>));
EXPECT_TRUE((std::is_same_v<decltype(tc.get<1>()), const some_struct&>));
EXPECT_EQ(tc.get<0>(), 2);
EXPECT_EQ(tc.get<1>(), s);
// assignment: copy-construct from const, copy-assign, then move-assign
const utils::tuple<int, some_struct> ta1 = { 2, s };
utils::tuple<int, some_struct> ta = ta1;
EXPECT_EQ(ta.size(), 2);
EXPECT_TRUE((std::is_same_v<decltype(ta.get<0>()), int&>));
EXPECT_TRUE((std::is_same_v<decltype(ta.get<1>()), some_struct&>));
EXPECT_EQ(ta.get<0>(), 2);
EXPECT_EQ(ta.get<1>(), s);
utils::tuple<int, some_struct> ta2 = { 2, s };
ta = ta2;
EXPECT_EQ(ta.size(), 2);
EXPECT_TRUE((std::is_same_v<decltype(ta.get<0>()), int&>));
EXPECT_TRUE((std::is_same_v<decltype(ta.get<1>()), some_struct&>));
EXPECT_EQ(ta.get<0>(), 2);
EXPECT_EQ(ta.get<1>(), s);
// The source of the copy-assignment is left unchanged
EXPECT_EQ(ta2.size(), 2);
EXPECT_TRUE((std::is_same_v<decltype(ta2.get<0>()), int&>));
EXPECT_TRUE((std::is_same_v<decltype(ta2.get<1>()), some_struct&>));
EXPECT_EQ(ta2.get<0>(), 2);
EXPECT_EQ(ta2.get<1>(), s);
// Move-assignment yields the same observable contents
ta = std::move(ta2);
EXPECT_EQ(ta.size(), 2);
EXPECT_TRUE((std::is_same_v<decltype(ta.get<0>()), int&>));
EXPECT_TRUE((std::is_same_v<decltype(ta.get<1>()), some_struct&>));
EXPECT_EQ(ta.get<0>(), 2);
EXPECT_EQ(ta.get<1>(), s);
}

View file

@ -60,7 +60,7 @@ namespace utils
#if defined(ARCH_X64)
return _m_prefetchw(const_cast<void*>(ptr));
#else
return __builtin_prefetch(ptr, 1, 0);
return __builtin_prefetch(ptr, 1, 3);
#endif
}

View file

@ -1,10 +1,12 @@
#pragma once
#include <type_traits>
namespace utils
{
// Hack. Pointer cast util to workaround UB. Use with extreme care.
template <typename T, typename U>
[[nodiscard]] T* bless(U* ptr)
template <typename T, typename U> requires (std::is_pointer_v<std::remove_reference_t<U>>)
[[nodiscard]] inline T* bless(const U& ptr)
{
#ifdef _MSC_VER
return (T*)ptr;
@ -21,3 +23,4 @@ namespace utils
#endif
}
}

View file

@ -999,17 +999,18 @@ template <typename To, typename From> requires (std::is_integral_v<decltype(std:
constexpr bool is_from_signed = std::is_signed_v<CommonFrom>;
constexpr bool is_to_signed = std::is_signed_v<CommonTo>;
constexpr auto from_mask = (is_from_signed && !is_to_signed) ? UnFrom{umax} >> 1 : UnFrom{umax};
// For unsigned/signed mismatch, create an "unsigned" compatible mask
constexpr auto from_mask = (is_from_signed && !is_to_signed && sizeof(CommonFrom) <= sizeof(CommonTo)) ? UnFrom{umax} >> 1 : UnFrom{umax};
constexpr auto to_mask = (is_to_signed && !is_from_signed) ? UnTo{umax} >> 1 : UnTo{umax};
constexpr auto mask = ~(from_mask & to_mask);
constexpr auto mask = static_cast<UnFrom>(~(from_mask & to_mask));
// Signed to unsigned always require test
// Otherwise, this is bit-wise narrowing or conversion between types of different signedness of the same size
if constexpr ((is_from_signed && !is_to_signed) || to_mask < from_mask)
// If destination ("unsigned" compatible) mask is smaller than source ("unsigned" compatible) mask
// It requires narrowing.
if constexpr (!!mask)
{
// Try to optimize test if both are of the same signedness
if (is_from_signed != is_to_signed ? !!(value & mask) : static_cast<CommonTo>(value) != value) [[unlikely]]
if (is_from_signed != is_to_signed ? !!(value & mask) : static_cast<CommonFrom>(static_cast<CommonTo>(value)) != value) [[unlikely]]
{
fmt::raw_verify_error(src_loc, u8"Narrowing error", +value);
}